Skip to content

Commit

Permalink
added new dockerfile (#403)
Browse files Browse the repository at this point in the history
* added new dockerfile

* changed Python version

* changed dockerfile

* remove explicit scheduler

* updated shell script

* replace localhost

* change localhost for all pymongo

* update tests

* pfpath

* add dockerfile in different tags

* add new dockerfiles in workflow

* change dockerfile names

* recover change

* amd64

* correct pymongo path

* added pythonpath

* added pythonpath

* pymongo location

* tests for arm64

* tests

* update python

* disable sse and avx in env

* env CFLAGS

* export CFLAGS

* fix CFLAGS

* cflags

* change script, add docker-clean, fix localhost

* changed dockerfile events to schedule

* build tasks only in schedule

* removed notebook and hardcoded pythonpath

* deleted mpi version and moved pythonpath

* deleted docker build workflow for mpi version

* specified pandas version

* updated base container and mongo version

* Removed deprecated codecov from Python requirement
  • Loading branch information
Aristoeu committed Apr 14, 2023
1 parent 551ffab commit cd1b19c
Show file tree
Hide file tree
Showing 10 changed files with 737 additions and 38 deletions.
92 changes: 92 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,95 @@ jobs:
run: |
rm -rf /tmp/.buildx-cache-dev
mv /tmp/.buildx-cache-dev-new /tmp/.buildx-cache-dev
build-mpi:
  # Builds and publishes the ":mpi" image tags. Only runs for scheduled workflow
  # triggers, never for pushes or pull requests.
  if: ${{ github.event_name == 'schedule' }}
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write

  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    # Setup for Multi-platform image
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v2

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2

    # actions/cache v1/v2 have been retired by GitHub; v4 takes the same inputs.
    - name: Cache Docker layers
      uses: actions/cache@v4
      with:
        path: |
          /tmp/.buildx-cache-mpi
        key: ${{ runner.os }}-buildx.mpi-${{ github.sha }}
        restore-keys: |
          ${{ runner.os }}-buildx.mpi-

    # Login against a Docker registry
    # https://github.com/docker/login-action
    - name: Log into registry ${{ env.REGISTRY }}
      uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Log in to Docker Hub
      uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}

    # Extract metadata (tags, labels) for Docker
    # https://github.com/docker/metadata-action
    - name: Extract Docker metadata
      id: meta
      uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
      with:
        flavor: |
          latest=true
        images: |
          mspass/mspass
          ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

    # Build and push mpi image.
    # The amd64-only image is pushed first; the multi-platform build below then
    # pushes the same tags again with both architectures.
    - name: Build and push mpi image (amd64 only)
      uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
      with:
        context: .
        file: Dockerfile_ubuntu20.04_jupyterlab
        platforms: linux/amd64
        push: true
        tags: |
          mspass/mspass:mpi
          ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:mpi
        labels: ${{ steps.meta.outputs.labels }}
        cache-from: type=local,src=/tmp/.buildx-cache-mpi
        cache-to: type=local,dest=/tmp/.buildx-cache-mpi-new

    - name: Build and push mpi image (amd64 and arm64)
      uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
      with:
        context: .
        file: Dockerfile_ubuntu20.04_jupyterlab
        platforms: linux/amd64,linux/arm64
        push: true
        tags: |
          mspass/mspass:mpi
          ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:mpi
        labels: ${{ steps.meta.outputs.labels }}
        cache-from: type=local,src=/tmp/.buildx-cache-mpi
        cache-to: type=local,dest=/tmp/.buildx-cache-mpi-new

    # Temp fix: replace the restored cache directory with the freshly written one.
    # https://github.com/docker/build-push-action/issues/252
    # https://github.com/moby/buildkit/issues/1896
    - name: Move cache
      run: |
        rm -rf /tmp/.buildx-cache-mpi
        mv /tmp/.buildx-cache-mpi-new /tmp/.buildx-cache-mpi
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
- name: Install Python Dependencies
run: |
python -m pip install --upgrade pip
python -m pip install --upgrade --upgrade-strategy eager pytest pytest-cov codecov
python -m pip install --upgrade --upgrade-strategy eager pytest pytest-cov
if [ -f requirements.txt ]; then pip install --upgrade --upgrade-strategy eager -r requirements.txt; fi
- name: Install Apache Spark
run: |
Expand Down
158 changes: 141 additions & 17 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,110 @@
#Image: mspass/mspass
#Version: 0.0.1

# Single base image. MongoDB is installed by the steps that follow instead of
# being inherited from a mongo base image.
FROM ghcr.io/seisscoped/container-base:ubuntu22.04_jupyterlab

LABEL maintainer="Ian Wang <yinzhi.wang.cug@gmail.com>"

# Create the mongodb system account (fixed UID/GID 999) before any packages are
# installed, so its IDs stay the same regardless of what later layers add; then
# create the data and config directories and hand them to that account.
# NOTE(review): docker-clean is not defined in this file; presumably it is
# provided by the base image.
RUN set -eux \
    && groupadd --system --gid 999 mongodb \
    && useradd --system --uid 999 --gid mongodb --home-dir /data/db mongodb \
    && mkdir -p /data/db /data/configdb \
    && chown -R mongodb:mongodb /data/db /data/configdb \
    && docker-clean

# Base tooling installed ahead of the MongoDB steps (TLS roots, GnuPG and its
# network helper, jq, numactl, procps). The apt package lists are removed in the
# same layer so they do not stay in the image.
RUN set -eux \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates dirmngr gnupg \
        jq numactl procps \
    && rm -rf /var/lib/apt/lists/* \
    && docker-clean

# grab "js-yaml" for parsing mongod's YAML config files (https://github.com/nodeca/js-yaml/releases)
ENV JSYAML_VERSION=3.13.1

# Download js-yaml.js with a temporarily installed wget, then restore the set of
# manually-installed packages recorded up front so wget is purged again.
# No GPG key is fetched here: nothing in this step verifies a signature, so a
# keyserver lookup would only add a build-time failure point and leave a
# temporary keyring directory behind in the layer.
# TODO some sort of download verification here
RUN set -ex; \
    savedAptMark="$(apt-mark showmanual)"; \
    apt-get update; \
    apt-get install -y --no-install-recommends wget; \
    rm -rf /var/lib/apt/lists/*; \
    \
    wget -O /js-yaml.js "https://github.com/nodeca/js-yaml/raw/${JSYAML_VERSION}/dist/js-yaml.js"; \
    \
    apt-mark auto '.*' > /dev/null; \
    apt-mark manual $savedAptMark > /dev/null; \
    apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false; \
    docker-clean

# Create the (empty) /docker-entrypoint-initdb.d directory.
# NOTE(review): presumably scanned by docker-entrypoint.sh for init scripts — confirm against that script.
RUN mkdir /docker-entrypoint-initdb.d

# Fetch the signing key by fingerprint into a throw-away GnuPG home, export it to
# /etc/apt/keyrings/mongodb.gpg, then stop the gpg daemons and delete the
# temporary home so no keyring state is left in the layer.
RUN set -ex; \
    mkdir -p /etc/apt/keyrings; \
    GNUPGHOME="$(mktemp -d)"; \
    export GNUPGHOME; \
    set -- '39BD841E4BE5FB195A65400E6A26B1AE64C3C388'; \
    for fingerprint in "$@"; do \
        gpg --batch --keyserver keyserver.ubuntu.com --recv-keys "$fingerprint"; \
    done; \
    gpg --batch --export "$@" > /etc/apt/keyrings/mongodb.gpg; \
    gpgconf --kill all; \
    rm -rf "$GNUPGHOME"; \
    docker-clean

# Build-time overrides (e.g. to build the image with MongoDB Enterprise):
#   MONGO_PACKAGE: mongodb-org OR mongodb-enterprise
#   MONGO_REPO:    repo.mongodb.org OR repo.mongodb.com
# Example:
#   docker build --build-arg MONGO_PACKAGE=mongodb-enterprise --build-arg MONGO_REPO=repo.mongodb.com .
ARG MONGO_PACKAGE=mongodb-org
ARG MONGO_REPO=repo.mongodb.org

# Persist the chosen package/repo plus the release series used in the repo path.
ENV MONGO_PACKAGE=${MONGO_PACKAGE} \
    MONGO_REPO=${MONGO_REPO} \
    MONGO_MAJOR=6.0

# Register the apt source (Ubuntu jammy), trusted only via the mongodb.gpg keyring.
RUN echo "deb [ signed-by=/etc/apt/keyrings/mongodb.gpg ] http://$MONGO_REPO/apt/ubuntu jammy/${MONGO_PACKAGE%-unstable}/$MONGO_MAJOR multiverse" | tee "/etc/apt/sources.list.d/${MONGO_PACKAGE%-unstable}.list"

# Release notes: https://docs.mongodb.org/master/release-notes/6.0/
# 03/08/2023, https://github.com/mongodb/mongo/tree/c9a99c120371d4d4c52cbb15dac34a36ce8d3b1d
ENV MONGO_VERSION=6.0.5

# Install the pinned MongoDB packages. DEBIAN_FRONTEND is exported for this step
# only, because installing "mongodb-enterprise" pulls in "tzdata", which prompts
# for input. The packaged data directory is removed and the stock config is moved
# aside to /etc/mongod.conf.orig.
RUN set -x \
    && export DEBIAN_FRONTEND=noninteractive \
    && apt-get update \
    && apt-get install -y \
        "${MONGO_PACKAGE}=${MONGO_VERSION}" \
        "${MONGO_PACKAGE}-server=${MONGO_VERSION}" \
        "${MONGO_PACKAGE}-shell=${MONGO_VERSION}" \
        "${MONGO_PACKAGE}-mongos=${MONGO_VERSION}" \
        "${MONGO_PACKAGE}-tools=${MONGO_VERSION}" \
    && rm -rf /var/lib/apt/lists/* /var/lib/mongodb \
    && mv /etc/mongod.conf /etc/mongod.conf.orig \
    && docker-clean

# Database and config directories are declared as volumes.
VOLUME ["/data/db", "/data/configdb"]

# Give "mongosh" a valid HOME even when the container runs as a custom user
# (https://github.com/docker-library/mongo/issues/524).
ENV HOME=/data/db

COPY docker-entrypoint.sh /usr/local/bin/

# mongod port (documentation only; publishing is done at run time).
EXPOSE 27017

RUN apt-get update \
&& apt-get install -y wget ssh rsync vim-tiny less \
build-essential python3-setuptools \
Expand All @@ -14,7 +114,8 @@ RUN apt-get update \
liblapack-dev libboost-dev libboost-serialization-dev libyaml-dev \
zip unzip \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
&& rm -rf /var/lib/apt/lists/* \
&& docker-clean

ARG TARGETARCH

Expand All @@ -30,44 +131,56 @@ ARG SPARK_URL=${APACHE_MIRROR}/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSIO

# Download & install Spark
# SPARK_URL and SPARK_VERSION come from the ARGs declared above this step.
RUN wget -qO - ${SPARK_URL} | tar -xz -C /usr/local/ \
    && cd /usr/local && ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 spark \
    && docker-clean
RUN ln -s /usr/local/spark/bin/pyspark /usr/bin/pyspark

# Link pyspark and unpack the bundled py4j into the interpreter's own
# site-packages directory, asked of Python at build time rather than hard-coding
# a versioned path. The path is captured with command substitution so no scratch
# file is left in the image. The py4j archive name is tied to the Spark release.
RUN PYTHON_SITE_PACKAGES_PATH="$(python -c 'import site; print(site.getsitepackages()[0])')" \
    && ln -s /usr/local/spark/python/pyspark "${PYTHON_SITE_PACKAGES_PATH}/pyspark" \
    && unzip /usr/local/spark/python/lib/py4j-0.10.9-src.zip -d "${PYTHON_SITE_PACKAGES_PATH}/" \
    && docker-clean

# Patch pyspark for machines that don't have localhost defined in /etc/hosts.
# accumulators.py exists twice: unpacked under spark/python and inside
# pyspark.zip. Both copies are patched; the zipped one is extracted, edited and
# written back into the archive, and the extracted tree is removed in the same layer.
RUN sed -i 's/localhost/127.0.0.1/' /usr/local/spark/python/pyspark/accumulators.py
RUN unzip /usr/local/spark/python/lib/pyspark.zip \
    && sed -i 's/localhost/127.0.0.1/' ./pyspark/accumulators.py \
    && zip /usr/local/spark/python/lib/pyspark.zip pyspark/accumulators.py \
    && rm -r ./pyspark \
    && docker-clean

# Install Python dependencies through pip.
# These variables are set with ENV, so they apply to every later build step and
# to the running container.
# NOTE(review): the DISABLE_NUMCODECS_* switches are presumably read by the
# numcodecs build to skip SSE2/AVX2 code paths (for arm64) — confirm.
ENV DISABLE_NUMCODECS_SSE2=true \
    DISABLE_NUMCODECS_AVX2=true \
    CFLAGS=-g
COPY requirements.txt requirements.txt
RUN pip3 --no-cache-dir install --upgrade pip \
    && docker-clean
# numpy is installed first, before the rest of the requirements.
RUN pip3 --no-cache-dir install numpy \
    && pip3 --no-cache-dir install -r requirements.txt \
    && rm -f requirements.txt \
    && docker-clean

# Download & install pybind11.
# The unpacked source tree is deleted in the same RUN that creates it: removing
# it in a later layer would hide the files but not reduce the image size.
ARG PYBIND11_VERSION=2.6.0
ARG PYBIND11_URL=https://github.com/pybind/pybind11/archive/v${PYBIND11_VERSION}.tar.gz
RUN wget -qO - ${PYBIND11_URL} | tar -xz -C /usr/local/ \
    && cd /usr/local/pybind11-${PYBIND11_VERSION} \
    && mkdir build && cd build && cmake .. -DPYBIND11_TEST=OFF && make install \
    && cd / \
    && rm -r /usr/local/pybind11-${PYBIND11_VERSION} \
    && docker-clean

# Upgrade setuptools to enable namespace package
RUN pip3 --no-cache-dir install --upgrade setuptools \
    && docker-clean

# Add cxx library
COPY cxx /mspass/cxx
# Build and install the C++ library:
#  - the conda-provided yaml-cpp headers are linked into /usr/include so the
#    build can find them;
#  - CFLAGS (set image-wide via ENV) is unset for this build only;
#  - the build directory is removed in the same layer.
RUN ln -s /opt/conda/include/yaml-cpp /usr/include/yaml-cpp && unset CFLAGS && cd /mspass/cxx \
    && mkdir build && cd build \
    && cmake .. \
    && make \
    && make install \
    && rm -rf ../build \
    && docker-clean

# Add data and env variable for the MetadataDefinition class
ADD data /mspass/data
Expand All @@ -76,10 +189,11 @@ ENV MSPASS_HOME /mspass
# Add setup.py to install python components
COPY setup.py /mspass/setup.py
COPY python /mspass/python
# CFLAGS (set image-wide via ENV) is unset for this install only. The pip cache is
# disabled, as in the other pip steps, so it does not end up in the layer.
RUN unset CFLAGS && pip3 --no-cache-dir install /mspass -v \
    && docker-clean

# Install jedi (pinned). The notebook package is not installed by this step.
RUN pip3 --no-cache-dir install jedi==0.17.2 && docker-clean

# Tini operates as a process subreaper for jupyter.
ARG TINI_VERSION=v0.19.0
Expand All @@ -89,14 +203,24 @@ RUN chmod +x /usr/sbin/tini
# Add startup script
COPY scripts/start-mspass.sh /usr/sbin/start-mspass.sh
# Make both the startup script and the MongoDB entrypoint executable, then patch
# the entrypoint: insert `[[ -d data ]] || mkdir data` immediately before its
# `set -- mongod "$@"` line.
RUN chmod +x /usr/sbin/start-mspass.sh /usr/local/bin/docker-entrypoint.sh \
    && sed -i '/set -- mongod "$@"/i [[ -d data ]] || mkdir data' /usr/local/bin/docker-entrypoint.sh

# Replace localhost with 127.0.0.1 in pymongo to run on HPC.
# The site-packages directory is asked of Python at build time and captured with
# command substitution, so no scratch file is left in the image. Each sed
# rewrites the localhost literal(s) in one pymongo module.
RUN PYTHON_SITE_PACKAGES_PATH="$(python -c 'import site; print(site.getsitepackages()[0])')" && \
    sed -i "s/localhost:27020,/127.0.0.1:27020,/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/encryption_options.py" && \
    sed -i 's/HOST = "localhost"/HOST = "127.0.0.1"/g' "${PYTHON_SITE_PACKAGES_PATH}/pymongo/mongo_client.py" && \
    sed -i "s/'localhost'/'127.0.0.1'/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/settings.py" && \
    sed -i "s/'localhost'/'127.0.0.1'/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/pool.py"
# Prepend Spark's Python sources and its bundled py4j archive to PYTHONPATH, and
# Spark's bin and python directories to PATH.
# NOTE(review): SPARK_HOME is not set in the visible part of this file — confirm
# it is defined earlier, otherwise these entries expand to "/python", "/bin", etc.
ENV PYTHONPATH="${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:${PYTHONPATH}"
ENV PATH="${SPARK_HOME}/bin:${SPARK_HOME}/python:${PATH}"

# Set the default behavior of this container
ENV SPARK_MASTER_PORT=7077 \
    DASK_SCHEDULER_PORT=8786 \
    MONGODB_PORT=27017 \
    JUPYTER_PORT=8888 \
    MSPASS_ROLE=all

# No default MSPASS_SCHEDULER is set; the former default is kept for reference.
# ENV MSPASS_SCHEDULER dask

# tini runs the startup script: -s registers it as a subreaper and -g forwards
# signals to the child's whole process group.
ENTRYPOINT ["/usr/sbin/tini", "-s", "-g", "--", "/usr/sbin/start-mspass.sh"]
Loading

0 comments on commit cd1b19c

Please sign in to comment.