From cd1b19cf668278918ab6aa4a161d1e7534fdc811 Mon Sep 17 00:00:00 2001 From: Chenxiao Wang <50203394+Aristoeu@users.noreply.github.com> Date: Fri, 14 Apr 2023 01:07:07 -0500 Subject: [PATCH] added new dockerfile (#403) * added new dockerfile * changed Python version * changed dockerfile * remove explicit scheduler * updated shell script * replace localhost * change localhost for all pymongo * update tests * pfpath * add dockerfile in different tags * add new dockerfiles in workflow * change dockerfile names * recover change * amd64 * correct pymongo path * added pythonpath * added pythonpath * pymongo location * tests for arm64 * tests * update python * disable sse and avx in env * env CFLAGS * export CFLAGS * fix CFLAGS * cflags * change script, add docker-clean, fix localhost * changed dockerfile events to schedule * build tasks only in schedule * removed notebook and hardcoded pythonpath * deleted mpi version and moved pythonpath * deleted docker build workflow for mpi version * specified pandas version * updated base container and mongo version * Removed deprecated codecov from Python requirement --- .github/workflows/docker-publish.yml | 92 ++++++++ .github/workflows/python-package.yml | 2 +- Dockerfile | 158 +++++++++++-- Dockerfile_dev | 158 +++++++++++-- Dockerfile_mpi | 221 ++++++++++++++++++ Dockerfile_old | 102 ++++++++ docker-entrypoint.sh | 37 +++ .../global_history/test_manager_spark_dask.py | 2 +- requirements.txt | 1 + scripts/start-mspass.sh | 2 - 10 files changed, 737 insertions(+), 38 deletions(-) create mode 100644 Dockerfile_mpi create mode 100644 Dockerfile_old create mode 100644 docker-entrypoint.sh diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index f4d45235d..acd763fea 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -203,3 +203,95 @@ jobs: run: | rm -rf /tmp/.buildx-cache-dev mv /tmp/.buildx-cache-dev-new /tmp/.buildx-cache-dev + + build-mpi: + + if: ${{ github.event_name == 'schedule' }} + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + # Setup for Multi-platform image + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Cache Docker layers + uses: actions/cache@v2 + with: + path: | + /tmp/.buildx-cache-mpi + key: ${{ runner.os }}-buildx.mpi-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx.mpi- + + # Login against a Docker registry + # https://github.com/docker/login-action + - name: Log into registry ${{ env.REGISTRY }} + uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Log in to Docker Hub + uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + # Extract metadata (tags, labels) for Docker + # https://github.com/docker/metadata-action + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + with: + flavor: | + latest=true + images: | + mspass/mspass + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + # Build and push mpi image + - name: Build and push mpi image (amd64 only) + uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc + with: + context: . + file: Dockerfile_ubuntu20.04_jupyterlab + platforms: linux/amd64 + push: true + tags: | + mspass/mspass:mpi + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:mpi + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=local,src=/tmp/.buildx-cache-mpi + cache-to: type=local,dest=/tmp/.buildx-cache-mpi-new + + - name: Build and push mpi image (amd64 amd arm64) + uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc + with: + context: . + file: Dockerfile_ubuntu20.04_jupyterlab + platforms: linux/amd64,linux/arm64 + push: true + tags: | + mspass/mspass:mpi + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:mpi + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=local,src=/tmp/.buildx-cache-mpi + cache-to: type=local,dest=/tmp/.buildx-cache-mpi-new + + # Temp fix + # https://github.com/docker/build-push-action/issues/252 + # https://github.com/moby/buildkit/issues/1896 + - name: Move cache + run: | + rm -rf /tmp/.buildx-cache-mpi + mv /tmp/.buildx-cache-mpi-new /tmp/.buildx-cache-mpi diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 78739a6d5..1f1c1976e 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -35,7 +35,7 @@ jobs: - name: Install Python Dependencies run: | python -m pip install --upgrade pip - python -m pip install --upgrade --upgrade-strategy eager pytest pytest-cov codecov + python -m pip install --upgrade --upgrade-strategy eager pytest pytest-cov if [ -f requirements.txt ]; then pip install --upgrade --upgrade-strategy eager -r requirements.txt; fi - name: Install Apache Spark run: | diff --git a/Dockerfile b/Dockerfile index b4a32e196..c9c314bc0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,110 @@ #Image: mspass/mspass #Version: 0.0.1 -FROM mongo:4.4.0 +FROM ghcr.io/seisscoped/container-base:ubuntu22.04_jupyterlab LABEL maintainer="Ian Wang " +# add our user and group first to make sure their IDs get assigned consistently, regardless of whatever dependencies get added +RUN set -eux; \ + groupadd --gid 999 --system mongodb; \ + useradd --uid 999 --system --gid mongodb --home-dir /data/db mongodb; \ + mkdir -p /data/db /data/configdb; \ + chown -R mongodb:mongodb /data/db /data/configdb \ + && docker-clean + +RUN set -eux; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + dirmngr \ + gnupg \ + jq \ + numactl \ + procps \ + ; \ + rm -rf /var/lib/apt/lists/* \ + && docker-clean + +# grab "js-yaml" for parsing mongod's YAML config files (https://github.com/nodeca/js-yaml/releases) +ENV JSYAML_VERSION 3.13.1 + +RUN set -ex; \ + \ + savedAptMark="$(apt-mark showmanual)"; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + wget \ + ; \ + rm -rf /var/lib/apt/lists/*; \ + \ + dpkgArch="$(dpkg --print-architecture | awk -F- '{ print $NF }')"; \ + export GNUPGHOME="$(mktemp -d)"; \ + gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4; \ + gpgconf --kill all; \ + \ + wget -O /js-yaml.js "https://github.com/nodeca/js-yaml/raw/${JSYAML_VERSION}/dist/js-yaml.js"; \ +# TODO some sort of download verification here + \ + apt-mark auto '.*' > /dev/null; \ + apt-mark manual $savedAptMark > /dev/null; \ + apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \ + && docker-clean + +RUN mkdir /docker-entrypoint-initdb.d + +RUN set -ex; \ + export GNUPGHOME="$(mktemp -d)"; \ + set -- '39BD841E4BE5FB195A65400E6A26B1AE64C3C388'; \ + for key; do \ + gpg --batch --keyserver keyserver.ubuntu.com --recv-keys "$key"; \ + done; \ + mkdir -p /etc/apt/keyrings; \ + gpg --batch --export "$@" > /etc/apt/keyrings/mongodb.gpg; \ + gpgconf --kill all; \ + rm -rf "$GNUPGHOME" \ + && docker-clean + +# Allow build-time overrides (eg. to build image with MongoDB Enterprise version) +# Options for MONGO_PACKAGE: mongodb-org OR mongodb-enterprise +# Options for MONGO_REPO: repo.mongodb.org OR repo.mongodb.com +# Example: docker build --build-arg MONGO_PACKAGE=mongodb-enterprise --build-arg MONGO_REPO=repo.mongodb.com . +ARG MONGO_PACKAGE=mongodb-org +ARG MONGO_REPO=repo.mongodb.org +ENV MONGO_PACKAGE=${MONGO_PACKAGE} MONGO_REPO=${MONGO_REPO} + +ENV MONGO_MAJOR 6.0 +RUN echo "deb [ signed-by=/etc/apt/keyrings/mongodb.gpg ] http://$MONGO_REPO/apt/ubuntu jammy/${MONGO_PACKAGE%-unstable}/$MONGO_MAJOR multiverse" | tee "/etc/apt/sources.list.d/${MONGO_PACKAGE%-unstable}.list" + +# https://docs.mongodb.org/master/release-notes/6.0/ +ENV MONGO_VERSION 6.0.5 +# 03/08/2023, https://github.com/mongodb/mongo/tree/c9a99c120371d4d4c52cbb15dac34a36ce8d3b1d + +RUN set -x \ +# installing "mongodb-enterprise" pulls in "tzdata" which prompts for input + && export DEBIAN_FRONTEND=noninteractive \ + && apt-get update \ + && apt-get install -y \ + ${MONGO_PACKAGE}=$MONGO_VERSION \ + ${MONGO_PACKAGE}-server=$MONGO_VERSION \ + ${MONGO_PACKAGE}-shell=$MONGO_VERSION \ + ${MONGO_PACKAGE}-mongos=$MONGO_VERSION \ + ${MONGO_PACKAGE}-tools=$MONGO_VERSION \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /var/lib/mongodb \ + && mv /etc/mongod.conf /etc/mongod.conf.orig \ + && docker-clean + +VOLUME /data/db /data/configdb + +# ensure that if running as custom user that "mongosh" has a valid "HOME" +# https://github.com/docker-library/mongo/issues/524 +ENV HOME /data/db + +COPY docker-entrypoint.sh /usr/local/bin/ + +EXPOSE 27017 + RUN apt-get update \ && apt-get install -y wget ssh rsync vim-tiny less \ build-essential python3-setuptools \ @@ -14,7 +114,8 @@ RUN apt-get update \ liblapack-dev libboost-dev libboost-serialization-dev libyaml-dev \ zip unzip \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/* \ + && docker-clean ARG TARGETARCH @@ -30,44 +131,56 @@ ARG SPARK_URL=${APACHE_MIRROR}/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSIO # Download & install Spark RUN wget -qO - ${SPARK_URL} | tar -xz -C /usr/local/ \ - && cd /usr/local && ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 spark + && cd /usr/local && ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 spark \ + && docker-clean RUN ln -s /usr/local/spark/bin/pyspark /usr/bin/pyspark -RUN ln -s /usr/local/spark/python/pyspark /usr/local/lib/python3.6/dist-packages/pyspark -RUN unzip /usr/local/spark/python/lib/py4j-0.10.9-src.zip -d /usr/local/lib/python3.6/dist-packages/ +RUN python -c "import site; print(site.getsitepackages()[0])" > site_packages_path.txt && \ + PYTHON_SITE_PACKAGES_PATH=$(cat site_packages_path.txt) && \ + ln -s /usr/local/spark/python/pyspark ${PYTHON_SITE_PACKAGES_PATH}/pyspark && \ + unzip /usr/local/spark/python/lib/py4j-0.10.9-src.zip -d ${PYTHON_SITE_PACKAGES_PATH}/ \ + && docker-clean # Patch pyspark for machines don't have localhost defined in /etc/hosts RUN sed -i 's/localhost/127.0.0.1/' /usr/local/spark/python/pyspark/accumulators.py RUN unzip /usr/local/spark/python/lib/pyspark.zip \ && sed -i 's/localhost/127.0.0.1/' ./pyspark/accumulators.py \ && zip /usr/local/spark/python/lib/pyspark.zip pyspark/accumulators.py \ - && rm -r ./pyspark + && rm -r ./pyspark \ + && docker-clean # Install Python dependencies through pip +ENV DISABLE_NUMCODECS_SSE2 true +ENV DISABLE_NUMCODECS_AVX2 true +ENV CFLAGS -g ADD requirements.txt requirements.txt -RUN pip3 --no-cache-dir install --upgrade pip +RUN pip3 --no-cache-dir install --upgrade pip \ + && docker-clean RUN pip3 --no-cache-dir install numpy \ && pip3 --no-cache-dir install -r requirements.txt \ - && rm -f requirements.txt + && rm -f requirements.txt \ + && docker-clean # Download & install pybind11 ARG PYBIND11_VERSION=2.6.0 ARG PYBIND11_URL=https://github.com/pybind/pybind11/archive/v${PYBIND11_VERSION}.tar.gz RUN wget -qO - ${PYBIND11_URL} | tar -xz -C /usr/local/ \ && cd /usr/local/pybind11-${PYBIND11_VERSION} \ - && mkdir build && cd build && cmake .. -DPYBIND11_TEST=OFF && make install + && mkdir build && cd build && cmake .. -DPYBIND11_TEST=OFF && make install && docker-clean RUN rm -r /usr/local/pybind11-${PYBIND11_VERSION} # Upgrade setuptools to enable namespace package -RUN pip3 --no-cache-dir install --upgrade setuptools +RUN pip3 --no-cache-dir install --upgrade setuptools \ + && docker-clean # Add cxx library ADD cxx /mspass/cxx -RUN cd /mspass/cxx \ +RUN ln -s /opt/conda/include/yaml-cpp /usr/include/yaml-cpp && unset CFLAGS && cd /mspass/cxx \ && mkdir build && cd build \ && cmake .. \ && make \ && make install \ - && rm -rf ../build + && rm -rf ../build \ + && docker-clean # Add data and env variable for the MetadataDefinition class ADD data /mspass/data @@ -76,10 +189,11 @@ ENV MSPASS_HOME /mspass # Add setup.py to install python components ADD setup.py /mspass/setup.py ADD python /mspass/python -RUN pip3 install /mspass -v +RUN unset CFLAGS && pip3 install /mspass -v \ + && docker-clean -# Install Jupyter notebook -RUN pip3 --no-cache-dir install jedi==0.17.2 notebook==6.2.0 +# Install jedi +RUN pip3 --no-cache-dir install jedi==0.17.2 && docker-clean # Tini operates as a process subreaper for jupyter. ARG TINI_VERSION=v0.19.0 @@ -89,14 +203,24 @@ RUN chmod +x /usr/sbin/tini # Add startup script ADD scripts/start-mspass.sh /usr/sbin/start-mspass.sh RUN chmod +x /usr/sbin/start-mspass.sh +RUN chmod +x /usr/local/bin/docker-entrypoint.sh RUN sed -i '/set -- mongod "$@"/i [[ -d data ]] || mkdir data' /usr/local/bin/docker-entrypoint.sh +# replace localhost to 127.0.0.1 in pymongo to run on HPC +RUN python -c "import site; print(site.getsitepackages()[0])" > site_packages_path.txt && \ + PYTHON_SITE_PACKAGES_PATH=$(cat site_packages_path.txt) && \ + sed -i "s/localhost:27020,/127.0.0.1:27020,/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/encryption_options.py" && \ + sed -i 's/HOST = "localhost"/HOST = "127.0.0.1"/g' "${PYTHON_SITE_PACKAGES_PATH}/pymongo/mongo_client.py" && \ + sed -i "s/'localhost'/'127.0.0.1'/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/settings.py" && \ + sed -i "s/'localhost'/'127.0.0.1'/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/pool.py" +ENV PYTHONPATH="${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:${PYTHONPATH}" +ENV PATH="${SPARK_HOME}/bin:${SPARK_HOME}/python:${PATH}" + # Set the default behavior of this container ENV SPARK_MASTER_PORT 7077 ENV DASK_SCHEDULER_PORT 8786 ENV MONGODB_PORT 27017 ENV JUPYTER_PORT 8888 ENV MSPASS_ROLE all -ENV MSPASS_SCHEDULER dask - +# ENV MSPASS_SCHEDULER dask ENTRYPOINT ["/usr/sbin/tini", "-s", "-g", "--", "/usr/sbin/start-mspass.sh"] diff --git a/Dockerfile_dev b/Dockerfile_dev index 2fe041327..a6c47b791 100644 --- a/Dockerfile_dev +++ b/Dockerfile_dev @@ -1,10 +1,110 @@ #Image: mspass/mspass #Version: 0.0.1 -FROM mongo:4.4.0 +FROM ghcr.io/seisscoped/container-base:ubuntu22.04_jupyterlab LABEL maintainer="Ian Wang " +# add our user and group first to make sure their IDs get assigned consistently, regardless of whatever dependencies get added +RUN set -eux; \ + groupadd --gid 999 --system mongodb; \ + useradd --uid 999 --system --gid mongodb --home-dir /data/db mongodb; \ + mkdir -p /data/db /data/configdb; \ + chown -R mongodb:mongodb /data/db /data/configdb \ + && docker-clean + +RUN set -eux; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + dirmngr \ + gnupg \ + jq \ + numactl \ + procps \ + ; \ + rm -rf /var/lib/apt/lists/* \ + && docker-clean + +# grab "js-yaml" for parsing mongod's YAML config files (https://github.com/nodeca/js-yaml/releases) +ENV JSYAML_VERSION 3.13.1 + +RUN set -ex; \ + \ + savedAptMark="$(apt-mark showmanual)"; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + wget \ + ; \ + rm -rf /var/lib/apt/lists/*; \ + \ + dpkgArch="$(dpkg --print-architecture | awk -F- '{ print $NF }')"; \ + export GNUPGHOME="$(mktemp -d)"; \ + gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4; \ + gpgconf --kill all; \ + \ + wget -O /js-yaml.js "https://github.com/nodeca/js-yaml/raw/${JSYAML_VERSION}/dist/js-yaml.js"; \ +# TODO some sort of download verification here + \ + apt-mark auto '.*' > /dev/null; \ + apt-mark manual $savedAptMark > /dev/null; \ + apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \ + && docker-clean + +RUN mkdir /docker-entrypoint-initdb.d + +RUN set -ex; \ + export GNUPGHOME="$(mktemp -d)"; \ + set -- '39BD841E4BE5FB195A65400E6A26B1AE64C3C388'; \ + for key; do \ + gpg --batch --keyserver keyserver.ubuntu.com --recv-keys "$key"; \ + done; \ + mkdir -p /etc/apt/keyrings; \ + gpg --batch --export "$@" > /etc/apt/keyrings/mongodb.gpg; \ + gpgconf --kill all; \ + rm -rf "$GNUPGHOME" \ + && docker-clean + +# Allow build-time overrides (eg. to build image with MongoDB Enterprise version) +# Options for MONGO_PACKAGE: mongodb-org OR mongodb-enterprise +# Options for MONGO_REPO: repo.mongodb.org OR repo.mongodb.com +# Example: docker build --build-arg MONGO_PACKAGE=mongodb-enterprise --build-arg MONGO_REPO=repo.mongodb.com . +ARG MONGO_PACKAGE=mongodb-org +ARG MONGO_REPO=repo.mongodb.org +ENV MONGO_PACKAGE=${MONGO_PACKAGE} MONGO_REPO=${MONGO_REPO} + +ENV MONGO_MAJOR 6.0 +RUN echo "deb [ signed-by=/etc/apt/keyrings/mongodb.gpg ] http://$MONGO_REPO/apt/ubuntu jammy/${MONGO_PACKAGE%-unstable}/$MONGO_MAJOR multiverse" | tee "/etc/apt/sources.list.d/${MONGO_PACKAGE%-unstable}.list" + +# https://docs.mongodb.org/master/release-notes/6.0/ +ENV MONGO_VERSION 6.0.5 +# 03/08/2023, https://github.com/mongodb/mongo/tree/c9a99c120371d4d4c52cbb15dac34a36ce8d3b1d + +RUN set -x \ +# installing "mongodb-enterprise" pulls in "tzdata" which prompts for input + && export DEBIAN_FRONTEND=noninteractive \ + && apt-get update \ + && apt-get install -y \ + ${MONGO_PACKAGE}=$MONGO_VERSION \ + ${MONGO_PACKAGE}-server=$MONGO_VERSION \ + ${MONGO_PACKAGE}-shell=$MONGO_VERSION \ + ${MONGO_PACKAGE}-mongos=$MONGO_VERSION \ + ${MONGO_PACKAGE}-tools=$MONGO_VERSION \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /var/lib/mongodb \ + && mv /etc/mongod.conf /etc/mongod.conf.orig \ + && docker-clean + +VOLUME /data/db /data/configdb + +# ensure that if running as custom user that "mongosh" has a valid "HOME" +# https://github.com/docker-library/mongo/issues/524 +ENV HOME /data/db + +COPY docker-entrypoint.sh /usr/local/bin/ + +EXPOSE 27017 + RUN apt-get update \ && apt-get install -y wget ssh rsync vim-tiny less \ build-essential python3-setuptools \ @@ -14,7 +114,8 @@ RUN apt-get update \ liblapack-dev libboost-dev libboost-serialization-dev libyaml-dev \ zip unzip \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/* \ + && docker-clean ARG TARGETARCH @@ -30,44 +131,56 @@ ARG SPARK_URL=${APACHE_MIRROR}/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSIO # Download & install Spark RUN wget -qO - ${SPARK_URL} | tar -xz -C /usr/local/ \ - && cd /usr/local && ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 spark + && cd /usr/local && ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 spark \ + && docker-clean RUN ln -s /usr/local/spark/bin/pyspark /usr/bin/pyspark -RUN ln -s /usr/local/spark/python/pyspark /usr/local/lib/python3.6/dist-packages/pyspark -RUN unzip /usr/local/spark/python/lib/py4j-0.10.9-src.zip -d /usr/local/lib/python3.6/dist-packages/ +RUN python -c "import site; print(site.getsitepackages()[0])" > site_packages_path.txt && \ + PYTHON_SITE_PACKAGES_PATH=$(cat site_packages_path.txt) && \ + ln -s /usr/local/spark/python/pyspark ${PYTHON_SITE_PACKAGES_PATH}/pyspark && \ + unzip /usr/local/spark/python/lib/py4j-0.10.9-src.zip -d ${PYTHON_SITE_PACKAGES_PATH}/ \ + && docker-clean # Patch pyspark for machines don't have localhost defined in /etc/hosts RUN sed -i 's/localhost/127.0.0.1/' /usr/local/spark/python/pyspark/accumulators.py RUN unzip /usr/local/spark/python/lib/pyspark.zip \ && sed -i 's/localhost/127.0.0.1/' ./pyspark/accumulators.py \ && zip /usr/local/spark/python/lib/pyspark.zip pyspark/accumulators.py \ - && rm -r ./pyspark + && rm -r ./pyspark \ + && docker-clean # Install Python dependencies through pip +ENV DISABLE_NUMCODECS_SSE2 true +ENV DISABLE_NUMCODECS_AVX2 true +ENV CFLAGS -g ADD requirements.txt requirements.txt -RUN pip3 --no-cache-dir install --upgrade pip +RUN pip3 --no-cache-dir install --upgrade pip \ + && docker-clean RUN pip3 --no-cache-dir install numpy \ && pip3 --no-cache-dir install -r requirements.txt \ - && rm -f requirements.txt + && rm -f requirements.txt \ + && docker-clean # Download & install pybind11 ARG PYBIND11_VERSION=2.6.0 ARG PYBIND11_URL=https://github.com/pybind/pybind11/archive/v${PYBIND11_VERSION}.tar.gz RUN wget -qO - ${PYBIND11_URL} | tar -xz -C /usr/local/ \ && cd /usr/local/pybind11-${PYBIND11_VERSION} \ - && mkdir build && cd build && cmake .. -DPYBIND11_TEST=OFF && make install + && mkdir build && cd build && cmake .. -DPYBIND11_TEST=OFF && make install && docker-clean RUN rm -r /usr/local/pybind11-${PYBIND11_VERSION} # Upgrade setuptools to enable namespace package -RUN pip3 --no-cache-dir install --upgrade setuptools +RUN pip3 --no-cache-dir install --upgrade setuptools \ + && docker-clean # Add cxx library ADD cxx /mspass/cxx -RUN cd /mspass/cxx \ +RUN ln -s /opt/conda/include/yaml-cpp /usr/include/yaml-cpp && unset CFLAGS && cd /mspass/cxx \ && mkdir build && cd build \ && cmake -DCMAKE_BUILD_TYPE=Debug .. \ && make \ && make install \ - && rm -rf ../build + && rm -rf ../build \ + && docker-clean # Add data and env variable for the MetadataDefinition class ADD data /mspass/data @@ -76,10 +189,11 @@ ENV MSPASS_HOME /mspass # Add setup.py to install python components ADD setup.py /mspass/setup.py ADD python /mspass/python -RUN pip3 install --global-option build --global-option --debug /mspass -v +RUN unset CFLAGS && pip3 install --global-option build --global-option --debug /mspass -v \ + && docker-clean -# Install Jupyter notebook -RUN pip3 --no-cache-dir install jedi==0.17.2 notebook==6.2.0 +# Install jedi +RUN pip3 --no-cache-dir install jedi==0.17.2 && docker-clean # Tini operates as a process subreaper for jupyter. ARG TINI_VERSION=v0.19.0 @@ -89,14 +203,24 @@ RUN chmod +x /usr/sbin/tini # Add startup script ADD scripts/start-mspass.sh /usr/sbin/start-mspass.sh RUN chmod +x /usr/sbin/start-mspass.sh +RUN chmod +x /usr/local/bin/docker-entrypoint.sh RUN sed -i '/set -- mongod "$@"/i [[ -d data ]] || mkdir data' /usr/local/bin/docker-entrypoint.sh +# replace localhost to 127.0.0.1 in pymongo to run on HPC +RUN python -c "import site; print(site.getsitepackages()[0])" > site_packages_path.txt && \ + PYTHON_SITE_PACKAGES_PATH=$(cat site_packages_path.txt) && \ + sed -i "s/localhost:27020,/127.0.0.1:27020,/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/encryption_options.py" && \ + sed -i 's/HOST = "localhost"/HOST = "127.0.0.1"/g' "${PYTHON_SITE_PACKAGES_PATH}/pymongo/mongo_client.py" && \ + sed -i "s/'localhost'/'127.0.0.1'/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/settings.py" && \ + sed -i "s/'localhost'/'127.0.0.1'/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/pool.py" +ENV PYTHONPATH="${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:${PYTHONPATH}" +ENV PATH="${SPARK_HOME}/bin:${SPARK_HOME}/python:${PATH}" + # Set the default behavior of this container ENV SPARK_MASTER_PORT 7077 ENV DASK_SCHEDULER_PORT 8786 ENV MONGODB_PORT 27017 ENV JUPYTER_PORT 8888 ENV MSPASS_ROLE all -ENV MSPASS_SCHEDULER dask - +# ENV MSPASS_SCHEDULER dask ENTRYPOINT ["/usr/sbin/tini", "-s", "-g", "--", "/usr/sbin/start-mspass.sh"] diff --git a/Dockerfile_mpi b/Dockerfile_mpi new file mode 100644 index 000000000..e9ab55561 --- /dev/null +++ b/Dockerfile_mpi @@ -0,0 +1,221 @@ +#Image: mspass/mspass +#Version: 0.0.1 + +FROM ghcr.io/seisscoped/container-base:latest + +LABEL maintainer="Ian Wang " + +# add our user and group first to make sure their IDs get assigned consistently, regardless of whatever dependencies get added +RUN set -eux; \ + groupadd --gid 999 --system mongodb; \ + useradd --uid 999 --system --gid mongodb --home-dir /data/db mongodb; \ + mkdir -p /data/db /data/configdb; \ + chown -R mongodb:mongodb /data/db /data/configdb \ + && docker-clean + +RUN set -eux; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + dirmngr \ + gnupg \ + jq \ + numactl \ + procps \ + ; \ + rm -rf /var/lib/apt/lists/* \ + && docker-clean + +# grab "js-yaml" for parsing mongod's YAML config files (https://github.com/nodeca/js-yaml/releases) +ENV JSYAML_VERSION 3.13.1 + +RUN set -ex; \ + \ + savedAptMark="$(apt-mark showmanual)"; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + wget \ + ; \ + rm -rf /var/lib/apt/lists/*; \ + \ + dpkgArch="$(dpkg --print-architecture | awk -F- '{ print $NF }')"; \ + export GNUPGHOME="$(mktemp -d)"; \ + gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4; \ + gpgconf --kill all; \ + \ + wget -O /js-yaml.js "https://github.com/nodeca/js-yaml/raw/${JSYAML_VERSION}/dist/js-yaml.js"; \ +# TODO some sort of download verification here + \ + apt-mark auto '.*' > /dev/null; \ + apt-mark manual $savedAptMark > /dev/null; \ + apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \ + && docker-clean + +RUN mkdir /docker-entrypoint-initdb.d + +RUN set -ex; \ + export GNUPGHOME="$(mktemp -d)"; \ + set -- '39BD841E4BE5FB195A65400E6A26B1AE64C3C388'; \ + for key; do \ + gpg --batch --keyserver keyserver.ubuntu.com --recv-keys "$key"; \ + done; \ + mkdir -p /etc/apt/keyrings; \ + gpg --batch --export "$@" > /etc/apt/keyrings/mongodb.gpg; \ + gpgconf --kill all; \ + rm -rf "$GNUPGHOME" \ + && docker-clean + +# Allow build-time overrides (eg. to build image with MongoDB Enterprise version) +# Options for MONGO_PACKAGE: mongodb-org OR mongodb-enterprise +# Options for MONGO_REPO: repo.mongodb.org OR repo.mongodb.com +# Example: docker build --build-arg MONGO_PACKAGE=mongodb-enterprise --build-arg MONGO_REPO=repo.mongodb.com . +ARG MONGO_PACKAGE=mongodb-org +ARG MONGO_REPO=repo.mongodb.org +ENV MONGO_PACKAGE=${MONGO_PACKAGE} MONGO_REPO=${MONGO_REPO} + +ENV MONGO_MAJOR 6.0 +RUN echo "deb [ signed-by=/etc/apt/keyrings/mongodb.gpg ] http://$MONGO_REPO/apt/ubuntu jammy/${MONGO_PACKAGE%-unstable}/$MONGO_MAJOR multiverse" | tee "/etc/apt/sources.list.d/${MONGO_PACKAGE%-unstable}.list" + +# https://docs.mongodb.org/master/release-notes/6.0/ +ENV MONGO_VERSION 6.0.5 +# 03/08/2023, https://github.com/mongodb/mongo/tree/c9a99c120371d4d4c52cbb15dac34a36ce8d3b1d + +RUN set -x \ +# installing "mongodb-enterprise" pulls in "tzdata" which prompts for input + && export DEBIAN_FRONTEND=noninteractive \ + && apt-get update \ + && apt-get install -y \ + ${MONGO_PACKAGE}=$MONGO_VERSION \ + ${MONGO_PACKAGE}-server=$MONGO_VERSION \ + ${MONGO_PACKAGE}-shell=$MONGO_VERSION \ + ${MONGO_PACKAGE}-mongos=$MONGO_VERSION \ + ${MONGO_PACKAGE}-tools=$MONGO_VERSION \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /var/lib/mongodb \ + && mv /etc/mongod.conf /etc/mongod.conf.orig \ + && docker-clean + +VOLUME /data/db /data/configdb + +# ensure that if running as custom user that "mongosh" has a valid "HOME" +# https://github.com/docker-library/mongo/issues/524 +ENV HOME /data/db + +COPY docker-entrypoint.sh /usr/local/bin/ + +EXPOSE 27017 + +RUN apt-get update \ + && apt-get install -y wget ssh rsync vim-tiny less \ + build-essential python3-setuptools \ + python3-dev python3-pip \ + openjdk-8-jdk \ + git cmake gfortran gdb \ + liblapack-dev libboost-dev libboost-serialization-dev libyaml-dev \ + zip unzip \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && docker-clean + +ARG TARGETARCH + +# Prepare the environment +ARG SPARK_VERSION=3.0.0 + +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-${TARGETARCH} +ENV SPARK_HOME /usr/local/spark +ENV PYSPARK_PYTHON python3 + +ARG APACHE_MIRROR=https://archive.apache.org/dist +ARG SPARK_URL=${APACHE_MIRROR}/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz + +# Download & install Spark +RUN wget -qO - ${SPARK_URL} | tar -xz -C /usr/local/ \ + && cd /usr/local && ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 spark \ + && docker-clean +RUN ln -s /usr/local/spark/bin/pyspark /usr/bin/pyspark +RUN python -c "import site; print(site.getsitepackages()[0])" > site_packages_path.txt && \ + PYTHON_SITE_PACKAGES_PATH=$(cat site_packages_path.txt) && \ + ln -s /usr/local/spark/python/pyspark ${PYTHON_SITE_PACKAGES_PATH}/pyspark && \ + unzip /usr/local/spark/python/lib/py4j-0.10.9-src.zip -d ${PYTHON_SITE_PACKAGES_PATH}/ \ + && docker-clean + +# Patch pyspark for machines don't have localhost defined in /etc/hosts +RUN sed -i 's/localhost/127.0.0.1/' /usr/local/spark/python/pyspark/accumulators.py +RUN unzip /usr/local/spark/python/lib/pyspark.zip \ + && sed -i 's/localhost/127.0.0.1/' ./pyspark/accumulators.py \ + && zip /usr/local/spark/python/lib/pyspark.zip pyspark/accumulators.py \ + && rm -r ./pyspark \ + && docker-clean + +# Install Python dependencies through pip +ADD requirements.txt requirements.txt +RUN pip3 --no-cache-dir install --upgrade pip \ + && docker-clean +RUN pip3 --no-cache-dir install numpy \ + && pip3 --no-cache-dir install -r requirements.txt \ + && rm -f requirements.txt && docker-clean + +# Download & install pybind11 +ARG PYBIND11_VERSION=2.6.0 +ARG PYBIND11_URL=https://github.com/pybind/pybind11/archive/v${PYBIND11_VERSION}.tar.gz +RUN wget -qO - ${PYBIND11_URL} | tar -xz -C /usr/local/ \ + && cd /usr/local/pybind11-${PYBIND11_VERSION} \ + && mkdir build && cd build && cmake .. -DPYBIND11_TEST=OFF && make install && docker-clean +RUN rm -r /usr/local/pybind11-${PYBIND11_VERSION} + +# Upgrade setuptools to enable namespace package +RUN pip3 --no-cache-dir install --upgrade setuptools \ + && docker-clean + +# Add cxx library +ADD cxx /mspass/cxx +RUN ln -s /opt/conda/include/yaml-cpp /usr/include/yaml-cpp && cd /mspass/cxx \ + && mkdir build && cd build \ + && cmake .. \ + && make \ + && make install \ + && rm -rf ../build && docker-clean + +# Add data and env variable for the MetadataDefinition class +ADD data /mspass/data +ENV MSPASS_HOME /mspass + +# Add setup.py to install python components +ADD setup.py /mspass/setup.py +ADD python /mspass/python +RUN pip3 install /mspass -v \ + && docker-clean + +# Install jedi +RUN pip3 --no-cache-dir install jedi==0.17.2 && docker-clean + +# Tini operates as a process subreaper for jupyter. +ARG TINI_VERSION=v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${TARGETARCH} /usr/sbin/tini +RUN chmod +x /usr/sbin/tini + +# Add startup script +ADD scripts/start-mspass.sh /usr/sbin/start-mspass.sh +RUN chmod +x /usr/sbin/start-mspass.sh +RUN chmod +x /usr/local/bin/docker-entrypoint.sh +RUN sed -i '/set -- mongod "$@"/i [[ -d data ]] || mkdir data' /usr/local/bin/docker-entrypoint.sh + +# replace localhost to 127.0.0.1 in pymongo to run on HPC +RUN python -c "import site; print(site.getsitepackages()[0])" > site_packages_path.txt && \ + PYTHON_SITE_PACKAGES_PATH=$(cat site_packages_path.txt) && \ + sed -i "s/localhost:27020,/127.0.0.1:27020,/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/encryption_options.py" && \ + sed -i 's/HOST = "localhost"/HOST = "127.0.0.1"/g' "${PYTHON_SITE_PACKAGES_PATH}/pymongo/mongo_client.py" && \ + sed -i "s/'localhost'/'127.0.0.1'/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/settings.py" && \ + sed -i "s/'localhost'/'127.0.0.1'/g" "${PYTHON_SITE_PACKAGES_PATH}/pymongo/pool.py" +ENV PYTHONPATH="${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:${PYTHONPATH}" +ENV PATH="${SPARK_HOME}/bin:${SPARK_HOME}/python:${PATH}" + +# Set the default behavior of this container +ENV SPARK_MASTER_PORT 7077 +ENV DASK_SCHEDULER_PORT 8786 +ENV MONGODB_PORT 27017 +ENV JUPYTER_PORT 8888 +ENV MSPASS_ROLE all +# ENV MSPASS_SCHEDULER dask +ENTRYPOINT ["/usr/sbin/tini", "-s", "-g", "--", "/usr/sbin/start-mspass.sh"] diff --git a/Dockerfile_old b/Dockerfile_old new file mode 100644 index 000000000..b4a32e196 --- /dev/null +++ b/Dockerfile_old @@ -0,0 +1,102 @@ +#Image: mspass/mspass +#Version: 0.0.1 + +FROM mongo:4.4.0 + +LABEL maintainer="Ian Wang " + +RUN apt-get update \ + && apt-get install -y wget ssh rsync vim-tiny less \ + build-essential python3-setuptools \ + python3-dev python3-pip \ + openjdk-8-jdk \ + git cmake gfortran gdb \ + liblapack-dev libboost-dev libboost-serialization-dev libyaml-dev \ + zip unzip \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ARG TARGETARCH + +# Prepare the environment +ARG SPARK_VERSION=3.0.0 + +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-${TARGETARCH} +ENV SPARK_HOME /usr/local/spark +ENV PYSPARK_PYTHON python3 + +ARG APACHE_MIRROR=https://archive.apache.org/dist +ARG SPARK_URL=${APACHE_MIRROR}/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz + +# Download & install Spark +RUN wget -qO - ${SPARK_URL} | tar -xz -C /usr/local/ \ + && cd /usr/local && ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 spark +RUN ln -s /usr/local/spark/bin/pyspark /usr/bin/pyspark +RUN ln -s /usr/local/spark/python/pyspark /usr/local/lib/python3.6/dist-packages/pyspark +RUN unzip /usr/local/spark/python/lib/py4j-0.10.9-src.zip -d /usr/local/lib/python3.6/dist-packages/ + +# Patch pyspark for machines don't have localhost defined in /etc/hosts +RUN sed -i 's/localhost/127.0.0.1/' /usr/local/spark/python/pyspark/accumulators.py +RUN unzip /usr/local/spark/python/lib/pyspark.zip \ + && sed -i 's/localhost/127.0.0.1/' ./pyspark/accumulators.py \ + && zip /usr/local/spark/python/lib/pyspark.zip pyspark/accumulators.py \ + && rm -r ./pyspark + +# Install Python dependencies through pip +ADD requirements.txt requirements.txt +RUN pip3 --no-cache-dir install --upgrade pip +RUN pip3 --no-cache-dir install numpy \ + && pip3 --no-cache-dir install -r requirements.txt \ + && rm -f requirements.txt + +# Download & install pybind11 +ARG PYBIND11_VERSION=2.6.0 +ARG PYBIND11_URL=https://github.com/pybind/pybind11/archive/v${PYBIND11_VERSION}.tar.gz +RUN wget -qO - ${PYBIND11_URL} | tar -xz -C /usr/local/ \ + && cd /usr/local/pybind11-${PYBIND11_VERSION} \ + && mkdir build && cd build && cmake .. -DPYBIND11_TEST=OFF && make install +RUN rm -r /usr/local/pybind11-${PYBIND11_VERSION} + +# Upgrade setuptools to enable namespace package +RUN pip3 --no-cache-dir install --upgrade setuptools + +# Add cxx library +ADD cxx /mspass/cxx +RUN cd /mspass/cxx \ + && mkdir build && cd build \ + && cmake .. \ + && make \ + && make install \ + && rm -rf ../build + +# Add data and env variable for the MetadataDefinition class +ADD data /mspass/data +ENV MSPASS_HOME /mspass + +# Add setup.py to install python components +ADD setup.py /mspass/setup.py +ADD python /mspass/python +RUN pip3 install /mspass -v + +# Install Jupyter notebook +RUN pip3 --no-cache-dir install jedi==0.17.2 notebook==6.2.0 + +# Tini operates as a process subreaper for jupyter. +ARG TINI_VERSION=v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${TARGETARCH} /usr/sbin/tini +RUN chmod +x /usr/sbin/tini + +# Add startup script +ADD scripts/start-mspass.sh /usr/sbin/start-mspass.sh +RUN chmod +x /usr/sbin/start-mspass.sh +RUN sed -i '/set -- mongod "$@"/i [[ -d data ]] || mkdir data' /usr/local/bin/docker-entrypoint.sh + +# Set the default behavior of this container +ENV SPARK_MASTER_PORT 7077 +ENV DASK_SCHEDULER_PORT 8786 +ENV MONGODB_PORT 27017 +ENV JUPYTER_PORT 8888 +ENV MSPASS_ROLE all +ENV MSPASS_SCHEDULER dask + +ENTRYPOINT ["/usr/sbin/tini", "-s", "-g", "--", "/usr/sbin/start-mspass.sh"] diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 000000000..924070af1 --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set -Eeuo pipefail + +dpkgArch="$(dpkg --print-architecture)" +case "$dpkgArch" in + amd64) # https://github.com/docker-library/mongo/issues/485#issuecomment-891991814 + if ! grep -qE '^flags.* avx( .*|$)' /proc/cpuinfo; then + { + echo + echo 'WARNING: MongoDB 5.0+ requires a CPU with AVX support, and your current system does not appear to have that!' + echo ' see https://jira.mongodb.org/browse/SERVER-54407' + echo ' see also https://www.mongodb.com/community/forums/t/mongodb-5-0-cpu-intel-g4650-compatibility/116610/2' + echo ' see also https://github.com/docker-library/mongo/issues/485#issuecomment-891991814' + echo + } >&2 + fi + ;; + + arm64) # https://github.com/docker-library/mongo/issues/485#issuecomment-970864306 + # https://en.wikichip.org/wiki/arm/armv8#ARMv8_Extensions_and_Processor_Features + # http://javathunderx.blogspot.com/2018/11/cheat-sheet-for-cpuinfo-features-on.html + if ! grep -qE '^Features.* (fphp|dcpop|sha3|sm3|sm4|asimddp|sha512|sve)( .*|$)' /proc/cpuinfo; then + { + echo + echo 'WARNING: MongoDB requires ARMv8.2-A or higher, and your current system does not appear to implement any of the common features for that!' + echo ' applies to all versions ≥5.0, any of 4.4 ≥4.4.19, and any of 4.2 ≥4.2.19' + echo ' see https://jira.mongodb.org/browse/SERVER-71772' + echo ' see https://jira.mongodb.org/browse/SERVER-55178' + echo ' see also https://en.wikichip.org/wiki/arm/armv8#ARMv8_Extensions_and_Processor_Features' + echo ' see also https://github.com/docker-library/mongo/issues/485#issuecomment-970864306' + echo + } >&2 + fi + ;; +esac + +exec "$@" diff --git a/python/tests/global_history/test_manager_spark_dask.py b/python/tests/global_history/test_manager_spark_dask.py index 9034b4790..4130e1ffd 100644 --- a/python/tests/global_history/test_manager_spark_dask.py +++ b/python/tests/global_history/test_manager_spark_dask.py @@ -672,7 +672,7 @@ def test_mspass_map_with_filePath(self, spark_context): d[i].t0 = -5 # parameters string - pfPath = "python/mspasspy/data/pf/RFdeconProcessor.pf" + pfPath = "./data/pf/RFdeconProcessor.pf" pf = AntelopePf(pfPath) pf_dict = AntelopePf2dict(pf) parameter_dict = collections.OrderedDict() diff --git a/requirements.txt b/requirements.txt index 76c9cf274..31f9c5b14 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ requests==2.27.1 setuptools<=65.5.1 xarray zarr +pandas==1.5.3 \ No newline at end of file diff --git a/scripts/start-mspass.sh b/scripts/start-mspass.sh index cfbc81a65..898532736 100644 --- a/scripts/start-mspass.sh +++ b/scripts/start-mspass.sh @@ -32,8 +32,6 @@ MONGO_DATA=${MSPASS_DB_DIR}/data MONGO_LOG=${MSPASS_LOG_DIR}/mongo_log export SPARK_WORKER_DIR=${MSPASS_WORKER_DIR} export SPARK_LOG_DIR=${MSPASS_LOG_DIR} -export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip:$PYTHONPATH -export PATH=$SPARK_HOME/bin:$SPARK_HOME/python:$PATH if [ $# -eq 0 ] || [ $1 = "--batch" ]; then