From f9592c8385ab0bf3f8e5b39f417df72f29b29cc6 Mon Sep 17 00:00:00 2001
From: Eli Uriegas
Date: Mon, 1 Feb 2021 11:37:38 -0800
Subject: [PATCH] conda: Add a way to build singular conda images

Adds an option to build conda builder images with only one version of
CUDA installed effectively reducing image sizes from 20GB+ to ~10GB

Signed-off-by: Eli Uriegas
---
Review notes (text between the "---" separator and the first file diff is
commentary and is not part of the commit message or of any hunk):
* NOTE(review): the line breaks of this patch were rebuilt from the hunk
  headers and the diffstat. The placement of blank context lines and any
  runs of spaces inside a line are a best guess -- TODO confirm against the
  pre-image of conda/Dockerfile before applying.
* NOTE(review): the removed and re-added "ADD ./java/jni.h" / "ENV PATH"
  lines are textually identical here; presumably they differed only in
  whitespace that is no longer visible -- verify.
* NOTE(review): the From: header carries no e-mail address, which git am
  normally requires -- confirm how this patch is meant to be applied.
* README: a bare "docker push <repo>" pushes only the :latest tag on
  Docker >= 20.10, and build_all_docker.sh never builds :latest, so the
  documented command now passes --all-tags.

 conda/Dockerfile          | 43 ++++++++++++++++++++++++---------------
 conda/README.md           |  8 ++++----
 conda/build_all_docker.sh |  9 ++++++++
 conda/build_docker.sh     | 41 +++++++++++++++++++++++++++++++++++++
 4 files changed, 81 insertions(+), 20 deletions(-)
 create mode 100755 conda/build_all_docker.sh
 create mode 100755 conda/build_docker.sh

diff --git a/conda/Dockerfile b/conda/Dockerfile
index 790526dac..adf344801 100644
--- a/conda/Dockerfile
+++ b/conda/Dockerfile
@@ -1,3 +1,5 @@
+ARG CUDA_VERSION=10.2
+ARG BASE_TARGET=cuda${CUDA_VERSION}
 FROM nvidia/cuda:9.2-devel-centos7 as base
 
 ENV LC_ALL en_US.UTF-8
@@ -19,6 +21,7 @@ ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH
 ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH
 
 RUN yum install -y autoconf aclocal automake make sudo
+RUN rm -rf /usr/local/cuda-*
 
 FROM base as patchelf
 # Install patchelf
@@ -35,43 +38,51 @@ RUN /opt/conda/bin/conda install -y conda-package-handling=1.6.0
 
 # Install CUDA
 FROM base as cuda
+RUN rm -rf /usr/local/cuda-*
 ADD ./common/install_cuda.sh install_cuda.sh
 
-FROM cuda as cuda92
+FROM cuda as cuda9.2
 RUN bash ./install_cuda.sh 9.2
+ENV DESIRED_CUDA=9.2
 
-FROM cuda as cuda101
+FROM cuda as cuda10.1
 RUN bash ./install_cuda.sh 10.1
+ENV DESIRED_CUDA=10.1
 
-FROM cuda as cuda102
+FROM cuda as cuda10.2
 RUN bash ./install_cuda.sh 10.2
+ENV DESIRED_CUDA=10.2
 
-FROM cuda as cuda110
+FROM cuda as cuda11.0
 RUN bash ./install_cuda.sh 11.0
+ENV DESIRED_CUDA=11.0
 
-FROM cuda as cuda111
+FROM cuda as cuda11.1
 RUN bash ./install_cuda.sh 11.1
+ENV DESIRED_CUDA=11.1
 
-FROM cuda as cuda112
+FROM cuda as cuda11.2
 RUN bash ./install_cuda.sh 11.2
+ENV DESIRED_CUDA=11.2
 
 # Install MNIST test data
 FROM base as mnist
 ADD ./common/install_mnist.sh install_mnist.sh
 RUN bash ./install_mnist.sh
 
-FROM base as final
+FROM base as all_cuda
+COPY --from=cuda9.2 /usr/local/cuda-9.2 /usr/local/cuda-9.2
+COPY --from=cuda10.1 /usr/local/cuda-10.1 /usr/local/cuda-10.1
+COPY --from=cuda10.2 /usr/local/cuda-10.2 /usr/local/cuda-10.2
+COPY --from=cuda11.0 /usr/local/cuda-11.0 /usr/local/cuda-11.0
+COPY --from=cuda11.1 /usr/local/cuda-11.1 /usr/local/cuda-11.1
+COPY --from=cuda11.2 /usr/local/cuda-11.2 /usr/local/cuda-11.2
+
+FROM ${BASE_TARGET} as final
 COPY --from=patchelf /patchelf /usr/local/bin/patchelf
 COPY --from=conda /opt/conda /opt/conda
-RUN rm -rf /usr/local/cuda-*
-COPY --from=cuda92 /usr/local/cuda-9.2 /usr/local/cuda-9.2
-COPY --from=cuda101 /usr/local/cuda-10.1 /usr/local/cuda-10.1
-COPY --from=cuda102 /usr/local/cuda-10.2 /usr/local/cuda-10.2
-COPY --from=cuda110 /usr/local/cuda-11.0 /usr/local/cuda-11.0
-COPY --from=cuda111 /usr/local/cuda-11.1 /usr/local/cuda-11.1
-COPY --from=cuda112 /usr/local/cuda-11.2 /usr/local/cuda-11.2
-ADD ./java/jni.h /usr/local/include/jni.h
-ENV PATH /opt/conda/bin:$PATH
+ADD ./java/jni.h /usr/local/include/jni.h
+ENV PATH /opt/conda/bin:$PATH
 COPY --from=mnist /usr/local/mnist /usr/local/mnist
 RUN rm -rf /usr/local/cuda
 RUN chmod o+rw /usr/local
diff --git a/conda/README.md b/conda/README.md
index 0208bcb5f..7b2594e07 100644
--- a/conda/README.md
+++ b/conda/README.md
@@ -6,12 +6,12 @@
 - `git add pytorch-$BUILD_VERSION`
 - Run `./build_pytorch.sh` on an OSX machine and a Linux machine
 
-## build base docker image
+## build base docker images
 
 ```sh
-cd ..
-docker build -t soumith/conda-cuda -f conda/Dockerfile .
-docker push soumith/conda-cuda
+conda/build_all_docker.sh
+# Will push all of the images
+docker push --all-tags pytorch/conda-builder
 ```
 
 ## building pytorch / torchvision etc.
diff --git a/conda/build_all_docker.sh b/conda/build_all_docker.sh
new file mode 100755
index 000000000..2e45fef03
--- /dev/null
+++ b/conda/build_all_docker.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# Builds one pytorch/conda-builder image per entry of the loop below.
+set -euo pipefail
+
+# Resolve the helper from the repository root so this works from any directory.
+TOPDIR=$(git rev-parse --show-toplevel)
+for CUDA_VERSION in 11.2 11.1 11.0 10.2 10.1 cpu; do
+  CUDA_VERSION="${CUDA_VERSION}" "${TOPDIR}/conda/build_docker.sh"
+done
diff --git a/conda/build_docker.sh b/conda/build_docker.sh
new file mode 100755
index 000000000..082a2669c
--- /dev/null
+++ b/conda/build_docker.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+set -euo pipefail  # stop on a failed build so a stale image is never tagged
+
+export DOCKER_BUILDKIT=1
+TOPDIR=$(git rev-parse --show-toplevel)
+CUDA_VERSION=${CUDA_VERSION:-10.2}  # "cpu", "all", or a CUDA version such as 10.2
+
+case ${CUDA_VERSION} in
+  cpu)
+    BASE_TARGET=base
+    DOCKER_TAG=cpu
+    ;;
+  all)
+    BASE_TARGET=all_cuda
+    DOCKER_TAG=latest
+    ;;
+  *)
+    BASE_TARGET=cuda${CUDA_VERSION}
+    DOCKER_TAG=cuda${CUDA_VERSION}
+    ;;
+esac
+
+(
+  set -x
+  docker build \
+    --target final \
+    --build-arg "BASE_TARGET=${BASE_TARGET}" \
+    --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
+    -t "pytorch/conda-builder:${DOCKER_TAG}" \
+    -f "${TOPDIR}/conda/Dockerfile" \
+    "${TOPDIR}"
+)
+
+if [[ ${DOCKER_TAG} == cuda* ]]; then
+  # Meant for legacy scripts since they only do the version without the "."
+  # TODO: Eventually remove this
+  (
+    set -x
+    docker tag "pytorch/conda-builder:${DOCKER_TAG}" "pytorch/conda-builder:cuda${CUDA_VERSION/./}"
+  )
+fi