diff --git a/elasticdl/docker/Dockerfile b/elasticdl/docker/Dockerfile
index 91da12b6e..2d22605c4 100644
--- a/elasticdl/docker/Dockerfile
+++ b/elasticdl/docker/Dockerfile
@@ -3,13 +3,30 @@ ARG BASE_IMAGE
 
 FROM ${BASE_IMAGE} as dev
 ARG EXTRA_PYPI_INDEX=https://pypi.org/simple
+ARG HOROVOD_COMMIT_ID="3108a24"
 
 COPY elasticdl/docker/bashrc /etc/bash.bashrc
 RUN chmod a+rx /etc/bash.bashrc
 
-RUN apt-get -qq update && \
-    apt-get -qq install -y unzip curl git software-properties-common g++ wget \
-    shellcheck libeigen3-dev clang-format > /dev/null && \
+RUN apt-get -qq update && apt-get -qq install -y \
+    unzip \
+    curl \
+    git \
+    software-properties-common \
+    g++ \
+    wget \
+    build-essential \
+    cmake \
+    vim \
+    ca-certificates \
+    libjpeg-dev \
+    libpng-dev \
+    librdmacm1 \
+    libibverbs1 \
+    ibverbs-providers \
+    shellcheck \
+    libeigen3-dev \
+    clang-format > /dev/null && \
     python -m pip install --quiet --upgrade pip
 
 COPY elasticdl_client/requirements.txt /requirements.txt
@@ -61,6 +78,8 @@ COPY elasticdl/python/data/recordio_gen/heart_recordio_gen.py /scripts/heart_rec
 
 FROM dev as allreduce
 
+RUN pip install future typing
+
 # Note that pip is having issue downloading PyTorch on manylinux so we use curl
 # to download it instead
 RUN curl -sLo torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl \
@@ -72,3 +91,16 @@ RUN cd /root && git clone --depth=1 https://github.com/caicloud/ftlib.git
 RUN cd /root/ftlib && python -m pip install --quiet -r requirements.txt
 RUN cd /root/ftlib/ftlib/consensus/gossip && bash ./gen_shared_lib.sh
 RUN cp -r /root/ftlib/ftlib /usr/local/lib/python3.6/dist-packages/ftlib
+
+# The latest package of Horovod does not support elastic training,
+# so we clone the repository and install it from source.
+# Clone into a directory named after the pinned commit, then check that commit
+# out explicitly; `-b master` alone would install whatever master points at.
+ENV HOROVOD_PATH=/tmp/${HOROVOD_COMMIT_ID}
+RUN git clone https://github.com/horovod/horovod.git "${HOROVOD_PATH}" \
+    && cd "${HOROVOD_PATH}" \
+    && git checkout "${HOROVOD_COMMIT_ID}" \
+    && git submodule update --init --recursive
+
+RUN cd "${HOROVOD_PATH}" && HOROVOD_WITHOUT_MPI=1 HOROVOD_WITHOUT_MXNET=1 \
+    HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 python setup.py install