# Dockerfile
# Copyright 2020 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# CUDA version of the base image; override with --build-arg CUDA_VER=<x.y>.
# NOTE: other steps in this file pin CUDA 10.1 explicitly (the libnvinfer apt
# packages, conda cudatoolkit, the ldconfig stubs path) and have to be changed
# together with this value.
ARG CUDA_VER=10.1
FROM nvidia/cuda:${CUDA_VER}-cudnn7-devel-ubuntu18.04
# Image metadata.
LABEL maintainer="yashab@iguazio.com" \
      org="iguazio.com"

# PIP_NO_CACHE_DIR: keep pip's download cache out of the image layers.
# LANG / LC_ALL:    UTF-8 locale for every process in the image.
# PATH:             put the conda install prefix (/opt/conda) ahead of the system PATH.
ENV PIP_NO_CACHE_DIR=1 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    PATH=/opt/conda/bin:$PATH

# Set default shell to /bin/bash (-u: referencing an unset variable is an error).
SHELL ["/bin/bash", "-cu"]
# Install OS-level build tools, runtime libraries, TensorRT (libnvinfer) and
# the OpenSSH client/server, then refresh the CA bundle.
# libcublas is put on hold before `apt-get upgrade` so the upgrade does not
# replace it (presumably to stay on the version the CUDA base image ships).
# Every package name is followed by " \" so continuation lines never fuse two
# names together; the apt lists and caches are removed in this same layer.
RUN apt-get update && \
    apt-mark hold libcublas-dev libcublas10 && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        build-essential \
        bzip2 \
        ca-certificates \
        cmake \
        curl \
        git \
        ibverbs-providers \
        libboost-dev \
        libboost-filesystem-dev \
        libboost-system-dev \
        libc6 \
        libglib2.0-0 \
        libibverbs1 \
        libjpeg-dev \
        libnvinfer-plugin6=6.0.1-1+cuda10.1 \
        libnvinfer6=6.0.1-1+cuda10.1 \
        libpng-dev \
        librdmacm1 \
        libreadline-dev \
        libreadline7 \
        libsm6 \
        libxext6 \
        libxrender1 \
        mercurial \
        openssh-client \
        openssh-server \
        subversion \
        wget && \
    update-ca-certificates --fresh && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Directory holding the system CA certificates.
ENV SSL_CERT_DIR=/etc/ssl/certs
# Create the sshd runtime directory, then rewrite the SSH client config so
# OpenSSH can talk to containers without asking for host-key confirmation:
# any existing StrictHostKeyChecking line is dropped and "no" is appended.
RUN mkdir -p /var/run/sshd && \
    grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
# Open MPI release to build from source; override with --build-arg OMPI=<x.y.z>.
ARG OMPI=4.0.3
# Install Open MPI.
# The release-series directory in the URL is derived from the version
# (${OMPI%.*} turns 4.0.3 into 4.0), so overriding OMPI keeps the URL valid.
# curl -f makes an HTTP error fail the build instead of saving an error page;
# -L follows redirects. The build tree is removed in the same layer.
RUN mkdir /tmp/openmpi && \
    cd /tmp/openmpi && \
    curl -fsSL -o "openmpi-${OMPI}.tar.gz" \
        "https://download.open-mpi.org/release/open-mpi/v${OMPI%.*}/openmpi-${OMPI}.tar.gz" && \
    tar zxf "openmpi-${OMPI}.tar.gz" && \
    cd "openmpi-${OMPI}" && \
    ./configure --enable-orterun-prefix-by-default && \
    make -j"$(nproc)" all && \
    make install && \
    ldconfig && \
    rm -rf /tmp/openmpi
# Opt in to launching Open MPI as root; the *_CONFIRM variable acknowledges
# the first one.
ENV OMPI_ALLOW_RUN_AS_ROOT=1 \
    OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
# Miniconda installer version. "latest" keeps the previous behavior; pass a
# versioned installer name (e.g. --build-arg MINICONDA_VERSION=py37_4.8.3) for
# reproducible builds.
ARG MINICONDA_VERSION=latest
# Download the installer from the official Anaconda host, install silently
# (-b) into /opt/conda, and make login / interactive bash shells activate the
# base environment.
RUN wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh" -O ~/installconda.sh && \
    /bin/bash ~/installconda.sh -b -p /opt/conda && \
    rm ~/installconda.sh && \
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc
# Register the conda-forge and anaconda channels, update conda itself and
# install the Python data-science / ML stack into the base environment.
# --yes answers the confirmation prompt explicitly (the build has no
# interactive terminal), and the package cache is cleaned in this same layer
# so the downloaded archives never end up in the image.
RUN conda config --add channels conda-forge && \
    conda config --add channels anaconda && \
    conda update -n base --yes conda && \
    conda install -n base --yes \
        bokeh \
        cmake \
        cudatoolkit=10.1 \
        cython \
        fsspec \
        lifelines \
        matplotlib \
        numba \
        numpy \
        pandas \
        pyarrow \
        pytest \
        scikit-build \
        scikit-learn \
        scikit-optimize \
        scikit-plot \
        scipy \
        seaborn \
        tensorflow-gpu \
        wrapt && \
    conda clean -aqy
# Install RAPIDS 0.12 (pinning Python to 3.7) into the base environment.
# --yes makes the install non-interactive; the cache is cleaned in the same
# layer so it does not add to the image size.
RUN conda install -n base --yes \
        -c rapidsai -c nvidia -c anaconda -c conda-forge -c defaults \
        rapids=0.12 python=3.7 && \
    conda clean -aqy
# Install PyTorch and torchvision built against CUDA 10.1 from the pytorch
# channel. --yes makes the install non-interactive; the cache is cleaned in
# the same layer so it does not add to the image size.
RUN conda install -n base --yes -c pytorch pytorch torchvision cudatoolkit=10.1 && \
    conda clean -aqy
# Bring pip itself up to date before the pip-based installs that follow.
RUN python -m pip install -U pip
# Build and install Horovod with NCCL allreduce/broadcast and both the
# TensorFlow and PyTorch integrations enabled. ldconfig is first pointed at
# the CUDA stub libraries for the build and re-run without arguments afterwards.
# TODO: MAKEFLAGS="-j1" works around a transient concurrency problem when
# installing horovod. Remove it when possible (it should be safe to remove once
# the build succeeds ~5 times without it).
RUN ldconfig /usr/local/cuda-10.1/targets/x86_64-linux/lib/stubs && \
    MAKEFLAGS="-j1" \
    HOROVOD_GPU_ALLREDUCE=NCCL \
    HOROVOD_GPU_BROADCAST=NCCL \
    HOROVOD_WITH_TENSORFLOW=1 \
    HOROVOD_WITH_PYTORCH=1 \
    python -m pip install 'horovod~=0.20.0' && \
    ldconfig
# Remove conda's caches (all of them, quietly, without prompting).
# NOTE(review): this runs in its own layer, so files created by earlier RUN
# steps still occupy space in those earlier layers.
RUN conda clean --all --quiet --yes
WORKDIR /mlrun

# Install the image's pinned requirements with pip. The dask package pinning
# is done here with pip as well (noted as the faster option). Only the
# requirements file is copied at this point.
COPY dockerfiles/models-gpu/requirements.txt ./requirements.txt
RUN python -m pip install --requirement requirements.txt
# Copy the whole build context into the working directory and install it with
# its "complete" extra. The requirement is quoted so the shell can never treat
# [complete] as a glob character class and expand it to a matching dot-file.
COPY . .
RUN python -m pip install '.[complete]'
# Git ref (branch or tag) of mlrun/mlutils to install.
ARG MLRUN_MLUTILS_GITHUB_TAG=development
# Not referenced by any command in this file; presumably given a fresh value
# to invalidate the build cache for the install below (TODO confirm).
ARG MLRUN_MLUTILS_CACHE_DATE=initial
# Install mlutils straight from GitHub at the selected ref.
RUN python -m pip install git+https://github.com/mlrun/mlutils.git@${MLRUN_MLUTILS_GITHUB_TAG}