Skip to content

Commit

Permalink
Merge pull request #3615 from lissyx/docker-sztd
Browse files Browse the repository at this point in the history
Optimize a bit Docker
  • Loading branch information
lissyx committed Apr 9, 2021
2 parents ab134af + acecbc3 commit 0ce835a
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 131 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ jobs:
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
submodules: 'recursive'
fetch-depth: 1
- run: |
make Dockerfile.${{ matrix.template }} \
DEEPSPEECH_REPO=https://github.com/${{ github.repository }} \
DEEPSPEECH_SHA=${{ github.sha }}
- run: |
docker build -t app:${{ matrix.template }} -f Dockerfile.${{ matrix.template }} .
mkdir /tmp/empty
- run: |
cd /tmp/empty; docker build -t app:${{ matrix.template }} -f ${{ github.workspace }}/Dockerfile.${{ matrix.template }} .
- run: |
docker save app:${{ matrix.template}} | zstd -o app_${{ matrix.template }}.zstd
157 changes: 67 additions & 90 deletions Dockerfile.build.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# Need the devel version because we need /usr/include/cudnn.h
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04

ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
DEEPSPEECH_SHA=#DEEPSPEECH_SHA#

# >> START Install base software

Expand Down Expand Up @@ -39,62 +39,59 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
sox \
unzip \
wget \
zlib1g-dev

RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1

# Install Bazel
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb"
RUN dpkg -i bazel_*.deb

# Try and free some space
RUN rm -rf /var/lib/apt/lists/*
zlib1g-dev; \
update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
update-alternatives --install /usr/bin/python python /usr/bin/python3 1; \
# Install Bazel \
curl -LO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb" && dpkg -i bazel_*.deb; \
# Try and free some space \
rm -rf /var/lib/apt/lists/* bazel_*.deb

# << END Install base software

# >> START Configure Tensorflow Build

# GPU Environment Setup
ENV TF_NEED_ROCM 0
ENV TF_NEED_OPENCL_SYCL 0
ENV TF_NEED_OPENCL 0
ENV TF_NEED_CUDA 1
ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/"
ENV TF_CUDA_VERSION 10.1
ENV TF_CUDNN_VERSION 7.6
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
ENV TF_NCCL_VERSION 2.8

# Common Environment Setup
ENV TF_BUILD_CONTAINER_TYPE GPU
ENV TF_BUILD_OPTIONS OPT
ENV TF_BUILD_DISABLE_GCP 1
ENV TF_BUILD_ENABLE_XLA 0
ENV TF_BUILD_PYTHON_VERSION PYTHON3
ENV TF_BUILD_IS_OPT OPT
ENV TF_BUILD_IS_PIP PIP

# Other Parameters
ENV CC_OPT_FLAGS -mavx -mavx2 -msse4.1 -msse4.2 -mfma
ENV TF_NEED_GCP 0
ENV TF_NEED_HDFS 0
ENV TF_NEED_JEMALLOC 1
ENV TF_NEED_OPENCL 0
ENV TF_CUDA_CLANG 0
ENV TF_NEED_MKL 0
ENV TF_ENABLE_XLA 0
ENV TF_NEED_AWS 0
ENV TF_NEED_KAFKA 0
ENV TF_NEED_NGRAPH 0
ENV TF_DOWNLOAD_CLANG 0
ENV TF_NEED_TENSORRT 0
ENV TF_NEED_GDR 0
ENV TF_NEED_VERBS 0
ENV TF_NEED_OPENCL_SYCL 0

ENV PYTHON_BIN_PATH /usr/bin/python3.6
ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
ENV TF_NEED_ROCM=0 \
TF_NEED_OPENCL_SYCL=0 \
TF_NEED_OPENCL=0 \
TF_NEED_CUDA=1 \
TF_CUDA_PATHS="/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" \
TF_CUDA_VERSION=10.1 \
TF_CUDNN_VERSION=7.6 \
TF_CUDA_COMPUTE_CAPABILITIES=6.0 \
TF_NCCL_VERSION=2.8 \
# Common Environment Setup \
TF_BUILD_CONTAINER_TYPE=GPU \
TF_BUILD_OPTIONS=OPT \
TF_BUILD_DISABLE_GCP=1 \
TF_BUILD_ENABLE_XLA=0 \
TF_BUILD_PYTHON_VERSION=PYTHON3 \
TF_BUILD_IS_OPT=OPT \
TF_BUILD_IS_PIP=PIP \
# Build client.cc and install Python client and decoder bindings \
TFDIR=/DeepSpeech/tensorflow \
# Allow Python printing utf-8 \
PYTHONIOENCODING=UTF-8 \
# Other Parameters \
CC_OPT_FLAGS="-mavx -mavx2 -msse4.1 -msse4.2 -mfma" \
TF_NEED_GCP=0 \
TF_NEED_HDFS=0 \
TF_NEED_JEMALLOC=1 \
TF_NEED_OPENCL=0 \
TF_CUDA_CLANG=0 \
TF_NEED_MKL=0 \
TF_ENABLE_XLA=0 \
TF_NEED_AWS=0 \
TF_NEED_KAFKA=0 \
TF_NEED_NGRAPH=0 \
TF_DOWNLOAD_CLANG=0 \
TF_NEED_TENSORRT=0 \
TF_NEED_GDR=0 \
TF_NEED_VERBS=0 \
TF_NEED_OPENCL_SYCL=0 \
PYTHON_BIN_PATH=/usr/bin/python3.6 \
PYTHON_LIB_PATH=/usr/local/lib/python3.6/dist-packages

# << END Configure Tensorflow Build

Expand All @@ -103,37 +100,31 @@ ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
# Running bazel inside a `docker build` command causes trouble, cf:
# https://github.com/bazelbuild/bazel/issues/134
# The easiest solution is to set up a bazelrc file forcing --batch.
RUN echo "startup --batch" >>/etc/bazel.bazelrc
# Similarly, we need to work around sandboxing issues:
# https://github.com/bazelbuild/bazel/issues/418
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
>>/etc/bazel.bazelrc
# Append both Bazel settings to /etc/bazel.bazelrc in a single layer. The two
# writes are chained with `&&` so the step fails if the first append fails.
RUN echo "startup --batch" >>/etc/bazel.bazelrc && \
    echo "build --spawn_strategy=standalone --genrule_strategy=standalone" >> /etc/bazel.bazelrc

# << END Configure Bazel

WORKDIR /

RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech
WORKDIR /DeepSpeech
RUN git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA
RUN git submodule sync tensorflow/ && git submodule update --init tensorflow/
RUN git submodule sync kenlm/ && git submodule update --init kenlm/
# Clone the repository, pin it to the requested revision, then sync and
# initialise the tensorflow and kenlm submodules, all in one layer.
# Every command is chained with `&&`: with `;` a failed fetch/checkout or
# submodule update would be ignored and the step would still succeed as long
# as the last command did.
RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech && \
    cd /DeepSpeech && \
    git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA && \
    git submodule sync tensorflow/ && git submodule update --init tensorflow/ && \
    git submodule sync kenlm/ && git submodule update --init kenlm/

# >> START Build and bind

WORKDIR /DeepSpeech/tensorflow

# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471
RUN ./configure

# Using CPU optimizations:
# -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx.
# Adding --config=cuda flag to build using CUDA.

# Passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment

# Build DeepSpeech
RUN bazel build \
RUN cd /DeepSpeech/tensorflow && ./configure && bazel build \
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
--config=monolithic \
--config=cuda \
Expand All @@ -151,36 +142,22 @@ RUN bazel build \
--copt=-fvisibility=hidden \
//native_client:libdeepspeech.so \
--verbose_failures \
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} && \
cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ && \
rm -fr /root/.cache/*

# Copy built libs to /DeepSpeech/native_client
RUN cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/

# Build client.cc and install Python client and decoder bindings
ENV TFDIR /DeepSpeech/tensorflow

RUN nproc

WORKDIR /DeepSpeech/native_client
RUN make NUM_PROCESSES=$(nproc) deepspeech

WORKDIR /DeepSpeech
RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/python/dist/*.whl

RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
# Build the native client binary, then the Python and CTC decoder bindings,
# installing each wheel right after it is built (single layer).
# The commands are chained with `&&` so a failed build or install stops the
# step instead of being masked by the exit status of the last command.
RUN cd /DeepSpeech/native_client && make NUM_PROCESSES=$(nproc) deepspeech && \
    cd /DeepSpeech/native_client/python && make NUM_PROCESSES=$(nproc) bindings && \
    pip3 install --upgrade dist/*.whl && \
    cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings && \
    pip3 install --upgrade dist/*.whl

# << END Build and bind

# Allow Python printing utf-8
ENV PYTHONIOENCODING UTF-8

# Build KenLM in /DeepSpeech/kenlm folder
WORKDIR /DeepSpeech/kenlm
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj
RUN ls -hal
RUN mkdir -p build && \
# Download and unpack Eigen 3.3.8 into the current directory, then configure
# and build KenLM in ./build against it.
# Chained with `&&` (instead of `;`) so a failed extraction aborts the step
# before cmake runs.
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj && \
    mkdir -p build && \
    cd build && \
    EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
    make -j $(nproc)
Expand Down
64 changes: 27 additions & 37 deletions Dockerfile.train.tmpl
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"

FROM tensorflow/tensorflow:1.15.4-gpu-py3
ENV DEBIAN_FRONTEND=noninteractive

ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
ENV DEBIAN_FRONTEND=noninteractive \
DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \
DEEPSPEECH_SHA=#DEEPSPEECH_SHA#

RUN apt-get update && apt-get install -y --no-install-recommends \
apt-utils \
Expand All @@ -20,48 +19,39 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
python3-venv \
unzip \
xz-utils \
wget

# We need to remove it because it's breaking deepspeech install later with
# weird errors about setuptools
RUN apt-get purge -y python3-xdg

# Install dependencies for audio augmentation
RUN apt-get install -y --no-install-recommends libopus0 libsndfile1

# Try and free some space
RUN rm -rf /var/lib/apt/lists/*
wget && \
# We need to remove it because it's breaking deepspeech install later with \
# weird errors about setuptools \
apt-get purge -y python3-xdg && \
# Install dependencies for audio augmentation \
apt-get install -y --no-install-recommends libopus0 libsndfile1 && \
# Try and free some space \
rm -rf /var/lib/apt/lists/*

WORKDIR /
RUN git clone $DEEPSPEECH_REPO DeepSpeech

WORKDIR /DeepSpeech
RUN git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA
RUN git submodule sync kenlm/ && git submodule update --init kenlm/
RUN git clone $DEEPSPEECH_REPO DeepSpeech && \
cd /DeepSpeech && git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA && \
git submodule sync kenlm/ && git submodule update --init kenlm/

# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
RUN cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings && \
pip3 install --upgrade dist/*.whl

# Prepare deps
RUN pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0

# Install DeepSpeech
# - No need for the decoder since we did it earlier
# - There is already correct TensorFlow GPU installed on the base image,
# we don't want to break that
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .

# Tool to convert output graph for inference
RUN curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > convert_graphdef_memmapped_format

RUN chmod +x convert_graphdef_memmapped_format
RUN cd /DeepSpeech && pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0 && \
# Install DeepSpeech \
# - No need for the decoder since we did it earlier \
# - There is already correct TensorFlow GPU installed on the base image, \
# we don't want to break that \
DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e . && \
# Tool to convert output graph for inference \
curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > convert_graphdef_memmapped_format && \
chmod +x convert_graphdef_memmapped_format

# Build KenLM to generate new scorers
WORKDIR /DeepSpeech/kenlm
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj
RUN ls -hal
RUN mkdir -p build && \
RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj && \
mkdir -p build && \
cd build && \
EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \
make -j $(nproc)
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
DEEPSPEECH_SHA ?= origin/master
DEEPSPEECH_SHA ?= master

Dockerfile%: Dockerfile%.tmpl
sed \
Expand Down

0 comments on commit 0ce835a

Please sign in to comment.