Skip to content

Commit

Permalink
Merge pull request #1088 from kermitt2/reduce-image-size
Browse the repository at this point in the history
Reduce docker image size for the NN grobid version
  • Loading branch information
kermitt2 committed Mar 11, 2024
2 parents d4822e1 + 32ba274 commit cb10576
Showing 1 changed file with 19 additions and 18 deletions.
37 changes: 19 additions & 18 deletions Dockerfile.delft
@@ -67,12 +67,12 @@ FROM tensorflow/tensorflow:2.7.0-gpu
# Set the LANG locale variable for later build steps and for the running container.
ENV LANG C.UTF-8

# update NVIDIA Cuda key (following a key rotation in April 2022)
# NOTE(review): this is a rendered diff whose +/- markers were lost. The six
# single-command RUN lines directly below appear to be the pre-commit (removed)
# side: install wget, delete apt key 7fa2af80, remove the cuda and nvidia-ml
# apt source lists, download the cuda-keyring package and install it with dpkg.
RUN apt-get install -y wget
RUN apt-key del 7fa2af80
RUN rm /etc/apt/sources.list.d/cuda.list
RUN rm /etc/apt/sources.list.d/nvidia-ml.list
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb
RUN dpkg -i cuda-keyring_1.0-1_all.deb
# Post-commit (added) side: the same six commands in the same order, chained
# with && inside one RUN, so they produce a single layer and the chain stops at
# the first command that fails.
# NOTE(review): the downloaded cuda-keyring_1.0-1_all.deb is not deleted after
# dpkg -i, so it remains in the working directory of this layer.
RUN apt-get install -y wget && \
apt-key del 7fa2af80 && \
rm /etc/apt/sources.list.d/cuda.list && \
rm /etc/apt/sources.list.d/nvidia-ml.list && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb && \
dpkg -i cuda-keyring_1.0-1_all.deb

# Add Tini
# Tini release tag; the instruction that consumes it lies outside the visible hunk.
ENV TINI_VERSION v0.19.0
@@ -96,7 +96,7 @@ COPY --from=builder /opt/grobid .
# Upgrade pip itself before installing any Python packages with it.
RUN python3 -m pip install pip --upgrade

# install DeLFT via pypi
# NOTE(review): diff markers were lost in this rendering. The first RUN below
# (delft==0.3.3) appears to be the removed line and the second (delft==0.3.4)
# the added one, i.e. the commit bumps the pinned DeLFT version.
# `requests` is installed without a version pin in both variants.
RUN pip3 install requests delft==0.3.3
RUN pip3 install requests delft==0.3.4
# link the data directory to /data
# the current working directory will most likely be /opt/grobid
RUN mkdir -p /data \
@@ -112,13 +112,14 @@ ENV JAVA_OPTS=-Xmx4g

# install jep (and temporarily the matching JDK)
# JDK_URL is the OpenJDK 17.0.2 linux-x64 tarball fetched by the curl step below.
ENV JDK_URL=https://download.java.net/java/GA/jdk17.0.2/dfd4a8d0985749f896bed50d7138ee7f/8/GPL/openjdk-17.0.2_linux-x64_bin.tar.gz
# NOTE(review): diff markers were lost in this rendering. The seven separate RUN
# lines below appear to be the pre-commit (removed) side. Each RUN creates its
# own layer, so the two trailing rm steps cannot reclaim the space that the
# tarball and the unpacked JDK already occupy in earlier layers.
RUN curl --fail --show-error --location -q ${JDK_URL} -o /tmp/openjdk.tar.gz
RUN mkdir /tmp/jdk-17
RUN tar xvfz /tmp/openjdk.tar.gz --directory /tmp/jdk-17 --strip-components 1 --no-same-owner
RUN /tmp/jdk-17/bin/javac -version
RUN JAVA_HOME=/tmp/jdk-17 pip3 install jep==4.0.2
RUN rm -f /tmp/openjdk.tar.gz
RUN rm -rf /tmp/jdk-17
# Post-commit (added) side: the same steps in one RUN, so everything happens in
# a single layer:
#   1. download the JDK tarball to /tmp/openjdk.tar.gz (curl fails on HTTP errors)
#   2. unpack it into /tmp/jdk-17, dropping the archive's top-level directory
#   3. run javac -version (presumably a sanity check that the JDK is usable)
#   4. install jep 4.0.2 with JAVA_HOME pointing at the temporary JDK
#   5. delete the tarball and the JDK again, inside the layer that created them,
#      so they do not end up in the image
RUN curl --fail --show-error --location -q ${JDK_URL} -o /tmp/openjdk.tar.gz && \
mkdir /tmp/jdk-17 && \
tar xvfz /tmp/openjdk.tar.gz --directory /tmp/jdk-17 --strip-components 1 --no-same-owner && \
/tmp/jdk-17/bin/javac -version && \
JAVA_HOME=/tmp/jdk-17 pip3 install jep==4.0.2 && \
rm -f /tmp/openjdk.tar.gz && \
rm -rf /tmp/jdk-17

# Prepend the Python 3.8 dist-packages jep directory (presumably where the
# pip3-installed jep lives) and the grobid-home native library directories to
# the dynamic linker search path, keeping any previously set value at the end.
# The grobid-home entries are relative paths, so they depend on the process
# working directory.
ENV LD_LIBRARY_PATH=/usr/local/lib/python3.8/dist-packages/jep:grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep:${LD_LIBRARY_PATH}
# remove libjep.so because we are providing our own version in the virtual env above
RUN rm /opt/grobid/grobid-home/lib/lin-64/jep/libjep.so
@@ -128,11 +129,11 @@ RUN rm /opt/grobid/grobid-home/lib/lin-64/jep/libjep.so

# Copy the embedding preload script and the resources registry from the builder
# stage into the current working directory.
COPY --from=builder /opt/grobid-source/grobid-home/scripts/preload_embeddings.py .
COPY --from=builder /opt/grobid-source/grobid-home/config/resources-registry.json .
# NOTE(review): diff markers were lost in this rendering. The two single-command
# RUN lines below appear to be the pre-commit (removed) side: run the preload
# script against the registry, then symlink /opt/delft to /opt/grobid.
RUN python3 preload_embeddings.py --registry ./resources-registry.json
RUN ln -s /opt/grobid /opt/delft
# Post-commit (added) side: the same two commands chained into one RUN (one layer).
RUN python3 preload_embeddings.py --registry ./resources-registry.json && \
ln -s /opt/grobid /opt/delft

# Pre-commit (removed) side: create a delft directory in the working directory
# and place a copy of the registry inside it.
RUN mkdir delft
RUN cp ./resources-registry.json delft/
# Post-commit (added) side: the same two commands chained into one RUN (one layer).
RUN mkdir delft && \
cp ./resources-registry.json delft/

# Sets GROBID_SERVICE_OPTS to a JVM --add-opens flag for java.base/java.lang;
# how the variable is consumed is not visible in this hunk.
ENV GROBID_SERVICE_OPTS "--add-opens java.base/java.lang=ALL-UNNAMED"

Expand Down

0 comments on commit cb10576

Please sign in to comment.