From b69f4fd444ab767d6810466d492dfd39c6304d1c Mon Sep 17 00:00:00 2001 From: Lars Francke Date: Fri, 12 Apr 2024 09:10:01 +0200 Subject: [PATCH] Hopefully reduce intermediate image size by combining downloading, building and deleting in a single step for Hive & Hadoop --- hadoop/Dockerfile | 27 +++++++++++++------ hive/Dockerfile | 22 +++++++-------- ...uildi.patch => 002-HIVE-21939-3.1.3.patch} | 0 hive/stackable/patches/apply_patches.sh | 0 4 files changed, 29 insertions(+), 20 deletions(-) rename hive/stackable/patches/3.1.3/{002-HIVE-21939-protoc-2.5.0-dependence-has-broken-buildi.patch => 002-HIVE-21939-3.1.3.patch} (100%) mode change 100644 => 100755 hive/stackable/patches/apply_patches.sh diff --git a/hadoop/Dockerfile b/hadoop/Dockerfile index a569875c9..59a4695e8 100644 --- a/hadoop/Dockerfile +++ b/hadoop/Dockerfile @@ -17,7 +17,16 @@ ARG TARGETOS RUN microdnf update && \ microdnf install \ # Required for Hadoop build - cmake cyrus-sasl-devel fuse-devel gcc gcc-c++ maven openssl-devel tar xz git \ + cmake \ + cyrus-sasl-devel \ + fuse-devel \ + gcc \ + gcc-c++ \ + git \ + maven \ + openssl-devel \ + tar \ + xz \ # Required for log4shell.sh unzip zip && \ microdnf clean all @@ -26,8 +35,6 @@ WORKDIR /stackable COPY hadoop/stackable /stackable -# Build from source to enable FUSE module, and to apply custom patches. -RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . # The symlink from JMX Exporter 0.16.1 to the versionless link exists because old HDFS Operators (up until and including 23.7) used to hardcode # the version of JMX Exporter like this: "-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar" @@ -52,20 +59,24 @@ RUN curl --fail -L -s -S https://repo.stackable.tech/repository/packages/protobu tar xzf /opt/protobuf.tar.gz --strip-components 1 --no-same-owner && \ ./configure --prefix=/opt/protobuf && \ make "-j$(nproc)" && \ - make install + make install && \ + rm -rf /opt/protobuf-src ENV PROTOBUF_HOME /opt/protobuf ENV PATH "${PATH}:/opt/protobuf/bin" WORKDIR /stackable -RUN patches/apply_patches.sh ${PRODUCT} -WORKDIR /stackable/hadoop-${PRODUCT}-src # Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module -RUN mvn clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \ +# Build from source to enable FUSE module, and to apply custom patches. +RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . && \ + patches/apply_patches.sh ${PRODUCT} && \ + cd hadoop-${PRODUCT}-src && \ + mvn clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \ cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT} && \ # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves - cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin + cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin && \ + rm -rf /stackable/hadoop-${PRODUCT}-src # === # Mitigation for CVE-2021-44228 (Log4Shell) diff --git a/hive/Dockerfile b/hive/Dockerfile index ef4cd7ddb..5e883041e 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -31,17 +31,15 @@ COPY --chown=stackable:stackable hive/stackable /stackable USER stackable WORKDIR /stackable -RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hive/apache-hive-${PRODUCT}-src.tar.gz" | tar -xzC . -RUN chmod +x patches/apply_patches.sh -RUN patches/apply_patches.sh ${PRODUCT} -RUN cd /stackable/apache-hive-${PRODUCT}-src/ && \ - mvn clean package -DskipTests -Pdist -RUN cd /stackable/apache-hive-${PRODUCT}-src/ && \ +RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hive/apache-hive-${PRODUCT}-src.tar.gz" | tar -xzC . && \ + patches/apply_patches.sh ${PRODUCT} && \ + cd /stackable/apache-hive-${PRODUCT}-src/ && \ + mvn clean package -DskipTests -Pdist && \ tar -xzf packaging/target/apache-hive-${PRODUCT}-bin.tar.gz -C /stackable && \ mv /stackable/apache-hive-${PRODUCT}-bin /stackable/apache-hive-${PRODUCT} && \ ln -s /stackable/apache-hive-${PRODUCT}/ /stackable/hive && \ - cp /stackable/bin/start-metastore /stackable/hive/bin - + cp /stackable/bin/start-metastore /stackable/hive/bin && \ + rm -rf /stackable/apache-hive-${PRODUCT}-src # TODO: Remove hardcoded _new_ version # Replace the old (postgresql-9.4.1208.jre7.jar) postgresql JDBC driver with a newer one, as the old one does only support MD5 based authentication. @@ -53,7 +51,7 @@ RUN rm /stackable/apache-hive-${PRODUCT}/lib/postgresql-9.4.1208.jre7.jar && \ curl --fail -L https://repo.stackable.tech/repository/packages/pgjdbc/postgresql-42.7.2.jar -o /stackable/hive/lib/postgresql-42.7.2.jar -COPY --link --from=hadoop-builder /stackable/hadoop /stackable/hadoop +COPY --from=hadoop-builder /stackable/hadoop /stackable/hadoop # The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards # This way the build will fail should one of the files not be available anymore in a later Hadoop version! @@ -122,15 +120,15 @@ RUN microdnf update && \ USER stackable WORKDIR /stackable -COPY --link --from=builder /stackable/apache-hive-${PRODUCT} /stackable/apache-hive-${PRODUCT} +COPY --from=builder /stackable/apache-hive-${PRODUCT} /stackable/apache-hive-${PRODUCT} RUN ln -s /stackable/apache-hive-${PRODUCT}/ /stackable/hive # It is useful to see which version of Hadoop is used at a glance # Therefore the use of the full name here -COPY --link --from=builder /stackable/hadoop /stackable/hadoop-${HADOOP} +COPY --from=builder /stackable/hadoop /stackable/hadoop-${HADOOP} RUN ln -s /stackable/hadoop-${HADOOP}/ /stackable/hadoop -COPY --link --from=builder /stackable/jmx /stackable/jmx +COPY --from=builder /stackable/jmx /stackable/jmx COPY hive/licenses /licenses # Mitigation for CVE-2021-44228 (Log4Shell) diff --git a/hive/stackable/patches/3.1.3/002-HIVE-21939-protoc-2.5.0-dependence-has-broken-buildi.patch b/hive/stackable/patches/3.1.3/002-HIVE-21939-3.1.3.patch similarity index 100% rename from hive/stackable/patches/3.1.3/002-HIVE-21939-protoc-2.5.0-dependence-has-broken-buildi.patch rename to hive/stackable/patches/3.1.3/002-HIVE-21939-3.1.3.patch diff --git a/hive/stackable/patches/apply_patches.sh b/hive/stackable/patches/apply_patches.sh old mode 100644 new mode 100755