diff --git a/CHANGELOG.md b/CHANGELOG.md index 22499dc2e..f21441e3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,11 +14,13 @@ All notable changes to this project will be documented in this file. - stackable-base: bump ubi8-minimal image to latest 8.9 ([#514]). - Bump ubi8-rust-builder toolchain to `1.74.0` ([#517]). - GH workflows: make preflight an independent manual workflow and update to version 1.7.2 ([#519]). +- hadoop: Build from source ([#526]). [#493]: https://github.com/stackabletech/docker-images/pull/493 [#514]: https://github.com/stackabletech/docker-images/pull/514 [#517]: https://github.com/stackabletech/docker-images/pull/517 [#519]: https://github.com/stackabletech/docker-images/pull/519 +[#526]: https://github.com/stackabletech/docker-images/pull/526 ## [23.11.0] - 2023-11-30 diff --git a/conf.py b/conf.py index d7a96cdd2..866c2aa27 100644 --- a/conf.py +++ b/conf.py @@ -63,6 +63,7 @@ "java-base": "11", "java": "11", "jmx_exporter": "0.20.0", + "protobuf": "2.5.0", "topology_provider": "0.1.0" }, { @@ -70,6 +71,7 @@ "java-base": "11", "java": "11", "jmx_exporter": "0.20.0", + "protobuf": "2.5.0", "topology_provider": "0.1.0" }, { @@ -77,6 +79,7 @@ "java-base": "11", "java": "11", "jmx_exporter": "0.20.0", + "protobuf": "3.7.1", "topology_provider": "0.1.0" }, { @@ -84,6 +87,7 @@ "java-base": "11", "java": "11", "jmx_exporter": "0.20.0", + "protobuf": "3.7.1", "topology_provider": "0.1.0" }, ], diff --git a/hadoop/Dockerfile b/hadoop/Dockerfile index 649fab279..b63a76bac 100644 --- a/hadoop/Dockerfile +++ b/hadoop/Dockerfile @@ -4,40 +4,27 @@ FROM stackable/image/java-base AS builder ARG PRODUCT ARG JAVA ARG JMX_EXPORTER +ARG PROTOBUF ARG TOPOLOGY_PROVIDER # https://github.com/hadolint/hadolint/wiki/DL4006 SHELL ["/bin/bash", "-o", "pipefail", "-c"] -# unzip & zip are required for log4shell.sh # All others are required for the FUSE build RUN microdnf update && \ microdnf install \ - cmake \ - cyrus-sasl-devel \ - fuse-devel \ - gcc \ - 
gcc-c++ \ - java-${JAVA}-openjdk-devel \ - maven \ - openssl-devel \ - tar \ - unzip \ - zip && \ + # Required for Hadoop build + cmake cyrus-sasl-devel fuse-devel gcc gcc-c++ java-${JAVA}-openjdk-devel maven openssl-devel tar xz git \ + # Required for log4shell.sh + unzip zip && \ microdnf clean all WORKDIR /stackable -# This is needed here because it creates the JMX directory, we could create it any other way but this works COPY hadoop/stackable /stackable -# The source is needed to build FUSE. The rest of the src package will not make it into the final image. -# Both the src and binary variants extract into different root folders -RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . && \ - curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}.tar.gz" | tar -xzC . && \ - ln -s "/stackable/hadoop-${PRODUCT}" /stackable/hadoop && \ - rm -rf /stackable/hadoop/lib/native/examples && \ - rm -rf /stackable/hadoop/share/doc +# Build from source to enable FUSE module, and to apply custom patches. +RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . 
# The symlink from JMX Exporter 0.16.1 to the versionless link exists because old HDFS Operators (up until and including 23.7) used to hardcode # the version of JMX Exporter like this: "-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar" @@ -50,6 +37,28 @@ RUN curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jm ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar && \ ln -s /stackable/jmx/jmx_prometheus_javaagent.jar /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar +# This Protobuf version is the exact version as used in the Hadoop Dockerfile +# See https://github.com/apache/hadoop/blob/trunk/dev-support/docker/pkg-resolver/install-protobuf.sh +# (this was hardcoded in the Dockerfile in earlier versions of Hadoop, make sure to look at the exact version in Github) +WORKDIR /opt/protobuf-src +RUN curl --fail -L -s -S https://repo.stackable.tech/repository/packages/protobuf/protobuf-java-${PROTOBUF}.tar.gz -o /opt/protobuf.tar.gz && \ + tar xzf /opt/protobuf.tar.gz --strip-components 1 --no-same-owner && \ + ./configure --prefix=/opt/protobuf && \ + make "-j$(nproc)" && \ + make install + +ENV PROTOBUF_HOME /opt/protobuf +ENV PATH "${PATH}:/opt/protobuf/bin" + +WORKDIR /stackable +RUN patches/apply_patches.sh ${PRODUCT} + +WORKDIR /stackable/hadoop-${PRODUCT}-src +# Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module +RUN mvn clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \ + cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT} && \ + # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves + cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin # === # Mitigation for CVE-2021-44228 (Log4Shell) @@ -74,33 +83,6 @@ COPY 
shared/log4shell_scanner /bin/log4shell_scanner RUN /bin/log4shell_scanner s "/stackable/hadoop-${PRODUCT}" # === - -# This Protobuf version is the exact version as used in the Hadoop Dockerfile -# See https://github.com/apache/hadoop/blob/trunk/dev-support/docker/pkg-resolver/install-protobuf.sh -# (this was hardcoded in the Dockerfile in earlier versions of Hadoop, make sure to look at the exact version in Github) -# For now all versions of Hadoop we support use Protobuf 3.7.1 so we can hardcode it here. -# Should it ever differ between versions we'll need to make this a variable as well. -RUN mkdir -p /opt/protobuf-src && \ - curl --fail -L -s -S https://repo.stackable.tech/repository/packages/protobuf/protobuf-java-3.7.1.tar.gz -o /opt/protobuf.tar.gz && \ - tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src --no-same-owner && \ - cd /opt/protobuf-src && \ - ./configure --prefix=/opt/protobuf && \ - make "-j$(nproc)" && \ - make install && \ - cd /root && \ - rm -rf /opt/protobuf-src - -ENV PROTOBUF_HOME /opt/protobuf -ENV PATH "${PATH}:/opt/protobuf/bin" - -WORKDIR /stackable/hadoop-${PRODUCT}-src/hadoop-hdfs-project/hadoop-hdfs-native-client - -# This command comes from hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/doc/README -RUN mvn clean package -Pnative -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \ - cp target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop/bin && \ - rm -rf /stackable/hadoop-${PRODUCT}-src - - # Final Image FROM stackable/image/java-base diff --git a/hadoop/stackable/patches/3.2.2/001-HADOOP-15767-3.2.2.patch b/hadoop/stackable/patches/3.2.2/001-HADOOP-15767-3.2.2.patch new file mode 100644 index 000000000..8b7c9c7b4 --- /dev/null +++ b/hadoop/stackable/patches/3.2.2/001-HADOOP-15767-3.2.2.patch @@ -0,0 +1,180 @@ +diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml +index fa4a838babb2..708228c90daa 100644 +--- 
a/hadoop-common-project/hadoop-common/pom.xml ++++ b/hadoop-common-project/hadoop-common/pom.xml +@@ -612,48 +612,6 @@ + + + +- +- org.codehaus.mojo +- native-maven-plugin +- +- +- compile +- +- javah +- +- +- ${env.JAVA_HOME}/bin/javah +- +- org.apache.hadoop.io.compress.zlib.ZlibCompressor +- org.apache.hadoop.io.compress.zlib.ZlibDecompressor +- org.apache.hadoop.io.compress.bzip2.Bzip2Compressor +- org.apache.hadoop.io.compress.bzip2.Bzip2Decompressor +- org.apache.hadoop.security.JniBasedUnixGroupsMapping +- org.apache.hadoop.io.nativeio.NativeIO +- org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory +- org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping +- org.apache.hadoop.io.compress.snappy.SnappyCompressor +- org.apache.hadoop.io.compress.snappy.SnappyDecompressor +- org.apache.hadoop.io.compress.zstd.ZStandardCompressor +- org.apache.hadoop.io.compress.zstd.ZStandardDecompressor +- org.apache.hadoop.io.compress.lz4.Lz4Compressor +- org.apache.hadoop.io.compress.lz4.Lz4Decompressor +- org.apache.hadoop.io.erasurecode.ErasureCodeNative +- org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawEncoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawDecoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawEncoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawDecoder +- org.apache.hadoop.crypto.OpensslCipher +- org.apache.hadoop.crypto.random.OpensslSecureRandom +- org.apache.hadoop.util.NativeCrc32 +- org.apache.hadoop.net.unix.DomainSocket +- org.apache.hadoop.net.unix.DomainSocketWatcher +- +- ${project.build.directory}/native/javah +- +- +- +- + + org.apache.hadoop + hadoop-maven-plugins +@@ -770,43 +728,6 @@ + + + +- +- org.codehaus.mojo +- native-maven-plugin +- +- +- compile +- +- javah +- +- +- ${env.JAVA_HOME}/bin/javah +- +- org.apache.hadoop.io.compress.zlib.ZlibCompressor +- org.apache.hadoop.io.compress.zlib.ZlibDecompressor +- org.apache.hadoop.security.JniBasedUnixGroupsMapping +- 
org.apache.hadoop.io.nativeio.NativeIO +- org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping +- org.apache.hadoop.io.compress.snappy.SnappyCompressor +- org.apache.hadoop.io.compress.snappy.SnappyDecompressor +- org.apache.hadoop.io.compress.zstd.ZStandardCompressor +- org.apache.hadoop.io.compress.zstd.ZStandardDecompressor +- org.apache.hadoop.io.compress.lz4.Lz4Compressor +- org.apache.hadoop.io.compress.lz4.Lz4Decompressor +- org.apache.hadoop.io.erasurecode.ErasureCodeNative +- org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawEncoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawDecoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawEncoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawDecoder +- org.apache.hadoop.crypto.OpensslCipher +- org.apache.hadoop.crypto.random.OpensslSecureRandom +- org.apache.hadoop.util.NativeCrc32 +- +- ${project.build.directory}/native/javah +- +- +- +- + + org.codehaus.mojo + exec-maven-plugin +diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml +index 1c9f5ee67ea7..c16a798ad956 100644 +--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml ++++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml +@@ -139,26 +139,6 @@ + + + +- +- org.codehaus.mojo +- native-maven-plugin +- +- +- compile +- +- javah +- +- +- ${env.JAVA_HOME}/bin/javah +- +- org.apache.hadoop.mapred.nativetask.NativeBatchProcessor +- org.apache.hadoop.mapred.nativetask.NativeRuntime +- +- ${project.build.directory}/native/javah +- +- +- +- + + org.apache.maven.plugins + maven-antrun-plugin +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index e76347962b29..e1ee4b117682 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -171,7 +171,6 @@ + 1.9 + 1.3.1 + 
1.0-beta-1 +- 1.0-alpha-8 + 900 + 1.11.563 + 2.3.4 +@@ -1609,11 +1608,6 @@ + + + +- +- org.codehaus.mojo +- native-maven-plugin +- ${native-maven-plugin.version} +- + + org.codehaus.mojo + make-maven-plugin +@@ -2079,6 +2073,27 @@ + + + ++ ++ native ++ ++ false ++ ++ ++ ++ ++ org.apache.maven.plugins ++ maven-compiler-plugin ++ ++ ++ ++ -h ++ ${project.build.directory}/native/javah/ ++ ++ ++ ++ ++ ++ + + + diff --git a/hadoop/stackable/patches/3.2.4/001-HADOOP-15767-3.2.4.patch b/hadoop/stackable/patches/3.2.4/001-HADOOP-15767-3.2.4.patch new file mode 100644 index 000000000..821ada355 --- /dev/null +++ b/hadoop/stackable/patches/3.2.4/001-HADOOP-15767-3.2.4.patch @@ -0,0 +1,180 @@ +diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml +index 4acc2a4ef4a0..eaf5a545fd3d 100644 +--- a/hadoop-common-project/hadoop-common/pom.xml ++++ b/hadoop-common-project/hadoop-common/pom.xml +@@ -617,48 +617,6 @@ + + + +- +- org.codehaus.mojo +- native-maven-plugin +- +- +- compile +- +- javah +- +- +- ${env.JAVA_HOME}/bin/javah +- +- org.apache.hadoop.io.compress.zlib.ZlibCompressor +- org.apache.hadoop.io.compress.zlib.ZlibDecompressor +- org.apache.hadoop.io.compress.bzip2.Bzip2Compressor +- org.apache.hadoop.io.compress.bzip2.Bzip2Decompressor +- org.apache.hadoop.security.JniBasedUnixGroupsMapping +- org.apache.hadoop.io.nativeio.NativeIO +- org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory +- org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping +- org.apache.hadoop.io.compress.snappy.SnappyCompressor +- org.apache.hadoop.io.compress.snappy.SnappyDecompressor +- org.apache.hadoop.io.compress.zstd.ZStandardCompressor +- org.apache.hadoop.io.compress.zstd.ZStandardDecompressor +- org.apache.hadoop.io.compress.lz4.Lz4Compressor +- org.apache.hadoop.io.compress.lz4.Lz4Decompressor +- org.apache.hadoop.io.erasurecode.ErasureCodeNative +- org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawEncoder +- 
org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawDecoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawEncoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawDecoder +- org.apache.hadoop.crypto.OpensslCipher +- org.apache.hadoop.crypto.random.OpensslSecureRandom +- org.apache.hadoop.util.NativeCrc32 +- org.apache.hadoop.net.unix.DomainSocket +- org.apache.hadoop.net.unix.DomainSocketWatcher +- +- ${project.build.directory}/native/javah +- +- +- +- + + org.apache.hadoop + hadoop-maven-plugins +@@ -775,43 +733,6 @@ + + + +- +- org.codehaus.mojo +- native-maven-plugin +- +- +- compile +- +- javah +- +- +- ${env.JAVA_HOME}/bin/javah +- +- org.apache.hadoop.io.compress.zlib.ZlibCompressor +- org.apache.hadoop.io.compress.zlib.ZlibDecompressor +- org.apache.hadoop.security.JniBasedUnixGroupsMapping +- org.apache.hadoop.io.nativeio.NativeIO +- org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping +- org.apache.hadoop.io.compress.snappy.SnappyCompressor +- org.apache.hadoop.io.compress.snappy.SnappyDecompressor +- org.apache.hadoop.io.compress.zstd.ZStandardCompressor +- org.apache.hadoop.io.compress.zstd.ZStandardDecompressor +- org.apache.hadoop.io.compress.lz4.Lz4Compressor +- org.apache.hadoop.io.compress.lz4.Lz4Decompressor +- org.apache.hadoop.io.erasurecode.ErasureCodeNative +- org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawEncoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawDecoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawEncoder +- org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawDecoder +- org.apache.hadoop.crypto.OpensslCipher +- org.apache.hadoop.crypto.random.OpensslSecureRandom +- org.apache.hadoop.util.NativeCrc32 +- +- ${project.build.directory}/native/javah +- +- +- +- + + org.codehaus.mojo + exec-maven-plugin +diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml +index 2d2336e7e244..2454a84daa8a 100644 +--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml ++++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml +@@ -139,26 +139,6 @@ + + + +- +- org.codehaus.mojo +- native-maven-plugin +- +- +- compile +- +- javah +- +- +- ${env.JAVA_HOME}/bin/javah +- +- org.apache.hadoop.mapred.nativetask.NativeBatchProcessor +- org.apache.hadoop.mapred.nativetask.NativeRuntime +- +- ${project.build.directory}/native/javah +- +- +- +- + + org.apache.maven.plugins + maven-antrun-plugin +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 23d1c875b713..1cd4e11ef3f5 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -172,7 +172,6 @@ + 1.9 + 1.3.1 + 1.0-beta-1 +- 1.0-alpha-8 + 900 + 1.11.901 + 2.3.4 +@@ -1704,11 +1703,6 @@ + maven-war-plugin + ${maven-war-plugin.version} + +- +- org.codehaus.mojo +- native-maven-plugin +- ${native-maven-plugin.version} +- + + org.codehaus.mojo + make-maven-plugin +@@ -2174,6 +2168,27 @@ + + + ++ ++ native ++ ++ false ++ ++ ++ ++ ++ org.apache.maven.plugins ++ maven-compiler-plugin ++ ++ ++ ++ -h ++ ${project.build.directory}/native/javah/ ++ ++ ++ ++ ++ ++ + + + diff --git a/hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch b/hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch new file mode 100644 index 000000000..b82c910e8 --- /dev/null +++ b/hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch @@ -0,0 +1,13 @@ +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 0b2f6f17157d..9dc8b653eb93 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -212,7 +212,7 @@ + 5.3.0 + 2.4.7 + 9.8.1 +- v12.22.1 ++ v14.0.0 + v1.22.5 + 1.10.11 + diff --git a/hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.4.patch 
b/hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.4.patch new file mode 100644 index 000000000..c4ccc9299 --- /dev/null +++ b/hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.4.patch @@ -0,0 +1,13 @@ +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index f1ac43ed5b38..73d0c7580338 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -213,7 +213,7 @@ + 1.1.3.Final + 5.4.0 + 9.8.1 +- v12.22.1 ++ v14.0.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/hadoop/stackable/patches/apply_patches.sh b/hadoop/stackable/patches/apply_patches.sh new file mode 100755 index 000000000..14f0f257c --- /dev/null +++ b/hadoop/stackable/patches/apply_patches.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +# Enable error handling and unset variable checking +set -eu +set -o pipefail + +# Check if $1 (VERSION) is provided +if [ -z "${1-}" ]; then + echo "Please provide a value for VERSION as the first argument." + exit 1 +fi + +VERSION="$1" +PATCH_DIR="patches/$VERSION" + +# Check if version-specific patches directory exists +if [ ! -d "$PATCH_DIR" ]; then + echo "Patches directory '$PATCH_DIR' does not exist." + exit 1 +fi + +# Create an array to hold the patches in sorted order +declare -a patch_files=() + +echo "Applying patches from ${PATCH_DIR}" now + +# Read the patch files into the array +while IFS= read -r -d $'\0' file; do + patch_files+=("$file") +done < <(find "$PATCH_DIR" -name "*.patch" -print0 | sort -zV) + +echo "Found ${#patch_files[@]} patches, applying now" + +# Iterate through sorted patch files +for patch_file in "${patch_files[@]}"; do + echo "Applying $patch_file" + git apply --directory "hadoop-${VERSION}-src" "$patch_file" || { + echo "Failed to apply $patch_file" + exit 1 + } +done + +echo "All patches applied successfully."