diff --git a/.hadolint.yaml b/.hadolint.yaml index c7198bbde..047399557 100644 --- a/.hadolint.yaml +++ b/.hadolint.yaml @@ -58,6 +58,16 @@ ignored: # sets the default shell to Bash where [[ ]] is defined. - SC3010 + # In POSIX sh, == in place of = is undefined. + # https://www.shellcheck.net/wiki/SC3014 + # Reason: Ignoring because we inherit SHELL from the base image which sets the default shell to Bash where == is defined + - SC3014 + + # In POSIX sh, =~ regex matching is undefined. + # https://www.shellcheck.net/wiki/SC3015 + # Reason: Ignoring because we inherit SHELL from the base image which sets the default shell to Bash where =~ regex matching is defined + - SC3015 + # In POSIX sh, string replacement is undefined. # https://www.shellcheck.net/wiki/SC3060 # Reason: Ignoring because we inherit SHELL from the base image which diff --git a/CHANGELOG.md b/CHANGELOG.md index 1084fe37c..c6085a0a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,16 +8,17 @@ All notable changes to this project will be documented in this file. - superset: Add 6.0.0-rc2 ([#1337]). - hive: Build [hive-metastore-opa-authorizer](https://github.com/boschglobal/hive-metastore-opa-authorizer) from source and add to image ([#1340]). +- hive: Add `4.2.0` ([#1356]). ### Changed - airflow: Extend list of providers for 3.0.6 ([#1336]) - airflow: Bump celery version to 5.5.3 for Airflow 3.x ([#1343]). - testing-tools: refactoring: Split image into multiple images, remove unnecessary components and switch to UBI as base image ([#1354]). +- hive: fixed 4.0.1 shaded hive-metastore-opa-authorizer jar by relocating dependencies ([#1356]). ### Removed -- hive: Remove `4.0.0` ([#1340]). - opensearch: Remove the `performance-analyzer` plugin from the OpenSearch image ([#1357]). [#1336]: https://github.com/stackabletech/docker-images/pull/1336 @@ -25,6 +26,7 @@ All notable changes to this project will be documented in this file. [#1340]: https://github.com/stackabletech/docker-images/pull/1340 [#1343]: https://github.com/stackabletech/docker-images/pull/1343 [#1354]: https://github.com/stackabletech/docker-images/pull/1354 +[#1356]: https://github.com/stackabletech/docker-images/pull/1356 [#1357]: https://github.com/stackabletech/docker-images/pull/1357 ## [25.11.0] - 2025-11-07 diff --git a/hive/Dockerfile b/hive/Dockerfile index 037bb0321..cf114407b 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -45,10 +45,6 @@ ENV NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}" # thus taking a bit (which is annoying while development) RUN /stackable/patchable --images-repo-root=src checkout hive ${PRODUCT_VERSION} > /tmp/HIVE_SOURCE_DIR -# Use bash for regex machting, otherwise docker lint is complaining: -# hive/Dockerfile:51 SC3014 warning: In POSIX sh, == in place of = is undefined. -SHELL ["/bin/bash", "-c"] - # Make expensive maven build a separate layer for better caching # Cache mounts are owned by root by default # We need to explicitly give the uid to use @@ -84,7 +80,6 @@ elif [[ "${PRODUCT_VERSION}" == 4.0.* ]]; then # We only seem to get a .tar.gz archive, so let's extract that to the correct location tar --extract --directory=/stackable -f standalone-metastore/metastore-server/target/apache-hive-standalone-metastore-server-${NEW_VERSION}-bin.tar.gz mv standalone-metastore/metastore-server/target/bom.json /stackable/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}.cdx.json - # TODO: Remove once the fix https://github.com/apache/hive/pull/5419 is merged and released # The schemaTool.sh is still pointing to the class location from Hive < 4.0.0, it seems like it was forgotten to update it sed -i -e 's/CLASS=org.apache.hadoop.hive.metastore.tools.MetastoreSchemaTool/CLASS=org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool/' /stackable/apache-hive-metastore-${NEW_VERSION}-bin/bin/ext/schemaTool.sh @@ -239,7 +234,7 @@ USER ${STACKABLE_USER_UID} ENV HIVE_HOME=/stackable/hive-metastore ENV HADOOP_HOME=/stackable/hadoop -ENV PATH="${PATH}":/stackable/hadoop/bin:/stackable/hive-metastore/bin +ENV PATH="${PATH}:${HADOOP_HOME}/bin:${HIVE_HOME}/bin" # The following 2 env-vars are required for common hadoop scripts even if the respective libraries are never used. # We set them here to a sensible default. diff --git a/hive/boil-config.toml b/hive/boil-config.toml index f4d4ed316..a216f20e7 100644 --- a/hive/boil-config.toml +++ b/hive/boil-config.toml @@ -13,6 +13,21 @@ aws-java-sdk-bundle-version = "1.12.367" azure-storage-version = "7.0.1" azure-keyvault-core-version = "1.0.0" +[versions."4.0.0".local-images] +# Hive 4 must be built with Java 8 (according to GitHub README) but seems to run on Java 11 +java-base = "11" +java-devel = "8" +"hadoop/hadoop" = "3.3.6" +# hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer +"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.0.0-hadoop-3.3.6" + +[versions."4.0.0".build-arguments] +jmx-exporter-version = "1.3.0" +# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 +aws-java-sdk-bundle-version = "1.12.367" +azure-storage-version = "7.0.1" +azure-keyvault-core-version = "1.0.0" + [versions."4.0.1".local-images] # Hive 4.0 must be built with Java 8 (according to GitHub README) but seems to run on Java 11 java-base = "11" @@ -44,3 +59,20 @@ aws-java-sdk-bundle-version = "2.29.52" azure-storage-version = "7.0.1" # Keep consistent with the dependency from azure-storage: https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 azure-keyvault-core-version = "1.0.0" + +[versions."4.2.0".local-images] +# Hive 4.2 requires Java 21 (according to GitHub README) +java-base = "21" +java-devel = "21" +"hadoop/hadoop" = "3.4.2" +# hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer +"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.2.0-hadoop-3.4.2" + +[versions."4.2.0".build-arguments] +jmx-exporter-version = "1.3.0" +# Keep consistent with the dependency from hadoop-aws: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.2 TODO: CHECK! +aws-java-sdk-bundle-version = "2.29.52" +# Keep consistent with the dependency from hadoop-azure: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.2 TODO: CHECK! +azure-storage-version = "7.0.1" +# Keep consistent with the dependency from azure-storage: https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 TODO: CHECK! +azure-keyvault-core-version = "1.0.0" diff --git a/hive/hive-metastore-opa-authorizer/Dockerfile b/hive/hive-metastore-opa-authorizer/Dockerfile index 4c3f3bee2..63e2f95ae 100644 --- a/hive/hive-metastore-opa-authorizer/Dockerfile +++ b/hive/hive-metastore-opa-authorizer/Dockerfile @@ -23,14 +23,10 @@ COPY --chown=${STACKABLE_USER_UID}:0 hive/hive-metastore-opa-authorizer/stackabl COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/patched-libs /stackable/patched-libs -# Use bash for regex machting, otherwise docker lint is complaining: -# hive/hive-metastore-opa-authorizer/Dockerfile:26 SC3015 warning: In POSIX sh, =~ regex matching is undefined. -SHELL ["/bin/bash", "-c"] - # Make expensive maven build a separate layer for better caching # Cache mounts are owned by root by default # We need to explicitly give the uid to use -RUN --mount=type=cache,id=maven-hive-metastore-opa-authorizer-${AUTHORIZER_VERSION},uid=${STACKABLE_USER_UID},target=/stackable/.m2/repository < 4.0.x only works with the non shaded jar else mvn clean package -DskipTests -Dhive.version=${HIVE_VERSION} -Dhadoop.version=${HADOOP_VERSION} -f hms-v4/pom.xml - - # The hive-metastore-opa-authorizer offers a shaded jar from version 4.x.x. Using the shaded jar leads to problems with schema tool at pod startup. - # mv hms-v4/target/com.bosch.bdps.hms4-${HIVE_VERSION}-${HADOOP_VERSION}-dev.jar /stackable/opa-authorizer-bin - mv hms-v4/target/hms4-${AUTHORIZER_VERSION}.jar /stackable/opa-authorizer-bin + mv hms-v4/target/hms4-${AUTHORIZER_VERSION}.jar /stackable/opa-authorizer-bin/hms4-${HIVE_VERSION}-${HADOOP_VERSION}-${AUTHORIZER_VERSION}.jar fi # We're removing these to make the intermediate layer smaller diff --git a/hive/hive-metastore-opa-authorizer/boil-config.toml b/hive/hive-metastore-opa-authorizer/boil-config.toml index f23915f57..a2eb55fcd 100644 --- a/hive/hive-metastore-opa-authorizer/boil-config.toml +++ b/hive/hive-metastore-opa-authorizer/boil-config.toml @@ -7,6 +7,15 @@ authorizer-version = "v1.0.0" hive-version = "3.1.3" delete-caches = "true" +[versions."v1.0.0-hive-4.0.0-hadoop-3.3.6".local-images] +"java-devel" = "11" +"hadoop/hadoop" = "3.3.6" + +[versions."v1.0.0-hive-4.0.0-hadoop-3.3.6".build-arguments] +authorizer-version = "v1.0.0" +hive-version = "4.0.0" +delete-caches = "true" + [versions."v1.0.0-hive-4.0.1-hadoop-3.3.6".local-images] "java-devel" = "11" "hadoop/hadoop" = "3.3.6" @@ -24,3 +33,12 @@ delete-caches = "true" authorizer-version = "v1.0.0" hive-version = "4.1.0" delete-caches = "true" + +[versions."v1.0.0-hive-4.2.0-hadoop-3.4.2".local-images] +"java-devel" = "21" +"hadoop/hadoop" = "3.4.2" + +[versions."v1.0.0-hive-4.2.0-hadoop-3.4.2".build-arguments] +authorizer-version = "v1.0.0" +hive-version = "4.2.0" +delete-caches = "true" diff --git a/hive/hive-metastore-opa-authorizer/stackable/patches/v1.0.0/0001-Relocate-jline-in-shading-plugin.patch b/hive/hive-metastore-opa-authorizer/stackable/patches/v1.0.0/0001-Relocate-jline-in-shading-plugin.patch new file mode 100644 index 000000000..2dbe84d47 --- /dev/null +++ b/hive/hive-metastore-opa-authorizer/stackable/patches/v1.0.0/0001-Relocate-jline-in-shading-plugin.patch @@ -0,0 +1,37 @@ +From e9b5503e9c952ab691952fb14724ac55a6890420 Mon Sep 17 00:00:00 2001 +From: Malte Sander +Date: Mon, 24 Nov 2025 18:05:16 +0100 +Subject: Relocate jline in shading plugin + +For Hive 4.0.1 the dependency loading is a mess. We explicitly shade jline to avoid a schematool dependency problem: + +Exception in thread "main" java.lang.NoSuchMethodError: 'void org.jline.reader.impl.completer.StringsCompleter.(org.jline.reader.Candidate[])' + at sqlline.SqlLineOpts.setOptionCompleters(SqlLineOpts.java:160) + at sqlline.Application.getCommandHandlers(Application.java:294) + at sqlline.SqlLine$Config.(SqlLine.java:1946) + at sqlline.SqlLine.setAppConfig(SqlLine.java:1875) + at sqlline.SqlLine.(SqlLine.java:229) + at org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool.execSql(MetastoreSchemaTool.java:313) + +This does not affect versions >= 4.1.x since the non shaded jar is used. +--- + hms-v4/pom.xml | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hms-v4/pom.xml b/hms-v4/pom.xml +index 4ebeac4..b2940ab 100644 +--- a/hms-v4/pom.xml ++++ b/hms-v4/pom.xml +@@ -67,6 +67,12 @@ + + + ++ ++ ++ org.jline ++ com.bosch.bdps.jline ++ ++ + + + diff --git a/hive/stackable/patches/4.2.0/0001-Include-Postgres-driver.patch b/hive/stackable/patches/4.2.0/0001-Include-Postgres-driver.patch new file mode 100644 index 000000000..91fd4c179 --- /dev/null +++ b/hive/stackable/patches/4.2.0/0001-Include-Postgres-driver.patch @@ -0,0 +1,34 @@ +From f7913f5ee840dc277301fcb0f18e04be7849ab5c Mon Sep 17 00:00:00 2001 +From: Malte Sander +Date: Tue, 25 Nov 2025 11:29:03 +0100 +Subject: Include Postgres driver + +--- + standalone-metastore/metastore-server/pom.xml | 1 - + standalone-metastore/pom.xml | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/standalone-metastore/metastore-server/pom.xml b/standalone-metastore/metastore-server/pom.xml +index fa2418f205..0e24b3ebc9 100644 +--- a/standalone-metastore/metastore-server/pom.xml ++++ b/standalone-metastore/metastore-server/pom.xml +@@ -338,7 +338,6 @@ + + org.postgresql + postgresql +- true + + + org.eclipse.jetty +diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml +index bd6f2227b6..0bf9d09c0b 100644 +--- a/standalone-metastore/pom.xml ++++ b/standalone-metastore/pom.xml +@@ -435,7 +435,6 @@ + org.postgresql + postgresql + ${postgres.version} +- runtime + + + org.apache.httpcomponents diff --git a/hive/stackable/patches/4.2.0/0002-Include-logging-dependencies.patch b/hive/stackable/patches/4.2.0/0002-Include-logging-dependencies.patch new file mode 100644 index 000000000..0973fa336 --- /dev/null +++ b/hive/stackable/patches/4.2.0/0002-Include-logging-dependencies.patch @@ -0,0 +1,25 @@ +From 8a74bc78eb3ed664e698dd41be25107a495a018e Mon Sep 17 00:00:00 2001 +From: Malte Sander +Date: Tue, 25 Nov 2025 11:30:24 +0100 +Subject: Include logging dependencies + +--- + standalone-metastore/pom.xml | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml +index 0bf9d09c0b..40bd4fcd83 100644 +--- a/standalone-metastore/pom.xml ++++ b/standalone-metastore/pom.xml +@@ -582,6 +582,11 @@ + com.fasterxml.jackson.core + jackson-databind + ++ ++ ++ com.fasterxml.jackson.dataformat ++ jackson-dataformat-xml ++ + + + diff --git a/hive/stackable/patches/4.2.0/0003-Fix-CVE-2024-36114.patch b/hive/stackable/patches/4.2.0/0003-Fix-CVE-2024-36114.patch new file mode 100644 index 000000000..940e5a50e --- /dev/null +++ b/hive/stackable/patches/4.2.0/0003-Fix-CVE-2024-36114.patch @@ -0,0 +1,26 @@ +From 4095defdcf9e049dfa5a7c9b279760ea30615465 Mon Sep 17 00:00:00 2001 +From: Malte Sander +Date: Tue, 25 Nov 2025 11:31:34 +0100 +Subject: Fix CVE-2024-36114 + +--- + standalone-metastore/pom.xml | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml +index 40bd4fcd83..34eb45fb96 100644 +--- a/standalone-metastore/pom.xml ++++ b/standalone-metastore/pom.xml +@@ -157,6 +157,12 @@ + ${netty.version} + linux-x86_64 + ++ ++ ++ io.airlift ++ aircompressor ++ 0.27 ++ + + org.apache.orc + orc-core diff --git a/hive/stackable/patches/4.2.0/0004-Add-httpcomponents.core5-to-hive-standalone-metastor.patch b/hive/stackable/patches/4.2.0/0004-Add-httpcomponents.core5-to-hive-standalone-metastor.patch new file mode 100644 index 000000000..859383c6d --- /dev/null +++ b/hive/stackable/patches/4.2.0/0004-Add-httpcomponents.core5-to-hive-standalone-metastor.patch @@ -0,0 +1,40 @@ +From 897f67b3bd6f200affe80423a37c7de202f13782 Mon Sep 17 00:00:00 2001 +From: Malte Sander +Date: Tue, 25 Nov 2025 14:44:35 +0100 +Subject: Add httpcomponents.core5 to hive-standalone-metastore + +Required for the REST Catalog. +--- + standalone-metastore/metastore-rest-catalog/pom.xml | 5 +++++ + standalone-metastore/pom.xml | 2 ++ + 2 files changed, 7 insertions(+) + +diff --git a/standalone-metastore/metastore-rest-catalog/pom.xml b/standalone-metastore/metastore-rest-catalog/pom.xml +index c1692e7ba2..ad6cecb535 100644 +--- a/standalone-metastore/metastore-rest-catalog/pom.xml ++++ b/standalone-metastore/metastore-rest-catalog/pom.xml +@@ -26,6 +26,11 @@ + 1.9.1 + + ++ ++ org.apache.httpcomponents.core5 ++ httpcore5 ++ ${httpcomponents5.core.version} ++ + + org.apache.hive + hive-standalone-metastore-server +diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml +index 34eb45fb96..94b883b801 100644 +--- a/standalone-metastore/pom.xml ++++ b/standalone-metastore/pom.xml +@@ -117,6 +117,8 @@ + 1.7.30 + 4.4.13 + 4.5.13 ++ ++ 5.3.1 + 4.5.8 + 11.28 + 9.4.57.v20241219 diff --git a/hive/stackable/patches/4.2.0/patchable.toml b/hive/stackable/patches/4.2.0/patchable.toml new file mode 100644 index 000000000..dad9b8958 --- /dev/null +++ b/hive/stackable/patches/4.2.0/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/hive.git" +base = "cb06ad72d609e51b6a3a38ccb120e34b4281067c"