Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file.
- testing-tools: Add java, tzdata-java, unzip ([#464], [#465], [#466]).

- hadoop: added support for 3.2.4, 3.3.6 ([#478]).
- hive: added new version upload script ([#472]).
- opa: add version 0.57.0 ([#471]).
- opa: add new version upload script ([#471]).
- zookeeper: add version 3.8.3 ([#470]).
Expand All @@ -33,6 +34,7 @@ All notable changes to this project will be documented in this file.
- airflow: Updated statsd-exporter to 0.24, this was accidentally moved to a very old version previously (0.3.0) ([#431]).
- airflow: Added wrapper script to allow the triggering of pre/post hook actions ([#435]).
- hadoop: bumped jmx-exporter version to 0.20.0 ([#478]).
- hive: bump jmx-exporter to 0.20.0 ([#472]).
- zookeeper: bumped jmx-exporter version to 0.20.0 ([#470]).

### Removed
Expand All @@ -42,8 +44,9 @@ All notable changes to this project will be documented in this file.
- pyspark-k8s: The PySpark image has been removed completely. Python is now installed with the Spark image ([#436])
- Removed all product specific changelogs and updated the root file ([#440])

- opa: removed versions 0.27.1, 0.28.0, 0.37.2, 0.41.0, 0.45.0 ([#471]).
- hadoop: removed support for 3.3.1, 3.3.3 ([#478]).
- hive: remove version 2.3.9 ([#472]).
- opa: removed versions 0.27.1, 0.28.0, 0.37.2, 0.41.0, 0.45.0 ([#471]).
- zookeeper: removed versions 3.5.8, 3.6.3, 3.7.0, 3.8.0 ([#470]).

[#400]: https://github.com/stackabletech/docker-images/pull/400
Expand All @@ -63,6 +66,7 @@ All notable changes to this project will be documented in this file.
[#466]: https://github.com/stackabletech/docker-images/pull/466
[#470]: https://github.com/stackabletech/docker-images/pull/470
[#471]: https://github.com/stackabletech/docker-images/pull/471
[#472]: https://github.com/stackabletech/docker-images/pull/472
[#478]: https://github.com/stackabletech/docker-images/pull/478
[#479]: https://github.com/stackabletech/docker-images/pull/479

Expand Down
10 changes: 1 addition & 9 deletions conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,17 +110,9 @@
{
"name": "hive",
"versions": [
{
"product": "2.3.9",
"java-base": "11",
"hadoop_libs": "2.10.1",
"jackson_dataformat_xml": "2.7.9",
"aws_java_sdk_bundle": "1.11.271",
"azure_storage": "7.0.1",
"azure_keyvault_core": "1.0.0",
},
{
"product": "3.1.3",
"jmx_exporter": "0.20.0",
"java-base": "11",
"hadoop_libs": "3.3.3",
"jackson_dataformat_xml": "2.12.3",
Expand Down
29 changes: 18 additions & 11 deletions hive/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
FROM stackable/image/java-base

ARG PRODUCT
ARG JMX_EXPORTER
ARG HADOOP_LIBS
ARG JACKSON_DATAFORMAT_XML
ARG AWS_JAVA_SDK_BUNDLE
Expand Down Expand Up @@ -31,43 +32,49 @@ USER stackable
WORKDIR /stackable

# Download hive and hadoop
RUN curl -L https://repo.stackable.tech/repository/packages/hive/apache-hive-${PRODUCT}-bin.tar.gz | tar -xzC . && \
RUN curl --fail -L https://repo.stackable.tech/repository/packages/hive/apache-hive-${PRODUCT}-bin.tar.gz | tar -xzC . && \
ln -s /stackable/apache-hive-${PRODUCT}-bin/apache-hive-${PRODUCT}-bin /stackable/hive && \
ln -s /stackable/apache-hive-${PRODUCT}-bin/hadoop-${HADOOP_LIBS} /stackable/hadoop && \
# Force to overwrite the existing 'start-metastore'
ln -sf /stackable/bin/start-metastore /stackable/hive/bin/start-metastore

# Download aws module for Hadoop (support for s3a://)
RUN curl -L https://repo.stackable.tech/repository/packages/aws/hadoop-aws-${HADOOP_LIBS}.jar \
RUN curl --fail -L https://repo.stackable.tech/repository/packages/aws/hadoop-aws-${HADOOP_LIBS}.jar \
-o /stackable/hive/lib/hadoop-aws-${HADOOP_LIBS}.jar && \
chmod -x /stackable/hive/lib/hadoop-aws-${HADOOP_LIBS}.jar

# Download aws sdk bundle containing all the needed S3 Classes for hadoop-aws. Must match version hadoop-aws was compiled against
RUN curl -L https://repo.stackable.tech/repository/packages/aws/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar \
RUN curl --fail -L https://repo.stackable.tech/repository/packages/aws/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar \
-o /stackable/hive/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar && \
chmod -x /stackable/hive/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar

# Download azure module for Hadoop (support for abfs://)
RUN curl -L https://repo.stackable.tech/repository/packages/aws/hadoop-azure-${HADOOP_LIBS}.jar \
RUN curl --fail -L https://repo.stackable.tech/repository/packages/azure/hadoop-azure-${HADOOP_LIBS}.jar \
-o /stackable/hive/lib/hadoop-azure-${HADOOP_LIBS}.jar && \
chmod -x /stackable/hive/lib/hadoop-azure-${HADOOP_LIBS}.jar

# Download azure libs containing all the needed ABFS Classes for hadoop-azure. Must match version hadoop-azure was compiled against
RUN curl -L https://repo.stackable.tech/repository/packages/azure/azure-storage-${AZURE_STORAGE}.jar \
RUN curl --fail -L https://repo.stackable.tech/repository/packages/azure/azure-storage-${AZURE_STORAGE}.jar \
-o /stackable/hive/lib/azure-storage-${AZURE_STORAGE}.jar && \
chmod -x /stackable/hive/lib/azure-storage-${AZURE_STORAGE}.jar
RUN curl -L https://repo.stackable.tech/repository/packages/azure/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar \
RUN curl --fail -L https://repo.stackable.tech/repository/packages/azure/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar \
-o /stackable/hive/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar && \
chmod -x /stackable/hive/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar

# Download jmx exporter (needed for metrics)
RUN curl https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-0.16.1.jar \
-o /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar && \
chmod -x /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar
# The symlink from JMX Exporter 0.16.1 to the versionless link exists because old HDFS Operators (up until and including 23.7) used to hardcode
# the version of JMX Exporter like this: "-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar"
# This is a TEMPORARY fix which means that we can keep the hardcoded path in HDFS operator FOR NOW as it will still point to a newer version of JMX Exporter, despite the "0.16.1" in the name.
# At the same time a new HDFS Operator will still work with older images which do not have the symlink to the versionless jar.
# After one of our next releases (23.11 or 24.x) we should update the operator to point at the non-versioned symlink (jmx_prometheus_javaagent.jar)
# And then we can also remove the symlink to 0.16.1 from this Dockerfile.
RUN curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" && \
chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" && \
ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar && \
ln -s /stackable/jmx/jmx_prometheus_javaagent.jar /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar

# Logging
RUN rm /stackable/hive/lib/log4j-slf4j-impl* && \
curl https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar \
curl --fail https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar \
-o /stackable/hive/lib/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar && \
chmod -x /stackable/hive/lib/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML}.jar

Expand Down
66 changes: 66 additions & 0 deletions hive/upload_new_hive_version.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env bash
#
# Downloads a given Apache Hive release from the official Apache archive,
# verifies its SHA256 checksum and GPG signature, and uploads the tarball
# plus both verification files to the Stackable Nexus package repository.
#
# Usage: upload_new_hive_version.sh <version> <nexus-username>
#   arg 1: Hive version to mirror (e.g. "3.1.3")
#   arg 2: Nexus username (password is prompted interactively, never passed on the CLI)

# Fail fast: abort on any command error, unset variable, or failure inside a pipe.
set -euo pipefail

VERSION=${1:?"Missing version number argument (arg 1)"}
NEXUS_USER=${2:?"Missing Nexus username argument (arg 2)"}

# -s keeps the password off the terminal; it is only held in this process's memory.
read -r -s -p "Nexus Password: " NEXUS_PASSWORD
echo ""

# https://stackoverflow.com/questions/4632028/how-to-create-a-temporary-directory
# Find the directory name of the script
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# the temp directory used, within $DIR
WORK_DIR=$(mktemp -d -p "$DIR")

# check if tmp dir was created
if [[ ! "$WORK_DIR" || ! -d "$WORK_DIR" ]]; then
  echo "Could not create temp dir"
  exit 1
fi

# deletes the temp directory
function cleanup {
  rm -rf "$WORK_DIR"
}

# register the cleanup function to be called on the EXIT signal
trap cleanup EXIT

cd "$WORK_DIR" || exit

bin_file="apache-hive-${VERSION}-bin.tar.gz"

# archive.apache.org (unlike the dlcdn.apache.org CDN, which only carries current
# releases) retains every version ever published — that is the "slow mirror" the
# message below refers to, and it is required so arbitrary old versions can be mirrored.
echo "Downloading Hive (this can take a while, it is intentionally downloading from a slow mirror that contains all old versions)"
# -sS: silent progress bar, but still print an error message if the transfer fails.
curl --fail -LOsS "https://archive.apache.org/dist/hive/hive-${VERSION}/${bin_file}"
curl --fail -LOsS "https://archive.apache.org/dist/hive/hive-${VERSION}/${bin_file}.asc"
curl --fail -LOsS "https://archive.apache.org/dist/hive/hive-${VERSION}/${bin_file}.sha256"

# It is probably redundant to check both the checksum and the signature but it's cheap and why not
echo "Validating SHA256 Checksums"
if ! (sha256sum "${bin_file}" | diff - "${bin_file}.sha256"); then
  echo "ERROR: The SHA256 checksum does not match"
  exit 1
fi

echo "Validating signatures"
echo '--> NOTE: Make sure you have downloaded and added the KEYS file (https://dlcdn.apache.org/hive/KEYS) to GPG: https://www.apache.org/info/verification.html'

if ! (gpg --verify "$bin_file.asc" "$bin_file" 2> /dev/null); then
  echo "ERROR: Signature could not be verified"
  exit 1
fi

echo "Uploading everything to Nexus"
# Attempt all three uploads even if one fails, then report a single combined status.
EXIT_STATUS=0
curl --fail -u "$NEXUS_USER:$NEXUS_PASSWORD" --upload-file "$bin_file" 'https://repo.stackable.tech/repository/packages/hive/' || EXIT_STATUS=$?
curl --fail -u "$NEXUS_USER:$NEXUS_PASSWORD" --upload-file "$bin_file.asc" 'https://repo.stackable.tech/repository/packages/hive/' || EXIT_STATUS=$?
curl --fail -u "$NEXUS_USER:$NEXUS_PASSWORD" --upload-file "$bin_file.sha256" 'https://repo.stackable.tech/repository/packages/hive/' || EXIT_STATUS=$?

if [ $EXIT_STATUS -ne 0 ]; then
  echo "ERROR: Upload failed"
  exit 1
fi

echo "Successfully uploaded version $VERSION of Hive to Nexus"
echo "https://repo.stackable.tech/service/rest/repository/browse/packages/hive/"