diff --git a/airflow/CHANGELOG.md b/airflow/CHANGELOG.md index cfcd74779..baa4581ae 100644 --- a/airflow/CHANGELOG.md +++ b/airflow/CHANGELOG.md @@ -2,14 +2,29 @@ ## [Unreleased] +### Changed + +- Updated Vector to 0.31 ([#429]). +- Updated git-sync to 3.6.8 ([#431]). +- Updated statsd-exporter to 0.24, this was accidentally moved to a very old version previously (0.3.0) ([#431]). + +[#429]: https://github.com/stackabletech/docker-images/pull/429 +[#431]: https://github.com/stackabletech/docker-images/pull/431 + +## [23.7.0] + ### Added -- Added git-sync functionality to the airflow image ([#337]). - Added new airflow version `2.6.1` ([#379]). -[#337]: https://github.com/stackabletech/docker-images/pull/337 [#379]: https://github.com/stackabletech/docker-images/pull/379 +## [23.4.0] + +### Added + +- Added git-sync functionality to the airflow image ([#337]). + ### Changed - Upgraded to the base image vector:0.26.0-stackable1.1.0. The new base image @@ -17,6 +32,7 @@ stackable-base layer and is therefore removed from this image ([#291]). [#291]: https://github.com/stackabletech/docker-images/pull/291 +[#337]: https://github.com/stackabletech/docker-images/pull/337 ## [airflow-stackable0.5.0] - 2022-11-30 diff --git a/airflow/Dockerfile b/airflow/Dockerfile index f000b6d47..156f1474f 100644 --- a/airflow/Dockerfile +++ b/airflow/Dockerfile @@ -1,4 +1,14 @@ # syntax=docker/dockerfile:1 +ARG GIT_SYNC +ARG STATSD_EXPORTER + +FROM oci.stackable.tech/mirror/prom/statsd-exporter:${STATSD_EXPORTER} as statsd-exporter + +# For updated versions check https://github.com/kubernetes/git-sync/releases +# which should contain a image location (e.g. 
registry.k8s.io/git-sync/git-sync:v3.6.8) +FROM oci.stackable.tech/mirror/git-sync/git-sync:${GIT_SYNC} as gitsync-image + + FROM stackable/image/vector AS airflow-build-image ARG PRODUCT @@ -10,33 +20,33 @@ COPY airflow/constraints-${PRODUCT}-python${PYTHON}.txt /tmp/constraints.txt # Requires implementation of https://github.com/apache/airflow/blob/2.2.5/scripts/docker/install_mysql.sh ENV AIRFLOW_EXTRAS=async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,google_auth,microsoft.azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv,trino -RUN microdnf update \ - && microdnf install -y \ - gcc \ - gcc-c++ \ - python${PYTHON}-devel \ - python${PYTHON}-pip \ - python${PYTHON}-wheel \ - openssl-devel \ - cyrus-sasl-devel \ - openldap-devel \ - unixODBC-devel \ - libpq-devel \ - && microdnf clean all \ - && python3 -m venv --system-site-packages /stackable/app \ - && source /stackable/app/bin/activate \ - && pip install --no-cache-dir --upgrade pip \ - && pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT} --constraint /tmp/constraints.txt \ - # Needed for pandas S3 integration to e.g. 
write and read csv and parquet files to/from S3 - && pip install --no-cache-dir s3fs - -FROM prom/statsd-exporter:0.3.0@sha256:a9c27602d6f6b86527657922b6a87c12789f7f9b39a90f1513e8c665c941f26a as statsd-exporter -FROM docker.stackable.tech/stackable/git-sync:v3.6.4 as gitsync-image +RUN microdnf update && \ + microdnf install \ + cyrus-sasl-devel \ + gcc \ + gcc-c++ \ + libpq-devel \ + openldap-devel \ + openssl-devel \ + python${PYTHON}-devel \ + python${PYTHON}-pip \ + python${PYTHON}-wheel \ + unixODBC-devel && \ + microdnf clean all && \ + python3 -m venv --system-site-packages /stackable/app && \ + source /stackable/app/bin/activate && \ + pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT} --constraint /tmp/constraints.txt && \ + # Needed for pandas S3 integration to e.g. write and read csv and parquet files to/from S3 + pip install --no-cache-dir s3fs + + FROM stackable/image/vector AS airflow-main-image ARG PRODUCT ARG PYTHON ARG RELEASE +ARG TINI LABEL name="Apache Airflow" \ maintainer="info@stackable.tech" \ @@ -49,21 +59,22 @@ LABEL name="Apache Airflow" \ COPY airflow/licenses /licenses # Update image and install python -RUN microdnf install -y yum python${PYTHON} \ - openssl-libs \ - openssl-pkcs11 \ - openldap \ - openldap-clients \ +RUN microdnf update && \ + microdnf install \ + ca-certificates \ cyrus-sasl \ - unixODBC \ - libpq \ git \ + libpq \ + openldap \ + openldap-clients \ openssh-clients \ + openssl-libs \ + openssl-pkcs11 \ + python${PYTHON} \ socat \ - ca-certificates \ - && yum -y update-minimal --security --sec-severity=Important --sec-severity=Critical \ - && yum clean all \ - && microdnf clean all + unixODBC && \ + microdnf clean all && \ + rm -rf /var/cache/yum ENV HOME=/stackable ENV AIRFLOW_USER_HOME_DIR=/stackable @@ -76,15 +87,14 @@ RUN mkdir -pv ${AIRFLOW_HOME} && \ RUN chown --recursive stackable:stackable ${AIRFLOW_HOME} -# according to arch, copy binary to 
the name "tini" -RUN curl -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini-$(arch)" - +# Get the correct `tini` binary for our architecture. +# It is used as an init alternative in the entrypoint +RUN curl -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-$(arch)" COPY airflow/stackable/utils/entrypoint.sh /entrypoint -COPY --from=statsd-exporter --chown=stackable:stackable /bin/statsd_exporter /stackable/statsd_exporter - RUN chmod a+x /entrypoint && \ chmod +x /usr/bin/tini +COPY --from=statsd-exporter --chown=stackable:stackable /bin/statsd_exporter /stackable/statsd_exporter COPY --from=airflow-build-image --chown=stackable:stackable /stackable/ ${HOME}/ COPY --from=gitsync-image --chown=stackable:stackable /git-sync /stackable/git-sync diff --git a/airflow/stackable/utils/entrypoint.sh b/airflow/stackable/utils/entrypoint.sh index 6f74ce68b..9b3f3b9cb 100644 --- a/airflow/stackable/utils/entrypoint.sh +++ b/airflow/stackable/utils/entrypoint.sh @@ -16,10 +16,27 @@ # specific language governing permissions and limitations # under the License. # Might be empty + +# Stackable notes: +# Source of this file is the upstream Apache Airflow project +# https://github.com/apache/airflow/blob/main/scripts/docker/entrypoint_prod.sh +# It was last synced from the upstream repo on 2023-07-31 and is up-to-date as of commit 86193f5 + + AIRFLOW_COMMAND="${1:-}" set -euo pipefail +# This one is to workaround https://github.com/apache/airflow/issues/17546 +# issue with /usr/lib/-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block +# We do not yet have a more "correct" solution to the problem but in order to avoid raising new issues +# by users of the prod image, we implement the workaround now. 
+# The side effect of this is slightly (in the range of 100s of milliseconds) slower load for any +# binary started and a little memory used for Heap allocated by initialization of libstdc++ +# This overhead is not happening for binaries that already link dynamically libstdc++ +LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6" +export LD_PRELOAD + function run_check_with_retries { local cmd cmd="${1}" @@ -87,7 +104,7 @@ function wait_for_connection { local detected_backend detected_backend=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).scheme)" "${connection_url}") local detected_host - detected_host=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).hostname)" "${connection_url}") + detected_host=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).hostname or '')" "${connection_url}") local detected_port detected_port=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).port or '')" "${connection_url}") @@ -116,7 +133,11 @@ function wait_for_connection { echo DB_PORT="${DB_PORT:=${detected_port}}" readonly DB_PORT - run_check_with_retries "run_nc ${DB_HOST@Q} ${DB_PORT@Q}" + if [[ -n "${DB_HOST=}" ]] && [[ -n "${DB_PORT=}" ]]; then + run_check_with_retries "run_nc ${DB_HOST@Q} ${DB_PORT@Q}" + else + >&2 echo "The connection details to the broker could not be determined. Connectivity checks were skipped." + fi } function create_www_user() { @@ -281,7 +302,7 @@ if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then >&2 echo >&2 echo "!!!!! Installing additional requirements: '${_PIP_ADDITIONAL_REQUIREMENTS}' !!!!!!!!!!!!" >&2 echo - >&2 echo "WARNING: This is a developpment/test feature only. NEVER use it in production!" + >&2 echo "WARNING: This is a development/test feature only. NEVER use it in production!" 
>&2 echo " Instead, build a custom image as described in" >&2 echo >&2 echo " https://airflow.apache.org/docs/docker-stack/build.html" @@ -290,7 +311,7 @@ if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then >&2 echo " the container starts, so it is onlny useful for testing and trying out" >&2 echo " of adding dependencies." >&2 echo - pip install --no-cache-dir ${_PIP_ADDITIONAL_REQUIREMENTS} + pip install --root-user-action ignore --no-cache-dir ${_PIP_ADDITIONAL_REQUIREMENTS} fi diff --git a/airflow/stackable/utils/tini-v0.19.0 b/airflow/stackable/utils/tini-v0.19.0 deleted file mode 100644 index 86cb766c6..000000000 Binary files a/airflow/stackable/utils/tini-v0.19.0 and /dev/null differ diff --git a/hadoop/Dockerfile b/hadoop/Dockerfile index 626ebe758..16e93fb49 100644 --- a/hadoop/Dockerfile +++ b/hadoop/Dockerfile @@ -125,7 +125,8 @@ RUN microdnf update && \ fuse-libs \ krb5-workstation \ openssl && \ - microdnf clean all + microdnf clean all && \ + rm -rf /var/cache/yum COPY hadoop/licenses /licenses @@ -141,6 +142,7 @@ COPY --chown=stackable:stackable --from=builder /stackable/jmx /stackable/jmx/ RUN ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop COPY hadoop/stackable/fuse_dfs_wrapper /stackable/hadoop/bin +ENV HOME=/stackable ENV LD_LIBRARY_PATH=/stackable/hadoop/lib/native:/usr/lib/jvm/jre/lib/server ENV PATH="${PATH}":/stackable/hadoop/bin ENV HADOOP_HOME=/stackable/hadoop diff --git a/image_tools/conf.py b/image_tools/conf.py index 5036369fd..72b13143e 100644 --- a/image_tools/conf.py +++ b/image_tools/conf.py @@ -8,27 +8,42 @@ "versions": [ { "product": "2.2.3", + "git_sync": "v3.6.8", "python": "38", + "statsd_exporter": "v0.24.0", + "tini": "0.19.0", "vector": "0.31.0", }, { "product": "2.2.4", + "git_sync": "v3.6.8", "python": "39", + "statsd_exporter": "v0.24.0", + "tini": "0.19.0", "vector": "0.31.0", }, { "product": "2.2.5", + "git_sync": "v3.6.8", "python": "39", + "statsd_exporter": "v0.24.0", + "tini": "0.19.0", "vector": "0.31.0", }, 
{ "product": "2.4.1", + "git_sync": "v3.6.8", "python": "39", + "statsd_exporter": "v0.24.0", + "tini": "0.19.0", "vector": "0.31.0", }, { "product": "2.6.1", + "git_sync": "v3.6.8", "python": "39", + "statsd_exporter": "v0.24.0", + "tini": "0.19.0", "vector": "0.31.0", }, ], @@ -74,26 +89,6 @@ { "name": "hbase", "versions": [ - { - "product": "2.4.6", - "java-base": "11", - "phoenix": "2.4-5.1.2", - }, - { - "product": "2.4.8", - "java-base": "11", - "phoenix": "2.4-5.1.2", - }, - { - "product": "2.4.9", - "java-base": "11", - "phoenix": "2.4-5.1.2", - }, - { - "product": "2.4.11", - "java-base": "11", - "phoenix": "2.4-5.1.2", - }, { "product": "2.4.12", "java-base": "11",