Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions airflow/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,37 @@

## [Unreleased]

### Changed

- Updated Vector to 0.31 ([#429]).
- Updated git-sync to 3.6.8 ([#431]).
- Updated statsd-exporter to 0.24. It had previously been moved to a very old version (0.3.0) by accident ([#431]).

[#429]: https://github.com/stackabletech/docker-images/pull/429
[#431]: https://github.com/stackabletech/docker-images/pull/431

## [23.7.0]

### Added

- Added git-sync functionality to the airflow image ([#337]).
- Added new airflow version `2.6.1` ([#379]).

[#337]: https://github.com/stackabletech/docker-images/pull/337
[#379]: https://github.com/stackabletech/docker-images/pull/379

## [23.4.0]

### Added

- Added git-sync functionality to the airflow image ([#337]).

### Changed

- Upgraded to the base image vector:0.26.0-stackable1.1.0. The new base image
provides Vector. The creation of the stackable user and group happens in the
stackable-base layer and is therefore removed from this image ([#291]).

[#291]: https://github.com/stackabletech/docker-images/pull/291
[#337]: https://github.com/stackabletech/docker-images/pull/337

## [airflow-stackable0.5.0] - 2022-11-30

Expand Down
86 changes: 48 additions & 38 deletions airflow/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
# syntax=docker/dockerfile:1
# Build arguments selecting the image tags of the helper images below.
# They are declared before the first FROM, so they can only be referenced in FROM lines.
ARG GIT_SYNC
ARG STATSD_EXPORTER

# Source stage for the statsd_exporter binary that is copied into the final image.
FROM oci.stackable.tech/mirror/prom/statsd-exporter:${STATSD_EXPORTER} as statsd-exporter

# Source stage for the git-sync binary that is copied into the final image.
# For updated versions check https://github.com/kubernetes/git-sync/releases
# which should contain an image location (e.g. registry.k8s.io/git-sync/git-sync:v3.6.8)
FROM oci.stackable.tech/mirror/git-sync/git-sync:${GIT_SYNC} as gitsync-image


FROM stackable/image/vector AS airflow-build-image

ARG PRODUCT
Expand All @@ -10,33 +20,33 @@ COPY airflow/constraints-${PRODUCT}-python${PYTHON}.txt /tmp/constraints.txt
# Requires implementation of https://github.com/apache/airflow/blob/2.2.5/scripts/docker/install_mysql.sh
ENV AIRFLOW_EXTRAS=async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,google_auth,microsoft.azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv,trino

RUN microdnf update \
&& microdnf install -y \
gcc \
gcc-c++ \
python${PYTHON}-devel \
python${PYTHON}-pip \
python${PYTHON}-wheel \
openssl-devel \
cyrus-sasl-devel \
openldap-devel \
unixODBC-devel \
libpq-devel \
&& microdnf clean all \
&& python3 -m venv --system-site-packages /stackable/app \
&& source /stackable/app/bin/activate \
&& pip install --no-cache-dir --upgrade pip \
&& pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT} --constraint /tmp/constraints.txt \
# Needed for pandas S3 integration to e.g. write and read csv and parquet files to/from S3
&& pip install --no-cache-dir s3fs

FROM prom/statsd-exporter:0.3.0@sha256:a9c27602d6f6b86527657922b6a87c12789f7f9b39a90f1513e8c665c941f26a as statsd-exporter
FROM docker.stackable.tech/stackable/git-sync:v3.6.4 as gitsync-image
RUN microdnf update && \
microdnf install \
cyrus-sasl-devel \
gcc \
gcc-c++ \
libpq-devel \
openldap-devel \
openssl-devel \
python${PYTHON}-devel \
python${PYTHON}-pip \
python${PYTHON}-wheel \
unixODBC-devel && \
microdnf clean all && \
python3 -m venv --system-site-packages /stackable/app && \
source /stackable/app/bin/activate && \
pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT} --constraint /tmp/constraints.txt && \
# Needed for pandas S3 integration to e.g. write and read csv and parquet files to/from S3
pip install --no-cache-dir s3fs
Comment thread
sbernauer marked this conversation as resolved.


FROM stackable/image/vector AS airflow-main-image

ARG PRODUCT
ARG PYTHON
ARG RELEASE
ARG TINI

LABEL name="Apache Airflow" \
maintainer="info@stackable.tech" \
Expand All @@ -49,21 +59,22 @@ LABEL name="Apache Airflow" \
COPY airflow/licenses /licenses

# Update image and install python
RUN microdnf install -y yum python${PYTHON} \
openssl-libs \
openssl-pkcs11 \
openldap \
openldap-clients \
RUN microdnf update && \
microdnf install \
ca-certificates \
cyrus-sasl \
unixODBC \
libpq \
git \
libpq \
openldap \
openldap-clients \
openssh-clients \
openssl-libs \
openssl-pkcs11 \
python${PYTHON} \
socat \
ca-certificates \
&& yum -y update-minimal --security --sec-severity=Important --sec-severity=Critical \
&& yum clean all \
&& microdnf clean all
unixODBC && \
microdnf clean all && \
rm -rf /var/cache/yum

ENV HOME=/stackable
ENV AIRFLOW_USER_HOME_DIR=/stackable
Expand All @@ -76,15 +87,14 @@ RUN mkdir -pv ${AIRFLOW_HOME} && \

RUN chown --recursive stackable:stackable ${AIRFLOW_HOME}

# according to arch, copy binary to the name "tini"
RUN curl -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini-$(arch)"

# Get the correct `tini` binary for our architecture.
# It is used as an init alternative in the entrypoint.
# `--fail` makes curl exit non-zero on an HTTP error (e.g. a 404 for an unknown
# version or architecture) instead of saving the error page as /usr/bin/tini,
# so a bad download fails the build rather than producing a broken image.
RUN curl --fail -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-$(arch)"
COPY airflow/stackable/utils/entrypoint.sh /entrypoint
COPY --from=statsd-exporter --chown=stackable:stackable /bin/statsd_exporter /stackable/statsd_exporter

RUN chmod a+x /entrypoint && \
chmod +x /usr/bin/tini

COPY --from=statsd-exporter --chown=stackable:stackable /bin/statsd_exporter /stackable/statsd_exporter
COPY --from=airflow-build-image --chown=stackable:stackable /stackable/ ${HOME}/
COPY --from=gitsync-image --chown=stackable:stackable /git-sync /stackable/git-sync

Expand Down
29 changes: 25 additions & 4 deletions airflow/stackable/utils/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,27 @@
# specific language governing permissions and limitations
# under the License.
# Might be empty

# Stackable notes:
# Source of this file is the upstream Apache Airflow project
# https://github.com/apache/airflow/blob/main/scripts/docker/entrypoint_prod.sh
# It was last synced from the upstream repo on 2023-07-31 and is up-to-date as of commit 86193f5


AIRFLOW_COMMAND="${1:-}"

set -euo pipefail

# This one is to workaround https://github.com/apache/airflow/issues/17546
# issue with /usr/lib/<MACHINE>-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block
# We do not yet have a more "correct" solution to the problem but in order to avoid raising new issues
# by users of the prod image, we implement the workaround now.
# The side effect of this is slightly (in the range of 100s of milliseconds) slower load for any
# binary started and a little memory used for Heap allocated by initialization of libstdc++
# This overhead is not happening for binaries that already link dynamically libstdc++
LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6"
export LD_PRELOAD

function run_check_with_retries {
local cmd
cmd="${1}"
Expand Down Expand Up @@ -87,7 +104,7 @@ function wait_for_connection {
local detected_backend
detected_backend=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).scheme)" "${connection_url}")
local detected_host
detected_host=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).hostname)" "${connection_url}")
detected_host=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).hostname or '')" "${connection_url}")
local detected_port
detected_port=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).port or '')" "${connection_url}")

Expand Down Expand Up @@ -116,7 +133,11 @@ function wait_for_connection {

echo DB_PORT="${DB_PORT:=${detected_port}}"
readonly DB_PORT
run_check_with_retries "run_nc ${DB_HOST@Q} ${DB_PORT@Q}"
if [[ -n "${DB_HOST=}" ]] && [[ -n "${DB_PORT=}" ]]; then
run_check_with_retries "run_nc ${DB_HOST@Q} ${DB_PORT@Q}"
else
>&2 echo "The connection details to the broker could not be determined. Connectivity checks were skipped."
fi
}

function create_www_user() {
Expand Down Expand Up @@ -281,7 +302,7 @@ if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then
>&2 echo
>&2 echo "!!!!! Installing additional requirements: '${_PIP_ADDITIONAL_REQUIREMENTS}' !!!!!!!!!!!!"
>&2 echo
>&2 echo "WARNING: This is a developpment/test feature only. NEVER use it in production!"
>&2 echo "WARNING: This is a development/test feature only. NEVER use it in production!"
>&2 echo " Instead, build a custom image as described in"
>&2 echo
>&2 echo " https://airflow.apache.org/docs/docker-stack/build.html"
Expand All @@ -290,7 +311,7 @@ if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then
>&2 echo " the container starts, so it is onlny useful for testing and trying out"
>&2 echo " of adding dependencies."
>&2 echo
pip install --no-cache-dir ${_PIP_ADDITIONAL_REQUIREMENTS}
pip install --root-user-action ignore --no-cache-dir ${_PIP_ADDITIONAL_REQUIREMENTS}
Comment thread
lfrancke marked this conversation as resolved.
fi


Expand Down
Binary file removed airflow/stackable/utils/tini-v0.19.0
Binary file not shown.
4 changes: 3 additions & 1 deletion hadoop/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ RUN microdnf update && \
fuse-libs \
krb5-workstation \
openssl && \
microdnf clean all
microdnf clean all && \
rm -rf /var/cache/yum

COPY hadoop/licenses /licenses

Expand All @@ -141,6 +142,7 @@ COPY --chown=stackable:stackable --from=builder /stackable/jmx /stackable/jmx/
RUN ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop
COPY hadoop/stackable/fuse_dfs_wrapper /stackable/hadoop/bin

ENV HOME=/stackable
ENV LD_LIBRARY_PATH=/stackable/hadoop/lib/native:/usr/lib/jvm/jre/lib/server
ENV PATH="${PATH}":/stackable/hadoop/bin
ENV HADOOP_HOME=/stackable/hadoop
Expand Down
35 changes: 15 additions & 20 deletions image_tools/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,42 @@
"versions": [
{
"product": "2.2.3",
"git_sync": "v3.6.8",
"python": "38",
"statsd_exporter": "v0.24.0",
"tini": "0.19.0",
"vector": "0.31.0",
},
{
"product": "2.2.4",
"git_sync": "v3.6.8",
"python": "39",
"statsd_exporter": "v0.24.0",
"tini": "0.19.0",
"vector": "0.31.0",
},
{
"product": "2.2.5",
"git_sync": "v3.6.8",
"python": "39",
"statsd_exporter": "v0.24.0",
"tini": "0.19.0",
"vector": "0.31.0",
},
{
"product": "2.4.1",
"git_sync": "v3.6.8",
"python": "39",
"statsd_exporter": "v0.24.0",
"tini": "0.19.0",
"vector": "0.31.0",
},
{
"product": "2.6.1",
"git_sync": "v3.6.8",
"python": "39",
"statsd_exporter": "v0.24.0",
"tini": "0.19.0",
"vector": "0.31.0",
},
],
Expand Down Expand Up @@ -74,26 +89,6 @@
{
"name": "hbase",
"versions": [
{
"product": "2.4.6",
"java-base": "11",
"phoenix": "2.4-5.1.2",
},
{
"product": "2.4.8",
"java-base": "11",
"phoenix": "2.4-5.1.2",
},
{
"product": "2.4.9",
"java-base": "11",
"phoenix": "2.4-5.1.2",
},
{
"product": "2.4.11",
"java-base": "11",
"phoenix": "2.4-5.1.2",
},
{
"product": "2.4.12",
"java-base": "11",
Expand Down