bump airflow version to 1.10.12 #623

Open · wants to merge 1 commit into base: master
5 changes: 3 additions & 2 deletions Dockerfile
@@ -1,5 +1,6 @@
-# VERSION 1.10.9
+# VERSION 1.10.12
# AUTHOR: Matthieu "Puckel_" Roisil
+# UPGRADE BY David Wong
# DESCRIPTION: Basic Airflow container
# BUILD: docker build --rm -t puckel/docker-airflow .
# SOURCE: https://github.com/puckel/docker-airflow
@@ -12,7 +13,7 @@ ENV DEBIAN_FRONTEND noninteractive
ENV TERM linux

# Airflow
-ARG AIRFLOW_VERSION=1.10.9
+ARG AIRFLOW_VERSION=1.10.12
ARG AIRFLOW_USER_HOME=/usr/local/airflow
ARG AIRFLOW_DEPS=""
ARG PYTHON_DEPS=""
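
Note: the version bump itself only touches the VERSION comment and the AIRFLOW_VERSION build arg, so the image is rebuilt exactly as before. A minimal sketch (the tags are just examples, and pinning a different release relies on the existing ARG):

    # Rebuild with the new default baked in via ARG AIRFLOW_VERSION=1.10.12
    docker build --rm -t puckel/docker-airflow:1.10.12 .

    # Or pin another 1.10.x release at build time without editing the Dockerfile
    docker build --rm --build-arg AIRFLOW_VERSION=1.10.10 -t puckel/docker-airflow:1.10.10 .
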
96 changes: 90 additions & 6 deletions config/airflow.cfg
@@ -110,6 +110,12 @@ sql_alchemy_pool_pre_ping = True
# SqlAlchemy supports databases with the concept of multiple schemas.
sql_alchemy_schema =

# Import path for connect args in SqlAlchemy. Default to an empty dict.
# This is useful when you want to configure db engine args that SqlAlchemy won't parse
# in connection string.
# See https://docs.sqlalchemy.org/en/13/core/engines.html#sqlalchemy.create_engine.params.connect_args
# sql_alchemy_connect_args =

# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
@@ -124,11 +130,16 @@ dags_are_paused_at_creation = True
# The maximum number of active DAG runs per DAG
max_active_runs_per_dag = 16

-# Whether to load the examples that ship with Airflow. It's good to
+# Whether to load the DAG examples that ship with Airflow. It's good to
# get started, but you probably want to set this to False in a production
# environment
load_examples = True

# Whether to load the default connections that ship with Airflow. It's good to
# get started, but you probably want to set this to False in a production
# environment
load_default_connections = False

# Where your Airflow plugins are stored
plugins_folder = /usr/local/airflow/plugins

@@ -184,17 +195,51 @@ dag_discovery_safe_mode = True
# The number of retries each task is going to have by default. Can be overridden at dag or task level.
default_task_retries = 0

-# Whether to serialises DAGs and persist them in DB.
+# Whether to serialise DAGs and persist them in DB.
# If set to True, Webserver reads from DB instead of parsing DAG files
# More details: https://airflow.apache.org/docs/stable/dag-serialization.html
store_serialized_dags = False

# Updating serialized DAG can not be faster than a minimum interval to reduce database write rate.
min_serialized_dag_update_interval = 30

# Fetching serialized DAG can not be faster than a minimum interval to reduce database
# read rate. This config controls when your DAGs are updated in the Webserver
min_serialized_dag_fetch_interval = 10

# Whether to persist DAG files code in DB.
# If set to True, Webserver reads file contents from DB instead of
# trying to access files in a DAG folder. Defaults to same as the
# ``store_serialized_dags`` setting.
# Example: store_dag_code = False
# store_dag_code =

# Maximum number of Rendered Task Instance Fields (Template Fields) per task to store
# in the Database.
# When Dag Serialization is enabled (``store_serialized_dags=True``), all the template_fields
# for each of Task Instance are stored in the Database.
# Keeping this number small may cause an error when you try to view ``Rendered`` tab in
# TaskInstance view for older tasks.
max_num_rendered_ti_fields_per_task = 30

# On each dagrun check against defined SLAs
check_slas = True

# Path to custom XCom class that will be used to store and resolve operators results
# Example: xcom_backend = path.to.CustomXCom
xcom_backend = airflow.models.xcom.BaseXCom

[secrets]
# Full class name of secrets backend to enable (will precede env vars and metastore in search path)
# Example: backend = airflow.contrib.secrets.aws_systems_manager.SystemsManagerParameterStoreBackend
backend =

# The backend_kwargs param is loaded into a dictionary and passed to __init__ of secrets backend class.
# See documentation for the secrets backend you are using. JSON is expected.
# Example for AWS Systems Manager ParameterStore:
# ``{{"connections_prefix": "/airflow/connections", "profile_name": "default"}}``
backend_kwargs =

[cli]
# In what way should the cli access the API. The LocalClient will use the
# database directly, while the json_client will use the api running on the
@@ -212,7 +257,9 @@ endpoint_url = http://localhost:8080
fail_fast = False

[api]
-# How to authenticate users of the API
+# How to authenticate users of the API. See
+# https://airflow.apache.org/docs/stable/security.html for possible values.
+# ("airflow.api.auth.backend.default" allows all requests for historic reasons)
auth_backend = airflow.api.auth.backend.default

[lineage]
@@ -245,6 +292,12 @@ default_hive_mapred_queue =
# airflow sends to point links to the right web server
base_url = http://localhost:8080

# Default timezone to display all dates in the RBAC UI, can be UTC, system, or
# any IANA timezone string (e.g. Europe/Amsterdam). If left empty the
# default value of core/default_timezone will be used
# Example: default_ui_timezone = America/New_York
default_ui_timezone =

# The ip specified when starting the web server
web_server_host = 0.0.0.0

@@ -273,6 +326,10 @@ worker_refresh_batch_size = 1
# Number of seconds to wait before refreshing a batch of workers.
worker_refresh_interval = 30

# If set to True, Airflow will track files in plugins_folder directory. When it detects changes,
# then reload the gunicorn.
reload_on_plugin_change = False

# Secret key used to run your flask app
# It should be as random as possible
secret_key = temporary_key
@@ -734,18 +791,30 @@ verify_certs = True
[kubernetes]
# The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
worker_container_repository =

# Path to the YAML pod file. If set, all other kubernetes-related fields are ignored.
# (This feature is experimental)
pod_template_file =
worker_container_tag =
worker_container_image_pull_policy = IfNotPresent

-# If True (default), worker pods will be deleted upon termination
+# If True, all worker pods will be deleted upon termination
delete_worker_pods = True

# If False (and delete_worker_pods is True),
# failed worker pods will not be deleted so users can investigate them.
delete_worker_pods_on_failure = False

# Number of Kubernetes Worker Pod creation calls per scheduler loop
worker_pods_creation_batch_size = 1

# The Kubernetes namespace where airflow workers should be created. Defaults to ``default``
namespace = default

# Allows users to launch pods in multiple namespaces.
# Will require creating a cluster-role for the scheduler
multi_namespace_mode = False

# The name of the Kubernetes ConfigMap containing the Airflow Configuration (this file)
# Example: airflow_configmap = airflow-configmap
airflow_configmap =
@@ -782,6 +851,9 @@ dags_in_image = False
# For either git sync or volume mounted DAGs, the worker will look in this subpath for DAGs
dags_volume_subpath =

# For either git sync or volume mounted DAGs, the worker will mount the volume in this path
dags_volume_mount_point =

# For DAGs mounted via a volume claim (mutually exclusive with git-sync and host path)
dags_volume_claim =

@@ -810,6 +882,10 @@ env_from_secret_ref =
# Git credentials and repository for DAGs mounted via Git (mutually exclusive with volume claim)
git_repo =
git_branch =

# Use a shallow clone with a history truncated to the specified number of commits.
# 0 - do not use shallow clone.
git_sync_depth = 1
git_subpath =

# The specific rev or hash the git_sync init container will checkout
@@ -931,10 +1007,18 @@ tolerations =
# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely
# for kubernetes api responses, which will cause the scheduler to hang.
# The timeout is specified as [connect timeout, read timeout]
-kube_client_request_args = {{"_request_timeout" : [60,60] }}
+kube_client_request_args =

# Optional keyword arguments to pass to the ``delete_namespaced_pod`` kubernetes client
# ``core_v1_api`` method when using the Kubernetes Executor.
# This should be an object and can contain any of the options listed in the ``v1DeleteOptions``
# class defined here:
# https://github.com/kubernetes-client/python/blob/41f11a09995efcd0142e25946adc7591431bfb2f/kubernetes/client/models/v1_delete_options.py#L19
# Example: delete_option_kwargs = {{"grace_period_seconds": 10}}
delete_option_kwargs =

# Specifies the uid to run the first process of the worker pods containers as
-run_as_user =
+run_as_user = 50000

# Specifies a gid to associate with all containers in the worker pods
# if using a git_ssh_key_secret_name use an fs_group
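
Note: most of the airflow.cfg changes track the new defaults that ship with Airflow 1.10.12 (DAG serialization settings, the [secrets] section, extra [kubernetes] options). Any of them can also be overridden at run time through Airflow's AIRFLOW__<SECTION>__<KEY> environment variables rather than editing the baked-in file; a hedged sketch, where the chosen keys and values are only examples (the secrets backend shown is the one quoted in the config comment and needs its own AWS credentials and dependencies):

    docker run -d -p 8080:8080 \
      -e AIRFLOW__CORE__LOAD_EXAMPLES=False \
      -e AIRFLOW__CORE__STORE_SERIALIZED_DAGS=True \
      -e AIRFLOW__SECRETS__BACKEND=airflow.contrib.secrets.aws_systems_manager.SystemsManagerParameterStoreBackend \
      puckel/docker-airflow:1.10.12 webserver
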
8 changes: 4 additions & 4 deletions docker-compose-CeleryExecutor.yml
@@ -16,7 +16,7 @@ services:
# - ./pgdata:/var/lib/postgresql/data/pgdata

webserver:
-image: puckel/docker-airflow:1.10.9
+image: puckel/docker-airflow:1.10.12
restart: always
depends_on:
- postgres
@@ -43,7 +43,7 @@ services:
retries: 3

flower:
-image: puckel/docker-airflow:1.10.9
+image: puckel/docker-airflow:1.10.12
restart: always
depends_on:
- redis
@@ -55,7 +55,7 @@ services:
command: flower

scheduler:
-image: puckel/docker-airflow:1.10.9
+image: puckel/docker-airflow:1.10.12
restart: always
depends_on:
- webserver
@@ -74,7 +74,7 @@ services:
command: scheduler

worker:
-image: puckel/docker-airflow:1.10.9
+image: puckel/docker-airflow:1.10.12
restart: always
depends_on:
- scheduler
2 changes: 1 addition & 1 deletion docker-compose-LocalExecutor.yml
@@ -12,7 +12,7 @@ services:
max-file: "3"

webserver:
-image: puckel/docker-airflow:1.10.9
+image: puckel/docker-airflow:1.10.12
restart: always
depends_on:
- postgres
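
Note: both compose files only change the image tag, so bringing a stack up is unchanged. For example, assuming the 1.10.12 image has been built or pulled locally:

    # Single-node setup
    docker-compose -f docker-compose-LocalExecutor.yml up -d

    # Celery setup, optionally scaling out the workers
    docker-compose -f docker-compose-CeleryExecutor.yml up -d --scale worker=2
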
6 changes: 3 additions & 3 deletions script/entrypoint.sh
@@ -37,7 +37,7 @@ wait_for_port() {
echo >&2 "$(date) - $host:$port still not reachable, giving up"
exit 1
fi
echo "$(date) - waiting for $name... $j/$TRY_LOOP"
echo "$(date) - waiting for $name($host:$port)... $j/$TRY_LOOP"
sleep 5
done
}
@@ -100,8 +100,8 @@ if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ]; then
else
# Derive useful variables from the AIRFLOW__ variables provided explicitly by the user
REDIS_ENDPOINT=$(echo -n "$AIRFLOW__CELERY__BROKER_URL" | cut -d '/' -f3 | sed -e 's,.*@,,')
REDIS_HOST=$(echo -n "$POSTGRES_ENDPOINT" | cut -d ':' -f1)
REDIS_PORT=$(echo -n "$POSTGRES_ENDPOINT" | cut -d ':' -f2)
REDIS_HOST=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f1)
REDIS_PORT=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f2)
fi

wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT"
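
Note: besides the more verbose log line, the entrypoint change is a small bug fix: REDIS_HOST and REDIS_PORT were previously derived from POSTGRES_ENDPOINT, so the Redis wait loop targeted the wrong host whenever AIRFLOW__CELERY__BROKER_URL was set explicitly. A sketch of what the corrected parsing yields, assuming a broker URL of the usual redis://:password@host:port/db form (the concrete value below is made up):

    AIRFLOW__CELERY__BROKER_URL="redis://:redispass@redis:6379/1"
    REDIS_ENDPOINT=$(echo -n "$AIRFLOW__CELERY__BROKER_URL" | cut -d '/' -f3 | sed -e 's,.*@,,')   # -> redis:6379
    REDIS_HOST=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f1)                                       # -> redis
    REDIS_PORT=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f2)                                       # -> 6379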