diff --git a/.coveragerc b/.coveragerc
index 9a07159daeb8c..83805150083e5 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -24,4 +24,3 @@ omit =
airflow/migrations/*
airflow/www/node_modules/**
airflow/www_rbac/node_modules/**
- airflow/_vendor/*
diff --git a/.dockerignore b/.dockerignore
index e7d656456dc59..8a90d745cfbe3 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -31,10 +31,21 @@
!common
!dags
!dev
+!chart
!docs
!licenses
-!scripts
+!metastore_browser
+
+# Add those folders to the context so that they are available in the CI container
+!scripts/in_container
+!scripts/docker
+
+# Add backport packages to the context
+!backport_packages
+
+# Add tests and kubernetes_tests to context.
!tests
+!kubernetes_tests
!.coveragerc
!.rat-excludes
@@ -42,14 +53,16 @@
!.dockerignore
!pytest.ini
!CHANGELOG.txt
-!Dockerfile.ci
-!Dockerfile
!LICENSE
!MANIFEST.in
!NOTICE
!.github
-!requirements
-!entrypoint.sh
+!empty
+
+# This folder is for you if you want to add any packages to the Docker context when you build your own
+# Docker image. Most other files and any new folders you add will be excluded by default;
+# if you need other types of files - please add the extensions here.
+!docker-context-files
# Avoid triggering context change on README change (new companies using Airflow)
# So please do not uncomment this line ;)
diff --git a/.flake8 b/.flake8
index 4a2ca5b7e1a17..099ff70f8bc03 100644
--- a/.flake8
+++ b/.flake8
@@ -1,5 +1,5 @@
[flake8]
max-line-length = 110
ignore = E731,W504,I001,W503
-exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.eggs,*.egg,*/_vendor/*,node_modules
+exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.eggs,*.egg,node_modules
format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000..d872017df781b
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+/chart export-ignore
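+# export-ignore excludes /chart from "git archive" output (e.g. release source tarballs).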
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 64625e449bd9d..1e3c23d5ad347 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,10 +1,23 @@
-- [ ] Description above provides context of the change
-- [ ] Commit message contains [\[AIRFLOW-XXXX\]](https://issues.apache.org/jira/browse/AIRFLOW-XXXX) or `[AIRFLOW-XXXX]` for document-only changes
-- [ ] Unit tests coverage for changes (not needed for documentation changes)
-- [ ] Commits follow "[How to write a good git commit message](http://chris.beams.io/posts/git-commit/)"
-- [ ] Relevant documentation is updated including usage instructions.
-- [ ] I will engage committers as explained in [Contribution Workflow Example](https://github.com/apache/airflow/blob/master/CONTRIBUTING.rst#contribution-workflow-example).
+
+
+---
+**^ Add meaningful description above**
+
+Read the **[Pull Request Guidelines](https://github.com/apache/airflow/blob/master/CONTRIBUTING.rst#pull-request-guidelines)** for more information.
In case of fundamental code change, Airflow Improvement Proposal ([AIP](https://cwiki.apache.org/confluence/display/AIRFLOW/Airflow+Improvements+Proposals)) is needed.
In case of a new dependency, check compliance with the [ASF 3rd Party License Policy](https://www.apache.org/legal/resolved.html#category-x).
In case of backwards incompatible changes please leave a note in [UPDATING.md](https://github.com/apache/airflow/blob/master/UPDATING.md).
-Read the [Pull Request Guidelines](https://github.com/apache/airflow/blob/master/CONTRIBUTING.rst#pull-request-guidelines) for more information.
diff --git a/.github/actions/cancel-workflow-runs b/.github/actions/cancel-workflow-runs
new file mode 160000
index 0000000000000..953e057dc81d3
--- /dev/null
+++ b/.github/actions/cancel-workflow-runs
@@ -0,0 +1 @@
+Subproject commit 953e057dc81d3458935a18d1184c386b0f6b5738
diff --git a/.github/actions/checks-action b/.github/actions/checks-action
new file mode 160000
index 0000000000000..9f02872da71b6
--- /dev/null
+++ b/.github/actions/checks-action
@@ -0,0 +1 @@
+Subproject commit 9f02872da71b6f558c6a6f190f925dde5e4d8798
diff --git a/.github/actions/codecov-action b/.github/actions/codecov-action
new file mode 160000
index 0000000000000..1fc7722ded470
--- /dev/null
+++ b/.github/actions/codecov-action
@@ -0,0 +1 @@
+Subproject commit 1fc7722ded4708880a5aea49f2bfafb9336f0c8d
diff --git a/.github/actions/configure-aws-credentials b/.github/actions/configure-aws-credentials
new file mode 160000
index 0000000000000..e97d7fbc8e0e5
--- /dev/null
+++ b/.github/actions/configure-aws-credentials
@@ -0,0 +1 @@
+Subproject commit e97d7fbc8e0e5af69631c13daa0f4b5a8d88165b
diff --git a/.github/actions/get-workflow-origin b/.github/actions/get-workflow-origin
new file mode 160000
index 0000000000000..588cc14f9f1cd
--- /dev/null
+++ b/.github/actions/get-workflow-origin
@@ -0,0 +1 @@
+Subproject commit 588cc14f9f1cdf1b8be3db816855e96422204fec
diff --git a/.github/actions/github-push-action b/.github/actions/github-push-action
new file mode 160000
index 0000000000000..40bf560936a80
--- /dev/null
+++ b/.github/actions/github-push-action
@@ -0,0 +1 @@
+Subproject commit 40bf560936a8022e68a3c00e7d2abefaf01305a6
diff --git a/.github/actions/label-when-approved-action b/.github/actions/label-when-approved-action
new file mode 160000
index 0000000000000..4c5190fec5661
--- /dev/null
+++ b/.github/actions/label-when-approved-action
@@ -0,0 +1 @@
+Subproject commit 4c5190fec5661e98d83f50bbd4ef9ebb48bd1194
diff --git a/.github/workflows/build-images-workflow-run.yml b/.github/workflows/build-images-workflow-run.yml
new file mode 100644
index 0000000000000..5c85cb44f0735
--- /dev/null
+++ b/.github/workflows/build-images-workflow-run.yml
@@ -0,0 +1,427 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+---
+name: "Build Images"
+on: # yamllint disable-line rule:truthy
+ workflow_run:
+ workflows: ["CI Build"]
+ types: ['requested']
+env:
+ MOUNT_LOCAL_SOURCES: "false"
+ FORCE_ANSWER_TO_QUESTIONS: "yes"
+ FORCE_PULL_IMAGES: "true"
+ CHECK_IMAGE_FOR_REBUILD: "true"
+ SKIP_CHECK_REMOTE_IMAGE: "true"
+ DB_RESET: "true"
+ VERBOSE: "true"
+ USE_GITHUB_REGISTRY: "true"
+ GITHUB_REPOSITORY: ${{ github.repository }}
+ GITHUB_USERNAME: ${{ github.actor }}
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ GITHUB_REGISTRY_PULL_IMAGE_TAG: "latest"
+ GITHUB_REGISTRY_WAIT_FOR_IMAGE: "false"
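+ # Note: an unset secret renders as an empty string, which is != 'false',
+ # so BUILD_IMAGES defaults to true unless the secret is explicitly set to "false".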
+ BUILD_IMAGES: ${{ secrets.AIRFLOW_GITHUB_REGISTRY_WAIT_FOR_IMAGE != 'false' }}
+
+jobs:
+
+ cancel-workflow-runs:
+ timeout-minutes: 10
+ name: "Cancel workflow runs"
+ runs-on: ubuntu-20.04
+ outputs:
+ sourceHeadRepo: ${{ steps.source-run-info.outputs.sourceHeadRepo }}
+ sourceHeadBranch: ${{ steps.source-run-info.outputs.sourceHeadBranch }}
+ sourceHeadSha: ${{ steps.source-run-info.outputs.sourceHeadSha }}
+ mergeCommitSha: ${{ steps.source-run-info.outputs.mergeCommitSha }}
+ targetCommitSha: ${{ steps.source-run-info.outputs.targetCommitSha }}
+ pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }}
+ pullRequestLabels: ${{ steps.source-run-info.outputs.pullRequestLabels }}
+ targetBranch: ${{ steps.source-run-info.outputs.targetBranch }}
+ sourceEvent: ${{ steps.source-run-info.outputs.sourceEvent }}
+ cacheDirective: ${{ steps.cache-directive.outputs.docker-cache }}
+ buildImages: ${{ steps.build-images.outputs.buildImages }}
+ steps:
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v2
+ with:
+ persist-credentials: false
+ submodules: recursive
+ - name: "Get information about the original trigger of the run"
+ uses: ./.github/actions/get-workflow-origin
+ id: source-run-info
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ sourceRunId: ${{ github.event.workflow_run.id }}
+ - name: "Cancel duplicated 'CI Build' runs"
+ uses: ./.github/actions/cancel-workflow-runs
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ cancelMode: allDuplicates
+ sourceRunId: ${{ github.event.workflow_run.id }}
+ - name: "Output BUILD_IMAGES"
+ id: build-images
+ run: |
+ # Workaround - jobs cannot access env variables in their "if" conditions
+ # https://github.community/t/how-to-set-and-access-a-workflow-variable/17335/16
+ echo "::set-output name=buildImages::${BUILD_IMAGES}"
+ - name: "Cancel duplicated 'Build Image' runs"
+ # We find duplicates of our own "Build Image" runs - due to a missing feature
+ # in GitHub Actions, we have to fall back on the Event/Repo/Branch job-name matching
+ # trick ¯\_(ツ)_/¯. We name the build-info job accordingly,
+ # and then we try to find and cancel all the jobs with the same Event + Repo + Branch as the
+ # current Event/Repo/Branch combination.
+ uses: ./.github/actions/cancel-workflow-runs
+ with:
+ cancelMode: namedJobs
+ token: ${{ secrets.GITHUB_TOKEN }}
+ notifyPRCancel: true
+ jobNameRegexps: >
+ [".*Event: ${{ steps.source-run-info.outputs.sourceEvent }}
+ Repo: ${{ steps.source-run-info.outputs.sourceHeadRepo }}
+ Branch: ${{ steps.source-run-info.outputs.sourceHeadBranch }}.*"]
+ if: env.BUILD_IMAGES == 'true'
+ - name: "Cancel all 'CI Build' runs where some jobs failed"
+ # We find any of the "CI Build" workflow runs where any of the important jobs
+ # failed. The important jobs are selected by the regexp array below.
+ # We also produce a list of cancelled "CI Build" runs as output, so that we
+ # can cancel all the matching "Build Images" workflow runs in the two following steps.
+ # Yeah. Adding to the complexity ¯\_(ツ)_/¯.
+ uses: ./.github/actions/cancel-workflow-runs
+ id: cancel-failed
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ cancelMode: failedJobs
+ sourceRunId: ${{ github.event.workflow_run.id }}
+ notifyPRCancel: true
+ jobNameRegexps: >
+ ["^Static checks.*", "^Build docs$", "^Spell check docs$", "^Backport packages$",
+ "^Checks: Helm tests$", "^Test OpenAPI*"]
+ - name: "Extract canceled failed runs"
+ # We use this step to build regexp that will be used to match the Source Run id in
+ # the build-info job below. If we cancelled some "CI Build" runs in the "cancel-failed' step
+ # above - we want to cancel also the corresponding "Build Images" runs. Again we have
+ # to match the jobs using job name rather than use proper API because that feature
+ # is currently missing in GitHub Actions ¯\_(ツ)_/¯.
+ id: extract-cancelled-failed-runs
+ if: steps.cancel-failed.outputs.cancelledRuns != '[]'
+ run: |
+ REGEXP="Source Run id: "
+ SEPARATOR=""
+ for run_id in $(echo "${{ steps.cancel-failed.outputs.cancelledRuns }}" | jq '.[]')
+ do
+ REGEXP="${REGEXP}${SEPARATOR}(${run_id})"
+ SEPARATOR="|"
+ done
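+ # For illustration: if runs 123 and 456 were cancelled, this emits
+ # ::set-output name=matching-regexp::["Source Run id: (123)|(456)"]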
+ echo "::set-output name=matching-regexp::[\"${REGEXP}\"]"
+ - name: "Cancel triggered 'Build Images' runs for the cancelled failed runs"
+ # If we cancelled some jobs in the "cancel-failed" step above,
+ # we take the extracted regexp array prepared in the previous step and use
+ # it to cancel any jobs that have matching names containing "Source Run id:"
+ # followed by one of the run ids. Yes, I know it's super complex ¯\_(ツ)_/¯.
+ if: env.BUILD_IMAGES == 'true' && steps.cancel-failed.outputs.cancelledRuns != '[]'
+ uses: ./.github/actions/cancel-workflow-runs
+ with:
+ cancelMode: namedJobs
+ token: ${{ secrets.GITHUB_TOKEN }}
+ notifyPRCancel: true
+ jobNameRegexps: ${{ steps.extract-cancelled-failed-runs.outputs.matching-regexp }}
+ - name: "Cancel duplicated 'CodeQL' runs"
+ uses: ./.github/actions/cancel-workflow-runs
+ id: cancel
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ cancelMode: allDuplicates
+ workflowFileName: 'codeql-analysis.yml'
+ - name: "Set Docker Cache Directive"
+ id: cache-directive
+ run: |
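+ # Scheduled runs rebuild the images from scratch (cache disabled);
+ # all other runs reuse pulled images as cache.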
+ if [[ "${{ steps.source-run-info.outputs.sourceEvent }}" == 'schedule' ]]; then
+ echo "::set-output name=docker-cache::disabled"
+ else
+ echo "::set-output name=docker-cache::pulled"
+ fi
+ - name: "Cancel all duplicated 'Build Image' runs"
+ # We find duplicates of all "Build Image" runs - due to a missing feature
+ # in GitHub Actions, we have to fall back on the Event/Repo/Branch job-name matching
+ # trick ¯\_(ツ)_/¯. We name the build-info job accordingly and then try to match
+ # all the jobs with the same Event + Repo + Branch and cancel all the duplicates for those.
+ # This might cancel our own run, so this is the last step in the job.
+ uses: ./.github/actions/cancel-workflow-runs
+ with:
+ cancelMode: allDuplicatedNamedJobs
+ token: ${{ secrets.GITHUB_TOKEN }}
+ notifyPRCancel: true
+ selfPreservation: false
+ jobNameRegexps: '["Event: \\S* Repo: \\S* Branch: \\S* "]'
+
+ build-info:
+ # The name is this long because we are using it to cancel duplicated 'Build Images' runs
+ # by matching Event/Repo/Branch. This is a workaround for a missing feature of GitHub
+ # Actions to link the source workflow run and the triggered workflow_run one.
+ # We also cancel the source run (SourceRunId) in case we determine that it should be
+ # cancelled because of some failing jobs in the source run. Again ¯\_(ツ)_/¯.
+ name: >
+ Event: ${{ needs.cancel-workflow-runs.outputs.sourceEvent }}
+ Repo: ${{ needs.cancel-workflow-runs.outputs.sourceHeadRepo }}
+ Branch: ${{ needs.cancel-workflow-runs.outputs.sourceHeadBranch }}
+ Run id: ${{ github.run_id }}
+ Source Run id: ${{ github.event.workflow_run.id }}
+ Sha: ${{ github.sha }}
+ Source Sha: ${{ needs.cancel-workflow-runs.outputs.sourceHeadSha }}
+ Merge commit Sha: ${{ needs.cancel-workflow-runs.outputs.mergeCommitSha }}
+ Target commit Sha: ${{ needs.cancel-workflow-runs.outputs.targetCommitSha }}
+ runs-on: ubuntu-20.04
+ needs: [cancel-workflow-runs]
+ env:
+ GITHUB_CONTEXT: ${{ toJson(github) }}
+ outputs:
+ pythonVersions: ${{ steps.selective-checks.outputs.python-versions }}
+ upgradeToLatestConstraints: ${{ steps.selective-checks.outputs.upgrade-to-latest-constraints }}
+ allPythonVersions: ${{ steps.selective-checks.outputs.all-python-versions }}
+ defaultPythonVersion: ${{ steps.selective-checks.outputs.default-python-version }}
+ run-tests: ${{ steps.selective-checks.outputs.run-tests }}
+ run-kubernetes-tests: ${{ steps.selective-checks.outputs.run-kubernetes-tests }}
+ image-build: ${{ steps.selective-checks.outputs.image-build }}
+ if: >
+ needs.cancel-workflow-runs.outputs.buildImages == 'true'
+ steps:
+ # First fetch the sha of the merge commit in case this is a pull request, so that
+ # we can run selective tests
+ - name: >
+ Fetch merge commit ${{ github.ref }} ( ${{ github.sha }}:
+ merge_commit ${{ needs.cancel-workflow-runs.outputs.mergeCommitSha }} )
+ uses: actions/checkout@v2
+ with:
+ ref: ${{ needs.cancel-workflow-runs.outputs.mergeCommitSha }}
+ fetch-depth: 2
+ if: needs.cancel-workflow-runs.outputs.sourceEvent == 'pull_request'
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v2
+ with:
+ persist-credentials: false
+ - name: >
+ Event: ${{ needs.cancel-workflow-runs.outputs.sourceEvent }}
+ Repo: ${{ needs.cancel-workflow-runs.outputs.sourceHeadRepo }}
+ Branch: ${{ needs.cancel-workflow-runs.outputs.sourceHeadBranch }}
+ Run id: ${{ github.run_id }}
+ Source Run id: ${{ github.event.workflow_run.id }}
+ Sha: ${{ github.sha }}
+ Source Sha: ${{ needs.cancel-workflow-runs.outputs.sourceHeadSha }}
+ Merge commit Sha: ${{ needs.cancel-workflow-runs.outputs.mergeCommitSha }}
+ Target commit Sha: ${{ needs.cancel-workflow-runs.outputs.targetCommitSha }}
+ run: printenv
+ - name: >
+ Fetch incoming commit ${{ needs.cancel-workflow-runs.outputs.targetCommitSha }} with its parent
+ uses: actions/checkout@v2
+ with:
+ ref: ${{ needs.cancel-workflow-runs.outputs.targetCommitSha }}
+ fetch-depth: 2
+ persist-credentials: false
+ if: needs.cancel-workflow-runs.outputs.sourceEvent == 'pull_request'
+ # Check out the master version again, to use the right scripts from the master workflow
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v2
+ with:
+ persist-credentials: false
+ - name: Selective checks
+ id: selective-checks
+ env:
+ EVENT_NAME: ${{ needs.cancel-workflow-runs.outputs.sourceEvent }}
+ TARGET_COMMIT_SHA: ${{ needs.cancel-workflow-runs.outputs.targetCommitSha }}
+ PR_LABELS: ${{ needs.cancel-workflow-runs.outputs.pullRequestLabels }}
+ run: |
+ if [[ ${EVENT_NAME} == "pull_request" ]]; then
+ # Run selective checks
+ ./scripts/ci/selective_ci_checks.sh "${TARGET_COMMIT_SHA}"
+ else
+ # Run all checks
+ ./scripts/ci/selective_ci_checks.sh
+ fi
+
+ build-images:
+ timeout-minutes: 80
+ name: "Build ${{matrix.image-type}} images ${{matrix.python-version}}"
+ runs-on: ubuntu-20.04
+ needs: [build-info, cancel-workflow-runs]
+ strategy:
+ matrix:
+ # We need to attempt to build all possible versions here because the workflow_run
+ # event runs from master for both master and v1-10-test
+ python-version: ${{ fromJson(needs.build-info.outputs.allPythonVersions) }}
+ image-type: [CI, PROD]
+ fail-fast: true
+ if: >
+ needs.build-info.outputs.image-build == 'true' &&
+ needs.cancel-workflow-runs.outputs.buildImages == 'true'
+ env:
+ BACKEND: postgres
+ PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }}
+ GITHUB_REGISTRY_PUSH_IMAGE_TAG: ${{ github.event.workflow_run.id }}
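+ # Images are pushed tagged with the id of the triggering "CI Build" run, so that
+ # run can locate the images built for it (the pulling side is not part of this diff).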
+ UPGRADE_TO_LATEST_CONSTRAINTS: ${{ needs.build-info.outputs.upgradeToLatestConstraints }}
+ DOCKER_CACHE: ${{ needs.cancel-workflow-runs.outputs.cacheDirective }}
+ steps:
+ - name: >
+ Checkout [${{ needs.cancel-workflow-runs.outputs.sourceEvent }}]
+ Event: ${{ needs.cancel-workflow-runs.outputs.sourceEvent }}
+ Repo: ${{ needs.cancel-workflow-runs.outputs.sourceHeadRepo }}
+ Branch: ${{ needs.cancel-workflow-runs.outputs.sourceHeadBranch }}
+ Run id: ${{ github.run_id }}
+ Source Run id: ${{ github.event.workflow_run.id }}
+ Sha: ${{ github.sha }}
+ Source Sha: ${{ needs.cancel-workflow-runs.outputs.sourceHeadSha }}
+ Merge commit Sha: ${{ needs.cancel-workflow-runs.outputs.mergeCommitSha }}
+ Target commit Sha: ${{ needs.cancel-workflow-runs.outputs.targetCommitSha }}
+ uses: actions/checkout@v2
+ with:
+ ref: ${{ needs.cancel-workflow-runs.outputs.targetCommitSha }}
+ persist-credentials: false
+ - name: "Retrieve DEFAULTS from the _initialization.sh"
+ # We cannot "source" the script here because that would be a security problem (we cannot run
+ # any code that comes from the sources coming from the PR. Therefore we extract the
+ # DEFAULT_BRANCH and DEFAULT_CONSTRAINTS_BRANCH via custom grep/awk/sed commands
+ # Also 2.7 and 3.5 versions are not allowed to proceed on master
+ id: defaults
+ run: |
+ DEFAULT_BRANCH=$(grep "export DEFAULT_BRANCH" scripts/ci/libraries/_initialization.sh | \
+ awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g')
+ echo "DEFAULT_BRANCH=${DEFAULT_BRANCH}" >> $GITHUB_ENV
+ DEFAULT_CONSTRAINTS_BRANCH=$(grep "export DEFAULT_CONSTRAINTS_BRANCH" \
+ scripts/ci/libraries/_initialization.sh | \
+ awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g')
+ echo "DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH}" >> $GITHUB_ENV
+ if [[ \
+ ${DEFAULT_BRANCH} != "master" || \
+ ( ${PYTHON_MAJOR_MINOR_VERSION} != "2.7" && ${PYTHON_MAJOR_MINOR_VERSION} != "3.5" ) \
+ ]]; then
+ echo "::set-output name=proceed::true"
+ else
+ echo "::set-output name=proceed::false"
+ fi
+ - name: >
+ Checkout "${{ needs.cancel-workflow-runs.outputs.targetBranch }}" branch to 'main-airflow' folder
+ to use ci/scripts from there.
+ uses: actions/checkout@v2
+ with:
+ path: "main-airflow"
+ ref: "${{ needs.cancel-workflow-runs.outputs.targetBranch }}"
+ persist-credentials: false
+ submodules: recursive
+ if: steps.defaults.outputs.proceed == 'true'
+ - name: Initiate GitHub Checks for Building image
+ # Use the submodule from main, not the PR branch
+ uses: ./main-airflow/.github/actions/checks-action
+ id: build-image-check
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ name: "Status of image build ${{ matrix.image-type }}: ${{ matrix.python-version }}"
+ status: "in_progress"
+ sha: ${{ needs.cancel-workflow-runs.outputs.sourceHeadSha }}
+ details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ output: >
+ {"summary":
+ "Building the image: ${{ matrix.image-type }}: ${{ matrix.python-version }}. See the
+ [Image Build](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})
+ for details" }
+ if: steps.defaults.outputs.proceed == 'true'
+ - name: "Setup python"
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.6
+ if: steps.defaults.outputs.proceed == 'true'
+ - name: >
+ Override "scripts/ci" with the "${{ needs.cancel-workflow-runs.outputs.targetBranch }}" branch
+ so that the PR does not override it
+ # We should not override the scripts that become part of the image, as they will not be
+ # changed in the built image - we should only override those that are executed to build
+ # the image.
+ run: |
+ rm -rf "scripts/ci"
+ mv "main-airflow/scripts/ci" "scripts"
+ if: steps.defaults.outputs.proceed == 'true'
+ - name: "Free space"
+ run: ./scripts/ci/tools/ci_free_space_on_ci.sh
+ if: steps.defaults.outputs.proceed == 'true'
+ - name: "Build CI images ${{ matrix.python-version }}:${{ github.event.workflow_run.id }}"
+ run: ./scripts/ci/images/ci_prepare_ci_image_on_ci.sh
+ # The locally built CI image is needed to prepare packages for the PROD image build
+ if: steps.defaults.outputs.proceed == 'true'
+ - name: "Push CI images ${{ matrix.python-version }}:${{ github.event.workflow_run.id }}"
+ run: ./scripts/ci/images/ci_push_ci_images.sh
+ if: matrix.image-type == 'CI' && steps.defaults.outputs.proceed == 'true'
+ - name: "Build PROD images ${{ matrix.python-version }}:${{ github.event.workflow_run.id }}"
+ run: ./scripts/ci/images/ci_prepare_prod_image_on_ci.sh
+ if: matrix.image-type == 'PROD' && steps.defaults.outputs.proceed == 'true'
+ - name: "Push PROD images ${{ matrix.python-version }}:${{ github.event.workflow_run.id }}"
+ run: ./scripts/ci/images/ci_push_production_images.sh
+ if: matrix.image-type == 'PROD' && steps.defaults.outputs.proceed == 'true'
+ - name: Update GitHub Checks for Building image with status
+ uses: ./main-airflow/.github/actions/checks-action
+ if: always() && steps.defaults.outputs.proceed == 'true'
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ check_id: ${{ steps.build-image-check.outputs.check_id }}
+ status: "completed"
+ sha: ${{ needs.cancel-workflow-runs.outputs.sourceHeadSha }}
+ conclusion: ${{ job.status }}
+ details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ output: >
+ {"summary":
+ "Building the image: ${{ matrix.image-type }}: ${{ matrix.python-version }}. See the
+ [Image Build](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})
+ for details" }
+
+ cancel-on-build-cancel:
+ name: "Cancel 'CI Build' jobs on build image cancelling."
+ runs-on: ubuntu-20.04
+ if: cancelled()
+ needs: [build-images]
+ steps:
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v2
+ with:
+ persist-credentials: false
+ submodules: recursive
+ - name: "Canceling the 'CI Build' source workflow in case of failure!"
+ uses: ./.github/actions/cancel-workflow-runs
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ cancelMode: self
+ notifyPRCancel: true
+ notifyPRCancelMessage: "Building image for the PR has been cancelled"
+ sourceRunId: ${{ github.event.workflow_run.id }}
+
+ cancel-on-build-failure:
+ name: "Cancel 'CI Build' jobs on build image failing."
+ runs-on: ubuntu-20.04
+ if: failure()
+ needs: [build-images]
+ steps:
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v2
+ with:
+ persist-credentials: false
+ submodules: recursive
+ - name: "Canceling the 'CI Build' source workflow in case of failure!"
+ uses: ./.github/actions/cancel-workflow-runs
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ cancelMode: self
+ notifyPRCancel: true
+ notifyPRCancelMessage: |
+ Building images for the PR has failed. Follow the workflow link to check the reason.
+ sourceRunId: ${{ github.event.workflow_run.id }}
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
new file mode 100644
index 0000000000000..8bdd809b5d19e
--- /dev/null
+++ b/.github/workflows/codeql-analysis.yml
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+---
+name: "CodeQL"
+
+on: # yamllint disable-line rule:truthy
+ push:
+ branches: [master]
+ schedule:
+ - cron: '0 2 * * *'
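+ # i.e. daily at 02:00 UTC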
+
+jobs:
+ selective-checks:
+ name: Selective checks
+ runs-on: ubuntu-20.04
+ outputs:
+ needs-python-scans: ${{ steps.selective-checks.outputs.needs-python-scans }}
+ needs-javascript-scans: ${{ steps.selective-checks.outputs.needs-javascript-scans }}
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v2
+ with:
+ fetch-depth: 2
+ persist-credentials: false
+ - name: Selective checks
+ id: selective-checks
+ env:
+ EVENT_NAME: ${{ github.event_name }}
+ TARGET_COMMIT_SHA: ${{ github.sha }}
+ run: |
+ if [[ ${EVENT_NAME} == "pull_request" ]]; then
+ # Run selective checks
+ ./scripts/ci/selective_ci_checks.sh "${TARGET_COMMIT_SHA}"
+ else
+ # Run all checks
+ ./scripts/ci/selective_ci_checks.sh
+ fi
+
+ analyze:
+ name: Analyze
+ runs-on: ubuntu-20.04
+ needs: [selective-checks]
+ strategy:
+ fail-fast: false
+ matrix:
+ # Override automatic language detection by changing the below list
+ # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python']
+ language: ['python', 'javascript']
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v2
+ with:
+ # We must fetch at least the immediate parents so that, if this is
+ # a pull request, we can check out the head.
+ fetch-depth: 2
+ persist-credentials: false
+ if: |
+ matrix.language == 'python' && needs.selective-checks.outputs.needs-python-scans == 'true' ||
+ matrix.language == 'javascript' && needs.selective-checks.outputs.needs-javascript-scans == 'true'
+
+ # If this run was triggered by a pull request event, then checkout
+ # the head of the pull request instead of the merge commit.
+ - run: git checkout HEAD^2
+ if: |
+ github.event_name == 'pull_request' &&
+ (matrix.language == 'python' && needs.selective-checks.outputs.needs-python-scans == 'true' ||
+ matrix.language == 'javascript' && needs.selective-checks.outputs.needs-javascript-scans == 'true')
+
+ # Initializes the CodeQL tools for scanning.
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v1
+ with:
+ languages: ${{ matrix.language }}
+ # If you wish to specify custom queries, you can do so here or in a config file.
+ # By default, queries listed here will override any specified in a config file.
+ # Prefix the list here with "+" to use these queries and those in the config file.
+ # queries: ./path/to/local/query, your-org/your-repo/queries@main
+ if: |
+ matrix.language == 'python' && needs.selective-checks.outputs.needs-python-scans == 'true' ||
+ matrix.language == 'javascript' && needs.selective-checks.outputs.needs-javascript-scans == 'true'
+
+ # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
+ # If this step fails, then you should remove it and run the build manually (see below)
+ - name: Autobuild
+ uses: github/codeql-action/autobuild@v1
+ if: |
+ matrix.language == 'python' && needs.selective-checks.outputs.needs-python-scans == 'true' ||
+ matrix.language == 'javascript' && needs.selective-checks.outputs.needs-javascript-scans == 'true'
+
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v1
+ if: |
+ matrix.language == 'python' && needs.selective-checks.outputs.needs-python-scans == 'true' ||
+ matrix.language == 'javascript' && needs.selective-checks.outputs.needs-javascript-scans == 'true'
diff --git a/scripts/ci/minikdc.properties b/.github/workflows/label_when_reviewed.yml
similarity index 73%
rename from scripts/ci/minikdc.properties
rename to .github/workflows/label_when_reviewed.yml
index c70ff8448bf6d..5095953def137 100644
--- a/scripts/ci/minikdc.properties
+++ b/.github/workflows/label_when_reviewed.yml
@@ -1,4 +1,3 @@
-#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -15,13 +14,16 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+#
+---
+name: Label when reviewed
+on: pull_request_review # yamllint disable-line rule:truthy
+
+jobs:
-org.name=TEST
-org.domain=LOCAL
-kdc.bind.address=localhost
-kdc.port=8888
-instance=DefaultKrbServer
-max.ticket.lifetime=86400000
-max.renewable.lifetime=604800000
-transport=TCP
-debug=true
+ label-when-reviewed:
+ name: "Label PRs when reviewed"
+ runs-on: ubuntu-20.04
+ steps:
+ - name: "Do nothing. Only trigger corresponding workflow_run event"
+ run: echo
diff --git a/.github/workflows/label_when_reviewed_workflow_run.yml b/.github/workflows/label_when_reviewed_workflow_run.yml
new file mode 100644
index 0000000000000..1ed50dd95f10e
--- /dev/null
+++ b/.github/workflows/label_when_reviewed_workflow_run.yml
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+---
+name: Label when reviewed workflow run
+on: # yamllint disable-line rule:truthy
+ workflow_run:
+ workflows: ["Label when reviewed"]
+ types: ['requested']
+jobs:
+
+ label-when-reviewed:
+ name: "Label PRs when reviewed workflow run"
+ runs-on: ubuntu-20.04
+ outputs:
+ labelSet: ${{ steps.label-when-reviewed.outputs.labelSet }}
+ steps:
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v2
+ with:
+ persist-credentials: false
+ submodules: recursive
+ - name: "Get information about the original trigger of the run"
+ uses: ./.github/actions/get-workflow-origin
+ id: source-run-info
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ sourceRunId: ${{ github.event.workflow_run.id }}
+ - name: Initiate Selective Build check
+ uses: ./.github/actions/checks-action
+ id: selective-build-check
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ name: "Selective build check"
+ status: "in_progress"
+ sha: ${{ steps.source-run-info.outputs.sourceHeadSha }}
+ details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ output: >
+ {"summary":
+ "Checking selective status of the build in
+ [the run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})
+ "}
+ - name: >
+ Event: ${{ steps.source-run-info.outputs.sourceEvent }}
+ Repo: ${{ steps.source-run-info.outputs.sourceHeadRepo }}
+ Branch: ${{ steps.source-run-info.outputs.sourceHeadBranch }}
+ Run id: ${{ github.run_id }}
+ Source Run id: ${{ github.event.workflow_run.id }}
+ Sha: ${{ github.sha }}
+ Source Sha: ${{ steps.source-run-info.outputs.sourceHeadSha }}
+ Merge commit Sha: ${{ steps.source-run-info.outputs.mergeCommitSha }}
+ Target commit Sha: ${{ steps.source-run-info.outputs.targetCommitSha }}
+ run: printenv
+ - name: >
+ Fetch incoming commit ${{ steps.source-run-info.outputs.targetCommitSha }} with its parent
+ uses: actions/checkout@v2
+ with:
+ ref: ${{ steps.source-run-info.outputs.targetCommitSha }}
+ fetch-depth: 2
+ persist-credentials: false
+ # Check out the master version again, to use the right scripts from the master workflow
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v2
+ with:
+ persist-credentials: false
+ submodules: recursive
+ - name: Selective checks
+ id: selective-checks
+ env:
+ EVENT_NAME: ${{ steps.source-run-info.outputs.sourceEvent }}
+ TARGET_COMMIT_SHA: ${{ steps.source-run-info.outputs.targetCommitSha }}
+ PR_LABELS: ${{ steps.source-run-info.outputs.pullRequestLabels }}
+ run: |
+ if [[ ${EVENT_NAME} == "pull_request_review" ]]; then
+ # Run selective checks
+ ./scripts/ci/selective_ci_checks.sh "${TARGET_COMMIT_SHA}"
+ else
+ # Run all checks
+ ./scripts/ci/selective_ci_checks.sh
+ fi
+ - name: "Label when approved by committers for PRs that require full tests"
+ uses: ./.github/actions/label-when-approved-action
+ id: label-full-test-prs-when-approved-by-commiters
+ if: >
+ steps.selective-checks.outputs.run-tests == 'true' &&
+ contains(steps.selective-checks.outputs.test-types, 'Core')
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ label: 'full tests needed'
+ require_committers_approval: 'true'
+ pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }}
+ comment: >
+ The PR most likely needs to run the full matrix of tests because it modifies parts of the core
+ of Airflow. However, committers might decide to merge it quickly and take the risk.
+ If they don't merge it quickly - please rebase it to the latest master at your convenience,
+ or amend the last commit of the PR, and push it with --force-with-lease.
+ - name: "Initiate GitHub Check forcing rerun of SH ${{ github.event.pull_request.head.sha }}"
+ uses: ./.github/actions/checks-action
+ id: full-test-check
+ if: steps.label-full-test-prs-when-approved-by-commiters.outputs.labelSet == 'true'
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ name: "Please rebase or amend, and force push the PR to run full tests"
+ status: "in_progress"
+ sha: ${{ steps.source-run-info.outputs.sourceHeadSha }}
+ details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ output: >
+ {"summary":
+ "The PR likely needs to run all tests! This was determined via selective check in
+ [the run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})
+ "}
+ - name: "Label when approved by committers for PRs that do not require full tests"
+ uses: ./.github/actions/label-when-approved-action
+ id: label-simple-test-prs-when-approved-by-commiters
+ if: >
+ steps.selective-checks.outputs.run-tests == 'true' &&
+ ! contains(steps.selective-checks.outputs.test-types, 'Core')
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ label: 'okay to merge'
+ require_committers_approval: 'true'
+ pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }}
+ comment: >
+ The PR is likely OK to be merged with just a subset of tests for the default Python and database
+ versions, without running the full matrix of tests, because it does not modify the core of
+ Airflow. If the committers decide that the full test matrix is needed, they will add the label
+ 'full tests needed'. Then you should rebase to the latest master or amend the last commit
+ of the PR, and push it with --force-with-lease.
+ - name: "Label when approved by committers for PRs that do not require tests at all"
+ uses: ./.github/actions/label-when-approved-action
+ id: label-no-test-prs-when-approved-by-commiters
+ if: steps.selective-checks.outputs.run-tests != 'true'
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ label: 'okay to merge'
+ pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }}
+ require_committers_approval: 'true'
+ comment: >
+ The PR is likely ready to be merged. No tests are needed, as it modified no important
+ environment files or Python files. However, committers might decide that the full test matrix
+ is needed and add the 'full tests needed' label. Then you should rebase it to the latest master
+ or amend the last commit of the PR, and push it with --force-with-lease.
+ - name: Update Selective Build check
+ uses: ./.github/actions/checks-action
+ if: always()
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ check_id: ${{ steps.selective-build-check.outputs.check_id }}
+ status: "completed"
+ sha: ${{ steps.source-run-info.outputs.sourceHeadSha }}
+ conclusion: ${{ job.status }}
+ details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ output: >
+ {"summary":
+ "Checking selective status of the build completed in
+ [the run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})
+ "}
diff --git a/.github/workflows/scheduled_quarantined.yml b/.github/workflows/scheduled_quarantined.yml
new file mode 100644
index 0000000000000..307169379c624
--- /dev/null
+++ b/.github/workflows/scheduled_quarantined.yml
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+---
+name: Quarantined Build
+on: # yamllint disable-line rule:truthy
+ schedule:
+ # Run quarantined builds 4 times a day to gather better quarantine stats
+ - cron: '12 */6 * * *'
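+ # i.e. at minute 12 past every 6th hour: 00:12, 06:12, 12:12, 18:12 UTC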
+
+env:
+ MOUNT_LOCAL_SOURCES: "false"
+ FORCE_ANSWER_TO_QUESTIONS: "yes"
+ FORCE_PULL_IMAGES: "true"
+ CHECK_IMAGE_FOR_REBUILD: "true"
+ SKIP_CHECK_REMOTE_IMAGE: "true"
+ DB_RESET: "true"
+ VERBOSE: "true"
+ UPGRADE_TO_LATEST_CONSTRAINTS: false
+ PYTHON_MAJOR_MINOR_VERSION: 3.6
+ USE_GITHUB_REGISTRY: "true"
+ # Since we run this build on a schedule, the image might never have been pushed,
+ # because the master merge was cancelled - so we have to rebuild the image for the quarantined build
+ GITHUB_REPOSITORY: ${{ github.repository }}
+ GITHUB_USERNAME: ${{ github.actor }}
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ GITHUB_REGISTRY_PULL_IMAGE_TAG: "latest"
+ GITHUB_REGISTRY_PUSH_IMAGE_TAG: "latest"
+ GITHUB_REGISTRY_WAIT_FOR_IMAGE: "false"
+
+jobs:
+
+ trigger-tests:
+ timeout-minutes: 5
+ name: "Checks if tests should be run"
+ runs-on: ubuntu-20.04
+ outputs:
+ run-tests: ${{ steps.trigger-tests.outputs.run-tests }}
+ steps:
+ - uses: actions/checkout@v2
+ - name: "Check if tests should be run"
+ run: "./scripts/ci/tools/ci_check_if_tests_should_be_run.sh"
+ id: trigger-tests
+
+ tests-quarantined:
+ timeout-minutes: 80
+ name: "Quarantined tests"
+ runs-on: ubuntu-20.04
+ continue-on-error: true
+ needs: [trigger-tests]
+ strategy:
+ matrix:
+ python-version: [3.6]
+ postgres-version: [9.6]
+ fail-fast: false
+ env:
+ BACKEND: postgres
+ PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }}
+ POSTGRES_VERSION: ${{ matrix.postgres-version }}
+ RUN_TESTS: "true"
+ TEST_TYPE: Quarantined
+ NUM_RUNS: 20
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ if: |
+ needs.trigger-tests.outputs.run-tests == 'true' || github.event_name != 'pull_request'
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ persist-credentials: false
+ - uses: actions/setup-python@v2
+ with:
+ python-version: '3.7'
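+ # The ISSUE_ID set below is presumably used by the test scripts to report
+ # quarantined test status on the per-branch tracking issue.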
+ - name: "Set issue id for master"
+ if: github.ref == 'refs/heads/master'
+ run: |
+ echo "ISSUE_ID=10118" >> $GITHUB_ENV
+ - name: "Set issue id for v1-10-stable"
+ if: github.ref == 'refs/heads/v1-10-stable'
+ run: |
+ echo "ISSUE_ID=10127" >> $GITHUB_ENV
+ - name: "Set issue id for v1-10-test"
+ if: github.ref == 'refs/heads/v1-10-test'
+ run: |
+ echo "ISSUE_ID=10128" >> $GITHUB_ENV
+ - name: "Free space"
+ run: ./scripts/ci/tools/ci_free_space_on_ci.sh
+ - name: "Build CI image ${{ matrix.python-version }}"
+ run: ./scripts/ci/images/ci_prepare_ci_image_on_ci.sh
+ - name: "Tests"
+ run: ./scripts/ci/testing/ci_run_airflow_testing.sh
+ - uses: actions/upload-artifact@v2
+ name: Upload Quarantine test results
+ if: always()
+ with:
+ name: 'quarantined_tests'
+ path: 'files/test_result.xml'
+ retention-days: 7
+ - uses: actions/upload-artifact@v2
+ name: Upload airflow logs
+ if: always()
+ with:
+ name: airflow-logs-quarantined-${{matrix.python-version}}-${{ matrix.postgres-version }}
+ path: './files/airflow_logs*'
+ retention-days: 7
diff --git a/.gitignore b/.gitignore
index e05a7fdde7b22..efc568633f984 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,7 +10,6 @@ secrets.py
airflow.db
unittests.db
-
# Airflow temporary artifacts
airflow/git_version
airflow/www/static/coverage/
@@ -154,8 +153,8 @@ rat-results.txt
# Kubernetes generated templated files
*.generated
*.tar.gz
-scripts/ci/in_container/kubernetes/kube/.generated/airflow.yaml
-scripts/ci/in_container/kubernetes/docker/requirements.txt
+scripts/ci/kubernetes/kube/.generated/airflow.yaml
+scripts/ci/kubernetes/docker/requirements.txt
# Node & Webpack Stuff
*.entry.js
@@ -191,3 +190,26 @@ log.txt*
build-qcos-airflow/dags/
build-qcos-airflow/plugins/
+/backport_packages/CHANGELOG.txt
+
+# Docker context files
+/docker-context-files
+# Local .terraform directories
+**/.terraform/*
+
+# .tfstate files
+*.tfstate
+*.tfstate.*
+
+# Terraform variables
+*.tfvars
+
+Chart.lock
+
+# Chart dependencies
+**/charts/*.tgz
+
+# Might be generated when you build wheels
+pip-wheel-metadata
+
+.pypirc
diff --git a/.gitmodules b/.gitmodules
index ef0149e36910b..e13d8ce224f28 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,24 @@
[submodule "plugins/default_data/data"]
path = plugins/default_data/data
url = https://github.com/chasers2012/qcos-base-data.git
+[submodule ".github/actions/get-workflow-origin"]
+ path = .github/actions/get-workflow-origin
+ url = https://github.com/potiuk/get-workflow-origin
+[submodule ".github/actions/cancel-workflow-runs"]
+ path = .github/actions/cancel-workflow-runs
+ url = https://github.com/potiuk/cancel-workflow-runs
+[submodule ".github/actions/checks-action"]
+ path = .github/actions/checks-action
+ url = https://github.com/LouisBrunner/checks-action
+[submodule ".github/actions/configure-aws-credentials"]
+ path = .github/actions/configure-aws-credentials
+ url = https://github.com/aws-actions/configure-aws-credentials
+[submodule ".github/actions/codecov-action"]
+ path = .github/actions/codecov-action
+ url = https://github.com/codecov/codecov-action
+[submodule ".github/actions/github-push-action"]
+ path = .github/actions/github-push-action
+ url = https://github.com/ad-m/github-push-action
+[submodule ".github/actions/label-when-approved-action"]
+ path = .github/actions/label-when-approved-action
+ url = https://github.com/TobKed/label-when-approved-action
diff --git a/.markdownlint.yml b/.markdownlint.yml
new file mode 100644
index 0000000000000..dae821798ede4
--- /dev/null
+++ b/.markdownlint.yml
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+---
+# MD003/heading-style/header-style
+MD003: false
+
+# MD004/ul-style
+MD004: false
+
+# MD007/ul-indent
+MD007: false
+
+# MD012/no-multiple-blanks
+MD012: false
+
+# MD013 Line length
+MD013: false
+
+# MD014/commands-show-output
+MD014: false
+
+# MD022/blanks-around-headings/blanks-around-headers
+MD022: false
+
+# MD024/no-duplicate-heading/no-duplicate-header
+MD024: false
+
+# MD026/no-trailing-punctuation
+MD026: false
+
+# MD029/ol-prefix
+MD029: false
+
+# MD030/list-marker-space
+MD030: false
+
+# MD031/blanks-around-fences
+MD031: false
+
+# MD032/blanks-around-lists
+MD032: false
+
+# MD033/no-inline-html
+MD033: false
+
+# MD034/no-bare-urls
+MD034: false
+
+# MD036/no-emphasis-as-heading/no-emphasis-as-header
+MD036: false
+
+# MD040/fenced-code-language
+MD040: false
+
+# MD041/first-line-heading/first-line-h1
+MD041: false
+
+# MD045/no-alt-text
+MD045: false
+
+# MD046/code-block-style
+MD046: false
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 400e73d10e0d2..a98b0ea2096aa 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,15 +21,19 @@ default_language_version:
python: python3
minimum_pre_commit_version: "1.20.0"
repos:
+ - repo: meta
+ hooks:
+ - id: identity
+ - id: check-hooks-apply
- repo: https://github.com/Lucas-C/pre-commit-hooks
- rev: v1.1.7
+ rev: v1.1.9
hooks:
- id: forbid-tabs
- exclude: ^airflow/_vendor/.*$|^docs/Makefile$
+ exclude: ^docs/Makefile$|^clients/gen/go.sh|\.gitmodules$
- id: insert-license
name: Add license for all SQL files
files: \.sql$
- exclude: ^\.github/.*$|^airflow/_vendor/.*$
+ exclude: ^\.github/.*$
args:
- --comment-style
- "/*||*/"
@@ -38,7 +42,7 @@ repos:
- --fuzzy-match-generates-todo
- id: insert-license
name: Add license for all other files
- exclude: ^\.github/.*$"|^airflow/_vendor/.*$
+ exclude: ^\.github/.*$
args:
- --comment-style
- "|#|"
@@ -46,10 +50,10 @@ repos:
- license-templates/LICENSE.txt
- --fuzzy-match-generates-todo
files: >
- \.properties$|\.cfg$|\.conf$|\.ini$|\.ldif$|\.readthedocs$|\.service$|^Dockerfile.*$
+ \.properties$|\.cfg$|\.conf$|\.ini$|\.ldif$|\.readthedocs$|\.service$|\.tf$|Dockerfile.*$
- id: insert-license
name: Add license for all rst files
- exclude: ^\.github/.*$"|^airflow/_vendor/.*$
+ exclude: ^\.github/.*$
args:
- --comment-style
- "||"
@@ -58,19 +62,19 @@ repos:
- --fuzzy-match-generates-todo
files: \.rst$
- id: insert-license
- name: Add license for all JS/CSS files
- files: \.(js|css)$
- exclude: ^\.github/.*$|^airflow/_vendor/.*$|^airflow/www/static/.*|^airflow/www_rbac/static/.*$
+ name: Add license for all JS/CSS/PUML files
+ files: \.(js|css|puml)$
+ exclude: ^\.github/.*$|^airflow/www/static/.*|^airflow/www_rbac/static/.*$
args:
- --comment-style
- - "/**| *| */"
+ - "/*!| *| */"
- --license-filepath
- license-templates/LICENSE.txt
- --fuzzy-match-generates-todo
- id: insert-license
name: Add license for all JINJA template files
- files: ^airflow/www/templates/.*\.html$|^docs/templates/.*\.html$|^airflow/contrib/plugins/metastore_browser/templates/.*\.html$ # yamllint disable-line rule:line-length
- exclude: ^\.github/.*$|^airflow/_vendor/.*$
+ files: "^airflow/www/templates/.*\\.html$|^docs/templates/.*\\.html$.*\\.jinja2"
+ exclude: ^\.github/.*$
args:
- --comment-style
- "{#||#}"
@@ -79,7 +83,7 @@ repos:
- --fuzzy-match-generates-todo
- id: insert-license
name: Add license for all shell files
- exclude: ^\.github/.*$"|^airflow/_vendor/.*$
+ exclude: ^\.github/.*$
files: ^breeze$|^breeze-complete$|\.sh$|\.bash$|\.bats$
args:
- --comment-style
@@ -89,7 +93,7 @@ repos:
- --fuzzy-match-generates-todo
- id: insert-license
name: Add license for all python files
- exclude: ^\.github/.*$"|^airflow/_vendor/.*$
+ exclude: ^\.github/.*$
types: [python]
args:
- --comment-style
@@ -99,8 +103,8 @@ repos:
- --fuzzy-match-generates-todo
- id: insert-license
name: Add license for all XML files
- exclude: ^\.github/.*$"|^airflow/_vendor/.*$
- types: [xml]
+ exclude: ^\.github/.*$
+ files: \.xml$
args:
- --comment-style
- ""
@@ -109,8 +113,9 @@ repos:
- --fuzzy-match-generates-todo
- id: insert-license
name: Add license for all yaml files
- exclude: ^\.github/.*$"|^airflow/_vendor/.*$
+ exclude: ^\.github/.*$
types: [yaml]
+ files: \.yml$|\.yaml$
args:
- --comment-style
- "|#|"
@@ -120,27 +125,33 @@ repos:
- id: insert-license
name: Add license for all md files
files: \.md$
- exclude: ^\.github/.*$|^airflow/_vendor/.*$
+ exclude: ^\.github/.*$|PROVIDER_CHANGES.*\.md
args:
- --comment-style
- ""
- --license-filepath
- license-templates/LICENSE.txt
- --fuzzy-match-generates-todo
+ - id: insert-license
+ name: Add license for all mermaid files
+ args:
+ - --comment-style
+ - "|%%|"
+ - --license-filepath
+ - license-templates/LICENSE.txt
+ - --fuzzy-match-generates-todo
+ files: \.mermaid$
- repo: https://github.com/thlorenz/doctoc.git
rev: v1.4.0
hooks:
- id: doctoc
name: Add TOC for md files
- files: ^README\.md$|^CONTRIBUTING\.md$|^UPDATING.md$|^dev/README.md$
+ files: ^README\.md$|^CONTRIBUTING\.md$|^UPDATING.*.md$|^dev/README\.md$|^dev/PROVIDER_PACKAGES.md$
args:
- "--maxlevel"
- "2"
- - repo: meta
- hooks:
- - id: check-hooks-apply
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v2.5.0
+ rev: v3.3.0
hooks:
- id: check-merge-conflict
- id: debug-statements
@@ -151,160 +162,226 @@ repos:
- id: check-xml
- id: trailing-whitespace
- repo: https://github.com/pre-commit/pygrep-hooks
- rev: v1.4.4
+ rev: v1.6.0
hooks:
- id: rst-backticks
- id: python-no-log-warn
- - repo: local
+ - repo: https://github.com/adrienverge/yamllint
+ rev: v1.25.0
hooks:
- id: yamllint
name: Check yaml files with yamllint
- entry: yamllint -c yamllint-config.yml
- language: python
- additional_dependencies: ['yamllint']
+ entry: yamllint -c yamllint-config.yml --strict
types: [yaml]
- exclude: ^.*init_git_sync\.template\.yaml$|^.*airflow\.template\.yaml$
- - id: shellcheck
- name: Check Shell scripts syntax correctness
- language: docker_image
- entry: koalaman/shellcheck:stable -x -a
- files: ^breeze$|^breeze-complete$|\.sh$|^hooks/build$|^hooks/push$|\.bash$|\.bats$
- exclude: ^airflow/_vendor/.*$
- ##
- ## Dear committer.
- ##
- ## If you ever come here to add the missing isort step here - hear a little warning.
- ##
- ## Initially isort will cause surprising duplicates of urlparse and other urllib related methods.
- ## The urllib imports seem broken for python 2 but they are actually fine due to future
- ## backport aliases installed elsewhere in the code (implicitly) - in 6 places.
- ##
- ## When you decide how to fix it (likely talking to other people in community) and you push
- ## build to CI you will find terrible truth that in Airflow 1.10 modules are so much
- ## cross-dependent, that imports in a number of places have to be done in specific order and
- ## if this is not followed properly, circular imports kick-in and you are doomed.
- ##
- ## Running isort breaks the import House of Cards and there is no easy way to fix it short of
- ## splitting a number of files and probably breaking compatibility.
- ##
- ## Luckily this has been fixed in Airflow 2.0 by proper untangling of the cross-dependencies and
- ## 1.10.* branch is really in maintenance mode, so do not really waste your time here.
- ##
- ## Unless you really want of course. But then either delete this comment or increase the counter
- ## below after you give up.
- ##
- ## Total hours wasted here = 3
- ##
+ exclude:
+ ^.*init_git_sync\.template\.yaml$|^.*airflow\.template\.yaml$|^chart/(?:templates|files)/.*\.yaml
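+ # chart/templates and chart/files contain Helm (Go) templating, so they are not
+ # plain YAML and cannot be checked by yamllint.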
+ ##
+ ## Dear committer.
+ ##
+ ## If you ever come here to add the missing isort step here - hear a little warning.
+ ##
+ ## Initially isort will cause surprising duplicates of urlparse and other urllib related methods.
+ ## The urllib imports seem broken for python 2 but they are actually fine due to future
+ ## backport aliases installed elsewhere in the code (implicitly) - in 6 places.
+ ##
+ ## When you decide how to fix it (likely talking to other people in community) and you push
+ ## build to CI you will find terrible truth that in Airflow 1.10 modules are so much
+ ## cross-dependent, that imports in a number of places have to be done in specific order and
+ ## if this is not followed properly, circular imports kick-in and you are doomed.
+ ##
+ ## Running isort breaks the import House of Cards and there is no easy way to fix it short of
+ ## splitting a number of files and probably breaking compatibility.
+ ##
+ ## Luckily this has been fixed in Airflow 2.0 by proper untangling of the cross-dependencies and
+ ## 1.10.* branch is really in maintenance mode, so do not really waste your time here.
+ ##
+ ## Unless you really want of course. But then either delete this comment or increase the counter
+ ## below after you give up.
+ ##
+ ## Total hours wasted here = 3
+ ##
+ - repo: local
+ hooks:
- id: lint-dockerfile
name: Lint dockerfile
language: system
- entry: "./scripts/ci/pre_commit_lint_dockerfile.sh"
- files: ^Dockerfile.*$
+ entry: "./scripts/ci/pre_commit/pre_commit_lint_dockerfile.sh"
+ files: Dockerfile.*$
pass_filenames: true
- id: setup-order
name: Checks for an order of dependencies in setup.py
language: python
files: ^setup.py$
pass_filenames: false
- require_serial: true
- entry: tests/test_order_setup.py
+ entry: ./scripts/ci/pre_commit/pre_commit_check_order_setup.py
+ - id: setup-installation
+ name: Checks if all the libraries in setup.py are listed in installation.rst file
+ language: python
+ files: ^setup.py$|^docs/installation.rst$
+ pass_filenames: false
+ entry: ./scripts/ci/pre_commit/pre_commit_check_setup_installation.py
+ additional_dependencies: ['rich==9.2.0']
- id: update-breeze-file
name: Update output of breeze command in BREEZE.rst
- entry: "./scripts/ci/pre_commit_breeze_cmd_line.sh"
+ entry: "./scripts/ci/pre_commit/pre_commit_breeze_cmd_line.sh"
language: system
files: ^BREEZE.rst$|^breeze$|^breeze-complete$
pass_filenames: false
- require_serial: true
- id: update-local-yml-file
name: Update mounts in the local yml file
- entry: "./scripts/ci/pre_commit_local_yml_mounts.sh"
+ entry: "./scripts/ci/pre_commit/pre_commit_local_yml_mounts.sh"
language: system
- files: ^scripts/ci/_utils.sh$|s^scripts/ci/docker_compose/local.yml"
+ files: ^scripts/ci/libraries/_local_mounts.sh$|^scripts/ci/docker_compose/local.yml$
pass_filenames: false
- require_serial: true
- id: update-extras
name: Update extras in documentation
- entry: "./scripts/ci/pre_commit_update_extras.sh"
- language: system
+ entry: ./scripts/ci/pre_commit/pre_commit_insert_extras.py
+ language: python
files: ^setup.py$|^INSTALL$|^CONTRIBUTING.rst$
pass_filenames: false
- require_serial: true
- - id: python2-fastcheck
+ - id: pydevd
language: pygrep
+ name: Check for pydevd debug statements accidentally left
+ entry: "pydevd.*settrace\\("
+ pass_filenames: true
+ files: \.py$
+ - id: dont-use-safe-filter
+ language: pygrep
+ name: Don't use safe in templates
+ description: the Safe filter is error-prone, use Markup() in code instead
+ entry: "\\|\\s*safe"
+ files: \.html$
+ pass_filenames: true
+ - id: language-matters
+ language: pygrep
+ name: Check for language that we do not accept as community
+ description: Please use "deny_list" or "allow_list" instead.
+ entry: "(?i)(black|white)[_-]?list"
+ pass_filenames: true
+ exclude: >
+ (?x)
+ ^airflow/contrib/hooks/cassandra_hook.py$|
+ ^airflow/operators/hive_stats_operator.py$|
+ ^tests/contrib/hooks/test_cassandra_hook.py|
+ ^CHANGELOG.txt
+ - id: python2-fastcheck
name: Find common Python 3 vs. 2.7 compatibility problems
+ language: pygrep
entry: >
- (?x)
(?!.*\#\ noqa)(?!.*//\ noqa)( # Exclude lines with '# noqa' or '// noqa' comment
.super\(\).*| # Matches super() call from Python 3
^\s*def\s*\S*\([^:#)]*:.*| # Matches function param with Python3 type
^\sdef\s*\S*\(.*\):\s*\-\>\s*\S*.* # Matches -> return value syntax from Python3
)$
- files: \.py$
- exclude: ^airflow/_vendor|^dev/
- pass_filenames: true
- - id: check-providers-package
- language: pygrep
- name: Find providers package added in 2.0.*
- entry: >
- (?x)
- (
- ^.*airflow\.providers.* # Matches import airflow.providers
- )$
- files: \.py$
- exclude: ^airflow/_vendor
+ exclude: ^dev|^scripts|^docs|^chart
pass_filenames: true
- id: python2-compile
name: Compile code using python2
language: system
entry: python2.7 -m py_compile
files: \.py$
- exclude: ^dev/
+ exclude: ^dev|^scripts|^docs|^chart
pass_filenames: true
require_serial: true
- - id: pydevd
+ - id: incorrect-use-of-LoggingMixin
language: pygrep
- name: Check for pydevd debug statements accidentally left
- entry: "pydevd.*settrace\\("
- pass_filenames: true
+ name: Make sure LoggingMixin is not used alone
+ entry: "LoggingMixin\\(\\)"
files: \.py$
- - id: build
- name: Check if image build is needed
- entry: ./scripts/ci/pre_commit_ci_build.sh 3.5 false
+ pass_filenames: true
+ - id: check-integrations
+ name: Check if integration list is aligned
+ entry: ./scripts/ci/pre_commit/pre_commit_check_integrations.sh
language: system
- always_run: true
pass_filenames: false
+ files: ^common/_common_values.sh$|^breeze-complete$
- id: check-apache-license
name: Check if licenses are OK for Apache
- entry: "./scripts/ci/pre_commit_check_license.sh"
+ entry: "./scripts/ci/pre_commit/pre_commit_check_license.sh"
language: system
files: ^.*LICENSE.*$|^.*LICENCE.*$
pass_filenames: false
- require_serial: true
- id: airflow-config-yaml
name: Checks for consistency between config.yml and default_config.cfg
language: python
- files: "^airflow/config_templates/config.yml$|^airflow/config_templates/default_airflow.cfg$"
+ entry: ./scripts/ci/pre_commit/pre_commit_yaml_to_cfg.py
+ files: "config.yml$|default_airflow.cfg$|default.cfg$"
pass_filenames: false
- require_serial: false
- entry: scripts/ci/pre_commit_yaml_to_cfg.py
+ require_serial: true
additional_dependencies: ['pyyaml']
+ - id: pre-commit-descriptions
+ name: Check if pre-commits are described
+ entry: ./scripts/ci/pre_commit/pre_commit_check_pre_commits.sh
+ language: system
+ files: ^.pre-commit-config.yaml$|^STATIC_CODE_CHECKS.rst|^breeze-complete$
+ require_serial: true
+ - id: helm-lint
+ name: Lint Helm Chart
+ entry: ./scripts/ci/pre_commit/pre_commit_helm_lint.sh
+ language: system
+ pass_filenames: false
+ files: ^chart
+ require_serial: true
+ - id: shellcheck
+ name: Check Shell scripts syntax correctness
+ language: docker_image
+ entry: koalaman/shellcheck:stable -x -a
+ files: ^breeze$|^breeze-complete$|\.sh$|^hooks/build$|^hooks/push$|\.bash$|\.bats$
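+        # shellcheck flags: -x follows files loaded via 'source', and -a also reports
+        # issues found in those sourced files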
+ - id: bats-tests
+ name: Run BATS bash tests for changed bash files
+ language: system
+ entry: "./scripts/ci/pre_commit/pre_commit_bat_tests.sh"
+ files: ^breeze$|^breeze-complete$|\.sh$|\.bash$|\.bats$
+ exclude: ^tests/bats/in_container/.*bats$|^scripts/in_container/.*sh$
+ pass_filenames: false
+ - id: sort-in-the-wild
+ name: Sort INTHEWILD.md alphabetically
+ entry: ./scripts/ci/pre_commit/pre_commit_sort_in_the_wild.sh
+ language: system
+ files: ^.pre-commit-config.yaml$|^INTHEWILD.md$
+ require_serial: true
+ - id: markdownlint
+ name: Run markdownlint
+ description: "Checks the style of Markdown files."
+ entry: markdownlint
+ language: node
+ types: [markdown]
+ files: \.(md|mdown|markdown)$
+ additional_dependencies: ['markdownlint-cli']
+ - id: build
+ name: Check if image build is needed
+ entry: ./scripts/ci/pre_commit/pre_commit_ci_build.sh 3.6 false
+ language: system
+ always_run: true
+ pass_filenames: false
- id: mypy
name: Run mypy
language: system
- entry: "./scripts/ci/pre_commit_mypy.sh"
+ entry: "./scripts/ci/pre_commit/pre_commit_mypy.sh"
files: \.py$
- exclude: ^airflow/_vendor/.*$|^dev
+ exclude: ^dev|^provider_packages|^chart
+ - id: mypy
+ name: Run mypy for helm chart tests
+ language: system
+ entry: "./scripts/ci/pre_commit/pre_commit_mypy.sh"
+ files: ^chart/.*\.py$
+ exclude: ^dev
require_serial: true
- id: flake8
name: Run flake8
language: system
- entry: "./scripts/ci/pre_commit_flake8.sh"
+ entry: "./scripts/ci/pre_commit/pre_commit_flake8.sh"
files: \.py$
- exclude: ^dev/
pass_filenames: true
- - id: bat-tests
- name: Run BATS bash tests for changed bash files
+ - id: bats-in-container-tests
+ name: Run in container bats tests
language: system
- entry: "./scripts/ci/pre_commit_bat_tests.sh"
- files: ^breeze$|^breeze-complete$|\.sh$|\.bash$
+ entry: "./scripts/ci/pre_commit/pre_commit_in_container_bats_test.sh"
+ files: ^tests/bats/in_container/.*.bats$|^scripts/in_container/.*sh
pass_filenames: false
diff --git a/.rat-excludes b/.rat-excludes
index 906d8044452df..69c8ccc960328 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -28,6 +28,7 @@ metastore_db
.*sql
.*svg
.*csv
+.*md5
CHANGELOG.txt
.*zip
.*lock
@@ -74,11 +75,16 @@ apache-airflow-.*\+source.tar.gz.*
apache-airflow-.*\+bin.tar.gz.*
PULL_REQUEST_TEMPLATE.md
-# vendored modules
-_vendor/*
# Locally mounted files
.*egg-info/*
.bash_history
.bash_aliases
.inputrc
+
+# the example notebook is ASF 2 licensed but RAT cannot read this
+input_notebook.ipynb
+
+# .git might be a file in case of worktree
+.git
+tmp
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index dffb384a2eab1..0000000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,233 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
----
-dist: bionic
-language: python
-os: linux
-env:
- global:
- - BUILD_ID=${TRAVIS_BUILD_ID}
- - MOUNT_LOCAL_SOURCES="false"
- - MOUNT_HOST_AIRFLOW_VOLUME="true"
- - FORCE_ANSWER_TO_QUESTIONS="yes"
- - SKIP_CHECK_REMOTE_IMAGE="true"
- - DB_RESET="true"
- - VERBOSE="true"
- - CI="true"
-python: "3.6"
-stages:
- - pre-test
- - test
-services:
- - docker
-jobs:
- include:
- - name: "Static checks"
- stage: pre-test
- script: ./scripts/ci/ci_run_all_static_tests.sh
- env: >-
- PYTHON_MAJOR_MINOR_VERSION=3.5
- AIRFLOW_MOUNT_SOURCE_DIR_FOR_STATIC_CHECKS="true"
- - name: "Build documentation"
- env: >-
- PYTHON_MAJOR_MINOR_VERSION=3.5
- stage: test
- script: ./scripts/ci/ci_docs.sh
- - name: "Tests [Py3.6][Kubernetes][persistent]"
- env: >-
- BACKEND=postgres
- PYTHON_MAJOR_MINOR_VERSION=3.6
- RUNTIME=kubernetes
- ENABLE_KIND_CLUSTER=true
- KUBERNETES_MODE=persistent_mode
- KUBERNETES_VERSION=v1.15.3
- stage: test
- - name: "Tests [Py3.5][Kubernetes][git]"
- env: >-
- BACKEND=postgres
- PYTHON_MAJOR_MINOR_VERSION=3.5
- RUNTIME=kubernetes
- ENABLE_KIND_CLUSTER=true
- KUBERNETES_MODE=git_mode
- KUBERNETES_VERSION=v1.15.3
- stage: test
- - name: "Tests [Py2.7][Kubernetes][persistent]"
- env: >-
- BACKEND=postgres
- PYTHON_MAJOR_MINOR_VERSION=2.7
- ENABLE_KIND_CLUSTER=true
- RUNTIME=kubernetes
- KUBERNETES_MODE=persistent_mode
- KUBERNETES_VERSION=v1.15.3
- stage: test
- - name: "Tests [Py2.7][Kubernetes][git]"
- env: >-
- BACKEND=postgres
- PYTHON_MAJOR_MINOR_VERSION=2.7
- ENABLE_KIND_CLUSTER=true
- RUNTIME=kubernetes
- KUBERNETES_MODE=git_mode
- KUBERNETES_VERSION=v1.15.3
- stage: test
- - name: "Tests [Postgres9.6][Py3.6][integrations]"
- env: >-
- BACKEND=postgres
- PYTHON_MAJOR_MINOR_VERSION=3.6
- POSTGRES_VERSION=9.6
- ENABLED_INTEGRATIONS="cassandra kerberos mongo openldap rabbitmq redis"
- RUN_INTEGRATION_TESTS=all
- stage: test
- - name: "Tests [Postgres9.6][Py3.6][kerberos]"
- env: >-
- BACKEND=postgres
- PYTHON_MAJOR_MINOR_VERSION=3.6
- POSTGRES_VERSION=9.6
- ENABLED_INTEGRATIONS="kerberos"
- stage: test
- - name: "Tests [Postgres10][Py3.6][integrations]"
- env: >-
- BACKEND=postgres
- PYTHON_MAJOR_MINOR_VERSION=3.6
- POSTGRES_VERSION=10
- ENABLED_INTEGRATIONS="cassandra kerberos mongo openldap rabbitmq redis"
- RUN_INTEGRATION_TESTS=all
- stage: test
- - name: "Tests [Postgres9.6][Py3.7][kerberos]"
- env: >-
- BACKEND=postgres
- PYTHON_MAJOR_MINOR_VERSION=3.6
- POSTGRES_VERSION=9.6
- ENABLED_INTEGRATIONS="kerberos"
- stage: test
- - name: "Tests [Postgres10][Py3.7][integrations]"
- env: >-
- BACKEND=postgres
- PYTHON_MAJOR_MINOR_VERSION=3.6
- POSTGRES_VERSION=10
- ENABLED_INTEGRATIONS="cassandra kerberos mongo openldap rabbitmq redis"
- RUN_INTEGRATION_TESTS=all
- stage: test
- - name: "Tests [Postgres10][Py3.6][kerberos]"
- env: >-
- BACKEND=postgres
- PYTHON_MAJOR_MINOR_VERSION=3.6
- POSTGRES_VERSION=10
- ENABLED_INTEGRATIONS="kerberos"
- stage: test
- - name: "Tests [Sqlite][Py2.7][integrations]"
- env: >-
- BACKEND=sqlite
- PYTHON_MAJOR_MINOR_VERSION=2.7
- ENABLED_INTEGRATIONS="cassandra kerberos mongo openldap rabbitmq redis"
- RUN_INTEGRATION_TESTS=all
- stage: test
- - name: "Tests [Sqlite][Py3.5]"
- env: >-
- BACKEND=sqlite
- PYTHON_MAJOR_MINOR_VERSION=3.5
- stage: test
- - name: "Tests [MySQL5.6][Py3.6][integrations]"
- env: >-
- BACKEND=mysql
- PYTHON_MAJOR_MINOR_VERSION=3.6
- MYSQL_VERSION=5.6
- ENABLED_INTEGRATIONS="cassandra kerberos mongo openldap rabbitmq redis"
- RUN_INTEGRATION_TESTS=all
- stage: test
- - name: "Tests [MySQL5.6][Py2.7][kerberos]"
- env: >-
- BACKEND=mysql
- PYTHON_MAJOR_MINOR_VERSION=2.7
- ENABLED_INTEGRATIONS="kerberos"
- MYSQL_VERSION=5.6
- stage: test
- - name: "Tests [MySQL5.7][Py3.6][integrations]"
- env: >-
- BACKEND=mysql
- PYTHON_MAJOR_MINOR_VERSION=3.6
- MYSQL_VERSION=5.7
- ENABLED_INTEGRATIONS="cassandra kerberos mongo openldap rabbitmq redis"
- RUN_INTEGRATION_TESTS=all
- stage: test
- - name: "Tests [MySQL5.7][Py2.7][kerberos]"
- env: >-
- BACKEND=mysql
- PYTHON_MAJOR_MINOR_VERSION=2.7
- MYSQL_VERSION=5.7
- ENABLED_INTEGRATIONS="kerberos"
- MYSQL_VERSION=5.7
- stage: test
- - name: "Tests [MySQL5.7][Py3.7]"
- env: >-
- BACKEND=mysql
- PYTHON_MAJOR_MINOR_VERSION=3.7
- MYSQL_VERSION=5.7
- stage: test
- - name: "Generate requirements Py2.7"
- env: >-
- PYTHON_MAJOR_MINOR_VERSION=2.7
- stage: test
- script: ./scripts/ci/ci_generate_requirements.sh
- - name: "Generate requirements Py3.5"
- env: >-
- PYTHON_MAJOR_MINOR_VERSION=3.5
- stage: test
- script: ./scripts/ci/ci_generate_requirements.sh
- - name: "Generate requirements Py3.6"
- env: >-
- PYTHON_MAJOR_MINOR_VERSION=3.6
- SHOW_GENERATE_REQUIREMENTS_INSTRUCTIONS="true"
- stage: test
- script: ./scripts/ci/ci_generate_requirements.sh
- - name: "Generate requirements Py3.7"
- env: >-
- PYTHON_MAJOR_MINOR_VERSION=3.7
- SHOW_GENERATE_REQUIREMENTS_INSTRUCTIONS="true"
- stage: test
- script: ./scripts/ci/ci_generate_requirements.sh
- - name: "Build production image Py2.7"
- env: >-
- PYTHON_MAJOR_MINOR_VERSION="2.7"
- stage: test
- script: ./scripts/ci/ci_build_production_image.sh
- before_install:
- - echo
- - name: "Build production image Py3.5"
- env: >-
- PYTHON_MAJOR_MINOR_VERSION="3.5"
- stage: test
- script: ./scripts/ci/ci_build_production_image.sh
- before_install:
- - echo
- - name: "Build production image Py3.6"
- env: >-
- PYTHON_MAJOR_MINOR_VERSION="3.6"
- stage: test
- script: ./scripts/ci/ci_build_production_image.sh
- before_install:
- - echo
- - name: "Build production image Py3.7"
- env: >-
- PYTHON_MAJOR_MINOR_VERSION="3.7"
- stage: test
- script: ./scripts/ci/ci_build_production_image.sh
- before_install:
- - echo
-before_install:
- - ./scripts/ci/ci_before_install.sh
-script: ./scripts/ci/ci_run_airflow_testing.sh
diff --git a/BREEZE.rst b/BREEZE.rst
index 6b294ccaee719..a079b4effbc63 100644
--- a/BREEZE.rst
+++ b/BREEZE.rst
@@ -15,35 +15,38 @@
specific language governing permissions and limitations
under the License.
-.. image:: images/AirflowBreeze_logo.png
- :align: center
- :alt: Airflow Breeze Logo
+.. raw:: html
+
+   <div align="center">
+      <img src="images/AirflowBreeze_logo.png" alt="Airflow Breeze Logo">
+   </div>
+
.. contents:: :local:
-Airflow Breeze CI Environment
+Airflow Breeze CI environment
=============================
-Airflow Breeze is an easy-to-use development environment using
+Airflow Breeze is an easy-to-use development and test environment using
`Docker Compose `_.
The environment is available for local use and is also used in Airflow's CI tests.
-We called it *Airflow Breeze* as **It's a Breeze to develop Airflow**.
+We called it *Airflow Breeze* as **It's a Breeze to contribute to Airflow**.
The advantages and disadvantages of using the Breeze environment vs. other ways of testing Airflow
are described in `CONTRIBUTING.rst `_.
-Here is a short 10-minute video about Airflow Breeze (note that it shows an old version of Breeze. Some
-of the points in the video are not valid any more. The video will be updated shortly with more up-to-date
-version):
+Watch the video below about Airflow Breeze. It explains the motivation for Breeze
+and walks through all of its uses.
+
+.. raw:: html
-.. image:: http://img.youtube.com/vi/ffKFHV6f3PQ/0.jpg
- :width: 480px
- :height: 360px
- :scale: 100 %
- :alt: Airflow Breeze Simplified Development Workflow
- :align: center
- :target: http://www.youtube.com/watch?v=ffKFHV6f3PQ
+
Prerequisites
=============
@@ -66,9 +69,12 @@ Docker Community Edition
Here is an example configuration with more than 200GB disk space for Docker:
-.. image:: images/disk_space_osx.png
- :align: left
- :alt: Disk space OSX
+.. raw:: html
+
+   <div align="left">
+      <img src="images/disk_space_osx.png" alt="Disk space OSX">
+   </div>
+
Docker Compose
--------------
@@ -76,44 +82,52 @@ Docker Compose
- **Version**: Install the latest stable Docker Compose and add it to the PATH.
See `Docker Compose Installation Guide `_ for details.
-- **Permissions**: Configure to run the ``docker-compose`` command.
+- **Permissions**: Configure permission to run the ``docker-compose`` command.
-Docker Images Used by Breeze
-----------------------------
+Docker in WSL 2
+---------------
-For all development tasks, unit tests, integration tests and static code checks, we use the
-**CI image** maintained on the DockerHub in the ``apache/airflow`` repository.
-This Docker image contains a lot test-related packages (size of ~1GB).
-Its tag follows the pattern of ``-python-ci``
-(for example, ``apache/airflow:master-python3.6-ci`` or ``apache/airflow:v1-10-test-python3.6-ci``).
-The image is built using the ``_ Dockerfile.
+- **WSL 2 installation** :
+  Install WSL 2 and a Linux distro (e.g. Ubuntu); see
+ `WSL 2 Installation Guide `_ for details.
-For testing production image, the **Production image** is used and maintained on the DockerHub in the
-```apache/airflow`` repository. This Docker image contains only size-optimised Airflow with selected
-extras and dependencies. Its tag follows the pattern of ``-python``
-(for example, ``apache/airflow:master-python3.6`` or ``apache/airflow:v1-10-test-python3.6``).
+- **Docker Desktop installation** :
+ Install Docker Desktop for Windows. For Windows Home follow the
+ `Docker Windows Home Installation Guide `_.
+ For Windows Pro, Enterprise, or Education follow the
+ `Docker Windows Installation Guide `_.
-More information about the images can be found in ``_.
+- **Docker setting** :
+  WSL 2 integration needs to be enabled in the Docker Desktop settings.
-By default CI images are used unless ``--production-image`` flag is used.
+.. raw:: html
-Before you run tests, enter the environment or run local static checks, the necessary local images should be
-pulled and built from Docker Hub. This happens automatically for the test environment but you need to
-manually trigger it for static checks as described in `Building the images <#building-the-images>`_
-and `Pulling the latest images <#pulling-the-latest-images>`_.
-The static checks will fail and inform what to do if the image is not yet built.
+
+
+
-Building the image first time pulls a pre-built version of images from the Docker Hub, which may take some
-time. But for subsequent source code changes, no wait time is expected.
-However, changes to sensitive files like ``setup.py`` or ``Dockerfile.ci`` will trigger a rebuild
-that may take more time though it is highly optimized to only rebuild what is needed.
+- **WSL 2 Filesystem Performance** :
+  Accessing the host Windows filesystem incurs a performance penalty;
+  it is therefore recommended to do development on the Linux filesystem.
+  E.g. run ``cd ~``, create a development folder in your Linux distro home,
+  and git clone the Airflow repo there.
-In most cases, rebuilding an image requires network connectivity (for example, to download new
-dependencies). If you work offline and do not want to rebuild the images when needed, you can set the
-``FORCE_ANSWER_TO_QUESTIONS`` variable to ``no`` as described in the
-`Default behaviour for user interaction <#default-behaviour-for-user-interaction>`_ section.
+- **WSL 2 Memory Usage** :
+ WSL 2 can consume a lot of memory under the process name "Vmmem". To reclaim the memory after
+ development you can:
-See `Troubleshooting section <#troubleshooting>`_ for steps you can make to clean the environment.
+ * On the Linux distro clear cached memory: ``sudo sysctl -w vm.drop_caches=3``
+ * If no longer using Docker you can quit Docker Desktop
+    (right-click the system tray icon and select "Quit Docker Desktop")
+ * If no longer using WSL you can shut it down on the Windows Host
+ with the following command: ``wsl --shutdown``
+
+- **Developing in WSL 2** :
+ You can use all the standard Linux command line utilities to develop on WSL 2.
+  Furthermore, VS Code supports developing on Windows while executing remotely in WSL.
+  If VS Code is installed on the Windows host system, then in the WSL Linux distro
+  you can run ``code .`` in the root directory of your Airflow repo to launch VS Code.
Getopt and gstat
----------------
@@ -141,7 +155,6 @@ If you use zsh, run this command and re-login:
echo 'export PATH="/usr/local/opt/gnu-getopt/bin:$PATH"' >> ~/.zprofile
. ~/.zprofile
-
Memory
------
@@ -151,45 +164,155 @@ On macOS, 2GB of RAM are available for your Docker containers by default, but mo
(4GB should be comfortable). For details see
`Docker for Mac - Advanced tab `_.
-Airflow Directory Structure inside Docker
------------------------------------------
+On Windows WSL 2, expect the Linux distro and Docker containers to use 7-8 GB of RAM.
-When you are in the CI container, the following directories are used:
+Cleaning the environment
+------------------------
-.. code-block:: text
+You may need to clean up your Docker environment occasionally. The images are quite big
+(1.5GB for both images needed for static code analysis and CI tests) and, if you often rebuild/update
+them, you may end up with some unused image data.
- /opt/airflow - Contains sources of Airflow mounted from the host (AIRFLOW_SOURCES).
- /root/airflow - Contains all the "dynamic" Airflow files (AIRFLOW_HOME), such as:
- airflow.db - sqlite database in case sqlite is used;
- dags - folder with non-test dags (test dags are in /opt/airflow/tests/dags);
- logs - logs from Airflow executions;
- unittest.cfg - unit test configuration generated when entering the environment;
- webserver_config.py - webserver configuration generated when running Airflow in the container.
+To clean up the Docker environment:
-Note that when running in your local environment, the ``/root/airflow/logs`` folder is actually mounted
-from your ``logs`` directory in the Airflow sources, so all logs created in the container are automatically
-visible in the host as well. Every time you enter the container, the ``logs`` directory is
-cleaned so that logs do not accumulate.
+1. Stop Breeze with ``./breeze stop``.
-When you are in the production container, the following directories are used:
+2. Run the ``docker system prune`` command.
-.. code-block:: text
+3. Run ``docker images --all`` and ``docker ps --all`` to verify that your Docker is clean.
- /opt/airflow - Contains sources of Airflow mounted from the host (AIRFLOW_SOURCES).
- /root/airflow - Contains all the "dynamic" Airflow files (AIRFLOW_HOME), such as:
- airflow.db - sqlite database in case sqlite is used;
- dags - folder with non-test dags (test dags are in /opt/airflow/tests/dags);
- logs - logs from Airflow executions;
- unittest.cfg - unit test configuration generated when entering the environment;
- webserver_config.py - webserver configuration generated when running Airflow in the container.
+ Both commands should return an empty list of images and containers respectively.
-Note that when running in your local environment, the ``/root/airflow/logs`` folder is actually mounted
-from your ``logs`` directory in the Airflow sources, so all logs created in the container are automatically
-visible in the host as well. Every time you enter the container, the ``logs`` directory is
-cleaned so that logs do not accumulate.
+If you run into disk space errors, consider pruning your Docker images with the ``docker system prune --all``
+command. You may need to restart the Docker Engine before running this command.
+
+In case of disk space errors on macOS, increase the disk space available for Docker. See
+`Prerequisites <#prerequisites>`_ for details.
+
+
+Installation
+============
+
+Installation is as easy as checking out the Airflow repository and running the Breeze command.
+You enter the Breeze test environment by running the ``./breeze`` script. You can run it with
+the ``help`` command to see the list of available options. See `Breeze Command-Line Interface Reference`_
+for details.
+
+.. code-block:: bash
+
+ ./breeze
+
+The first time you run Breeze, it pulls and builds a local version of the Docker images.
+It pulls the latest Airflow CI images from `Airflow DockerHub `_
+and uses them to build your local Docker images. Note that the first run (per Python version) might take
+up to 10 minutes on a fast connection to start. Subsequent runs should be much faster.
+
+Once you enter the environment, you are dropped into the bash shell of the Airflow container and you can
+run tests immediately.
+
+To use the full potential of Breeze, you should set up autocomplete, and you can also add the checked-out
+Airflow repository to your PATH to run Breeze without the ``./`` prefix and from any directory,
+as shown in the example below.
+
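+For example, assuming the repository is checked out in ``${HOME}/airflow`` (your path may differ):
+
+.. code-block:: bash
+
+    export PATH="${PATH}:${HOME}/airflow"
+    breeze help
+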
+The ``breeze`` command comes with a built-in bash/zsh autocomplete setup command. After installing it, when
+you start typing the command, you can press <TAB> to show all the available switches and get
+auto-completion on typical values of parameters that you can use.
+
+You can set up the autocomplete option by running:
+
+.. code-block:: bash
+
+ ./breeze setup-autocomplete
+
+Auto-completion starts working once you re-enter the shell.
+
+When you enter the Breeze environment, an environment file is automatically sourced from
+``files/airflow-breeze-config/variables.env``. The ``files`` folder from your local sources is
+automatically mounted to the container under the ``/files`` path, and you can put any files there
+that you want to make available to the Breeze container.
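+
+For example, a minimal ``files/airflow-breeze-config/variables.env`` could look like the sketch below
+(the variable values are illustrative only):
+
+.. code-block:: bash
+
+    # Sourced automatically on entering the Breeze environment
+    export AIRFLOW__CORE__LOAD_EXAMPLES="false"
+    export MY_PROJECT_DEBUG="true"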
+
+.. raw:: html
+
+
+
+Running tests in the CI interactive environment
+===============================================
+
+Breeze helps with running tests in the same environment/way as the CI tests are run. You can run various
+types of tests when you enter the Breeze CI interactive environment - this is described in detail
+in ``_
+
+.. raw:: html
+
+
+
+Choosing different Breeze environment configuration
+===================================================
+
+You can use additional ``breeze`` flags to choose your environment. You can specify a Python
+version to use and a backend (the metadata database). Thanks to that, with Breeze, you can recreate the
+same environments as we have in the matrix builds in CI.
+
+For example, you can choose to run Python 3.6 tests with MySQL as backend and in the Docker environment as
+follows:
+
+.. code-block:: bash
+
+ ./breeze --python 3.6 --backend mysql
+
+The choices you make are persisted in the ``./.build/`` cache directory so that the next time you use the
+``breeze`` script, it can reuse the values used previously. This way you do not have to specify
+them when you run the script. You can delete the ``.build/`` directory to restore the
+default settings, as shown below.
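+
+For example, this restores Breeze to its default configuration:
+
+.. code-block:: bash
+
+    # Remove the cached choices (Python version, backend, etc.)
+    rm -rf ./.build/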
+
+The defaults when you run the Breeze environment are Python 3.6 and a SQLite database.
+
+.. raw:: html
+
+
+
+
+Troubleshooting
+===============
+
+If you are having problems with the Breeze environment, try the steps below. After each step you
+can check whether your problem is fixed.
+
+1. If you are on macOS, check if you have enough disk space for Docker.
+2. Restart Breeze with ``./breeze restart``.
+3. Delete the ``.build`` directory and run ``./breeze build-image --force-pull-images``.
+4. Clean up Docker images via ``breeze cleanup-image`` command.
+5. Restart your Docker Engine and try again.
+6. Restart your machine and try again.
+7. Re-install Docker CE and try again.
+
+If the problem is not solved, you can set the ``VERBOSE_COMMANDS`` variable to "true":
+
+.. code-block:: bash
-Using the Airflow Breeze Environment
-=====================================
+ export VERBOSE_COMMANDS="true"
+
+
+Then run the failed command, copy-and-paste the output from your terminal to the
+`Airflow Slack `_ #airflow-breeze channel and
+describe your problem.
+
+Other uses of the Airflow Breeze environment
+============================================
Airflow Breeze is a bash script serving as a "swiss-army-knife" of Airflow testing. Under the
hood it uses other scripts that you can also run manually if you have problem with running the Breeze
@@ -197,27 +320,52 @@ environment.
Breeze script allows performing the following tasks:
-Manage environments - CI (default) or Production - if ``--production-image`` flag is specified:
+Managing CI environment:
- * Build docker images with ``breeze build-image`` command
- * Enter interactive shell when no command are specified (default behaviour)
+ * Build CI docker image with ``breeze build-image`` command
+ * Enter interactive shell in CI container when ``shell`` (or no command) is specified
* Join running interactive shell with ``breeze exec`` command
- * Start Kind Kubernetes cluster for Kubernetes tests if ``--start-kind-cluster`` flag is specified
* Stop running interactive environment with ``breeze stop`` command
* Restart running interactive environment with ``breeze restart`` command
- * Optionally reset database if specified as extra ``--db-reset`` flag
- * Optionally start integrations (separate images) if specified as extra ``--integration`` flags (only CI)
+ * Run test specified with ``breeze tests`` command
+ * Generate constraints with ``breeze generate-constraints`` command
+ * Execute arbitrary command in the test environment with ``breeze shell`` command
+ * Execute arbitrary docker-compose command with ``breeze docker-compose`` command
+  * Push docker images with ``breeze push-image`` command (requires committer rights to push images)
+
+You can optionally reset the database with the extra ``--db-reset`` flag and, for the CI image, you can
+also start integrations (separate Docker images) with extra ``--integration`` flags. You can also
+choose which backend database should be used with the ``--backend`` flag and the Python version with the
+``--python`` flag.
-Interact with CI environment:
+You can also have Breeze launch Airflow automatically with ``breeze start-airflow``. This drops you into a
+tmux session with three panes (one to monitor the scheduler, one for the webserver, and one with a shell
+for additional commands), as in the example below.
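+
+For example (the flag values below are illustrative - any supported Python version and backend work):
+
+.. code-block:: bash
+
+    # Start the scheduler, the webserver, and a shell in one tmux session
+    ./breeze start-airflow --python 3.6 --backend postgres
+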
- * Run test target specified with ``breeze test-target`` command
- * Execute arbitrary command in the test environment with ``breeze execute-command`` command
+Managing Prod environment (with ``--production-image`` flag):
+
+  * Build PROD docker image with ``breeze build-image`` command
+ * Enter interactive shell in PROD container when ``shell`` (or no command) is specified
+ * Join running interactive shell with ``breeze exec`` command
+ * Stop running interactive environment with ``breeze stop`` command
+ * Restart running interactive environment with ``breeze restart`` command
+ * Execute arbitrary command in the test environment with ``breeze shell`` command
* Execute arbitrary docker-compose command with ``breeze docker-compose`` command
+  * Push docker images with ``breeze push-image`` command (requires committer rights to push images)
+
+You can optionally reset the database with the extra ``--db-reset`` flag. You can also
+choose which backend database should be used with the ``--backend`` flag and the Python version with the
+``--python`` flag.
+
+
+Manage and interact with the Kubernetes test environment:
+
+  * Manage the KinD Kubernetes cluster and deploy Airflow to it with the ``breeze kind-cluster`` commands
+  * Run the Kubernetes tests with the ``breeze kind-cluster tests`` command (see the example below)
+  * Enter the interactive Kubernetes test environment with the ``breeze kind-cluster shell`` command
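+
+For example, a minimal way to run the Kubernetes tests in the KinD cluster:
+
+.. code-block:: bash
+
+    ./breeze kind-cluster tests
+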
Run static checks:
* Run static checks - either for currently staged change or for all files with
- ``breeze static-check`` or ``breeze static-check-all-files`` command
+ ``breeze static-check`` command
Build documentation:
@@ -228,38 +376,15 @@ Set up local development environment:
* Setup local virtualenv with ``breeze setup-virtualenv`` command
* Setup autocomplete for itself with ``breeze setup-autocomplete`` command
+Database volumes in Breeze
+--------------------------
-Note that the below environment interaction is by default with the CI image. If you want to use production
-image for those commands you need to add ``--production-image`` flag.
-
-
-Entering Breeze CI environment
-------------------------------
-
-You enter the Breeze test environment by running the ``./breeze`` script. You can run it with
-the ``help`` command to see the list of available options. See `Breeze Command-Line Interface Reference`_
-for details.
-
-.. code-block:: bash
-
- ./breeze
-
-First time you run Breeze, it pulls and builds a local version of Docker images.
-It pulls the latest Airflow CI images from `Airflow DockerHub `_
-and use them to build your local Docker images. Note that the first run (per python) might take up to 10
-minutes on a fast connection to start. Subsequent runs should be much faster.
-
-Once you enter the environment, you are dropped into bash shell of the Airflow container and you can
-run tests immediately.
-
-You can `set up autocomplete <#setting-up-autocomplete>`_ for commands and add the
-checked-out Airflow repository to your PATH to run Breeze without the ``./`` and from any directory.
-
-
-When you enter the Breeze environment, automatically an environment file is sourced from
-``files/airflow-breeze-config/variables.env``. The ``files`` folder from your local sources is
-automatically mounted to the container under ``/files`` path and you can put there any files you want
-to make available fot the Breeze container.
+Breeze keeps the data for all its integrations in named docker volumes. Each backend and integration
+keeps data in its own volume. Those volumes are persisted until the ``./breeze stop`` command or the
+``./breeze restart`` command is run. You can also preserve the volumes by adding the
+``--preserve-volumes`` flag when you run either of those commands. Then, the next time you start
+``Breeze``, it will have the data pre-populated. You can always delete the volumes by
+running ``./breeze stop`` without the ``--preserve-volumes`` flag.
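+
+For example:
+
+.. code-block:: bash
+
+    # Stop Breeze but keep the database volumes for the next session
+    ./breeze stop --preserve-volumes
+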
Launching multiple terminals
----------------------------
@@ -268,63 +393,91 @@ Often if you want to run full airflow in the Breeze environment you need to laun
run ``airflow webserver``, ``airflow scheduler``, ``airflow worker`` in separate terminals.
This can be achieved either via ``tmux`` or via exec-ing into the running container from the host. Tmux
-is installed inside the container and you can launch it with ``tmux`` command. Tmux provide you with the
+is installed inside the container and you can launch it with the ``tmux`` command. Tmux provides you with the
capability of creating multiple virtual terminals and multiplex between them. More about ``tmux`` can be
-found at `tmux github wiki page `_ . Tmux has several useful shortcuts
+found at `tmux GitHub wiki page `_ . Tmux has several useful shortcuts
that allow you to split the terminals, open new tabs etc - it's pretty useful to learn it.
-Another - slightly easier - way is to exec into Breeze terminal from the host's terminal. Often you can
+.. raw:: html
+
+
+
+
+Another way is to exec into the Breeze container from the host's terminal. Often you can
have multiple terminals in the host (Linux/MacOS/WSL2 on Windows) and you can simply use those terminals
-to enter running container. It's as easy as launching ``breeze exec`` while you already started the
+to enter the running container. It's as easy as launching ``breeze exec`` while you already started the
Breeze environment. You will be dropped into bash and environment variables will be read in the same
way as when you enter the environment. You can do it multiple times and open as many terminals as you need.
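+
+For example, with Breeze already running in one terminal:
+
+.. code-block:: bash
+
+    # Run in any number of additional host terminals
+    ./breeze exec
+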
-Stopping Interactive environment
---------------------------------
-
-After starting up, the environment runs in the background and takes precious memory.
-You can always stop it via:
+.. raw:: html
-.. code-block:: bash
+
- ./breeze stop
-Restarting Breeze environment
------------------------------
+CLIs for cloud providers
+------------------------
-You can also restart the environment and enter it via:
+For development convenience we installed simple wrappers for the most common cloud providers' CLIs. Those
+CLIs are not installed when you build or pull the image - they will be downloaded as docker images
+the first time you attempt to use them. They are downloaded and executed in your host's docker engine, so
+once downloaded they will stay until you remove the downloaded images from your host.
-.. code-block:: bash
+For each of those CLIs, credentials are taken (automatically) from the credentials you have defined in
+your ``${HOME}`` directory on the host.
- ./breeze restart
+Those tools also have the host Airflow source directory mounted under the ``/opt/airflow`` path,
+so you can directly transfer files to/from your Airflow sources on the host.
-Choosing a Breeze Environment
------------------------------
+These are the currently installed CLIs (they are available as aliases to the docker commands):
-You can use additional ``breeze`` flags to customize your environment. For example, you can specify a Python
-version to use, backend and a container environment for testing. With Breeze, you can recreate the same
-environments as we have in matrix builds in Travis CI.
++-----------------------+----------+-------------------------------------------------+-------------------+
+| Cloud Provider | CLI tool | Docker image | Configuration dir |
++=======================+==========+=================================================+===================+
+| Amazon Web Services | aws | amazon/aws-cli:latest | .aws |
++-----------------------+----------+-------------------------------------------------+-------------------+
+| Microsoft Azure | az | mcr.microsoft.com/azure-cli:latest | .azure |
++-----------------------+----------+-------------------------------------------------+-------------------+
+| Google Cloud | bq | gcr.io/google.com/cloudsdktool/cloud-sdk:latest | .config/gcloud |
+| +----------+-------------------------------------------------+-------------------+
+| | gcloud | gcr.io/google.com/cloudsdktool/cloud-sdk:latest | .config/gcloud |
+| +----------+-------------------------------------------------+-------------------+
+| | gsutil | gcr.io/google.com/cloudsdktool/cloud-sdk:latest | .config/gcloud |
++-----------------------+----------+-------------------------------------------------+-------------------+
-For example, you can choose to run Python 3.6 tests with MySQL as backend and in the Docker environment as
-follows:
+For each of the CLIs we also have an accompanying ``*-update`` alias (for example ``aws-update``) which
+will pull the latest image for the tool. Note that all Google Cloud tools are served by one
+image and they are updated together.
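+
+For example:
+
+.. code-block:: bash
+
+    # The first call downloads the CLI docker image; later calls reuse it
+    aws --version
+    # Pull the latest image for the aws tool
+    aws-update
+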
-.. code-block:: bash
+Also - in case you run several different Breeze containers in parallel (from different directories,
+with different versions) - the docker images for the cloud provider CLI tools are shared, so if you update
+an image for one Breeze container, it will also get updated for all the other containers.
- ./breeze --python 3.6 --backend mysql
+.. raw:: html
-The choices you make are persisted in the ``./.build/`` cache directory so that next time when you use the
-``breeze`` script, it could use the values that were used previously. This way you do not have to specify
-them when you run the script. You can delete the ``.build/`` directory in case you want to restore the
-default settings.
+
-The defaults when you run the Breeze environment are Python 3.6, Sqlite, and Docker.
-Launching Breeze Integrations
+Launching Breeze integrations
-----------------------------
When Breeze starts, it can start additional integrations. Those are additional docker containers
that are started in the same docker-compose command. Those are required by some of the tests
-as described in `TESTING.rst `_.
+as described in ``_.
By default Breeze starts only airflow container without any integration enabled. If you selected
``postgres`` or ``mysql`` backend, the container for the selected backend is also started (but only the one
@@ -338,199 +491,273 @@ Once integration is started, it will continue to run until the environment is st
Note that running integrations uses significant resources - CPU and memory.
-Cleaning the Environment
-------------------------
+.. raw:: html
-You may need to clean up your Docker environment occasionally. The images are quite big
-(1.5GB for both images needed for static code analysis and CI tests) and, if you often rebuild/update
-them, you may end up with some unused image data.
+
-To clean up the Docker environment:
+Building CI images
+------------------
-1. Stop Breeze with ``./breeze stop``.
+With Breeze you can build the images that are used by Airflow CI as well as the production images.
-2. Run the ``docker system prune`` command.
+For all development tasks, unit tests, integration tests, and static code checks, we use the
+**CI image** maintained on the DockerHub in the ``apache/airflow`` repository.
+This Docker image contains a lot of test-related packages (size of ~1GB).
+Its tag follows the pattern of ``-python-ci``
+(for example, ``apache/airflow:master-python3.6-ci`` or ``apache/airflow:v1-10-test-python3.6-ci``).
+The image is built using the ``_ Dockerfile.
-3. Run ``docker images --all`` and ``docker ps --all`` to verify that your Docker is clean.
+The CI image is built automatically as needed; however, it can be rebuilt manually with the
+``build-image`` command. The production
+image should be built manually - but a variant of this image is also built automatically when the
+kubernetes tests are executed; see `Running Kubernetes tests <#running-kubernetes-tests>`_.
- Both commands should return an empty list of images and containers respectively.
+.. raw:: html
-If you run into disk space errors, consider pruning your Docker images with the ``docker system prune --all``
-command. You may need to restart the Docker Engine before running this command.
+
-In case of disk space errors on macOS, increase the disk space available for Docker. See
-`Prerequisites <#prerequisites>`_ for details.
+Building the image first time pulls a pre-built version of images from the Docker Hub, which may take some
+time. But for subsequent source code changes, no wait time is expected.
+However, changes to sensitive files like ``setup.py`` or ``Dockerfile.ci`` will trigger a rebuild
+that may take more time though it is highly optimized to only rebuild what is needed.
-Running Arbitrary Commands in the Breeze Environment
--------------------------------------------------------
+Breeze has a built-in mechanism to check whether your local image has diverged too much from the
+latest image built on CI. This might happen when, for example, the latest patches have been released as new
+Python images or when significant changes have been made in the Dockerfile. In such cases, Breeze will
+download the latest images before rebuilding because this is usually faster than rebuilding the image.
-To run other commands/executables inside the Breeze Docker-based environment, use the
-``./breeze execute-command`` command. To add arguments, specify them
-together with the command surrounded with either ``"`` or ``'``, or pass them after ``--`` as extra arguments.
+In most cases, rebuilding an image requires network connectivity (for example, to download new
+dependencies). If you work offline and do not want to rebuild the images when needed, you can set the
+``FORCE_ANSWER_TO_QUESTIONS`` variable to ``no`` as described in the
+`Setting default behaviour for user interaction <#setting-default-behaviour-for-user-interaction>`_ section.
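+
+For example, to work offline and keep using the existing local images:
+
+.. code-block:: bash
+
+    export FORCE_ANSWER_TO_QUESTIONS="no"
+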
-.. code-block:: bash
+Preparing packages
+------------------
- ./breeze execute-command "ls -la"
+Breeze can also be used to prepare airflow packages - both the main "apache-airflow" package and
+provider packages.
-.. code-block:: bash
+You can read more about testing provider packages in
+`TESTING.rst `_
- ./breeze execute-command ls -- --la
+There are several commands that you can run in Breeze to manage and build packages:
+
+* preparing Provider Readme files
+* preparing Airflow packages
+* preparing Provider packages
-Running Docker Compose Commands
--------------------------------
+Preparing the provider readme files is part of the release procedure performed by the release managers
+and it is described in detail in `dev `_.
-To run Docker Compose commands (such as ``help``, ``pull``, etc), use the
-``docker-compose`` command. To add extra arguments, specify them
-after ``--`` as extra arguments.
+You can prepare provider packages - by default regular provider packages are prepared, but with the
+``--backports`` flag you can prepare backport packages.
+
+The packages are prepared in the ``dist`` folder. Note that this command cleans up the ``dist`` folder
+before running, so you should run it before generating the airflow package below, as the airflow package
+would otherwise be removed.
+
+The below example builds provider packages in the wheel format.
.. code-block:: bash
- ./breeze docker-compose pull -- --ignore-pull-failures
+ ./breeze prepare-provider-packages
+If you run this command without specifying packages, you will prepare all packages. You can, however,
+specify the providers that you would like to build. By default only ``wheel`` packages are prepared,
+but you can change that by providing the optional ``--package-format`` flag.
-Mounting Local Sources to Breeze
---------------------------------
-Important sources of Airflow are mounted inside the ``airflow`` container that you enter.
-This means that you can continue editing your changes on the host in your favourite IDE and have them
-visible in the Docker immediately and ready to test without rebuilding images. You can disable mounting
-by specifying ``--skip-mounting-local-sources`` flag when running Breeze. In this case you will have sources
-embedded in the container and changes to these sources will not be persistent.
+.. code-block:: bash
+ ./breeze prepare-provider-packages --package-format=both google amazon
-After you run Breeze for the first time, you will have an empty directory ``files`` in your source code,
-which will be mapped to ``/files`` in your Docker container. You can pass there any files you need to
-configure and run Docker. They will not be removed between Docker runs.
+You can also prepare backport provider packages if you specify the ``--backports`` flag. You can read more
+about backport packages in `dev `_.
-By default ``/files/dags`` folder is mounted from your local ``/files/dags`` and this is
-the directory used by airflow scheduler and webserver to scan dags for. You can use it to test your dags
-from local sources in Airflow. If you wish to add local DAGs that can be run by Breeze.
+.. code-block:: bash
-Adding/Modifying Dependencies
------------------------------
+ ./breeze prepare-provider-packages --backports --package-format=both google amazon
-If you need to change apt dependencies in the ``Dockerfile.ci``, add Python packages in ``setup.py`` or
-add javascript dependencies in ``package.json``, you can either add dependencies temporarily for a single
-Breeze session or permanently in ``setup.py``, ``Dockerfile.ci``, or ``package.json`` files.
+You can see all providers available by running this command:
-Installing Dependencies for a Single Breeze Session
-...................................................
+.. code-block:: bash
-You can install dependencies inside the container using ``sudo apt install``, ``pip install`` or
-``yarn install`` (in ``airflow/www`` folder) respectively. This is useful if you want to test something
-quickly while you are in the container. However, these changes are not retained: they disappear once you
-exit the container (except for the node.js dependencies if your sources are mounted to the container).
-Therefore, if you want to retain a new dependency, follow the second option described below.
+ ./breeze prepare-provider-packages -- --help
-Adding Dependencies Permanently
-...............................
-You can add dependencies to the ``Dockerfile.ci``, ``setup.py`` or ``package.json`` and rebuild the image.
-This should happen automatically if you modify any of these files.
-After you exit the container and re-run ``breeze``, Breeze detects changes in dependencies,
-asks you to confirm rebuilding the image and proceeds with rebuilding if you confirm (or skip it
-if you do not confirm). After rebuilding is done, Breeze drops you to shell. You may also use the
-``build-image`` command to only rebuild CI image and not to go into shell.
+You can also prepare airflow packages using breeze:
-Changing apt Dependencies in the Dockerfile.ci
-..............................................
+.. code-block:: bash
-During development, changing dependencies in ``apt-get`` closer to the top of the ``Dockerfile.ci``
-invalidates cache for most of the image. It takes long time for Breeze to rebuild the image.
-So, it is a recommended practice to add new dependencies initially closer to the end
-of the ``Dockerfile.ci``. This way dependencies will be added incrementally.
+ ./breeze prepare-airflow-packages
-Before merge, these dependencies should be moved to the appropriate ``apt-get install`` command,
-which is already in the ``Dockerfile.ci``.
+This prepares the airflow ``.whl`` package in the ``dist`` folder.
-Port Forwarding
----------------
+Again, you can specify the optional ``--package-format`` flag to choose the format of the airflow packages.
-When you run Airflow Breeze, the following ports are automatically forwarded:
+.. code-block:: bash
-* 28080 -> forwarded to Airflow webserver -> airflow:8080
-* 25433 -> forwarded to Postgres database -> postgres:5432
-* 23306 -> forwarded to MySQL database -> mysql:3306
+    ./breeze prepare-airflow-packages --package-format=both
-You can connect to these ports/databases using:
-* Webserver: ``http://127.0.0.1:28080``
-* Postgres: ``jdbc:postgresql://127.0.0.1:25433/airflow?user=postgres&password=airflow``
-* Mysql: ``jdbc:mysql://localhost:23306/airflow?user=root``
+Building Production images
+--------------------------
-Start the webserver manually with the ``airflow webserver`` command if you want to connect
-to the webserver. You can use ``tmux`` to multiply terminals. You may need to create a user prior to
-running the webserver in order to log in. This can be done with the following command:
+The **Production image** is also maintained on the DockerHub in the
+``apache/airflow`` repository. This Docker image (and Dockerfile) contains a size-optimised Airflow
+installation with selected extras and dependencies. Its tag follows the pattern of
+``-python`` (for example, ``apache/airflow:master-python3.6``
+or ``apache/airflow:v1-10-test-python3.6``).
+
+However, in many cases you want to build your own custom version of the image - with added apt
+dependencies, python dependencies, or additional Airflow extras. Breeze's ``build-image`` command helps
+you build your own, customized variant of the image that contains everything you need.
+
+You can switch to building the production image by adding the ``--production-image`` flag to the
+``build-image`` command. Note that the images can also be built using the ``docker build`` command by
+passing the appropriate build-args as described in `IMAGES.rst `_, but Breeze provides
+several flags that make it easier. You can see all the flags by running ``./breeze build-image --help``;
+typical examples are presented below:
.. code-block:: bash
- airflow create_user --role Admin --username admin --password admin --email admin@example.com --firstname foo --lastname bar
+ ./breeze build-image --production-image --additional-extras "jira"
-For databases, you need to run ``airflow resetdb`` at least once (or run some tests) after you started
-Airflow Breeze to get the database/tables created. You can connect to databases with IDE or any other
-database client:
+This installs the additional ``jira`` extra while installing airflow in the image.
-.. image:: images/database_view.png
- :align: center
- :alt: Database view
-You can change the used host port numbers by setting appropriate environment variables:
+.. code-block:: bash
-* ``WEBSERVER_HOST_PORT``
-* ``POSTGRES_HOST_PORT``
-* ``MYSQL_HOST_PORT``
+ ./breeze build-image --production-image --additional-python-deps "torchio==0.17.10"
+
+This installs an additional PyPI dependency - torchio in the specified version.
-If you set these variables, next time when you enter the environment the new ports should be in effect.
-Setting Up Autocompletion
--------------------------
+.. code-block:: bash
-The ``breeze`` command comes with a built-in bash/zsh autocomplete option for its options. When you start typing
-the command, you can use to show all the available switches and get autocompletion on typical
-values of parameters that you can use.
+ ./breeze build-image --production-image --additional-dev-apt-deps "libasound2-dev" \
+ --additional-runtime-apt-deps "libasound2"
-You can set up the autocomplete option automatically by running:
+This installs additional apt dependencies - ``libasound2-dev`` in the build image and ``libasound2`` in the
+final image. Those are development dependencies that might be needed to build and use python packages added
+via the ``--additional-python-deps`` flag. The ``dev`` dependencies are not installed in the final
+production image; they are only installed in the build "segment" of the production image that is used
+as an intermediate step to build the final image. Usually the names of the ``dev`` dependencies end with the
+``-dev`` suffix and they need to be paired with a corresponding runtime dependency added for the runtime
+image (without ``-dev``).
.. code-block:: bash
- ./breeze setup-autocomplete
+    ./breeze build-image --production-image --python 3.7 --additional-dev-apt-deps "libasound2-dev" \
+ --additional-runtime-apt-deps "libasound2"
-You get the autocompletion working when you re-enter the shell.
+The same as above, but using python 3.7.
-Zsh autocompletion is currently limited to only autocomplete options. Bash autocompletion also completes
-options values (for example, Python version or static check name).
+.. raw:: html
-Setting Defaults for User Interaction
---------------------------------------
+
-Sometimes during the build, you are asked whether to perform an action, skip it, or quit. This happens
-when rebuilding or removing an image - actions that take a lot of time and could be potentially destructive.
+Building Production images for 1.10 Airflow versions
+----------------------------------------------------
-For automation scripts, you can export one of the three variables to control the default
-interaction behaviour:
+With Breeze you can also use the master Dockerfile to build custom images for released Airflow versions.
+This works in the same way as building the production image from master, but you need to add the extra
+switch ``--install-airflow-version``. You should pass the version of airflow (as released in PyPI). It can
+be used to install both released versions and release candidates. Similarly to the master images,
+you can pass additional extras/dependencies to install via the additional flags.
-.. code-block::
+.. code-block:: bash
- export FORCE_ANSWER_TO_QUESTIONS="yes"
+ ./breeze build-image --production-image --additional-extras "jira" --install-airflow-version="1.10.11"
-If ``FORCE_ANSWER_TO_QUESTIONS`` is set to ``yes``, the images are automatically rebuilt when needed.
-Images are deleted without asking.
+This builds the airflow image with the released Airflow version 1.10.11 and the additional "jira" extra.
-.. code-block::
+.. code-block:: bash
- export FORCE_ANSWER_TO_QUESTIONS="no"
+ ./breeze build-image --production-image --install-airflow-version="1.10.11rc2"
-If ``FORCE_ANSWER_TO_QUESTIONS`` is set to ``no``, the old images are used even if rebuilding is needed.
-This is useful when you work offline. Deleting images is aborted.
+This builds the airflow image with Airflow version 1.10.11rc2 (a release candidate).
-.. code-block::
- export FORCE_ANSWER_TO_QUESTIONS="quit"
+You can also build airflow directly from the GitHub source code - by providing a Git reference via
+``--install-airflow-reference``. The reference can be a branch name, tag name, or commit hash. This
+is useful mostly for testing.
-If ``FORCE_ANSWER_TO_QUESTIONS`` is set to ``quit``, the whole script is aborted. Deleting images is aborted.
+.. code-block:: bash
-If more than one variable is set, ``yes`` takes precedence over ``no``, which takes precedence over ``quit``.
+ ./breeze build-image --production-image --install-airflow-reference="v1-10-test"
+
+This builds the airflow image from the current ``v1-10-test`` branch of Airflow.
+
+.. code-block:: bash
+
+ ./breeze build-image --production-image \
+ --install-airflow-reference="0d91fcf725f69e10f0969ca36f9e38e1d74110d0"
+
+This builds the airflow image from the ``0d91fcf725f69e10f0969ca36f9e38e1d74110d0`` commit hash on
+GitHub.
+
+.. raw:: html
+
+
+
+
+Running static checks
+---------------------
+
+You can run static checks via Breeze. You can also run them via the pre-commit command, but with
+auto-completion Breeze makes it easier to run selective static checks. If you press <TAB> after
+``static-check`` and you have auto-complete set up, you should see an auto-completable list of all the
+available checks.
+
+.. code-block:: bash
+
+ ./breeze static-check mypy
+
+The above will run the mypy check on the currently staged files.
+
+You can also add arbitrary pre-commit flags after ``--``:
+
+.. code-block:: bash
+
+ ./breeze static-check mypy -- --all-files
+
+The above will run the mypy check on all files.
+
+.. raw:: html
+
+
+
+If you ever need to get a list of the files that will be checked (for troubleshooting when playing with
+the ``--from-ref`` and ``--to-ref`` flags), run:
+
+.. code-block:: bash
+
+ breeze static-check identity --verbose # currently staged files
+ breeze static-check identity --verbose -- --from-ref $(git merge-base master HEAD) --to-ref HEAD # branch updates
Building the Documentation
--------------------------
@@ -547,31 +774,69 @@ Often errors during documentation generation come from the docstrings of auto-ap
During the docs building auto-api generated files are stored in the ``docs/_api`` folder. This helps you
easily identify the location the problems with documentation originated from.
-Using Your Host IDE
-===================
+.. raw:: html
+
+
+
+Generating constraints
+----------------------
+
+Whenever setup.py gets modified, the CI master job will re-generate the constraint files. Those constraint
+files are stored in separate orphan branches: ``constraints-master`` and ``constraint-1-10``.
+They are stored separately for each python version. Those are
+constraint files as described in detail in the
+``_ contributing documentation.
+
+In case someone modifies setup.py, the ``CRON`` scheduled CI build automatically upgrades and
+pushes changes to the constraint files. However, you can also perform a test run of this locally using
+the ``generate-constraints`` command of Breeze.
+
+.. code-block:: bash
+
+ ./breeze generate-constraints --python 3.6
+
+.. code-block:: bash
+
+ ./breeze generate-constraints --python 3.7
+
+.. code-block:: bash
+
+ ./breeze generate-constraints --python 3.8
+
+This bumps the constraint files to the latest versions and stores the hash of setup.py. The generated constraint
+and setup.py hash files are stored in the ``files`` folder, and while the constraints are being generated,
+a diff of changes vs. the previous constraint files is printed.
+
+Using local virtualenv environment in Your Host IDE
+---------------------------------------------------
You can set up your host IDE (for example, IntelliJ's PyCharm/Idea) to work with Breeze
and benefit from all the features provided by your IDE, such as local and remote debugging,
-autocompletion, documentation support, etc.
+language auto-completion, documentation support, etc.
To use your host IDE with Breeze:
-1. Create a local virtual environment as follows:
+1. Create a local virtual environment:
- ``mkvirtualenv --python=python``
-
- You can use any of the following wrappers to create and manage your virtual environemnts:
+ You can use any of the following wrappers to create and manage your virtual environments:
`pyenv `_, `pyenv-virtualenv `_,
or `virtualenvwrapper `_.
- Ideally, you should have virtualenvs for all Python versions supported by Airflow (2.7, 3.5, 3.6)
- and switch between them with the ``workon`` command.
+ Ideally, you should have virtualenvs for all Python versions supported by Airflow (2.7, 3.5, 3.6, 3.7, 3.8)
-2. Use the ``workon`` command to enter the Breeze environment.
+2. Use the right command to activate the virtualenv (``workon`` if you use virtualenvwrapper or
+   ``pyenv activate`` if you use pyenv).
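+
+   For example (the virtualenv name ``airflow36`` is illustrative):
+
+   .. code-block:: bash
+
+      workon airflow36          # if you use virtualenvwrapper
+      # or
+      pyenv activate airflow36  # if you use pyenv-virtualenv
+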
3. Initialize the created local virtualenv:
- ``./breeze initialize-local-virtualenv``
+.. code-block:: bash
+
+ ./breeze initialize-local-virtualenv --python 3.8
4. Select the virtualenv you created as the project's default virtualenv in your IDE.
@@ -580,261 +845,1259 @@ This is a lightweight solution that has its own limitations.
More details on using the local virtualenv are available in the `LOCAL_VIRTUALENV.rst `_.
-Running static checks in Breeze
-===============================
-The Breeze environment is also used to run some of the static checks as described in
-`STATIC_CODE_CHECKS.rst `_.
+
+Running Kubernetes tests
+------------------------
-Running Tests in Breeze
-=======================
+Breeze helps with running the Kubernetes tests in the same environment and in the same way as they are run in CI.
+Breeze helps to set up a KinD cluster for testing, sets up a virtualenv, and automatically downloads
+the right tools to run the tests.
-As soon as you enter the Breeze environment, you can run Airflow unit tests via the ``pytest`` command.
+This is described in detail in `Testing Kubernetes `_.
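+
+For example, you can start the KinD cluster and enter an interactive testing shell like this (a minimal
+sketch; the ``kind-cluster`` operations are listed in the command reference below):
+
+.. code-block:: bash
+
+   ./breeze kind-cluster start
+   ./breeze kind-cluster shell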
-For supported CI test suites, types of unit tests, and other tests, see `TESTING.rst `_.
-Breeze Command-Line Interface Reference
-=======================================
+
+
+
+Internal details of Breeze
+==========================
+
+Airflow directory structure inside container
+--------------------------------------------
+
+When you are in the CI container, the following directories are used:
+
+.. code-block:: text
+
+ /opt/airflow - Contains sources of Airflow mounted from the host (AIRFLOW_SOURCES).
+ /root/airflow - Contains all the "dynamic" Airflow files (AIRFLOW_HOME), such as:
+ airflow.db - sqlite database in case sqlite is used;
+ dags - folder with non-test dags (test dags are in /opt/airflow/tests/dags);
+ logs - logs from Airflow executions;
+ unittest.cfg - unit test configuration generated when entering the environment;
+ webserver_config.py - webserver configuration generated when running Airflow in the container.
+
+Note that when running in your local environment, the ``/root/airflow/logs`` folder is actually mounted
+from your ``logs`` directory in the Airflow sources, so all logs created in the container are automatically
+visible in the host as well. Every time you enter the container, the ``logs`` directory is
+cleaned so that logs do not accumulate.
+
+When you are in the production container, the following directories are used:
+
+.. code-block:: text
+
+ /opt/airflow - Contains sources of Airflow mounted from the host (AIRFLOW_SOURCES).
+ /root/airflow - Contains all the "dynamic" Airflow files (AIRFLOW_HOME), such as:
+ airflow.db - sqlite database in case sqlite is used;
+ dags - folder with non-test dags (test dags are in /opt/airflow/tests/dags);
+ logs - logs from Airflow executions;
+ unittest.cfg - unit test configuration generated when entering the environment;
+ webserver_config.py - webserver configuration generated when running Airflow in the container.
+
+Note that when running in your local environment, the ``/root/airflow/logs`` folder is actually mounted
+from your ``logs`` directory in the Airflow sources, so all logs created in the container are automatically
+visible in the host as well. Every time you enter the container, the ``logs`` directory is
+cleaned so that logs do not accumulate.
+
+Running Arbitrary commands in the Breeze environment
+----------------------------------------------------
+
+To run other commands/executables inside the Breeze Docker-based environment, use the
+``./breeze shell`` command and add your command as ``-c "command"`` after ``--``.
+
+.. code-block:: bash
+
+ ./breeze shell -- -c "ls -la"
+
+Running "Docker Compose" commands
+---------------------------------
+
+To run Docker Compose commands (such as ``help``, ``pull``, etc.), use the
+``docker-compose`` command. To add extra arguments, specify them
+after ``--``.
+
+.. code-block:: bash
+
+ ./breeze docker-compose pull -- --ignore-pull-failures
+
+Restarting Breeze environment
+-----------------------------
+
+You can also restart the environment and enter it via:
+
+.. code-block:: bash
+
+ ./breeze restart
+
+
+Setting default answers for user interaction
+--------------------------------------------
+
+Sometimes during the build, you are asked whether to perform an action, skip it, or quit. This happens
+when rebuilding or removing an image - actions that take a lot of time and could be potentially destructive.
+
+For automation scripts, you can export one of the three variables to control the default
+interaction behaviour:
+
+.. code-block:: bash
+
+ export FORCE_ANSWER_TO_QUESTIONS="yes"
+
+If ``FORCE_ANSWER_TO_QUESTIONS`` is set to ``yes``, the images are automatically rebuilt when needed.
+Images are deleted without asking.
+
+.. code-block:: bash
+
+ export FORCE_ANSWER_TO_QUESTIONS="no"
+
+If ``FORCE_ANSWER_TO_QUESTIONS`` is set to ``no``, the old images are used even if rebuilding is needed.
+This is useful when you work offline. Deleting images is aborted.
+
+.. code-block:: bash
+
+ export FORCE_ANSWER_TO_QUESTIONS="quit"
+
+If ``FORCE_ANSWER_TO_QUESTIONS`` is set to ``quit``, the whole script is aborted. Deleting images is aborted.
+
+If more than one variable is set, ``yes`` takes precedence over ``no``, which takes precedence over ``quit``.
+
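+For example, a fully non-interactive rebuild in an automation script could look like this (a minimal
+sketch combining the variable with the ``build-image`` command):
+
+.. code-block:: bash
+
+   # Never ask: rebuild images when needed, delete them without confirmation
+   export FORCE_ANSWER_TO_QUESTIONS="yes"
+   ./breeze build-image
+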
+Fixing File/Directory Ownership
+-------------------------------
+
+On Linux, there is a problem with propagating ownership of created files (a known Docker problem). The
+files and directories created in the container are not owned by the host user (but by the root user in our
+case). This may prevent you from switching branches, for example, if files owned by the root user are
+created within your sources. In case you are on a Linux host and have some files in your sources created
+by the root user, you can fix the ownership of those files by running this script:
+
+.. code-block:: bash
+
+ ./scripts/ci/tools/ci_fix_ownership.sh
+
+Mounting Local Sources to Breeze
+--------------------------------
+
+Important sources of Airflow are mounted inside the ``airflow`` container that you enter.
+This means that you can continue editing your changes on the host in your favourite IDE and have them
+visible in the container immediately and ready to test without rebuilding images. You can disable mounting
+by specifying ``--skip-mounting-local-sources`` flag when running Breeze. In this case you will have sources
+embedded in the container and changes to these sources will not be persistent.
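+
+For example, to enter Breeze without mounting your local sources:
+
+.. code-block:: bash
+
+   ./breeze --skip-mounting-local-sources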
+
+
+After you run Breeze for the first time, you will have an empty ``files`` directory in your sources,
+which is mapped to ``/files`` in your Docker container. You can put there any files you need to
+configure and run Docker. They will not be removed between Docker runs.
+
+By default, the ``/files/dags`` folder in the container is mounted from your local ``files/dags`` directory, and
+this is the directory used by the airflow scheduler and webserver to scan for dags. You can use it to test your
+dags from local sources in Airflow: place your local DAGs there if you wish to run them with Breeze.
+
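+For example, to make a DAG from your host available to the scheduler and webserver in Breeze (the
+file name is illustrative):
+
+.. code-block:: bash
+
+   cp my_dag.py files/dags/
+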
+Port Forwarding
+---------------
+
+When you run Airflow Breeze, the following ports are automatically forwarded:
+
+* 28080 -> forwarded to Airflow webserver -> airflow:8080
+* 25555 -> forwarded to Flower dashboard -> airflow:5555
+* 25433 -> forwarded to Postgres database -> postgres:5432
+* 23306 -> forwarded to MySQL database -> mysql:3306
+* 26379 -> forwarded to Redis broker -> redis:6379
+
+You can connect to these ports/databases using:
+
+* Webserver: ``http://127.0.0.1:28080``
+* Flower: ``http://127.0.0.1:25555``
+* Postgres: ``jdbc:postgresql://127.0.0.1:25433/airflow?user=postgres&password=airflow``
+* MySQL: ``jdbc:mysql://127.0.0.1:23306/airflow?user=root``
+* Redis: ``redis://127.0.0.1:26379/0``
+
+Start the webserver manually with the ``airflow webserver`` command if you want to connect
+to the webserver. You can use ``tmux`` to multiplex terminals. You may need to create a user prior to
+running the webserver in order to log in. This can be done with the following command:
+
+.. code-block:: bash
+
+ airflow users create --role Admin --username admin --password admin --email admin@example.com --firstname foo --lastname bar
+
+For databases, you need to run ``airflow db reset`` at least once (or run some tests) after you have started
+Airflow Breeze to get the database/tables created. You can then connect to the databases with your IDE or any
+other database client.
+
+
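+For example, if you have the ``psql`` client installed on your host, you can connect to the Postgres
+database like this (matching the forwarded port and credentials listed above):
+
+.. code-block:: bash
+
+   # password: airflow
+   psql -h 127.0.0.1 -p 25433 -U postgres airflow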
+
+You can change the used host port numbers by setting appropriate environment variables:
+
+* ``WEBSERVER_HOST_PORT``
+* ``POSTGRES_HOST_PORT``
+* ``MYSQL_HOST_PORT``
+
+If you set these variables, the next time you enter the environment the new ports will be in effect.
+
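+For example (the port value is illustrative):
+
+.. code-block:: bash
+
+   export WEBSERVER_HOST_PORT=28081
+   ./breeze
+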
+Managing Dependencies
+---------------------
+
+If you need to change apt dependencies in the ``Dockerfile.ci``, add Python packages in ``setup.py`` or
+add JavaScript dependencies in ``package.json``, you can either add dependencies temporarily for a single
+Breeze session or permanently in ``setup.py``, ``Dockerfile.ci``, or ``package.json`` files.
+
+Installing Dependencies for a Single Breeze Session
+...................................................
+
+You can install dependencies inside the container using ``sudo apt install``, ``pip install`` or
+``yarn install`` (in ``airflow/www`` folder) respectively. This is useful if you want to test something
+quickly while you are in the container. However, these changes are not retained: they disappear once you
+exit the container (except for the node.js dependencies if your sources are mounted to the container).
+Therefore, if you want to retain a new dependency, follow the second option described below.
+
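+For example, inside the container (the package names are purely illustrative; as noted above, these
+changes vanish when you exit the container):
+
+.. code-block:: bash
+
+   sudo apt install -y vim
+   pip install ipdb
+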
+Adding Dependencies Permanently
+...............................
+
+You can add dependencies to the ``Dockerfile.ci``, ``setup.py`` or ``package.json`` and rebuild the image.
+This should happen automatically if you modify any of these files.
+After you exit the container and re-run ``breeze``, Breeze detects changes in dependencies,
+asks you to confirm rebuilding the image and proceeds with rebuilding if you confirm (or skip it
+if you do not confirm). After rebuilding is done, Breeze drops you into the shell. You may also use the
+``build-image`` command to only rebuild the CI image without entering the shell.
+
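+For example, to rebuild the CI image without entering the shell:
+
+.. code-block:: bash
+
+   ./breeze build-image
+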
+Incremental apt Dependencies in the Dockerfile.ci during development
+....................................................................
+
+During development, changing ``apt-get`` dependencies closer to the top of the ``Dockerfile.ci``
+invalidates the cache for most of the image, and it takes a long time for Breeze to rebuild it.
+So, it is a recommended practice to initially add new dependencies closer to the end
+of the ``Dockerfile.ci``. This way dependencies will be added incrementally.
+
+Before merge, these dependencies should be moved to the appropriate ``apt-get install`` command,
+which is already in the ``Dockerfile.ci``.
+
+
+Breeze Command-Line Interface Reference
+=======================================
+
+Airflow Breeze Syntax
+---------------------
+
+This is the current syntax for `./breeze <./breeze>`_:
+
+ .. START BREEZE HELP MARKER
+
+.. code-block:: text
+
+
+ ####################################################################################################
+
+ usage: breeze [FLAGS] [COMMAND] -- <EXTRA_ARGS>
+
+ By default the script enters the CI container and drops you to bash shell, but you can choose
+ one of the commands to run specific actions instead.
+
+ Add --help after each command to see details:
+
+ Commands without arguments:
+
+ shell [Default] Enters interactive shell in the container
+ build-docs Builds documentation in the container
+ build-image Builds CI or Production docker image
+ cleanup-image Cleans up the container image created
+ exec Execs into running breeze container in new terminal
+ generate-constraints Generates pinned constraint files
+ push-image Pushes images to registry
+ initialize-local-virtualenv Initializes local virtualenv
+ prepare-airflow-packages Prepares airflow packages
+ setup-autocomplete Sets up autocomplete for breeze
+ start-airflow Starts Scheduler and Webserver and enters the shell
+ stop Stops the docker-compose environment
+ restart Stops the docker-compose environment including DB cleanup
+ toggle-suppress-cheatsheet Toggles on/off cheatsheet
+ toggle-suppress-asciiart Toggles on/off asciiart
+
+ Commands with arguments:
+
+ docker-compose Executes specified docker-compose command
+ kind-cluster Manages KinD cluster on the host
+ static-check Performs selected static check for changed files
+ tests Runs selected tests in the container
+
+ Help commands:
+
+ flags Shows all breeze's flags
+ help Shows this help message
+ help-all Shows detailed help for all commands and flags
+
+ ####################################################################################################
+
+ Detailed usage
+
+ ####################################################################################################
+
+
+ Detailed usage for command: shell
+
+
+ breeze shell [FLAGS] [-- <EXTRA_ARGS>]
+
+ This is the default subcommand if no subcommand is used.
+
+ Enters interactive shell where you can run all tests, start Airflow webserver, scheduler,
+ workers, interact with the database, run DAGs etc. It is the default command if no command
+ is selected. The shell is executed in the container and in case integrations are chosen,
+ the integrations will be started as separated docker containers - under the docker-compose
+ supervision. Local sources are by default mounted to within the container so you can edit
+ them locally and run tests immediately in the container. Several folders ('files', 'dist')
+ are also mounted so that you can exchange files between the host and container.
+
+ The 'files/airflow-breeze-config/variables.env' file can contain additional variables
+ and setup. This file is automatically sourced when you enter the container. Database
+ and webserver ports are forwarded to appropriate database/webserver so that you can
+ connect to it from your host environment.
+
+ You can also pass <EXTRA_ARGS> after --. They will be passed as bash parameters. This is
+ especially useful to pass bash options, for example -c to execute a command:
+
+ 'breeze shell -- -c "ls -la"'
+ 'breeze -- -c "ls -la"'
+
+ For DockerHub pull --dockerhub-user and --dockerhub-repo flags can be used to specify
+ the repository to pull from. For GitHub repository, the --github-repository
+ flag can be used for the same purpose. You can also use
+ --github-image-id <COMMIT_SHA>|<RUN_ID> in case you want to pull the image
+ with a specific COMMIT_SHA tag or RUN_ID.
+
+ 'breeze shell \
+ --github-image-id 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e' - pull/use image with SHA
+ 'breeze \
+ --github-image-id 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e' - pull/use image with SHA
+ 'breeze shell \
+ --github-image-id 209845560' - pull/use image with RUN_ID
+ 'breeze \
+ --github-image-id 209845560' - pull/use image with RUN_ID
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: build-docs
+
+
+ breeze build-docs
+
+ Builds Airflow documentation. The documentation is built inside a docker container - to
+ maintain the same build environment for everyone. Appropriate sources are mapped from
+ the host to the container so that the latest sources are used. The folders where documentation
+ is generated ('docs/_build') are also mounted to the container - this way the results of
+ the documentation build are available on the host.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: build-image
+
+
+ breeze build-image [FLAGS]
+
+ Builds docker image (CI or production) without entering the container. You can pass
+ additional options to this command, such as '--force-build-image',
+ '--force-pull-image', '--python', '--build-cache-local' or '--build-cache-pulled'
+ in order to modify build behaviour.
+
+ You can also pass '--production-image' flag to build production image rather than CI image.
+
+ For DockerHub pull --dockerhub-user and --dockerhub-repo flags can be used to specify
+ the repository to pull from. For GitHub repository, the --github-repository
+ flag can be used for the same purpose. You can also use
+ --github-image-id <COMMIT_SHA>|<RUN_ID> in case you want to pull the image with
+ a specific COMMIT_SHA tag or RUN_ID.
+
+ Flags:
+
+ -p, --python PYTHON_MAJOR_MINOR_VERSION
+ Python version used for the image. This is always major/minor version.
+
+ One of:
+
+ 2.7 3.5 3.6 3.7 3.8
+
+ -a, --install-airflow-version INSTALL_AIRFLOW_VERSION
+ If specified, installs Airflow directly from PIP released version. This happens at
+ image building time in production image and at container entering time for CI image. One of:
+
+ 1.10.15 1.10.14 1.10.12 1.10.11 1.10.10 1.10.9 none wheel sdist
+
+ When 'none' is used, you can install airflow from local packages. When building image,
+ airflow package should be added to 'docker-context-files' and
+ --install-from-docker-context-files flag should be used. When running an image, airflow
+ package should be added to dist folder and --install-packages-from-dist flag should be used.
+
+ -t, --install-airflow-reference INSTALL_AIRFLOW_REFERENCE
+ If specified, installs Airflow directly from reference in GitHub. This happens at
+ image building time in production image and at container entering time for CI image.
+ This can be a GitHub branch like master or v1-10-test, or a tag like 2.0.0a1.
+
+ --no-rbac-ui
+ Disables RBAC UI when Airflow 1.10.* is installed.
+
+ --install-packages-from-dist
+ If specified it will look for packages placed in dist folder and it will install the
+ packages after installing Airflow. This is useful for testing provider
+ packages.
+
+ -I, --production-image
+ Use production image for entering the environment and builds (not for tests).
+
+ -F, --force-build-images
+ Forces building of the local docker images. The images are rebuilt
+ automatically for the first time or when changes are detected in
+ package-related files, but you can force it using this flag.
+
+ -P, --force-pull-images
+ Forces pulling of images from DockerHub before building to populate cache. The
+ images are pulled by default only for the first time you run the
+ environment, later the locally built images are used as cache.
+
+ Customization options:
+
+ -E, --extras EXTRAS
+ Extras to pass to build images. The defaults are different for CI and production images:
+
+ CI image:
+ devel_ci
+
+ Production image:
+ async,aws,azure,celery,dask,elasticsearch,gcp,kubernetes,mysql,postgres,redis,slack,
+ ssh,statsd,virtualenv
+
+ --image-tag TAG
+ Additional tag in the image.
+
+ --disable-pypi-when-building
+ Disable installing Airflow from pypi when building. If you use this flag and want
+ to install Airflow, you have to install it from packages placed in
+ 'docker-context-files' and use --install-from-local-files-when-building flag.
+
+ --additional-extras ADDITIONAL_EXTRAS
+ Additional extras to pass to build images. The default is no additional extras.
+
+ --additional-python-deps ADDITIONAL_PYTHON_DEPS
+ Additional python dependencies to use when building the images.
+
+ --dev-apt-command DEV_APT_COMMAND
+ The basic command executed before dev apt deps are installed.
+
+ --additional-dev-apt-command ADDITIONAL_DEV_APT_COMMAND
+ Additional command executed before dev apt deps are installed.
+
+ --additional-dev-apt-deps ADDITIONAL_DEV_APT_DEPS
+ Additional apt dev dependencies to use when building the images.
+
+ --dev-apt-deps DEV_APT_DEPS
+ The basic apt dev dependencies to use when building the images.
+
+ --additional-dev-apt-envs ADDITIONAL_DEV_APT_ENVS
+ Additional environment variables set when adding dev dependencies.
+
+ --runtime-apt-command RUNTIME_APT_COMMAND
+ The basic command executed before runtime apt deps are installed.
+
+ --additional-runtime-apt-command ADDITIONAL_RUNTIME_APT_COMMAND
+ Additional command executed before runtime apt deps are installed.
+
+ --runtime-apt-deps ADDITIONAL_RUNTIME_APT_DEPS
+ The basic apt runtime dependencies to use when building the images.
+
+ --additional-runtime-apt-deps ADDITIONAL_RUNTIME_DEPS
+ Additional apt runtime dependencies to use when building the images.
+
+ --additional-runtime-apt-envs ADDITIONAL_RUNTIME_APT_ENVS
+ Additional environment variables set when adding runtime dependencies.
+
+ Build options:
+
+ --disable-mysql-client-installation
+ Disables installation of the mysql client which might be problematic if you are building
+ the image in a controlled environment. Only valid for production image.
+
+ --constraints-location
+ URL of the constraints file. In case of the production image it can also be a path to the
+ constraint file placed in 'docker-context-files' folder, in which case it has to be
+ in the form of '/docker-context-files/'
+
+ --disable-pip-cache
+ Disables GitHub PIP cache during the build. Useful if GitHub is not reachable during build.
+
+ --install-from-local-files-when-building
+ This flag is used during image building. If it is used in addition to installing
+ Airflow from PyPI, the packages are installed from the .whl and .tar.gz packages placed
+ in the 'docker-context-files' folder. The same flag can be used when entering
+ the CI image - in this case the .whl and .tar.gz files will also be installed automatically.
+
+ -C, --force-clean-images
+ Force building images with cache disabled. This will remove the pulled or built images
+ and start building images from scratch. This might take a long time.
+
+ -r, --skip-rebuild-check
+ Skips checking image for rebuilds. It will use whatever image is available locally/pulled.
+
+ -L, --build-cache-local
+ Uses local cache to build images. No pulled images will be used, but results of local
+ builds in the Docker cache are used instead. This will take longer than when the pulled
+ cache is used for the first time, but subsequent '--build-cache-local' builds will be
+ faster as they will use mostly the locally built cache.
+
+ This is the default strategy used by the Production image builds.
+
+ -U, --build-cache-pulled
+ Uses images pulled from registry (either DockerHub or GitHub depending on
+ --github-registry flag) to build images. The pulled images will be used as cache.
+ Those builds are usually faster than '--build-cache-local' builds, except when
+ the registry images are not yet updated. The DockerHub images are updated nightly and the
+ GitHub images are updated after merges to master, so it might be that the images are still
+ outdated vs. the latest version of the Dockerfiles you are using. In this case,
+ '--build-cache-local' might be faster, especially if you iterate and change the
+ Dockerfiles yourself.
+
+ This is the default strategy used by the CI image builds.
+
+ -X, --build-cache-disabled
+ Disables cache during docker builds. This is useful if you want to make sure you
+ rebuild everything from scratch.
+
+ This strategy is used by default for both Production and CI images for the scheduled
+ (nightly) builds in CI.
+
+ -D, --dockerhub-user DOCKERHUB_USER
+ DockerHub user used to pull, push and build images. Default: apache.
+
+ -H, --dockerhub-repo DOCKERHUB_REPO
+ DockerHub repository used to pull, push, build images. Default: airflow.
+
+ -c, --github-registry GITHUB_REGISTRY
+ If GitHub registry is enabled, pulls and pushes are done from the GitHub registry, not
+ DockerHub. You need to be logged in to the registry in order to be able to pull/push from it,
+ and you need to be a committer to push to Apache Airflow's GitHub registry.
+
+ -g, --github-repository GITHUB_REPOSITORY
+ GitHub repository used to pull, push images when cache is used.
+ Default: apache/airflow.
+
+ If you use this flag, the --github-registry flag is automatically enabled.
+
+ -s, --github-image-id COMMIT_SHA|RUN_ID
+ <COMMIT_SHA> or <RUN_ID> of the image. Images in the GitHub registry are stored with those
+ tags to be able to easily find the image for particular CI runs. Once you know the
+ <COMMIT_SHA> or <RUN_ID>, you can specify it in the github-image-id flag and Breeze will
+ automatically pull and use that image so that you can easily reproduce a problem
+ that occurred in CI.
+
+ If you use this flag, --github-registry is automatically enabled.
+
+ Default: latest.
+
+ -v, --verbose
+ Show verbose information about executed docker, kind, kubectl, helm commands. Useful for
+ debugging - when you run breeze with --verbose flags you will be able to see the commands
+ executed under the hood and copy&paste them to your terminal to debug them more easily.
+
+ Note that you can further increase verbosity and see all the commands executed by breeze
+ by running 'export VERBOSE_COMMANDS="true"' before running breeze.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: cleanup-image
+
+
+ breeze cleanup-image [FLAGS]
+
+ Removes the breeze-related images created in your local docker image cache. This will
+ not reclaim space in the docker cache. You need to run 'docker system prune' (optionally
+ with --all) to reclaim that space.
+
+ Flags:
+
+ -p, --python PYTHON_MAJOR_MINOR_VERSION
+ Python version used for the image. This is always major/minor version.
+
+ One of:
+
+ 2.7 3.5 3.6 3.7 3.8
+
+ -I, --production-image
+ Use production image for entering the environment and builds (not for tests).
+
+ -v, --verbose
+ Show verbose information about executed docker, kind, kubectl, helm commands. Useful for
+ debugging - when you run breeze with --verbose flags you will be able to see the commands
+ executed under the hood and copy&paste them to your terminal to debug them more easily.
+
+ Note that you can further increase verbosity and see all the commands executed by breeze
+ by running 'export VERBOSE_COMMANDS="true"' before running breeze.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: exec
+
+
+ breeze exec [-- <EXTRA_ARGS>]
+
+ Execs into an interactive shell in an already running container. The container must be started
+ already by the breeze shell command. If you are not familiar with tmux, this is the best
+ way to run multiple processes in the same container at the same time, for example the scheduler,
+ webserver, workers, a database console and an interactive terminal.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: generate-constraints
+
+
+ breeze generate-constraints [FLAGS]
+
+ Generates pinned constraint files from setup.py. Those files are generated in the files folder
+ - a separate file for each python version. Those constraint files, when pushed to the orphan
+ constraints-master and constraints-1-10 branches, are used to generate repeatable
+ CI builds as well as repeatable production image builds. You can use those constraints
+ to predictably install released Airflow versions. This is mainly used to test the constraint
+ generation - constraints are pushed to the orphan branches by a successful scheduled
+ CRON job in CI automatically.
+
+ Flags:
+
+ -p, --python PYTHON_MAJOR_MINOR_VERSION
+ Python version used for the image. This is always major/minor version.
+
+ One of:
+
+ 2.7 3.5 3.6 3.7 3.8
+
+ -v, --verbose
+ Show verbose information about executed docker, kind, kubectl, helm commands. Useful for
+ debugging - when you run breeze with --verbose flags you will be able to see the commands
+ executed under the hood and copy&paste them to your terminal to debug them more easily.
+
+ Note that you can further increase verbosity and see all the commands executed by breeze
+ by running 'export VERBOSE_COMMANDS="true"' before running breeze.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: push-image
+
+
+ breeze push-image [FLAGS]
+
+ Pushes images to docker registry. You can push the images to DockerHub registry (default)
+ or to the GitHub registry (if --github-registry flag is used).
+
+ For DockerHub pushes --dockerhub-user and --dockerhub-repo flags can be used to specify
+ the repository to push to. For GitHub repository, the --github-repository
+ flag can be used for the same purpose. You can also add
+ --github-image-id <COMMIT_SHA>|<RUN_ID> in case you want to push the image with a specific
+ SHA tag or run id. In case you specify --github-repository or --github-image-id, you
+ do not need to specify --github-registry flag.
+
+ You can also add --production-image flag to switch to production image (default is CI one)
+
+ Examples:
+
+ 'breeze push-image' or
+ 'breeze push-image --dockerhub-user user' to push to your private registry or
+ 'breeze push-image --production-image' - to push production image or
+ 'breeze push-image --github-registry' - to push to GitHub image registry or
+ 'breeze push-image \
+ --github-repository user/airflow' - to push to your user's fork
+ 'breeze push-image \
+ --github-image-id 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e' - to push with COMMIT_SHA
+ 'breeze push-image \
+ --github-image-id 209845560' - to push with RUN_ID
+
+ Flags:
+
+ -D, --dockerhub-user DOCKERHUB_USER
+ DockerHub user used to pull, push and build images. Default: apache.
+
+ -H, --dockerhub-repo DOCKERHUB_REPO
+ DockerHub repository used to pull, push, build images. Default: airflow.
+
+ -c, --github-registry GITHUB_REGISTRY
+ If GitHub registry is enabled, pulls and pushes are done from the GitHub registry, not
+ DockerHub. You need to be logged in to the registry in order to be able to pull/push from it,
+ and you need to be a committer to push to Apache Airflow's GitHub registry.
+
+ -g, --github-repository GITHUB_REPOSITORY
+ GitHub repository used to pull, push images when cache is used.
+ Default: apache/airflow.
+
+ If you use this flag, the --github-registry flag is automatically enabled.
+
+ -s, --github-image-id COMMIT_SHA|RUN_ID
+ <COMMIT_SHA> or <RUN_ID> of the image. Images in the GitHub registry are stored with those
+ tags to be able to easily find the image for particular CI runs. Once you know the
+ <COMMIT_SHA> or <RUN_ID>, you can specify it in the github-image-id flag and Breeze will
+ automatically pull and use that image so that you can easily reproduce a problem
+ that occurred in CI.
+
+ If you use this flag, --github-registry is automatically enabled.
+
+ Default: latest.
+
+ -v, --verbose
+ Show verbose information about executed docker, kind, kubectl, helm commands. Useful for
+ debugging - when you run breeze with --verbose flags you will be able to see the commands
+ executed under the hood and copy&paste them to your terminal to debug them more easily.
+
+ Note that you can further increase verbosity and see all the commands executed by breeze
+ by running 'export VERBOSE_COMMANDS="true"' before running breeze.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: initialize-local-virtualenv
+
+
+ breeze initialize-local-virtualenv [FLAGS]
+
+ Initializes locally created virtualenv installing all dependencies of Airflow
+ taking into account the constraints for the version specified.
+ This local virtualenv can be used to aid auto-completion and IDE support as
+ well as run unit tests directly from the IDE. You need to have virtualenv
+ activated before running this command.
+
+ Flags:
+
+ -p, --python PYTHON_MAJOR_MINOR_VERSION
+ Python version used for the image. This is always major/minor version.
+
+ One of:
+
+ 2.7 3.5 3.6 3.7 3.8
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: prepare-airflow-packages
+
+
+ breeze prepare-airflow-packages [FLAGS]
+
+ Prepares airflow packages (sdist and wheel) in the dist folder. Note that
+ the prepare-provider-packages command cleans up the dist folder, so if you also want
+ to generate provider packages, make sure you run prepare-provider-packages first
+ and prepare-airflow-packages second.
+
+ General form:
+
+ 'breeze prepare-airflow-packages'
+
+ Flags:
+
+ --package-format PACKAGE_FORMAT
+
+ Chooses format of packages to prepare.
+
+ One of:
+
+ wheel,sdist,both
+
+ Default:
+
+ -S, --version-suffix-for-pypi SUFFIX
+ Adds optional suffix to the version in the generated backport package. It can be used
+ to generate rc1/rc2 ... versions of the packages to be uploaded to PyPI.
+
+ -N, --version-suffix-for-svn SUFFIX
+ Adds optional suffix to the generated names of package. It can be used to generate
+ rc1/rc2 ... versions of the packages to be uploaded to SVN.
+
+ -v, --verbose
+ Show verbose information about executed docker, kind, kubectl, helm commands. Useful for
+ debugging - when you run breeze with --verbose flags you will be able to see the commands
+ executed under the hood and copy&paste them to your terminal to debug them more easily.
+
+ Note that you can further increase verbosity and see all the commands executed by breeze
+ by running 'export VERBOSE_COMMANDS="true"' before running breeze.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: setup-autocomplete
+
+
+ breeze setup-autocomplete
+
+ Sets up autocomplete for breeze commands. Once you do it, you need to re-enter the bash
+ shell, and when typing a breeze command, pressing <TAB> will provide autocomplete for
+ parameters and values.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: start-airflow
+
+
+ breeze start-airflow
+
+ Like the shell command, this will enter the interactive shell, but it will also automatically
+ start the Scheduler and the Webserver. It will leave you in a tmux session where you
+ can also observe what is happening in your Airflow.
+
+ This is a convenient way to set up a development environment. Your dags will be loaded from the
+ folder 'files/dags' on your host machine (it could take some time).
+
+ If you want to load default connections and example dags you can use the dedicated flags.
+
+ Flags:
+
+ --load-example-dags
+ Include Airflow example dags.
+
+ --load-default-connections
+ Include Airflow Default Connections.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: stop
+
+
+ breeze stop
+
+ Brings down the running docker compose environment. When you start the environment, the docker
+ containers will continue running so that startup time is shorter. But they take quite a lot of
+ memory and CPU. This command stops all running containers from the environment.
+
+ Flags:
+
+ --preserve-volumes
+ Use this flag if you would like to preserve data volumes from the databases used
+ by the integrations. By default, those volumes are deleted, so when you run 'stop'
+ or 'restart' commands you start from scratch, but by using this flag you can
+ preserve them. If you want to delete those volumes after stopping Breeze, just
+ run 'breeze stop' again without this flag.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: restart
+
+
+ breeze restart [FLAGS]
+
+ Restarts the running docker compose environment. When you restart the environment, the docker
+ containers will be restarted. That includes cleaning up the databases. This is
+ especially useful if you switch between different versions of Airflow.
+
+ Flags:
+
+ --preserve-volumes
+ Use this flag if you would like to preserve data volumes from the databases used
+ by the integrations. By default, those volumes are deleted, so when you run 'stop'
+ or 'restart' commands you start from scratch, but by using this flag you can
+ preserve them. If you want to delete those volumes after stopping Breeze, just
+ run 'breeze stop' again without this flag.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: toggle-suppress-cheatsheet
+
+
+ breeze toggle-suppress-cheatsheet
+
+ Toggles on/off cheatsheet displayed before starting bash shell.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: toggle-suppress-asciiart
+
+
+ breeze toggle-suppress-asciiart
+
+ Toggles on/off asciiart displayed before starting bash shell.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: docker-compose
+
+
+ breeze docker-compose [FLAGS] COMMAND [-- <EXTRA_ARGS>]
+
+ Runs the docker-compose command instead of entering the environment. Use 'help' as the command
+ to see available commands. The <EXTRA_ARGS> passed after -- are treated
+ as additional options passed to docker-compose. For example:
+
+ 'breeze docker-compose pull -- --ignore-pull-failures'
+
+ Flags:
+
+ -p, --python PYTHON_MAJOR_MINOR_VERSION
+ Python version used for the image. This is always major/minor version.
+
+ One of:
+
+ 2.7 3.5 3.6 3.7 3.8
+
+ -b, --backend BACKEND
+ Backend to use for tests - it determines which database is used.
+ One of:
+
+ sqlite mysql postgres
+
+ Default: sqlite
+
+ --postgres-version POSTGRES_VERSION
+ Postgres version used. One of:
+
+ 9.6 10 11 12 13
+
+ --mysql-version MYSQL_VERSION
+ Mysql version used. One of:
+
+ 5.6 5.7
+
+ -v, --verbose
+ Show verbose information about executed docker, kind, kubectl, helm commands. Useful for
+ debugging - when you run breeze with --verbose flags you will be able to see the commands
+ executed under the hood and copy&paste them to your terminal to debug them more easily.
+
+ Note that you can further increase verbosity and see all the commands executed by breeze
+ by running 'export VERBOSE_COMMANDS="true"' before running breeze.
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: kind-cluster
+
+
+ breeze kind-cluster [FLAGS] OPERATION
+
+ Manages the host-side KinD Kubernetes cluster that is used to run Kubernetes integration tests.
+ It allows you to start/stop/restart/check the status of the KinD Kubernetes cluster and to deploy
+ Airflow to it. This enables you to run tests inside the breeze environment with the latest airflow
+ images. Note that in case of deploying airflow, the first step is to rebuild the image and load it
+ into the cluster, so you can also pass appropriate build image flags that will influence
+ rebuilding the production image. Operation is one of:
+
+ start stop restart status deploy test shell k9s
+
+ The last two operations - shell and k9s - allow you to perform interactive testing with
+ kubernetes tests. You can enter the shell from which you can run kubernetes tests, and in
+ another terminal you can start the k9s CLI to debug the kubernetes instance. It is an easy
+ way to debug the kubernetes deployments.
+
+ You can read more about k9s at https://k9scli.io/
+
+ Flags:
+
+ -p, --python PYTHON_MAJOR_MINOR_VERSION
+ Python version used for the image. This is always major/minor version.
+
+ One of:
+
+ 2.7 3.5 3.6 3.7 3.8
+
+ -F, --force-build-images
+ Forces building of the local docker images. The images are rebuilt
+ automatically for the first time or when changes are detected in
+ package-related files, but you can force it using this flag.
+
+ -P, --force-pull-images
+ Forces pulling of images from DockerHub before building to populate cache. The
+ images are pulled by default only for the first time you run the
+ environment, later the locally built images are used as cache.
+
+ Customization options:
+
+ -E, --extras EXTRAS
+ Extras to pass to build images. The defaults are different for CI and production images:
-This is the current syntax for `./breeze <./breeze>`_:
+ CI image:
+ devel_ci
- .. START BREEZE HELP MARKER
+ Production image:
+ async,aws,azure,celery,dask,elasticsearch,gcp,kubernetes,mysql,postgres,redis,slack,
+ ssh,statsd,virtualenv
-.. code-block:: text
+ --image-tag TAG
+ Additional tag in the image.
+ --disable-pypi-when-building
+ Disable installing Airflow from pypi when building. If you use this flag and want
+ to install Airflow, you have to install it from packages placed in
+ 'docker-context-files' and use --install-from-local-files-when-building flag.
- ####################################################################################################
+ --additional-extras ADDITIONAL_EXTRAS
+ Additional extras to pass to build images. The default is no additional extras.
- Usage: breeze [FLAGS] [COMMAND] --
+ --additional-python-deps ADDITIONAL_PYTHON_DEPS
+ Additional python dependencies to use when building the images.
- By default the script enters IT environment and drops you to bash shell, but you can choose one
- of the commands to run specific actions instead. Add --help after each command to see details:
+ --dev-apt-command DEV_APT_COMMAND
+ The basic command executed before dev apt deps are installed.
- Commands without arguments:
+ --additional-dev-apt-command ADDITIONAL_DEV_APT_COMMAND
+ Additional command executed before dev apt deps are installed.
- shell [Default] Enters interactive shell in the container
- build-docs Builds documentation in the container
- build-image Builds CI or Production docker image
- cleanup-image Cleans up the container image created
- exec Execs into running breeze container in new terminal
- generate-requirements Generates pinned requirements for pip dependencies
- initialize-local-virtualenv Initializes local virtualenv
- setup-autocomplete Sets up autocomplete for breeze
- stop Stops the docker-compose evironment
- restart Stops the docker-compose evironment including DB cleanup
- toggle-suppress-cheatsheet Toggles on/off cheatsheet
- toggle-suppress-asciiart Toggles on/off asciiart
+ --additional-dev-apt-deps ADDITIONAL_DEV_APT_DEPS
+ Additional apt dev dependencies to use when building the images.
- Commands with arguments:
+ --dev-apt-deps DEV_APT_DEPS
+ The basic apt dev dependencies to use when building the images.
- docker-compose Executes specified docker-compose command
- execute-command Executes specified command in the container
- static-check Performs selected static check for changed files
- static-check-all-files Performs selected static check for all files
- test-target Runs selected test target in the container
- Help commands:
+ --additional-dev-apt-envs ADDITIONAL_DEV_APT_ENVS
+ Additional environment variables set when adding dev dependencies.
- flags Shows all breeze's flags
- help Shows this help message
- help-all Shows detailed help for all commands and flags
+ --runtime-apt-command RUNTIME_APT_COMMAND
+ The basic command executed before runtime apt deps are installed.
- ####################################################################################################
+ --additional-runtime-apt-command ADDITIONAL_RUNTIME_APT_COMMAND
+ Additional command executed before runtime apt deps are installed.
- Detailed usage
+ --runtime-apt-deps ADDITIONAL_RUNTIME_APT_DEPS
+ The basic apt runtime dependencies to use when building the images.
- ####################################################################################################
+ --additional-runtime-apt-deps ADDITIONAL_RUNTIME_DEPS
+ Additional apt runtime dependencies to use when building the images.
- breeze [FLAGS] shell --
+ --additional-runtime-apt-envs ADDITIONAL_RUNTIME_APT_ENVS
+ Additional environment variables set when adding runtime dependencies.
- This is default subcommand if no subcommand is used.
+ Build options:
- Enters interactive shell where you can run all tests, start airflow webserver, scheduler,
- workers, interact with the database, run DAGs etc. It is the default command if no command
- is selected. The shell is executed in the container and in case integrations are chosen,
- the integrations will be started as separated docker containers - under the docker-compose
- supervision. Local sources are by default mounted to within the container so you can edit
- them locally and run tests immediately in the container. Several folders ('files', 'dist')
- are also mounted so that you can exchange files between the host and container.
+ --disable-mysql-client-installation
+ Disables installation of the mysql client which might be problematic if you are building
+ the image in a controlled environment. Only valid for production image.
- The 'files/airflow-breeze-config/variables.env' file can contain additional variables
- and setup. This file is automatically sourced when you enter the container. Database
- and webserver ports are forwarded to appropriate database/webserver so that you can
- connect to it from your host environment.
- ****************************************************************************************************
- breeze [FLAGS] build-docs --
+ --constraints-location
+ URL of the constraints file. In case of the production image it can also be a path to the
+ constraint file placed in 'docker-context-files' folder, in which case it has to be
+ in the form of '/docker-context-files/'
- Builds airflow documentation. The documentation is build inside docker container - to
- maintain the same build environment for everyone. Appropriate sources are mapped from
- the host to the container so that latest sources are used. The folders where documentation
- is generated ('docs/build') are also mounted to the container - this way results of
- the documentation build is available in the host.
- ****************************************************************************************************
- breeze [FLAGS] build-image --
+ --disable-pip-cache
+ Disables GitHub PIP cache during the build. Useful if GitHub is not reachable during build.
- Builds docker image (CI or production) without entering the container. You can pass
- aditional options to this command, such as '--force-build-image',
- '--force-pull-image' '--python' '--use-local-cache'' in order to modify build behaviour.
- You can also pass '--production-image' flag to build production image rather than CI image.
- ****************************************************************************************************
- breeze [FLAGS] cleanup-image --
+ --install-from-local-files-when-building
+ This flag is used during image building. If it is used in addition to installing
+ Airflow from PyPI, the packages are installed from the .whl and .tar.gz packages placed
+ in the 'docker-context-files' folder. The same flag can be used when entering
+ the CI image - in this case the .whl and .tar.gz files will also be installed automatically.
- Removes the breeze-related images created in your local docker image cache. This will
- not reclaim space in docker cache. You need to 'docker system prune' (optionally
- with --all) to reclaim that space.
- ****************************************************************************************************
- breeze [FLAGS] exec --
+ -C, --force-clean-images
+ Force building images with cache disabled. This will remove the pulled or built images
+ and start building images from scratch. This might take a long time.
- Execs into interactive shell to an already running container. The container mus be started
- already by breeze shell command. If you are not familiar with tmux, this is the best
- way to run multiple processes in the same container at the same time for example scheduler,
- webserver, workers, database console and interactive terminal.
- ****************************************************************************************************
- breeze [FLAGS] generate-requirements --
+ -r, --skip-rebuild-check
+ Skips checking image for rebuilds. It will use whatever image is available locally/pulled.
- Generates pinned requirements from setup.py. Those requirements are generated in requirements
- directory - separately for different python version. Those requirements are used to run
- CI builds as well as run repeatable production image builds. You can use those requirements
- to predictably install released airflow versions. You should run it always after you update
- setup.py.
- ****************************************************************************************************
- breeze [FLAGS] initialize-local-virtualenv --
+ -L, --build-cache-local
+ Uses local cache to build images. No pulled images will be used, but results of local
+ builds in the Docker cache are used instead. This will take longer than when the pulled
+ cache is used for the first time, but subsequent '--build-cache-local' builds will be
+ faster as they will use mostly the locally built cache.
- Initializes locally created virtualenv installing all dependencies of Airflow
- taking into account the frozen requirements from requirements folder.
- This local virtualenv can be used to aid autocompletion and IDE support as
- well as run unit tests directly from the IDE. You need to have virtualenv
- activated before running this command.
- ****************************************************************************************************
- breeze [FLAGS] setup-autocomplete --
+ This is the default strategy used by the Production image builds.
- Sets up autocomplete for breeze commands. Once you do it you need to re-enter the bash
- shell and when typing breeze command will provide autocomplete for
- parameters and values.
- ****************************************************************************************************
- breeze [FLAGS] stop --
+ -U, --build-cache-pulled
+ Uses images pulled from registry (either DockerHub or GitHub depending on
+ --github-registry flag) to build images. The pulled images will be used as cache.
+ Those builds are usually faster than '--build-cache-local' builds, except when
+ the registry images are not yet updated. The DockerHub images are updated nightly and the
+ GitHub images are updated after merges to master, so it might be that the images are still
+ outdated vs. the latest version of the Dockerfiles you are using. In this case,
+ '--build-cache-local' might be faster, especially if you iterate and change the
+ Dockerfiles yourself.
- Brings down running docker compose environment. When you start the environment, the docker
- containers will continue running so that startup time is shorter. But they take quite a lot of
- memory and CPU. This command stops all running containers from the environment.
- ****************************************************************************************************
- breeze [FLAGS] restart --
+ This is the default strategy used by the CI image builds.
- Restarts running docker compose environment. When you restart the environment, the docker
- containers will be restarted. That includes cleaning up the databases. This is
- especially useful if you switch between different versions of airflow.
- ****************************************************************************************************
- breeze [FLAGS] toggle-suppress-cheatsheet --
+ -X, --build-cache-disabled
+ Disables cache during docker builds. This is useful if you want to make sure you
+ rebuild everything from scratch.
- Toggles on/off cheatsheet displayed before starting bash shell.
- ****************************************************************************************************
- breeze [FLAGS] toggle-suppress-asciiart --
+ This strategy is used by default for both Production and CI images for the scheduled
+ (nightly) builds in CI.
- Toggles on/off asciiart displayed before starting bash shell.
- ****************************************************************************************************
- breeze [FLAGS] docker-compose --
- Run docker-compose command instead of entering the environment. Use 'help' as command
- to see available commands. The passed after -- are treated
- as additional options passed to docker-compose. For example
+ ####################################################################################################
- 'breeze docker-compose pull -- --ignore-pull-failures'
- ****************************************************************************************************
- breeze [FLAGS] execute-command --
- Run chosen command instead of entering the environment. The command is run using
- 'bash -c "" if you need to pass arguments to your command, you need
- to pass them together with command surrounded with " or '. Alternatively you can
- pass arguments as passed after --. For example:
+ Detailed usage for command: static-check
- 'breeze execute-command "ls -la"' or
- 'breeze execute-command ls -- --la'
- ****************************************************************************************************
- breeze [FLAGS] static-check --
+
+ breeze static-check [FLAGS] static_check [-- <EXTRA_ARGS>]
Run selected static checks for currently changed files. You should specify static check that
you would like to run or 'all' to run all checks. One of:
- all bat-tests check-apache-license check-executables-have-shebangs check-hooks-apply
- check-merge-conflict check-xml debug-statements doctoc detect-private-key
- end-of-file-fixer flake8 forbid-tabs insert-license lint-dockerfile
- mixed-line-ending mypy setup-order shellcheck
+ all airflow-config-yaml base-operator bats-tests bats-in-container-tests build
+ check-apache-license check-builtin-literals check-executables-have-shebangs
+ check-hooks-apply check-integrations check-merge-conflict check-xml debug-statements
+ detect-private-key doctoc dont-use-safe-filter end-of-file-fixer fix-encoding-pragma
+ flake8 forbid-tabs helm-lint identity incorrect-use-of-LoggingMixin insert-license
+ language-matters lint-dockerfile lint-openapi markdownlint mermaid mixed-line-ending
+ mypy mypy-helm no-relative-imports pre-commit-descriptions pydevd python2-compile
+ python2-fastcheck python-no-log-warn rst-backticks setup-order setup-installation
+ shellcheck sort-in-the-wild trailing-whitespace update-breeze-file update-extras
+ update-local-yml-file yamllint
You can pass extra arguments including options to the pre-commit framework as
<EXTRA_ARGS> passed after --. For example:
'breeze static-check mypy' or
'breeze static-check mypy -- --files tests/core.py'
+ 'breeze static-check mypy -- --all-files'
+
+ To check all files that differ between your current branch and master run:
+
+ 'breeze static-check all -- --from-ref $(git merge-base master HEAD) --to-ref HEAD'
You can see all the options by adding --help EXTRA_ARG:
'breeze static-check mypy -- --help'
- ****************************************************************************************************
- breeze [FLAGS] static-check-all-files --
- Run selected static checks for all applicable files. You should specify static check that
- you would like to run or 'all' to run all checks. One of:
- all bat-tests check-apache-license check-executables-have-shebangs check-hooks-apply
- check-merge-conflict check-xml debug-statements doctoc detect-private-key
- end-of-file-fixer flake8 forbid-tabs insert-license lint-dockerfile
- mixed-line-ending mypy setup-order shellcheck
+ ####################################################################################################
- You can pass extra arguments including options to the pre-commit framework as
- passed after --. For example:
- 'breeze static-check-all-files mypy' or
- 'breeze static-check-all-files mypy -- --verbose'
+ Detailed usage for command: tests
- You can see all the options by adding --help EXTRA_ARG:
- 'breeze static-check-all-files mypy -- --help'
- ****************************************************************************************************
- breeze [FLAGS] test-target --
+ breeze tests [FLAGS] [TEST_TARGET ..] [-- <EXTRA_ARGS>]
Run the specified unit test target. There might be multiple
targets specified separated with commas. The <EXTRA_ARGS> passed after -- are treated
- as additional options passed to pytest. For example:
+ as additional options passed to pytest. You can pass 'tests' as target to
+ run all tests. For example:
+
+ 'breeze tests tests/core/test_core.py -- --logging-level=DEBUG'
+ 'breeze tests tests'
+
+ Flags:
+
+ --test-type TEST_TYPE
+ Type of the test to run. One of:
+
+ All,Core,Integration,Heisentests,Postgres,MySQL,Helm
+
+ Default: All
+
+
+ ####################################################################################################
+
+
+ Detailed usage for command: flags
- 'breeze test-target tests/test_core.py -- --logging-level=DEBUG'
- ****************************************************************************************************
- breeze [FLAGS] flags --
Explains in detail all the flags that can be used with breeze.
- ****************************************************************************************************
- breeze [FLAGS] help --
- Shows this help message.
- ****************************************************************************************************
- breeze [FLAGS] help-all --
- Shows detailed help for all commands and flags.
- ****************************************************************************************************
####################################################################################################
- Flags
+
+ Detailed usage for command: help
+
+
+ breeze help
+
+ Shows general help message for all commands.
+
####################################################################################################
- ****************************************************************************************************
- List of flags supported by breeze:
+ Detailed usage for command: help-all
+
+
+ breeze help-all
+
+ Shows detailed help for all commands and flags.
+
+
+ ####################################################################################################
+
+
+ ####################################################################################################
+
+ Summary of all flags supported by Breeze:
****************************************************************************************************
Choose Airflow variant
- ****************************************************************************************************
- -p, --python
+ -p, --python PYTHON_MAJOR_MINOR_VERSION
Python version used for the image. This is always major/minor version.
+
One of:
- 2.7 3.5 3.6 3.7
+ 2.7 3.5 3.6 3.7 3.8
+
+ ****************************************************************************************************
+ Choose backend to run for Airflow
- -b, --backend
+ -b, --backend BACKEND
Backend to use for tests - it determines which database is used.
One of:
@@ -842,107 +2105,109 @@ This is the current syntax for `./breeze <./breeze>`_:
Default: sqlite
+ --postgres-version POSTGRES_VERSION
+ Postgres version used. One of:
+
+ 9.6 10 11 12 13
+
+ --mysql-version MYSQL_VERSION
+ Mysql version used. One of:
+
+ 5.6 5.7
+
+ ****************************************************************************************************
+ Enable production image
+
+ -I, --production-image
+ Use production image for entering the environment and builds (not for tests).
+
+ ****************************************************************************************************
+ Additional actions executed while entering breeze
+
-d, --db-reset
- Resets the database at entry to the envvironment. It will drop all the tables
+ Resets the database at entry to the environment. It will drop all the tables
and data and recreate the DB from scratch even if 'restart' command was not used.
Combined with 'restart' command it enters the environment in the state that is
- ready to start airflow webserver/scheduler/worker. Without the switch, the database
+ ready to start Airflow webserver/scheduler/worker. Without the switch, the database
does not have any tables and you need to run reset db manually.
- -i, --integration
+ -i, --integration INTEGRATION
Integration to start during tests - it determines which integrations are started
for integration tests. There can be more than one integration started, or all to
start all integrations. Selected integrations are not saved for future execution.
One of:
- cassandra kerberos mongo openldap rabbitmq redis all
+ cassandra kerberos mongo openldap presto rabbitmq redis all
- -I, --production-image
- Use production image for entering the environment and builds (not for tests).
+ --init-script INIT_SCRIPT_FILE
+ Initialization script name - sourced from files/airflow-breeze-config. Default value:
+ init.sh. It will be executed after the environment is configured and started.
****************************************************************************************************
- Manage Kind kubernetes cluster (optional)
- ****************************************************************************************************
+ Additional actions executed while starting Airflow
+ --load-example-dags
+ Include Airflow example dags.
- Acion for the cluster : only one of the --kind-cluster-* flags can be used at a time:
+ --load-default-connections
+ Include Airflow Default Connections.
- -s, --kind-cluster-start
- Starts kind Kubernetes cluster after entering the environment. The cluster is started using
- Kubernetes Mode selected and Kubernetes version specified via --kubernetes-mode and
- --kubernetes-version flags.
-
- -x, --kind-cluster-stop
- Stops kind Kubernetes cluster if one has already been created. By default, if you do not
- stop environment, the Kubernetes cluster created for testing is continuously running and
- when you start Kubernetes testing again it will be reused. You can force deletion and
- recreation of such cluster with this flag.
+ ****************************************************************************************************
+ Cleanup options when stopping Airflow
- -r, --kind-cluster-recreate
+ --preserve-volumes
+ Use this flag if you would like to preserve data volumes from the databases used
+ by the integrations. By default, those volumes are deleted, so when you run 'stop'
+ or 'restart' commands you start from scratch, but by using this flag you can
+ preserve them. If you want to delete those volumes after stopping Breeze, just
+ run 'breeze stop' again without this flag.
- Recreates kind Kubernetes cluster if one has already been created. By default, if you do
- not stop environment, the Kubernetes cluster created for testing is continuously running
- and when you start Kubernetes testing again it will be reused. You can force deletion and
- recreation of such cluster with this flag.
+ ****************************************************************************************************
+ Kind kubernetes and Kubernetes tests configuration (optional)
- Kubernetes mode/version flags:
+ Configuration for the KinD Kubernetes cluster and tests:
- -K, --kubernetes-mode
- Kubernetes mode - only used in case one of --kind-cluster-* commands is used.
+ -K, --kubernetes-mode KUBERNETES_MODE
+ Kubernetes mode - only used in case one of kind-cluster commands is used.
One of:
- persistent_mode git_mode
+ image
- Default: git_mode
+ Default: image
- -V, --kubernetes-version
- Kubernetes version - only used in case one of --kind-cluster-* commands is used.
+ -V, --kubernetes-version KUBERNETES_VERSION
+ Kubernetes version - only used in case one of kind-cluster commands is used.
One of:
- v1.15.3 v1.16.2
-
- Default: v1.15.3
+ v1.18.6 v1.17.5 v1.16.9
- ****************************************************************************************************
- Manage mounting local files
- ****************************************************************************************************
+ Default: v1.18.6
- -l, --skip-mounting-local-sources
- Skips mounting local volume with sources - you get exactly what is in the
- docker image rather than your current local sources of airflow.
+ --kind-version KIND_VERSION
+ Kind version - only used in case one of kind-cluster commands is used.
+ One of:
- ****************************************************************************************************
- Install Airflow if different than current
- ****************************************************************************************************
+ v0.8.0
- -a, --install-airflow-version
- If specified, installs airflow directly from PIP released version. One of:
+ Default: v0.8.0
- 1.10.9 1.10.8 1.10.7 1.10.6 1.10.5 1.10.4 1.10.3 1.10.2 master v1-10-test
+ --helm-version HELM_VERSION
+ Helm version - only used in case one of kind-cluster commands is used.
+ One of:
- -t, --install-airflow-reference
- Only for production image - if specified, installs airflow directly from reference in GitHub
+ v3.2.4
+ Default: v3.2.4
****************************************************************************************************
- Database versions
- ****************************************************************************************************
-
- --postgres-version
- Postgres version used. One of:
-
- 9.6 10
-
-
- --mysql-version
- Mysql version used. One of:
-
- 5.6 5.7
+ Manage mounting local files
+ -l, --skip-mounting-local-sources
+ Skips mounting local volume with sources - you get exactly what is in the
+ docker image rather than your current local sources of Airflow.
****************************************************************************************************
Assume answers to questions
- ****************************************************************************************************
-y, --assume-yes
Assume 'yes' answer to all questions.
@@ -954,25 +2219,41 @@ This is the current syntax for `./breeze <./breeze>`_:
Assume 'quit' answer to all questions.
****************************************************************************************************
- Credentials
+ Choose different Airflow version to install or run
+
+ -a, --install-airflow-version INSTALL_AIRFLOW_VERSION
+ If specified, installs Airflow directly from PIP released version. This happens at
+ image building time in production image and at container entering time for CI image. One of:
+
+ 1.10.15 1.10.14 1.10.12 1.10.11 1.10.10 1.10.9 none wheel sdist
+
+ When 'none' is used, you can install airflow from local packages. When building the image,
+ the airflow package should be added to 'docker-context-files' and the
+ --install-from-docker-context-files flag should be used. When running the image, the airflow
+ package should be added to the dist folder and the --install-packages-from-dist flag should be used.
+
+ -t, --install-airflow-reference INSTALL_AIRFLOW_REFERENCE
+ If specified, installs Airflow directly from reference in GitHub. This happens at
+ image building time in production image and at container entering time for CI image.
+ This can be a GitHub branch like master or v1-10-test, or a tag like 2.0.0a1.
+
+ --no-rbac-ui
+ Disables RBAC UI when Airflow 1.10.* is installed.
+
+ --install-packages-from-dist
+ If specified it will look for packages placed in the dist folder and it will install the
+ packages after installing Airflow. This is useful for testing provider
+ packages.
+
****************************************************************************************************
+ Credentials
-f, --forward-credentials
Forwards host credentials to docker container. Use with care as it will make
your credentials available to everything you install in Docker.
****************************************************************************************************
- Increase verbosity of the script
- ****************************************************************************************************
-
- -v, --verbose
- Show verbose information about executed commands (enabled by default for running test).
- Note that you can further increase verbosity and see all the commands executed by breeze
- by running 'export VERBOSE_COMMANDS="true"' before running breeze.
-
- ****************************************************************************************************
- Flags for building the docker images
- ****************************************************************************************************
+ Flags for building Docker images (both CI and production)
-F, --force-build-images
Forces building of the local docker images. The images are rebuilt
@@ -984,7 +2265,9 @@ This is the current syntax for `./breeze <./breeze>`_:
images are pulled by default only for the first time you run the
environment, later the locally build images are used as cache.
- -E, --extras
+ Customization options:
+
+ -E, --extras EXTRAS
Extras to pass to build images. The defaults are different for CI and production images:
CI image:
@@ -994,83 +2277,175 @@ This is the current syntax for `./breeze <./breeze>`_:
async,aws,azure,celery,dask,elasticsearch,gcp,kubernetes,mysql,postgres,redis,slack,
ssh,statsd,virtualenv
+ --image-tag TAG
+ Additional tag in the image.
+
+ --disable-pypi-when-building
+ Disables installing Airflow from PyPI when building. If you use this flag and want
+ to install Airflow, you have to install it from packages placed in
+ 'docker-context-files' and use the --install-from-local-files-when-building flag.
+
+ --additional-extras ADDITIONAL_EXTRAS
+ Additional extras to pass to build images. The default is no additional extras.
+
+ --additional-python-deps ADDITIONAL_PYTHON_DEPS
+ Additional python dependencies to use when building the images.
+
+ --dev-apt-command DEV_APT_COMMAND
+ The basic command executed before dev apt deps are installed.
+
+ --additional-dev-apt-command ADDITIONAL_DEV_APT_COMMAND
+ Additional command executed before dev apt deps are installed.
+
+ --additional-dev-apt-deps ADDITIONAL_DEV_APT_DEPS
+ Additional apt dev dependencies to use when building the images.
+
+ --dev-apt-deps DEV_APT_DEPS
+ The basic apt dev dependencies to use when building the images.
+
+ --additional-dev-apt-envs ADDITIONAL_DEV_APT_ENVS
+ Additional environment variables set when adding dev dependencies.
+
+ --runtime-apt-command RUNTIME_APT_COMMAND
+ The basic command executed before runtime apt deps are installed.
+
+ --additional-runtime-apt-command ADDITIONAL_RUNTIME_APT_COMMAND
+ Additional command executed before runtime apt deps are installed.
+
+ --runtime-apt-deps RUNTIME_APT_DEPS
+ The basic apt runtime dependencies to use when building the images.
+
+ --additional-runtime-apt-deps ADDITIONAL_RUNTIME_DEPS
+ Additional apt runtime dependencies to use when building the images.
+
+ --additional-runtime-apt-envs ADDITIONAL_RUNTIME_APT_ENVS
+ Additional environment variables set when adding runtime dependencies.
+
+ Build options:
+
+ --disable-mysql-client-installation
+ Disables installation of the mysql client which might be problematic if you are building
+ the image in a controlled environment. Only valid for the production image.
+
+ --constraints-location
+ URL to the constraints file. In case of the production image it can also be a path to the
+ constraint file placed in the 'docker-context-files' folder, in which case it has to be
+ in the form of '/docker-context-files/'
+
+ --disable-pip-cache
+ Disables GitHub PIP cache during the build. Useful if GitHub is not reachable during build.
+
+ --install-from-local-files-when-building
+ This flag is used during image building. If it is used in addition to installing
+ Airflow from PyPI, the packages are installed from the .whl and .tar.gz packages placed
+ in the 'docker-context-files' folder. The same flag can be used when entering
+ the CI image - in this case the .whl and .tar.gz files will also be installed automatically.
+
-C, --force-clean-images
Force build images with cache disabled. This will remove the pulled or build images
and start building images from scratch. This might take a long time.
- -L, --use-local-cache
+ -r, --skip-rebuild-check
+ Skips checking image for rebuilds. It will use whatever image is available locally/pulled.
+
+ -L, --build-cache-local
Uses local cache to build images. No pulled images will be used, but results of local
- builds in the Docker cache are used instead.
+ builds in the Docker cache are used instead. This will take longer than when the pulled
+ cache is used for the first time, but subsequent '--build-cache-local' builds will be
+ faster as they will use mostly the locally built cache.
- ****************************************************************************************************
- Flags for pushing the docker images
- ****************************************************************************************************
+ This is the default strategy used by the Production image builds.
- -u, --push-images
- After building - uploads the images to DockerHub
- It is useful in case you use your own DockerHub user to store images and you want
- to build them locally. Note that you need to use 'docker login' before you upload images.
+ -U, --build-cache-pulled
+ Uses images pulled from the registry (either DockerHub or GitHub depending on the
+ --github-registry flag) to build images. The pulled images will be used as cache.
+ Those builds are usually faster than '--build-cache-local' builds, except when
+ the registry images are not yet updated. The DockerHub images are updated nightly and the
+ GitHub images are updated after merges to master, so the images might still be
+ outdated vs. the latest version of the Dockerfiles you are using. In this case,
+ '--build-cache-local' might be faster, especially if you iterate and change the
+ Dockerfiles yourself.
+
+ This is the default strategy used by the CI image builds.
+
+ -X, --build-cache-disabled
+ Disables cache during docker builds. This is useful if you want to make sure that
+ everything is rebuilt from scratch.
+
+ This strategy is used by default for both Production and CI images for the scheduled
+ (nightly) builds in CI.
****************************************************************************************************
- User and repo used to login to github registry
- ****************************************************************************************************
+ Flags for pulling/pushing Docker images (both CI and production)
- -D, --dockerhub-user
+ -D, --dockerhub-user DOCKERHUB_USER
DockerHub user used to pull, push and build images. Default: apache.
- -H, --dockerhub-repo
+ -H, --dockerhub-repo DOCKERHUB_REPO
DockerHub repository used to pull, push, build images. Default: airflow.
- ****************************************************************************************************
+ -c, --github-registry GITHUB_REGISTRY
+ If GitHub registry is enabled, pulls and pushes are done from the GitHub registry, not
+ DockerHub. You need to be logged in to the registry in order to be able to pull/push from it,
+ and you need to be a committer to push to Apache Airflow's GitHub registry.
- .. END BREEZE HELP MARKER
+ -g, --github-repository GITHUB_REPOSITORY
+ GitHub repository used to pull, push images when cache is used.
+ Default: apache/airflow.
-Convenience Scripts
--------------------
+ If you use this flag, the --github-registry flag is enabled automatically.
-Once you run ``./breeze`` you can also execute various actions via generated convenience scripts:
+ -s, --github-image-id COMMIT_SHA|RUN_ID
+ <COMMIT_SHA> or <RUN_ID> of the image. Images in GitHub registry are stored with those
+ to be able to easily find the image for particular CI runs. Once you know the
+ <COMMIT_SHA> or <RUN_ID>, you can specify it in the github-image-id flag and Breeze will
+ automatically pull and use that image so that you can easily reproduce a problem
+ that occurred in CI.
-.. code-block::
+ If you use this flag, the --github-registry flag is enabled automatically.
- Enter the environment : ./.build/cmd_run
- Run command in the environment : ./.build/cmd_run "[command with args]" [bash options]
- Run tests in the environment : ./.build/test_run [test-target] [pytest options]
- Run Docker compose command : ./.build/dc [help/pull/...] [docker-compose options]
-Troubleshooting
-===============
+ Default: latest.
-If you are having problems with the Breeze environment, try the steps below. After each step you
-can check whether your problem is fixed.
+ ****************************************************************************************************
+ Flags for running tests
-1. If you are on macOS, check if you have enough disk space for Docker.
-2. Restart Breeze with ``./breeze restart``.
-3. Delete the ``.build`` directory and run ``./breeze build-image --force-pull-images``.
-4. Clean up Docker images via ``breeze cleanup-image`` command.
-5. Restart your Docker Engine and try again.
-6. Restart your machine and try again.
-7. Re-install Docker CE and try again.
+ --test-type TEST_TYPE
+ Type of the test to run. One of:
-In case the problems are not solved, you can set the VERBOSE_COMMANDS variable to "true":
+ All,Core,Integration,Heisentests,Postgres,MySQL,Helm
-.. code-block::
+ Default: All
- export VERBOSE_COMMANDS="true"
+ ****************************************************************************************************
+ Flags for generation of the packages
+ -S, --version-suffix-for-pypi SUFFIX
+ Adds optional suffix to the version in the generated backport package. It can be used
+ to generate rc1/rc2 ... versions of the packages to be uploaded to PyPI.
-Then run the failed command, copy-and-paste the output from your terminal to the
-`Airflow Slack `_ #airflow-breeze channel and
-describe your problem.
+ -N, --version-suffix-for-svn SUFFIX
+ Adds optional suffix to the generated package names. It can be used to generate
+ rc1/rc2 ... versions of the packages to be uploaded to SVN.
-Fixing File/Directory Ownership
--------------------------------
+ ****************************************************************************************************
+ Increase verbosity of the scripts
-On Linux there is a problem with propagating ownership of created files (a known Docker problem). Basically,
-files and directories created in the container are not owned by the host user (but by the root user in our
-case). This may prevent you from switching branches, for example, if files owned by the root user are
-created within your sources. In case you are on a Linux host and have some files in your sources created
-y the root user, you can fix the ownership of those files by running this script:
+ -v, --verbose
+ Show verbose information about executed docker, kind, kubectl, helm commands. Useful for
+ debugging - when you run breeze with the --verbose flag you will be able to see the commands
+ executed under the hood and copy&paste them to your terminal to debug them more easily.
-.. code-block::
+ Note that you can further increase verbosity and see all the commands executed by breeze
+ by running 'export VERBOSE_COMMANDS="true"' before running breeze.
- ./scripts/ci/ci_fix_ownership.sh
+ ****************************************************************************************************
+ Print detailed help message
+
+ -h, --help
+ Shows detailed help message for the command specified.
+
+ .. END BREEZE HELP MARKER
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index a8aa3533bafa4..50817d0373264 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -1,3 +1,469 @@
+Airflow 1.10.15, 2021-03-16
+----------------------------
+
+Bug Fixes
+"""""""""
+
+- Fix ``airflow db upgrade`` to upgrade db as intended (#13267)
+- Moved boto3 limitation to snowflake (#13286)
+- ``KubernetesExecutor`` should accept images from ``executor_config`` (#13074)
+- Scheduler should acknowledge active runs properly (#13803)
+- Bugfix: Unable to import Airflow plugins on Python 3.8 (#12859)
+- Include ``airflow/contrib/executors`` in the dist package
+- Pin Click version for Python 2.7 users
+- Ensure all statsd timers use millisecond values. (#10633)
+- [``kubernetes_generate_dag_yaml``] - Fix dag yaml generate function (#13816)
+- Fix `airflow tasks clear` cli command with `--yes` (#14188)
+- Fix permission error on non-POSIX filesystem (#13121) (#14383)
+- Fixed deprecation message for "variables" command (#14457)
+- BugFix: fix the ``delete_dag`` function of json_client (#14441)
+- Fix merging of secrets and configmaps for ``KubernetesExecutor`` (#14090)
+- Fix webserver exiting when gunicorn master crashes (#13470)
+- Bump ini from 1.3.5 to 1.3.8 in ``airflow/www_rbac``
+- Bump datatables.net from 1.10.21 to 1.10.23 in ``airflow/www_rbac``
+- Webserver: Sanitize string passed to origin param (#14738)
+- Make ``rbac_app``'s ``db.session`` use the same timezone with ``@provide_session`` (#14025)
+
+Improvements
+""""""""""""
+
+- Adds airflow as viable docker command in official image (#12878)
+- ``StreamLogWriter``: Provide (no-op) close method (#10885)
+- Add 'airflow variables list' command for 1.10.x transition version (#14462)
+
+Doc only changes
+""""""""""""""""
+
+- Update URL for Airflow docs (#13561)
+- Clarifies version args for installing 1.10 in Docker (#12875)
+
+Airflow 1.10.14, 2020-12-10
+----------------------------
+
+Bug Fixes
+"""""""""
+
+- BugFix: Tasks with ``depends_on_past`` or ``task_concurrency`` are stuck (#12663)
+- Fix issue with empty Resources in executor_config (#12633)
+- Fix: Deprecated config ``force_log_out_after`` was not used (#12661)
+- Fix empty asctime field in JSON formatted logs (#10515)
+- [AIRFLOW-2809] Fix security issue regarding Flask SECRET_KEY (#3651)
+- [AIRFLOW-2884] Fix Flask SECRET_KEY security issue in www_rbac (#3729)
+- [AIRFLOW-2886] Generate random Flask SECRET_KEY in default config (#3738)
+- Add missing comma in setup.py (#12790)
+- Bugfix: Unable to import Airflow plugins on Python 3.8 (#12859)
+- Fix setup.py missing comma in ``setup_requires`` (#12880)
+- Don't emit first_task_scheduling_delay metric for only-once dags (#12835)
+
+Improvements
+""""""""""""
+
+- Update setup.py to get non-conflicting set of dependencies (#12636)
+- Rename ``[scheduler] max_threads`` to ``[scheduler] parsing_processes`` (#12605)
+- Add metric for scheduling delay between first run task & expected start time (#9544)
+- Add new-style 2.0 command names for Airflow 1.10.x (#12725)
+- Add Kubernetes cleanup-pods CLI command for Helm Chart (#11802)
+- Don't let webserver run with dangerous config (#12747)
+- Replace pkg_resources with importlib.metadata to avoid VersionConflict errors (#12694)
+
+Doc only changes
+""""""""""""""""
+
+- Clarified information about supported Databases
+
+
+Airflow 1.10.13, 2020-11-24
+----------------------------
+
+New Features
+""""""""""""
+
+- Add "already checked" to failed pods in K8sPodOperator (#11368)
+- Pass SQLAlchemy engine options to FAB based UI (#11395)
+- [AIRFLOW-4438] Add Gzip compression to S3_hook (#8571)
+- Add permission "extra_links" for Viewer role and above (#10719)
+- Add generate_yaml command to easily test KubernetesExecutor before deploying pods (#10677)
+- Add Secrets backend for Microsoft Azure Key Vault (#10898)
+
+Bug Fixes
+"""""""""
+
+- SkipMixin: Handle empty branches (#11120)
+- [AIRFLOW-5274] dag loading duration metric name too long (#5890)
+- Handle no Dagrun in DagrunIdDep (#8389) (#11343)
+- Fix Kubernetes Executor logs for long dag names (#10942)
+- Add on_kill support for the KubernetesPodOperator (#10666)
+- KubernetesPodOperator template fix (#10963)
+- Fix displaying of add serialized_dag table migration
+- Fix Start Date tooltip on DAGs page (#10637)
+- URL encode execution date in the Last Run link (#10595)
+- Fixes issue with affinity backcompat in Airflow 1.10
+- Fix KubernetesExecutor import in views.py
+- Fix issues with Gantt View (#12419)
+- Fix Entrypoint and _CMD config variables (#12411)
+- Fix operator field update for SerializedBaseOperator (#10924)
+- Limited cryptography to < 3.2 for python 2.7
+- Install cattr on Python 3.7 - Fix docs build on RTD (#12045)
+- Limit version of marshmallow-sqlalchemy
+- Pin `kubernetes` to a max version of 11.0.0 (#11974)
+- Use snakebite-py3 for HDFS dependency for Python3 (#12340)
+- Removes snakebite kerberos dependency (#10865)
+- Fix failing dependencies for FAB and Celery (#10828)
+- Fix pod_mutation_hook for 1.10.13 (#10850)
+- Fix formatting of Host information
+- Fix Logout Google Auth issue in Non-RBAC UI (#11890)
+- Add missing imports to app.py (#10650)
+- Show Generic Error for Charts & Query View in old UI (#12495)
+- TimeSensor should respect the default_timezone config (#9699)
+- TimeSensor should respect DAG timezone (#9882)
+- Unify user session lifetime configuration (#11970)
+- Handle outdated webserver session timeout gracefully. (#12332)
+
+
+Improvements
+""""""""""""
+
+- Add XCom.deserialize_value to Airflow 1.10.13 (#12328)
+- Mount airflow.cfg to pod_template_file (#12311)
+- All k8s object must comply with JSON Schema (#12003)
+- Validate airflow chart values.yaml & values.schema.json (#11990)
+- Pod template file uses custom env variable (#11480)
+- Bump attrs and cattrs dependencies (#11969)
+- Bump attrs to > 20.0 (#11799)
+- [AIRFLOW-3607] Only query DB once per DAG run for TriggerRuleDep (#4751)
+- Rename task with duplicate task_id
+- Manage Flask AppBuilder Tables using Alembic Migrations (#12352)
+- ``airflow test`` only works for tasks in 1.10, not whole dags (#11191)
+- Improve warning messaging for duplicate task_ids in a DAG (#11126)
+- Pins moto to 1.3.14 (#10986)
+- DbApiHook: Support kwargs in get_pandas_df (#9730)
+- Make grace_period_seconds option on K8sPodOperator (#10727)
+- Fix syntax error in Dockerfile 'maintainer' Label (#10899)
+- The entrypoints in Docker Image should be owned by Airflow (#10853)
+- Make dockerfiles Google Shell Guide Compliant (#10734)
+- clean-logs script for Dockerfile: trim logs before sleep (#10685)
+- When sending tasks to celery from a sub-process, reset signal handlers (#11278)
+- SkipMixin: Add missing session.commit() and test (#10421)
+- Webserver: Further Sanitize values passed to origin param (#12459)
+- Security upgrade lodash from 4.17.19 to 4.17.20 (#11095)
+- Log instead of raise an Error for unregistered OperatorLinks (#11959)
+- Mask Password in Log table when using the CLI (#11468)
+- [AIRFLOW-3607] Optimize dep checking when depends on past set and concurrency limit
+- Execute job cancel HTTPRequest in Dataproc Hook (#10361)
+- Use rst lexer to format airflow upgrade check output (#11259)
+- Remove deprecation warning from contrib/kubernetes/pod.py
+- adding body as templated field for CloudSqlImportOperator (#10510)
+- Change log level for User's session to DEBUG (#12414)
+
+Deprecations
+""""""""""""
+
+- Deprecate importing Hooks from plugin-created module (#12133)
+- Deprecate adding Operators and Sensors via plugins (#12069)
+
+Doc only changes
+""""""""""""""""
+
+- [Doc] Correct description for macro task_instance_key_str (#11062)
+- Checks if all the libraries in setup.py are listed in installation.rst file (#12023)
+- Revise "Project Focus" copy (#12011)
+- Move Project focus and Principles higher in the README (#11973)
+- Remove archived link from README.md (#11945)
+- Update download url for Airflow Version (#11800)
+- Add Project URLs for PyPI page (#11801)
+- Move Backport Providers docs to our docsite (#11136)
+- Refactor rebase copy (#11030)
+- Add missing images for kubernetes executor docs (#11083)
+- Fix indentation in executor_config example (#10467)
+- Enhanced the Kubernetes Executor doc (#10433)
+- Refactor content to a markdown table (#10863)
+- Rename "Beyond the Horizon" section and refactor content (#10802)
+- Refactor official source section to use bullets (#10801)
+- Add section for official source code (#10678)
+- Add redbubble link to Airflow merchandise (#10359)
+- README Doc: Link to Airflow directory in ASF Directory (#11137)
+- Fix the default value for VaultBackend's config_path (#12518)
+
+Airflow 1.10.12, 2020-08-25
+----------------------------
+
+New Features
+""""""""""""
+
+- Add DateTimeSensor (#9697)
+- Add ClusterPolicyViolation support to airflow local settings (#10282)
+- Get Airflow configs with sensitive data from Secret Backends (#9645)
+- [AIRFLOW-4734] Upsert functionality for PostgresHook.insert_rows() (#8625)
+- Allow defining custom XCom class (#8560)
+
+Bug Fixes
+"""""""""
+
+- Add pre 1.10.11 Kubernetes Paths back with Deprecation Warning (#10067)
+- Fixes PodMutationHook for backwards compatibility (#9903)
+- Fix bug in executor_config when defining resources (#9935)
+- Respect DAG Serialization setting when running sync_perm (#10321)
+- Show correct duration on graph view for running task (#8311) (#8675)
+- Fix regression in SQLThresholdCheckOperator (#9312)
+- [AIRFLOW-6931] Fixed migrations to find all dependencies for MSSQL (#9891)
+- Avoid sharing session with RenderedTaskInstanceFields write and delete (#9993)
+- Fix clear future recursive when ExternalTaskMarker is used (#9515)
+- Handle IntegrityError while creating TIs (#10136)
+- Fix airflow-webserver startup errors when using Kerberos Auth (#10047)
+- Fixes treatment of open slots in scheduler (#9316) (#9505)
+- Fix KubernetesPodOperator reattachment (#10230)
+- Fix more PodMutationHook issues for backwards compatibility (#10084)
+- [AIRFLOW-5391] Do not re-run skipped tasks when they are cleared (#7276)
+- Fix task_instance_mutation_hook (#9910)
+- Fixes failing formatting of DAG file containing {} in docstring (#9779)
+- Fix is_terminal_support_colors function (#9734)
+- Fix PythonVirtualenvOperator when using ``provide_context=True`` (#8256)
+- Fix issue with mounting volumes from secrets (#10366)
+- BugFix: K8s Executor Multinamespace mode is evaluated to true by default (#10410)
+- Make KubernetesExecutor recognize kubernetes_labels (#10412)
+- Fix broken Kubernetes PodRuntimeInfoEnv (#10478)
+- Sync FAB Permissions for all base views (#12162)
+
+Improvements
+""""""""""""
+
+- Use Hash of Serialized DAG to determine DAG is changed or not (#10227)
+- Update Serialized DAGs in Webserver when DAGs are Updated (#9851)
+- Do not Update Serialized DAGs in DB if DAG did not change (#9850)
+- Add __repr__ to SerializedDagModel (#9862)
+- Update JS packages to latest versions (#9811) (#9921)
+- UI Graph View: Focus upstream / downstream task dependencies on mouseover (#9303)
+- Allow ``image`` in ``KubernetesPodOperator`` to be templated (#10068)
+- [AIRFLOW-6843] Add delete_option_kwargs to delete_namespaced_pod (#7523)
+- Improve process terminating in scheduler_job (#8064)
+- Replace deprecated base classes used in bigquery_check_operator (#10272)
+- [AIRFLOW-5897] Allow setting -1 as pool slots value in webserver (#6550)
+- Limit all google-cloud api to <2.0.0 (#10317)
+- [AIRFLOW-6706] Lazy load operator extra links (#7327) (#10318)
+- Add Snowflake support to SQL operator and sensor (#9843)
+- Makes multi-namespace mode optional (#9570)
+- Pin Pyarrow < 1.0
+- Pin pymongo version to <3.11.0
+- Pin google-cloud-container to <2 (#9901)
+- Dockerfile: Remove package.json and yarn.lock from the prod image (#9814)
+- Dockerfile: The group of embedded DAGs should be root to be OpenShift compatible (#9794)
+- Update upper limit of flask-swagger, gunicorn & jinja2 (#9684)
+- Webserver: Sanitize values passed to origin param (#10334)
+- Sort connection type list in add/edit page alphabetically (#8692)
+
+Doc only changes
+""""""""""""""""
+
+- Add new committers: Ry Walker & Leah Cole to project.rst (#9892)
+- Add Qingping Hou to committers list (#9725)
+- Updated link to official documentation (#9629)
+- Create a short-link for Airflow Slack Invites (#10034)
+- Fix docstrings in BigQueryGetDataOperator (#10042)
+- Set language on code-block on docs/howto/email-config.rst (#10238)
+- Remove duplicate line from 1.10.10 CHANGELOG (#10289)
+- Improve heading on Email Configuration page (#10175)
+- Fix link for the Jinja Project in docs/tutorial.rst (#10245)
+- Create separate section for Cron Presets (#10247)
+- Add Syntax Highlights to code-blocks in docs/best-practices.rst (#10258)
+- Fix typo in Task Lifecycle section (#9867)
+- Make Secret Backend docs clearer about Variable & Connection View (#8913)
+
+Airflow 1.10.11, 2020-07-10
+-----------------------------
+
+New Features
+""""""""""""
+
+- Add task instance mutation hook (#8852)
+- Allow changing Task States Colors (#9520)
+- Add support for AWS Secrets Manager as Secrets Backend (#8186)
+- Add airflow info command to the CLI (#8704)
+- Add Local Filesystem Secret Backend (#8596)
+- Add Airflow config CLI command (#8694)
+- Add Support for Python 3.8 (#8836)(#8823)
+- Allow K8S worker pod to be configured from JSON/YAML file (#6230)
+- Add quarterly to crontab presets (#6873)
+- Add support for ephemeral storage on KubernetesPodOperator (#6337)
+- Add AirflowFailException to fail without any retry (#7133)
+- Add SQL Branch Operator (#8942)
+
+Bug Fixes
+"""""""""
+
+- Use NULL as dag.description default value (#7593)
+- BugFix: DAG trigger via UI error in RBAC UI (#8411)
+- Fix logging issue when running tasks (#9363)
+- Fix JSON encoding error in DockerOperator (#8287)
+- Fix alembic crash due to typing import (#6547)
+- Correctly restore upstream_task_ids when deserializing Operators (#8775)
+- Correctly store non-default Nones in serialized tasks/dags (#8772)
+- Correctly deserialize dagrun_timeout field on DAGs (#8735)
+- Fix tree view if config contains " (#9250)
+- Fix Dag Run UI execution date with timezone cannot be saved issue (#8902)
+- Fix Migration for MSSQL (#8385)
+- RBAC ui: Fix missing Y-axis labels with units in plots (#8252)
+- RBAC ui: Fix missing task runs being rendered as circles instead (#8253)
+- Fix: DagRuns page renders the state column with artifacts in old UI (#9612)
+- Fix task and dag stats on home page (#8865)
+- Fix the trigger_dag api in the case of nested subdags (#8081)
+- UX Fix: Prevent undesired text selection with DAG title selection in Chrome (#8912)
+- Fix connection add/edit for spark (#8685)
+- Fix retries causing constraint violation on MySQL with DAG Serialization (#9336)
+- [AIRFLOW-4472] Use json.dumps/loads for templating lineage data (#5253)
+- Restrict google-cloud-texttospeech to committer (#7392)
- [AIRFLOW-XXXX] Remove duplicated paragraph in docs (#7662)
- Fix reference to KubernetesPodOperator (#8100)
+- Update the tree view of dag on Concepts Last Run Only (#8268)
Airflow 1.10.9, 2020-02-07
@@ -1515,7 +1981,7 @@ Improvements
- [AIRFLOW-3034]: Readme updates : Add Slack & Twitter, remove Gitter
- [AIRFLOW-3028] Update Text & Images in Readme.md
- [AIRFLOW-208] Add badge to show supported Python versions (#3839)
-- [AIRFLOW-2238] Update PR tool to push directly to Github
+- [AIRFLOW-2238] Update PR tool to push directly to GitHub
- [AIRFLOW-2238] Flake8 fixes on dev/airflow-pr
- [AIRFLOW-2238] Update PR tool to remove outdated info (#3978)
- [AIRFLOW-3005] Replace 'Airbnb Airflow' with 'Apache Airflow' (#3845)
@@ -3591,7 +4057,7 @@ Airflow 1.7.1, 2016-05-19
- Update plugins.rst for clarity on the example (#1309)
- Fix s3 logging issue
- Add twitter feed example dag
-- Github ISSUE_TEMPLATE & PR_TEMPLATE cleanup
+- GitHub ISSUE_TEMPLATE & PR_TEMPLATE cleanup
- Reduce logger verbosity
- Adding a PR Template
- Add Lucid to list of users
diff --git a/CI.rst b/CI.rst
new file mode 100644
index 0000000000000..0ac1c9d9f4ad4
--- /dev/null
+++ b/CI.rst
@@ -0,0 +1,786 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ .. http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+.. contents:: :local:
+
+CI Environment
+==============
+
+Continuous Integration is an important component of making Apache Airflow robust and stable. We are running
+a lot of tests for every pull request, for the master and v1-10-test branches, and regularly as CRON jobs.
+
+Our execution environment for CI is `GitHub Actions <https://github.com/features/actions>`_. GitHub Actions
+(GA) are very well integrated with GitHub code and workflows and have evolved fast in 2019/2020 to become
+a fully-fledged CI environment, easy to use and develop for, so we decided to switch to it. Our previous
+CI system was Travis CI.
+
+However, part of our philosophy is that we are not tightly coupled with any of the CI
+environments we use. Most of our CI jobs are written as bash scripts which are executed as steps in
+the CI jobs, and we have a number of variables that determine the build behaviour.
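+
+For example, a minimal sketch (the variable values are illustrative - the full list of variables is
+described in the table below) of overriding such variables before running the scripts locally:
+
+.. code-block:: bash
+
+    # Variables like these are read by the CI scripts and by Breeze alike
+    export VERBOSE="true"   # print docker/kind/kubectl/helm commands before execution
+    export DB_RESET="true"  # reset the database at container entry, as the CI workflows do
+    ./breeze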
+
+GitHub Actions runs
+-------------------
+
+Our builds on CI are highly optimized. They utilise some of the latest features provided by the GitHub
+Actions environment that make it possible to reuse parts of the build process across different jobs.
+
+A big part of our CI runs uses container images. Airflow has a lot of dependencies and in order to make
+sure that we are running tests in a well-configured and repeatable environment, most of the tests,
+documentation building, and some more sophisticated static checks are run inside a docker container
+environment. This environment consists of two types of images: CI images and PROD images. CI images
+are used for most of the tests and checks, while PROD images are used in the Kubernetes tests.
+
+In order to run the tests, we need to make sure that the images are built using the latest sources and
+that the build is done quickly (a full rebuild of such an image from scratch might take ~15 minutes).
+Therefore optimisation techniques have been implemented that efficiently use the cache from the GitHub
+Docker registry - in most cases this brings down the time needed to rebuild the image to ~4 minutes. In
+some cases (when dependencies change) it can be ~6-7 minutes and in case the Python base image releases
+a new patch level, it can be ~12 minutes.
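+
+Locally, the same caching strategies are exposed as Breeze flags (see `BREEZE.rst <BREEZE.rst>`_); as an
+illustrative sketch, a CI image build that reuses the pulled registry cache could look like this:
+
+.. code-block:: bash
+
+    # Rebuild the CI image using images pulled from the registry as cache (the default CI strategy)
+    ./breeze build-image --build-cache-pulled --python 3.6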
+
+Currently, in the master version of Airflow, we run tests on 3 different versions of Python (3.6, 3.7, 3.8)
+which means that we have to build 6 images (3 CI ones and 3 PROD ones). Yet we run around 12 jobs
+with each of the CI images. That is a lot of time to just build the environment to run. Therefore
+we are utilising the ``workflow_run`` feature of GitHub Actions. This feature allows running a separate,
+independent workflow when the main workflow is run - this separate workflow is different from the main
+one, because by default it runs using the ``master`` version of the sources but also - and most of all -
+because it has WRITE access to the repository. This is especially important in our case where Pull Requests
+to Airflow might come from any repository, and it would be a huge security issue if anyone from outside
+could utilise the WRITE access to the Apache Airflow repository via an external Pull Request.
+
+Thanks to the WRITE access and the fact that the 'workflow_run' by default uses the 'master' version of the
+sources, we can safely run some logic there that will check out the incoming Pull Request, build the
+container image from the sources of the incoming PR and push such an image to the GitHub Docker Registry -
+so that this image can be built only once and used by all the jobs running tests. The image is tagged with
+the unique ``RUN_ID`` of the incoming Pull Request and the tests run in the Pull Request can simply pull
+such an image rather than build it from scratch. Pulling such an image takes ~1 minute, thanks to which we
+are saving a lot of precious time for jobs.
+
+
+Local runs
+----------
+
+The main goal of the CI philosophy we have is that no matter how complex the test and integration
+infrastructure, as a developer you should be able to reproduce and re-run any of the failed checks
+locally. One part of it is the pre-commit checks that allow you to run the same static checks in CI
+and locally; another part is the CI environment, which is replicated locally with Breeze.
+
+You can read more about Breeze in `BREEZE.rst <BREEZE.rst>`_ but in essence it is a script that allows
+you to re-create the CI environment in your local development instance and interact with it. In its basic
+form, when you do development you can run all the same tests that will be run in CI - but locally,
+before you submit them as a PR. Another use case where Breeze is useful is when tests fail in CI. You can
+take the ``RUN_ID`` of the failed build and pass it as the ``--github-image-id`` parameter of Breeze and it
+will download the very same version of the image that was used in CI and run it locally. This way, you can
+very easily reproduce any failed test that happens in CI - even if you do not check out the sources
+connected with the run.
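+
+A minimal sketch of that flow (the ``RUN_ID`` value below is illustrative - take the real one from the
+failed GitHub Actions build):
+
+.. code-block:: bash
+
+    # Enter Breeze using the very same CI image that was built for the failed CI run
+    ./breeze --github-image-id 339800923 --python 3.6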
+
+You can read more about it in `BREEZE.rst <BREEZE.rst>`_ and `TESTING.rst <TESTING.rst>`_.
+
+
+Difference between local runs and GitHub Actions workflows
+----------------------------------------------------------
+
+Depending on whether the scripts are run locally (most often via `Breeze <BREEZE.rst>`_) or whether they
+are run in the "CI Build" or "Build Image" workflows, they can take different values.
+
+You can use those variables when you try to reproduce the build locally.
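+
+For example, an illustrative sketch of replicating the "Main CI Workflow" behaviour in a local run
+(both variables are described in the table below):
+
+.. code-block:: bash
+
+    # Mimic the Main CI Workflow settings locally
+    export DB_RESET="true"                  # reset the DB at container entry
+    export FORCE_ANSWER_TO_QUESTIONS="yes"  # answer "yes" to all build questions
+    ./breeze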
+
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| Variable | Local | Build Image | Main CI | Comment |
+| | development | CI workflow | Workflow | |
++=========================================+=============+=============+============+=================================================+
+| Basic variables |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``PYTHON_MAJOR_MINOR_VERSION`` | | | | Major/Minor version of python used. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``DB_RESET`` | false | true | true | Determines whether the database should be reset |
+| | | | | at the container entry. By default locally |
+| | | | | the database is not reset, which allows you to |
+| | | | | keep the database content between runs in |
+| | | | | case of Postgres or MySQL. However, |
+| | | | | it requires a manual init/reset |
+| | | | | if you stop the environment. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| Dockerhub variables |
++-----------------------------------------+----------------------------------------+-------------------------------------------------+
+| ``DOCKERHUB_USER`` | apache | Name of the DockerHub user to use |
++-----------------------------------------+----------------------------------------+-------------------------------------------------+
+| ``DOCKERHUB_REPO`` | airflow | Name of the DockerHub repository to use |
++-----------------------------------------+----------------------------------------+-------------------------------------------------+
+| Mount variables |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``MOUNT_LOCAL_SOURCES`` | true | false | false | Determines whether local sources are |
+| | | | | mounted inside the container. Useful for |
+| | | | | local development, as changes you make |
+| | | | | locally can be immediately tested in |
+| | | | | the container. We mount only selected, |
+| | | | | important folders. We do not mount the whole |
+| | | | | project folder in order to avoid accidental |
+| | | | | use of artifacts (such as ``egg-info`` |
+| | | | | directories) generated locally on the |
+| | | | | host during development. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| Force variables |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``FORCE_PULL_IMAGES`` | true | true | true | Determines if images are force-pulled, |
+| | | | | no matter if they are already present |
+| | | | | locally. This includes not only the |
+| | | | | CI/PROD images but also the python base |
+| | | | | images. Note that if python base images |
+| | | | | change, also the CI and PROD images |
+| | | | | need to be fully rebuilt unless they were |
+| | | | | already built with that base python |
+| | | | | image. This is false for local development |
+| | | | | to avoid often pulling and rebuilding |
+| | | | | the image. It is true for CI workflow in |
+| | | | | case waiting for images is enabled |
+| | | | | as the images needs to be force-pulled from |
+| | | | | GitHub Registry, but it is set to |
+| | | | | false when waiting for images is disabled. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``FORCE_BUILD_IMAGES`` | false | false | false | Forces building images. This is generally not |
+| | | | | very useful in CI as in the CI environment the |
+| | | | | image is built or pulled only once, so there is |
+| | | | | no need to set the variable to true. For local |
+| | | | | builds it forces a rebuild, regardless of |
+| | | | | whether it is determined to be needed. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``FORCE_ANSWER_TO_QUESTIONS`` | | yes | yes | This variable determines if answers to questions |
+| | | | | during the build process should be |
+| | | | | automatically given. For local development, |
+| | | | | the user is occasionally asked to provide |
+| | | | | answers to questions such as - whether |
+| | | | | the image should be rebuilt. By default |
+| | | | | the user has to answer but in the CI |
+| | | | | environment, we force "yes" answer. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``SKIP_CHECK_REMOTE_IMAGE`` | false | true | true | Determines whether we check if remote image |
+| | | | | is "fresher" than the current image. |
+| | | | | When doing local breeze runs we try to |
+| | | | | determine if it will be faster to rebuild |
+| | | | | the image or whether the image should be |
+| | | | | pulled first from the cache because it has |
+| | | | | been rebuilt. This is a slightly experimental |
+| | | | | feature and will be improved in the future |
+| | | | | as the current mechanism does not always |
+| | | | | work properly. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| Host variables |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``HOST_USER_ID`` | | | | User id of the host user. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``HOST_GROUP_ID`` | | | | Group id of the host user. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``HOST_OS`` | | Linux | Linux | OS of the Host (Darwin/Linux). |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``HOST_HOME`` | | | | Home directory on the host. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``HOST_AIRFLOW_SOURCES`` | | | | Directory where airflow sources are located |
+| | | | | on the host. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| Image variables |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``INSTALL_AIRFLOW_VERSION`` | | | | Installs Airflow version from PyPI when |
+| | | | | building image. Can be "none" to skip airflow |
+| | | | | installation so that it can be installed from |
+| | | | | locally prepared packages. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``INSTALL_AIRFLOW_REFERENCE`` | | | | Installs Airflow version from GitHub |
+| | | | | branch or tag. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| Version suffix variables |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``VERSION_SUFFIX_FOR_PYPI`` | | | | Version suffix used during backport |
+| | | | | package preparation for PyPI builds. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``VERSION_SUFFIX_FOR_SVN`` | | | | Version suffix used during backport |
+| | | | | package preparation for SVN builds. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| Git variables |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``COMMIT_SHA`` | | GITHUB_SHA | GITHUB_SHA | SHA of the commit for which the build is run |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| Verbosity variables |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``PRINT_INFO_FROM_SCRIPTS`` | true | true | true | Allows printing output to terminal from running |
+| | (x) | (x) | (x) | scripts. It prints some extra outputs if true |
+| | | | | including what the commands do, results of some |
+| | | | | operations, summary of variable values, exit |
+| | | | | status from the scripts, outputs of failing |
+| | | | | commands. If verbose is on it also prints the |
+| | | | | commands executed by docker, kind, helm, |
+| | | | | kubectl. Disabled in pre-commit checks. |
+| | | | | |
+| | | | | (x) set to false in pre-commits |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``VERBOSE`` | false | true | true | Determines whether docker, helm, kind, |
+| | | | | kubectl commands should be printed before |
+| | | | | execution. This is useful to determine |
+| | | | | what exact commands were executed for |
+| | | | | debugging purposes, and also allows |
+| | | | | replicating those commands easily by |
+| | | | | copy&pasting them from the output. |
+| | | | | Requires ``PRINT_INFO_FROM_SCRIPTS`` set to |
+| | | | | true. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``VERBOSE_COMMANDS`` | false | false | false | Determines whether every command |
+| | | | | executed in bash should also be printed |
+| | | | | before execution. This is a low-level |
+| | | | | debugging feature of bash (set -x) and |
+| | | | | it should only be used if you cannot |
+| | | | | figure out where the script failed. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| Image build variables |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``UPGRADE_TO_LATEST_CONSTRAINTS`` | false | false | false | Determines whether the build should |
+| | | | (x) | attempt to upgrade all |
+| | | | | PIP dependencies to latest ones matching |
+| | | | | ``setup.py`` limits. This tries to replicate |
+| | | | | the situation of "fresh" user who just installs |
+| | | | | airflow and uses latest version of matching |
+| | | | | dependencies. By default we are using a |
+| | | | | tested set of dependency constraints |
+| | | | | stored in separated "orphan" branches |
+| | | | | of the airflow repository |
+| | | | | ("constraints-master, "constraints-1-10") |
+| | | | | but when this flag is set to anything but false |
+| | | | | (for example commit SHA), they are not used |
+| | | | | and the "eager" upgrade strategy is used |
+| | | | | when installing dependencies. We set it |
+| | | | | to true in case of direct pushes (merges) |
+| | | | | to master and scheduled builds so that |
+| | | | | the constraints are tested. In those builds, |
+| | | | | in case we determine that the tests pass |
+| | | | | we automatically push latest set of |
+| | | | | "tested" constraints to the repository. |
+| | | | | |
+| | | | | Setting the value to a commit SHA is the best |
+| | | | | way to ensure that constraints are upgraded |
+| | | | | even if there is no change to ``setup.py``. |
+| | | | | |
+| | | | | This way our constraints are automatically |
+| | | | | tested and updated whenever new versions |
+| | | | | of libraries are released. |
+| | | | | |
+| | | | | (x) true in case of direct pushes and |
+| | | | | scheduled builds |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``CHECK_IMAGE_FOR_REBUILD`` | true | true | true | Determines whether attempt should be |
+| | | | (x) | made to rebuild the CI image with latest |
+| | | | | sources. It is true by default for |
+| | | | | local builds, however it should be |
+| | | | | set to false when we know that the image |
+| | | | | we pulled or built already contains |
+| | | | | the right sources, especially when |
+| | | | | our local sources are not the |
+| | | | | ones we intend to use (for example |
+| | | | | when ``--github-image-id`` is used |
+| | | | | in Breeze). |
+| | | | | |
+| | | | | In CI builds it is set to true |
+| | | | | in case of the "Build Image" |
+| | | | | workflow or when |
+| | | | | waiting for images is disabled |
+| | | | | in the CI workflow. |
+| | | | | |
+| | | | | (x) if waiting for images the variable is set |
+| | | | | to false automatically. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
+| ``SKIP_BUILDING_PROD_IMAGE`` | false | false | false | Determines whether we should skip building |
+| | | | (x) | the PROD image with latest sources. |
+| | | | | It is set to false by default, but in the |
+| | | | | "deploy app to kubernetes" step it is set to |
+| | | | | "true", because at this stage we know we have a |
+| | | | | good image already built or pulled. |
+| | | | | |
+| | | | | (x) set to true automatically in the |
+| | | | | "Deploy App to Kubernetes" step. |
++-----------------------------------------+-------------+-------------+------------+-------------------------------------------------+
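+
+For example, a minimal sketch of how the verbosity variables described above can be combined when
+entering Breeze (any of the variables from the table can be set the same way):
+
+.. code-block:: bash
+
+    # Print docker/helm/kind/kubectl commands before executing them,
+    # and additionally trace every bash command (set -x)
+    VERBOSE="true" VERBOSE_COMMANDS="true" ./breeze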
+
+Running CI Builds locally
+=========================
+
+The following variables are automatically determined based on the CI environment variables.
+You can simulate CI locally by setting ``CI="true"`` and running the CI scripts from the
+``scripts/ci`` folder:
+
+* ``provider_packages`` - scripts to build and test provider packages
+* ``constraints`` - scripts to build and publish latest set of valid constraints
+* ``docs`` - scripts to build documentation
+* ``images`` - scripts to build and push CI and PROD images
+* ``kubernetes`` - scripts to setup kubernetes cluster, deploy airflow and run kubernetes tests with it
+* ``testing`` - scripts that run unit and integration tests
+* ``tools`` - scripts that perform various clean-up and preparation tasks
+
+Common libraries of functions for all the scripts can be found in ``libraries`` folder.
+
+For detailed usage of those scripts you can refer to ``.github/workflows/`` - those scripts are used
+by our CI workflows.
+
+The default values are "sane"; you can change them to interact with your own repositories or registries.
+Note that you need to set the ``CI`` variable to "true" in order to get the same results as in CI - see
+the example after the table below.
+
++------------------------------+----------------------+-----------------------------------------------------+
+| Variable | Default | Comment |
++==============================+======================+=====================================================+
+| CI | ``false`` | If set to "true", we simulate behaviour of |
+| | | all scripts as if they are in CI environment |
++------------------------------+----------------------+-----------------------------------------------------+
+| CI_TARGET_REPO | ``apache/airflow`` | Target repository for the CI build. Used to |
+| | | compare incoming changes from PR with the target. |
++------------------------------+----------------------+-----------------------------------------------------+
+| CI_TARGET_BRANCH | ``master`` | Target branch where the PR should land. Used to |
+| | | compare incoming changes from PR with the target. |
++------------------------------+----------------------+-----------------------------------------------------+
+| CI_BUILD_ID | ``0`` | Unique id of the build that is kept across re-runs |
+| | | (for GitHub actions it is ``GITHUB_RUN_ID``) |
++------------------------------+----------------------+-----------------------------------------------------+
+| CI_JOB_ID | ``0`` | Unique id of the job - used to produce unique |
+| | | artifact names. |
++------------------------------+----------------------+-----------------------------------------------------+
+| CI_EVENT_TYPE | ``pull_request`` | Type of the event. It can be one of |
+| | | [``pull_request``, ``pull_request_target``, |
+| | | ``schedule``, ``push``] |
++------------------------------+----------------------+-----------------------------------------------------+
+| CI_REF | ``refs/heads/master`` | Branch in the source repository that is used to |
+| | | make the pull request. |
++------------------------------+----------------------+-----------------------------------------------------+
+
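+For example, a minimal sketch of simulating a CI run locally (the script path below is illustrative -
+substitute any script from the folders listed above):
+
+.. code-block:: bash
+
+    # Pretend we are in CI - the scripts then behave as they do in the CI environment
+    export CI="true"
+
+    # Optionally point the scripts at your own repository and branch
+    export CI_TARGET_REPO="myuser/airflow"    # hypothetical fork
+    export CI_TARGET_BRANCH="master"
+
+    # Run one of the CI scripts from the scripts/ci folder (illustrative path)
+    ./scripts/ci/docs/ci_docs.sh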
+
+GitHub Registry Variables
+=========================
+
+By default, our CI uses the GitHub Registry to pull and push images. You can however make it interact
+with the DockerHub registry instead, or point it at your own GitHub repo by changing
+``GITHUB_REPOSITORY`` and providing your own GitHub username and token.
+
++--------------------------------+---------------------------+----------------------------------------------+
+| Variable | Default | Comment |
++================================+===========================+==============================================+
+| USE_GITHUB_REGISTRY | true | If set to "true", we interact with GitHub |
+| | | Registry, not the DockerHub one. |
++--------------------------------+---------------------------+----------------------------------------------+
+| GITHUB_REGISTRY | ``docker.pkg.github.com`` | DNS name of the GitHub registry to |
+| | | use. |
++--------------------------------+---------------------------+----------------------------------------------+
+| GITHUB_REPOSITORY | ``apache/airflow`` | Prefix of the image. It indicates which |
+| | | GitHub registry to use. |
++--------------------------------+---------------------------+----------------------------------------------+
+| GITHUB_USERNAME | | Username used to log in to GitHub |
+| | | |
++--------------------------------+---------------------------+----------------------------------------------+
+| GITHUB_TOKEN | | Personal access token used to log in to GitHub |
+| | | |
++--------------------------------+---------------------------+----------------------------------------------+
+| GITHUB_REGISTRY_WAIT_FOR_IMAGE | ``false`` | Wait for the image to be available. This is |
+| | | useful if commit SHA is used as pull tag |
++--------------------------------+---------------------------+----------------------------------------------+
+| GITHUB_REGISTRY_PULL_IMAGE_TAG | ``latest`` | Pull this image tag. This is "latest" by |
+| | | default, can be commit SHA or RUN_ID. |
++--------------------------------+---------------------------+----------------------------------------------+
+| GITHUB_REGISTRY_PUSH_IMAGE_TAG | ``latest`` | Push this image tag. This is "latest" by |
+| | | default, can be commit SHA or RUN_ID. |
++--------------------------------+---------------------------+----------------------------------------------+
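+
+For example, a hedged sketch of pointing the scripts at your own fork's GitHub registry
+(``myuser`` and the token value are placeholders):
+
+.. code-block:: bash
+
+    export USE_GITHUB_REGISTRY="true"
+    export GITHUB_REPOSITORY="myuser/airflow"          # your fork
+    export GITHUB_USERNAME="myuser"
+    export GITHUB_TOKEN="<your personal access token>"
+    export GITHUB_REGISTRY_PULL_IMAGE_TAG="latest"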
+
+Dockerhub Variables
+===================
+
+If ``USE_GITHUB_REGISTRY`` is set to "false" you can interact directly with DockerHub. By default
+you pull from/push to the "apache/airflow" DockerHub repository, but you can change
+that to your own repository by setting these environment variables (see the example after the table):
+
++----------------+-------------+-----------------------------------+
+| Variable | Default | Comment |
++================+=============+===================================+
+| DOCKERHUB_USER | ``apache`` | Name of the DockerHub user to use |
++----------------+-------------+-----------------------------------+
+| DOCKERHUB_REPO | ``airflow`` | Name of the DockerHub repo to use |
++----------------+-------------+-----------------------------------+
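+
+For example, a short sketch of switching to your own DockerHub repository (names are placeholders):
+
+.. code-block:: bash
+
+    export USE_GITHUB_REGISTRY="false"
+    export DOCKERHUB_USER="myuser"
+    export DOCKERHUB_REPO="my-airflow"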
+
+CI Architecture
+===============
+
+ .. This image is an export from the 'draw.io' graph available in
+ https://cwiki.apache.org/confluence/display/AIRFLOW/AIP-23+Migrate+out+of+Travis+CI
+ You can edit it there and re-export.
+
+.. image:: images/ci/CI.png
+ :align: center
+ :alt: CI architecture of Apache Airflow
+
+The following components are part of the CI infrastructure:
+
+* **Apache Airflow Code Repository** - our code repository at https://github.com/apache/airflow
+* **Apache Airflow Forks** - forks of the Apache Airflow Code Repository from which contributors make
+ Pull Requests
+* **GitHub Actions** - (GA) UI + execution engine for our jobs
+* **GA CRON trigger** - GitHub Actions CRON triggering our jobs
+* **GA Workers** - virtual machines running our jobs at GitHub Actions (max 20 in parallel)
+* **GitHub Private Image Registry** - image registry used as build cache for CI jobs.
+ It is at https://docker.pkg.github.com/apache/airflow/airflow
+* **DockerHub Public Image Registry** - publicly available image registry at DockerHub.
+ It is at https://hub.docker.com/repository/docker/apache/airflow
+* **DockerHub Build Workers** - virtual machines running build jobs at DockerHub
+* **Official Images** (future) - these are official images that are prominently visible in DockerHub.
+ We aim our images to become official images so that you will be able to pull them
+ with ``docker pull apache-airflow``
+
+CI run types
+============
+
+The following CI Job run types are currently run for Apache Airflow (run by the ci.yaml workflow),
+and each of the run types has a different purpose and context.
+
+Pull request run
+----------------
+
+Those runs are results of PRs from forks made by contributors. Most builds for Apache Airflow fall
+into this category. They are executed in the context of the "Fork", not the main
+Airflow Code Repository, which means that they have only "read" permission to all the GitHub resources
+(container registry, code repository). This is necessary as the code in those PRs (including CI job
+definition) might be modified by people who are not committers for the Apache Airflow Code Repository.
+
+The main purpose of those jobs is to check if the PR builds cleanly, if the tests run properly and if
+the PR is ready to review and merge. The runs use cached images from the Private GitHub registry -
+CI, Production Images as well as base Python images that are also cached in the Private GitHub registry.
+Also, for those builds we only execute Python tests if important files changed (so, for example, if it is
+a "no-code" change, no tests will be executed).
+
+The workflow involved in Pull Request review and approval is a bit more complex than the simple
+workflows in most other projects, because we've implemented some optimizations related to efficient use
+of the queue slots we share with other Apache Software Foundation projects. More details about it
+can be found in `PULL_REQUEST_WORKFLOW.rst `_.
+
+
+Direct Push/Merge Run
+---------------------
+
+Those runs are results of direct pushes done by the committers or of merging a Pull Request
+by the committers. Those runs execute in the context of the Apache Airflow Code Repository and also have
+write permission for GitHub resources (container registry, code repository).
+The main purpose of the run is to check if the code after merge still holds all the assertions - like
+whether it still builds and all tests are green.
+
+This is needed because some of the conflicting changes from multiple PRs might cause build and test failures
+after merge even if they do not fail in isolation. Also those runs are already reviewed and confirmed by the
+committers so they can be used to do some housekeeping:
+
+- pushing the most recent image built in the PR to the GitHub Private Registry (for caching)
+- upgrading to the latest constraints and pushing those constraints if all tests succeed
+- refreshing the latest Python base images in case a new patch-level release is available
+
+The housekeeping is important - Python base images are refreshed with varying frequency (usually once
+every few months, but sometimes several times per week) with the latest security and bug fixes.
+Those patch-level image releases can occasionally break Airflow builds (specifically Docker image builds
+based on those images), therefore in PRs we only use the latest "good" python image that we store in the
+private GitHub cache. The direct push/master builds do not use the registry cache to pull the python
+images - they pull the images directly from DockerHub, therefore they will try the latest images
+after they are released. If those images are fine - the CI Docker image builds and the tests pass -
+those jobs will push the base images to the private GitHub Registry so that they can be used by
+subsequent PR runs.
+
+Scheduled runs
+--------------
+
+Those runs are results of a (nightly) triggered job - only for the ``master`` branch. The
+main purpose of the job is to check if there was no impact of external dependency changes on the Apache
+Airflow code (for example transitive dependencies released that fail the build). It also checks if the
+Docker images can be built from scratch (again - to see if some dependencies have not changed - for
+example downloaded package releases etc.). Another reason for the nightly build is that it tags the most
+recent master commit with the ``nightly-master`` tag, so that the DockerHub build can pick up the moved
+tag and prepare a nightly public master build in the DockerHub registry. The ``v1-10-test`` branch images
+are built in DockerHub when ``v1-10-stable`` is pushed manually.
+
+All runs consist of the same jobs, but the jobs behave slightly differently or are skipped in different
+run categories. Here is a summary of the run categories with regard to the jobs they run.
+Those jobs often use a matrix strategy which runs several different variations of the jobs
+(for example with different backend types / Python versions, or different types of tests to run). The
+following chapter describes the workflows that execute for each run.
+
+Those runs and their corresponding ``Build Images`` runs are only executed in main ``apache/airflow``
+repository, they are not executed in forks - we want to be nice to the contributors and not use their
+free build minutes on GitHub Actions.
+
+Workflows
+=========
+
+Build Images Workflow
+---------------------
+
+This workflow has two purposes - it builds images for the CI Workflow, but it also cancels duplicate or
+failed builds in order to save job time in GitHub Actions and allow faster feedback for developers.
+
+It's a special type of workflow: ``workflow_run`` which means that it is triggered by other workflows (in our
+case it is triggered by the ``CI Build`` workflow). This also means that the workflow has Write permission to
+the Airflow repository and it can - for example - push to the GitHub registry the images used by CI Builds
+which means that the images can be built only once and reused by all the CI jobs (including the matrix jobs).
+We've implemented it in such a way that the running CI Build will wait until the images are built by the
+"Build Images" workflow.
+
+It's possible to disable this feature and go back to the previous behaviour via the
+``GITHUB_REGISTRY_WAIT_FOR_IMAGE`` flag in the "Build Images" workflow. Setting it to "false" switches
+back to the behaviour where each job builds its own image.
+
+You can also switch back to jobs building the images on their own at the fork level by setting the
+``AIRFLOW_GITHUB_REGISTRY_WAIT_FOR_IMAGE`` secret to ``false``. This will disable pushing the "RUN_ID"
+images to the GitHub Registry and all the images will be built locally by each job. It is about 20%
+slower for the whole build on average, but it does not require access to push images to
+GitHub, which sometimes might not be available (depending on the account status).
+
+The write permission also allows cancelling duplicate workflows. This is not possible for the Pull Request
+CI Builds run from forks, as they have no write permission that would allow them to cancel running
+workflows. In our case we perform several different cancellations:
+
+* we cancel duplicate "CI Build" workflow runs s (i.e. workflows from the same repository and branch that
+ were started in quick succession - this allows to save workers that would have been busy running older
+ version of the same Pull Request (usually with fix-ups) and free them for other runs.
+
+* we cancel duplicate "Build Images" workflow runs for the same reasons. The "Build Images" builds run image
+ builds which takes quite some time, so pushing a fixup quickly on the same branch will also cancel the
+ past "Build Images" workflows.
+
+* last, but not least - we cancel any of the "CI Build" workflow runs that failed in some important jobs.
+ This is another optimisation - GitHub does not have "fail-fast" on the whole run, and this cancelling
+ effectively implements "fail-fast" of runs for some important jobs. Note that it only works when you
+ submit new PRs or push new changes. If jobs fail and no new changes are pushed after that, the whole
+ run will run to completion.
+
+The workflow has the following jobs:
+
++---------------------------+---------------------------------------------+
+| Job | Description |
+| | |
++===========================+=============================================+
+| Cancel workflow runs | Cancels duplicated and failed workflows |
++---------------------------+---------------------------------------------+
+| Build Info | Prints detailed information about the build |
++---------------------------+---------------------------------------------+
+| Build CI/PROD images | Builds all configured CI and PROD images |
++---------------------------+---------------------------------------------+
+
+The images are stored in the `GitHub Registry `_ and the
+names of those images follow the patterns described in
+`Naming conventions for stored images <#naming-conventions-for-stored-images>`_
+
+Image building is configured in "fail-fast" mode. When any of the images
+fails to build, it cancels other builds and the source "CI Build" workflow run
+that triggered it.
+
+
+CI Build Workflow
+-----------------
+
+This workflow is a regular workflow that performs all checks of Airflow code.
+
++---------------------------+----------------------------------------------+-------+-------+------+
+| Job | Description | PR | Push | CRON |
+| | | | Merge | (1) |
++===========================+==============================================+=======+=======+======+
+| Build info | Prints detailed information about the build | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Helm tests | Runs tests for the Helm chart | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Test OpenAPI client gen | Tests if the OpenAPI client generation works | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| CI Images | Waits for CI Images (3) | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Static checks | Performs static checks without pylint | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Static checks: pylint | Performs pylint static checks | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Build docs | Builds documentation | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Spell check docs | Spell check for documentation | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Backport packages | Prepares Backport Packages for 1.10 Airflow | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Trigger tests | Checks if tests should be triggered | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Tests [Pg/Msql/Sqlite] | Run all the Pytest tests for Python code | Yes(2)| Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Quarantined tests | Flaky tests that we need to fix (5) | Yes(2)| Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Upload coverage | Uploads test coverage from all the tests | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| PROD Images | Waits for PROD Images (3) | Yes | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Tests Kubernetes | Run Kubernetes test | Yes(2)| Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Push PROD images | Pushes PROD images to GitHub Registry (4) | - | Yes | - |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Push CI images | Pushes CI images to GitHub Registry (4) | - | Yes | - |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Constraints | Upgrade constraints to latest ones (4) | - | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Constraints push | Pushes all upgraded constraints (4) | - | Yes | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+| Tag Repo nightly | Tags the repository with nightly tag (6) | - | - | Yes |
++---------------------------+----------------------------------------------+-------+-------+------+
+
+
+Comments:
+
+ (1) CRON jobs build images from scratch - to test that everything works properly for clean builds
+ (2) The tests are run when the Trigger Tests job determines that important files changed (this allows,
+ for example, "no-code" changes to build much faster)
+ (3) The jobs wait for CI images if ``GITHUB_REGISTRY_WAIT_FOR_IMAGE`` variable is set to "true".
+ You can set it to "false" to disable using shared images - this is slower though as the images
+ are rebuilt in every job that needs them. You can also set your own fork's secret
+ ``AIRFLOW_GITHUB_REGISTRY_WAIT_FOR_IMAGE`` to ``false`` to trigger the same behaviour.
+ (4) PROD and CI images are pushed as "latest" to DockerHub registry and constraints are upgraded only if all
+ tests are successful. Note that images are not pushed in CRON jobs because they are rebuilt from
+ scratch and we want to push incremental changes to the DockerHub registry.
+ (5) Flaky tests never fail in regular builds. See the next chapter where our approach to flaky tests
+ is explained.
+ (6) The nightly tag is pushed to the repository only in the CRON job and only if all tests pass. This
+ causes the DockerHub images to be built automatically and made available to developers.
+
+Scheduled quarantined builds
+----------------------------
+
+This workflow runs only quarantined tests. Those tests do not fail the build even if some tests fail (only if
+the whole pytest execution fails). Instead, this workflow updates one of the issues where we keep the
+status of quarantined tests. Once a test succeeds in ``NUM_RUNS`` subsequent runs, it is marked as stable and
+can be removed from quarantine. You can read more about quarantine in ``_
+
+The issues are only updated if the test is run as direct push or scheduled run and only in the
+``apache/airflow`` repository - so that the issues are not updated in forks.
+
+The issues that get updated are different for different branches:
+
+* master: `Quarantine tests master `_
+* v1-10-stable: `Quarantine tests v1-10-stable `_
+* v1-10-test: `Quarantine tests v1-10-test `_
+
+Those runs and their corresponding ``Build Images`` runs are only executed in main ``apache/airflow``
+repository, they are not executed in forks - we want to be nice to the contributors and not use their
+free build minutes on GitHub Actions.
+
+Force sync master from apache/airflow
+-------------------------------------
+
+This is a manually triggered workflow (via a GitHub UI manual run) that should only be run in GitHub
+forks. When triggered, it will force-push the "apache/airflow" master to the fork's master. It's the
+easiest way to sync your fork's master with Apache Airflow's master.
+
+Delete old artifacts
+--------------------
+
+This workflow deletes old artifacts from the GitHub Actions builds. We set it to
+delete artifacts that are more than 7 days old. It only runs for the 'apache/airflow' repository.
+
+We also have a script that can help clean up the old artifacts:
+`remove_artifacts.sh `_
+
+CodeQL scan
+-----------
+
+The `CodeQL `_ security scan uses the GitHub security scanning framework to scan our code for security
+violations. It is run for JavaScript and Python code.
+
+Naming conventions for stored images
+====================================
+
+The images produced during the CI builds are stored in the
+`GitHub Registry `_
+
+The images are stored with both the "latest" tag (for the last master push image that passes all the
+tests) as well as with tags indicating the origin of the image.
+
+The image names follow the patterns:
+
++--------------+----------------------------+--------------------------------+--------------------------------------------------------------------------------------------+
+| Image | Name pattern | Tag format | Comment |
++==============+============================+================================+============================================================================================+
+| Python image | python | <X.Y>-slim-buster-<RUN_ID> | Base python image used by both production and CI image. |
+| | | <X.Y>-slim-buster-<COMMIT_SHA> | Python maintainers release new versions of those images with security fixes every few weeks. |
++--------------+----------------------------+--------------------------------+--------------------------------------------------------------------------------------------+
+| CI image | <BRANCH>-python<X.Y>-ci | <RUN_ID> or <COMMIT_SHA> | CI image - this is the image used for most of the tests. |
+| | | | |
++--------------+----------------------------+--------------------------------+--------------------------------------------------------------------------------------------+
+| PROD Build | <BRANCH>-python<X.Y>-build | <RUN_ID> or <COMMIT_SHA> | Production Build image - this is the "build" segment of the production image. |
+| image | | | It contains build-essentials and all necessary packages to install PIP packages. |
++--------------+----------------------------+--------------------------------+--------------------------------------------------------------------------------------------+
+| PROD image | <BRANCH>-python<X.Y> | <RUN_ID> or <COMMIT_SHA> | Production image. This is the actual production image - optimized for size. |
+| | | | It contains only compiled libraries and a minimal set of dependencies to run Airflow. |
++--------------+----------------------------+--------------------------------+--------------------------------------------------------------------------------------------+
+
+* <BRANCH> might be either "master" or "v1-10-test" or "v2-0-test"
+* <X.Y> - Python version (Major + Minor). For "master" and "v2-0-test" it should be in ["3.6", "3.7", "3.8"]. For
+ v1-10-test it should be in ["2.7", "3.5", "3.6", "3.7", "3.8"].
+* <RUN_ID> - GitHub Actions RUN_ID. You can get it from the CI action job outputs (the run id is printed in
+ the logs and displayed as part of the step name). All PRs belong to some RUN_ID and this way you can
+ pull the very exact version of the image used in that RUN_ID.
+* <COMMIT_SHA> - for images that get merged to "master", "v2-0-test" or "v1-10-test", the images are also tagged
+ with the commit SHA of that particular commit. This way you can easily find the image that was used
+ for testing for that "master", "v2-0-test" or "v1-10-test" test run.
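+
+For example, for the "master" branch, Python 3.6 and RUN_ID 210056909, the patterns above resolve to
+the following names (an illustrative sketch):
+
+.. code-block:: bash
+
+    # CI image
+    docker pull docker.pkg.github.com/apache/airflow/master-python3.6-ci:210056909
+    # PROD "build" segment image
+    docker pull docker.pkg.github.com/apache/airflow/master-python3.6-build:210056909
+    # PROD image
+    docker pull docker.pkg.github.com/apache/airflow/master-python3.6:210056909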
+
+Reproducing CI Runs locally
+===========================
+
+Since we store images from every CI run, you should be able to easily reproduce any of the CI build
+problems locally. You can do it by pulling and using the right image and running it with the right docker
+command. For example, knowing that the CI build had RUN_ID 210056909 (you can find it in the GitHub
+Actions logs):
+
+.. code-block:: bash
+
+ docker pull docker.pkg.github.com/apache/airflow/master-python3.6-ci:210056909
+
+ docker run -it docker.pkg.github.com/apache/airflow/master-python3.6-ci:210056909
+
+
+But you usually need to pass more variables and a more complex setup if you want to connect to a database
+or enable some integrations. Therefore it is easiest to use `Breeze `_ for that. For example, if
+you need to reproduce a MySQL environment with the kerberos integration enabled for run 210056909, in a
+python 3.8 environment you can run:
+
+.. code-block:: bash
+
+ ./breeze --github-image-id 210056909 --python 3.8 --integration kerberos
+
+You will be dropped into a shell with the exact version that was used during the CI run and you will
+be able to run pytest tests manually, easily reproducing the environment that was used in CI. Note that in
+this case, you do not need to check out the sources that were used for that run - they are already part of
+the image - but remember that any changes you make in those sources are lost when you leave the image as
+the sources are not mapped from your host machine.
+
+CI Sequence diagrams
+====================
+
+The sequence diagrams below show the flow happening during the CI builds.
+
+Pull request flow from fork
+---------------------------
+
+.. image:: images/ci/pull_request_ci_flow.png
+ :align: center
+ :alt: Pull request flow from fork
+
+
+Direct Push/Merge flow
+----------------------
+
+.. image:: images/ci/push_ci_flow.png
+ :align: center
+ :alt: Direct Push/Merge flow
+
+Scheduled build flow
+---------------------
+
+.. image:: images/ci/scheduled_ci_flow.png
+ :align: center
+ :alt: Scheduled build flow
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 1c5c4829ca9ad..bc641178a7d07 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -42,8 +42,7 @@ to follow it and apply to the programme and follow up with the community.
Report Bugs
-----------
-Report bugs through `Apache
-JIRA `__.
+Report bugs through `GitHub `__.
Please report relevant information and preferably code that exhibits the
problem.
@@ -51,16 +50,16 @@ problem.
Fix Bugs
--------
-Look through the JIRA issues for bugs. Anything is open to whoever wants to
+Look through the GitHub issues for bugs. Anything is open to whoever wants to
implement it.
Implement Features
------------------
-Look through the `Apache
-JIRA `__ for features.
+Look through the `GitHub issues labeled "kind:feature"
+`__ for features.
-Any unassigned "Improvement" issue is open to whoever wants to implement it.
+Any unassigned feature request issue is open to whoever wants to implement it.
We've created the operators, hooks, macros and executors we needed, but we've
made sure that this part of Airflow is extensible. New operators, hooks, macros
@@ -76,8 +75,7 @@ articles.
Submit Feedback
---------------
-The best way to send feedback is to open an issue on `Apache
-JIRA `__.
+The best way to send feedback is to `open an issue on GitHub `__.
If you are proposing a new feature:
@@ -86,37 +84,265 @@ If you are proposing a new feature:
- Remember that this is a volunteer-driven project, and that contributions are
welcome :)
-Documentation
+
+Roles
=============
-The latest API documentation is usually available
-`here `__.
+There are several roles within the Airflow Open-Source community.
-To generate a local version:
-1. Set up an Airflow development environment.
+PMC Member
+-----------
+The PMC (Project Management Committee) is a group of maintainers that drives changes in the way that
+Airflow is managed as a project.
-2. Install the ``doc`` extra.
+Considering Apache, the role of the PMC is primarily to ensure that Airflow conforms to Apache's processes
+and guidelines.
-.. code-block:: bash
+Committers/Maintainers
+----------------------
+
+Committers are community members that have write access to the project’s repositories, i.e., they can modify the code,
+documentation, and website by themselves and also accept other contributions.
+
+The official list of committers can be found `here `__.
+
+Additionally, committers are listed in a few other places (some of these may only be visible to existing committers):
+
+* https://whimsy.apache.org/roster/ppmc/airflow
+* https://github.com/orgs/apache/teams/airflow-committers/members
+
+Committers are responsible for:
+
+* Championing one or more items on the `Roadmap `__
+* Reviewing & Merging Pull-Requests
+* Scanning and responding to GitHub issues
+* Responding to questions on the dev mailing list (dev@airflow.apache.org)
+
+Becoming a Committer
+--------------------
+
+There is no strict protocol for becoming a committer.
+Candidates for new committers are typically people that are active contributors and community members.
- pip install -e '.[doc]'
+The key aspects of a committer are:
+
+* Consistent contributions over the past 6 months
+* Understanding of Airflow Core or has displayed a holistic understanding of a particular part and made
+ contributions towards a more strategic goal
+* Understanding of contributor/committer guidelines: `Contributors' Guide `__
+* Quality of the commits
+* Visibility in community discussions (dev mailing list, Slack and GitHub)
+* Testing Release Candidates
-3. Generate and serve the documentation as follows:
+
+Contributors
+------------
+
+A contributor is anyone who wants to contribute code, documentation, tests, ideas, or anything to the
+Apache Airflow project.
+
+Contributors are responsible for:
+
+* Fixing bugs
+* Adding features
+* Championing one or more items on the `Roadmap `__.
+
+Contribution Workflow
+=====================
+
+Typically, you start your first contribution by reviewing open tickets
+at `GitHub issues `__.
+
+If you create a pull request, you don't have to create an issue first, but if you want, you can do it.
+Creating an issue will allow you to collect feedback or share your plans with other people.
+
+For example, suppose you want to have the following sample ticket assigned to you:
+`#7782: Add extra CC: to the emails sent by Airflow `_.
+
+In general, your contribution includes the following stages:
+
+.. image:: images/workflow.png
+ :align: center
+ :alt: Contribution Workflow
+
+1. Make your own `fork `__ of
+ the Apache Airflow `main repository `__.
+
+2. Create a `local virtualenv `_,
+ initialize the `Breeze environment `__, and
+ install `pre-commit framework `__.
+ If you want to add more changes in the future, set up your fork and enable GitHub Actions.
+
+3. Join `devlist `__
+ and set up a `Slack account `__.
+
+4. Make the change and create a `Pull Request from your fork `__.
+
+5. Ping @ #development slack, comment @people. Be annoying. Be considerate.
+
+Step 1: Fork the Apache Airflow Repo
+------------------------------------
+From the `apache/airflow `_ repo,
+`create a fork `_:
+
+.. image:: images/fork.png
+ :align: center
+ :alt: Creating a fork
+
+
+Step 2: Configure Your Environment
+----------------------------------
+Configure the Docker-based Breeze development environment and run tests.
+
+You can use the default Breeze configuration as follows:
+
+1. Install the latest versions of the Docker Community Edition
+ and Docker Compose and add them to the PATH.
+
+2. Enter Breeze: ``./breeze``
+
+ Breeze starts by downloading the Airflow CI image from
+ Docker Hub and installing all required dependencies.
+
+3. Enter the Docker environment and mount your local sources
+ to make them immediately visible in the environment.
+
+4. Create a local virtualenv, for example:
.. code-block:: bash
- cd docs
- ./build.sh
- ./start_doc_server.sh
+ mkvirtualenv myenv --python=python3.6
-.. note::
- The docs build script ``build.sh`` requires bash 4.0 or greater.
- If you are building on mac, you can install latest version of bash with homebrew.
+5. Initialize the created environment:
+
+.. code-block:: bash
+
+ ./breeze initialize-local-virtualenv --python 3.6
+
+6. Open your IDE (for example, PyCharm) and select the virtualenv you created
+ as the project's default virtualenv in your IDE.
+
+Step 3: Connect with People
+---------------------------
+
+For effective collaboration, make sure to join the following Airflow groups:
+
+- Mailing lists:
+
+ - Developer’s mailing list: dev@airflow.apache.org
+ (quite substantial traffic on this list)
+
+ - All commits mailing list: commits@airflow.apache.org
+ (very high traffic on this list)
+
+ - Airflow users mailing list: users@airflow.apache.org
+ (reasonably small traffic on this list)
+
+- `Issues on GitHub `__
+
+- `Slack (chat) `__
+
+Step 4: Prepare PR
+------------------
+
+1. Update the local sources to address the issue.
+
+ For example, to address this example issue, do the following:
+
+ * Read about `email configuration in Airflow `__.
+
+ * Find the class you should modify. For the example GitHub issue,
+ this is `email.py `__.
+
+ * Find the test class where you should add tests. For the example ticket,
+ this is `test_email.py `__.
+ * Make sure your fork's master is synced with Apache Airflow's master before you create a branch. See
+ `How to sync your fork <#how-to-sync-your-fork>`_ for details.
+
+ * Create a local branch for your development. Make sure to use the latest
+ ``apache/master`` as the base for the branch. See `How to Rebase PR <#how-to-rebase-pr>`_ for some
+ details on setting up the ``apache`` remote. Note, some people develop their changes directly in their
+ own ``master`` branches - this is OK and you can make a PR from your master to ``apache/master``, but we
+ recommend always creating a local branch for your development. This allows you to easily compare
+ changes and work on several changes at the same time.
+ If you have ``apache`` set as a remote, then you can make sure your master has the latest changes
+ by running ``git pull apache master`` while on your local ``master`` branch. If you have conflicts and
+ want to override your locally changed master, you can reset it with
+ ``git fetch apache; git reset --hard apache/master`` (see also the sketch at the end of this section).
+
+ * Modify the class and add necessary code and unit tests.
+
+ * Run the unit tests from the `IDE `__
+ or `local virtualenv `__ as you see fit.
+
+ * Run the tests in `Breeze `__.
+
+ * Run and fix all the `static checks `__. If you have
+ `pre-commits installed `__,
+ this step is automatically run while you are committing your code. If not, you can do it manually
+ via ``git add`` and then ``pre-commit run``.
+
+2. Rebase your fork, squash commits, and resolve all conflicts. See `How to rebase PR <#how-to-rebase-pr>`_
+ if you need help with rebasing your change. Remember to rebase often if your PR takes a lot of time to
+ review/fix. This will make the rebase process much easier and less painful - and the more often you do
+ it, the more comfortable you will feel doing it.
+
+3. Re-run static code checks again.
+
+4. Make sure your commit has a good title and description of the context of your change, enough
+ for the committer reviewing it to understand why you are proposing a change. Make sure to follow other
+ PR guidelines described in `pull request guidelines <#pull-request-guidelines>`_.
+ Create Pull Request! Make yourself ready for the discussion!
+
+5. Depending on "scope" of your changes, your Pull Request might go through one of few paths after approval.
+ We run some non-standard workflow with high degree of automation that allows us to optimize the usage
+ of queue slots in Github Actions. Our automated workflows determine the "scope" of changes in your PR
+ and send it through the right path:
+
+ * In case of a "no-code" change, approval will generate a comment that the PR can be merged and no
+ tests are needed. This is usually when the change modifies some non-documentation related rst
+ files (such as this file). No python tests are run and no CI images are built for such a PR. Usually
+ it can be approved and merged a few minutes after it is submitted (unless there is a big queue of jobs).
+
+ * In case of a change involving python code or documentation changes, a subset of the full test matrix
+ will be executed. This subset performs the relevant tests for a single combination of python and backend
+ version, and only builds one CI image and one PROD image. Here the scope of tests depends on the
+ scope of your changes:
+
+ * when your change does not touch the "core" of Airflow (Providers, CLI, WWW, Helm Chart) you will get
+ the comment that the PR is likely OK to be merged without running the "full matrix" of tests. However,
+ that decision is left to the committer who approves your change. The committer might set the
+ "full tests needed" label for your PR and ask you to rebase your request or re-run all jobs. PRs with
+ the "full tests needed" label run the full matrix of tests.
+
+ * when your change touches the "core" of Airflow you will get the comment that the PR needs full tests
+ and the "full tests needed" label is set for your PR. An additional check is set that prevents
+ accidental merging of the request until the full matrix of tests succeeds for the PR.
+
+ * when your change has "upgrade to newer dependencies" label set, constraints will be automatically
+ upgraded to latest constraints matching your setup.py. This is useful in case you want to force
+ upgrade to a latest version of dependencies. You can ask committers to set the label for you
+ when you need it in your PR.
+
+ More details about the PR workflow can be found in `PULL_REQUEST_WORKFLOW.rst `_.
+
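+A short sketch of the branch setup described in step 1 above (the remote and branch names follow the
+conventions mentioned there; ``my-fix-branch`` is a hypothetical branch name):
+
+.. code-block:: bash
+
+    # One-time setup: add the apache remote (see "How to Rebase PR")
+    git remote add apache https://github.com/apache/airflow.git
+
+    # Keep your local master in sync with apache/master
+    git checkout master
+    git pull apache master
+
+    # Create a local branch for your change, based on the latest apache/master
+    git checkout -b my-fix-branch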
+
+Step 5: Pass PR Review
+----------------------
+
+.. image:: images/review.png
+ :align: center
+ :alt: PR Review
+
+Note that committers will use **Squash and Merge** instead of **Rebase and Merge**
+when merging PRs, so your commits will be squashed into a single commit.
+
+You need a review from at least one committer (if you are a committer yourself, it has to be
+another committer). Ideally you should have 2 or more committers reviewing the code that touches
+the core of Airflow.
-If you are creating ``example_dags`` directory, you need to create ``example_dags/__init__.py`` with Apache license or copy another ``__init__.py`` file that contains the necessary license.
Pull Request Guidelines
=======================
@@ -127,30 +353,21 @@ these guidelines:
- Include tests, either as doctests, unit tests, or both, to your pull
request.
- The airflow repo uses `Travis CI `__ to
+ The airflow repo uses `GitHub Actions `__ to
run the tests and `codecov `__ to track
- coverage. You can set up both for free on your fork (see
- `Travis CI Testing Framework `__ usage guidelines).
- It will help you make sure you do not break the build with your PR and
- that you help increase coverage.
+ coverage. You can set up both for free on your fork. It will help you make sure you do not
+ break the build with your PR and that you help increase coverage.
+
+- Follow our project's `Coding style and best practices`_.
+
+ These are things that aren't currently enforced programmatically (either because they are too hard or just
+ not yet done.)
- `Rebase your fork `__, squash
commits, and resolve all conflicts.
- When merging PRs, wherever possible try to use **Squash and Merge** instead of **Rebase and Merge**.
-- Make sure every pull request introducing code changes has an associated
- `JIRA `__
- ticket. The JIRA link should also be added to the PR description. In case of documentation only changes
- the JIRA ticket is not necessary.
-
-- Preface your commit's subject & PR title with **[AIRFLOW-NNNN] COMMIT_MSG** where *NNNN*
- is the JIRA number. For example: [AIRFLOW-5574] Fix Google Analytics script loading. In case of
- documentation only changes you should put "[AIRFLOW-XXXX]" instead.
- We compose Airflow release notes from all commit titles in a release. By placing the JIRA number in the
- commit title and hence in the release notes, we let Airflow users look into
- JIRA and GitHub PRs for more details about a particular change.
-
- Add an `Apache License `__ header
to all new files.
@@ -167,7 +384,7 @@ these guidelines:
- Run tests locally before opening PR.
-- Make sure the pull request works for Python 2.7, 3.5 and 3.6.
+- Make sure the pull request works for Python 2.7, 3.5, 3.6, 3.7 and 3.8.
- Adhere to guidelines for commit messages described in this `article `__.
This makes the lives of those who come after you a lot easier.
@@ -187,6 +404,13 @@ usually these are developers with the release manager permissions.
Once the branch is stable, the ``v1-10-stable`` branch is synchronized with ``v1-10-test``.
The ``v1-10-stable`` branch is used to release ``1.10.x`` releases.
+The general approach is that cherry-picking a commit that has already had a PR and unit tests run
+against main is done to ``v1-10-test`` branch, but PRs from contributors towards 1.10 should target
+``v1-10-stable`` branch.
+
+The ``v1-10-test`` branch and ``v1-10-stable`` ones are merged just before the release and that's the
+time when they converge.
+
Development Environments
========================
@@ -281,8 +505,9 @@ Benefits:
where all these services are available and can be used by tests
automatically.
-- Breeze environment is almost the same as used in `Travis CI `__ automated builds.
- So, if the tests run in your Breeze environment, they will work in Travis CI as well.
+- Breeze environment is almost the same as used in the CI automated builds.
+ So, if the tests run in your Breeze environment, they will work in the CI as well.
+ See ``_ for details about Airflow CI.
Limitations:
@@ -303,37 +528,144 @@ Limitations:
They are optimized for repeatability of tests, maintainability and speed of building rather
than production performance. The production images are not yet officially published.
+
+Airflow dependencies
+====================
+
Extras
------
There are a number of extras that can be specified when installing Airflow. Those
extras can be specified after the usual pip install - for example
-``pip install -e .[gcp]``. For development purpose there is a ``devel`` extra that
+``pip install -e .[ssh]``. For development purpose there is a ``devel`` extra that
installs all development dependencies. There is also ``devel_ci`` that installs
-all dependencies needed in CI envioronment.
+all dependencies needed in the CI environment.
+
+.. note::
+ On 30th of November 2020, a new version of PIP (20.3) was released with a new, 2020 resolver.
+ This resolver does not yet work with Apache Airflow and might lead to errors in installation -
+ depending on your choice of extras. In order to install Airflow you need to either downgrade
+ pip to version 20.2.4 (``pip install --upgrade pip==20.2.4``) or, in case you use Pip 20.3, add the
+ option ``--use-deprecated legacy-resolver`` to your pip install command.
+
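+For example, a sketch of installing Airflow with the ``devel`` extra under the pip caveat above:
+
+.. code-block:: bash
+
+    # Either downgrade pip first ...
+    pip install --upgrade "pip==20.2.4"
+    pip install -e ".[devel]"
+
+    # ... or keep pip 20.3 and use the legacy resolver
+    pip install --use-deprecated legacy-resolver -e ".[devel]"
+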
This is the full list of those extras:
.. START EXTRAS HERE
-all, all_dbs, async, atlas, aws, azure, azure_blob_storage, azure_container_instances, azure_cosmos,
-azure_data_lake, cassandra, celery, cgroups, cloudant, crypto, dask, databricks, datadog, devel,
-devel_azure, devel_ci, devel_hadoop, doc, docker, druid, elasticsearch, emr, gcp, gcp_api,
-github_enterprise, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap,
-mongo, mssql, mysql, oracle, papermill, password, pinot, postgres, presto, qds, rabbitmq, redis, s3,
-salesforce, samba, segment, sendgrid, sentry, slack, snowflake, ssh, statsd, vertica, virtualenv,
-webhdfs, winrm
+all, all_dbs, amazon, apache.atlas, apache.cassandra, apache.druid, apache.hdfs, apache.hive,
+apache.pinot, apache.presto, apache.webhdfs, async, atlas, aws, azure, azure_blob_storage,
+azure_container_instances, azure_cosmos, azure_data_lake, azure_secrets, cassandra, celery, cgroups,
+cloudant, cncf.kubernetes, crypto, dask, databricks, datadog, devel, devel_all, devel_azure,
+devel_ci, devel_hadoop, doc, docker, druid, elasticsearch, emr, gcp, gcp_api, github_enterprise,
+google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap,
+microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, oracle, papermill, password,
+pinot, postgres, presto, qds, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry,
+slack, snowflake, ssh, statsd, vertica, virtualenv, webhdfs, winrm
.. END EXTRAS HERE
+Provider packages
+-----------------
-Airflow dependencies
---------------------
+Airflow 2.0 is split into core and providers. They are delivered as separate packages:
+
+* ``apache-airflow`` - core of Apache Airflow
+* ``apache-airflow-providers-*`` - More than 50 provider packages to communicate with external services
+
+In Airflow 1.10 all those providers were installed together within one single package and when you installed
+airflow locally, from sources, they were also installed. In Airflow 2.0, providers are separated out,
+and not installed together with the core, unless you set ``INSTALL_PROVIDERS_FROM_SOURCES`` environment
+variable to ``true``.
+
+In Breeze - which is a development environment, ``INSTALL_PROVIDERS_FROM_SOURCES`` variable is set to true,
+but you can add ``--skip-installing-airflow-providers-from-sources`` flag to Breeze to skip installing providers when
+building the images.
+
+One watch-out - providers are still always installed (or rather available) if you install airflow from
+sources using the ``-e`` (or ``--editable``) flag. In such a case airflow is read directly from the
+sources without copying airflow packages to the usual installation location, and since the 'providers'
+folder is in this airflow folder - the providers package is importable.
+
+Some of the packages have cross-dependencies with other providers packages. This typically happens for
+transfer operators where operators use hooks from the other providers in case they are transferring
+data between the providers. The list of dependencies is maintained (automatically with pre-commits)
+in ``airflow/providers/dependencies.json``. Pre-commits are also used to generate these dependencies.
+The dependency list is automatically used during PyPI package generation.
+
+Cross-dependencies between provider packages are converted into extras - if you need functionality from
+the other provider package you can install it by adding [extra] after the
+``apache-airflow-backport-providers-PROVIDER`` package name, for example:
+``pip install apache-airflow-backport-providers-google[amazon]`` in case you want to use GCP
+transfer operators from Amazon ECS.
+
+.. note::
+ On 30th of November 2020, a new version of PIP (20.3) was released with a new, 2020 resolver.
+ This resolver does not yet work with Apache Airflow and might lead to errors in installation -
+ depending on your choice of extras. In order to install Airflow you need to either downgrade
+ pip to version 20.2.4 (``pip install --upgrade pip==20.2.4``) or, in case you use Pip 20.3, add the
+ option ``--use-deprecated legacy-resolver`` to your pip install command.
+
+
+If you add a new dependency between different provider packages, it will be detected automatically during
+the pre-commit phase and pre-commit will fail - and add an entry in dependencies.json so that the package
+extra dependencies are properly added when the package is installed.
+
+You can regenerate the whole list of provider dependencies by running this command (you need to have
+``pre-commit`` installed):
+
+.. code-block:: bash
+
+ pre-commit run build-providers-dependencies
+
+
+Here is the list of packages and their extras:
+
+
+ .. START PACKAGE DEPENDENCIES HERE
+
+========================== ===========================
+Package Extras
+========================== ===========================
+amazon apache.hive,google,imap,mongo,mysql,postgres,ssh
+apache.druid apache.hive
+apache.hive amazon,microsoft.mssql,mysql,presto,samba,vertica
+apache.livy http
+dingding http
+discord http
+google amazon,apache.cassandra,cncf.kubernetes,facebook,microsoft.azure,microsoft.mssql,mysql,postgres,presto,salesforce,sftp
+hashicorp google
+microsoft.azure google,oracle
+microsoft.mssql odbc
+mysql amazon,presto,vertica
+opsgenie http
+postgres amazon
+sftp ssh
+slack http
+snowflake slack
+========================== ===========================
+
+ .. END PACKAGE DEPENDENCIES HERE
+
+Backport providers
+------------------
+
+You can also build backport provider packages for Airflow 1.10. They aim to provide a bridge when users
+of Airflow 1.10 want to migrate to Airflow 2.0. The backport packages are named similarly to the
+provider packages, but with "backport" added:
+
+* ``apache-airflow-backport-providers-*``
+
+Those backport providers are automatically refactored to work with Airflow 1.10.* and have a few
+limitations described in those packages.
+
+Dependency management
+=====================
Airflow is not a standard python project. Most of the python projects fall into one of two types -
application or library. As described in
`this StackOverflow question <https://stackoverflow.com/questions/28509481/should-i-pin-my-python-dependencies-versions>`_,
-decision whether to pin (freeze) requirements for a python project depdends on the type. For
+the decision whether to pin (freeze) dependency versions for a python project depends on the type. For
applications, dependencies should be pinned, but for libraries, they should be open.
For applications, pinning the dependencies makes it more stable to install in the future - because new
@@ -343,80 +675,75 @@ be open to allow several different libraries with the same requirements to be in
The problem is that Apache Airflow is a bit of both - application to install and library to be used when
you are developing your own operators and DAGs.
-This - seemingly unsolvable - puzzle is solved by having pinned requirement files. Those are available
-as of airflow 1.10.10.
+This - seemingly unsolvable - puzzle is solved by having pinned constraint files. Those are available
+as of Airflow 1.10.10 and were further improved in 1.10.12 (moved to separate orphan branches).
-Pinned requirement files
-------------------------
+Pinned constraint files
+=======================
By default when you install ``apache-airflow`` package - the dependencies are as open as possible while
-still allowing the apache-airflow package to install. This means that 'apache-airflow' package might fail to
+still allowing the apache-airflow package to install. This means that the ``apache-airflow`` package might
+fail to
install in case a direct or transitive dependency is released that breaks the installation. In such case
when installing ``apache-airflow``, you might need to provide additional constraints (for
example ``pip install apache-airflow==1.10.2 Werkzeug<1.0.0``)
-However we now have ``requirements-python.txt`` file generated
-automatically and committed in the requirements folder based on the set of all latest working and tested
-requirement versions. Those ``requirement-python.txt`` files can be used as
-constraints file when installing Apache Airflow - either from the sources
-
-.. code-block:: bash
-
- pip install -e . --constraint requirements/requirements-python3.6.txt
+.. note::
+    In November 2020, a new version of pip (20.3) was released with a new, 2020 resolver. This resolver
+    does not yet work with Apache Airflow and might lead to errors in installation - depending on your
+    choice of extras. In order to install Airflow you need to either downgrade pip to version 20.2.4
+    (``pip install --upgrade pip==20.2.4``) or, in case you use pip 20.3, add the option
+    ``--use-deprecated legacy-resolver`` to your pip install command.
-or from the pypi package
+However we now have ``constraints-<PYTHON_MAJOR_MINOR_VERSION>.txt`` files generated
+automatically and committed to the orphan ``constraints-master`` and ``constraints-1-10`` branches based on
+the set of all latest working and tested dependency versions. Those
+``constraints-<PYTHON_MAJOR_MINOR_VERSION>.txt`` files can be used as
+constraint files when installing Apache Airflow - either from the sources:
.. code-block:: bash
- pip install apache-airflow --constraint requirements/requirements-python3.6.txt
+    pip install -e . \
+        --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1-10/constraints-3.6.txt"
-
-This works also with extras - for example:
+or from the PyPI package:
.. code-block:: bash
- pip install .[gcp] --constraint requirements/requirements-python3.6.txt
-
+    pip install apache-airflow \
+        --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1-10/constraints-3.6.txt"
-It is also possible to use constraints directly from github using tag/version name:
+This also works with extras - for example:
.. code-block:: bash
- pip install apache-airflow[gcp]==1.10.10 \
- --constraint https://raw.githubusercontent.com/apache/airflow/1.10.10/requirements/requirements-python3.6.txt
-
-There are different set of fixed requirements for different python major/minor versions and you should
-use the right requirements file for the right python version.
-
-The ``requirements-python.txt`` file MUST be regenerated every time after
-the ``setup.py`` is updated. This is checked automatically in Travis CI build. There are separate
-jobs for each python version that checks if the requirements should be updated.
-
-If they are not updated, you should regenerate the requirements locally using Breeze as described below.
+    pip install .[ssh] \
+        --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-master/constraints-3.6.txt"
-Generating requirement files
-----------------------------
-This should be done every time after you modify setup.py file. You can generate requirement files
-using `Breeze `_ . Simply use those commands:
+As of apache-airflow 1.10.12 it is also possible to use constraints directly from GitHub using a specific
+tag/hash name. We tag commits working for a particular release with a ``constraints-<version>`` tag. So for
+example the fixed, valid constraints for 1.10.12 can be used via the ``constraints-1.10.12`` tag:
.. code-block:: bash
- breeze generate-requirements --python 3.7
+    pip install apache-airflow[ssh]==1.10.12 \
+        --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1.10.12/constraints-3.6.txt"
-.. code-block:: bash
-
- breeze generate-requirements --python 3.6
+There are different sets of fixed constraint files for different Python major/minor versions, and you
+should use the right file for the right Python version.
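+
+As a small sketch, the right file can be selected dynamically based on the Python version that is
+currently active (the URL layout is the one used in the examples above):
+
+.. code-block:: bash
+
+    PYTHON_VERSION="$(python -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')"
+    pip install apache-airflow \
+        --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1-10/constraints-${PYTHON_VERSION}.txt"
+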
-Note that when you generate requirements this way, you might update to latest version of requirements
-that were released since the last time so during tests you might get errors unrelated to your change.
-In this case the easiest way to fix it is to limit the culprit dependency to the previous version
-with ```` constraint added in setup.py.
+The ``constraints-<PYTHON_MAJOR_MINOR_VERSION>.txt`` files will be automatically regenerated by a CI cron
+job every time the ``setup.py`` is updated and pushed, provided the tests are successful. There are
+separate jobs for each Python version.
Backport providers packages
---------------------------
+**NOTE:** In case of problems with installation / development of backport packages
+check `troubleshooting installing backport packages `_.
+
Since we are developing new operators in the master branch, we prepared backport packages ready to be
installed for Airflow 1.10.* series. Those backport operators (the tested ones) are going to be released
in PyPi and we are going to maintain the list at
@@ -472,13 +799,46 @@ slack http
.. END PACKAGE DEPENDENCIES HERE
+Documentation
+=============
+
+The latest API documentation (for the master branch) is usually available
+`here <https://airflow.readthedocs.io/en/latest/>`__.
+
+To generate a local version you can use `Breeze <BREEZE.rst#building-the-documentation>`_.
+
+The documentation build verifies the consistency of the documentation and then runs in two steps:
+
+* spell checking
+* building documentation
+
+You can run only one of the steps by passing ``--spellcheck-only`` or ``--docs-only``.
+
+.. code-block:: bash
+
+    ./breeze build-docs
+
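+A short sketch of running a single step (assuming the extra flags are forwarded to the docs build
+after the ``--`` separator, as with other Breeze commands):
+
+.. code-block:: bash
+
+    # Run only the spell checking step.
+    ./breeze build-docs -- --spellcheck-only
+
+    # Run only the documentation building step.
+    ./breeze build-docs -- --docs-only
+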
+The documentation is also available as a downloadable artifact in GitHub Actions after the CI builds your PR.
+
+**Known issues:**
+
+If you are creating a new directory for a new integration in the ``airflow.providers`` package,
+you should also update the ``docs/autoapi_templates/index.rst`` file.
+
+If you are creating a new ``hooks``, ``sensors``, or ``operators`` directory in
+the ``airflow.providers`` package, you should also update
+the ``docs/operators-and-hooks-ref.rst`` file.
+
+If you are creating an ``example_dags`` directory, you need to create an ``example_dags/__init__.py`` file
+with the Apache license, or copy another ``__init__.py`` file that contains the necessary license.
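+
+A minimal sketch of bootstrapping such a directory by copying a licensed ``__init__.py`` from an
+existing package (the ``myprovider`` paths below are hypothetical placeholders):
+
+.. code-block:: bash
+
+    mkdir -p airflow/providers/myprovider/example_dags
+    cp airflow/providers/google/cloud/example_dags/__init__.py \
+        airflow/providers/myprovider/example_dags/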
+
Static code checks
==================
We check our code quality via static code checks. See
`STATIC_CODE_CHECKS.rst <STATIC_CODE_CHECKS.rst>`_ for details.
-Your code must pass all the static code checks in Travis CI in order to be eligible for Code Review.
+Your code must pass all the static code checks in the CI in order to be eligible for Code Review.
The easiest way to make sure your code is good before pushing is to use pre-commit checks locally
as described in the static code checks documentation.
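+
+A minimal sketch of enabling those checks locally (standard ``pre-commit`` usage; the full setup is
+described in the static code checks documentation):
+
+.. code-block:: bash
+
+    pip install pre-commit
+    # Install the git hook so the checks run automatically on every commit.
+    pre-commit install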
@@ -523,6 +883,67 @@ If this function is designed to be called by "end-users" (i.e. DAG authors) then
...
# You SHOULD not commit the session here. The wrapper will take care of commit()/rollback() if exception
+Naming Conventions for provider packages
+----------------------------------------
+
+In Airflow 2.0 we standardized and enforced naming for provider packages, modules and classes.
+Those rules (introduced as AIP-21) were not only documented but are also enforced using automated checks
+that verify whether the naming conventions are followed. Here is a brief summary of the rules; for a
+detailed discussion see `AIP-21: Changes in import paths <https://cwiki.apache.org/confluence/display/AIRFLOW/AIP-21%3A+Changes+in+import+paths>`_.
+
+The rules are as follows:
+
+* Provider packages are all placed in 'airflow.providers'
+
+* Providers are usually direct sub-packages of the 'airflow.providers' package but in some cases they can be
+  further split into sub-packages (for example the 'apache' package has 'cassandra', 'druid' ... providers) out
+  of which several different provider packages are produced (apache.cassandra, apache.druid). This is the
+  case when the providers are connected under a common umbrella but are only loosely coupled at the code level.
+
+* In some cases the package can have sub-packages, but they are all delivered as a single provider
+  package (for example the 'google' package contains 'ads', 'cloud' etc. sub-packages). This is the case when
+  the providers are connected under a common umbrella and are also tightly coupled at the code level.
+
+* Typical structure of a provider package:
+
+  * example_dags -> example DAGs are stored here (used for documentation and System Tests)
+  * hooks -> hooks are stored here
+  * operators -> operators are stored here
+  * sensors -> sensors are stored here
+  * secrets -> secret backends are stored here
+  * transfers -> transfer operators are stored here
+
+* Module names do not contain the words "hooks", "operators" etc. The right type comes from
+  the package name. For example the 'hooks.datastore' module contains the DataStore hook and
+  'operators.datastore' contains the DataStore operators.
+
+* Class names contain 'Operator', 'Hook', 'Sensor' - for example DataStoreHook and DataStoreExportOperator
+
+* Operator names usually follow the convention ``<Subject><Action><Entity>Operator``
+  (``BigQueryExecuteQueryOperator`` is a good example)
+
+* Transfer Operators are those that actively push data from one service/provider and send it to another
+  service (which might belong to the same or another provider). This usually involves two hooks. The convention
+  for those ``