From 70b677719282a0d1ea269c225cb6c466f646781b Mon Sep 17 00:00:00 2001
From: GitHub Action
Date: Wed, 27 Apr 2022 21:10:44 +0200
Subject: [PATCH] docs: Update documentation to showcase new connection feature

---
 README.md                                 |  8 ++-
 docs/development.rst                      | 16 ++++--
 docs/getting_started.rst                  |  8 +--
 docs/introduction.rst                     | 61 ++++++++++++++++++++++-
 examples/airflow_connection_target_dag.py | 52 +++++++++++++++++++
 5 files changed, 134 insertions(+), 11 deletions(-)
 create mode 100644 examples/airflow_connection_target_dag.py

diff --git a/README.md b/README.md
index 1eb9aaf..e6c69d0 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ With poetry:
 poetry install
 ```
 
-Install any extras you need, and only those you need:
+Install with any necessary extras:
 
 ``` shell
 poetry install -E postgres -E redshift
 ```
@@ -85,6 +85,12 @@ As of the time of writing S3 is the only supported backend for dbt projects, but
 
 Each dbt execution produces one or more [JSON artifacts](https://docs.getdbt.com/reference/artifacts/dbt-artifacts/) that are valuable to produce meta-metrics, build conditional workflows, for reporting purposes, and other uses. airflow-dbt-python can push these artifacts to [XCom](https://airflow.apache.org/docs/apache-airflow/stable/concepts/xcoms.html) as requested via the `do_xcom_push_artifacts` parameter, which takes a list of artifacts to push.
 
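+As a quick sketch of how this looks in practice (the task id and project path below are illustrative), artifacts are requested by their file names:
+
+``` python
+from airflow_dbt_python.dbt.operators import DbtRunOperator
+
+dbt_run = DbtRunOperator(
+    task_id="dbt_run",
+    project_dir="/path/to/my/dbt/project/",
+    # Push these dbt artifacts to XCom once the task finishes.
+    do_xcom_push_artifacts=["manifest.json", "run_results.json"],
+)
+```
+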
+## Use Airflow connections as dbt targets (without a profiles.yml)
+
+[Airflow connections](https://airflow.apache.org/docs/apache-airflow/stable/howto/connection.html) allow users to manage and store connection information, such as hostname, port, username, and password, for operators to use when accessing certain applications, like databases. Similarly, a dbt `profiles.yml` file stores connection information under each target key. `airflow-dbt-python` bridges the gap between the two and allows you to use connection information stored as an Airflow connection by specifying the connection id as the `target` parameter of any of the dbt operators it provides. What's more, if using an Airflow connection, the `profiles.yml` file may be entirely omitted (although keep in mind that a `profiles.yml` file may also contain configuration beyond target connection information).
+
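+As a minimal sketch (the connection id and project path are illustrative), pointing a dbt operator at an Airflow connection looks like this:
+
+``` python
+from airflow_dbt_python.dbt.operators import DbtRunOperator
+
+dbt_run = DbtRunOperator(
+    task_id="dbt_run",
+    # The id of an existing Airflow connection, used as the dbt target.
+    target="my_db_connection",
+    # No profiles.yml is required; if one is provided, the connection is merged
+    # into its existing targets.
+    profiles_dir=None,
+    project_dir="/path/to/my/dbt/project/",
+)
+```
+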
+See a complete example DAG [here](examples/airflow_connection_target_dag.py).
+
 # Motivation
 
 ## Airflow running in a managed environment
diff --git a/docs/development.rst b/docs/development.rst
index 66316dd..d5020c6 100644
--- a/docs/development.rst
+++ b/docs/development.rst
@@ -10,6 +10,8 @@ Poetry
 
 airflow-dbt-python uses `Poetry <https://python-poetry.org>`_ for project management. Ensure it's installed before running: see `Poetry's installation documentation <https://python-poetry.org/docs/#installation>`_.
 
+As of ``airflow-dbt-python`` version 0.14, we have moved the project to Poetry >= 1.2.0 to allow us to use dependency groups.
+
 Installing Airflow
 ------------------
 
@@ -26,22 +28,26 @@ Installing the ``airflow`` extra will fetch the latest version of Airflow with m
 
 .. code-block:: shell
 
-   cd airflow-dbt-python
    poetry install -E airflow
 
+Some features require Airflow providers. For example, any S3 backend operations require ``apache-airflow-providers-amazon``. These providers may be installed individually or with the ``airflow-providers`` extra:
+
+.. code-block:: shell
+
+   poetry install -E airflow-providers
 
 Building from source
 --------------------
 
 Clone the main repo and install it:
 
-
 .. code-block:: shell
 
    git clone https://github.com/tomasfarias/airflow-dbt-python.git
    cd airflow-dbt-python
-   poetry install
+   poetry install --with dev
 
+The dev dependency group includes development tools for code formatting, type checking, and testing.
 
 Pre-commit hooks
 ----------------
@@ -78,11 +84,11 @@ Requirements
 
 Unit tests interact with a `PostgreSQL <https://www.postgresql.org>`_ database as a target to run dbt commands. This requires PostgreSQL to be installed in your local environment. Installation instructions for all major platforms can be found here: https://www.postgresql.org/download/.
 
-Some unit tests require the `Amazon provider package for Airflow <https://airflow.apache.org/docs/apache-airflow-providers-amazon/>`_. Ensure it's installed via the ``amazon`` extra:
+Some unit tests require the `Amazon provider package for Airflow <https://airflow.apache.org/docs/apache-airflow-providers-amazon/>`_. Ensure it's installed via the ``airflow-providers`` extra:
 
 .. code-block:: shell
 
-   poetry install -E amazon
+   poetry install -E airflow-providers
 
 Running unit tests with pytest
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/docs/getting_started.rst b/docs/getting_started.rst
index c2a1c5a..edabcc1 100644
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -54,18 +54,18 @@ airflow-dbt-python can also be built from source by cloning the main repo:
 
    git clone https://github.com/tomasfarias/airflow-dbt-python.git
    cd airflow-dbt-python
 
-And installing with ``poetry`` (without development dependencies):
+And installing with ``poetry``:
 
 .. code-block:: shell
 
-   poetry install --no-dev
+   poetry install
 
 As with ``pip``, any extra adapters can be installed:
 
 .. code-block:: shell
 
-   poetry install -E postgres -E redshift -E bigquery -E snowflake --no-dev
-   poetry install -E all --no-dev
+   poetry install -E postgres -E redshift -E bigquery -E snowflake
+   poetry install -E all
 
 Installing in MWAA
 ^^^^^^^^^^^^^^^^^^
diff --git a/docs/introduction.rst b/docs/introduction.rst
index 179ab75..67f8212 100644
--- a/docs/introduction.rst
+++ b/docs/introduction.rst
@@ -49,7 +49,7 @@ This way, artifacts may be pulled and operated on by downstream tasks. For examp
 
 .. code-block:: python
    :linenos:
-   :caption: example_dbt_artifacts.py
+   :caption: example_dbt_artifacts_dag.py
 
    import datetime as dt
 
@@ -87,3 +87,62 @@ This way, artifacts may be pulled and operated on by downstream tasks. For examp
        )
 
    dbt_run >> process_artifacts
+
+Use Airflow connections as dbt targets (without a profiles.yml)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+`Airflow connections <https://airflow.apache.org/docs/apache-airflow/stable/howto/connection.html>`_ allow users to manage and store connection information, such as hostname, port, username, and password, for operators to use when accessing certain applications, like databases. Similarly, a dbt ``profiles.yml`` file stores connection information under each target key.
+
+``airflow-dbt-python`` bridges the gap between the two and allows you to use connection information stored as an Airflow connection by specifying the connection id as the ``target`` parameter of any of the dbt operators it provides. What's more, if using an Airflow connection, the ``profiles.yml`` file may be entirely omitted (although keep in mind that a ``profiles.yml`` file may also contain configuration beyond target connection information).
+
+.. code-block:: python
+   :linenos:
+   :caption: airflow_connection_target_dag.py
+
+   import datetime as dt
+   import json
+
+   from airflow import DAG, settings
+   from airflow.models.connection import Connection
+   from airflow.utils.dates import days_ago
+   from airflow_dbt_python.dbt.operators import DbtRunOperator
+
+   # For illustration purposes, and to keep the example self-contained, we create
+   # a Connection using Airflow's ORM. However, any method of loading connections
+   # would work, like Airflow's UI, Airflow's CLI, or deployment scripts.
+   my_conn = Connection(
+       conn_id="my_db_connection",
+       conn_type="postgres",
+       description="A test postgres connection",
+       host="localhost",
+       login="username",
+       port=5432,
+       schema="my_dbt_schema",
+       password="password",  # pragma: allowlist secret
+       # Other dbt parameters can be added as extras
+       extra=json.dumps(dict(threads=4, sslmode="require")),
+   )
+   session = settings.Session()
+   session.add(my_conn)
+   session.commit()
+
+   with DAG(
+       dag_id="example_airflow_connection",
+       schedule_interval="0 * * * *",
+       start_date=days_ago(1),
+       catchup=False,
+       dagrun_timeout=dt.timedelta(minutes=60),
+   ) as dag:
+       dbt_run = DbtRunOperator(
+           task_id="dbt_run_hourly",
+           target="my_db_connection",
+           # Profiles file is not needed as we are using an Airflow connection.
+           # If a profiles file is used, the Airflow connection will be merged into
+           # the existing targets.
+           profiles_dir=None,  # Defaults to None so this may be omitted.
+           project_dir="/path/to/my/dbt/project/",
+           select=["+tag:hourly"],
+           exclude=["tag:deprecated"],
+       )
diff --git a/examples/airflow_connection_target_dag.py b/examples/airflow_connection_target_dag.py
new file mode 100644
index 0000000..e24db9d
--- /dev/null
+++ b/examples/airflow_connection_target_dag.py
@@ -0,0 +1,52 @@
+"""Sample basic DAG which showcases using an Airflow Connection as target."""
+import datetime as dt
+import json
+
+from airflow import DAG, settings
+from airflow.models.connection import Connection
+from airflow.utils.dates import days_ago
+
+from airflow_dbt_python.dbt.operators import DbtRunOperator
+
+# For illustration purposes, and to keep the example self-contained, we create
+# a Connection using Airflow's ORM. However, any method of loading connections
+# would work, like Airflow's UI, Airflow's CLI, or deployment scripts.
+my_conn = Connection(
+    conn_id="my_db_connection",
+    conn_type="postgres",
+    description="A test postgres connection",
+    host="localhost",
+    login="username",
+    port=5432,
+    schema="my_dbt_schema",
+    password="password",  # pragma: allowlist secret
+    # Other dbt parameters can be added as extras
+    extra=json.dumps(dict(threads=4, sslmode="require")),
+)
+
+
+if settings.Session is None:
+    settings.configure_orm()
+
+session = settings.Session()
+session.add(my_conn)
+session.commit()
+
+with DAG(
+    dag_id="example_airflow_connection",
+    schedule_interval="0 * * * *",
+    start_date=days_ago(1),
+    catchup=False,
+    dagrun_timeout=dt.timedelta(minutes=60),
+) as dag:
+    dbt_run = DbtRunOperator(
+        task_id="dbt_run_hourly",
+        target="my_db_connection",
+        # Profiles file is not needed as we are using an Airflow connection.
+        # If a profiles file is used, the Airflow connection will be merged into
+        # the existing targets.
+        profiles_dir=None,  # Defaults to None so this may be omitted.
+        project_dir="/path/to/my/dbt/project/",
+        select=["+tag:hourly"],
+        exclude=["tag:deprecated"],
+    )