In [None]:
from pathlib import Path

# Make sure the local "src" directory exists; the pipeline source file is
# written into it by a later cell.
Path("src").mkdir(parents=False, exist_ok=True)

In [None]:
import digitalhub as dh

In [None]:
# Obtain the "project-dbt-ci" project handle through the SDK's get-or-create
# call; the dataitem, function and workflow below are registered through it.
project = dh.get_or_create_project("project-dbt-ci")

In [None]:
# Source CSV with the employees data, registered in the project as a
# "table" dataitem named "employees-dbt".
url = "https://gist.githubusercontent.com/kevin336/acbb2271e66c10a5b73aacf82ca82784/raw/e38afe62e088394d61ed30884dd50a6826eee0a8/employees.csv"
di = project.new_dataitem(
    name="employees-dbt",
    kind="table",
    path=url,
)

In [None]:
# dbt model used as the code of the "function-dbt" function: it reads the
# model referenced as 'employees' and keeps only the rows whose
# DEPARTMENT_ID equals '50'.
sql = """
WITH tab AS (
    SELECT  *
    FROM    {{ ref('employees') }}
)
SELECT  *
FROM    tab
WHERE   tab."DEPARTMENT_ID" = '50'
"""

In [None]:
# Register the SQL model above as a function of kind "dbt" in the project.
function = project.new_function(
    name="function-dbt",
    kind="dbt",
    code=sql,
)

In [None]:
# Execute the function's "transform" action: the dataitem key is bound to the
# "employees" input and the output table is named "department-50".
run = function.run(
    "transform",
    inputs={"employees": di.key},
    outputs={"output_table": "department-50"},
    wait=True,
)

Wait until the data transformation `run` has completed.

In [None]:
# Fetch the "department-50" output of the run and preview its first rows
# as a DataFrame.
department_50 = run.output("department-50")
department_50.as_df().head()

In [None]:
%%writefile "src/dbt_pipeline.py"

from digitalhub_runtime_kfp.dsl import pipeline_context

def myhandler(url):
    """Define the workflow pipeline: a single dbt transformation step.

    The step is named "dbt" and runs the "transform" action of the
    "function-dbt" function, writing its result to the "department-50"
    output table.

    Args:
        url: Value forwarded as the ``employees`` input of the step
            (the notebook passes a dataitem key here -- TODO confirm that
            other reference forms are accepted).
    """
    with pipeline_context() as pc:
        # The step handle is not kept: no later step consumes its result.
        pc.step(
            name="dbt",
            function="function-dbt",
            action="transform",
            inputs={"employees": url},
            outputs={"output_table": "department-50"},
        )

In [None]:
# Register the pipeline written to src/dbt_pipeline.py as a workflow of kind
# "kfp", with `myhandler` as its entry point.
workflow = project.new_workflow(
    name="pipeline_dbt",
    kind="kfp",
    code_src="src/dbt_pipeline.py",
    handler="myhandler",
)

In [None]:
# Run the workflow's "build" action before executing the pipeline itself;
# wait=True presumably blocks until that run finishes -- TODO confirm.
workflow.run("build", wait=True)

In [None]:
# Execute the workflow's "pipeline" action, passing the dataitem key as the
# handler's `url` parameter.
workflow_run = workflow.run(
    "pipeline",
    parameters={"url": di.key},
    wait=True,
)

Wait until the workflow pipeline has finished executing. You can check the state of the workflow from the console application or through the digitalhub SDK API.