In [1]:
import digitalhub as dh

In [2]:
# Fetch the "project-dbt-ci" project, creating it if it does not exist yet.
project = dh.get_or_create_project("project-dbt-ci")

In [6]:
# Refresh the project object and display its dictionary representation.
project.refresh().to_dict()

{'kind': 'project',
 'metadata': {'project': 'project-dbt-ci',
  'name': 'project-dbt-ci',
  'created': '2024-10-29T13:14:13.993Z',
  'updated': '2024-10-29T13:14:13.993Z',
  'created_by': 'khurshid@fbk.eu',
  'updated_by': 'khurshid@fbk.eu'},
 'spec': {'context': './',
  'functions': [],
  'artifacts': [],
  'workflows': [],
  'dataitems': [],
  'models': []},
 'status': {'state': 'CREATED'},
 'user': 'khurshid@fbk.eu',
 'id': 'project-dbt-ci',
 'name': 'project-dbt-ci',
 'key': 'store://project-dbt-ci'}

In [7]:
# Public CSV holding the sample employees table used as the dbt input.
url = "https://gist.githubusercontent.com/kevin336/acbb2271e66c10a5b73aacf82ca82784/raw/e38afe62e088394d61ed30884dd50a6826eee0a8/employees.csv"

# Register the CSV in the project as a "table" data item named "employees-dbt".
di = project.new_dataitem(
    name="employees-dbt",
    kind="table",
    path=url,
)

In [8]:
# SQL model used as the code of the dbt function: it reads the input
# referenced as `employees` through dbt's ref() and keeps only the rows whose
# DEPARTMENT_ID equals '50'.
sql = """
WITH tab AS (
    SELECT  *
    FROM    {{ ref('employees') }}
)
SELECT  *
FROM    tab
WHERE   tab."DEPARTMENT_ID" = '50'
"""

In [9]:
# Register the `sql` model in the project as a function of kind "dbt".
function = project.new_function(
    name="function-dbt",
    kind="dbt",
    code=sql,
)

In [10]:
# Launch the "transform" action: the data item key is supplied under the
# `employees` input name used by the SQL, and the output table is named
# "department-50".
run = function.run(
    "transform",
    inputs={"employees": di.key},
    outputs={"output_table": "department-50"},
)

Wait until the data transform run has completed. You can check the state of the run in the digitalhub Console application, or query it with the digitalhub SDK API call below.

In [12]:
# Query the current state of the transform run.
run.refresh().get_state()

{'state': 'RUNNING', 'message': 'job j-dbttransform-17ceb79c-9449-444b-8397-59273c498e74 created', 'transitions': [{'status': 'RUNNING', 'message': 'job j-dbttransform-17ceb79c-9449-444b-8397-59273c498e74 created', 'time': '2024-10-23T13:14:24.08321968Z'}, {'status': 'READY', 'time': '2024-10-23T13:14:24.042246908Z'}, {'status': 'BUILT', 'time': '2024-10-23T13:14:24.019606525Z'}], 'k8s': {'secret': {'secret': {'apiVersion': 'v1', 'kind': 'Secret', 'metadata': {'labels': {'dhcore/framework': 'k8sjob', 'app.kubernetes.io/managed-by': 'dhcore', 'app.kubernetes.io/part-of': 'dhcore-project-dbt-ci', 'dhcore/project': 'project-dbt-ci', 'dhcore/runtime': 'dbt', 'app.kubernetes.io/instance': 'dhcore-17ceb79c-9449-444b-8397-59273c498e74', 'app.kubernetes.io/version': '17ceb79c-9449-444b-8397-59273c498e74'}, 'name': 'sec-dbttransform-17ceb79c-9449-444b-8397-59273c498e74', 'namespace': 'digitalhub-tenant1'}}}, 'job': {'job': {'apiVersion': 'batch/v1', 'kind': 'Job', 'metadata': {'creationTimestam

In [11]:
# Obtain a handle on the same project again and preview the "department-50"
# data item produced by the dbt run.
# NOTE(review): `di` is rebound here from the input data item
# ("employees-dbt") to the output one; re-running the transform cell after
# this cell would pass the output's key as its input.
proj = dh.get_or_create_project("project-dbt-ci")
di = proj.get_dataitem('department-50')
df = di.as_df()
df.head()

Unnamed: 0,EMPLOYEE_ID,FIRST_NAME,LAST_NAME,EMAIL,PHONE_NUMBER,HIRE_DATE,JOB_ID,SALARY,COMMISSION_PCT,MANAGER_ID,DEPARTMENT_ID
0,198,Donald,OConnell,DOCONNEL,650.507.9833,21-JUN-07,SH_CLERK,2600,-,124,50
1,199,Douglas,Grant,DGRANT,650.507.9844,13-JAN-08,SH_CLERK,2600,-,124,50
2,120,Matthew,Weiss,MWEISS,650.123.1234,18-JUL-04,ST_MAN,8000,-,100,50
3,121,Adam,Fripp,AFRIPP,650.123.2234,10-APR-05,ST_MAN,8200,-,100,50
4,122,Payam,Kaufling,PKAUFLIN,650.123.3234,01-MAY-03,ST_MAN,7900,-,100,50


In [12]:
import os

# Directory that will hold the generated pipeline source file.
new_folder = 'src'
# exist_ok=True keeps the cell idempotent on re-runs and avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(new_folder, exist_ok=True)

In [13]:
%%writefile "src/dbt_ci_pipeline.py"

from digitalhub_runtime_kfp.dsl import pipeline_context

def myhandler(url):
    """Define the pipeline: a single ``transform`` step on ``function-dbt``.

    Args:
        url: Value bound to the ``employees`` input of the step; expected to
            be a data item key.
    """
    with pipeline_context() as pc:
        # The step handle is not kept: no later step depends on it.
        pc.step(
            name="dbt",
            function="function-dbt",
            action="transform",
            inputs={"employees": url},
            outputs={"output_table": "department-50"},
        )

Writing src/dbt_ci_pipeline.py


In [14]:
# Register the generated source file as a "kfp" workflow whose entry point is
# the `myhandler` function.
workflow = proj.new_workflow(
    name="pipeline_dbt",
    kind="kfp",
    code_src="src/dbt_ci_pipeline.py",
    handler="myhandler",
)

In [15]:
# Register the same CSV URL as a separate data item to feed the workflow.
di_url = proj.new_dataitem(
    name="url_data_item",
    kind="table",
    path=url,
)

In [16]:
# Start the workflow, passing the data item key as the `url` parameter.
workflow_run = workflow.run(parameters={"url": di_url.key})

In [18]:
# Display the run object returned by workflow.run().
workflow_run

{'kind': 'kfp+run', 'metadata': {'project': 'project-dbt-ci', 'name': '1e7e164d-4d37-4bee-8fcd-794b1b4b824f', 'created': '2024-10-29T13:16:36.661Z', 'updated': '2024-10-29T13:16:36.687Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu'}, 'spec': {'task': 'kfp+pipeline://project-dbt-ci/pipeline_dbt:96d94d81-ee1e-46d8-a373-3c4df12776f8', 'local_execution': False, 'function': 'kfp://project-dbt-ci/pipeline_dbt:96d94d81-ee1e-46d8-a373-3c4df12776f8', 'source': {'source': 'src/dbt_ci_pipeline.py', 'handler': 'myhandler', 'base64': 'CmZyb20gZGlnaXRhbGh1Yl9ydW50aW1lX2tmcC5kc2wgaW1wb3J0IHBpcGVsaW5lX2NvbnRleHQKCmRlZiBteWhhbmRsZXIodXJsKToKICAgIHdpdGggcGlwZWxpbmVfY29udGV4dCgpIGFzIHBjOgogICAgICAgIHMxX2RhdGFzZXQgPSBwYy5zdGVwKG5hbWU9ImRidCIsIGZ1bmN0aW9uPSJmdW5jdGlvbi1kYnQiLCBhY3Rpb249InRyYW5zZm9ybSIsIGlucHV0cz17ImVtcGxveWVlcyI6dXJsfSwgb3V0cHV0cz17Im91dHB1dF90YWJsZSI6ICJkZXBhcnRtZW50LTUwIn0pCg==', 'lang': 'python'}, 'inputs': {}, 'outputs': {}, 'parameters': {'url': 'store://project-d

Wait until the workflow pipeline has finished executing. You can check the state of the workflow in the Console application, or query it with the digitalhub SDK API call below.

In [None]:
# Query the current state of the workflow run.
workflow_run.refresh().get_state()

Once the workflow has completed, you can view the results of the scenario by fetching the list of newly created data items with the digitalhub SDK.

In [19]:
# List the data items of the "project-dbt-ci" project.
data_items = dh.list_dataitems(project='project-dbt-ci')

In [20]:
# Number of data items returned by the listing.
len(data_items)

5

As a result of this scenario, 5 data items have been created in the core project. You can view them in the project console of the 'Core' application instance, or fetch them with the digitalhub SDK API.

In [23]:
# Display the listed data items (the full output can be long).
data_items

[{'kind': 'table', 'metadata': {'project': 'project-dbt-ci', 'name': 'department-50', 'version': 'f0ea40e9-f6a2-4c54-b6cc-70f6918a9269', 'created': '2024-10-29T13:16:49.682Z', 'updated': '2024-10-29T13:16:49.682Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu', 'embedded': False}, 'spec': {'path': 'sql://digitalhub/public/department-50_vf0ea40e9-f6a2-4c54-b6cc-70f6918a9269', 'schema': {'fields': [{'name': 'EMPLOYEE_ID', 'type': 'integer'}, {'name': 'FIRST_NAME', 'type': 'string'}, {'name': 'LAST_NAME', 'type': 'string'}, {'name': 'EMAIL', 'type': 'string'}, {'name': 'PHONE_NUMBER', 'type': 'string'}, {'name': 'HIRE_DATE', 'type': 'string'}, {'name': 'JOB_ID', 'type': 'string'}, {'name': 'SALARY', 'type': 'integer'}, {'name': 'COMMISSION_PCT', 'type': 'string'}, {'name': 'MANAGER_ID', 'type': 'string'}, {'name': 'DEPARTMENT_ID', 'type': 'integer'}]}}, 'status': {'state': 'CREATED', 'files': [], 'preview': {'cols': [{'name': 'EMPLOYEE_ID', 'value': [198, 199, 120, 121,