In [None]:
%%writefile "functions.py"

from digitalhub_runtime_python import handler
import pandas as pd


@handler(outputs=["dataset"])
def downloader(url):
    """Load the input as a DataFrame and return it unchanged.

    Args:
        url: Object exposing ``as_df()`` (presumably a digitalhub dataitem
            pointing at the remote table — confirm against the caller).

    Returns:
        Whatever ``url.as_df()`` produces; it is published under the
        ``"dataset"`` output name declared in the decorator.
    """
    return url.as_df()


@handler(outputs=["dataset-spire"])
def process_spire(di):
    """Build a table with one row per distinct ``'codice spira'`` value.

    Args:
        di: Object exposing ``as_df()``; the resulting frame must contain
            every column listed in ``sensor_columns`` below.

    Returns:
        DataFrame restricted to ``sensor_columns``, with one row per
        ``'codice spira'``. Each remaining column carries the value that
        ``GroupBy.first()`` picks for that group.
    """
    sensor_columns = ['codice spira','longitudine','latitudine','Livello','tipologia','codice','codice arco','codice via','Nome via', 'stato','direzione','angolo','geopoint']

    readings = di.as_df()
    # Collapse all rows sharing a 'codice spira' into a single row.
    first_per_sensor = readings.groupby(['codice spira']).first()
    # Turn the group key back into a regular column before selecting.
    sensors = first_per_sensor.reset_index()
    return sensors[sensor_columns]


@handler(outputs=["dataset-measures"])
def process_measure(di):
    """Reshape the 24 hourly columns into a long ``time``/``value`` table.

    The input frame is expected to hold one column per hourly slot, named
    ``'00:00-01:00'`` through ``'23:00-24:00'``, plus the ``'data'`` and
    ``'codice spira'`` columns. Every input row yields 24 output rows, one
    per slot.

    Args:
        di: Object exposing ``as_df()`` that returns such a frame.

    Returns:
        DataFrame with columns ``['time', 'codice spira', 'value']``.
        ``time`` is the ``'data'`` value, a space, and the slot's start time
        (e.g. ``'<data> 07:00'``). Rows are grouped slot by slot and keep the
        original index labels, so the index is repeated 24 times.

    Raises:
        KeyError: If any expected column is missing from the input frame.
    """
    COLUMNS = ['data', 'codice spira']
    # (start time, column label) per hourly slot: ('00:00', '00:00-01:00') ... ('23:00', '23:00-24:00').
    SLOTS = [(f"{hour:02d}:00", f"{hour:02d}:00-{hour + 1:02d}:00") for hour in range(24)]
    KEYS = [label for _, label in SLOTS]

    df = di.as_df()
    # Select up front so a missing column fails before any reshaping starts.
    rdf = df[COLUMNS + KEYS]

    ls = []
    for start, key in SLOTS:
        # Build a fresh frame per slot rather than adding columns to a slice
        # of `rdf`, which triggers SettingWithCopyWarning.
        ls.append(pd.DataFrame({
            'time': rdf['data'] + ' ' + start,
            'codice spira': rdf['codice spira'],
            'value': rdf[key],
        }))
    return pd.concat(ls)

In [None]:
import digitalhub as dh

# Fetch the project, creating it on first use
project = dh.get_or_create_project("project-workflow")

# Register the remote CSV export as a table dataitem
url = (
    "https://opendata.comune.bologna.it/api/explore/v2.1/catalog/datasets/"
    "rilevazione-flusso-veicoli-tramite-spire-anno-2023/exports/csv"
    "?lang=it&timezone=Europe%2FRome&use_labels=true&delimiter=%3B"
)
url_dataitem = project.new_dataitem(
    name="url-dataitem",
    kind="table",
    path=url,
)

# Read the semicolon-separated CSV behind the dataitem into a pandas DataFrame
df = url_dataitem.as_df(file_format="csv", sep=";")

# Log the downloaded DataFrame as a new table dataitem
s3_dataitem = project.log_dataitem(
    name="table-spire",
    kind="table",
    data=df,
)

In [None]:
# Register one python function per handler written to functions.py
downloader_function = project.new_function(
    name="downloader",
    kind="python",
    code_src="functions.py",
    python_version="PYTHON3_10",
    handler="downloader",
)

process_spire_function = project.new_function(
    name="process-spire",
    kind="python",
    code_src="functions.py",
    python_version="PYTHON3_10",
    handler="process_spire",
)

process_measure_function = project.new_function(
    name="process-measure",
    kind="python",
    code_src="functions.py",
    python_version="PYTHON3_10",
    handler="process_measure",
)

In [None]:
# Define the "kfp" workflow whose entry point is `my_pipeline` in pipeline.py
# NOTE(review): pipeline.py is not written by any cell visible here — confirm it exists
workflow = project.new_workflow(
    name="workflow",
    kind="kfp",
    handler="my_pipeline",
    code_src="pipeline.py",
)

In [None]:
# Launch the workflow, passing the logged dataitem's key as the `di_key` parameter
run = workflow.run(
    parameters={"di_key": s3_dataitem.key},
)