In [None]:
import pandas as pd
import requests
import os
import json

In [None]:
# Historical parking-availability export of the Bologna open-data portal,
# requested as a semicolon-delimited CSV with timestamps in UTC.
URL = "https://opendata.comune.bologna.it/api/explore/v2.1/catalog/datasets/disponibilita-parcheggi-storico/exports/csv?lang=it&timezone=UTC&use_labels=true&delimiter=%3B"

df = pd.read_csv(URL, sep=";")

# 'coordinate' packs both values into one "lat, lon" string; split it in two.
df[['lat', 'lon']] = df['coordinate'].str.split(', ',expand=True)
# Parse the coordinates as numbers right away, as the `downloader` handler
# further down does, so every frame derived from `df` carries numeric
# lat/lon instead of text.
df['lat'] = pd.to_numeric(df['lat'])
df['lon'] = pd.to_numeric(df['lon'])

# Drop the columns that are not used later and switch to snake_case names.
df = (
    df.drop(columns=['% occupazione', 'GUID', 'coordinate'])
      .rename(columns={
          'Parcheggio': 'parcheggio',
          'Data': 'data',
          'Posti liberi': 'posti_liberi',
          'Posti occupati': 'posti_occupati',
          'Posti totali': 'posti_totali',
      })
)
df

In [None]:
# Columns that describe a parking facility: its name and its position.
KEYS = ['parcheggio', 'lat', 'lon']

# Collapse the readings to one row per parking (first value found in each
# group), then keep only the descriptive columns.
df_parcheggi = (
    df.groupby(['parcheggio'])
      .first()
      .reset_index()
      .loc[:, KEYS]
)
df_parcheggi

In [None]:
# Aggregate the raw readings to one row per parking and per hour.
rdf = df.copy()
rdf['data'] = pd.to_datetime(rdf['data'])
# Truncate every timestamp to the start of its hour. floor() also clears the
# sub-second part, which replace(second=0, minute=0) left untouched and which
# could split readings of the same hour into different groups.
# NOTE: the column keeps the name 'day' although its resolution is one hour.
rdf['day'] = rdf['data'].dt.floor('h')
# Coordinates may still be text at this point; they must be numeric to be averaged.
rdf['lat'] = rdf['lat'].astype(float)
rdf['lon'] = rdf['lon'].astype(float)
rdf = rdf.drop(columns=['data'])
# Mean of every remaining column inside each (parking, hour) bucket.
grouped = rdf.groupby(['parcheggio', 'day']).mean()
df_aggregated = grouped.reset_index()
df_aggregated

In [None]:
import digitalhub as dh

# Name of the DigitalHub project used by every function, data item and run below.
PROJECT_NAME = "parcheggi"

# Obtain the project handle through get_or_create_project.
# The author left a source argument disabled:
#   source="git://github.com/scc-digitalhub/gdb-project-parkings.git"
proj = dh.get_or_create_project(
    PROJECT_NAME
)

In [None]:
%%writefile "src/download_all_dh_core.py"
from digitalhub_runtime_python import handler
import pandas as pd

@handler(outputs=["dataset"])
def downloader(project, url):
    """Load the parking CSV and return it as a cleaned DataFrame.

    Args:
        project: not used by this handler.
        url: input object whose ``as_df`` method loads the semicolon-delimited CSV.

    Returns:
        DataFrame without the '% occupazione', 'GUID' and 'coordinate' columns,
        with snake_case column names and numeric 'lat' / 'lon' columns.
    """
    unused_columns = ['% occupazione', 'GUID', 'coordinate']
    new_names = {
        'Parcheggio': 'parcheggio',
        'Data': 'data',
        'Posti liberi': 'posti_liberi',
        'Posti occupati': 'posti_occupati',
        'Posti totali': 'posti_totali',
    }

    frame = url.as_df(file_format='csv', sep=";")
    # 'coordinate' holds "lat, lon" in one string; split it into two columns.
    frame[['lat', 'lon']] = frame['coordinate'].str.split(', ', expand=True)
    frame = frame.drop(columns=unused_columns)
    frame = frame.rename(columns=new_names)
    # The split yields text; convert both coordinates to numbers.
    for coord in ("lat", "lon"):
        frame[coord] = pd.to_numeric(frame[coord])
    return frame

In [None]:
# Register the handler written to src/download_all_dh_core.py as a python function of the project.
FUNCTION_NAME = "downloader-funct"
func = proj.new_function(
    name=FUNCTION_NAME,
    kind="python",
    python_version="PYTHON3_9",
    source={
        "source": "src/download_all_dh_core.py",
        "handler": "downloader",
    },
)

In [None]:
# Declare the remote CSV export as a "table" data item of the project so that
# its key can be handed to a function run as input.
di = proj.new_dataitem(
    name="url_data_item",
    kind="table",
    path=URL,
)

In [None]:
# Launch the downloader as a job: the data item key is passed as the handler's
# `url` input and the handler's "dataset" output is mapped to the name "dataset".
# local_execution=True is not set here; the later runs in this notebook pass it.
run = func.run(
    action="job",
    inputs={"url": di.key},
    outputs={"dataset": "dataset"},
)

In [None]:
# Display the status held by the run object of the downloader job.
run.status

In [None]:
# Refresh the run object (presumably re-reads its current state from the
# platform — TODO confirm against the DigitalHub SDK).
run.refresh()

In [None]:
# Display the key of the data item returned under the run's "dataset" output.
run.outputs()['dataset'].key

In [None]:
%%writefile "src/extract_parkings_dh_core.py"
from digitalhub_runtime_python import handler
import pandas as pd

@handler(outputs=["parkings"])
def extract_parkings(project, di):
    """Build the table of parkings, one row per facility.

    Args:
        project: not used by this handler.
        di: input object whose ``as_df()`` returns the readings; the frame must
            provide the columns 'parcheggio', 'lat', 'lon' and 'posti_totali'.

    Returns:
        DataFrame with the columns 'parcheggio', 'lat', 'lon' and
        'posti_totali', taken from the first value found for each parking.
    """
    parking_columns = ['parcheggio', 'lat', 'lon', 'posti_totali']
    readings = di.as_df()
    # first() keeps one value per column for every parking name.
    one_per_parking = readings.groupby(['parcheggio']).first()
    return one_per_parking.reset_index()[parking_columns]

In [None]:
# Register the handler written to src/extract_parkings_dh_core.py as a python function of the project.
FUNCTION_NAME = "extract-parkings"
func = proj.new_function(
    name=FUNCTION_NAME,
    kind="python",
    python_version="PYTHON3_9",
    source={
        "source": "src/extract_parkings_dh_core.py",
        "handler": "extract_parkings",
    },
)

In [None]:
# Keep the key of the "dataset" output of the current run; it is passed as the
# `di` input of the next function run.
new_data_item = run.outputs()['dataset'].key

In [None]:
# Run extract-parkings as a job with local execution, feeding it the data item
# key selected above and mapping its "parkings" output to the name "parkings".
run = func.run(
    action="job",
    local_execution=True,
    inputs={"di": new_data_item},
    outputs={"parkings": "parkings"},
)

In [None]:
# Display the key of the data item returned under the run's "parkings" output.
run.outputs()['parkings'].key

In [None]:
%%writefile "src/aggregations_parkings_dh_core.py"
from digitalhub_runtime_python import handler
import pandas as pd

@handler(outputs=["parking_data_aggregated"])
def aggregate_parkings(project, di):
    """Average the parking readings per parking and per hour.

    Args:
        project: not used by this handler.
        di: input object whose ``as_df()`` returns the readings; the frame must
            provide the columns 'data', 'parcheggio', 'lat' and 'lon'.

    Returns:
        DataFrame with one row per ('parcheggio', 'day') pair holding the mean
        of every other column, including the derived 'hour' and 'dow' columns.
        'day' is the reading timestamp truncated to the hour; the column name
        is kept unchanged for consumers of the output.
    """
    rdf = di.as_df()
    rdf['data'] = pd.to_datetime(rdf['data'])
    # floor('h') truncates to the start of the hour and, unlike
    # replace(second=0, minute=0), also clears sub-second components, so all
    # readings of one hour end up in the same group.
    rdf['day'] = rdf['data'].dt.floor('h')
    rdf['hour'] = rdf['day'].dt.hour
    rdf['dow'] = rdf['day'].dt.dayofweek
    rdf = rdf.drop(columns=['data'])
    # Coordinates must be numeric to be averaged; convert the whole column at once.
    rdf['lat'] = rdf['lat'].astype(float)
    rdf['lon'] = rdf['lon'].astype(float)
    grouped = rdf.groupby(['parcheggio', 'day']).mean()
    return grouped.reset_index()

In [None]:
# Register the handler written to src/aggregations_parkings_dh_core.py as a python function of the project.
FUNCTION_NAME = "aggregate-parkings"
func = proj.new_function(
    name=FUNCTION_NAME,
    kind="python",
    python_version="PYTHON3_9",
    source={
        "source": "src/aggregations_parkings_dh_core.py",
        "handler": "aggregate_parkings",
    },
)

In [None]:
# The aggregation handler needs the full readings table: it reads the 'data'
# timestamp column. The "parkings" output of the previous run only carries
# parcheggio / lat / lon / posti_totali, so feeding it in fails on 'data'.
# `run` now refers to the extract-parkings run, so the downloader result is
# looked up through the project by the name it was stored under ("dataset").
new_data_item = proj.get_dataitem("dataset").key
print(new_data_item)

In [None]:
# Run aggregate-parkings as a job with local execution on the data item key
# selected in the previous cell; its "parking_data_aggregated" output is
# mapped to the name "parking_data_aggregated".
run = func.run(
    action="job",
    local_execution=True,
    inputs={"di": new_data_item},
    outputs={"parking_data_aggregated": "parking_data_aggregated"},
)