# Monitor

In this step we will create a scheduled Monitor workflow pipeline whose purpose is to:

1) call the service created after training a prediction model with the darts framework and the NBEATS deep learning model (see notebook parcheggi_ml.ipynb);
2) save the predictions in the database.


## Platform Support - Data Ops
We use the platform support to read the training data that was stored in the platform by the execution of the notebook parcheggi_data_pipeline.ipynb.

In [31]:
import pandas as pd
import requests
import os
import json
import digitalhub as dh

In [32]:
# CSV export endpoint of the "disponibilita-parcheggi-storico" dataset on the Bologna
# open-data portal (query string asks for UTC timestamps and ';' as delimiter).
# NOTE(review): this constant is not referenced by the cells visible in this notebook — confirm it is still needed.
URL = "https://opendata.comune.bologna.it/api/explore/v2.1/catalog/datasets/disponibilita-parcheggi-storico/exports/csv?lang=it&timezone=UTC&use_labels=true&delimiter=%3B"

In [33]:
# Name of the platform project that holds the dataset, the function and the workflow.
PROJECT_NAME = "parcheggi-scheduler"

# Fetch the project if it already exists, otherwise create it.
proj = dh.get_or_create_project(PROJECT_NAME)
print(f"created project {PROJECT_NAME}")

# Echo the project name as the cell result.
PROJECT_NAME

created project parcheggi-scheduler


'parcheggi-scheduler'

In [34]:
# Look up the dataitem named "dataset" in the project.
data_item_download = proj.get_dataitem("dataset")

In [35]:
# Materialise the dataitem as a dataframe and display it as the cell result.
parkings_df = data_item_download.as_df()
parkings_df

Unnamed: 0,parcheggio,data,posti_liberi,posti_occupati,posti_totali,lat,lon
0,VIII Agosto,2024-06-07T01:59:00+00:00,484.0,141.0,625,44.500297,11.345368
1,Riva Reno,2024-06-07T02:09:00+00:00,369.0,101.0,470,44.501153,11.336062
2,Riva Reno,2024-06-07T02:19:00+00:00,369.0,101.0,470,44.501153,11.336062
3,VIII Agosto,2024-06-07T02:29:00+00:00,487.0,138.0,625,44.500297,11.345368
4,Riva Reno,2024-06-07T02:29:00+00:00,369.0,101.0,470,44.501153,11.336062
...,...,...,...,...,...,...,...
53399,Autostazione,2024-10-17T10:49:00+00:00,152.0,113.0,265,44.504422,11.346514
53400,VIII Agosto,2024-10-17T11:09:00+00:00,84.0,541.0,625,44.500297,11.345368
53401,Riva Reno,2024-10-17T11:09:00+00:00,249.0,221.0,470,44.501153,11.336062
53402,VIII Agosto,2024-10-17T11:19:00+00:00,69.0,556.0,625,44.500297,11.345368


In the script below, the id of the model service's 'serve' run must be updated. From the project console, open RUNS, find the model_serve run that is in RUNNING state, and copy its identifier (the last part of the key value) into the `get_run` call, for example:

**project.get_run(identifier='f4823893-1785-4a14-aeb3-99335b64f0fb')**


In [36]:
%%writefile "src/predict_nbeats_timeseries.py"
from digitalhub_runtime_python import handler
import datetime 
import requests
import json
import pandas as pd
import digitalhub as dh

def _merge_with_history(project, name, new_df, dedupe_on=None):
    """Append new_df to the dataitem `name` already stored in the project.

    Args:
        project: project object exposing get_dataitem().
        name: name of the dataitem holding the previously logged rows.
        new_df: dataframe with the rows produced by the current run.
        dedupe_on: optional list of columns; when given, duplicated rows on
            these columns are dropped after concatenation (first one is kept).

    Returns:
        The concatenated dataframe, or new_df unchanged when the stored
        dataitem cannot be loaded or merged (e.g. on the very first run).
    """
    try:
        history_df = project.get_dataitem(name).as_df()
        merged_df = pd.concat([history_df, new_df], ignore_index=True)
        if dedupe_on is not None:
            merged_df = merged_df.drop_duplicates(subset=dedupe_on)
    except Exception as exc:
        # Best effort, as before: fall back to the new rows only, but say so
        # instead of failing silently (and let KeyboardInterrupt/SystemExit through).
        print(f"Could not merge with existing dataitem '{name}' ({exc!r}); using new rows only")
        return new_df
    return merged_df


@handler()
def predict_day(project, parkings_di):
    """
    Monitor and predict parking occupancy.

    For every parking found in parkings_di, download the latest 100 records
    from the Bologna open-data API, aggregate the occupancy ratio on 30-minute
    slots, send the series to the serving run and collect the returned
    predictions. The predictions are then logged in the project as
    'parking_prediction_nbeats_model' (merged with the stored history and
    de-duplicated on parking/datetime) and as
    'parking_prediction_nbeats_model_sliced' (appended, tagged with the
    timestamp of this run).

    Args:
        project: project object used to look up the serving run and to
            read/log dataitems.
        parkings_di: dataitem whose dataframe has a 'parcheggio' column.

    Raises:
        requests.HTTPError: when the open-data API or the prediction service
            answers with an error status.
    """
    from urllib.parse import quote

    records_url = ('https://opendata.comune.bologna.it/api/explore/v2.1/catalog/datasets/'
                   'disponibilita-parcheggi-storico/records')
    lead_columns = ['parcheggio', 'datetime', 'predicted_mean']

    # get serving predictor function run; its URL does not change between parkings
    run_serve_model = project.get_run(identifier='3ac40967-cda1-43b4-bae4-c7401f7e845d')
    service_url = run_serve_model.status.to_dict()["service"]["url"]

    # get current date and time as string
    # NOTE(review): this is naive local time while the records are parsed as UTC below —
    # confirm the job runs with a UTC clock.
    date_str = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

    # get parkings dataset and convert it to a dataframe
    parkings_df = parkings_di.as_df()

    # one dataframe of predictions per parking, concatenated once after the loop
    prediction_frames = []

    for parking_str in parkings_df['parcheggio'].unique():
        # percent-encode the whole filter so that spaces, accents or other
        # special characters in the parking name cannot break the query string
        where = quote(f"data<='{date_str}' and parcheggio='{parking_str}'", safe='')
        api_url = f'{records_url}?where={where}&order_by=data%20DESC&limit=100'

        # fetch the latest records; fail loudly on HTTP errors and never hang forever
        with requests.get(api_url, timeout=60) as r:
            r.raise_for_status()
            records = r.json()['results']

        if not records:
            # nothing to feed the model with for this parking
            print(f"No records returned for parking '{parking_str}', skipping")
            continue

        # occupancy ratio averaged on 30-minute slots
        df_latest = pd.json_normalize(records)
        df_latest['data'] = df_latest['data'].astype('datetime64[ns, UTC]')
        df_latest['value'] = df_latest['posti_occupati'] / df_latest['posti_totali']
        df_latest['date'] = df_latest['data'].dt.round('30min')
        df_latest = df_latest.groupby('date').agg({'value': 'mean'})

        # convert the processed data to JSON-compatible records and call the serving predictor
        inference_input = json.loads(df_latest.reset_index().to_json(orient='records'))
        with requests.post(f'http://{service_url}', json={"inference_input": inference_input}, timeout=300) as r:
            r.raise_for_status()
            res = r.json()

        res_df = pd.DataFrame(res)
        res_df['datetime'] = res_df['date']
        res_df['parcheggio'] = parking_str
        res_df['predicted_mean'] = res_df['value']
        prediction_frames.append(res_df.drop(columns=['date', 'value']))

    if prediction_frames:
        pred_df = pd.concat(prediction_frames, ignore_index=True)
        # keep the documented columns first, followed by any extra column returned by the service
        extra_columns = [c for c in pred_df.columns if c not in lead_columns]
        pred_df = pred_df[lead_columns + extra_columns]
    else:
        pred_df = pd.DataFrame(columns=lead_columns)

    # merge the predictions with the existing data (if any), remove duplicates and log them
    full_df = _merge_with_history(project, 'parking_prediction_nbeats_model', pred_df,
                                  dedupe_on=['parcheggio', 'datetime'])
    project.log_dataitem('parking_prediction_nbeats_model', data=full_df, kind="table")

    # same predictions tagged with the time of this run, appended to the sliced history
    sliced_df = pred_df.copy()
    sliced_df['slice_datetime'] = date_str
    sliced_df = _merge_with_history(project, 'parking_prediction_nbeats_model_sliced', sliced_df)
    project.log_dataitem('parking_prediction_nbeats_model_sliced', data=sliced_df, kind="table")

Overwriting src/predict_nbeats_timeseries.py


In [37]:
# Register the prediction script as a Python function of the project;
# "predict_day" is the entry point inside the source file.
func = proj.new_function(
    name="predict-day-nbeats-model",
    kind="python",
    python_version="PYTHON3_10",
    source={
        "source": "src/predict_nbeats_timeseries.py",
        "handler": "predict_day",
    },
)

In [8]:
# Resolve the key of the "dataset" dataitem and launch the function once as a job,
# passing that key as the handler's "parkings_di" input.
data_item_download = proj.get_dataitem("dataset").key
run_monitor_parkings = func.run(
    action="job",
    inputs={"parkings_di": data_item_download},
    outputs={},
)

## Pipeline

In this step we will create a workflow pipeline that passes the dataset to the predict_day function, which produces predictions based on the NBEATS model. The entire workflow is scheduled for frequent runs, with the frequency given as a CRON expression.

In [38]:
%%writefile "src/parkings_pipeline_nbeats_model.py"

from digitalhub_runtime_kfp.dsl import pipeline_context

def myhandler(di):
    """Declare a single-step pipeline that runs the prediction function as a job.

    di is forwarded unchanged as the step's "parkings_di" input.
    """
    with pipeline_context() as pc:
        predict_step = pc.step(
            name="predict-day-nbeats-model",
            function="predict-day-nbeats-model",
            action="job",
            inputs={"parkings_di": di},
            outputs={},
        )

Overwriting src/parkings_pipeline_nbeats_model.py


In [39]:
# Register the pipeline script as a KFP workflow of the project;
# "myhandler" is the entry point inside the source file.
workflow = proj.new_workflow(
    name="pipeline_parcheggi_nbeats_model",
    kind="kfp",
    source={
        "source": "src/parkings_pipeline_nbeats_model.py",
        "handler": "myhandler",
    },
)

## Schedule

The NBEATS model pipeline workflow is scheduled for frequent runs using a CRON expression.

In [40]:
# Key of the "dataset" dataitem, passed to the workflow as its "di" parameter.
di = proj.get_dataitem("dataset").key

In [41]:
# Start the workflow on a schedule, passing the dataset key as the "di" parameter.
# NOTE(review): in standard cron syntax "*/55" in the minute field matches minutes 0 and 55
# of every hour (gaps of 55 and then 5 minutes), not "every 55 minutes" — confirm this is intended.
workflow.run(parameters={"di": di}, schedule="*/55 * * * *")

{'kind': 'kfp+run', 'metadata': {'project': 'parcheggi-scheduler', 'name': '91beb6c6-bf31-40db-a1af-e734ee5e51bb', 'created': '2024-10-17T09:44:41.044Z', 'updated': '2024-10-17T09:44:41.061Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu'}, 'spec': {'task': 'kfp+pipeline://parcheggi-scheduler/pipeline_parcheggi_nbeats_model:62cedcfe-3956-4a79-b314-95d113501f29', 'local_execution': False, 'function': 'kfp://parcheggi-scheduler/pipeline_parcheggi_nbeats_model:62cedcfe-3956-4a79-b314-95d113501f29', 'source': {'source': 'src/parkings_pipeline_nbeats_model.py', 'handler': 'myhandler', 'base64': 'CmZyb20gZGlnaXRhbGh1Yl9ydW50aW1lX2tmcC5kc2wgaW1wb3J0IHBpcGVsaW5lX2NvbnRleHQKCmRlZiBteWhhbmRsZXIoZGkpOgogICAgd2l0aCBwaXBlbGluZV9jb250ZXh0KCkgYXMgcGM6CiAgICAgICAgczJfcHJlZGljdCA9IHBjLnN0ZXAobmFtZT0icHJlZGljdC1kYXktbmJlYXRzLW1vZGVsIiwgZnVuY3Rpb249InByZWRpY3QtZGF5LW5iZWF0cy1tb2RlbCIsIGFjdGlvbj0iam9iIiwgaW5wdXRzPXsicGFya2luZ3NfZGkiOmRpfSwgb3V0cHV0cz17fSkK', 'lang': 'python'}, 'schedule