In [25]:
import digitalhub as dh
import pandas as pd
import os

In [26]:
# Name of the project that the functions, model and runs created below are attached to.
PROJECT = "project-cml-darts-ci"
# Obtain the project handle through get_or_create_project, so the cell can be re-run.
project = dh.get_or_create_project(PROJECT)

In [27]:
# Local folder that receives the scripts written by the %%writefile cells below.
new_folder = 'src'
# exist_ok=True keeps the cell idempotent without a separate existence check
# (and without the check-then-create race that check implied).
os.makedirs(new_folder, exist_ok=True)

In [28]:
%%writefile "src/train-model.py"


from digitalhub_runtime_python import handler

import pandas as pd
import numpy as np

from darts import TimeSeries
from darts.datasets import AirPassengersDataset
from darts.models import NBEATSModel
from darts.metrics import mape, smape, mae

from zipfile import ZipFile

@handler()
def train_model(project):
    """Train an N-BEATS forecaster on the AirPassengers series and log it to the project.

    The last 36 points of the series are held out for validation. The model is
    fitted on the remaining points, saved to disk, zipped together with its
    ``.ckpt`` companion file and registered in ``project`` under the name
    ``darts_model`` together with MAPE / sMAPE / MAE scores.

    Args:
        project: project handle supplied to the handler; only ``log_model`` is
            called on it here.
    """
    model_file = "predictor_model.pt"
    archive_file = model_file + ".zip"

    series = AirPassengersDataset().load()
    train, val = series[:-36], series[-36:]

    model = NBEATSModel(
        input_chunk_length=24,
        output_chunk_length=12,
        n_epochs=200,
        random_state=0
    )
    model.fit(train)
    # Forecast exactly as many steps as were held out in `val`.
    pred = model.predict(n=36)

    model.save(model_file)
    # Bundle the saved model with the ".ckpt" file expected next to it after save().
    with ZipFile(archive_file, "w") as z:
        z.write(model_file)
        z.write(model_file + ".ckpt")

    # Score against the hold-out explicitly instead of passing the full series
    # and relying on the metric to align the two time ranges. float() keeps the
    # logged values plain Python numbers.
    metrics = {
        "mape": float(mape(val, pred)),
        "smape": float(smape(val, pred)),
        "mae": float(mae(val, pred))
    }

    project.log_model(
        name="darts_model",
        kind="model",
        source=archive_file,
        algorithm="darts.models.NBEATSModel",
        framework="darts",
        metrics=metrics
    )

Overwriting src/train-model.py


In [29]:
# Register the training script as a function of kind "python"; "handler" names the
# entry point defined inside src/train-model.py.
train_fn = project.new_function(
     name="train-darts",
     kind="python",
     python_version="PYTHON3_10",
     source={"source": "src/train-model.py", "handler": "train_model"})

In [30]:
# Run the "build" action with two install instructions: PyTorch from the CPU wheel index,
# then darts, patsy and scikit-learn.
train_fn.run(action="build", instructions=["pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu","pip3 install darts patsy scikit-learn"])

{'kind': 'python+run', 'metadata': {'project': 'project-cml-darts-ci', 'name': '51b32d8b-cecb-4eed-8663-c39f1532613c', 'created': '2024-10-29T10:54:01.762Z', 'updated': '2024-10-29T10:54:01.782Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu'}, 'spec': {'task': 'python+build://project-cml-darts-ci/train-darts:0da059ca-736a-4f15-81b4-c425c97440ae', 'local_execution': False, 'function': 'python://project-cml-darts-ci/train-darts:0da059ca-736a-4f15-81b4-c425c97440ae', 'source': {'source': 'src/train-model.py', 'handler': 'train_model', 'base64': 'Cgpmcm9tIGRpZ2l0YWxodWJfcnVudGltZV9weXRob24gaW1wb3J0IGhhbmRsZXIKCmltcG9ydCBwYW5kYXMgYXMgcGQKaW1wb3J0IG51bXB5IGFzIG5wCgpmcm9tIGRhcnRzIGltcG9ydCBUaW1lU2VyaWVzCmZyb20gZGFydHMuZGF0YXNldHMgaW1wb3J0IEFpclBhc3NlbmdlcnNEYXRhc2V0CmZyb20gZGFydHMubW9kZWxzIGltcG9ydCBOQkVBVFNNb2RlbApmcm9tIGRhcnRzLm1ldHJpY3MgaW1wb3J0IG1hcGUsIHNtYXBlLCBtYWUKCmZyb20gemlwZmlsZSBpbXBvcnQgWmlwRmlsZQoKQGhhbmRsZXIoKQpkZWYgdHJhaW5fbW9kZWwocHJvamVjdCk6CiAgICBzZXJpZXM

Wait until the build job has completed. You can check the status of the run from the console or by executing the following command:

In [None]:
# NOTE(review): train_fn is the function object; the run returned by train_fn.run(action="build", ...)
# above is not stored in a variable — confirm that get_state() here reports that run's state.
train_fn.get_state()

Once completed, proceed to run the training job.

In [31]:
# Start the training as a "job" run. local_execution=False presumably means the job runs on
# the platform rather than in this kernel — TODO confirm.
train_fn.run(action="job", local_execution=False)

{'kind': 'python+run', 'metadata': {'project': 'project-cml-darts-ci', 'name': 'a7c34702-2783-47d4-a9ce-89547bea2ab9', 'created': '2024-10-29T10:57:35.674Z', 'updated': '2024-10-29T10:57:35.694Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu'}, 'spec': {'task': 'python+job://project-cml-darts-ci/train-darts:0da059ca-736a-4f15-81b4-c425c97440ae', 'local_execution': False, 'function': 'python://project-cml-darts-ci/train-darts:0da059ca-736a-4f15-81b4-c425c97440ae', 'source': {'source': 'src/train-model.py', 'handler': 'train_model', 'base64': 'Cgpmcm9tIGRpZ2l0YWxodWJfcnVudGltZV9weXRob24gaW1wb3J0IGhhbmRsZXIKCmltcG9ydCBwYW5kYXMgYXMgcGQKaW1wb3J0IG51bXB5IGFzIG5wCgpmcm9tIGRhcnRzIGltcG9ydCBUaW1lU2VyaWVzCmZyb20gZGFydHMuZGF0YXNldHMgaW1wb3J0IEFpclBhc3NlbmdlcnNEYXRhc2V0CmZyb20gZGFydHMubW9kZWxzIGltcG9ydCBOQkVBVFNNb2RlbApmcm9tIGRhcnRzLm1ldHJpY3MgaW1wb3J0IG1hcGUsIHNtYXBlLCBtYWUKCmZyb20gemlwZmlsZSBpbXBvcnQgWmlwRmlsZQoKQGhhbmRsZXIoKQpkZWYgdHJhaW5fbW9kZWwocHJvamVjdCk6CiAgICBzZXJpZXMgP

Wait for the training job to complete. You can check the status of the run from the console or by executing the following command:

In [None]:
# NOTE(review): train_fn is the function object; the run returned by train_fn.run(action="job", ...)
# above is not stored in a variable — confirm that get_state() here reports that run's state.
train_fn.get_state()

Once it has completed, proceed with the serve run.

In [32]:
%%writefile "src/serve_darts_model-nk.py"

from darts.models import NBEATSModel
from zipfile import ZipFile
from darts import TimeSeries
import json
import pandas as pd

def init(context):
    """Fetch the logged model, unpack it and attach the loaded forecaster to the context.

    The model registered as ``darts_model`` is downloaded through
    ``context.project``, its zip archive is extracted into ``extracted_model/``
    and the restored model is stored as ``context.model`` for ``serve`` to use.

    Args:
        context: serving context; must expose ``project`` and accept a new
            ``model`` attribute.
    """
    model_name = "darts_model"
    local_path_model = "extracted_model/"

    model = context.project.get_model(model_name)
    path = model.download()

    with ZipFile(path, 'r') as zip_ref:
        zip_ref.extractall(local_path_model)

    # load() rebuilds the model from the saved files, so it is called on the
    # class directly; constructing a placeholder NBEATSModel first is unnecessary.
    name_model_local = local_path_model + "predictor_model.pt"
    mm = NBEATSModel.load(name_model_local)

    setattr(context, "model", mm)

def serve(context, event):
    """Forecast 24 steps ahead from the history carried in the request body.

    The body (raw bytes or an already decoded object) must hold an
    ``inference_input`` list of records with a ``date`` field in epoch
    milliseconds and a ``value`` field. The forecast is returned as a list of
    records produced from the prediction's dataframe.
    """
    # The payload may arrive as raw bytes or already decoded.
    body = json.loads(event.body) if isinstance(event.body, bytes) else event.body
    context.logger.info(f"Received event: {body}")

    # Build the input series: epoch-millisecond stamps become datetimes first.
    frame = pd.DataFrame(body["inference_input"])
    frame['date'] = pd.to_datetime(frame['date'], unit='ms')
    history = TimeSeries.from_dataframe(frame, time_col="date", value_cols="value")

    # Two output chunks of 12 steps each.
    horizon = 12 * 2
    forecast = context.model.predict(n=horizon, series=history)

    # Round-trip through JSON so the returned records contain only JSON-native values.
    records_json = forecast.pd_dataframe().reset_index().to_json(orient='records')
    return json.loads(records_json)

Overwriting src/serve_darts_model-nk.py


In [33]:
# Register the serving script as a function of kind "python": "handler" names the per-request
# entry point and "init_function" the setup function, both defined in the script.
func = project.new_function(name="serve_darts_model",
                            kind="python",
                            python_version="PYTHON3_10",
                            source={
                                 "source": "src/serve_darts_model-nk.py",
                                 "handler": "serve",
                                 "init_function": "init"})

In [34]:
# Run the "build" action for the serving function with the same install instructions used for
# the training function; the returned run is kept so its state can be checked.
run_build_model_serve = func.run(action="build",instructions=["pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu","pip3 install darts patsy scikit-learn"])

Wait for the build job to complete. You can check the status of the run from the console or by executing the following command:

In [None]:
# Check the build run started in the previous cell. `run_serve` does not exist yet at this
# point; it is only assigned by the serve cell further down.
run_build_model_serve.get_state()

Once completed, run the serve job.

In [35]:
# Start the serving run; the "service" label is what the run lookup later in the notebook filters on.
run_serve = func.run(action="serve", labels=["service"])

The run is tagged with the label 'service', which is used later to look up the id of the service run. Wait for the run to reach the 'running' state. You can check its status from the console or by executing the following command:

In [36]:
# Alternative inspection (disabled): run_serve.metadata
# NOTE(review): the recorded output below looks like run metadata rather than a state — confirm
# it was produced by get_state() and not by the disabled line.
run_serve.get_state()

{'project': 'project-cml-darts-ci', 'name': 'd3992c23-2c99-4c6c-a91d-a26276757dad', 'labels': ['service'], 'created': '2024-10-29T11:43:22.37Z', 'updated': '2024-10-29T11:43:22.393Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu'}

Once the service is running, make an API call to it.

In [37]:
%pip install darts==0.30.0

Note: you may need to restart the kernel to use updated packages.


In [1]:
import json
from datetime import datetime, timezone
from darts.datasets import AirPassengersDataset
import requests

# Use the last 24 observations as the history sent to the service
# (24 is the input_chunk_length the model was trained with).
series = AirPassengersDataset().load()
val = series[-24:]
json_value = json.loads(val.to_json())

# Pair every value with its timestamp in epoch milliseconds. The parsed datetimes are
# naive, so they are pinned to UTC explicitly; a bare .timestamp() would interpret them
# in the local timezone of whichever machine runs the notebook.
inference_input = [
    {
        "value": row[0],
        "date": datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S.%f")
        .replace(tzinfo=timezone.utc)
        .timestamp() * 1000,
    }
    for row, stamp in zip(json_value["data"], json_value["index"])
]

# Named `payload` rather than `json` so that the json module stays usable and the cell
# can be re-run.
payload = {"inference_input": inference_input}
payload

{'inference_input': [{'value': 360.0, 'date': -347155200000.0},
  {'value': 342.0, 'date': -344476800000.0},
  {'value': 406.0, 'date': -342057600000.0},
  {'value': 396.0, 'date': -339379200000.0},
  {'value': 420.0, 'date': -336787200000.0},
  {'value': 472.0, 'date': -334108800000.0},
  {'value': 548.0, 'date': -331516800000.0},
  {'value': 559.0, 'date': -328838400000.0},
  {'value': 463.0, 'date': -326160000000.0},
  {'value': 407.0, 'date': -323568000000.0},
  {'value': 362.0, 'date': -320889600000.0},
  {'value': 405.0, 'date': -318297600000.0},
  {'value': 417.0, 'date': -315619200000.0},
  {'value': 391.0, 'date': -312940800000.0},
  {'value': 419.0, 'date': -310435200000.0},
  {'value': 461.0, 'date': -307756800000.0},
  {'value': 472.0, 'date': -305164800000.0},
  {'value': 535.0, 'date': -302486400000.0},
  {'value': 622.0, 'date': -299894400000.0},
  {'value': 606.0, 'date': -297216000000.0},
  {'value': 508.0, 'date': -294537600000.0},
  {'value': 461.0, 'date': -29194560

In [2]:
# The execution counter restarted at 1 after the %pip install cell, so the project handle is
# set up again here (presumably following a kernel restart).
import digitalhub as dh
PROJECT = "project-cml-darts-ci"
project = dh.get_or_create_project(PROJECT)

Get the id of the service run from the list of runs of the serving function.

In [3]:
# Fetch the serving function by name and list every run recorded for it.
serve_func = project.get_function('serve_darts_model')
# NOTE(review): the name `list` rebinds the Python builtin for the rest of the session; later
# cells read this variable, so a rename has to be applied to them at the same time.
list = serve_func.list_runs()
len(list)

9

In [4]:
# filter = [x for x in list if(x.metadata.labels]

In [5]:
# filtered_list = [e for e in list if e.metadata.name == '734ea1da-650a-4dc6-8cc9-f00491f2127c']
# len(filtered_list)

In [6]:
# Keep only the runs tagged with the 'service' label. getattr() gets an explicit default so
# that metadata without a `labels` attribute is skipped instead of raising AttributeError;
# `or ()` treats a None value the same way.
filtered_list = [
    e for e in list
    if 'service' in (getattr(e.metadata, 'labels', None) or ())
]
len(filtered_list)

1

In [7]:
# Take the id of the first run carrying the 'service' label; raises IndexError when no run matched.
# NOTE(review): the name `id` rebinds the Python builtin; the next cell reads this variable.
id = filtered_list[0].id

In [8]:
# Look up the serving run by the id selected above and read the endpoint from its refreshed status.
run_serve = project.get_run(identifier=id)
SERVICE_URL = run_serve.refresh().status.to_dict()["service"]["url"]

# The timeout keeps the cell from hanging forever on an unreachable service, and
# raise_for_status() reports an HTTP error directly instead of failing later while
# decoding an error page as JSON.
with requests.post(f'http://{SERVICE_URL}', json={"inference_input":inference_input}, timeout=60) as r:
    r.raise_for_status()
    res = r.json()
print(res)

[{'date': -283996800000, 'value': 448.4713515232}, {'date': -281318400000, 'value': 416.9437679985}, {'date': -278899200000, 'value': 488.1964291872}, {'date': -276220800000, 'value': 498.348927999}, {'date': -273628800000, 'value': 509.5799418856}, {'date': -270950400000, 'value': 614.4077203825}, {'date': -268358400000, 'value': 703.8526003896}, {'date': -265680000000, 'value': 691.8522541339}, {'date': -263001600000, 'value': 606.4765462227}, {'date': -260409600000, 'value': 532.2036568194}, {'date': -257731200000, 'value': 454.02867762}, {'date': -255139200000, 'value': 497.8013444336}, {'date': -252460800000, 'value': 524.1175493405}, {'date': -249782400000, 'value': 490.7081232278}, {'date': -247363200000, 'value': 572.8023540875}, {'date': -244684800000, 'value': 584.0529239754}, {'date': -242092800000, 'value': 595.3217350383}, {'date': -239414400000, 'value': 707.6626039279}, {'date': -236822400000, 'value': 812.7016954389}, {'date': -234144000000, 'value': 805.2495930826}, {'

## Workflow Pipeline

In [9]:
%%writefile "src/dbt_cml_darts_pipeline.py"

from digitalhub_runtime_kfp.dsl import pipeline_context

def myhandler():
    """Define the pipeline: one step that runs the "train-darts" function with action "job"."""
    with pipeline_context() as pc:
        # The step handle is not kept because no later step refers to it.
        pc.step(name="train-model", function="train-darts", action="job")

Overwriting src/dbt_cml_darts_pipeline.py


In [10]:
# Register the pipeline script as a workflow of kind "kfp" with myhandler as its entry point.
workflow = project.new_workflow(name="pipeline_cml_darts", kind="kfp", code_src= "src/dbt_cml_darts_pipeline.py", handler = "myhandler")

In [11]:
# NOTE(review): the keyword here is spelled `localExecution`, while the other run() calls in
# this notebook use `local_execution`, and the recorded output reports 'local_execution': False
# — confirm which spelling and value are intended.
workflow.run(localExecution=True)

{'kind': 'kfp+run', 'metadata': {'project': 'project-cml-darts-ci', 'name': '045d4220-20da-41ca-843e-a2fd15b2a6f7', 'created': '2024-10-29T11:45:02.458Z', 'updated': '2024-10-29T11:45:02.475Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu'}, 'spec': {'task': 'kfp+pipeline://project-cml-darts-ci/pipeline_cml_darts:dde3cbc1-d475-43fd-855f-641a71c1cf33', 'local_execution': False, 'function': 'kfp://project-cml-darts-ci/pipeline_cml_darts:dde3cbc1-d475-43fd-855f-641a71c1cf33', 'source': {'source': 'src/dbt_cml_darts_pipeline.py', 'handler': 'myhandler', 'base64': 'CmZyb20gZGlnaXRhbGh1Yl9ydW50aW1lX2tmcC5kc2wgaW1wb3J0IHBpcGVsaW5lX2NvbnRleHQKCmRlZiBteWhhbmRsZXIoKToKICAgIHdpdGggcGlwZWxpbmVfY29udGV4dCgpIGFzIHBjOgogICAgICAgIHRyYWluZXIgPSBwYy5zdGVwKG5hbWU9InRyYWluLW1vZGVsIiwgZnVuY3Rpb249InRyYWluLWRhcnRzIiwgYWN0aW9uPSJqb2IiKSAgICAgICAgCg==', 'lang': 'python'}, 'inputs': {}, 'outputs': {}, 'parameters': {}}, 'status': {'state': 'READY', 'transitions': [{'status': 'READY', 'time':