In [1]:
import digitalhub as dh
import pandas as pd
import os

In [2]:
# Name of the project that every function, model and workflow in this notebook is attached to.
PROJECT = "project-cml-darts-ci"
# Project handle used by all the cells below.
project = dh.get_or_create_project(PROJECT)

In [3]:
# Folder that receives the scripts written by the %%writefile cells.
new_folder = 'src'
# exist_ok=True keeps the cell idempotent and removes the check-then-create race.
os.makedirs(new_folder, exist_ok=True)

In [4]:
%%writefile "src/train-model.py"


from digitalhub_runtime_python import handler

import pandas as pd
import numpy as np

from darts import TimeSeries
from darts.datasets import AirPassengersDataset
from darts.models import NBEATSModel
from darts.metrics import mape, smape, mae

from zipfile import ZipFile

@handler()
def train_model(project):
    """Train an N-BEATS forecaster on the AirPassengers series and log it.

    The last 36 points of the series are held out for validation. The model
    is fitted on the remaining points, saved to disk, zipped together with
    its ``.ckpt`` companion file and registered on ``project`` under the
    name ``darts_model`` along with the validation metrics.

    Args:
        project: Object exposing ``log_model``; the trained model archive
            and its metrics are registered on it.
    """
    horizon = 36  # number of trailing points held out and forecast

    series = AirPassengersDataset().load()
    train, val = series[:-horizon], series[-horizon:]

    model = NBEATSModel(
        input_chunk_length=24,
        output_chunk_length=12,
        n_epochs=200,
        random_state=0,
    )
    model.fit(train)
    pred = model.predict(n=horizon)

    # Bundle the saved model file and its ".ckpt" companion into one archive.
    model.save("predictor_model.pt")
    with ZipFile("predictor_model.pt.zip", "w") as z:
        z.write("predictor_model.pt")
        z.write("predictor_model.pt.ckpt")

    # Score the forecast against the held-out slice it is meant to predict,
    # rather than passing the full series and relying on implicit alignment.
    metrics = {
        "mape": mape(val, pred),
        "smape": smape(val, pred),
        "mae": mae(val, pred),
    }

    project.log_model(
        name="darts_model",
        kind="model",
        source="predictor_model.pt.zip",
        algorithm="darts.models.NBEATSModel",
        framework="darts",
        metrics=metrics,
    )

Writing src/train-model.py


In [5]:
# Register the training script as a Python function of the project,
# with `train_model` as its handler.
train_source = {"source": "src/train-model.py", "handler": "train_model"}
train_fn = project.new_function(
    name="train-darts",
    kind="python",
    python_version="PYTHON3_10",
    source=train_source,
)

In [6]:
# Disabled: build action for the training function, installing CPU-only torch and the darts stack.
#train_fn.run(action="build", instructions=["pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu","pip3 install darts patsy scikit-learn"])

In [6]:
%%writefile "src/serve_darts_model-nk.py"

from darts.models import NBEATSModel
from zipfile import ZipFile
from darts import TimeSeries
import json
import pandas as pd

def init(context):
    """Download the logged ``darts_model`` archive and attach the loaded model to ``context``.

    Args:
        context: Serving context. Its ``project`` is used to fetch the model,
            and the loaded predictor is stored on it as ``context.model``.
    """
    model_name = "darts_model"
    local_path_model = "extracted_model/"

    model = context.project.get_model(model_name)
    path = model.download()

    # The downloaded artifact is a zip archive; unpack it next to the service.
    with ZipFile(path, 'r') as zip_ref:
        zip_ref.extractall(local_path_model)

    # `load` rebuilds the model from the saved file, so no throw-away
    # NBEATSModel instance has to be constructed first.
    name_model_local = local_path_model + "predictor_model.pt"
    context.model = NBEATSModel.load(name_model_local)

def serve(context, event):
    """Forecast 24 future points from the history sent in the request body.

    The body must provide ``inference_input``: records with a ``date`` field
    (epoch milliseconds) and a ``value`` field.

    Args:
        context: Serving context holding the predictor as ``context.model``
            and a ``logger``.
        event: Incoming request. ``event.body`` is either already-decoded
            data or raw JSON text (``bytes`` or ``str``).

    Returns:
        list: The forecast as a list of records, one per predicted step.
    """
    raw_body = event.body
    # Raw payloads (bytes, or text that was not decoded upstream) still need
    # JSON parsing; anything else is used as-is.
    if isinstance(raw_body, (bytes, str)):
        body = json.loads(raw_body)
    else:
        body = raw_body
    context.logger.info(f"Received event: {body}")
    inference_input = body["inference_input"]

    pdf = pd.DataFrame(inference_input)
    pdf['date'] = pd.to_datetime(pdf['date'], unit='ms')

    ts = TimeSeries.from_dataframe(
        pdf,
        time_col="date",
        value_cols="value"
    )

    # Forecast two output chunks ahead (2 * 12 = 24 steps).
    output_chunk_length = 12
    result = context.model.predict(n=output_chunk_length * 2, series=ts)
    # Serialise to JSON text and parse it back to get a plain list of records.
    jsonstr = result.pd_dataframe().reset_index().to_json(orient='records')
    return json.loads(jsonstr)

Writing serve_darts_model-nk.py


In [7]:
# Launch the training function as a job with local execution disabled.
train_fn.run(action="job", local_execution=False)

{'kind': 'python+run', 'metadata': {'project': 'project-cml-darts-ci', 'name': '52b9bcde-b19b-435e-956b-98fef2da7edf', 'created': '2024-10-28T10:02:35.57Z', 'updated': '2024-10-28T10:02:35.596Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu'}, 'spec': {'task': 'python+job://project-cml-darts-ci/train-darts:b6fe7680-f353-4dce-a986-2aaab69227ea', 'local_execution': False, 'function': 'python://project-cml-darts-ci/train-darts:b6fe7680-f353-4dce-a986-2aaab69227ea', 'source': {'source': 'src/train-model.py', 'handler': 'train_model', 'base64': 'Cgpmcm9tIGRpZ2l0YWxodWJfcnVudGltZV9weXRob24gaW1wb3J0IGhhbmRsZXIKCmltcG9ydCBwYW5kYXMgYXMgcGQKaW1wb3J0IG51bXB5IGFzIG5wCgpmcm9tIGRhcnRzIGltcG9ydCBUaW1lU2VyaWVzCmZyb20gZGFydHMuZGF0YXNldHMgaW1wb3J0IEFpclBhc3NlbmdlcnNEYXRhc2V0CmZyb20gZGFydHMubW9kZWxzIGltcG9ydCBOQkVBVFNNb2RlbApmcm9tIGRhcnRzLm1ldHJpY3MgaW1wb3J0IG1hcGUsIHNtYXBlLCBtYWUKCmZyb20gemlwZmlsZSBpbXBvcnQgWmlwRmlsZQoKQGhhbmRsZXIoKQpkZWYgdHJhaW5fbW9kZWwocHJvamVjdCk6CiAgICBzZXJpZXMgPS

In [7]:
# Register the serving script as a Python function of the project,
# with `serve` as the handler and `init` as the init function.
serve_source = {
    "source": "src/serve_darts_model-nk.py",
    "handler": "serve",
    "init_function": "init",
}
func = project.new_function(
    name="serve_darts_model",
    kind="python",
    python_version="PYTHON3_10",
    source=serve_source,
)

In [8]:
# Build the serving function with CPU-only torch and the darts stack installed.
build_instructions = [
    "pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu",
    "pip3 install darts patsy scikit-learn",
]
run_build_model_serve = func.run(action="build", instructions=build_instructions)

In [9]:
# Start the serving function; the returned run is kept so its status can be queried later.
run_serve = func.run(action="serve")

In [12]:
import json
from datetime import datetime, timezone
from darts.datasets import AirPassengersDataset

# Use the last 24 points of the AirPassengers series as the request history.
series = AirPassengersDataset().load()
val = series[-24:]
json_value = json.loads(val.to_json())


def _to_epoch_ms(iso_timestamp):
    """Convert a naive ISO timestamp string to epoch milliseconds, read as UTC."""
    parsed = datetime.strptime(iso_timestamp, "%Y-%m-%dT%H:%M:%S.%f")
    # Pin the zone to UTC: a naive datetime would be interpreted in the
    # machine's local zone, shifting every date sent to the service.
    return parsed.replace(tzinfo=timezone.utc).timestamp() * 1000


inference_input = [
    {"value": row[0], "date": _to_epoch_ms(stamp)}
    for row, stamp in zip(json_value["data"], json_value["index"])
]
# Bound to `request_payload` rather than `json` so the json module imported
# at the top of this cell is not shadowed for later cells.
request_payload = {"inference_input": inference_input}
request_payload

{'inference_input': [{'value': 360.0, 'date': -347155200000.0},
  {'value': 342.0, 'date': -344476800000.0},
  {'value': 406.0, 'date': -342057600000.0},
  {'value': 396.0, 'date': -339379200000.0},
  {'value': 420.0, 'date': -336787200000.0},
  {'value': 472.0, 'date': -334108800000.0},
  {'value': 548.0, 'date': -331516800000.0},
  {'value': 559.0, 'date': -328838400000.0},
  {'value': 463.0, 'date': -326160000000.0},
  {'value': 407.0, 'date': -323568000000.0},
  {'value': 362.0, 'date': -320889600000.0},
  {'value': 405.0, 'date': -318297600000.0},
  {'value': 417.0, 'date': -315619200000.0},
  {'value': 391.0, 'date': -312940800000.0},
  {'value': 419.0, 'date': -310435200000.0},
  {'value': 461.0, 'date': -307756800000.0},
  {'value': 472.0, 'date': -305164800000.0},
  {'value': 535.0, 'date': -302486400000.0},
  {'value': 622.0, 'date': -299894400000.0},
  {'value': 606.0, 'date': -297216000000.0},
  {'value': 508.0, 'date': -294537600000.0},
  {'value': 461.0, 'date': -29194560

In [13]:
# `requests` is used below but was not imported by any earlier cell.
import requests

# NOTE(review): this run id is hard-coded and only exists in the environment
# where the notebook was first executed; on a fresh run, replace it with the
# id of the serve run started earlier — confirm.
run_serve = project.get_run(identifier='b83b148a-5e0d-433b-8a9a-3a89022f6904')
# Refresh the run to read the URL of the deployed service from its status.
SERVICE_URL = run_serve.refresh().status.to_dict()["service"]["url"]

# Send the history to the service and show the forecast it returns.
with requests.post(f'http://{SERVICE_URL}', json={"inference_input":inference_input}) as r:
    res = r.json()
print(res)

[{'date': -283996800000, 'value': 448.4713515232}, {'date': -281318400000, 'value': 416.9437679985}, {'date': -278899200000, 'value': 488.1964291872}, {'date': -276220800000, 'value': 498.348927999}, {'date': -273628800000, 'value': 509.5799418856}, {'date': -270950400000, 'value': 614.4077203825}, {'date': -268358400000, 'value': 703.8526003896}, {'date': -265680000000, 'value': 691.8522541339}, {'date': -263001600000, 'value': 606.4765462227}, {'date': -260409600000, 'value': 532.2036568194}, {'date': -257731200000, 'value': 454.02867762}, {'date': -255139200000, 'value': 497.8013444336}, {'date': -252460800000, 'value': 524.1175493405}, {'date': -249782400000, 'value': 490.7081232278}, {'date': -247363200000, 'value': 572.8023540875}, {'date': -244684800000, 'value': 584.0529239754}, {'date': -242092800000, 'value': 595.3217350383}, {'date': -239414400000, 'value': 707.6626039279}, {'date': -236822400000, 'value': 812.7016954389}, {'date': -234144000000, 'value': 805.2495930826}, {'

## Workflow Pipeline

In [None]:
%pip install darts==0.30.0

Collecting darts==0.30.0
  Downloading darts-0.30.0-py3-none-any.whl (917 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m917.3/917.3 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting holidays>=0.11.1 (from darts==0.30.0)
  Downloading holidays-0.59-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting nfoursid>=1.0.0 (from darts==0.30.0)
  Downloading nfoursid-1.0.1-py3-none-any.whl (16 kB)
Collecting pmdarima>=1.8.0 (from darts==0.30.0)
  Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hCollecting pyod>=0.9.5 (from darts==0.30.0)
  Downloading pyod-2.0.2.tar.gz (165 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.

In [8]:
%%writefile "src/dbt_cml_darts_pipeline.py"

from digitalhub_runtime_kfp.dsl import pipeline_context

def myhandler():
    """Pipeline definition: a single step that runs the ``train-darts`` function as a job."""
    with pipeline_context() as ctx:
        train_step = ctx.step(
            name="train-model",
            function="train-darts",
            action="job",
        )

Writing src/dbt_cml_darts_pipeline.py


In [9]:
# Register the pipeline script as a KFP workflow of the project.
workflow = project.new_workflow(
    name="pipeline_cml_darts",
    kind="kfp",
    code_src="src/dbt_cml_darts_pipeline.py",
    handler="myhandler",
)

In [10]:
# Start a run of the workflow.
# NOTE(review): the recorded output of this cell reports 'local_execution': False,
# and the train_fn.run call earlier spells the flag `local_execution`; the
# camelCase `localExecution=True` may not be the intended keyword — confirm.
workflow.run(localExecution=True)

{'kind': 'kfp+run', 'metadata': {'project': 'project-cml-darts-ci', 'name': 'b02b6d3f-60a6-442e-b0dd-4d552890bf8e', 'created': '2024-10-28T10:05:03.716Z', 'updated': '2024-10-28T10:05:03.739Z', 'created_by': 'khurshid@fbk.eu', 'updated_by': 'khurshid@fbk.eu'}, 'spec': {'task': 'kfp+pipeline://project-cml-darts-ci/pipeline_cml_darts:e5d5e75b-2e86-4a49-8446-800512949ba3', 'local_execution': False, 'function': 'kfp://project-cml-darts-ci/pipeline_cml_darts:e5d5e75b-2e86-4a49-8446-800512949ba3', 'source': {'source': 'src/dbt_cml_darts_pipeline.py', 'handler': 'myhandler', 'base64': 'CmZyb20gZGlnaXRhbGh1Yl9ydW50aW1lX2tmcC5kc2wgaW1wb3J0IHBpcGVsaW5lX2NvbnRleHQKCmRlZiBteWhhbmRsZXIoKToKICAgIHdpdGggcGlwZWxpbmVfY29udGV4dCgpIGFzIHBjOgogICAgICAgIHRyYWluZXIgPSBwYy5zdGVwKG5hbWU9InRyYWluLW1vZGVsIiwgZnVuY3Rpb249InRyYWluLWRhcnRzIiwgYWN0aW9uPSJqb2IiKSAgICAgICAgCg==', 'lang': 'python'}, 'inputs': {}, 'outputs': {}, 'parameters': {}}, 'status': {'state': 'READY', 'transitions': [{'status': 'READY', 'time':