In [None]:
%store -r
import os
from dkube.sdk import *
from dkube.sdk.api import DkubeApi
from dkube.sdk.rsrcs import DkubeModelmonitor
from dkube.sdk.rsrcs.modelmonitor import DatasetClass,ModelType,DriftAlgo
from dkube.sdk.rsrcs.modelmonitor import DatasetFormat,DkubeModelmonitoralert

In [None]:
# Build the DKube API client from the URL and access token found in the
# process environment (os.getenv yields None for a variable that is not set).
api = DkubeApi(
    URL=os.getenv("DKUBE_URL"),
    token=os.getenv("DKUBE_USER_ACCESS_TOKEN"),
)

### Importing deployment

In [None]:
# Import the deployment whose name is MONITOR_NAME.
# NOTE(review): MONITOR_NAME is not assigned in this notebook; presumably it is
# restored by the `%store -r` in the first cell - confirm.
api.import_deployment(name=MONITOR_NAME)

### Model Monitor

In [None]:
# Model monitor spec object named after MONITOR_NAME. The cells below configure
# it (monitor settings, datasources, drift monitoring) and then hand it to
# api.modelmonitor_create().
mm=DkubeModelmonitor(name=MONITOR_NAME)

In [None]:
# Model type used for this monitor; passed (as a string) to update_modelmonitor() below.
mt=ModelType.Regression
# Training dataset class. NOTE(review): dc_t is not read by any cell visible in
# this notebook - confirm whether it is still needed.
dc_t=DatasetClass.Train

In [None]:
# Read the whole transformer script into a single string; it is later passed as
# `transformer_script` when the training datasource is added.
# The context manager closes the file handle even if read() raises.
with open("transform-data.py", "r") as text_file:
    script = text_file.read()

In [None]:
# Set the monitor-level properties: model type, timezone of the data and the
# kind of input data.
mm.update_modelmonitor(
    model_type=str(mt),
    data_timezone="utc",
    input_data_type="tabular",
)

### Dataset Details (training, prediction and ground truth)

In [None]:
# Resolve the dataset names, version references and prediction data format that
# the datasource registration cell expects for the selected DATA_SOURCE.
# NOTE(review): DATA_SOURCE, DKUBE_BASE_DATASET, DKUBEUSERNAME, MONITOR_NAME and
# PREDICT_DATASET_FORMAT are not assigned in this notebook; presumably they are
# restored by `%store -r` - confirm.
if DATA_SOURCE == "local" or DATA_SOURCE == "aws_s3":
    training_data = f'{DKUBE_BASE_DATASET}:'+ DKUBEUSERNAME
    # Version reference = 'v1:' + uuid of the first entry returned by
    # get_dataset_versions().
    train_data_version = 'v1:'+api.get_dataset_versions(
        DKUBEUSERNAME,
        DKUBE_BASE_DATASET)[0]['version']['uuid']
    prediction_data = MONITOR_NAME +'-predict:'+ DKUBEUSERNAME
    labelled_data = MONITOR_NAME +'-groundtruth:'+ DKUBEUSERNAME

if DATA_SOURCE == 'local':
    # Only the local source pins explicit versions for the predict and
    # groundtruth datasets.
    predict_data_version = 'v1:'+api.get_dataset_versions(
        DKUBEUSERNAME,
        MONITOR_NAME+'-predict')[0]['version']['uuid']
    labelled_data_version = 'v1:'+api.get_dataset_versions(
        DKUBEUSERNAME,
        MONITOR_NAME+'-groundtruth')[0]['version']['uuid']

if DATA_SOURCE == "aws_s3":
    # Map the configured format name onto the SDK enum value. An unknown name
    # is rejected here; otherwise predict_data_format would stay unassigned and
    # only fail later with a NameError when the datasource is added.
    if PREDICT_DATASET_FORMAT == "tabular":
        predict_data_format = str(DatasetFormat.Tabular)
    elif PREDICT_DATASET_FORMAT == "cloudevents":
        predict_data_format = str(DatasetFormat.Cloudevents)
    elif PREDICT_DATASET_FORMAT == "sagemaker":
        predict_data_format = str(DatasetFormat.Sagemaker)
    else:
        raise ValueError(
            f"Unsupported PREDICT_DATASET_FORMAT {PREDICT_DATASET_FORMAT!r}; "
            "expected 'tabular', 'cloudevents' or 'sagemaker'"
        )

if DATA_SOURCE == "sql":
    # The SQL source only needs the dataset name; the individual tables are
    # selected by query when the datasources are added.
    training_data = f'{DKUBE_BASE_DATASET}:'+ DKUBEUSERNAME

### Add Training, Prediction and Labelled Datasets

In [None]:
# Register the Train, Predict and Labelled datasources for the selected
# DATA_SOURCE. The branches are mutually exclusive, so at most one of them runs.
train_class = str(DatasetClass.Train)
predict_class = str(DatasetClass.Predict)
labelled_class = str(DatasetClass.Labelled)
tabular_format = str(DatasetFormat.Tabular)

# Column mapping shared by every Labelled datasource below.
labelled_columns = dict(
    predict_col="charges",
    groundtruth_col="GT_target",
    timestamp_col="timestamp",
)

if DATA_SOURCE == 'sql':
    # All three datasources use the same dataset name and differ only by query.
    mm.add_datasources(
        data_class=train_class,
        name=training_data,
        data_format=tabular_format,
        sql_query="select * from insurance",
        transformer_script=script,
    )
    mm.add_datasources(
        data_class=predict_class,
        name=training_data,
        data_format=tabular_format,
        sql_query="select * from insurance_predict",
        date_suffix="yyyy/mm/dd/hh",
    )
    mm.add_datasources(
        data_class=labelled_class,
        name=training_data,
        data_format=tabular_format,
        sql_query="select * from insurance_gt",
        **labelled_columns,
    )
elif DATA_SOURCE == 'local':
    # Every local datasource is pinned to an explicit dataset version.
    mm.add_datasources(
        data_class=train_class,
        name=training_data,
        data_format=tabular_format,
        version=train_data_version,
        transformer_script=script,
    )
    mm.add_datasources(
        data_class=predict_class,
        name=prediction_data,
        data_format=tabular_format,
        version=predict_data_version,
        date_suffix="none",
    )
    mm.add_datasources(
        data_class=labelled_class,
        name=labelled_data,
        data_format=tabular_format,
        version=labelled_data_version,
        **labelled_columns,
    )
elif DATA_SOURCE == 'aws_s3':
    # Only the training datasource carries a version here; the predict
    # datasource uses the format resolved from PREDICT_DATASET_FORMAT.
    mm.add_datasources(
        data_class=train_class,
        name=training_data,
        data_format=tabular_format,
        version=train_data_version,
        transformer_script=script,
    )
    mm.add_datasources(
        data_class=predict_class,
        name=prediction_data,
        data_format=predict_data_format,
        date_suffix="yyyy/mm/dd/hh",
    )
    mm.add_datasources(
        data_class=labelled_class,
        name=labelled_data,
        data_format=tabular_format,
        **labelled_columns,
    )

### Add Drift monitoring details

In [None]:
# Enable drift monitoring on the spec with frequency 5 and the 'auto' algorithm.
# NOTE(review): the unit of `frequency` is not visible here - confirm.
mm.update_drift_monitoring_details(
    enabled=True,
    frequency=5,
    algorithm='auto',
)

### Create Model monitor

In [None]:
# Create the model monitor from the spec built above.
# NOTE(review): wait_for_completion=True presumably blocks until creation has
# finished - confirm against the SDK.
api.modelmonitor_create(mm,wait_for_completion=True)

### Extracting id of the Model Monitor

In [None]:
# Look up the monitor's id by name; the schema, alert, start and cleanup cells
# below all pass this value to the API.
# NOTE: `id` shadows the Python builtin of the same name. The name is kept
# because later cells reference it.
id = api.modelmonitor_get_id(MONITOR_NAME)
# Bare expression: shows the id as the cell output.
id

### Schema update

In [None]:
# Schema updates, applied in the order listed. Entries without a "selected" key
# do not pass that argument, leaving it at the API's default.
schema_updates = [
    # Non-feature columns (prediction output, row id, timestamp): selected=False.
    dict(label='charges', schema_class='continuous', schema_type="prediction_output", selected=False),
    dict(label='unique_id', schema_class='continuous', schema_type="row_id", selected=False),
    dict(label='timestamp', schema_class='continuous', schema_type="timestamp", selected=False),
    # age and bmi as continuous input features.
    dict(label='age', schema_class='continuous', schema_type='input_feature'),
    dict(label='bmi', schema_class='continuous', schema_type='input_feature'),
    # Remaining input features as categorical.
    dict(label='sex', schema_class='categorical', schema_type='input_feature'),
    dict(label='children', schema_class='categorical', schema_type='input_feature'),
    dict(label='smoker', schema_class='categorical', schema_type='input_feature'),
    dict(label='region', schema_class='categorical', schema_type='input_feature'),
]

for schema_kwargs in schema_updates:
    api.modelmonitor_update_schema(id, **schema_kwargs)




### Performance Monitoring

In [None]:
# Start from a new spec object; in this cell only the performance-monitoring
# settings are set on it before it is sent with api.modelmonitor_update().
mm = DkubeModelmonitor(name=MONITOR_NAME)

# Per-metric thresholds, each with a "hard" and a "soft" value. The whole
# mapping is passed through the `soft_thresholds` argument.
# NOTE(review): for r2_score the "hard" value is larger than the "soft" one,
# the same ordering as the error metrics - confirm this is the intended
# direction for a higher-is-better metric.
performance_thresholds = {
    "mse": {"hard": 43601921.76, "soft": 39968428.279999994},
    "rmse": {"hard": 7233.360000000001, "soft": 6630.58},
    "mae": {"hard": 4882.68, "soft": 4475.79},
    "mape": {"hard": 0.5250794400000001, "soft": 0.48132282000000004},
    "r2_score": {"hard": 0.8795999999999999, "soft": 0.8063},
}

mm.update_performance_monitoring_details(
    enabled=True,
    source_type="labelled_data",
    frequency=5,
    soft_thresholds=performance_thresholds,
)

In [None]:
# Send the spec configured in the previous cell to the existing monitor identified by `id`.
api.modelmonitor_update(id,mm)

### Add alerts

In [None]:
# Define a feature-drift alert on the 'age' feature with threshold 0.02 and
# attach it to the monitor.
alert = DkubeModelmonitoralert(name='age_alert')
alert.update_alert(
    alert_class='feature_drift',
    feature='age',
    threshold=0.02,
)
api.modelmonitor_add_alert(id, alert)

### Start the model monitor

In [None]:
# Start the monitor identified by `id`.
api.modelmonitor_start(id)

### Retraining / Rebaselining Model Monitor

In [None]:
%store -r
# Re-resolve the monitor id so this cell does not depend on the earlier lookup.
id = api.modelmonitor_get_id(MONITOR_NAME)

if INPUT_TRAIN_TYPE == 'retraining':
    # The monitor is stopped, its training datasource is pointed at the
    # retraining dataset, and then it is started again.
    api.modelmonitor_stop(id)

    training_data = f'{RETRAINING_DATASET}:' + DKUBEUSERNAME
    # Version reference is "<name>:<uuid>" of the first entry returned by
    # get_dataset_versions().
    data_dict = api.get_dataset_versions(
        DKUBEUSERNAME,
        RETRAINING_DATASET,
    )[0]['version']
    train_data_version = ":".join((data_dict['name'], data_dict['uuid']))

    # New spec carrying only the updated training datasource.
    mm = DkubeModelmonitor(name=MONITOR_NAME)
    mm.update_datasources(
        name=training_data,
        data_class=str(DatasetClass.Train),
        version=train_data_version,
    )
    api.modelmonitor_update(id, mm)

    # Start the model monitor again.
    api.modelmonitor_start(id)



### Cleanup

In [None]:
# Optional teardown: set CLEANUP to True to stop the model monitor and delete it.
CLEANUP = False
if CLEANUP:
    from time import sleep

    RETRIES = 4  # maximum number of state polls before giving up

    for _ in range(RETRIES):
        # Kept in its own name so the `mm` spec object from earlier cells is
        # not overwritten by the dict returned here.
        monitor_info = api.modelmonitor_get(id)
        status = monitor_info["status"]
        if status:
            if status["state"].lower() != "active":
                # No longer active: safe to delete.
                break
            # Still active: request a stop, then poll again after the pause.
            api.modelmonitor_stop(id)
        sleep(5)
    else:
        # Every poll was used up without seeing a non-active state.
        raise TimeoutError("modelmonitor failed to stop")
    api.modelmonitor_delete(id)
    