#### Defined by User

In [None]:
## Dkube username of the person running this notebook
DKUBEUSERNAME = "ocdkube"

## Name of the model monitor that this workflow is going to create
MONITOR_NAME = "insurance-mm-d3"
## Data source to use; one of: aws_s3, sql, local
DATA_SOURCE = "local"
## Training type; one of: training, retraining
INPUT_TRAIN_TYPE = "training"

# Training code name; train.ipynb needs it to retrain the model
DKUBE_TRAINING_CODE_NAME = "insurance"

# Whether a remote deployment is used
USE_REMOTE_DEPLOYMENT = True

## Dkube connection details, taken from the environment ("" when unset)
import os
TOKEN = os.environ.get("DKUBE_USER_ACCESS_TOKEN", "")
DKUBE_URL = os.environ.get("DKUBE_URL", "")

## AWS_S3 credentials, taken from the environment ("" when unset)
ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID", "")
SECRET_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "")

## SQL connection details, taken from the environment ("" when unset)
# DBHOSTNAME must be given in one of these forms:
#   IP:PORT      e.g. 127.0.0.1:3306
#   DOMAIN:PORT  e.g. my-db.com:3306
DBHOSTNAME = os.environ.get("DBHOSTNAME", "")
DATABASENAME = os.environ.get("DATABASENAME", "")
DBUSERNAME = os.environ.get("DBUSERNAME", "")
DBPASSWORD = os.environ.get("DBPASSWORD", "")


# SQL provider; the supported values are mysql and mssql
DB_PROVIDER = "mysql"

# Dataset used as training data; the sql data source gets a "-sql" suffix
DKUBE_BASE_DATASET = "insurance-data" + ("-sql" if DATA_SOURCE == "sql" else "")

# Name of the model to create or reuse: it is created when missing,
# otherwise creation is ignored.
MODEL_NAME = MONITOR_NAME


# Dataset that has to be provided when doing retraining
RETRAINING_DATASET = 'insurance-training-data'

# How often monitoring runs (unit is not stated here -- TODO confirm)
RUN_FREQUENCY = 5

# The predict dataset format is only defined for the aws_s3 data source
if DATA_SOURCE == "aws_s3":
    ## supported formats: 'cloudevents', 'sagemaker-logs', 'tabular'
    PREDICT_DATASET_FORMAT = "tabular"

# Fail fast when a required setting is missing. The explanation is attached to
# the exception itself (instead of being printed before a message-less raise),
# so it appears in the traceback. Exception types are unchanged: TypeError for
# missing values, ValueError for an unsupported DB_PROVIDER.
if not (TOKEN and DKUBEUSERNAME and DKUBE_URL):
    raise TypeError("Please fill the Dkube details first (TOKEN, DKUBE_URL, DKUBEUSERNAME)")
if DATA_SOURCE == 'aws_s3' and not (ACCESS_KEY and SECRET_KEY):
    raise TypeError("Please fill the AWS_S3 details first (ACCESS_KEY, SECRET_KEY)")
if DATA_SOURCE == 'sql':
    # The SQL settings only matter when the sql data source is selected
    if not (DBHOSTNAME and DATABASENAME and DBUSERNAME and DBPASSWORD):
        raise TypeError("Please fill the SQL details first (DBHOSTNAME, DATABASENAME, DBUSERNAME, DBPASSWORD)")
    if DB_PROVIDER not in ("mysql", "mssql"):
        raise ValueError(f"{DB_PROVIDER} is not supported")

# it will be autofilled by code if USE_REMOTE_DEPLOYMENT is false
INFERENCE_URL = None

#### Dkube Resources

In [None]:
import time,json,shutil
from dkube.sdk import *

In [None]:
# Create the Dkube API client and compare the username reported for the token
# with DKUBEUSERNAME; a mismatch is only reported, execution continues.
api = DkubeApi(URL=DKUBE_URL, token=TOKEN)
if api.validate_token()['username'] != DKUBEUSERNAME:
    print("Invalid User, please check your username, first")

In [None]:
# Stop early when a model monitor named MONITOR_NAME is already known to Dkube.
data = api.get_modelmonitor_id(MONITOR_NAME)
monitor_ids = data.data
if monitor_ids:
    MONITOR_ID = monitor_ids.get(MONITOR_NAME)
    if MONITOR_ID:
        raise ValueError(f"{MONITOR_NAME} already existing please use a different name")

In [None]:
d3_config = {"MONITOR_NAME":MONITOR_NAME, "DATA_SOURCE": DATA_SOURCE, 
            "INPUT_TRAIN_TYPE":INPUT_TRAIN_TYPE, "DKUBEUSERNAME":DKUBEUSERNAME,
             "TOKEN":TOKEN, "DKUBE_URL":DKUBE_URL, "ACCESS_KEY":ACCESS_KEY,
            "SECRET_KEY":SECRET_KEY, "DBHOSTNAME":DBHOSTNAME, "DATABASENAME":DATABASENAME,
            "DBUSERNAME":DBUSERNAME, "DBPASSWORD":DBPASSWORD, "DB_PROVIDER":DB_PROVIDER,
            "DKUBE_BASE_DATASET":DKUBE_BASE_DATASET, "MODEL_NAME":MODEL_NAME,
            "RETRAINING_DATASET":RETRAINING_DATASET, "RUN_FREQUENCY":RUN_FREQUENCY,
            "DKUBE_TRAINING_CODE_NAME":DKUBE_TRAINING_CODE_NAME,
            "USE_REMOTE_DEPLOYMENT":USE_REMOTE_DEPLOYMENT}
if DATA_SOURCE == "aws_s3":
    d3_config["PREDICT_DATASET_FORMAT"] = PREDICT_DATASET_FORMAT
%store d3_config

#### Dataset

In [None]:
# Register the base (training) dataset for the selected data source.
#
# Error handling, identical for both branches:
#   * an exception carrying a "conflict" reason is ignored (presumably the
#     dataset already exists -- TODO confirm against the Dkube API);
#   * any other exception carrying a reason is reported with print;
#   * an exception without a usable `reason` (e.g. a malformed DBHOSTNAME) is
#     re-raised untouched instead of being masked by an AttributeError.
if DATA_SOURCE in ('local', 'aws_s3'):
    try:
        dataset = DkubeDataset(DKUBEUSERNAME, name=DKUBE_BASE_DATASET)
        dataset.update_git_details(url="https://dkube-examples-data.s3.us-west-2.amazonaws.com/monitoring-insurance/training-data/insurance.csv")
        dataset.update_dataset_source(source="pub_url")
        api.create_dataset(dataset)
    except Exception as e:
        reason = getattr(e, "reason", None)
        if not reason:
            raise
        if reason.lower() != "conflict":
            response = e.body
            print(f"Failed[{response.code}]: {response.message}")

if DATA_SOURCE == 'sql':
    try:
        dataset = DkubeDataset(DKUBEUSERNAME, name=DKUBE_BASE_DATASET, remote=True)
        dataset.update_dataset_source('sql')
        # DBHOSTNAME is expected as HOST:PORT
        dataset.update_sql_details(
            host=DBHOSTNAME.split(":")[0],
            port=int(DBHOSTNAME.split(":")[1]),
            username=DBUSERNAME,
            password=DBPASSWORD,
            database=DATABASENAME,
            provider=DB_PROVIDER)
        api.create_dataset(dataset)
    except Exception as e:
        reason = getattr(e, "reason", None)
        if not reason:
            raise
        if reason.lower() != "conflict":
            response = e.body
            print(f"Failed[{response.code}]: {response.message}")

### Model Monitor Datasets

##### (Training / Retraining) Dataset

In [None]:
# Register the (re)training dataset.
# A "conflict" reason is ignored (presumably the dataset already exists --
# TODO confirm); other failures carrying a reason are printed; an exception
# without a usable `reason` is re-raised untouched rather than being masked
# by an AttributeError on `e.reason`.
try:
    dataset = DkubeDataset(DKUBEUSERNAME, name=RETRAINING_DATASET)
    api.create_dataset(dataset)
except Exception as e:
    reason = getattr(e, "reason", None)
    if not reason:
        raise
    if reason.lower() != "conflict":
        response = e.body
        print(f"Failed[{response.code}]: {response.message}")

##### Predict Dataset

In [None]:
# Register the predict dataset for the local and aws_s3 data sources
# (nothing is created for sql).
pname = MONITOR_NAME + '-predict'
prefix = MONITOR_NAME + '/predict'

if DATA_SOURCE in ('local', 'aws_s3'):
    try:
        if DATA_SOURCE == 'local':
            dataset = DkubeDataset(DKUBEUSERNAME, name=pname)
        else:
            # aws_s3: remote dataset backed by the mm-workflow bucket
            dataset = DkubeDataset(DKUBEUSERNAME, name=pname, remote=True)
            dataset.update_dataset_source('aws_s3')
            dataset.update_awss3_details(
                bucket="mm-workflow",
                prefix=prefix,
                key=os.getenv("AWS_ACCESS_KEY_ID", ACCESS_KEY),
                secret=os.getenv("AWS_SECRET_ACCESS_KEY", SECRET_KEY))
        api.create_dataset(dataset)
    except Exception as e:
        # Exceptions without a usable `reason` are re-raised untouched instead
        # of being masked by an AttributeError on `e.reason`.
        reason = getattr(e, "reason", None)
        if not reason:
            raise
        # "conflict" is ignored (presumably: dataset already exists -- TODO confirm)
        if reason.lower() != "conflict":
            response = e.body
            print(f"Failed[{response.code}]: {response.message}")
    

##### Labelled Dataset

In [None]:
# Register the labelled (groundtruth) dataset for the local and aws_s3 data
# sources (nothing is created for sql).
gname = MONITOR_NAME + '-groundtruth'
prefix = MONITOR_NAME + '/groundtruth'

if DATA_SOURCE in ('local', 'aws_s3'):
    try:
        if DATA_SOURCE == 'local':
            dataset = DkubeDataset(DKUBEUSERNAME, name=gname)
        else:
            # aws_s3: remote dataset backed by the mm-workflow bucket
            dataset = DkubeDataset(DKUBEUSERNAME, name=gname, remote=True)
            dataset.update_dataset_source('aws_s3')
            dataset.update_awss3_details(
                bucket="mm-workflow",
                prefix=prefix,
                key=os.getenv("AWS_ACCESS_KEY_ID", ACCESS_KEY),
                secret=os.getenv("AWS_SECRET_ACCESS_KEY", SECRET_KEY))
        api.create_dataset(dataset)
    except Exception as e:
        # Exceptions without a usable `reason` are re-raised untouched instead
        # of being masked by an AttributeError on `e.reason`.
        reason = getattr(e, "reason", None)
        if not reason:
            raise
        # "conflict" is ignored (presumably: dataset already exists -- TODO confirm)
        if reason.lower() != "conflict":
            response = e.body
            print(f"Failed[{response.code}]: {response.message}")


#### Model

In [None]:
# Register the model in Dkube.
# When the only Dkube feature is 'monitoring', the model is sourced from git;
# otherwise it is created with the 'dvs' source.
monitoring_only = api.dkubeinfo['features'] == ['monitoring']
try:
    model = DkubeModel(DKUBEUSERNAME, name=MODEL_NAME)
    if monitoring_only:
        model.update_model_source(source='git')
        model.update_git_details('https://github.com/oneconvergence/dkube-examples/blob/monitoring/insurance/model.joblib',branch="monitoring")
    else:
        model.update_model_source(source='dvs')
    api.create_model(model)
except Exception as e:
    # Exceptions without a usable `reason` are re-raised untouched instead of
    # being masked by an AttributeError on `e.reason`.
    reason = getattr(e, "reason", None)
    if not reason:
        raise
    # "conflict" is ignored (presumably: model already exists -- TODO confirm)
    if reason.lower() != "conflict":
        response = e.body
        print(f"Failed[{response.code}]: {response.message}")

#### Cleanup

In [None]:
## Set CLEANUP = True, after your experiment is complete.
CLEANUP = False
if CLEANUP:
    api.delete_dataset(DKUBEUSERNAME,DKUBE_BASE_DATASET,force=True)
    api.delete_dataset(DKUBEUSERNAME,RETRAINING_DATASET,force=True)
    if DATA_SOURCE != "sql":
        api.delete_dataset(DKUBEUSERNAME,pname,force=True)
        api.delete_dataset(DKUBEUSERNAME,gname,force=True)
    api.delete_model(DKUBEUSERNAME,MODEL_NAME,force=True)
    %store -d d3_config