#### Defined by User

In [None]:
# User-editable settings for the SageMaker insurance model-monitor notebook.
# Credentials and endpoints are read from environment variables; the cell
# stops with a TypeError if the required ones are missing.
import os

## Specify your Dkube username
DKUBEUSERNAME = "ocdkube"

## Name of the model monitor that you will be creating
MONITOR_NAME = "sagemakerinsurance"
## Specify the data source
DATA_SOURCE = "aws_s3"

### Specify Sagemaker details
# BUCKET and ROLE are left empty here for the user to fill in; BUCKET is later
# passed to the ground-truth dataset's S3 details.
BUCKET = ""
ROLE = ""
PREFIX = "sagemaker/Demo-ModelMonitor"

## Dkube information (taken from the environment, empty string when unset)
TOKEN = os.getenv("DKUBE_USER_ACCESS_TOKEN", "")
DKUBE_URL = os.getenv("DKUBE_URL", "")
DKUBE_TRAINING_CODE_NAME = "sagemaker"

### AWS details (taken from the environment, empty string when unset)
ACCESS_KEY = os.getenv("AWS_ACCESS_KEY_ID", "")
SECRET_KEY = os.getenv("AWS_SECRET_ACCESS_KEY", "")
REGION_NAME = os.getenv("REGION_NAME", "")

### Dkube cluster
SAGEMAKER_DKUBE_CLUSTER_NAME = "aws-sagemaker-cluster"

# Dataset to be used as training data
DKUBE_BASE_DATASET = "insurance-data"

# The frequency with which monitoring will run
RUN_FREQUENCY = 5

# Fail fast when mandatory settings are missing. The exception type is kept as
# TypeError; the explanation now travels with the exception instead of being
# printed separately.
if not (TOKEN and DKUBEUSERNAME and DKUBE_URL):
    raise TypeError(
        "Please fill the Dkube details first (TOKEN, DKUBE_URL, DKUBEUSERNAME)"
    )
if DATA_SOURCE == 'aws_s3' and not (ACCESS_KEY and SECRET_KEY):
    raise TypeError(
        "Please fill the AWS_S3 details first (ACCESS_KEY, SECRET_KEY)"
    )

In [None]:
# Persist each setting with IPython's %store magic, one variable per line.
# Presumably this lets companion notebooks restore them with `%store -r`
# -- TODO confirm against the other notebooks in this example.
# NOTE(review): TOKEN, ACCESS_KEY and SECRET_KEY are credentials, and %store
# writes values to IPython's on-disk store; run the cleanup cell when done.
%store MONITOR_NAME 
%store DATA_SOURCE 
%store DKUBEUSERNAME 
%store TOKEN 
%store DKUBE_URL 
%store ACCESS_KEY 
%store SECRET_KEY 
%store DKUBE_BASE_DATASET
%store RUN_FREQUENCY
%store DKUBE_TRAINING_CODE_NAME
%store BUCKET
%store PREFIX 
%store ROLE
%store REGION_NAME
%store SAGEMAKER_DKUBE_CLUSTER_NAME

#### Dkube Resources

In [None]:
import time,json,shutil
from dkube.sdk import *

In [None]:
# Create the DKube API client and warn when the token's owner differs from
# the configured username (a mismatch is only reported, not raised).
api = DkubeApi(URL=DKUBE_URL, token=TOKEN)
if api.validate_token()['username'] != DKUBEUSERNAME:
    print("Invalid User, please check your username, first")

#### Dataset

In [None]:
# Register the public insurance training CSV as the base dataset. Only done
# when the notebook is configured for the aws_s3 data source.
if DATA_SOURCE == 'aws_s3':
    try:
        dataset = DkubeDataset(DKUBEUSERNAME, name=DKUBE_BASE_DATASET)
        # NOTE(review): the URL is supplied via update_git_details and the
        # source is then switched to "pub_url"; keep this call order --
        # TODO confirm against the DKube SDK.
        dataset.update_git_details(url="https://dkube-examples-data.s3.us-west-2.amazonaws.com/monitoring-insurance/training-data/insurance.csv")
        dataset.update_dataset_source(source="pub_url")
        api.create_dataset(dataset)
    except Exception as e:
        # Only errors that carry a textual `reason` are handled here. Anything
        # else (no `reason` attribute, or a non-string one such as a wrapped
        # connection error) is re-raised untouched instead of being masked by
        # an AttributeError raised from inside this handler.
        reason = getattr(e, "reason", None)
        if not isinstance(reason, str):
            raise
        # A "conflict" reason is silently ignored (presumably the dataset
        # already exists); every other reason is reported.
        if reason.lower() != "conflict":
            response = e.body
            print(f"Failed[{response.code}]: {response.message}")

### Model Monitor Datasets

##### Labelled Dataset

In [None]:
# Name of the ground-truth (labelled) dataset and the S3 prefix it points to,
# both derived from the monitor name.
gname = MONITOR_NAME + '-groundtruth'
prefix = MONITOR_NAME + '/groundtruth'

# Register the ground-truth dataset as a remote aws_s3 dataset.
if DATA_SOURCE == 'aws_s3':
    try:
        dataset = DkubeDataset(DKUBEUSERNAME, name=gname, remote=True)
        dataset.update_dataset_source('aws_s3')
        # Credentials come from the environment when set, otherwise from the
        # values captured in the configuration cell.
        dataset.update_awss3_details(
            bucket=BUCKET,
            prefix=prefix,
            key=os.getenv("AWS_ACCESS_KEY_ID", ACCESS_KEY),
            secret=os.getenv("AWS_SECRET_ACCESS_KEY", SECRET_KEY),
        )
        api.create_dataset(dataset)
    except Exception as e:
        # Errors without a usable textual `reason` (attribute missing, empty,
        # or not a string) are re-raised as they are; looking the attribute up
        # defensively keeps the handler itself from raising AttributeError.
        reason = getattr(e, "reason", None)
        if not isinstance(reason, str) or not reason:
            raise
        # A "conflict" reason is silently ignored (presumably the dataset
        # already exists); every other reason is reported.
        if reason.lower() != "conflict":
            response = e.body
            print(f"Failed[{response.code}]: {response.message}")

#### Cleanup

In [None]:
## Set CLEANUP = True, after your experiment is complete.
CLEANUP = False
if CLEANUP:
    # Remove the datasets created by this notebook.
    api.delete_dataset(DKUBEUSERNAME, DKUBE_BASE_DATASET, force=True)
    if DATA_SOURCE != "sql":
        # `pname` is not defined by any cell shown in this notebook; delete it
        # only when some other cell has defined it, so cleanup does not stop
        # with a NameError before the remaining resources are removed.
        if "pname" in globals():
            api.delete_dataset(DKUBEUSERNAME, pname, force=True)
        api.delete_dataset(DKUBEUSERNAME, gname, force=True)
    # Forget every variable persisted by the %store cell. `%store -d` is
    # documented for a single variable, so each name is removed with its own
    # call. Names must have been stored earlier, otherwise IPython reports a
    # usage error for that name.
    for _stored_name in (
        "MONITOR_NAME",
        "DATA_SOURCE",
        "DKUBEUSERNAME",
        "TOKEN",
        "DKUBE_URL",
        "ACCESS_KEY",
        "SECRET_KEY",
        "DKUBE_BASE_DATASET",
        "RUN_FREQUENCY",
        "DKUBE_TRAINING_CODE_NAME",
        "BUCKET",
        "PREFIX",
        "ROLE",
        "REGION_NAME",
        "SAGEMAKER_DKUBE_CLUSTER_NAME",
    ):
        get_ipython().run_line_magic("store", "-d " + _stored_name)
    