In [None]:
%store -r
import sys,json, os
job_class = os.getenv("DKUBE_JOB_CLASS")
if not job_class:
    !{sys.executable} -m pip install kfp==1.4.0 kfp-server-api==1.2.0 --user >/dev/null

In [None]:
MONITOR_NAME = d3_config['MONITOR_NAME']
DATA_SOURCE = d3_config['DATA_SOURCE']
INPUT_TRAIN_TYPE = d3_config['INPUT_TRAIN_TYPE']
DKUBEUSERNAME = d3_config['DKUBEUSERNAME']
TOKEN = d3_config['TOKEN']
DKUBE_BASE_DATASET = d3_config['DKUBE_BASE_DATASET']
MODEL_NAME = d3_config['MODEL_NAME']
RETRAINING_DATASET = d3_config['RETRAINING_DATASET']
DKUBE_TRAINING_CODE_NAME = d3_config['DKUBE_TRAINING_CODE_NAME']
USE_REMOTE_DEPLOYMENT = d3_config['USE_REMOTE_DEPLOYMENT'] = False
%store d3_config

## Importing libraries

In [None]:
import requests

In [None]:
import json
import kfp
import kfp.dsl as dsl
import kfp.compiler as compiler
from kfp import components
from kubernetes import client as k8s_client

import os
import kfp
import string
import random
import kfp.dsl as dsl
import kfp.compiler as compiler
from kfp import components
from kubernetes import client as k8s_client

from dkube.sdk.api import DkubeApi
from dkube.sdk.rsrcs import DkubeCode
from dkube.sdk.rsrcs import DkubeDataset
from dkube.sdk.rsrcs import DkubeModel

## Loading Components

In [None]:
# Create one component factory per pipeline stage from its component.yaml spec.
_load_component = components.load_component_from_file

dkube_preprocessing_op = _load_component(
    "/mnt/dkube/pipeline/components/preprocess/component.yaml"
)
dkube_training_op = _load_component(
    "/mnt/dkube/pipeline/components/training/component.yaml"
)
dkube_serving_op = _load_component(
    "/mnt/dkube/pipeline/components/serving/component.yaml"
)

In [None]:
# Resolve the access token once: the DKUBE_USER_ACCESS_TOKEN environment
# variable wins, otherwise fall back to the TOKEN taken from d3_config.
_access_token = os.getenv("DKUBE_USER_ACCESS_TOKEN", TOKEN)

# DKube SDK handle and Kubeflow Pipelines client, both using the same token.
api = DkubeApi(token=_access_token)
client = kfp.Client(
    host=os.getenv("KF_PIPELINES_ENDPOINT"),
    existing_token=_access_token,
    namespace=DKUBEUSERNAME,
)

## Pipeline Inputs

In [None]:
training_program = DKUBE_TRAINING_CODE_NAME

# Retraining from a 'local' or 'aws-s3' data source reads the
# "<MONITOR_NAME>-groundtruth" dataset; every other combination reads the
# base dataset.
input_training_dataset = (
    MONITOR_NAME + '-groundtruth'
    if DATA_SOURCE in ('local', 'aws-s3') and INPUT_TRAIN_TYPE == 'retraining'
    else DKUBE_BASE_DATASET
)

## Preprocessing stage inputs
# Shell command for the preprocessing step; config values are interpolated
# directly into the command line.
preprocessing_script =f"pip3 install pymysql --user;python insurance_datasources/preprocessing.py --data_source {DATA_SOURCE} --train_type {INPUT_TRAIN_TYPE} --monitor_name {MONITOR_NAME} --user {DKUBEUSERNAME}"
input_dataset_mount = ['/data']
output_mount_path = ['/train-data']
output_dataset = RETRAINING_DATASET

## Training stage inputs
# The training step consumes the preprocessing output and writes MODEL_NAME.
model_name = MODEL_NAME
output_model_mount = "/model"
training_script = "python insurance_datasources/training.py"

## Pipeline Definition

In [None]:
# Three-stage pipeline: preprocessing ("data-generation") -> training -> serving.
# `token` is the pipeline parameter forwarded to every stage as its auth token.
# Stage inputs (program, datasets, mounts, scripts) come from the module-level
# variables defined in the "Pipeline Inputs" cell.
@kfp.dsl.pipeline(
    name='training-pipeline',
    description='insurance-training-pl'
)
def insurance_pipeline(token):
    # The same image spec is used for preprocessing, training and the transformer.
    tf_cpu_container = json.dumps({"image": "ocdr/dkube-datascience-tf-cpu:v2.0.0-16"})
    program_name = str(training_program)
    # Output dataset of preprocessing doubles as the input dataset of training.
    intermediate_datasets = json.dumps([str(output_dataset)])
    intermediate_mounts = json.dumps(output_mount_path)

    preprocess_step = dkube_preprocessing_op(
        auth_token=str(token),
        container=tf_cpu_container,
        program=program_name,
        datasets=json.dumps([str(input_training_dataset)]),
        input_dataset_mounts=json.dumps(input_dataset_mount),
        run_script=str(preprocessing_script),
        outputs=intermediate_datasets,
        output_mounts=intermediate_mounts,
    )
    preprocess_step = preprocess_step.set_display_name("data-generation")

    train_step = dkube_training_op(
        container=tf_cpu_container,
        framework="tensorflow",
        version="2.0.0",
        program=program_name,
        run_script=str(training_script),
        datasets=intermediate_datasets,
        outputs=json.dumps([str(model_name)]),
        input_dataset_mounts=intermediate_mounts,
        output_mounts=json.dumps([str(output_model_mount)]),
        auth_token=token,
    ).after(preprocess_step)

    # Serve the trained model artifact under the monitor's name, with a
    # transformer taken from the same code repo as training.
    dkube_serving_op(
        model=train_step.outputs['artifact'],
        device='cpu',
        name=MONITOR_NAME,
        serving_image=json.dumps({"image": "ocdr/tensorflowserver:2.0.0"}),
        transformer_image=tf_cpu_container,
        transformer_project=program_name,
        transformer_code='insurance_datasources/transformer.py',
        auth_token=token,
    ).after(train_step)

In [None]:
# Submit the pipeline only for local deployments, and only when no deployment
# named MONITOR_NAME is reported by the DKube API.
if not USE_REMOTE_DEPLOYMENT:
    deployment_id = api.get_deployment_id(name=MONITOR_NAME)
    if deployment_id:
        print("Deployment Already Existing, skipping create")
    else:
        # NOTE(review): the run receives TOKEN from d3_config, whereas `api` and
        # `client` prefer DKUBE_USER_ACCESS_TOKEN when it is set -- confirm the
        # two are meant to be able to differ.
        client.create_run_from_pipeline_func(
            insurance_pipeline,
            arguments={'token': TOKEN},
        )