In [None]:
import os
job_class = os.getenv("DKUBE_JOB_CLASS")
if not job_class:
    !{sys.executable} -m pip install kfp >/dev/null

In [None]:
import json

import kfp
from kfp import components
from dkube.sdk.api import DkubeApi
import random, string
from termcolor import colored

## Load the DKube Pipeline Components

In [None]:
# Load the training, serving and preprocess component definitions (in that
# order) from their YAML files and bind each one to its op factory name.
dkube_training_op, dkube_serving_op, dkube_preprocess_op = map(
    components.load_component_from_file,
    (
        "/mnt/dkube/pipeline/components/training/component.yaml",
        "/mnt/dkube/pipeline/components/serving/component.yaml",
        "/mnt/dkube/pipeline/components/preprocess/component.yaml",
    ),
)

## User Inputs

In [None]:

## Edit the values below so that they match the repo names in your own setup.
## Once they are updated, run all of the cells to execute the pipeline.

## Optional: name for the pipeline run. Leave it blank to let the script
## generate a name for you.
pipeline_run_name = ""

## Repo names for code, dataset & model
model = "ins-lc"          # Model Repo name -- change to match yours
dataset = "ins-lc"        # Dataset Repo name -- change to match yours
code_repo = "ins-lc-pre"  # Code Repo name -- change to match yours

## Other Variable Setup

In [None]:
## These fields are specific to this example, and should not be modified

## Get the environment variables
DKUBE_USERNAME = os.environ.get('DKUBE_USER_LOGIN_NAME')
DKUBE_TOKEN = os.environ.get('DKUBE_USER_ACCESS_TOKEN')
DKUBE_PROJECT_NAME = os.environ.get('DKUBE_PROJECT_NAME', 'insurance')

# Check for the preprocessing env var, which is used later to run or skip the preprocessing stage.
# The flag defaults to blank when PREPROCESS was not set during IDE creation.
PREPROCESS_FLAG = os.environ.get("PREPROCESS", "")

api = DkubeApi(token=DKUBE_TOKEN)

## Project ID: make sure the IDE is created under the same project where the experiment is supposed to run,
## or else provide it manually in the project_id variable.
project_id = os.environ.get("DKUBE_PROJECT_ID")

## Set the variables needed to execute the pipeline stages
image = "ocdr/dkube-datascience-tf-cpu:v2.0.0-17"
serving_image = "ocdr/tensorflowserver:2.0.0"

preprocess_script = "python insurance/preprocess.py"
training_script = "python insurance/training-pre.py"

transformer_code='insurance/transformer.py'
user = os.getenv('USERNAME')
framework = "tensorflow"
f_version = "2.0.0"

## Get the list of versions for the input dataset and pick the last entry as the preprocessing input.
## NOTE(review): the original intent is "v1 = original dataset"; this assumes the API lists the
## oldest version last -- confirm against the DKube SDK.
dataset_version = api.get_dataset_versions(DKUBE_USERNAME, dataset)
if not dataset_version:
    # Fail with an actionable message instead of a bare IndexError/TypeError on the lookup below
    raise ValueError(
        f"No versions found for dataset '{dataset}' (user '{DKUBE_USERNAME}'); "
        "check the dataset repo name in the User Inputs cell"
    )
dataset_v1 = dataset_version[-1]['version']['uuid']
preprocess_input_dataset_version = [dataset_v1]

# Set the mount paths
input_dataset_mounts = ["/input/dataset"]
output_dataset_mounts = ["/output/dataset"]

output_mount_point = "/opt/dkube/out"

# Counter incremented by the submit cell each time a run is created
run_id = 0

In [None]:
@kfp.dsl.pipeline(
    name='dkube-insurance-pl',
    description='sample insurance pipeline'
)
def insurance_pipeline(DKUBE_TOKEN, tags, deployment_name):
    """Define the insurance pipeline: optional preprocessing, then training, then serving.

    Args:
        DKUBE_TOKEN: passed as the first positional argument to every DKube component.
        tags: forwarded as ``tags`` to the preprocessing and training components.
        deployment_name: forwarded as ``name`` to the serving component.

    The preprocessing stage is only added when the notebook-level ``PREPROCESS_FLAG``
    is anything but blank; in that case training is ordered after it. Stage settings
    (images, scripts, repo names, mount paths) are read from notebook-level variables.
    """
    # Every stage that takes a container spec uses the same image
    container = json.dumps({"image": image})

    # NOTE(review): some list-valued arguments below are passed as raw Python lists
    # (input_dataset_mounts, input_dataset_versions, preprocess output_mounts) while
    # others are JSON-encoded with json.dumps -- confirm the components accept both.

    # Look for the PREPROCESS flag, and add the preprocess stage if it is anything but blank
    preprocess = None
    if PREPROCESS_FLAG != '':
        preprocess = dkube_preprocess_op(DKUBE_TOKEN, container,
                                         tags=tags,
                                         program=code_repo, run_script=preprocess_script,
                                         datasets=json.dumps([dataset]),
                                         input_dataset_mounts=input_dataset_mounts,
                                         input_dataset_versions=preprocess_input_dataset_version,
                                         outputs=json.dumps([dataset]),
                                         output_mounts=output_dataset_mounts)

    # The training stage is identical with or without preprocessing, so build it once
    train = dkube_training_op(DKUBE_TOKEN, container,
                              tags=tags,
                              framework=framework, version=f_version,
                              program=code_repo, run_script=training_script,
                              datasets=json.dumps([dataset]),
                              input_dataset_mounts=input_dataset_mounts,
                              outputs=json.dumps([model]),
                              output_mounts=json.dumps([output_mount_point]))
    if preprocess is not None:
        # Training must wait for preprocessing when that stage is part of the run
        train.after(preprocess)

    # Run the serving stage either way, using the artifact output of the training stage
    dkube_serving_op(DKUBE_TOKEN, train.outputs['artifact'], device='cpu',
                     name=deployment_name,
                     serving_image=json.dumps({"image": serving_image}),
                     transformer_image=container,
                     transformer_project=code_repo,
                     transformer_code=transformer_code,
                     production="true").after(train)

## Create a run

In [None]:
# Short random suffix (4 lowercase letters/digits) appended to the generated
# run name and deployment name
res = ''.join(random.choices(string.ascii_lowercase + string.digits, k=4))

if project_id:
    # Tag the run with the project id
    tags = json.dumps([f"project:{project_id}"])
    # Use the project name resolved during variable setup (which carries a default)
    # so the run, experiment and deployment names all agree
    project_name = DKUBE_PROJECT_NAME

    default_run_name = f"[{project_name}] {DKUBE_USERNAME}:{code_repo} {res}"
    experiment = f"[{project_name}] experiment"
    deployment_name = f"{DKUBE_PROJECT_NAME}-{DKUBE_USERNAME}-{res}"
else:
    # Same JSON-string form as the project branch, just with no tags
    tags = json.dumps([])

    default_run_name = f"{DKUBE_USERNAME}:{code_repo} {res}"
    experiment = "default"
    deployment_name = f"{DKUBE_USERNAME}-{res}"

# Check for a user-input pipeline run name, and if provided use it here, otherwise default it
if pipeline_run_name == "":
    run_name = default_run_name
else:
    run_name = f"{pipeline_run_name}"

In [None]:
deployment_id = api.get_deployment_id(name=deployment_name)
if not deployment_id:
    print(f"On pipeline completion the deployment {colored(deployment_name, 'green', attrs=['bold'])} will be created")
    
    client = kfp.Client(existing_token=DKUBE_TOKEN)
    client.create_run_from_pipeline_func(insurance_pipeline, run_name=run_name, experiment_name=experiment,
                                         arguments={"DKUBE_TOKEN":DKUBE_TOKEN, "tags":tags,
                                                    'deployment_name':deployment_name}
                                        )
    run_id += 1
    pl_config = {"DEPLOYMENT_NAME":deployment_name}
    %store pl_config

else:
    print("Deployment Already Existing, skipping create, try running the cells again")