# CI/CD Pipeline

### Simplified CI/CD Pipeline

    - Retrain the model on new interactions via automated experiments
    - Evaluate the model and promote it if its performance is above a threshold (open question)
    - Save the model to the model repo (model dir)


In [1]:
from cmlbootstrap import CMLBootstrap
import datetime
import os, time

In [2]:
# Connection settings are read from CDSW_* environment variables
# (presumably set by the CML/CDSW session runtime -- confirm).
# HOST = scheme of CDSW_API_URL (the text before its first ":") + "://" + CDSW_DOMAIN.
HOST = os.environ.get("CDSW_API_URL").partition(":")[0] + "://" + os.environ.get("CDSW_DOMAIN")
# USERNAME is the 7th "/"-separated piece (index 6) of CDSW_PROJECT_URL.
USERNAME = os.environ.get("CDSW_PROJECT_URL").split("/")[6]
API_KEY = os.environ.get("CDSW_API_KEY")
PROJECT_NAME = os.environ.get("CDSW_PROJECT")

In [35]:
# Build the CML API wrapper from the host, user, API key and project name.
cml = CMLBootstrap(
    host=HOST,
    username=USERNAME,
    api_key=API_KEY,
    project_name=PROJECT_NAME,
)

In [4]:
# Timestamp string (day, month, year, hour, minute, second -- 14 digits) taken
# once per run; it is appended to model names so each run gets distinct names.
run_time_suffix = datetime.datetime.now().strftime("%d%m%Y%H%M%S")

In [5]:
# Print the CMLBootstrap wrapper's built-in documentation to see which API
# calls (create_job, create_application, ...) it exposes.
help(cml)

Help on CMLBootstrap in module cmlbootstrap.CMLBootstrap object:

class CMLBootstrap(builtins.object)
 |  Wrapper class for calls to the internal CML api.
 |  
 |  Attributes:
 |      host (str): URL for the CML instance host.
 |      username (str): Current username.
 |      api_key (str): API key.
 |      project_name (str): Project name.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, host, username, api_key, project_name, log_level=20)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  add_project_editor(self, params)
 |  
 |  create_application(self, params)
 |      Create an Application
 |      
 |      Arguments:
 |          params {dict} -- [dictionary containing application parameters]
 |      
 |      Returns:
 |          [dict] -- [dictionary containing job details]
 |  
 |  create_environment_variable(self, params)
 |  
 |  create_job(self, params)
 |      Create a job
 |      
 |      Arguments:
 |          params {dict} -- [description]

In [6]:
# Create the YAML text for the model lineage.
# Each top-level key is a model name carrying this run's timestamp suffix; the
# "New Model <suffix>" key matches the name used when the model is created.
yaml_text = \
""""First Model {}":
  hive_table_qualified_names:                # this is a predefined key to link to training data
    - "default.historical_customer_interactions@cm"               # the qualifiedName of the hive_table object representing                
  metadata:                                  # this is a predefined key for additional metadata
    deployment: "this model was deployed manually"   # suggested use case: query used to extract training data

"New Model {}":
  hive_table_qualified_names:
    - "default.historical_customer_interactions@cm"
  metadata:
    deployment: "this model was deployed programmatically"
""".format(run_time_suffix, run_time_suffix)

# Append instead of overwriting so lineage entries written by earlier runs are
# kept. The write is skipped when this run's text is already in the file, so
# re-running the cell in the same session does not create duplicate YAML keys.
lineage_path = 'lineage.yml'
existing_lineage = ""
if os.path.exists(lineage_path):
    with open(lineage_path) as lineage:
        existing_lineage = lineage.read()

if yaml_text not in existing_lineage:
    with open(lineage_path, 'a') as lineage:
        # Make sure the new entries start on their own line.
        if existing_lineage and not existing_lineage.endswith("\n"):
            lineage.write("\n")
        lineage.write(yaml_text)

In [7]:
## The lineage YAML file should be appended to, not overwritten, on every run.

In [8]:
# Get Project Details
# Fetch the current project's record through the API wrapper and keep its "id";
# later request payloads reference it (e.g. "projectId" when creating the model).
project_details = cml.get_project({})
project_id = project_details["id"]

# Get Default Engine Details
# The default engine's "id" is passed as "engineImageId" when the model is created.
default_engine_details = cml.get_default_engine({})
default_engine_image_id = default_engine_details["id"]

In [9]:
# Create Model
# Example request registered with the model under "examples"; every feature
# value is given as a string.
example_model_input = {
    "recency": "2", "history": "3",
    "used_discount": "0", "used_bogo": "1", "is_referral": "1",
    "channel_Phone": "1", "channel_Web": "1",
    "offer_Discount": "0", "offer_No Offer": "1",
}

# Payload for cml.create_model: the model is named after this run's timestamp
# suffix and serves predict() from original_model.py on the default engine.
create_model_params = dict(
    projectId=project_id,
    name="New Model " + run_time_suffix,
    description="First Model Iteration",
    visibility="private",
    enableAuth=False,
    targetFilePath="original_model.py",
    targetFunctionName="predict",
    engineImageId=default_engine_image_id,
    kernel="python3",
    examples=[dict(request=example_model_input, response={})],
    cpuMillicores=1000,
    memoryMb=2048,
    nvidiaGPUs=0,
    replicationPolicy=dict(type="fixed", numReplicas=1),
    environment={},
)


In [10]:
# Submit the model-creation request described by create_model_params.
new_model_details = cml.create_model(create_model_params)

# Check for a bad response before using it: without the expected fields the
# lookups below would fail with an opaque KeyError, so fail here with the
# response itself in the message instead.
if (not isinstance(new_model_details, dict)
        or "accessKey" not in new_model_details
        or "id" not in new_model_details):
    raise RuntimeError(
        "Model creation failed; unexpected response: {!r}".format(new_model_details))

access_key = new_model_details["accessKey"]
model_id = new_model_details["id"]

# NOTE(review): the access key ends up in the notebook output; the model is
# created with enableAuth=False, so consider clearing outputs before sharing.
print("New model created with access key", access_key)

New model created with access key m9fa12k1pnd9jxorl5pnr53vp20i8uum


In [11]:
# Wait for the model to deploy.
# Poll the model's latest deployment until its status is 'deployed'. The wait is
# bounded so that a deployment which never reaches that status cannot hang the
# notebook forever; raise the limit if deployments legitimately take longer.
DEPLOY_TIMEOUT_SECONDS = 30 * 60
DEPLOY_POLL_SECONDS = 10

is_deployed = False
deploy_deadline = time.monotonic() + DEPLOY_TIMEOUT_SECONDS
while not is_deployed:
    model = cml.get_model({"id": str(new_model_details["id"]), "latestModelDeployment": True, "latestModelBuild": True})
    deployment_status = model["latestModelDeployment"]["status"]
    if deployment_status == 'deployed':
        # Record the outcome in the flag instead of leaving it permanently False.
        is_deployed = True
        print("Model is deployed")
    elif time.monotonic() >= deploy_deadline:
        raise TimeoutError(
            "Model was not deployed within {} seconds (last status: {!r})".format(
                DEPLOY_TIMEOUT_SECONDS, deployment_status))
    else:
        print ("Deploying Model.....")
        time.sleep(DEPLOY_POLL_SECONDS)

Deploying Model.....
Deploying Model.....
Deploying Model.....
Deploying Model.....
Model is deployed


In [12]:
## The API will let you create a new model or delete a model, but not redeploy one programmatically.
## To redeploy, the model needs to be removed and recreated dynamically.

Experiments

In [41]:
# Payload for cml.run_experiment: run new_model_experiment.py with no arguments
# on a python3 kernel using the "1 vCPU / 2 GiB Memory" engine profile.
run_experiment_params = dict(
    size=dict(
        id=1,
        description="1 vCPU / 2 GiB Memory",
        cpu=1,
        memory=2,
        route="engine-profiles",
        reqParams=None,
        parentResource=dict(route="site", parentResource=None),
        restangularCollection=False,
    ),
    script="new_model_experiment.py",
    arguments="",
    kernel="python3",
    cpu=1,
    memory=2,
    project=str(project_id),
)

In [42]:
# Submit the experiment described by run_experiment_params; as the cell's last
# expression, the wrapper's response is displayed as the cell output.
# NOTE(review): the output recorded for this cell is a 422 "Username is required"
# error, so the request was rejected as written -- confirm what the API expects
# before relying on this step.
cml.run_experiment(run_experiment_params)

ERROR:root:{"username":"Username is required"}
ERROR:root:{'type': 'invalid', 'message': '{"username":"Username is required"}', 'statusCode': 422, 'fields': {'username': 'Username is required'}, 'code': 'invalid'}


{'code': 'invalid',
 'fields': {'username': 'Username is required'},
 'message': '{"username":"Username is required"}',
 'statusCode': 422,
 'type': 'invalid'}

In [16]:
# Display the username parsed from CDSW_PROJECT_URL (presumably a sanity check
# for the "Username is required" response from run_experiment -- confirm).
USERNAME

'pauldefusco'