# CI/CD Pipeline

### Simplified CI/CD Pipeline

    - Retrain the model with new interactions via automated experiments
    - Evaluate model and promote it if performance is above threshold?
    - Save model to model repo (model dir)


In [22]:
from cmlbootstrap import CMLBootstrap
import datetime
import os, time

In [23]:
# Connection settings are read from the CDSW_* environment variables.
# os.environ[...] is used instead of os.getenv(...) so that a missing variable
# fails right here with a KeyError naming it, rather than as an opaque error
# on None further down (or a None silently handed to the API client).

# Scheme of the API URL (text before the first ":") joined with the domain.
HOST = os.environ["CDSW_API_URL"].split(
    ":")[0] + "://" + os.environ["CDSW_DOMAIN"]
# Seventh "/"-separated piece of the project URL.
# NOTE(review): presumably the username segment of the URL — confirm the URL layout.
USERNAME = os.environ["CDSW_PROJECT_URL"].split(
    "/")[6]
API_KEY = os.environ["CDSW_API_KEY"]
PROJECT_NAME = os.environ["CDSW_PROJECT"]

In [24]:
# Build the CML API client. Arguments are passed by keyword so the mapping
# onto the constructor's parameters is explicit.
cml = CMLBootstrap(
    host=HOST,
    username=USERNAME,
    api_key=API_KEY,
    project_name=PROJECT_NAME,
)

In [25]:
# Current local time rendered as day, month, year, hour, minute, second
# (14 digits); it is appended to the names generated by this run.
run_time_suffix = datetime.datetime.now().strftime("%d%m%Y%H%M%S")

In [26]:
# Print the wrapper object's built-in documentation to see which methods it exposes.
help(cml)

Help on CMLBootstrap in module cmlbootstrap.CMLBootstrap object:

class CMLBootstrap(builtins.object)
 |  Wrapper class for calls to the internal CML api.
 |  
 |  Attributes:
 |      host (str): URL for the CML instance host.
 |      username (str): Current username.
 |      api_key (str): API key.
 |      project_name (str): Project name.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, host, username, api_key, project_name, log_level=20)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  add_project_editor(self, params)
 |  
 |  create_application(self, params)
 |      Create an Application
 |      
 |      Arguments:
 |          params {dict} -- [dictionary containing application parameters]
 |      
 |      Returns:
 |          [dict] -- [dictionary containing job details]
 |  
 |  create_environment_variable(self, params)
 |  
 |  create_job(self, params)
 |      Create a job
 |      
 |      Arguments:
 |          params {dict} -- [description]

In [27]:
# Render the lineage definition for two models ("First Model" and "New Model"),
# both named with the current run suffix, and write it to lineage.yml.
# Opening with mode 'w' replaces any previous content of the file.
yaml_text = (
""""First Model {}":
  hive_table_qualified_names:                # this is a predefined key to link to training data
    - "default.historical_customer_interactions@cm"               # the qualifiedName of the hive_table object representing                
  metadata:                                  # this is a predefined key for additional metadata
    deployment: "this model was deployed manually"   # suggested use case: query used to extract training data

"New Model {}":
  hive_table_qualified_names:
    - "default.historical_customer_interactions@cm"
  metadata:
    deployment: "this model was deployed programmatically"
"""
).format(run_time_suffix, run_time_suffix)

with open('lineage.yml', 'w') as lineage:
    lineage.write(yaml_text)

In [28]:
## NOTE: lineage.yml should be appended to, not overwritten, on every run.

In [29]:
# Fetch the project record and the default engine record from the API...
project_details = cml.get_project({})
default_engine_details = cml.get_default_engine({})

# ...then keep the two ids that the model-creation payload refers to.
project_id = project_details["id"]
default_engine_image_id = default_engine_details["id"]

In [30]:
# Sample request stored under the "examples" key of the payload below.
# All feature values are given as strings.
example_model_input = {
  "recency": "2",
  "history": "3",
  "used_discount": "0",
  "used_bogo": "1",
  "is_referral": "1",
  "channel_Phone": "1",
  "channel_Web": "1",
  "offer_Discount": "0",
  "offer_No Offer": "1"
}

# Payload handed to cml.create_model. The name and description carry the run
# suffix so every run produces a distinctly named model.
create_model_params = {
    "projectId": project_id,
    "name": "New Model " + run_time_suffix,
    # A separating space was missing here, which glued the suffix onto "Test".
    "description": "A/B Test " + run_time_suffix,
    "visibility": "private",
    "enableAuth": False,
    # Script and function that serve the predictions.
    "targetFilePath": "original_model.py",
    "targetFunctionName": "predict",
    "engineImageId": default_engine_image_id,
    "kernel": "python3",
    "examples": [
        {
            "request": example_model_input,
            "response": {}
        }],
    # Resources requested for the model.
    "cpuMillicores": 1000,
    "memoryMb": 2048,
    "nvidiaGPUs": 0,
    "replicationPolicy": {"type": "fixed", "numReplicas": 1},
    "environment": {}}

In [31]:
# Ask the API to create the model described by create_model_params.
new_model_details = cml.create_model(create_model_params)

# Check for a bad response before reading from it: stop with the raw response
# in the message instead of a bare KeyError/TypeError on the lookups below.
# NOTE(review): assumes a failed call returns something lacking these keys —
# confirm against the cmlbootstrap implementation.
if (not isinstance(new_model_details, dict)
        or "accessKey" not in new_model_details
        or "id" not in new_model_details):
    raise RuntimeError(
        "Model creation failed; unexpected API response: {!r}".format(new_model_details))

access_key = new_model_details["accessKey"]
model_id = new_model_details["id"]

# NOTE(review): this prints the access key into the saved notebook output.
print("New model created with access key", access_key)

New model created with access key mvbmjc0l59exfz66ebwpaxc0fjkexiln


In [None]:
# Wait for the model to deploy, polling its latest deployment status.
# The wait is bounded: the original loop would spin forever if the status
# never became 'deployed'.
# NOTE(review): terminal failure statuses are not detected because the set of
# status strings is not visible here; the timeout is what ends a failed wait.
DEPLOY_TIMEOUT_SECONDS = 30 * 60  # upper bound on the total wait
DEPLOY_POLL_SECONDS = 10          # pause between status checks

is_deployed = False
deploy_deadline = time.monotonic() + DEPLOY_TIMEOUT_SECONDS
while not is_deployed:
    model = cml.get_model({"id": str(new_model_details["id"]), "latestModelDeployment": True, "latestModelBuild": True})
    deployment_status = model["latestModelDeployment"]["status"]
    if deployment_status == 'deployed':
        # Record success in the flag (previously it stayed False forever).
        is_deployed = True
        print("Model is deployed")
    elif time.monotonic() >= deploy_deadline:
        raise TimeoutError(
            "Model was not deployed within {} seconds; last status: {!r}".format(
                DEPLOY_TIMEOUT_SECONDS, deployment_status))
    else:
        print ("Deploying Model.....")
        time.sleep(DEPLOY_POLL_SECONDS)

In [None]:
## The API lets you create or delete a model, but not redeploy an existing one programmatically.
## To redeploy, the model has to be removed and then recreated dynamically.

Experiments

In [None]:
# Second API wrapper, used to launch experiments.
# The API key is no longer embedded in the notebook: it is read from the
# CML_EXPERIMENT_API_KEY environment variable (a name introduced here) and
# falls back to the session's API_KEY when that variable is unset.
# NOTE(review): the key that used to be hardcoded on this line was stored in
# plain text and should be revoked.
cml2 = CMLBootstrap(HOST, USERNAME, os.getenv("CML_EXPERIMENT_API_KEY", API_KEY), PROJECT_NAME)

In [None]:
# Payload passed to run_experiment: the script to run, the kernel, the
# requested resources, the owning project (id sent as a string) and a name.
run_experiment_params = {
    # NOTE(review): "size" looks like an engine-profile record copied from an
    # API response (routing fields included) — confirm which fields are required.
    "size": {
        "id": 1,
        "description": "1 vCPU / 2 GiB Memory",
        "cpu": 1,
        "memory": 2,
        "route": "engine-profiles",
        "reqParams": None,
        "parentResource": {
            "route": "site",
            "parentResource": None
        },
        "restangularCollection": True
    },
    "script": "new_model_experiment.py",
    "arguments": " ",
    "kernel": "python3",
    # Same cpu/memory values as in the "size" entry above.
    "cpu": 1,
    "memory": 2,
    "project": str(project_id),
    "name":"new experiment name"
}

In [None]:
# Launch the experiment described by run_experiment_params and keep the API's response.
new_experiment_details = cml2.run_experiment(run_experiment_params)

Set Experiment Values

In [13]:
from Experiment import Experiment
from sklearn.ensemble import GradientBoostingClassifier

In [14]:
# Construct an Experiment with a string, an unfitted GradientBoostingClassifier and a dict of lists.
# NOTE(review): presumably (data source, estimator, parameter grid) — confirm against Experiment.py.
ex1 = Experiment('data', GradientBoostingClassifier(), {'param':[1,2,3]})

In [15]:
ex1.classifier

GradientBoostingClassifier()

In [16]:
import new_model_experiment

In [17]:
# NOTE(review): `var` is not referenced by any other visible cell — looks like leftover scratch; confirm before removing.
var = 100

In [36]:
# Take a fresh timestamp (day, month, year, hour, minute, second) so names
# built from here on differ from those generated earlier in the notebook.
run_time_suffix = datetime.datetime.now().strftime("%d%m%Y%H%M%S")

In [37]:
# Table name that is interpolated into the lineage entry's qualified name.
table_name = "default.historical_customer_interactions"

In [51]:
# Read the current contents of the lineage file so a new entry can be added to it.
# The with-block closes the file as soon as the text has been read; previously
# the handle was left open for the rest of the session.
with open("lineage.yml", "r") as yaml_text:
    yaml_read = yaml_text.read()

In [53]:
# Lineage entry for the challenger model: the first placeholder receives the
# run suffix, the second the table name (followed by "@cm").
# The text does not end with a newline.
addition = '''"Challenger {}":
  hive_table_qualified_names:
    - "{}@cm"
  metadata:
    deployment: "this model was deployed programmatically"'''.format(run_time_suffix, table_name)

In [57]:
# Join the existing lineage text and the new entry. A newline is inserted only
# when the existing text is non-empty and does not already end with one;
# otherwise the new entry's key would land on the same line as the previous
# entry's last value and the YAML would no longer parse.
yaml_out = yaml_read + ("" if not yaml_read or yaml_read.endswith("\n") else "\n") + addition

In [58]:
yaml_out

'"First Model 13092020010319":\n  hive_table_qualified_names:                # this is a predefined key to link to training data\n    - "default.historical_customer_interactions@cm"               # the qualifiedName of the hive_table object representing                \n  metadata:                                  # this is a predefined key for additional metadata\n    deployment: "this model was deployed manually"   # suggested use case: query used to extract training data\n\n"New Model 13092020010319":\n  hive_table_qualified_names:\n    - "default.historical_customer_interactions@cm"\n  metadata:\n    deployment: "this model was deployed programmatically"\n"Challenger 13092020011256":\n  hive_table_qualified_names:\n    - "default.historical_customer_interactions@cm"\n  metadata:\n    deployment: "this model was deployed programmatically"'

In [59]:
# Write the combined lineage text back to lineage.yml, making sure the file
# ends with a newline. The appended entry has no trailing newline, so without
# this a later append would start on the same line as the last value.
with open('lineage.yml', 'w') as lineage:
    lineage.write(yaml_out if yaml_out.endswith("\n") else yaml_out + "\n")

In [68]:
import pandas as pd
# Load the list of model records returned by get_models into a DataFrame for inspection.
deployed_models_df = pd.DataFrame(cml.get_models({}))

In [71]:
cml.get_project({})['id']

357

In [72]:
deployed_models_df.columns

Index(['id', 'projectId', 'project', 'projectOwner', 'crn', 'creatorId',
       'creator', 'name', 'description', 'visibility', 'accessKey',
       'authEnabled', 'defaultResources', 'defaultReplicationPolicy',
       'htmlUrl', 'createdAt', 'updatedAt', 'namespace'],
      dtype='object')

In [93]:
# Most recent creation timestamp among the current project's models.
# project_id (fetched from the API earlier) replaces the hardcoded 357, so the
# cell keeps working in another project. max() replaces the full sort followed
# by iloc[0]; note that it yields NaN rather than raising when no rows match.
deployed_models_df.loc[deployed_models_df['projectId'] == project_id, "createdAt"].max()

'2020-09-13T02:23:50.338Z'

In [80]:
# Models belonging to the current project. project_id (fetched from the API
# earlier) replaces the hardcoded 357 so the filter follows the project the
# notebook is running in.
deployed_models_df[deployed_models_df['projectId'] == project_id]

Unnamed: 0,id,projectId,project,projectOwner,crn,creatorId,creator,name,description,visibility,accessKey,authEnabled,defaultResources,defaultReplicationPolicy,htmlUrl,createdAt,updatedAt,namespace
12,175,357,"{'id': 357, 'name': 'Model Factory', 'slug': '...","{'id': 9, 'username': 'pauldefusco', 'type': '...",crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b...,9,"{'id': 9, 'username': 'pauldefusco', 'type': '...",New Model 13092020004626,First Model Iteration,private,mj65xqfpvafl3scx0yc65a66rvnazzq9,True,"{'cpuMillicores': 1000, 'memoryMb': 2048, 'nvi...","{'type': 'fixed', 'numReplicas': 1}",http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.clou...,2020-09-13T00:46:30.148Z,2020-09-13T00:46:30.148Z,mlx-user-9
13,179,357,"{'id': 357, 'name': 'Model Factory', 'slug': '...","{'id': 9, 'username': 'pauldefusco', 'type': '...",crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b...,9,"{'id': 9, 'username': 'pauldefusco', 'type': '...",Challenger 13092020013008,First Model Iteration,private,mlanl99v6q4228hobdhx5acpg822ms2y,True,"{'cpuMillicores': 1000, 'memoryMb': 2048, 'nvi...","{'type': 'fixed', 'numReplicas': 1}",http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.clou...,2020-09-13T01:30:09.027Z,2020-09-13T01:30:09.027Z,mlx-user-9
14,182,357,"{'id': 357, 'name': 'Model Factory', 'slug': '...","{'id': 9, 'username': 'pauldefusco', 'type': '...",crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b...,9,"{'id': 9, 'username': 'pauldefusco', 'type': '...",Challenger 13092020021638,First Model Iteration,private,mvzp5ugi0u8s5ud57f9r23xthjxh1nyu,True,"{'cpuMillicores': 1000, 'memoryMb': 2048, 'nvi...","{'type': 'fixed', 'numReplicas': 1}",http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.clou...,2020-09-13T02:16:38.449Z,2020-09-13T02:16:38.449Z,mlx-user-9
15,183,357,"{'id': 357, 'name': 'Model Factory', 'slug': '...","{'id': 9, 'username': 'pauldefusco', 'type': '...",crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b...,9,"{'id': 9, 'username': 'pauldefusco', 'type': '...",Challenger 13092020022147,First Model Iteration,private,md38w84bhali9h31sn37qfmhdxajr527,True,"{'cpuMillicores': 1000, 'memoryMb': 2048, 'nvi...","{'type': 'fixed', 'numReplicas': 1}",http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.clou...,2020-09-13T02:21:47.784Z,2020-09-13T02:21:47.784Z,mlx-user-9
16,184,357,"{'id': 357, 'name': 'Model Factory', 'slug': '...","{'id': 9, 'username': 'pauldefusco', 'type': '...",crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b...,9,"{'id': 9, 'username': 'pauldefusco', 'type': '...",Challenger 13092020022232,First Model Iteration,private,mvav17o0lwb9oogg3jlh8g7wqaw99e6w,True,"{'cpuMillicores': 1000, 'memoryMb': 2048, 'nvi...","{'type': 'fixed', 'numReplicas': 1}",http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.clou...,2020-09-13T02:22:32.131Z,2020-09-13T02:22:32.131Z,mlx-user-9
17,176,357,"{'id': 357, 'name': 'Model Factory', 'slug': '...","{'id': 9, 'username': 'pauldefusco', 'type': '...",crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b...,9,"{'id': 9, 'username': 'pauldefusco', 'type': '...",New Model 13092020010319,First Model Iteration,private,mvbmjc0l59exfz66ebwpaxc0fjkexiln,True,"{'cpuMillicores': 1000, 'memoryMb': 2048, 'nvi...","{'type': 'fixed', 'numReplicas': 1}",http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.clou...,2020-09-13T01:03:23.551Z,2020-09-13T01:03:23.551Z,mlx-user-9
18,177,357,"{'id': 357, 'name': 'Model Factory', 'slug': '...","{'id': 9, 'username': 'pauldefusco', 'type': '...",crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b...,9,"{'id': 9, 'username': 'pauldefusco', 'type': '...",Challenfer 13092020012430,First Model Iteration,private,msa2vmu3aqpu654scz9p4o97u28s5bw6,True,"{'cpuMillicores': 1000, 'memoryMb': 2048, 'nvi...","{'type': 'fixed', 'numReplicas': 1}",http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.clou...,2020-09-13T01:24:30.460Z,2020-09-13T01:24:30.460Z,mlx-user-9
19,181,357,"{'id': 357, 'name': 'Model Factory', 'slug': '...","{'id': 9, 'username': 'pauldefusco', 'type': '...",crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b...,9,"{'id': 9, 'username': 'pauldefusco', 'type': '...",Challenger 13092020014837,First Model Iteration,private,m77k9l35p5g63koivndtd8281a612p8u,False,"{'cpuMillicores': 1000, 'memoryMb': 2048, 'nvi...","{'type': 'fixed', 'numReplicas': 1}",http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.clou...,2020-09-13T01:48:37.707Z,2020-09-13T01:50:54.975Z,mlx-user-9
24,178,357,"{'id': 357, 'name': 'Model Factory', 'slug': '...","{'id': 9, 'username': 'pauldefusco', 'type': '...",crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b...,9,"{'id': 9, 'username': 'pauldefusco', 'type': '...",Challenger 13092020012543,First Model Iteration,private,mbglk18yf07kl7r3l2q5vtt16hjhg2yk,True,"{'cpuMillicores': 1000, 'memoryMb': 2048, 'nvi...","{'type': 'fixed', 'numReplicas': 1}",http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.clou...,2020-09-13T01:25:43.844Z,2020-09-13T01:25:43.844Z,mlx-user-9
28,180,357,"{'id': 357, 'name': 'Model Factory', 'slug': '...","{'id': 9, 'username': 'pauldefusco', 'type': '...",crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b...,9,"{'id': 9, 'username': 'pauldefusco', 'type': '...",Challenger 13092020014139,First Model Iteration,private,mj0wp2e7ufy2230at4jhck2034936g6v,False,"{'cpuMillicores': 1000, 'memoryMb': 2048, 'nvi...","{'type': 'fixed', 'numReplicas': 1}",http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.clou...,2020-09-13T01:41:40.005Z,2020-09-13T01:51:14.035Z,mlx-user-9


In [82]:
cml.get_models({})

[{'accessKey': 'mebs27cp035ez8mutco986k80872y517',
  'authEnabled': True,
  'createdAt': '2020-07-03T11:29:32.105Z',
  'creator': {'id': 24, 'type': 'user', 'username': 'mropars'},
  'creatorId': 24,
  'crn': 'crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b4a8c11997d3:workspace:e4bc3658-32bb-4dc3-a22d-828150329c76/f5262375-401c-4190-ad69-d3f9d526e732',
  'defaultReplicationPolicy': {'numReplicas': 1, 'type': 'fixed'},
  'defaultResources': {'cpuMillicores': 1000,
   'memoryMb': 2048,
   'nvidiaGPUs': 0},
  'description': 'A',
  'htmlUrl': 'http://ml-2f4cffbb-91e.demo-aws.ylcu-atmi.cloudera.site/mropars/mlopsproject/models/1',
  'id': '1',
  'name': 'mymodelwithops',
  'namespace': 'mlx-user-24',
  'project': {'crn': 'crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b4a8c11997d3:workspace:e4bc3658-32bb-4dc3-a22d-828150329c76/b9901c2b-65d1-4b5b-9105-98a015b5ae7f',
   'id': 19,
   'name': 'mlopsproject',
   'slug': 'mlopsproject'},
  'projectId': 19,
  'projectOwner': {'id': 24, 'type': 'user'