In [4]:
#****************************************************************************
# (C) Cloudera, Inc. 2020-2023
#  All rights reserved.
#
#  Applicable Open Source License: GNU Affero General Public License v3.0
#
#  NOTE: Cloudera open source products are modular software products
#  made up of hundreds of individual components, each of which was
#  individually copyrighted.  Each Cloudera open source product is a
#  collective work under U.S. Copyright Law. Your license to use the
#  collective work is as provided in your written agreement with
#  Cloudera.  Used apart from the collective work, this file is
#  licensed for your use pursuant to the open source license
#  identified above.
#
#  This code is provided to you pursuant a written agreement with
#  (i) Cloudera, Inc. or (ii) a third-party authorized to distribute
#  this code. If you do not have a written agreement with Cloudera nor
#  with an authorized and properly licensed third party, you do not
#  have any rights to access nor to use this code.
#
#  Absent a written agreement with Cloudera, Inc. (“Cloudera”) to the
#  contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY
#  KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED
#  WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO
#  IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND
#  FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU,
#  AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS
#  ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE
#  OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY
#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR
#  CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES
#  RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF
#  BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF
#  DATA.
#
# #  Author(s): Paul de Fusco
#***************************************************************************/

In [5]:
from __future__ import print_function
import cmlapi
from cmlapi.rest import ApiException
from pprint import pprint
import json, secrets, os, time
import mlflow

In [6]:
# Create the CML API client via cmlapi.default_client() (no explicit arguments).
client = cmlapi.default_client()

# NOTE(review): the result of this call is discarded and, since it is not the
# last expression of the cell, it is not displayed either. Presumably a
# connectivity smoke test -- confirm, or remove.
client.list_projects()

# Both identifiers come from environment variables; a missing variable raises
# KeyError right here.
projectId = os.environ['CDSW_PROJECT_ID']
username = os.environ["PROJECT_OWNER"]

In [7]:
experimentName = "MySparkMlClf"

# mlflow.get_experiment_by_name returns None when no experiment carries this
# name; fail with an explicit message instead of an AttributeError on None.
experiment = mlflow.get_experiment_by_name(experimentName)
if experiment is None:
    raise ValueError("MLflow experiment %r was not found" % experimentName)
experimentId = experiment.experiment_id

# run_view_type=1 corresponds to mlflow.entities.ViewType.ACTIVE_ONLY in the
# MLflow API, i.e. deleted runs are excluded from the result.
runsDf = mlflow.search_runs(experimentId, run_view_type=1)

In [8]:
# Display the runs returned by the search above (one row per run).
runsDf

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,tags.engineID,tags.mlflow.user,tags.mlflow.source.name,tags.mlflow.source.git.commit,tags.mlflow.source.type
0,g7rb-zngd-ehr3-y5vv,3jc1-1jz2-hw0k-7a47,EXPERIMENT_RUN_RUNNING,/home/cdsw/.experiments/3jc1-1jz2-hw0k-7a47/g7...,2023-12-08 03:37:27.700753920+00:00,,agpmcetj3l9lz1qq,pauldefusco,/usr/local/lib/python3.9/site-packages/ipykern...,4f90059097eb0230c8c6826b695873e841ddb0fb,LOCAL


In [9]:
# Take both identifiers from the last row of runsDf in a single lookup.
# NOTE(review): whether the last row is the most recent run depends on the
# ordering search_runs returned -- confirm. An empty frame raises IndexError.
experimentId, experimentRunId = runsDf.iloc[-1][['experiment_id', 'run_id']]

In [15]:
def registerModelFromExperimentRun(modelName, experimentId, experimentRunId, modelPath, sessionId):
    """
    Register a model from an existing Experiment Run.

    This is an alternative to registering the model at logging time through
    mlflow's log_model method.

    Input:
        modelName: name under which the model is registered.
        experimentId: id of the experiment that owns the run.
        experimentRunId: id of the experiment run holding the model artifact.
        modelPath: path of the model artifact within the run.
        sessionId: not used by this function; kept so existing callers that
            pass it keep working.
    Output:
        The response returned by client.create_registered_model.
    Raises:
        ApiException: re-raised after its message has been printed, so the
            caller sees the real API failure.
        KeyError: if the CDSW_PROJECT_ID environment variable is not set.
    """

    CreateRegisteredModelRequest = {
        "project_id": os.environ['CDSW_PROJECT_ID'],
        "experiment_id": experimentId,
        "run_id": experimentRunId,
        "model_name": modelName,
        "model_path": modelPath,
    }

    try:
        # Register a model.
        api_response = client.create_registered_model(CreateRegisteredModelRequest)
    except ApiException as e:
        print("Exception when calling CMLServiceApi->create_registered_model: %s\n" % e)
        # Propagate: there is no response to return, and falling through would
        # fail on an unbound local instead of reporting the API error.
        raise

    pprint(api_response)
    return api_response

In [16]:
# 4 random bytes -> 8 hex characters, used as a unique suffix for the model name.
sessionId = secrets.token_hex(4)
modelPath = "best-model"
modelName = f"SparkClf-{username}-{sessionId}"

# Register the model artifact of the selected run; the response is reused
# further down for the model id and model version id.
registeredModelResponse = registerModelFromExperimentRun(
    modelName,
    experimentId,
    experimentRunId,
    modelPath,
    sessionId,
)

{'count': 0,
 'created_at': datetime.datetime(2023, 12, 8, 3, 38, 23, 943000, tzinfo=tzlocal()),
 'description': '',
 'model_id': '6njo-h6l6-cb3n-knum',
 'model_versions': [{'created_at': datetime.datetime(2023, 12, 8, 3, 38, 23, 944000, tzinfo=tzlocal()),
                     'model_id': '',
                     'model_version_id': 'm86h-rjd8-wnz0-w5m2',
                     'model_version_metadata': {'mlflow_metadata': {'experiment_id': '3jc1-1jz2-hw0k-7a47',
                                                                    'metrics': [],
                                                                    'params': [],
                                                                    'run_id': 'g7rb-zngd-ehr3-y5vv',
                                                                    'tags': [{'key': 'mlflow.user',
                                                                              'value': 'pauldefusco'},
                                                                 

### Create a PRD Project for Production

In [17]:
def createPRDProject(projectName="mlops_prd_prj", gitUrl="https://github.com/pdefusco/MLOps_CML_PRD_Proj.git"):
    """
    Method to create a PRD Project from a git repository.

    Input:
        projectName: name given to the new project.
        gitUrl: URL of the git repository used with the "git" template.
    Output:
        The response returned by client.create_project.
    Raises:
        ApiException: re-raised after its message has been printed.
    """

    createProjRequest = {"name": projectName, "template": "git", "git_url": gitUrl}

    try:
        # Create a new project
        api_response = client.create_project(createProjRequest)
    except ApiException as e:
        print("Exception when calling CMLServiceApi->create_project: %s\n" % e)
        # Propagate: without a response, the return below would fail on an
        # unbound local and mask the API error.
        raise

    pprint(api_response)
    return api_response

In [18]:
# Create the PRD project; the response's `id` is read later as prdProjId.
projectCreationResponse = createPRDProject()

{'created_at': datetime.datetime(2023, 12, 8, 3, 38, 29, 397361, tzinfo=tzlocal()),
 'creation_status': 'unknown',
 'creator': {'email': 'pauldefusco@cloudera.com',
             'name': 'Paul de Fusco',
             'username': 'pauldefusco'},
 'default_engine_type': 'ml_runtime',
 'description': '',
 'environment': '{"PROJECT_OWNER":"pauldefusco"}',
 'ephemeral_storage_limit': 10,
 'ephemeral_storage_request': 0,
 'id': 'b99s-6j26-aclu-nqjb',
 'name': 'mlops_prd_prj',
 'owner': {'email': 'pauldefusco@cloudera.com',
           'name': 'Paul de Fusco',
           'username': 'pauldefusco'},
 'permissions': {'admin': True,
                 'business_user': True,
                 'inherit': False,
                 'operator': True,
                 'read': True,
                 'write': True},
 'shared_memory_limit': 0,
 'updated_at': datetime.datetime(2023, 12, 8, 3, 38, 29, 397361, tzinfo=tzlocal()),
 'visibility': 'private'}


In [19]:
def validatePRDProject(username):
    """
    Method to list the projects owned by a user, e.g. to check that the PRD
    project was created.

    Input:
        username: owner username used as the "owner.username" search filter.
    Output:
        The response returned by client.list_projects. This function does not
        inspect it; checking for a specific project is left to the caller.
    Raises:
        ApiException: re-raised after its message has been printed.
    """

    # The filter is passed to the API as a JSON-encoded string.
    search = json.dumps({"owner.username": username})

    try:
        # Return all projects, optionally filtered, sorted, and paginated.
        api_response = client.list_projects(search_filter=search)
    except ApiException as e:
        print("Exception when calling CMLServiceApi->list_projects: %s\n" % e)
        # Propagate: without a response, the return below would fail on an
        # unbound local and mask the API error.
        raise

    return api_response

In [20]:
# List the projects owned by PROJECT_OWNER; the response is stored but not
# inspected in this cell.
apiResp = validatePRDProject(os.environ["PROJECT_OWNER"])
# Uncomment to inspect the returned projects:
#apiResp.projects

### Deploy Model in PRD Project

In [21]:
# Identifiers needed to deploy the registered model inside the PRD project.
prdProjId = projectCreationResponse.id
modelId = registeredModelResponse.model_id
# Only the first entry of model_versions is used.
modelVersionId = registeredModelResponse.model_versions[0].model_version_id
# Recomputed from the same username/sessionId as at registration time, so the
# value is unchanged.
modelName = f"SparkClf-{username}-{sessionId}"

In [22]:
# Echo the model version id of the first registered version (same value as modelVersionId).
registeredModelResponse.model_versions[0].model_version_id

'm86h-rjd8-wnz0-w5m2'

In [23]:
def createModel(projectId, modelName, modelId, description = "My Spark Clf"):
    """
    Method to create a model in a project from a registered model.

    Input:
        projectId: id of the project in which the model is created.
        modelName: name given to the model.
        modelId: id of the registered model, sent as "registered_model_id".
        description: free-text description of the model.
    Output:
        The response returned by client.create_model.
    Raises:
        ApiException: re-raised after its message has been printed.
    """

    CreateModelRequest = {
        "project_id": projectId,
        "name": modelName,
        "description": description,
        "registered_model_id": modelId,
    }

    try:
        # Create a model.
        api_response = client.create_model(CreateModelRequest, projectId)
    except ApiException as e:
        print("Exception when calling CMLServiceApi->create_model: %s\n" % e)
        # Propagate: without a response, the return below would fail on an
        # unbound local and mask the API error.
        raise

    pprint(api_response)
    return api_response

In [24]:
# Create the model in the PRD project; the response's `id` is read later as modelCreationId.
createModelResponse = createModel(prdProjId, modelName, modelId)

{'access_key': 'm7fwq1xhfv244jzst5j1dzyt3xl0mdpu',
 'auth_enabled': True,
 'created_at': datetime.datetime(2023, 12, 8, 3, 38, 34, 251351, tzinfo=tzlocal()),
 'creator': {'email': 'pauldefusco@cloudera.com',
             'name': 'Paul de Fusco',
             'username': 'pauldefusco'},
 'crn': 'crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b4a8c11997d3:workspace:af51f669-db4f-490b-837e-080f355e436e/85a528e0-8d10-4aec-a86d-5934a76a3d54',
 'default_replication_policy': {'num_replicas': '0', 'type': ''},
 'default_resources': {'cpu_millicores': '0',
                       'memory_mb': '0',
                       'nvidia_gpus': '0'},
 'deletion_status': '',
 'description': 'My Spark Clf',
 'id': '85a528e0-8d10-4aec-a86d-5934a76a3d54',
 'name': 'SparkClf-pauldefusco-2db29457',
 'project': {'default_project_engine': '',
             'name': '',
             'public_identifier': '',
             'slug': ''},
 'registered_model_id': '6njo-h6l6-cb3n-knum',
 'run_as': 0,
 'updated_at': datetime.d

In [25]:
def createModelBuild(projectId, modelVersionId, modelCreationId,
                     runtimeIdentifier="docker.repository.cloudera.com/cloudera/cdsw/ml-runtime-workbench-python3.9-standard:2023.08.2-b8"):
    """
    Method to create a Model build

    Input:
        projectId: id of the project that owns the model.
        modelVersionId: registered model version id to build from.
        modelCreationId: id of the model (as returned by model creation) the
            build belongs to.
        runtimeIdentifier: runtime image used for the build; defaults to the
            image this notebook was written against.
    Output:
        The response returned by client.create_model_build.
    Raises:
        ApiException: re-raised after its message has been printed.
    """

    # Create Model Build
    CreateModelBuildRequest = {
        "registered_model_version_id": modelVersionId,
        "runtime_identifier": runtimeIdentifier,
        "comment": "invoking model build",
        "model_id": modelCreationId,
    }

    try:
        # Create a model build.
        api_response = client.create_model_build(CreateModelBuildRequest, projectId, modelCreationId)
    except ApiException as e:
        print("Exception when calling CMLServiceApi->create_model_build: %s\n" % e)
        # Propagate: without a response, the return below would fail on an
        # unbound local and mask the API error.
        raise

    pprint(api_response)
    return api_response

In [26]:
# Id of the model created in the PRD project; needed by the build and deployment calls.
modelCreationId = createModelResponse.id

In [27]:
# Start a build of the registered model version; the response's `id` is read later as modelBuildId.
createModelBuildResponse = createModelBuild(prdProjId, modelVersionId, modelCreationId)

{'built_at': datetime.datetime(1, 1, 1, 0, 0, tzinfo=tzlocal()),
 'comment': 'invoking model build',
 'created_at': datetime.datetime(2023, 12, 8, 3, 38, 36, 963473, tzinfo=tzlocal()),
 'creator': {'email': 'pauldefusco@cloudera.com',
             'name': 'Paul de Fusco',
             'username': 'pauldefusco'},
 'crn': 'crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b4a8c11997d3:workspace:af51f669-db4f-490b-837e-080f355e436e/f8f5a151-1187-4209-a059-c223ea5ee446',
 'deletion_status': '',
 'engine_image': '',
 'file_path': 'predict.py',
 'function_name': 'predict',
 'id': 'f8f5a151-1187-4209-a059-c223ea5ee446',
 'kernel': 'Python 3.9',
 'model_id': '85a528e0-8d10-4aec-a86d-5934a76a3d54',
 'registered_model_version_id': 'm86h-rjd8-wnz0-w5m2',
 'runtime_addon_identifiers': ['hadoop-cli-7.2.16-hf3'],
 'runtime_identifier': 'docker.repository.cloudera.com/cloudera/cdsw/ml-runtime-workbench-python3.9-standard:2023.08.2-b8',
 'status': 'pending',
 'updated_at': datetime.datetime(2023, 12, 8, 3,

In [28]:
def createModelDeployment(modelBuildId, projectId, modelCreationId, cpu="2", memory="4"):
    """
    Method to deploy a model build

    Input:
        modelBuildId: id of the model build to deploy.
        projectId: id of the project that owns the model.
        modelCreationId: id of the model the build belongs to.
        cpu: value sent as "cpu" in the deployment request.
        memory: value sent as "memory" in the deployment request.
    Output:
        The response returned by client.create_model_deployment.
    Raises:
        ApiException: re-raised after its message has been printed.
    """

    CreateModelDeploymentRequest = {
        "cpu": cpu,
        "memory": memory,
    }

    try:
        # Create a model deployment.
        api_response = client.create_model_deployment(CreateModelDeploymentRequest, projectId, modelCreationId, modelBuildId)
    except ApiException as e:
        print("Exception when calling CMLServiceApi->create_model_deployment: %s\n" % e)
        # Propagate: without a response, the return below would fail on an
        # unbound local and mask the API error.
        raise

    pprint(api_response)
    return api_response

    # Example request payload kept from an earlier (wine-quality) model:
    #REMEMBER TO MAKE SURE ALL NUMBERS ARE FLOATS
    #model_request = {"dataframe_split": {"columns":["fixed acidity", "volatile acidity", "citric acid", 
    #"residual sugar", "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density", "pH", 
    #"sulphates", "alcohol"],"data":[[6.2, 0.66, 0.48, 1.2, 0.029, 29.1, 75.1, 0.98, 3.33, 0.39, 12.8]]}}

In [29]:
# Id of the build created above; the deployment call targets this build.
modelBuildId = createModelBuildResponse.id

In [30]:
# Keep the response in a variable: createModelDeployment already pretty-prints
# it, so leaving the call as the cell's last expression renders it twice.
createModelDeploymentResponse = createModelDeployment(modelBuildId, prdProjId, modelCreationId)

{'build_id': 'f8f5a151-1187-4209-a059-c223ea5ee446',
 'cpu': 2.0,
 'created_at': datetime.datetime(2023, 12, 8, 3, 38, 38, 202734, tzinfo=tzlocal()),
 'crn': 'crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b4a8c11997d3:workspace:af51f669-db4f-490b-837e-080f355e436e/843dee45-9b3b-4951-a37b-1b2aa56171af',
 'deployer': {'email': 'pauldefusco@cloudera.com',
              'name': 'Paul de Fusco',
              'username': 'pauldefusco'},
 'environment': '{}',
 'id': '843dee45-9b3b-4951-a37b-1b2aa56171af',
 'memory': 4.0,
 'model_id': '85a528e0-8d10-4aec-a86d-5934a76a3d54',
 'nvidia_gpu': 0,
 'project_id': 'b99s-6j26-aclu-nqjb',
 'replicas': 1,
 'status': 'pending',
 'stopped_at': datetime.datetime(1, 1, 1, 0, 0, tzinfo=tzlocal()),
 'updated_at': datetime.datetime(2023, 12, 8, 3, 38, 38, 207142, tzinfo=tzlocal())}


{'build_id': 'f8f5a151-1187-4209-a059-c223ea5ee446',
 'cpu': 2.0,
 'created_at': datetime.datetime(2023, 12, 8, 3, 38, 38, 202734, tzinfo=tzlocal()),
 'crn': 'crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b4a8c11997d3:workspace:af51f669-db4f-490b-837e-080f355e436e/843dee45-9b3b-4951-a37b-1b2aa56171af',
 'deployer': {'email': 'pauldefusco@cloudera.com',
              'name': 'Paul de Fusco',
              'username': 'pauldefusco'},
 'environment': '{}',
 'id': '843dee45-9b3b-4951-a37b-1b2aa56171af',
 'memory': 4.0,
 'model_id': '85a528e0-8d10-4aec-a86d-5934a76a3d54',
 'nvidia_gpu': 0,
 'project_id': 'b99s-6j26-aclu-nqjb',
 'replicas': 1,
 'status': 'pending',
 'stopped_at': datetime.datetime(1, 1, 1, 0, 0, tzinfo=tzlocal()),
 'updated_at': datetime.datetime(2023, 12, 8, 3, 38, 38, 207142, tzinfo=tzlocal())}

### Now try a request with the following payload!

In [160]:
# Sample scoring payload: one row in "dataframe_split" layout, with every
# value written as a float and listed in the same order as the columns.
model_request = {
    "dataframe_split": {
        "columns": [
            "age",
            "credit_card_balance",
            "bank_account_balance",
            "mortgage_balance",
            "primary_loan_balance",
            "sec_bank_account_balance",
            "savings_account_balance",
            "sec_savings_account_balance",
            "secondary_loan_balance",
            "total_est_nworth",
            "college_loan_balance",
            "transaction_amount",
            "latitude",
            "longitude",
        ],
        "data": [
            [
                11.5,
                20000.5,
                3900.5,
                14000.5,
                2944.5,
                3400.5,
                12000.5,
                29000.5,
                1300.5,
                15000.5,
                10000.5,
                2000.5,
                90.5,
                120.5,
            ],
        ],
    },
}

In [None]:
# register model here