In [1]:
#****************************************************************************
# (C) Cloudera, Inc. 2020-2024
#  All rights reserved.
#
#  Applicable Open Source License: GNU Affero General Public License v3.0
#
#  NOTE: Cloudera open source products are modular software products
#  made up of hundreds of individual components, each of which was
#  individually copyrighted.  Each Cloudera open source product is a
#  collective work under U.S. Copyright Law. Your license to use the
#  collective work is as provided in your written agreement with
#  Cloudera.  Used apart from the collective work, this file is
#  licensed for your use pursuant to the open source license
#  identified above.
#
#  This code is provided to you pursuant a written agreement with
#  (i) Cloudera, Inc. or (ii) a third-party authorized to distribute
#  this code. If you do not have a written agreement with Cloudera nor
#  with an authorized and properly licensed third party, you do not
#  have any rights to access nor to use this code.
#
#  Absent a written agreement with Cloudera, Inc. (“Cloudera”) to the
#  contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY
#  KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED
#  WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO
#  IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND
#  FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU,
#  AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS
#  ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE
#  OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY
#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR
#  CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES
#  RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF
#  BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF
#  DATA.
#
#  Author(s): Paul de Fusco
#***************************************************************************/

In [2]:
from __future__ import print_function
import cmlapi
from cmlapi.rest import ApiException
from pprint import pprint
import json, secrets, os, time
import mlflow
import datetime

In [3]:
class ModelDeployment():
    """
    Thin wrapper around a CML API v2 client (``cmlapi``) used to create a PRD
    project and to create, build and deploy a model inside it.

    Each public method sends one request through ``self.client`` and returns
    the API response. If the request raises ``ApiException`` the error is
    printed and then re-raised, so the caller sees the real API failure.
    """

    def __init__(self, client, projectId, username):
        """
        Parameters
        ----------
        client : CML API client used for every request. When ``None`` is
            passed, ``cmlapi.default_client()`` is used instead.
        projectId : id of the project this helper is created from.
        username : project owner, used by ``listProjects`` as a search filter.
        """
        # Use the client the caller injected; only fall back to the default one
        # when no client was supplied.
        self.client = client if client is not None else cmlapi.default_client()
        self.projectId = projectId
        self.username = username


    def _invoke(self, apiName, *args, **kwargs):
        """
        Call ``self.client.<apiName>(*args, **kwargs)`` and return its result.

        An ``ApiException`` is reported on stdout and re-raised unchanged.
        """
        try:
            return getattr(self.client, apiName)(*args, **kwargs)
        except ApiException as e:
            print("Exception when calling CMLServiceApi->%s: %s\n" % (apiName, e))
            # Re-raise: there is no response to return, and falling through
            # would only replace this error with an UnboundLocalError.
            raise


    def createPRDProject(self, name, git_url):
        """
        Create a project from a git repository and pretty-print the response.

        Parameters
        ----------
        name : name of the project to create.
        git_url : URL of the git repository used as the project template.

        Returns the ``create_project`` response. Raises ``ApiException`` when
        the request fails.
        """

        createProjRequest = {"name": name, "template": "git", "git_url": git_url}

        api_response = self._invoke("create_project", createProjRequest)
        pprint(api_response)
        return api_response


    def validatePRDProject(self, username):
        """
        List the projects owned by ``username`` (to check project creation).

        Returns the ``list_projects`` response. Raises ``ApiException`` when
        the request fails.
        """

        # The API expects the search filter as a JSON-encoded string.
        search = json.dumps({"owner.username" : username})
        return self._invoke("list_projects", search_filter=search)


    def createModel(self, projectId, modelName, description = "My Model"):
        """
        Create a model in the given project and pretty-print the response.

        Parameters
        ----------
        projectId : id of the project that will own the model.
        modelName : name of the model.
        description : free-text description (defaults to "My Model").

        Returns the ``create_model`` response. Raises ``ApiException`` when
        the request fails.
        """

        CreateModelRequest = {
                                "project_id": projectId,
                                "name" : modelName,
                                "description": description,
                                # NOTE(review): requests a model with authentication
                                # disabled - confirm this is intended for a PRD project.
                                "disable_authentication": True
                             }

        api_response = self._invoke("create_model", CreateModelRequest, projectId)
        pprint(api_response)
        return api_response


    def listProjects(self, projName):
        """
        List projects named ``projName`` that are owned by ``self.username``.

        Returns the ``list_projects`` response. Raises ``ApiException`` when
        the request fails.
        """

        # Supported search filter keys are: [creator.email creator.name
        # creator.username description name owner.email owner.name owner.username].
        # The filter is sent as a JSON-encoded string.
        search = json.dumps({"owner.username": self.username, "name":projName})
        return self._invoke("list_projects", search_filter=search)


    def listRuntimes(self, search_filter=None):
        """
        List the available runtimes that match a search filter.

        Parameters
        ----------
        search_filter : optional dict of filter keys. Supported keys are
            "image_identifier", "editor", "kernel", "edition", "description"
            and "full_version". When omitted, the Python 3.10 / Standard /
            2023.12.1-b8 filter is used.

        Returns the ``list_runtimes`` response. Raises ``ApiException`` when
        the request fails.
        """
        if search_filter is None:
            search_filter = {"kernel": "Python 3.10", "edition": "Standard", "full_version": "2023.12.1-b8"}

        # The filter is sent as a JSON-encoded string.
        search = json.dumps(search_filter)
        return self._invoke("list_runtimes", search_filter=search)


    def createModelBuild(self, projectId, filePath, runtimeId, functionName, modelCreationId):
        """
        Create a build for an existing model and pretty-print the response.

        Parameters
        ----------
        projectId : id of the project that owns the model.
        filePath : path of the script that contains the serving function.
        runtimeId : runtime image identifier to build with.
        functionName : name of the function inside ``filePath`` to serve.
        modelCreationId : id of the model the build belongs to.

        Returns the ``create_model_build`` response. Raises ``ApiException``
        when the request fails.
        """

        CreateModelBuildRequest = {
                                    "runtime_identifier": runtimeId,
                                    "model_id": modelCreationId,
                                    "file_path": filePath,
                                    "function_name": functionName
                                  }

        api_response = self._invoke("create_model_build", CreateModelBuildRequest, projectId, modelCreationId)
        pprint(api_response)
        return api_response


    def createModelDeployment(self, modelBuildId, projectId, modelCreationId,
                              cpu=2.00, memory=4.00, replicas=1, nvidia_gpus=0):
        """
        Deploy a model build.

        Parameters
        ----------
        modelBuildId : id of the build to deploy.
        projectId : id of the project that owns the model.
        modelCreationId : id of the model the build belongs to.
        cpu, memory, replicas, nvidia_gpus : values sent under the request keys
            of the same name; the defaults are 2.00, 4.00, 1 and 0.

        Returns the ``create_model_deployment`` response. Raises
        ``ApiException`` when the request fails.
        """

        CreateModelDeploymentRequest = {
          "build_id" : modelBuildId,
          "model_id" : modelCreationId,
          "project_id" : projectId,
          "cpu" : cpu,
          "memory" : memory,
          "replicas" : replicas,
          "nvidia_gpus" : nvidia_gpus
        }

        return self._invoke("create_model_deployment", CreateModelDeploymentRequest, projectId, modelCreationId, modelBuildId)

In [4]:
# The current project's id and its owner come from the session environment.
devProjectId = os.environ["CDSW_PROJECT_ID"]
username = os.environ["PROJECT_OWNER"]

# The model name is suffixed with the owner and today's date.
today = datetime.date.today()
modelName = "-".join(["TimeSeriesQuery", username, str(today)])

In [5]:
# Build the API client and pass it, with the project id and owner, to the deployment helper.
client = cmlapi.default_client()
deployment = ModelDeployment(client=client, projectId=devProjectId, username=username)

In [6]:
# PRD project name and the git repository the project is created from.
# NOTE(review): the name contains spaces; the output recorded for this cell
# shows the project being created under exactly this name.
prdProjName = f"CML MLOps Logistics PRD - {username}"
prdGitUrl = "https://github.com/pdefusco/CML_MLOps_Logistics_PRD.git"
projectCreationResponse = deployment.createPRDProject(
    name=prdProjName,
    git_url=prdGitUrl,
)

{'created_at': datetime.datetime(2024, 3, 17, 22, 53, 34, 759343, tzinfo=tzlocal()),
 'creation_status': 'unknown',
 'creator': {'email': 'pauldefusco@cloudera.com',
             'name': 'Paul de Fusco',
             'username': 'pauldefusco'},
 'default_engine_type': 'ml_runtime',
 'description': '',
 'environment': '{"PROJECT_OWNER":"pauldefusco"}',
 'ephemeral_storage_limit': 10,
 'ephemeral_storage_request': 0,
 'id': 'bmf5-91u6-19fg-bz11',
 'name': 'CML MLOps Logistics PRD - pauldefusco',
 'owner': {'email': 'pauldefusco@cloudera.com',
           'name': 'Paul de Fusco',
           'username': 'pauldefusco'},
 'permissions': {'admin': True,
                 'business_user': True,
                 'inherit': False,
                 'operator': True,
                 'read': True,
                 'write': True},
 'shared_memory_limit': 0,
 'updated_at': datetime.datetime(2024, 3, 17, 22, 53, 34, 759343, tzinfo=tzlocal()),
 'visibility': 'private'}


In [7]:
# Keep the id of the PRD project that was just created; later cells pass it to the model calls.
prdProjId = projectCreationResponse.id

In [8]:
# Look the PRD project up by owner and name; the response is displayed as the cell output.
deployment.listProjects(prdProjName)

{'next_page_token': '',
 'projects': [{'created_at': datetime.datetime(2024, 3, 17, 22, 53, 34, 759343, tzinfo=tzlocal()),
               'creation_status': 'creating',
               'creator': {'email': 'pauldefusco@cloudera.com',
                           'name': 'Paul de Fusco',
                           'username': 'pauldefusco'},
               'default_engine_type': 'ml_runtime',
               'description': '',
               'environment': '{"PROJECT_OWNER":"pauldefusco"}',
               'ephemeral_storage_limit': 10,
               'ephemeral_storage_request': 0,
               'id': 'bmf5-91u6-19fg-bz11',
               'name': 'CML MLOps Logistics PRD - pauldefusco',
               'owner': {'email': 'pauldefusco@cloudera.com',
                         'name': 'Paul de Fusco',
                         'username': 'pauldefusco'},
               'permissions': {'admin': True,
                               'business_user': True,
                               'inherit': F

In [9]:
# Create the model in the PRD project (description is left at its default, "My Model").
createModelResponse = deployment.createModel(prdProjId, modelName)

{'access_key': 'm952e9gut350148mopkqzano6qg5abl4',
 'auth_enabled': True,
 'created_at': datetime.datetime(2024, 3, 17, 22, 53, 35, 719306, tzinfo=tzlocal()),
 'creator': {'email': 'pauldefusco@cloudera.com',
             'name': 'Paul de Fusco',
             'username': 'pauldefusco'},
 'crn': 'crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b4a8c11997d3:workspace:39832676-2a5e-48ab-a20a-9b2bb82b96ff/4a8686c9-f965-4af5-aa09-9a833977bedf',
 'default_replication_policy': {'num_replicas': '0', 'type': ''},
 'default_resources': {'cpu_millicores': '0',
                       'memory_mb': '0',
                       'nvidia_gpus': '0'},
 'deletion_status': '',
 'description': 'My Model',
 'id': '4a8686c9-f965-4af5-aa09-9a833977bedf',
 'name': 'TimeSeriesQuery-pauldefusco-2024-03-17',
 'project': {'default_project_engine': '',
             'name': '',
             'public_identifier': '',
             'slug': ''},
 'registered_model_id': '',
 'run_as': 0,
 'updated_at': datetime.datetime(2024,

In [10]:
# List the runtimes; with no arguments listRuntimes() applies a
# Python 3.10 / Standard / 2023.12.1-b8 filter. The image_identifier used for
# the model build is taken from this output.
listRuntimesResponse = deployment.listRuntimes()
print(listRuntimesResponse)

{'next_page_token': '',
 'runtimes': [{'description': 'Standard edition JupyterLab Python runtime '
                              'provided by Cloudera',
               'edition': 'Standard',
               'editor': 'JupyterLab',
               'full_version': '2023.12.1-b8',
               'image_identifier': 'docker.repository.cloudera.com/cloudera/cdsw/ml-runtime-jupyterlab-python3.10-standard:2023.12.1-b8',
               'kernel': 'Python 3.10',
               'register_user_id': 0,
               'status': 'ENABLED'},
              {'description': 'PBJ Workbench Python runtime provided by '
                              'Cloudera',
               'edition': 'Standard',
               'editor': 'PBJ Workbench',
               'full_version': '2023.12.1-b8',
               'image_identifier': 'docker.repository.cloudera.com/cloudera/cdsw/ml-runtime-pbj-workbench-python3.10-standard:2023.12.1-b8',
               'kernel': 'Python 3.10',
               'register_user_id': 0,
       

In [11]:
# Inputs for the model build.
modelCreationId = createModelResponse.id
filePath = "tsQueryServe.py"
# Runtime image identifier, copied by hand from the list-runtimes cell output above.
runtimeId = "docker.repository.cloudera.com/cloudera/cdsw/ml-runtime-jupyterlab-python3.10-standard:2023.12.1-b8"
functionName = "predict"

# Line continuations are implicit inside the parentheses, so no backslashes are needed.
createModelBuildResponse = deployment.createModelBuild(
    projectId=prdProjId,
    filePath=filePath,
    runtimeId=runtimeId,
    functionName=functionName,
    modelCreationId=modelCreationId,
)

{'built_at': datetime.datetime(1, 1, 1, 0, 0, tzinfo=tzlocal()),
 'comment': '',
 'created_at': datetime.datetime(2024, 3, 17, 22, 53, 38, 184613, tzinfo=tzlocal()),
 'creator': {'email': 'pauldefusco@cloudera.com',
             'name': 'Paul de Fusco',
             'username': 'pauldefusco'},
 'crn': 'crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b4a8c11997d3:workspace:39832676-2a5e-48ab-a20a-9b2bb82b96ff/146179ab-59e7-4a1c-876e-c18cef95bc15',
 'deletion_status': '',
 'engine_image': '',
 'file_path': 'tsQueryServe.py',
 'function_name': 'predict',
 'id': '146179ab-59e7-4a1c-876e-c18cef95bc15',
 'kernel': 'Python 3.10',
 'model_id': '4a8686c9-f965-4af5-aa09-9a833977bedf',
 'registered_model_version_id': '',
 'runtime_addon_identifiers': ['hadoop-cli-7.2.16-hf3'],
 'runtime_identifier': 'docker.repository.cloudera.com/cloudera/cdsw/ml-runtime-jupyterlab-python3.10-standard:2023.12.1-b8',
 'status': 'pending',
 'updated_at': datetime.datetime(2024, 3, 17, 22, 53, 39, 203895, tzinfo=tzloc

In [12]:
# Deploy the build created in the previous cell. The call stays as the last
# expression of the cell so that its response is displayed.
modelBuildId = createModelBuildResponse.id
deployment.createModelDeployment(
    modelBuildId=modelBuildId,
    projectId=prdProjId,
    modelCreationId=modelCreationId,
)

{'build_id': '146179ab-59e7-4a1c-876e-c18cef95bc15',
 'cpu': 2.0,
 'created_at': datetime.datetime(2024, 3, 17, 22, 53, 41, 488043, tzinfo=tzlocal()),
 'crn': 'crn:cdp:ml:us-west-1:8a1e15cd-04c2-48aa-8f35-b4a8c11997d3:workspace:39832676-2a5e-48ab-a20a-9b2bb82b96ff/095b118d-43c6-43e8-98ae-a777575159a9',
 'deployer': {'email': 'pauldefusco@cloudera.com',
              'name': 'Paul de Fusco',
              'username': 'pauldefusco'},
 'environment': '{}',
 'id': '095b118d-43c6-43e8-98ae-a777575159a9',
 'memory': 4.0,
 'model_id': '4a8686c9-f965-4af5-aa09-9a833977bedf',
 'nvidia_gpu': 0,
 'project_id': 'bmf5-91u6-19fg-bz11',
 'replicas': 1,
 'status': 'pending',
 'stopped_at': datetime.datetime(1, 1, 1, 0, 0, tzinfo=tzlocal()),
 'updated_at': datetime.datetime(2024, 3, 17, 22, 53, 41, 491346, tzinfo=tzlocal())}

In [12]:
## NOW NAVIGATE TO THE PRD PROJECT AND TRY A REQUEST WITH THIS PAYLOAD!

#{"pattern": [54,53,52,51]}