###### MM/HIBI - Deploy All Models (1.0.0-b5 Release)

Steps:
- Create Project (if necessary)
- Create Models (if necessary)
- Create Datasets (if necessary)
- Create Versions
- Deploy Versions

Note: this notebook uses the services' internal (in-cluster) endpoints. Future versions should use the external endpoints instead.

**Before running the notebook, edit the values in the Setup section if necessary. Don't edit anything in the other sections.**

## Setup

Set working directory.

In [9]:
%cd /home/jovyan/shared/devs/model-management/1.0.0-b5

/home/jovyan/shared/devs/model-management/1.0.0-b5


Define constants.

In [10]:
# Name of the project that is looked up and, if missing, created.
PROJECT_NAME = "Hackathon"



# Defect Detection Model
# Only the Defect Detection model is configured in this Setup cell.
# Name used to find or create the model.
DEFECT_DETECTION_MODEL_NAME = "Defect Detection"
# Name used to find or create the dataset; also used to derive the dummy
# dataset file name (lower-cased, spaces replaced by dashes).
DEFECT_DETECTION_MODEL_DATASET_NAME = "Defects 1"
# Passed as the `asc` field when the model is created.
DEFECT_DETECTION_MODEL_ASC_NAME = "Defect Detection"
# Passed as the `location` of the model version that is created.
DEFECT_DETECTION_MODEL_VERSION_LOCATION = "s3://mlruns-hibi-04/9/de5816614f624eecb65a0dcd618760be/artifacts/model"
# When True, the "Deploy Versions" cell deploys this model's version.
DEFECT_DETECTION_MODEL_DEPLOY = True
# Passed as `node_name` in the deploy request.
DEFECT_DETECTION_MODEL_DEPLOY_NODE_NAME = "GPU - Inference"
# Passed as the `force` flag of the deploy call.
DEFECT_DETECTION_MODEL_FORCE_DEPLOY = True
# CPU requests/limits; converted to float in the deploy request.
# NOTE(review): presumably expressed in CPU cores - confirm with the deployment service.
DEFECT_DETECTION_MODEL_CPU_REQUESTS = 1
DEFECT_DETECTION_MODEL_CPU_LIMITS = 2
# Memory requests/limits; converted to float in the deploy request.
DEFECT_DETECTION_MODEL_MEM_REQUESTS = 1024*6 # in MiB
DEFECT_DETECTION_MODEL_MEM_LIMITS = 1024*7 # in MiB
# Used as the `gpu` value for both requests and limits.
DEFECT_DETECTION_MODEL_GPU = 1



# Namespace interpolated into the `*.svc.cluster.local` service host names
# (manager service, deployment service and MLflow).
NAMESPACE = "hitachi-solutions"

## Dependencies

Import dependencies.

In [11]:
from pprint import pprint

import os
import random
import sys
import time

import mlflow

sys.path.insert(0, "/home/jovyan/shared/devs/model-management/1.0.0-b5")

from mm_client.api import dataset_api, deploy_api, model_api, model_version_api, project_api
from mm_client.api_client import ApiClient
from mm_client.configuration import Configuration
from mm_client.exceptions import ApiException
from mm_client.model.confusion_matrix import ConfusionMatrix
from mm_client.model.deploy_request import DeployRequest
from mm_client.model.feedback_metrics import FeedbackMetrics
from mm_client.model.feedback_request import FeedbackRequest
from mm_client.model.inline_object4 import InlineObject4
from mm_client.model.inline_object5 import InlineObject5
from mm_client.model.inline_object6 import InlineObject6
from mm_client.model.inline_object7 import InlineObject7
from mm_client.model.metrics import Metrics
from mm_client.model.metrics_metrics import MetricsMetrics
from mm_client.model.model_version import ModelVersion
from mm_client.model.parameter import Parameter
from mm_client.model.predict_request import PredictRequest
from mm_client.model.predict_request_data import PredictRequestData
from mm_client.model.predict_response import PredictResponse
from mm_client.model.resources import Resources
from mm_client.model.resources_data import ResourcesData

## Get API Clients

Get the API clients for the different Model Management services.

In [12]:
# Manager service: a single ApiClient is shared by the project, model,
# dataset and model-version API wrappers.
manager_svc_config = Configuration(
    host=f"http://lumada-ml-model-management-manager-svc.{NAMESPACE}.svc.cluster.local:8080/api/v0"
)
manager_svc_api_client = ApiClient(manager_svc_config)

proj_api_instance = project_api.ProjectApi(manager_svc_api_client)
model_api_instance = model_api.ModelApi(manager_svc_api_client)
dataset_api_instance = dataset_api.DatasetApi(manager_svc_api_client)
version_api_instance = model_version_api.ModelVersionApi(manager_svc_api_client)

# Deployment service: separate host, therefore a separate ApiClient.
deployment_svc_config = Configuration(
    host=f"http://lumada-ml-model-management-deployment-svc.{NAMESPACE}.svc.cluster.local:8080/api/v0"
)
deployment_svc_api_client = ApiClient(deployment_svc_config)

deploy_api_instance = deploy_api.DeployApi(deployment_svc_api_client)

## Manage Projects, Models and Datasets

Create Project (or get existing one).

In [13]:
def get_projects():
    """Fetch the projects from the manager service (requested with ``deleted=False``).

    Returns:
        The API response, or an empty list (after printing the error)
        when the call raises ``ApiException``.
    """
    try:
        found = proj_api_instance.getprojects(deleted=False)
    except ApiException as err:
        print("Exception when calling getprojects: %s\n" % err)
        return []
    else:
        return found
    
def create_project(proj):
    """Create a project through the manager service.

    Args:
        proj: Request payload, forwarded unchanged to the API call.

    Returns:
        The API response, or ``None`` (after printing the error) when the
        call raises ``ApiException``.
    """
    try:
        created = proj_api_instance.create_project(proj)
    except ApiException as err:
        print("Exception when calling create_project: %s\n" % err)
        return None
    else:
        return created

# Reuse the first project named PROJECT_NAME; create it when none exists.
projects = get_projects()
project = next(
    (candidate for candidate in projects if candidate.name == PROJECT_NAME),
    None,
)

if project is None:
    project_to_create = InlineObject4(
        name=PROJECT_NAME,
        description="Project created from Jupyter Notebook.",
        tags=[],
    )

    print("Creating project...")

    # create_project() returns None when the API call fails.
    project = create_project(project_to_create)

print("Project:")
pprint(project)

Project:
{'ascs': ['Defect Detection'],
 'created_at': datetime.datetime(2023, 1, 16, 17, 54, 19, 752000, tzinfo=tzlocal()),
 'description': 'Project created from Jupyter Notebook.',
 'id': 'bfbec903-370f-43da-bb57-cca619237667',
 'modified_at': datetime.datetime(2023, 1, 16, 17, 57, 28, 24000, tzinfo=tzlocal()),
 'name': 'Hackathon',
 'status': {'value': 'Draft'},
 'tags': []}


Create Models (or get existing ones).

In [15]:
def get_models(project_id):
    """Fetch the models of a project (requested with ``deleted=False``).

    Args:
        project_id: Identifier of the project whose models are listed.

    Returns:
        The API response, or an empty list (after printing the error)
        when the call raises ``ApiException``.
    """
    try:
        found = model_api_instance.get_models(project_id, deleted=False)
    except ApiException as err:
        print("Exception when calling get_models: %s\n" % err)
        return []
    else:
        return found
        
def create_model(proj_id, mdl):
    """Create a model inside a project through the manager service.

    Args:
        proj_id: Identifier of the owning project.
        mdl: Request payload, forwarded unchanged to the API call.

    Returns:
        The API response, or ``None`` (after printing the error) when the
        call raises ``ApiException``.
    """
    try:
        created = model_api_instance.create_model(proj_id, mdl)
    except ApiException as err:
        print("Exception when calling create_model: %s\n" % err)
        return None
    else:
        return created

models = get_models(project.id)

# These two are initialised here and never reassigned in this cell.
asset_detection_model = None
rust_detection_model = None

# When several models share the name, the last one listed is kept.
_defect_model_matches = [
    candidate for candidate in models
    if candidate.name == DEFECT_DETECTION_MODEL_NAME
]
defect_detection_model = _defect_model_matches[-1] if _defect_model_matches else None

# Defect Detection Model: create it when it was not found above.
if defect_detection_model is None:
    model_to_create_2 = InlineObject6(
        name=DEFECT_DETECTION_MODEL_NAME,
        description="",
        tags=[],
        asc=DEFECT_DETECTION_MODEL_ASC_NAME,
    )

    print("Creating Defect Detection Model...")

    # create_model() returns None when the API call fails.
    defect_detection_model = create_model(project.id, model_to_create_2)

print("Defect Detection Model:")
pprint(defect_detection_model)


Defect Detection Model:
{'asc': 'Defect Detection',
 'created_at': datetime.datetime(2023, 1, 16, 17, 57, 28, 20000, tzinfo=tzlocal()),
 'description': '',
 'id': '1b050a98-71e3-4174-8185-112794918fd6',
 'modified_at': datetime.datetime(2023, 1, 16, 17, 57, 28, 20000, tzinfo=tzlocal()),
 'name': 'Defect Detection',
 'status': {'value': 'Draft'},
 'tags': []}


Create Datasets (or get existing ones).

In [17]:
def get_datasets(project_id):
    """Fetch the datasets registered for a project.

    Args:
        project_id: Identifier of the project whose datasets are listed.

    Returns:
        The API response, or an empty list (after printing the error)
        when the call raises ``ApiException``.
    """
    try:
        found = dataset_api_instance.get_datasets(project_id)
    except ApiException as err:
        print("Exception when calling get_datasets: %s\n" % err)
        return []
    else:
        return found
    
def create_dataset(proj_id, ds):
    """Register a dataset inside a project through the manager service.

    Args:
        proj_id: Identifier of the owning project.
        ds: Request payload, forwarded unchanged to the API call.

    Returns:
        The API response, or ``None`` (after printing the error) when the
        call raises ``ApiException``.
    """
    try:
        created = dataset_api_instance.create_dataset(proj_id, ds)
    except ApiException as err:
        print("Exception when calling create_dataset: %s\n" % err)
        return None
    else:
        return created

datasets = get_datasets(project.id)

# These two are initialised here and never reassigned in this cell.
asset_detection_model_dataset = None
rust_detection_model_dataset = None

# When several datasets share the name, the last one listed is kept.
_defect_dataset_matches = [
    candidate for candidate in datasets
    if candidate.name == DEFECT_DETECTION_MODEL_DATASET_NAME
]
defect_detection_model_dataset = (
    _defect_dataset_matches[-1] if _defect_dataset_matches else None
)

# Defect Detection Model: register the dataset when it was not found above.
if defect_detection_model_dataset is None:
    # File name derived from the dataset name: lower-case, dashes for spaces.
    ds_filename_2 = DEFECT_DETECTION_MODEL_DATASET_NAME.lower().replace(" ", "-")

    dataset_to_create_2 = InlineObject5(
        name=DEFECT_DETECTION_MODEL_DATASET_NAME,
        description="",
        tags=[],
        location=f"s3://datasets-mm/{ds_filename_2}.csv",  # this is a dummy location!
    )

    print("Creating Defect Detection Model dataset...")

    # create_dataset() returns None when the API call fails.
    defect_detection_model_dataset = create_dataset(project.id, dataset_to_create_2)

print("Defect Detection Model dataset:")
pprint(defect_detection_model_dataset)



Defect Detection Model dataset:
{'added_at': datetime.datetime(2023, 1, 16, 17, 59, 30, 405000, tzinfo=tzlocal()),
 'description': '',
 'id': 'ca9382ff-2851-4a5e-ba9c-438c6207e1f5',
 'location': 's3://datasets-mm/defects-1.csv',
 'modified_at': datetime.datetime(2023, 1, 16, 17, 59, 30, 405000, tzinfo=tzlocal()),
 'name': 'Defects 1'}


## Create Versions

Create the Versions.

In [18]:
# Set the MLflow tracking URI environment variable to the in-cluster MLflow
# service of NAMESPACE.
# NOTE(review): `mlflow` is imported but not called in the visible cells -
# confirm this setting is still needed.
os.environ["MLFLOW_TRACKING_URI"] = f"http://lumada-ml-model-lifecycle-mlflow.{NAMESPACE}.svc.cluster.local:5000"

def get_version(project_id, model_id, version_no):
    """Fetch one model version from the manager service.

    Args:
        project_id: Identifier of the owning project.
        model_id: Identifier of the owning model.
        version_no: Version identifier forwarded to the API call.

    Returns:
        The API response, or ``None`` (after printing the error) when the
        call raises ``ApiException``.
    """
    try:
        found = version_api_instance.get_version(project_id, model_id, version_no)
    except ApiException as err:
        print("Exception when calling get_version: %s\n" % err)
        return None
    else:
        return found
    
def create_version(proj_id, model_id, version):
    """Create a model version through the manager service.

    Args:
        proj_id: Identifier of the owning project.
        model_id: Identifier of the owning model.
        version: Request payload, forwarded unchanged to the API call.

    Returns:
        The API response, or ``None`` (after printing the error) when the
        call raises ``ApiException``.
    """
    try:
        # Keep the response in its own name instead of rebinding `version`.
        created = version_api_instance.create_version(proj_id, model_id, version)
    except ApiException as err:
        print("Exception when calling create_version: %s\n" % err)
        return None
    else:
        return created


# Defect Detection Model
# create_model() and create_dataset() return None when their API call fails,
# so check both objects before reading `.id` from them.
if defect_detection_model is None or defect_detection_model_dataset is None:
    print("Skipping Defect Detection ModelVersion: model or dataset is not available.")
    defect_detection_model_version = None
else:
    # NOTE(review): the recorded run of this cell was rejected by the manager
    # service with HTTP 400 "invalid json body". Verify these fields (names and
    # value types) against the service's version-creation schema.
    version_to_create_2 = InlineObject7(
        description="",
        tags=[],
        dataset_id=defect_detection_model_dataset.id,
        location=DEFECT_DETECTION_MODEL_VERSION_LOCATION,
        params=[],
        metrics=[],
        training_duration=1000,
    )

    print("Creating Defect Detection ModelVersion...")

    # create_version() returns None when the API call fails.
    defect_detection_model_version = create_version(
        project.id, defect_detection_model.id, version_to_create_2
    )

print("Defect Detection ModelVersion:")
pprint(defect_detection_model_version)

Creating Defect Detection ModelVersion...
Exception when calling create_version: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'content-type': 'application/json; charset=utf-8', 'vary': 'Origin', 'date': 'Mon, 16 Jan 2023 18:17:37 GMT', 'content-length': '66', 'x-envoy-upstream-service-time': '1', 'server': 'istio-envoy', 'x-envoy-decorator-operation': 'lumada-ml-model-management-manager-svc.hitachi-solutions.svc.cluster.local:8080/*'})
HTTP response body: {"message":"invalid json body","status":400,"error":"bad_request"}


Defect Detection ModelVersion:
None


## Deploy Versions

Deploy the Versions.

In [None]:
def deploy_version(request, force=False):
    """Submit a deploy request to the deployment service.

    Args:
        request: Deploy request payload, forwarded unchanged to the API call.
        force: Forwarded as the ``force`` keyword of the API call.

    Returns:
        The API response, or ``None`` (after printing the error) when the
        call raises ``ApiException``.
    """
    try:
        outcome = deploy_api_instance.deploy_version(request, force=force)
    except ApiException as err:
        print("Exception when calling deploy_version: %s\n" % err)
        return None
    else:
        return outcome


def _deploy_model(display_name, model, version, *, node_name, cpu_requests,
                  cpu_limits, mem_requests, mem_limits, gpu, force):
    """Build the deploy request for one model version and submit it.

    Args:
        display_name: Human-readable model name; used as the deployment name
            and in the progress messages.
        model: Model object (its ``id`` is read), or ``None``.
        version: Model version object (its ``version`` is read), or ``None``.
        node_name: Value for the request's ``node_name``.
        cpu_requests: CPU requests; converted to float.
        cpu_limits: CPU limits; converted to float.
        mem_requests: Memory requests; converted to float.
        mem_limits: Memory limits; converted to float.
        gpu: GPU value used for both requests and limits.
        force: Forwarded as ``force`` to ``deploy_version``.

    Returns:
        The response of ``deploy_version`` (``None`` when that call failed),
        or ``None`` when the model or version is missing.
    """
    # create_model()/create_version() return None on API errors; without this
    # guard the attribute reads below would raise AttributeError.
    if model is None or version is None:
        print(f"Skipping {display_name} Model deployment: model or version is not available.")
        return None

    deploy_request = DeployRequest(
        name=display_name,
        description="",
        project_id=project.id,
        model_id=model.id,
        version_id=version.version,
        node_name=node_name,
        resources=Resources(
            requests=ResourcesData(
                cpu=float(cpu_requests),
                memory=float(mem_requests),
                gpu=gpu,
            ),
            limits=ResourcesData(
                cpu=float(cpu_limits),
                memory=float(mem_limits),
                gpu=gpu,
            ),
        ),
    )

    print(f"Deploying {display_name} Model...")

    response = deploy_version(deploy_request, force=force)

    print(f"{display_name} Model deploy response:")
    pprint(response)
    return response


# Only the Defect Detection model is configured in the visible Setup cell.
# The ASSET_DETECTION_* / RUST_DETECTION_* constants and the matching
# *_model_version variables are not defined there, so deploying those models
# here would raise NameError. To deploy another model, define its constants
# in Setup, create its version, and add a _deploy_model(...) call below.

# Defect Detection Model
if DEFECT_DETECTION_MODEL_DEPLOY:
    defect_detection_model_deploy_response = _deploy_model(
        "Defect Detection",
        defect_detection_model,
        defect_detection_model_version,
        node_name=DEFECT_DETECTION_MODEL_DEPLOY_NODE_NAME,
        cpu_requests=DEFECT_DETECTION_MODEL_CPU_REQUESTS,
        cpu_limits=DEFECT_DETECTION_MODEL_CPU_LIMITS,
        mem_requests=DEFECT_DETECTION_MODEL_MEM_REQUESTS,
        mem_limits=DEFECT_DETECTION_MODEL_MEM_LIMITS,
        gpu=DEFECT_DETECTION_MODEL_GPU,
        force=DEFECT_DETECTION_MODEL_FORCE_DEPLOY,
    )

## Cleanup

Stop the deployment of any version that is currently deployed.

In [None]:
def stop_deployment(endpoint_id):
    """Ask the deployment service to stop the deployment of an endpoint.

    Prints ``Stopping...`` when the request is accepted, or the error when
    the call raises ``ApiException``. Nothing is returned.

    Args:
        endpoint_id: Identifier of the endpoint, forwarded to the API call.
    """
    try:
        deploy_api_instance.stop_deployment(endpoint_id)
    except ApiException as err:
        print("Exception when calling stop_deployment: %s\n" % err)
    else:
        print("Stopping...")

def _stop_if_deployed(display_name, model, version):
    """Re-read a model version and stop its deployment when it is deployed.

    Args:
        display_name: Human-readable model name, used in the skip message.
        model: Model object (its ``id`` is read), or ``None``.
        version: Model version object (its ``version`` is read), or ``None``.

    Returns:
        The freshly fetched version (``None`` when the fetch failed), or
        ``None`` when the model or version is missing.
    """
    # Either object is None when its creation failed earlier; reading
    # attributes from it would raise AttributeError.
    if model is None or version is None:
        print(f"Skipping {display_name} Model cleanup: model or version is not available.")
        return None

    current = get_version(project.id, model.id, version.version)

    # Status is compared case-insensitively against "DEPLOYED".
    if current and current.status.value.upper() == "DEPLOYED":
        stop_deployment(current.endpoint.id)

    return current


# Only the Defect Detection model has a version created in the visible cells.
# asset_detection_model / rust_detection_model stay None and their
# *_model_version variables are not defined, so cleaning them up here would
# raise AttributeError / NameError.

# Defect Detection Model
defect_detection_model_version_1 = _stop_if_deployed(
    "Defect Detection",
    defect_detection_model,
    defect_detection_model_version,
)