## BYOM Recipe

#### Authentication

In [None]:
from tmo import TmoClient

# Build the SDK client without passing any explicit arguments.
client = TmoClient()

# List all projects; being the last expression, the list is shown as the
# cell output.
projects_api = client.projects()
list(projects_api)

#### Set values for model file and project id

In [27]:
# Project id assigned to the client for the rest of the recipe.
client.project_id = "23e1df4b-b630-47a1-ab80-7ad5385fcd8d"

# Model language and the path of the model file that is uploaded later on.
language = "PMML"
file = "./artifacts/pima.pmml"

#### Get Dataset templates along with train and evaluate datasets

In [None]:
# Search the dataset templates by name ("PIMA") and keep the first result.
template_search = client.dataset_templates().find_by_name_like("PIMA")
dataset_template = template_search["_embedded"]["datasetTemplates"][0]
dataset_template

In [None]:
# Fetch the datasets registered for the selected dataset template.
datasets_response = client.datasets().find_by_dataset_template_id(
    dataset_template["id"]
)
datasets = datasets_response["_embedded"]["datasets"]
datasets

In [None]:
# Select the first dataset whose scope is "train".
# An IndexError is raised here when no dataset has that scope.
train_candidates = [entry for entry in datasets if entry["scope"] == "train"]
train_dataset = train_candidates[0]
train_dataset

In [None]:
# Select the first dataset whose scope is "evaluate" and keep its id for the
# evaluation request. An IndexError is raised when no dataset has that scope.
evaluate_candidates = [
    entry for entry in datasets if entry["scope"] == "evaluate"
]
evaluate_dataset = evaluate_candidates[0]
eval_dataset_id = evaluate_dataset["id"]
eval_dataset_id

#### Get Default Dataset Connection

In [None]:
# New API: lets a user fetch their default dataset connection.
# Note that it only works for real users; service accounts probably do not
# have personal connections.

default_connection = client.user_attributes().get_default_connection()
default_connection

# The connection id is nested under the attribute's "value" entry.
connection_value = default_connection["value"]
default_connection_id = connection_value["defaultDatasetConnectionId"]
default_connection_id

#### Create a BYOM model

In [33]:
import uuid

# Numeric suffix taken from a random UUID, presumably to keep model names
# distinct between runs of the recipe.
name_suffix = uuid.uuid4().clock_seq

# Definition of the model to register: name, description and language.
model_dict = {
    "name": f"{language}_Python_{name_suffix}",
    "description": f"{language} model defined from Python SDK",
    "language": language,
}

models_api = client.models()
model_response = models_api.save(model_dict)

# NOTE: `model` holds the id of the saved model, not the full response.
model = model_response["id"]

model

In [None]:
# Upload the model file and keep the returned import id for the import
# request. The language is taken from the `language` variable instead of being
# hard-coded a second time, so the upload stays consistent with the model
# definition when the recipe is adapted to another model language.
import_id = client.trained_model_artefacts().upload_byom_model(language, file)
import_id

#### Import BYOM model and monitor the compute statistics job

**import_request parameters**:

To skip Model Monitoring, you can remove the `modelMonitoring` JSON object and instead use the `metadata` object:
```python
"metadata": {
    "language": language,
    "evaluationEnabled": False,
    "datasetId": train_dataset["id"],
    "datasetConnectionId": default_connection_id,
}
```

This disables model monitoring and should be used for models that only need to be approved and deployed after being imported.
To enable model monitoring and evaluation, use the following parameters:

modelMonitoring:
- *useDefaultEvaluation* - Set to True to enable default evaluation. Set to False when using custom metrics for evaluation, performance monitoring,
  feature and prediction drift monitoring (True is required when enabling model monitoring with default metrics)
- *evaluationEnabled* - Set to True to enable model evaluation and performance monitoring
- *modelType* - The type of the model, either CLASSIFICATION or REGRESSION
- *byomColumnExpression* - The prediction expression for the model
- *driftMonitoringEnabled* - Set to True to enable feature and prediction drift monitoring. This runs the compute statistics job after the model is imported.


In [None]:
# Prediction expression passed as "byomColumnExpression".
byom_column_expression = (
    "CAST(CAST(json_report AS JSON).JSONExtractValue('$.predicted_HasDiabetes')"
    " AS INT)"
)

# Monitoring settings: default evaluation, performance monitoring and drift
# monitoring are all switched on, using the train dataset and the default
# dataset connection.
model_monitoring = {
    "language": language,
    "useDefaultEvaluation": True,
    "evaluationEnabled": True,
    "modelType": "CLASSIFICATION",
    "byomColumnExpression": byom_column_expression,
    "driftMonitoringEnabled": True,
    "datasetId": train_dataset["id"],
    "datasetConnectionId": default_connection_id,
}

import_request = {
    "artefactImportId": import_id,
    "externalId": str(uuid.uuid4()),
    "modelMonitoring": model_monitoring,
}

# Start the import; the response's "id" is used as the job id to wait on.
response = client.models().import_byom(model, import_request)
import_job_id = response["id"]

# Wait for the import job before reporting success.
jobs_api = client.jobs()
jobs_api.wait(import_job_id)

print("Model imported")

In [None]:
# Monitoring the Compute Statistics job
#
# Scans the job pages for a COMPUTE_STATISTICS job of the imported model whose
# most recent event shows it is still in progress, then waits for that job.

# Statuses that this recipe treats as "job still in progress".
ACTIVE_JOB_STATUSES = ("CREATED", "SCHEDULED", "ASSIGNED", "RUNNING")


def _find_active_compute_stats_job(client, model_id):
    """Return the first in-progress COMPUTE_STATISTICS job of *model_id*.

    Returns None when no such job is found on any page.
    """
    total_pages = client.jobs().find_all()["page"]["totalPages"]

    for page in range(total_pages):
        page_jobs = client.jobs().find_all(page=page)["_embedded"]["jobs"]

        for job in page_jobs:
            if job["type"] != "COMPUTE_STATISTICS":
                continue
            # .get(): a job without a "modelId" key is skipped instead of
            # aborting the whole scan with a KeyError.
            if job.get("modelId") != model_id:
                continue

            job_events = client.job_events().find_by_job_id(job["id"])
            events = job_events["_embedded"]["jobEvents"]
            # A job with no recorded events has no status to inspect; skip it
            # rather than failing on events[-1].
            if events and events[-1]["status"] in ACTIVE_JOB_STATUSES:
                return job

    return None


compute_stats_job = _find_active_compute_stats_job(client, model)

if compute_stats_job:
    client.jobs().wait(compute_stats_job["id"])
    print("Compute statistics completed")
else:
    print("No running compute statistics job found.")

In [38]:
# The import response references the trained model under
# metadata -> trainedModel; its id is used by the evaluate, approve and
# deploy steps.
trained_model_ref = response["metadata"]["trainedModel"]
imported_model_id = trained_model_ref["id"]

#### Evaluate the model

In [None]:
# Resource limits and docker image sent as "automationOverrides".
automation_overrides = {
    "resources": {"memory": "500m", "cpu": "0.5"},
    "dockerImage": (
        "artifacts.td.teradata.com/tdproduct-docker-snapshot/avmo/vmo-python-base:3.11.4"
    ),
}

# Evaluate against the evaluate dataset through the default connection.
eval_request = {
    "datasetId": eval_dataset_id,
    "datasetConnectionId": default_connection_id,
    "automationOverrides": automation_overrides,
}

trained_models_api = client.trained_models()
evaluate_response = trained_models_api.evaluate(imported_model_id, eval_request)

# Wait for the evaluation job before reporting success.
eval_job_id = evaluate_response["id"]
client.jobs().wait(eval_job_id)

print("Model evaluated")

#### Approve the model

In [None]:
# Approve the imported model with a review comment; the call's return value is
# shown as the cell output.
trained_models_api = client.trained_models()
trained_models_api.approve(imported_model_id, comments="LGTM")

#### Deploy model and monitor batch prediction job

In [None]:
# Deployment request for the imported model: IN_VANTAGE engine type with the
# "byom" engine, run once ("@once").
deploy_request = {
    "engineType": "IN_VANTAGE",
    "engineTypeConfig": {
        "dockerImage": (
            "artifacts.td.teradata.com/tdproduct-docker-snapshot/avmo/vmo-python-base:3.11.4"
        ),
        "engine": "byom",
        "resources": {
            "memory": "500m",
            "cpu": "0.5",
        },
    },
    # Reuse the `language` variable rather than repeating the literal, so the
    # deployment stays consistent with the model definition and the upload.
    "language": language,
    "datasetConnectionId": default_connection_id,
    "byomModelLocation": {"database": "trng_modelops", "table": "vmo_byom_models"},
    "datasetTemplateId": dataset_template["id"],
    "cron": "@once",
    # NOTE(review): sent as the string "false", not a boolean; confirm this is
    # the form the API expects before changing it.
    "publishOnly": "false",
    "customProperties": {},
}

deploy_response = client.trained_models().deploy(imported_model_id, deploy_request)

# Wait for the deployment job before reporting success.
client.jobs().wait(deploy_response["id"])
print("Model deployed")

In [None]:
# Monitoring the Batch Prediction job
#
# Look up the deployment created by the deploy job, list the jobs attached to
# it, and wait only when exactly one job is found.

deployment = client.deployments().find_by_deployment_job_id(deploy_response["id"])
jobs_response = client.jobs().find_by_deployment_id(deployment["id"], "expandJob")
jobs = jobs_response["_embedded"]["jobs"]

if not jobs:
    print("No jobs found")
elif len(jobs) == 1:
    only_job = jobs[0]
    client.jobs().wait(only_job["id"])
    print("Job completed")
else:
    print("Multiple jobs found")