# Model Deployment

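To run this notebook, first set the Google credential path (the `GOOGLE_APPLICATION_CREDENTIALS` environment variable) so that an ID token can be fetched for the deployment function.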

In [1]:
# %pip install google-auth google-cloud-storage google-cloud-mlflow databricks-registry-webhooks databricks-cli -q --user

In [2]:
# !pip install databricks-cli --user -q

## Import libraries

In [3]:
# General
import random
import string
import os
import urllib
import time

# Deployment
import mlflow
from  mlflow.tracking import MlflowClient
from mlflow.entities import ViewType
import google.oauth2.id_token
import google.auth.transport.requests
from databricks_registry_webhooks import RegistryWebhooksClient, HttpUrlSpec

## Define constants

In [4]:
# Project-wide settings
UUID = "2obn"  # set this to the UUID created previously (used as a name suffix)
PROJECT_ID = "leedeb-experimentation"
REGION = "us-central1"

# Cloud Storage bucket and its gs:// URI
BUCKET_NAME = "vertex-ai-databricks-retail-demo"
BUCKET_URI = "gs://" + BUCKET_NAME

In [5]:
# Cloud Function that receives the webhook and performs the deployment
CLOUD_FUNCTION_NAME = "deployv2"
CLOUD_FUNCTION_ENTRY_POINT = "deploy"
# The run.app URL is the one in use; the cloudfunctions.net form would be
# https://{REGION}-{PROJECT_ID}.cloudfunctions.net/{CLOUD_FUNCTION_NAME}
CLOUD_FUNCTION_URL = (
    f"https://{CLOUD_FUNCTION_NAME}-witrkzd5va-uc.a.run.app/"
    f"{CLOUD_FUNCTION_ENTRY_POINT}"
)

# MLflow tracking server (host IP, port 5000)
IP = "35.222.197.99"
TRACKING_URI = "http://" + IP + ":5000"

# Experiment, logged-model and registry names
EXPERIMENT_NAME = "training"
MODEL_NAME = f"Weekly_Sales_GBTR_model_{UUID}"
REGISTERED_MODEL = "databricks_model2"
MODEL_STAGE = "production"

## Helpers

In [6]:
# auth function
def make_authorized_get_request(endpoint, audience):
    """
    Fetch a Google ID token for calling an authenticated endpoint.

    Despite its name, this helper does not send any request to ``endpoint``:
    it only obtains the ID token, which the caller then places in an
    ``Authorization: Bearer <token>`` header.

    Args:
        endpoint: URL of the service the token is intended for. Kept for
            backward compatibility; it is not needed to obtain the token.
        audience: The audience the ID token is issued for.
    Returns:
        The ID token returned by ``google.oauth2.id_token.fetch_id_token``.
    """
    # Transport object that fetch_id_token uses to obtain the token.
    auth_req = google.auth.transport.requests.Request()
    return google.oauth2.id_token.fetch_id_token(auth_req, audience)

## Register the model

In [7]:
# Low-level MLflow client bound to the tracking server.
client = MlflowClient(tracking_uri=TRACKING_URI)

In [8]:
# Select the first experiment returned by the tracking server and the first
# run returned for it. Only the first hit of each search is used, so a single
# result is requested, and an empty result fails with a clear message instead
# of an IndexError.
# NOTE(review): EXPERIMENT_NAME is defined in the constants but is not used
# here — confirm whether the run should be looked up in that experiment.
found_experiments = client.search_experiments(max_results=1)
if not found_experiments:
    raise RuntimeError(f"No experiments found on tracking server {TRACKING_URI}")
last_experiment = found_experiments[0]

# search_runs takes a list of experiment ids.
found_runs = client.search_runs([last_experiment.experiment_id], max_results=1)
if not found_runs:
    raise RuntimeError(
        f"No runs found in experiment {last_experiment.experiment_id} "
        f"on tracking server {TRACKING_URI}"
    )
last_run = found_runs[0]
last_run_id = last_run.info.run_id

In [9]:
# Display the id of the run whose model will be registered.
last_run_id

'526e6590724d4433bb3cbf76bca010be'

In [10]:
# Point the fluent MLflow API at the same tracking server URI used for `client`.
mlflow.set_tracking_uri(TRACKING_URI)

# Register the artifact stored under MODEL_NAME in the selected run as a new
# version of REGISTERED_MODEL.
model_uri = "runs:/{}/{}".format(last_run_id, MODEL_NAME)
registered_model = mlflow.register_model(model_uri, REGISTERED_MODEL)

# Fixed 20-second pause before the next cells touch the new version.
time.sleep(20)

Registered model 'databricks_model2' already exists. Creating a new version of this model...
2023/02/23 14:59:49 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: databricks_model2, version 29
Created version '29' of model 'databricks_model2'.


In [11]:
# Attach a description to the model version that was just registered.
client.update_model_version(
    registered_model.name,
    registered_model.version,
    description=(
        "GBTRegressor MLLib PySpark trained for streaming pipeline, "
        "fixed issues with staging to production."
    ),
)

<ModelVersion: creation_timestamp=1677164389340, current_stage='None', description=('GBTRegressor MLLib PySpark trained for streaming pipeline, fixed issues with '
 'staging to production.'), last_updated_timestamp=1677164409498, name='databricks_model2', run_id='526e6590724d4433bb3cbf76bca010be', run_link='', source='gs://mlflow-central-oss/mlflow_artifacts/0/526e6590724d4433bb3cbf76bca010be/artifacts/Weekly_Sales_GBTR_model_2obn', status='READY', status_message='', tags={}, user_id='', version='29'>

## Create the HTTP registry webhook

Webhooks enable you to listen for Model Registry events so your integrations can automatically trigger actions. You can use webhooks to automate and integrate your machine learning pipeline with existing CI/CD tools and workflows. 

For example, in our case we pass the event to a Cloud Function, which triggers a CI/CD build that deploys the model on Vertex AI.

### Get the authorization token the webhook uses to call the function

In [15]:
# Fetch the ID token the webhook presents to the Cloud Function; the function
# URL is used both as the endpoint and as the token audience.
token_id = make_authorized_get_request(CLOUD_FUNCTION_URL, CLOUD_FUNCTION_URL)

# Save the token to token_file (read later by `databricks configure -f token_file`).
# Written from Python rather than `!echo $token_id > token_file` so the secret is
# not interpolated into a shell command, and the file is created owner-only
# (the 0o600 mode applies when the file is created).
token_fd = os.open("token_file", os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(token_fd, "w") as token_fh:
    token_fh.write(f"{token_id}\n")

In [18]:
# IP_WITH_PORT = f'{TRACKING_IP}:5000'
!databricks configure -f token_file --host $TRACKING_URI

In [19]:
# Webhook destination: the Cloud Function URL, called with the ID token as a
# bearer credential.
# NOTE(review): the token is embedded as a fixed string — confirm its lifetime
# is long enough for the webhook's intended use.
http_url_spec = HttpUrlSpec(
    authorization=f"Bearer {token_id}",
    url=CLOUD_FUNCTION_URL,
)

# Create the webhook in TEST_MODE for stage-transition events on the registered model.
# NOTE(review): the recorded output of this cell is a 404 from
# /api/2.0/preview/mlflow/registry-webhooks/create — presumably the configured
# host does not expose the registry-webhooks API; confirm the CLI host.
http_webhook = RegistryWebhooksClient().create_webhook(
    model_name=registered_model.name,
    description="Testing deploy model",
    events=["MODEL_VERSION_TRANSITIONED_STAGE"],
    status="TEST_MODE",
    http_url_spec=http_url_spec,
)


RegistryWebhooksException: API request to endpoint /api/2.0/preview/mlflow/registry-webhooks/create failed with error code 404 != 200. Response body: '<!doctype html>
<html lang=en>
<title>404 Not Found</title>
<h1>Not Found</h1>
<p>The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.</p>
'

In [None]:
# Display the webhook returned by create_webhook.
http_webhook

Out[48]: <RegistryWebhook: creation_timestamp=1675322948311, description='Testing deploy model', events=['MODEL_VERSION_TRANSITIONED_STAGE'], http_url_spec=<HttpUrlSpec: authorization=None, enable_ssl_verification=True, secret=None, url='https://deployv2-witrkzd5va-uc.a.run.app/deploy'>, id='1405f1b7a86940e0b185b270a288c4f2', job_spec=None, last_updated_timestamp=1675322948311, model_name='databricks_model2', status='TEST_MODE'>

### Update the webhook to active status
To enable the webhook for real events, set its status to ACTIVE through an update call, which can also be used to change any of its other properties.

In [None]:
# Switch the existing webhook to ACTIVE and keep the updated object.
http_webhook = RegistryWebhooksClient().update_webhook(
    status="ACTIVE",
    id=http_webhook.id,
)

In [None]:
# Display the webhook returned by update_webhook.
http_webhook

Out[50]: <RegistryWebhook: creation_timestamp=1675322948311, description='Testing deploy model', events=['MODEL_VERSION_TRANSITIONED_STAGE'], http_url_spec=<HttpUrlSpec: authorization=None, enable_ssl_verification=True, secret=None, url='https://deployv2-witrkzd5va-uc.a.run.app/deploy'>, id='1405f1b7a86940e0b185b270a288c4f2', job_spec=None, last_updated_timestamp=1675322949097, model_name='databricks_model2', status='ACTIVE'>

## Transition model to production stage

In [None]:
# Move the newly registered version to MODEL_STAGE; the webhook listens for
# this stage-transition event.
client.transition_model_version_stage(
    registered_model.name,
    registered_model.version,
    MODEL_STAGE,
)

Out[51]: <ModelVersion: creation_timestamp=1675322913329, current_stage='Production', description=('GBTRegressor MLLib PySpark trained for streaming pipeline, fixed issues with '
 'staging to production.'), last_updated_timestamp=1675322949408, name='databricks_model2', run_id='4ec16154d809480ea7db87dacfbfee33', run_link='', source='dbfs:/databricks/mlflow-tracking/bcd044c1237440d7866d3363fd45fffb/4ec16154d809480ea7db87dacfbfee33/artifacts/Weekly_Sales_GBTR_model_2obn', status='READY', status_message='', tags={}, user_id='3114545449803070', version='1'>

## Do not run (cleanup: deletes every `MODEL_VERSION_TRANSITIONED_STAGE` webhook returned by the listing)

In [None]:
# Collect the webhooks that listen for stage-transition events
# (no model_name filter is passed).
webhooks_list = RegistryWebhooksClient().list_webhooks(events=["MODEL_VERSION_TRANSITIONED_STAGE"])

In [None]:
# Delete every webhook collected in webhooks_list, one call per webhook id.
for listed_webhook in webhooks_list:
    RegistryWebhooksClient().delete_webhook(listed_webhook.id)