# Using IBM watsonx.governance - monitoring the models deployed in Azure Databricks workspace

In [2]:
import warnings
warnings.filterwarnings("ignore")

### Cloud Pak for Data Credentials

In [1]:
#masked
WOS_CREDENTIALS = {
    "url": "[CPD URL]",
    "username": "[CPD User]",
    "password": "[CPD User Password]",
    "apikey": "[CPD User APIKey]",
    "version": "5.0"
}

In [None]:
# WML reuses the Cloud Pak for Data credentials, plus the instance id set below.
WML_CREDENTIALS = WOS_CREDENTIALS.copy()
WML_CREDENTIALS['instance_id']='openshift'
# Show a redacted view: echoing the dict itself would persist the password and
# API key in the notebook's saved cell output.
{key: ('***' if key in ('password', 'apikey') else value) for key, value in WML_CREDENTIALS.items()}

# Wrap the model deployed in Azure Databricks as a WML Python Function

* Use the Azure Portal https://portal.azure.com/#home to create Azure Databricks workspace (subscription).
* Launch the Azure Databricks workspace (https://adb-xxxx.xx.azuredatabricks.net) and first create the cluster.
* Load the data https://github.com/ravichamarthy/azure-databricks/blob/main/insurance.csv to a new table in Azure Databricks Catalog
* Create an AutoML experiment using the loaded data as the training data.
* Register one of the experiment runs as the model.
* Use the registered model to create an inference endpoint.
* This inference endpoint is what we will use for monitoring the model deployment in IBM watsonx.governance.
* We will wrap the inference endpoint as a python function deployed in WML for monitoring purposes.
* The main job of this Python function is to call the Azure Databricks invocations endpoint and return its response in the format expected from a WML Python Function.
* The advantage of wrapping the Azure Databricks model deployment in a WML Python Function is that payload logging happens automatically, instead of having to manually log the scored output to the IBM watsonx.governance data mart.

In [4]:
def azure_databricks_scoring_wrapper():
    """Build the scoring function that forwards WML payloads to Azure Databricks.

    The returned ``score`` callable is the entry point of the WML Python
    Function. It converts a WML scoring payload into the ``dataframe_split``
    format, posts it to the Azure Databricks invocations endpoint and converts
    the predictions back into the WML response format.

    Returns:
        The ``score(scoring_payload)`` closure.
    """

    AZURE_DATABRICKS_TOKEN = '[Azure Databricks API Token]' #generate a token in Azure Databricks console, with desired validity.
    invocations_url = '[Azure Databricks Invocations URL]'
    # Upper bound for a single scoring call so a stalled endpoint cannot hang the deployment forever.
    request_timeout_seconds = 120

    # Imported inside the wrapper (as in the original) so the function stays
    # self-contained when it is stored and deployed.
    import requests

    #entry point scoring method for this python function
    def score(scoring_payload):
        """Score a WML payload against the Azure Databricks model deployment.

        Args:
            scoring_payload: dict of the form
                ``{"input_data": [{"fields": [...], "values": [[...], ...]}]}``.
                Only the first ``input_data`` entry is scored.

        Returns:
            ``{"predictions": [{"values": [[p], ...], "fields": ["prediction"]}]}``
            with each prediction rounded to 4 decimal places.

        Raises:
            requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        """
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": "Bearer {}".format(AZURE_DATABRICKS_TOKEN),
        }

        input_fields = scoring_payload['input_data'][0]['fields']
        input_values = scoring_payload['input_data'][0]['values']

        # construct the scoring payload for ADB Model Deployment
        adb_scoring_payload = {
            "dataframe_split": {
                "columns": input_fields,
                "data": input_values
            }
        }

        # TLS certificate verification is left at its default (enabled): the
        # request carries a bearer token and must not be sent to an unverified host.
        response = requests.post(
            invocations_url,
            headers=headers,
            json=adb_scoring_payload,
            timeout=request_timeout_seconds,
        )
        # Fail loudly on auth/endpoint errors instead of hitting a KeyError on
        # the missing 'predictions' key of an error body.
        response.raise_for_status()
        response_json = response.json()

        # construct the response to what WML Python Function should send back.
        transformed_response = {
            'predictions': [{
                'values': [[round(value, 4)] for value in response_json['predictions']],
                'fields': ['prediction']
            }]
        }
        return transformed_response
    return score

### Construct the scoring payload 

In [5]:
from IPython.utils import io

with io.capture_output() as captured:
    !wget https://raw.githubusercontent.com/ravichamarthy/azure-databricks/main/insurance.csv

In [6]:
!ls -lh insurance.csv

-rw-r--r-- 1 hadoop hadoop 72K Jun 30 08:54 insurance.csv


In [7]:
import pandas as pd
pd_data = pd.read_csv("insurance.csv", sep=",", header=0)
pd_data.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,SMOKER,southwest,16884.924
1,18,male,33.77,1,NON_SMOKER,southeast,1725.5523
2,28,male,33.0,3,NON_SMOKER,southeast,4449.462
3,33,male,22.705,0,NON_SMOKER,northwest,21984.47061
4,32,male,28.88,0,NON_SMOKER,northwest,3866.8552


In [8]:
# Summary statistics of the label column.
min_value = pd_data['charges'].min()
max_value = pd_data['charges'].max()
mean_value = pd_data['charges'].mean()

print(f"min_value:{min_value!s}")
print(f"max_value:{max_value!s}")
print(f"mean_value:{mean_value!s}")

min_value:1121.8739
max_value:63770.42801
mean_value:13270.422265141257


In [9]:
# Define number of bins
num_bins = 5

# Create bins
bins = pd.cut(pd_data['charges'], bins=num_bins)
bins

0       (13651.585, 26181.296]
1        (1059.225, 13651.585]
2        (1059.225, 13651.585]
3       (13651.585, 26181.296]
4        (1059.225, 13651.585]
                 ...          
1333     (1059.225, 13651.585]
1334     (1059.225, 13651.585]
1335     (1059.225, 13651.585]
1336     (1059.225, 13651.585]
1337    (26181.296, 38711.006]
Name: charges, Length: 1338, dtype: category
Categories (5, interval[float64, right]): [(1059.225, 13651.585] < (13651.585, 26181.296] < (26181.296, 38711.006] < (38711.006, 51240.717] < (51240.717, 63770.428]]

In [10]:
cols_to_remove = ['charges']
def get_scoring_payload(no_of_records_to_score = 1):
    """Build a WML scoring payload from the first rows of ``pd_data``.

    The columns listed in ``cols_to_remove`` are left out of the payload.
    ``pd_data`` itself is not modified, so the cell can be re-run and the
    frame displayed earlier in the notebook stays valid.

    Args:
        no_of_records_to_score: number of leading rows to include.

    Returns:
        ``{"input_data": [{"fields": [...], "values": [[...], ...]}]}``.
    """
    # errors='ignore' mirrors the original "delete only if present" check,
    # but works on a copy instead of deleting columns from the global frame.
    feature_frame = pd_data.drop(columns=cols_to_remove, errors='ignore')

    fields = feature_frame.columns.tolist()
    values = feature_frame.values.tolist()

    payload_scoring = {"input_data": [{"fields": fields, "values": values[:no_of_records_to_score]}]}
    return payload_scoring

In [11]:
scoring_payload = get_scoring_payload(no_of_records_to_score=3)

In [12]:
scoring_payload

{'input_data': [{'fields': ['age',
    'sex',
    'bmi',
    'children',
    'smoker',
    'region'],
   'values': [[19, 'female', 27.9, 0, 'SMOKER', 'southwest'],
    [18, 'male', 33.77, 1, 'NON_SMOKER', 'southeast'],
    [28, 'male', 33.0, 3, 'NON_SMOKER', 'southeast']]}]}

### Perform a local scoring to the model deployment

In [13]:
func_result = azure_databricks_scoring_wrapper()(scoring_payload)
print(func_result)

{'predictions': [{'values': [[24833.0156], [3222.135], [6075.4478]], 'fields': ['prediction']}]}


### Platform Authentication

In [14]:
from ibm_watson_machine_learning import APIClient
wml_client = APIClient(WML_CREDENTIALS)

In [15]:
wml_client.spaces.list(limit=10)

------------------------------------  -------------------------------------------------------------------  ------------------------
ID                                    NAME                                                                 CREATED
70bcb00e-c87b-4ee8-8c65-f9f4ee5c59e8  xgovdemos-prod                                                       2024-06-30T16:33:45.792Z
9fcb9746-56fc-454e-ba5f-aaad783bd236  xgov_prod                                                            2024-06-10T18:35:45.917Z
543f115a-207f-4e58-affc-8ed5eccab71c  PG - PROD                                                            2024-06-10T10:53:49.743Z
51c8acf0-a68c-4665-9279-25a3b3f139a1  PG - Test                                                            2024-06-10T10:47:16.158Z
9fa4adb7-1254-4f67-adfc-2da8b0fb9c11  openscale-express-path-preprod-00000000-0000-0000-0000-000000000000  2024-06-07T17:10:30.128Z
32e399a3-8d5e-486b-b1bc-a5af67797739  openscale-express-path-00000000-0000-0000-0000-00000000

Unnamed: 0,ID,NAME,CREATED
0,70bcb00e-c87b-4ee8-8c65-f9f4ee5c59e8,xgovdemos-prod,2024-06-30T16:33:45.792Z
1,9fcb9746-56fc-454e-ba5f-aaad783bd236,xgov_prod,2024-06-10T18:35:45.917Z
2,543f115a-207f-4e58-affc-8ed5eccab71c,PG - PROD,2024-06-10T10:53:49.743Z
3,51c8acf0-a68c-4665-9279-25a3b3f139a1,PG - Test,2024-06-10T10:47:16.158Z
4,9fa4adb7-1254-4f67-adfc-2da8b0fb9c11,openscale-express-path-preprod-00000000-0000-0...,2024-06-07T17:10:30.128Z
5,32e399a3-8d5e-486b-b1bc-a5af67797739,openscale-express-path-00000000-0000-0000-0000...,2024-06-07T17:10:18.359Z


### Use the space to deploy the WML Python Function

In [16]:
space_id = '70bcb00e-c87b-4ee8-8c65-f9f4ee5c59e8' #replace your space id from the above list, here

In [17]:
wml_client.set.default_space(space_id)

'SUCCESS'

In [18]:
DEPLOYMENT_NAME = 'ADB Insurance Charges Predictions Deployment'
PYTHON_FUNCTION_NAME = 'ADB Insurance Charges Predictions Function'

In [19]:
def delete_deployment(DEPLOYMENT_NAME):
    """Delete every deployment named ``DEPLOYMENT_NAME`` together with its asset.

    Deployments whose entity has no ``asset`` entry are skipped. For each
    match the deployment is deleted first, then the asset it was created from.
    """
    for entry in wml_client.deployments.get_details()["resources"]:
        entity = entry["entity"]
        if 'asset' not in entity:
            continue

        asset_id = entity["asset"]["id"]
        metadata = entry["metadata"]
        current_deployment_id = metadata["id"]
        if metadata["name"] != DEPLOYMENT_NAME:
            continue

        print("Deleting deployment id", current_deployment_id)
        wml_client.deployments.delete(current_deployment_id)
        print("Deleting model id", asset_id)
        wml_client.repository.delete(asset_id)

In [23]:
delete_deployment(DEPLOYMENT_NAME)

In [24]:
wml_client.repository.list_functions()

----  ----  -------  ----  ----------  ----------------
GUID  NAME  CREATED  TYPE  SPEC_STATE  SPEC_REPLACEMENT
----  ----  -------  ----  ----------  ----------------


Unnamed: 0,GUID,NAME,CREATED,TYPE,SPEC_STATE,SPEC_REPLACEMENT


# Deploy the WML Python Function

In [25]:
software_spec_id =  wml_client.software_specifications.get_id_by_name('runtime-23.1-py3.10')
print(software_spec_id)
function_meta_props = {
     wml_client.repository.FunctionMetaNames.NAME: PYTHON_FUNCTION_NAME,
     wml_client.repository.FunctionMetaNames.SOFTWARE_SPEC_ID: software_spec_id
     }

336b29df-e0e1-5e7d-b6a5-f6ab722625b2


In [26]:
function_artifact = wml_client.repository.store_function(meta_props=function_meta_props, function=azure_databricks_scoring_wrapper)
function_uid = wml_client.repository.get_function_id(function_artifact)
print("Function UID = " + function_uid)

Function UID = b3a67a0a-01e0-458a-866a-2f4807f5b325


In [27]:
function_details = wml_client.repository.get_details(function_uid)
from pprint import pprint
pprint(function_details)

{'entity': {'software_spec': {'id': '336b29df-e0e1-5e7d-b6a5-f6ab722625b2',
                              'name': 'runtime-23.1-py3.10'},
            'type': 'python'},
 'metadata': {'created_at': '2024-07-02T13:22:25.869Z',
              'id': 'b3a67a0a-01e0-458a-866a-2f4807f5b325',
              'modified_at': '2024-07-02T13:22:26.175Z',
              'name': 'ADB Insurance Charges Predictions Function',
              'owner': '1000331001',
              'space_id': '70bcb00e-c87b-4ee8-8c65-f9f4ee5c59e8'},


In [28]:
hardware_spec_id = wml_client.hardware_specifications.get_id_by_name('M')
hardware_spec_id

'c076e82c-b2a7-4d20-9c0f-1f0c2fdf5a24'

In [29]:
deploy_meta = {
 wml_client.deployments.ConfigurationMetaNames.NAME: DEPLOYMENT_NAME,
 wml_client.deployments.ConfigurationMetaNames.ONLINE: {},
 wml_client.deployments.ConfigurationMetaNames.HARDWARE_SPEC: { "id": hardware_spec_id}
}

In [30]:
deployment_details = wml_client.deployments.create(function_uid, meta_props=deploy_meta)



#######################################################################################

Synchronous deployment creation for uid: 'b3a67a0a-01e0-458a-866a-2f4807f5b325' started

#######################################################################################


initializing
Note: online_url is deprecated and will be removed in a future release. Use serving_urls instead.
......
ready


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='ed6c6836-d60c-4030-aec2-b9a5ca658e3c'
------------------------------------------------------------------------------------------------




In [None]:
deployment_details

In [32]:
pyfunc_deployment_id = wml_client.deployments.get_id(deployment_details)
pyfunc_deployment_id

'ed6c6836-d60c-4030-aec2-b9a5ca658e3c'

### Get the scoring URL for the python function deployment

In [33]:
# Build the scoring URL with a ?version=<date> query parameter, where the date
# is the date part of the deployment's creation timestamp.
created_at = deployment_details['metadata']['created_at']
find_string_pos = created_at.find("T")
# Fall back to the whole value when there is no "T" separator, so that
# current_date is always bound (previously a NameError in that case).
current_date = created_at[0:find_string_pos] if find_string_pos != -1 else created_at
scoring_url = wml_client.deployments.get_scoring_href(deployment_details)
scoring_url = scoring_url + "?version="+current_date

In [None]:
scoring_url

# Perform scoring for the deployed python function

In [35]:
job_details = wml_client.deployments.score(pyfunc_deployment_id, scoring_payload)
pprint(job_details)

{'predictions': [{'fields': ['prediction'],
                  'values': [[24833.0156], [3222.135], [6075.4478]]}]}


# IBM watsonx.governance - monitoring Configuration

## Get the training data

In [36]:
import pandas as pd
training_data = pd.read_csv("insurance.csv", sep=",", header=0)
training_data.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,SMOKER,southwest,16884.924
1,18,male,33.77,1,NON_SMOKER,southeast,1725.5523
2,28,male,33.0,3,NON_SMOKER,southeast,4449.462
3,33,male,22.705,0,NON_SMOKER,northwest,21984.47061
4,32,male,28.88,0,NON_SMOKER,northwest,3866.8552


In [37]:
len(training_data)

1338

## Store the training data to IBM COS

In [38]:
IAM_URL="https://iam.ng.bluemix.net/oidc/token"

In [39]:
COS_API_KEY_ID = "[IBM Cloud API Key to store the training data]"
COS_RESOURCE_CRN = "[IBM Cloud COS CRN]"
COS_ENDPOINT = "https://s3.us.cloud-object-storage.appdomain.cloud"

In [40]:
BUCKET_NAME = "testcasebucket"

In [41]:
training_data_file_name="insurance.csv"

In [42]:
import ibm_boto3
from ibm_botocore.client import Config, ClientError

# COS resource handle used to upload the training data.
# The IAM token endpoint comes from the IAM_URL constant so that the upload and
# the training data reference of the subscription authenticate against the
# same endpoint instead of two separately hard-coded URLs.
cos_client = ibm_boto3.resource("s3",
    ibm_api_key_id=COS_API_KEY_ID,
    ibm_service_instance_id=COS_RESOURCE_CRN,
    ibm_auth_endpoint=IAM_URL,
    config=Config(signature_version="oauth"),
    endpoint_url=COS_ENDPOINT
)

In [43]:
with open(training_data_file_name, "rb") as file_data:
    cos_client.Object(BUCKET_NAME, training_data_file_name).upload_fileobj(
        Fileobj=file_data
    )

## Describe the training data - features, categorical columns, label column

In [44]:
input_fields = scoring_payload['input_data'][0]['fields']

In [45]:
input_fields

['age', 'sex', 'bmi', 'children', 'smoker', 'region']

In [46]:
feature_columns = input_fields
cat_features = [
                "sex", "smoker", "region"
                ]
class_label = 'charges'
prediction_field='prediction'

## IBM watsonx.governance Authentication

In [47]:
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import APIClient

from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

In [48]:
authenticator = CloudPakForDataAuthenticator(
        url=WML_CREDENTIALS['url'],
        username=WML_CREDENTIALS['username'],
        apikey=WML_CREDENTIALS['apikey'],
        disable_ssl_verification=True
    )
wos_client = APIClient(service_url=WML_CREDENTIALS['url'],authenticator=authenticator)
wos_client.version

'3.0.38'

In [49]:
wos_client.data_marts.show()

0,1,2,3,4,5
AIOSFASTPATHICP-00000000-0000-0000-0000-000000000000,Data Mart created by OpenScale ExpressPath,False,active,2024-06-10 04:20:14.860000+00:00,00000000-0000-0000-0000-000000000000


## Use the existing data mart

In [50]:
# Pick the first data mart returned by the service.
data_marts = wos_client.data_marts.list().result.data_marts
if not data_marts:
    # Explicit message instead of a bare IndexError on data_marts[0].
    raise RuntimeError("No data mart found. Please create a data mart before running this notebook.")
data_mart_id=data_marts[0].metadata.id
print('Using existing datamart {}'.format(data_mart_id))

Using existing datamart 00000000-0000-0000-0000-000000000000


## Create a service provider to WML Python Function that wraps the ADB model deployment

In [51]:
SERVICE_PROVIDER_NAME = "ADB Provider"
SERVICE_PROVIDER_DESCRIPTION = "Monitoring the model deployed in Azure Databricks using WML Python Function."

### Cleanup of the service providers

In [52]:
# Remove every service provider that already carries SERVICE_PROVIDER_NAME.
service_providers = wos_client.service_providers.list().result.service_providers
for service_provider in service_providers:
    service_instance_name = service_provider.entity.name
    if service_instance_name != SERVICE_PROVIDER_NAME:
        continue
    service_provider_id = service_provider.metadata.id
    wos_client.service_providers.delete(service_provider_id)
    print("Deleted existing service_provider for WML instance: {}".format(service_provider_id))

Deleted existing service_provider for WML instance: da2ad422-e353-42cd-97d4-d800637e9ca4


## Create the service provider here

In [54]:
# Register a Watson Machine Learning service provider for the deployment space
# that hosts the WML Python Function. background_mode=False makes the call
# wait for the result (see the "Waiting for end of adding service provider"
# output below).
added_service_provider_result = wos_client.service_providers.add(
        name=SERVICE_PROVIDER_NAME,
        description=SERVICE_PROVIDER_DESCRIPTION,
        service_type=ServiceTypes.WATSON_MACHINE_LEARNING,
        deployment_space_id = space_id,
        operational_space_id = "production",
        # NOTE(review): credentials are intentionally left empty here; presumably
        # this targets the WML instance local to the CP4D cluster - confirm.
        credentials=WMLCredentialsCP4D(
            # url=WML_CREDENTIALS["url"],
            # username=WML_CREDENTIALS["username"],
            # apikey=WML_CREDENTIALS["apikey"],
            # instance_id=None
        ),
        background_mode=False
    ).result
# Id of the new provider; used by the asset lookup and subscription cells.
service_provider_id = added_service_provider_result.metadata.id




 Waiting for end of adding service provider dc7d15cd-7fc9-4aae-995f-299f691dfc5c 




active

-----------------------------------------------
 Successfully finished adding service provider 
-----------------------------------------------




In [55]:
wos_client.service_providers.show()

0,1,2,3,4,5
99999999-9999-9999-9999-999999999999,active,ADB Provider,watson_machine_learning,2024-07-02 13:25:50.457000+00:00,dc7d15cd-7fc9-4aae-995f-299f691dfc5c
99999999-9999-9999-9999-999999999999,active,PROJECT_wxgovdemos,watson_machine_learning,2024-06-19 12:47:10.072000+00:00,f5f2edab-0d12-49ba-820d-d25bdccab14a
,active,PepGenX Service Provider,custom_machine_learning,2024-06-12 16:09:08.748000+00:00,aceeabfe-2037-4979-aa47-a4e570b20aed
99999999-9999-9999-9999-999999999999,active,SPACE_xgov_prod,watson_machine_learning,2024-06-10 18:40:05.513000+00:00,d05cda18-b6bd-4947-8e13-2d42f547365a
99999999-9999-9999-9999-999999999999,active,PROJECT_gov_demo,watson_machine_learning,2024-06-10 12:08:47.831000+00:00,244d7f43-e1e7-4114-ad14-38d8f08aa005
99999999-9999-9999-9999-999999999999,active,SPACE_PG - PROD,watson_machine_learning,2024-06-10 10:54:54.825000+00:00,b283029b-c623-4b49-b22d-d41931d59091
99999999-9999-9999-9999-999999999999,active,SPACE_PG - Test,watson_machine_learning,2024-06-10 10:49:51.117000+00:00,a964e5bf-d7ba-4d08-8d61-8e5cd1fa8a1f
99999999-9999-9999-9999-999999999999,active,PROJECT_PG,watson_machine_learning,2024-06-10 10:41:19.603000+00:00,0afb48e0-164e-430c-b83a-e10cd109ca39
99999999-9999-9999-9999-999999999999,active,WOS ExpressPath WML pre_production binding,watson_machine_learning,2024-06-10 04:20:25.328000+00:00,53e3cf02-f42a-4f9a-b31b-413d7b567bc9
99999999-9999-9999-9999-999999999999,active,WOS ExpressPath WML production binding,watson_machine_learning,2024-06-10 04:20:20.061000+00:00,db77b1ec-adcb-4377-b8fe-8c6017dec120


## Get the python function deployment details

In [None]:
asset_deployment_details = wos_client.service_providers.list_assets(data_mart_id=data_mart_id, 
                                                                    service_provider_id=service_provider_id,
                                                                    deployment_id=pyfunc_deployment_id, 
                                                                    deployment_space_id = space_id).result['resources'][0]
asset_deployment_details

In [None]:
model_asset_details_from_deployment=wos_client.service_providers.get_deployment_asset(data_mart_id=data_mart_id,
                                                                                      service_provider_id=service_provider_id,
                                                                                      deployment_id=pyfunc_deployment_id,
                                                                                      deployment_space_id=space_id)
model_asset_details_from_deployment

## Subscribe the Python Function Deployment to IBM watsonx.governance - monitoring

In [58]:
wos_client.subscriptions.show()

0,1,2,3,4,5,6,7,8,9
a2a3b00e-1759-4544-9de6-17fb0f153a92,prompt,Banking FAQ External prompt2 (Azure OpenAI GPT35),00000000-0000-0000-0000-000000000000,143d5231-cccd-4f6b-aded-c9c1c8fcd0ce,Banking FAQ External prompt2 (Azure OpenAI GPT35),f5f2edab-0d12-49ba-820d-d25bdccab14a,active,2024-06-20 12:56:03.916000+00:00,0f7f07a2-abdd-44ec-906f-fd5e5901dbb0
9aef2762-4289-4dcf-8580-d1d20bc60cdf,prompt,Banking FAQ External prompt (Azure OpenAI GPT35),00000000-0000-0000-0000-000000000000,cb831032-a2a2-4235-af2d-082b7e42d2f9,Banking FAQ External prompt (Azure OpenAI GPT35),f5f2edab-0d12-49ba-820d-d25bdccab14a,active,2024-06-20 01:11:41.428000+00:00,d1c633da-5970-4635-8672-e8ec379a197c
72d9e5bc-fa10-453c-bda9-9da47cdbbcdd,model,[asset] PepGenX QnA,00000000-0000-0000-0000-000000000000,22052618-3617-467d-a227-4d4fb0a9037e,[asset] PepGenX QnA,aceeabfe-2037-4979-aa47-a4e570b20aed,active,2024-06-12 16:09:21.763000+00:00,42fa0b25-7a66-44c3-ae89-2c6f9ff3699c
0fab89d6-9fcc-41ba-995f-c6894af3ca7a,prompt,External prompt sample (Open AI),00000000-0000-0000-0000-000000000000,dcbdee1f-fa53-43b5-963b-7554f3b63991,carbonated_drinks_app_deployment,d05cda18-b6bd-4947-8e13-2d42f547365a,active,2024-06-11 09:54:05.519000+00:00,f474858e-4341-47b4-8e5b-ecbe6333fa75
6d050575-dc38-4e02-a3f1-0acacfed1bcb,prompt,External prompt sample (Open AI),00000000-0000-0000-0000-000000000000,9c2ba3d8-f62c-40ee-92dd-b0538a2559e2,External prompt sample (Open AI),244d7f43-e1e7-4114-ad14-38d8f08aa005,active,2024-06-10 12:08:50.060000+00:00,24a03ac9-665e-4003-bdbd-b46a2d62f749
95d88606-6d55-475e-9603-20cdcf8ba65f,prompt,HP - Detached - Summarization,00000000-0000-0000-0000-000000000000,f4fba2d3-7c99-4e18-9d6c-24f798f3ccd2,HP - Detached - Summarization,b283029b-c623-4b49-b22d-d41931d59091,active,2024-06-10 10:54:55.113000+00:00,880a5cec-6a5c-437d-a107-d96b75eb5133
8a1980b2-2a10-4069-9ab2-317f8c61888f,prompt,HP - Detached - Summarization,00000000-0000-0000-0000-000000000000,27d2b5a8-7fed-41d0-ae21-631c9b9560a9,HP - Detached - Summarization,a964e5bf-d7ba-4d08-8d61-8e5cd1fa8a1f,active,2024-06-10 10:49:51.415000+00:00,1dd1d050-72c3-4dd7-b42e-454c2e955ea8
f0967783-8605-461b-b26e-21e3715650cb,prompt,HP - Detached - Summarization,00000000-0000-0000-0000-000000000000,c9803953-5c55-437e-bfd1-ebf09f862e75,HP - Detached - Summarization,0afb48e0-164e-430c-b83a-e10cd109ca39,active,2024-06-10 10:41:21.613000+00:00,06148744-8946-4462-9371-c5469f054137
95b6ee15-383a-4a78-8ef6-9c6f5e99dd86,model,GermanCreditRiskModelICP,00000000-0000-0000-0000-000000000000,cd3e30e7-2dac-4baa-ad0c-73e59e994bf8,GermanCreditRiskModelICP,db77b1ec-adcb-4377-b8fe-8c6017dec120,active,2024-06-10 04:24:24.410000+00:00,47b75d3a-cc19-49d8-b591-fc925b54e05c
fb252fed-fdcb-4cf0-bc26-32a2b3786874,model,GermanCreditRiskModelPreProdICP,00000000-0000-0000-0000-000000000000,b9c9513f-8b9b-48ae-80f3-2dc162fa719a,GermanCreditRiskModelPreProdICP,53e3cf02-f42a-4f9a-b31b-413d7b567bc9,active,2024-06-10 04:22:14.034000+00:00,caaded4a-1abe-4bb3-8764-a16b2f792d95


Note: First 10 records were displayed.


### Cleanup of the subscription

In [59]:
# Delete any subscription whose asset is the stored Python function.
subscriptions = wos_client.subscriptions.list().result.subscriptions
for subscription in subscriptions:
    sub_model_id = subscription.entity.asset.asset_id
    if sub_model_id != function_uid:
        continue
    wos_client.subscriptions.delete(subscription.metadata.id)
    print('Deleted existing subscription for model', sub_model_id)

### Create the subscription here

In [60]:
# Subscribe the Python function deployment as a structured-data regression
# model. Asset and deployment identifiers are taken from the lookups made in
# the "Get the python function deployment details" cells; the training data is
# referenced from the COS bucket it was uploaded to earlier.
subscription_details = wos_client.subscriptions.add(
        data_mart_id=data_mart_id,
        service_provider_id=service_provider_id,
        asset=Asset(
            asset_id=model_asset_details_from_deployment["entity"]["asset"]["asset_id"],
            name=model_asset_details_from_deployment["entity"]["asset"]["name"],
            url=model_asset_details_from_deployment["entity"]["asset"]["url"],
            asset_type=AssetTypes.MODEL,
            input_data_type=InputDataType.STRUCTURED,
            problem_type=ProblemType.REGRESSION
        ),
        deployment=AssetDeploymentRequest(
            deployment_id=asset_deployment_details['metadata']['guid'],
            name=asset_deployment_details['entity']['name'],
            deployment_type= DeploymentTypes.ONLINE,
            url=asset_deployment_details['entity']['scoring_endpoint']['url']
        ),
        asset_properties=AssetPropertiesRequest(
            # label_column is 'charges' and prediction_field is 'prediction',
            # i.e. the field name returned by the scoring wrapper.
            label_column=class_label,
            prediction_field=prediction_field,
            feature_fields = feature_columns,
            categorical_fields = cat_features,
            training_data_reference=TrainingDataReference(type='cos',
                                                          location=COSTrainingDataReferenceLocation(bucket = BUCKET_NAME,
                                                                                                    file_name = training_data_file_name),
                                                          connection=COSTrainingDataReferenceConnection.from_dict({
                                                              "resource_instance_id": COS_RESOURCE_CRN,
                                                              "url": COS_ENDPOINT,
                                                              "api_key": COS_API_KEY_ID,
                                                              "iam_url": IAM_URL})),            
        ),
        # Wait for the result instead of returning immediately.
        background_mode=False
    ).result
# Id of the new subscription; every monitor below targets it.
subscription_id = subscription_details.metadata.id
subscription_id




 Waiting for end of adding subscription 653aaacf-3940-4b75-9d1b-c860cbd83cdb 




active

-------------------------------------------
 Successfully finished adding subscription 
-------------------------------------------




'653aaacf-3940-4b75-9d1b-c860cbd83cdb'

## Get the id of the payload logging data set, where the scored payloads are logged.

In [61]:
import time

# Short pause before looking up the payload logging data set of the new subscription.
time.sleep(5)
payload_data_set_id = None
payload_data_sets = wos_client.data_sets.list(type=DataSetTypes.PAYLOAD_LOGGING, 
                                              target_target_id=subscription_id, 
                                              target_target_type=TargetTypes.SUBSCRIPTION).result.data_sets
# Index only when a data set was returned; previously an empty list raised
# IndexError before the "not found" message below could ever be printed.
if payload_data_sets:
    payload_data_set_id = payload_data_sets[0].metadata.id
if payload_data_set_id is None:
    print("Payload data set not found. Please check subscription status.")
else:
    print("Payload data set id: ", payload_data_set_id)

Payload data set id:  5f5b0673-2146-4eac-b45f-4dfe00430f5c


In [62]:
scoring_payload = get_scoring_payload(no_of_records_to_score=100)

## Score the WML Python Function deployment to enable automatic payload logging 

In [119]:
job_details = wml_client.deployments.score(pyfunc_deployment_id, scoring_payload)
pprint(job_details)

{'predictions': [{'fields': ['prediction'],
                  'values': [[24833.0156],
                             [3222.135],
                             [6075.4478],
                             [3256.1193],
                             [5497.3674],
                             [2544.2976],
                             [10248.147],
                             [7200.4697],
                             [9277.2973],
                             [11423.3053],
                             [3242.7471],
                             [35087.9978],
                             [4710.0012],
                             [14894.2719],
                             [32500.3335],
                             [267.8612],
                             [12585.8869],
                             [1747.9031],
                             [15862.4835],
                             [31026.696],
                             [16232.4042],
                             [6082.663],
                           

### Make sure the records reached the payload logging table

In [64]:
import uuid
from ibm_watson_openscale.supporting_classes.payload_record import PayloadRecord
time.sleep(5)
pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
print("Number of records in the payload logging table: {}".format(pl_records_count))
if pl_records_count == 0:
    raise Exception("Payload logging did not happen!")

Number of records in the payload logging table: 100


## Utility - get monitoring instance, as needed

In [65]:
def get_monitor_instance(monitor_definition_id='explainability'):
    """List the monitor instances of one monitor type for the current subscription.

    The listing is printed and then returned.

    Args:
        monitor_definition_id: id of the monitor definition to filter on.

    Returns:
        The ``result`` of the ``monitor_instances.list`` call.
    """
    listing = wos_client.monitor_instances.list(
        data_mart_id=data_mart_id,
        target_target_id=subscription_id,
        monitor_definition_id=monitor_definition_id,
    ).result
    print(listing)
    return listing

In [None]:
monitor_instances_info = get_monitor_instance(monitor_definition_id='drift_v2')
monitor_instances_info

## Utility - Cleanup existing monitoring instance, as needed

In [67]:
def cleanup_monitor_instance(monitor_definition_id='explainability'):
    """Delete the first monitor instance of the given type, if one exists.

    Args:
        monitor_definition_id: id of the monitor definition to clean up.
    """
    info = get_monitor_instance(monitor_definition_id)

    # Nothing to delete when the lookup returned nothing or an empty list.
    if info is None or len(info.monitor_instances) == 0:
        return

    first_instance_id = info.monitor_instances[0].metadata.id
    wos_client.monitor_instances.delete(monitor_instance_id=first_instance_id)

In [68]:
#cleanup_monitor_instance(monitor_definition_id='drift_v2')

# Drift V2 Configuration

In [69]:
# Create the Drift V2 monitor instance for the subscription.
target = Target(
    target_type=TargetTypes.SUBSCRIPTION,
    target_id=subscription_id
)

# All feature columns are passed both as "features" and as
# "most_important_features"; no feature importances are supplied.
# NOTE(review): the min_samples / max_samples / train_archive values are taken
# as-is; their exact semantics are defined by the Drift V2 monitor - confirm
# against the service documentation before tuning them.
parameters = {
        "min_samples": 10,
        "max_samples": 1000,
        "train_archive": True,
        "features": {
            "fields": feature_columns,
            "importances": {}
        },
        "most_important_features": {
            "fields": feature_columns
        }
    }
# background_mode=False waits for the result (see the "Waiting for end of
# monitor instance creation" output below).
drift_v2_monitor_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.DRIFT_V2.ID,
    target=target,
    background_mode=False,
    parameters=parameters
).result

# Id of the monitor instance; needed to run it and to show its metrics.
drift_v2_monitor_instance_id = drift_v2_monitor_details.metadata.id
drift_v2_monitor_instance_id




 Waiting for end of monitor instance creation b7632393-7167-4824-9e1e-413303606381 




preparing.......................
active

---------------------------------------
 Monitor instance successfully created 
---------------------------------------




'b7632393-7167-4824-9e1e-413303606381'

# Run Drift V2 monitor

In [70]:
drift_v2_run_details = wos_client.monitor_instances.run(monitor_instance_id=drift_v2_monitor_instance_id, background_mode=False)




 Waiting for end of monitoring run 70a2f4e2-06a7-4848-9b17-c12fa6597530 




running....................................
finished

---------------------------
 Successfully finished run 
---------------------------




In [71]:
time.sleep(5)
wos_client.monitor_instances.show_metrics(monitor_instance_id=drift_v2_monitor_instance_id)

# Configure Explainability

Provide the following explainability configuration to enable SHAP global explanation. Set `global_explanation.enabled` to `False` if SHAP global explanation is not required, or if you are using a headless subscription, i.e., a subscription that is configured without a REST endpoint for scoring.

* global_explanation: The global explanation parameters.
     * enabled: Enable the global explanation. SHAP explanation also should be enabled when global explanation is enabled.
     * sample_size: The sample size of the records to be considered for computing global explanation in the payload window.
* shap: The shap explanation parameters
     * enabled: Enable the shap explanation
     * perturbations_count: The number of perturbations created when generating a local explanation
* local_explanation_method: The default local explanation method to be used when generating local explanation. Possible values are "shap" and "lime"

In [108]:
# cleanup_monitor_instance(monitor_definition_id='explainability')

In [109]:
# Create the explainability monitor instance for the subscription, with SHAP
# local explanations and global explanation enabled.
target = Target(
    target_type=TargetTypes.SUBSCRIPTION,
    target_id=subscription_id
)

parameters = {
    "enabled": True,
    "global_explanation": {
        "enabled": True,
        "sample_size": 50 # the sample size of the records to be considered for computing training and payload global explanation
    },
    "local_explanation_method": "shap", # or lime. The default explanation to be generated when running the local explanation.
    "shap": {
        "enabled": True,
        "perturbations_count": 0 # 0 means auto determine, If not provide a positive integer value below 2500
    }    
}

# Lower limit of 0.8 on the global_explanation_stability metric.
thresholds = [MetricThresholdOverride(metric_id=wos_client.monitor_definitions.MONITORS.EXPLAINABILITY.METRIC.GLOBAL_EXPLANATION_STABILITY,
                                      type=MetricThresholdTypes.LOWER_LIMIT,
                                      value=0.8)]

# background_mode=False waits for the result (see the "Waiting for end of
# monitor instance creation" output below).
explainability_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.EXPLAINABILITY.ID,
    target=target,
    parameters=parameters,
    thresholds=thresholds
).result

# Id of the explainability monitor instance.
explainability_monitor_id = explainability_details.metadata.id




 Waiting for end of monitor instance creation c2fd6f49-48de-476f-b388-df1297c99d3e 




preparing....................
active

---------------------------------------
 Monitor instance successfully created 
---------------------------------------




## Run local explanation for sample record

In [111]:
payload_data = wos_client.data_sets.get_list_of_records(data_set_id=payload_data_set_id,output_type='pandas').result
explanation_types = ["lime"]

scoring_ids = payload_data.head(1)['scoring_id'].tolist()
result = wos_client.monitor_instances.explanation_tasks(scoring_ids=scoring_ids, explanation_types=explanation_types, subscription_id=subscription_id).result

explanation_task_ids=result.metadata.explanation_task_ids
explanation_task_ids

['a4477f9f-1fb5-4c93-b64c-c9e3745ee9f2']

### Utility to wait for the explain task to complete

In [112]:
def finish_explanation_tasks(sample_size = 1):
    """Poll the explanation tasks until they reach a terminal state.

    Reads the notebook-level variables `explanation_task_ids`, `wos_client`
    and `subscription_id`. Every task that is not yet done is queried once per
    attempt; a task counts as done when its state is 'finished' or 'error'.
    At most 5 attempts are made, 10 seconds apart.

    Args:
        sample_size: number of explanation tasks expected to complete. It is
            capped at the number of distinct ids in `explanation_task_ids`, so
            a mismatched value cannot keep the loop waiting for tasks that
            were never submitted.

    Returns:
        list: the task results that reached a terminal state, in the order in
        which they were observed. May be shorter than expected if some tasks
        were still running after the last attempt.
    """
    max_attempts = 5
    poll_interval_seconds = 10
    terminal_states = ('finished', 'error')

    # Never wait for more tasks than were actually submitted.
    expected_count = min(sample_size, len(set(explanation_task_ids)))

    finished_explanations = []
    finished_explanation_task_ids = []

    for i in range(max_attempts):
        print('iteration ' + str(i))

        # Check the status of every task that has not completed yet.
        for explanation_task_id in explanation_task_ids:
            if explanation_task_id in finished_explanation_task_ids:
                continue
            result = wos_client.monitor_instances.get_explanation_tasks(explanation_task_id=explanation_task_id, subscription_id=subscription_id).result
            state = result.entity.status.state
            print(explanation_task_id + ' : ' + state)
            if state in terminal_states:
                finished_explanation_task_ids.append(explanation_task_id)
                finished_explanations.append(result)

        # Stop as soon as everything we expect has completed.
        if len(finished_explanation_task_ids) >= expected_count:
            break

        # Some tasks are still running: wait before the next check, but do not
        # sleep after the final attempt because nothing follows it.
        if i < max_attempts - 1:
            print('sleeping for some time..')
            time.sleep(poll_interval_seconds)

    return finished_explanations

### Check whether the explain task finished or not

In [113]:
finished_explanations = finish_explanation_tasks(1)

iteration 0
a4477f9f-1fb5-4c93-b64c-c9e3745ee9f2 : finished


## Print the explain evaluation output

In [None]:
for result in finished_explanations:
    print(result)

## Run explainability monitor (global explanation)

Kick off an explainability monitor run on the existing payload data. The monitor is scheduled to run every week, but a run can also be triggered manually using the Python client or the API. The run computes the global explanation on a sample of the payload records and publishes the `global_explanation_stability` metric.

In [116]:
explainability_run_details = wos_client.monitor_instances.run(monitor_instance_id=explainability_monitor_id, background_mode=False)




 Waiting for end of monitoring run 994199e9-aed9-478c-83b2-e6fd2ebfdc28 




running.......
finished

---------------------------
 Successfully finished run 
---------------------------




In [118]:
time.sleep(5)
wos_client.monitor_instances.show_metrics(monitor_instance_id=explainability_monitor_id)

# Configure Quality Monitoring

In [79]:
import time

# NOTE(review): the reason for this 10-second pause is not visible in the notebook;
# presumably it lets the previous step settle before the monitor is created - confirm.
time.sleep(10)
# The quality monitor is attached to the model subscription.
target = Target(
        target_type=TargetTypes.SUBSCRIPTION,
        target_id=subscription_id
)
# NOTE(review): by its name, this is the minimum number of feedback records
# needed for a quality evaluation - confirm against the service documentation.
parameters = {
    "min_feedback_data_size": 50
}
# Create the quality monitor instance for the subscription target.
quality_monitor_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.QUALITY.ID,
    target=target,
    parameters=parameters
).result




 Waiting for end of monitor instance creation 5423f7bd-6dc2-4279-aea4-1b41487b96be 




preparing
active

---------------------------------------
 Monitor instance successfully created 
---------------------------------------




## Get the quality monitor instance id and the feedback data set id

In [80]:
quality_monitor_instance_id = quality_monitor_details.metadata.id
quality_monitor_instance_id

'5423f7bd-6dc2-4279-aea4-1b41487b96be'

In [None]:
# Look up the feedback data set that belongs to this subscription.
feedback_dataset_id = None
feedback_dataset = wos_client.data_sets.list(type=DataSetTypes.FEEDBACK,
                                             target_target_id=subscription_id,
                                             target_target_type=TargetTypes.SUBSCRIPTION).result
print(feedback_dataset)
# Only index into the list when it is non-empty; otherwise an IndexError would be
# raised here and the "not found" message below could never be shown.
if feedback_dataset.data_sets:
    feedback_dataset_id = feedback_dataset.data_sets[0].metadata.id
if feedback_dataset_id is None:
    print("Feedback data set not found. Please check quality monitor status.")

In [82]:
feedback_dataset_id

'01151cda-9ffe-4be2-90fa-dc8d04d1a94f'

## Perform feedback logging

In [83]:
import pandas as pd
df = pd.read_csv("insurance.csv", sep=",", header=0)

In [84]:
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,SMOKER,southwest,16884.924
1,18,male,33.77,1,NON_SMOKER,southeast,1725.5523
2,28,male,33.0,3,NON_SMOKER,southeast,4449.462
3,33,male,22.705,0,NON_SMOKER,northwest,21984.47061
4,32,male,28.88,0,NON_SMOKER,northwest,3866.8552


In [85]:
# Draw 100 rows to use as feedback records. The fixed random_state makes a
# Restart & Run All upload the same rows, so the quality metrics are reproducible.
feedback_data = df.sample(n=100, random_state=42)

In [86]:
cols_to_remove = []
def get_feedback_payload():
    """Turn the notebook-level `feedback_data` frame into a fields/values payload.

    Any column named in `cols_to_remove` that is present in `feedback_data`
    is deleted from that frame in place before the payload is built.

    Returns:
        dict: {"fields": <column names>, "values": <rows as lists>}.
    """
    for unwanted in cols_to_remove:
        if unwanted not in feedback_data.columns:
            continue
        del feedback_data[unwanted]

    column_names = feedback_data.columns.tolist()
    rows = feedback_data[column_names].values.tolist()
    return {"fields": column_names, "values": rows}

In [87]:
feedback_payload = get_feedback_payload()

In [None]:
feedback_payload

In [89]:
# get access token
def generate_access_token():
    """Request a bearer token from the Cloud Pak for Data authorize endpoint.

    Posts the username and API key from the notebook-level `WML_CREDENTIALS`
    to `<url>/icp4d-api/v1/authorize`.

    Returns:
        The value of the `token` field of the JSON response.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status
            (for example, wrong credentials).
        KeyError: if a successful response carries no `token` field.
    """
    import requests

    headers = {
        'Content-type': 'application/json'
    }
    payload = {
        'username': WML_CREDENTIALS["username"],
        'api_key': WML_CREDENTIALS["apikey"]
    }
    ICP_TOKEN_URL = '{}/icp4d-api/v1/authorize'.format(WML_CREDENTIALS["url"])

    # NOTE(review): verify=False turns off TLS certificate verification. Kept as in
    # the original notebook; pass a CA bundle instead when the cluster has one.
    response = requests.post(ICP_TOKEN_URL, headers=headers, json=payload, verify=False)
    if not response.ok:
        # Show the service's own error body before failing.
        print(response.text)
    # Fail with the HTTP error itself rather than a later KeyError on 'token'.
    response.raise_for_status()

    json_data = response.json()
    icp_access_token = json_data['token']
    return icp_access_token

In [90]:
token = generate_access_token()

In [91]:
# Headers for the REST call that stores the feedback records:
# a JSON body, authorized with the bearer token obtained above.
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer {}".format(token),
}

In [92]:
url=WOS_CREDENTIALS['url']

### Load the feedback data

In [120]:
import urllib3, requests, json

# Records endpoint of the feedback data set in this data mart.
DATASETS_STORE_RECORDS_URL =  url + "/openscale/{0}/v2/data_sets/{1}/records".format(data_mart_id, feedback_dataset_id)
# NOTE(review): verify=False turns off TLS certificate verification; kept as in the original.
response = requests.post(DATASETS_STORE_RECORDS_URL, json=feedback_payload, headers=headers, verify=False)
if not response.ok:
    # Show the service's own error body before failing.
    print(response.text)
# Stop here on an HTTP error instead of treating an error body as a successful upload.
response.raise_for_status()
json_data = response.json()
print(json_data)

{'state': 'preparing'}


In [121]:
import time
time.sleep(5)
wos_client.data_sets.get_records_count(data_set_id=feedback_dataset_id)

200

## Trigger the quality monitor

In [95]:
run_details = wos_client.monitor_instances.run(monitor_instance_id=quality_monitor_instance_id, background_mode=False).result




 Waiting for end of monitoring run 48becad9-eacd-4467-943f-8369db2efb53 




running
finished

---------------------------
 Successfully finished run 
---------------------------




### Fetch the quality monitor evaluated metrics

In [96]:
wos_client.monitor_instances.show_metrics(monitor_instance_id=quality_monitor_instance_id)

# Fairness Monitor Configuration

In [97]:
cleanup_monitor_instance(monitor_definition_id='fairness')

{
  "monitor_instances": []
}


In [98]:
# The fairness monitor is attached to the model subscription.
target = Target(
    target_type=TargetTypes.SUBSCRIPTION,
    target_id=subscription_id

)
# Fairness settings: the feature `sex` is monitored, with 'male' as the majority
# group and 'female' as the minority group. The favourable and unfavourable
# outcomes are given as numeric ranges.
# NOTE(review): `charges` in insurance.csv is fractional, so a value strictly between
# 9999 and 10000 falls into neither range - confirm this gap is intended.
parameters = {
    "features": [
        {"feature": "sex",
         "majority": ['male'],
         "minority": ['female']
         }
    ],
    "favourable_class": [[10000,64000]],
    "unfavourable_class": [[1120,9999]],
    "min_records": 100
}
# Lower-limit threshold on the fairness_value metric: 95.0 in general,
# with a specific value of 98 for records tagged with feature = sex.
thresholds = [{
    "metric_id": "fairness_value",
    "specific_values": [
        {
            "applies_to": [{
                "key": "feature",
                "type": "tag",
                "value": "sex"
            }],
            "value": 98
        }
    ],
    "type": "lower_limit",
    "value": 95.0
}]

# Create the fairness monitor instance for the subscription target.
fairness_monitor_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.FAIRNESS.ID,
    target=target,
    parameters=parameters,
    thresholds=thresholds).result




 Waiting for end of monitor instance creation 185d690b-58e3-439b-b957-1d4578fd5d64 




active

---------------------------------------
 Monitor instance successfully created 
---------------------------------------




In [99]:
fairness_monitor_instance_id = fairness_monitor_details.metadata.id

## Check, or otherwise wait, for the fairness monitor evaluation to complete

In [100]:
# Polling limits: 60 checks, 10 seconds apart (about 10 minutes in total).
MAX_STATUS_CHECKS = 60
POLL_INTERVAL_SECONDS = 10
TERMINAL_RUN_STATES = ("finished", "error")

# Fetch the most recent run of the fairness monitor instance.
runs = wos_client.monitor_instances.list_runs(fairness_monitor_instance_id, limit=1).result.to_dict()
if not runs.get("runs"):
    # Give a clear message instead of an IndexError when no run exists yet.
    raise RuntimeError("No monitoring run found for fairness monitor instance " + str(fairness_monitor_instance_id))
fairness_monitoring_run_id = runs["runs"][0]["metadata"]["id"]

# Poll the run until it reaches a terminal state, but never wait forever.
run_status = None
for attempt in range(MAX_STATUS_CHECKS):
    run_details = wos_client.monitor_instances.get_run_details(fairness_monitor_instance_id, fairness_monitoring_run_id).result.to_dict()
    run_status = run_details["entity"]["status"]["state"]
    print('run_status: ', run_status)
    if run_status in TERMINAL_RUN_STATES:
        break
    time.sleep(POLL_INTERVAL_SECONDS)
else:
    # The loop ran out of attempts without seeing a terminal state.
    raise TimeoutError("Fairness monitoring run " + str(fairness_monitoring_run_id) + " did not complete; last state: " + str(run_status))

run_status:  running
run_status:  finished


In [None]:
wos_client.monitor_instances.get_run_details(fairness_monitor_instance_id, fairness_monitoring_run_id).result.to_dict()

### Fetch the fairness monitor evaluation results

In [102]:
wos_client.monitor_instances.show_metrics(monitor_instance_id=fairness_monitor_instance_id)

# Drift Monitor Configuration

Note: because this is a regression model, only data drift is enabled.

In [103]:
# The drift monitor is attached to the model subscription.
target = Target(
    target_type=TargetTypes.SUBSCRIPTION,
    target_id=subscription_id
)
# Drift settings. This is a regression model, so only data drift is enabled:
# model (drop-in-accuracy) drift is turned off, matching the note above this cell.
# NOTE(review): model drift is documented for classification models only -
# see the watsonx.governance drift documentation.
parameters = {
    "min_samples": 100,
    "drift_threshold": 0.1,
    "train_drift_model": True,
    "enable_model_drift": False,
    "enable_data_drift": True
}

# Create the drift monitor instance for the subscription target.
drift_monitor_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.DRIFT.ID,
    target=target,
    parameters=parameters
).result

# Keep the monitor instance id; later cells use it to run the monitor and show its metrics.
drift_monitor_instance_id = drift_monitor_details.metadata.id
drift_monitor_instance_id




 Waiting for end of monitor instance creation 5e595272-d973-4376-b943-c5a3dd6f8f37 




active

---------------------------------------
 Monitor instance successfully created 
---------------------------------------




'5e595272-d973-4376-b943-c5a3dd6f8f37'

## Trigger the drift monitoring run

In [104]:
drift_run_details = wos_client.monitor_instances.run(monitor_instance_id=drift_monitor_instance_id, background_mode=False)




 Waiting for end of monitoring run d09a2705-0406-4f87-93eb-f6b53ea9df46 




finished

---------------------------
 Successfully finished run 
---------------------------




### Fetch the drift monitor evaluation results

In [105]:
time.sleep(5)
wos_client.monitor_instances.show_metrics(monitor_instance_id=drift_monitor_instance_id)

## Recap of the steps performed in this notebook


* Wraps an Azure Databricks invocations endpoint in a Python function.
* Deploys this Python function to WML.
* Configures this Python function deployment in IBM watsonx.governance for monitoring.
* Because this is a WML Python function, payload logging is automatic; the notebook triggers it by scoring the Python function deployment endpoint.
* Configures all the IBM watsonx.governance monitors and evaluates them.

Author: Ravi Chamarthy (ravi.chamarthy@in.ibm.com)