<img src="https://github.com/pmservice/ai-openscale-tutorials/raw/master/notebooks/images/banner.png" align="left" alt="banner">

# Working with Watson OpenScale - Custom Machine Learning Provider

This notebook should be run using with **Python 3.7** runtime environment. **If you are viewing this in Watson Studio and do not see Python 3.7 in the upper right corner of your screen, please update the runtime now.** It requires service credentials for the following services:
  * Watson OpenScale

  
The notebook will configure OpenScale to monitor Fairness on a Custom ML deployment

# Setup <a name="setup"></a>

## Package installation

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
!pip install --upgrade pyspark==2.4 --no-cache | tail -n 1

!pip install --upgrade pandas==0.25.3 --no-cache | tail -n 1
!pip install --upgrade requests==2.23 --no-cache | tail -n 1
!pip install numpy==1.16.4 --no-cache | tail -n 1
!pip install SciPy --no-cache | tail -n 1
!pip install lime --no-cache | tail -n 1
!pip install ibm-cloud-sdk-core --no-cache | tail -n 1

!pip install --upgrade ibm-watson-machine-learning --user | tail -n 1
!pip install --upgrade ibm-watson-openscale --no-cache | tail -n 1

### Action: restart the kernel!

## Configure credentials

- WOS_CREDENTIALS (CP4D)

In [2]:
#masked
WOS_CREDENTIALS = {
    "url": "https://cpd-namespace1.apps.xxxxx.cp.fyre.ibm.com",
    "username": "xxxxx",
    "password": "xxxxxx"
}

In [3]:
SCORING_URL = 'https://us-south.ml.cloud.ibm.com/ml/v4/deployments/d39bf576-8ed9-4f23-8b7d-750635395bfc/predictions?version=2021-10-06'

In [4]:
import os
import base64
import json
import requests
from requests.auth import HTTPBasicAuth

# Scoring part here

In [5]:
!rm customer_segmentation_train.csv
!wget https://raw.githubusercontent.com/ravichamarthy/custom-engine/master/customer_segmentation_train.csv

--2021-10-21 18:48:10--  https://raw.githubusercontent.com/ravichamarthy/custom-engine/master/customer_segmentation_train.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 425369 (415K) [text/plain]
Saving to: ‘customer_segmentation_train.csv’


2021-10-21 18:48:11 (2.55 MB/s) - ‘customer_segmentation_train.csv’ saved [425369/425369]



In [6]:
import urllib3, requests, json
def generate_cloud_access_token():
    API_KEY = "xxxxxx-K8qPNuo1RiS"
    token_response = requests.post('https://iam.cloud.ibm.com/identity/token', data={"apikey": API_KEY, "grant_type": 'urn:ibm:params:oauth:grant-type:apikey'})
    mltoken = token_response.json()["access_token"]
    return mltoken

In [7]:
cloud_access_token = generate_cloud_access_token()

In [8]:
cloud_header = {
    'Content-Type': 'application/json', 
    'Authorization': 'Bearer ' + cloud_access_token
}

In [9]:
def get_scoring_payload(no_of_records_to_score = 1):

    for col in cols_to_remove:
        if col in payload_data.columns:
            del payload_data[col] 

    fields = payload_data.columns.tolist()
    values = payload_data[fields].values.tolist()

    payload_scoring = {"input_data": [{"fields": fields, "values": values[:no_of_records_to_score],}]}
 
    return payload_scoring

In [10]:
def sample_scoring(no_of_records_to_score = 1):
    import urllib3, requests, json
    records_list=[]
    payload_scoring = get_scoring_payload(no_of_records_to_score)
    
    
    response_scoring = requests.post(SCORING_URL, json=payload_scoring, headers=cloud_header, verify=False)

    scoring_response = response_scoring.json()
    print('Single record scoring result:', '\n fields:', scoring_response['predictions'][0]['fields'], '\n values: ', scoring_response['predictions'][0]['values'][0])
    response_data = {"fields": scoring_response['predictions'][0]['fields'],"values": scoring_response['predictions'][0]['values']}
    return payload_scoring, response_data

In [11]:
import pandas as pd
payload_data = pd.read_csv("customer_segmentation_train.csv", sep=",", header=0, nrows=200).dropna()
cols_to_remove = []

no_of_records_to_score = len(payload_data)
print('no_of_records_to_score:' + str(no_of_records_to_score))

payload_scoring, scoring_response = sample_scoring(no_of_records_to_score=no_of_records_to_score)

no_of_records_to_score:171
Single record scoring result: 
 fields: ['prediction', 'probability'] 
 values:  ['D', [0.1287619158607908, 0.04219464309672481, 0.028847068231968923, 0.8001963728105155]]


In [12]:
def get_feedback_payload(payload_scoring, scoring_response):
    
    scoring_prediction_prob = pd.DataFrame(scoring_response['values'], columns = scoring_response['fields'])
    print('scoring_prediction_prob.columns: ' + str(scoring_prediction_prob.columns))
    
    #Create a Pandas dataframe out of the scoring response
    scoring_response_df = pd.concat([payload_data, scoring_prediction_prob], axis=1)
    scoring_response_df = scoring_response_df.dropna()
    
    print('len(scoring_response_df):'+str(len(scoring_response_df)))
    
    # Drop all the index columns and other columns which are not needed to be stored as part of the feedback logging
    drop_attributes = ["ID","Gender","Ever_Married","Age","Graduated","Profession","Work_Experience","Spending_Score","Family_Size","Var_1"]
    for attr in drop_attributes:
        scoring_response_df = scoring_response_df.drop(attr, axis=1)

    print('scoring_response_df.columns:' + str(scoring_response_df.columns))
    
    #Rename "predictedLabel" to "_original_prediction" and "probability" to "_original_probability"
    scoring_response_df = scoring_response_df.rename(columns = {'prediction' : '_original_prediction', 'probability' : '_original_probability'})
    #scoring_response_df[['_original_prediction', '_original_probability']].head()
    
    print('scoring_response_df.columns:' + str(scoring_response_df.columns))
    
    import datetime
    prefix = "tx_"
    i = 0
    for idx, row in scoring_response_df.iterrows():
        transaction_id = prefix + str(i)
        timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        scoring_response_df.loc[idx,'transaction_id'] = transaction_id
        scoring_response_df.loc[idx,'record_timestamp'] = timestamp
        i = i + 1
        
    #scoring_response_df['record_timestamp']
    #scoring_response_df.head()
    
    #Create the feedback payload
    feedback_fields = scoring_response_df.columns.tolist()
    feedback_values = scoring_response_df[feedback_fields].values.tolist()
    feedback_payload = {"fields": feedback_fields, "values": feedback_values}
    return feedback_payload
    

In [13]:
feedback_payload = get_feedback_payload(payload_scoring, scoring_response)

scoring_prediction_prob.columns: Index(['prediction', 'probability'], dtype='object')
len(scoring_response_df):147
scoring_response_df.columns:Index(['Segmentation', 'prediction', 'probability'], dtype='object')
scoring_response_df.columns:Index(['Segmentation', '_original_prediction', '_original_probability'], dtype='object')


In [14]:
feedback_payload

{'fields': ['Segmentation',
  '_original_prediction',
  '_original_probability',
  'transaction_id',
  'record_timestamp'],
 'values': [['D',
   'D',
   [0.1287619158607908,
    0.04219464309672481,
    0.028847068231968923,
    0.8001963728105155],
   'tx_0',
   '2021-10-21T18:48:13.959219Z'],
  ['B',
   'C',
   [0.16859530013067667,
    0.32652366255876863,
    0.44581708021456956,
    0.05906395709598516],
   'tx_1',
   '2021-10-21T18:48:13.963253Z'],
  ['B',
   'B',
   [0.19980766257486948,
    0.371312779232792,
    0.36235942880876654,
    0.0665201293835721],
   'tx_2',
   '2021-10-21T18:48:13.964525Z'],
  ['C',
   'C',
   [0.258614729925121,
    0.25603876919575286,
    0.2632697297868105,
    0.22207677109231558],
   'tx_3',
   '2021-10-21T18:48:13.965408Z'],
  ['C',
   'B',
   [0.23643275100376293,
    0.35421091774649927,
    0.3316333668268927,
    0.07772296442284507],
   'tx_4',
   '2021-10-21T18:48:13.966305Z'],
  ['D',
   'C',
   [0.05500158537841311,
    0.250034969143

# Configure OpenScale 

The notebook will now import the necessary libraries and set up a Python OpenScale client.

In [15]:
from ibm_watson_openscale import APIClient
from ibm_watson_openscale.utils import *
from ibm_watson_openscale.supporting_classes import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.base_classes.watson_open_scale_v2 import *


import json
import requests
import base64
from requests.auth import HTTPBasicAuth
import time

## Get a instance of the OpenScale SDK client

In [16]:
service_instance_id = '00000000-0000-0000-0000-1634636795973656'

In [17]:
authenticator = CloudPakForDataAuthenticator(
        url=WOS_CREDENTIALS['url'],
        username=WOS_CREDENTIALS['username'],
        password=WOS_CREDENTIALS['password'],
        disable_ssl_verification=True, 
    )

wos_client = APIClient(service_url=WOS_CREDENTIALS['url'],service_instance_id=service_instance_id, authenticator=authenticator)
wos_client.version

'3.0.12'

## Set up datamart

Watson OpenScale uses a database to store payload logs and calculated metrics. If database credentials were not supplied above, the notebook will use the free, internal lite database. If database credentials were supplied, the datamart will be created there unless there is an existing datamart and the KEEP_MY_INTERNAL_POSTGRES variable is set to True. If an OpenScale datamart exists in Db2 or PostgreSQL, the existing datamart will be used and no data will be overwritten.

Prior instances of the model will be removed from OpenScale monitoring.

In [18]:
wos_client.data_marts.show()

0,1,2,3,4,5
AIOSFASTPATHICP-00000000-0000-0000-0000-1634636795973656,Data Mart created by OpenScale ExpressPath,False,active,2021-10-19 09:59:04.688000+00:00,00000000-0000-0000-0000-1634636795973656


In [19]:
data_marts = wos_client.data_marts.list().result.data_marts
for data_mart in data_marts:
    print(data_mart)
data_mart_id=data_marts[0].metadata.id
print('Using existing datamart {}'.format(data_mart_id))

{
  "metadata": {
    "id": "00000000-0000-0000-0000-1634636795973656",
    "crn": "crn:v1:bluemix:public:aiopenscale:us-south:a/na:00000000-0000-0000-0000-1634636795973656:data_mart:00000000-0000-0000-0000-1634636795973656",
    "url": "/v2/data_marts/00000000-0000-0000-0000-1634636795973656",
    "created_at": "2021-10-19T09:59:04.688000Z",
    "created_by": "admin"
  },
  "entity": {
    "name": "AIOSFASTPATHICP-00000000-0000-0000-0000-1634636795973656",
    "description": "Data Mart created by OpenScale ExpressPath",
    "service_instance_crn": "N/A",
    "internal_database": false,
    "database_configuration": {
      "database_type": "db2",
      "credentials": {
        "secret_id": "7cf32e41-e419-45ea-bc4c-3d06f8190361"
      },
      "location": {
        "schema_name": "AIOSFASTPATHICP-00000000-0000-0000-0000-1634636795973656"
      }
    },
    "status": {
      "state": "active"
    }
  }
}
Using existing datamart 00000000-0000-0000-0000-1634636795973656


In [20]:
data_mart_details = wos_client.data_marts.list().result.data_marts[0]
data_mart_details.to_dict()

{'metadata': {'id': '00000000-0000-0000-0000-1634636795973656',
  'crn': 'crn:v1:bluemix:public:aiopenscale:us-south:a/na:00000000-0000-0000-0000-1634636795973656:data_mart:00000000-0000-0000-0000-1634636795973656',
  'url': '/v2/data_marts/00000000-0000-0000-0000-1634636795973656',
  'created_at': '2021-10-19T09:59:04.688000Z',
  'created_by': 'admin'},
 'entity': {'name': 'AIOSFASTPATHICP-00000000-0000-0000-0000-1634636795973656',
  'description': 'Data Mart created by OpenScale ExpressPath',
  'service_instance_crn': 'N/A',
  'internal_database': False,
  'database_configuration': {'database_type': 'db2',
   'credentials': {'secret_id': '7cf32e41-e419-45ea-bc4c-3d06f8190361'},
   'location': {'schema_name': 'AIOSFASTPATHICP-00000000-0000-0000-0000-1634636795973656'}},
  'status': {'state': 'active'}}}

In [21]:
wos_client.service_providers.show()

0,1,2,3,4,5
,active,Multiclass Headless Provider,custom_machine_learning,2021-10-21 18:31:46.927000+00:00,e15dabb5-ad1e-41d7-a87e-4d84aaab82f0
99999999-9999-9999-9999-999999999999,active,WOS ExpressPath WML pre_production binding,watson_machine_learning,2021-10-19 09:59:15.492000+00:00,f5ae2de0-23c1-44ee-91e3-0198b03ec689
99999999-9999-9999-9999-999999999999,active,WOS ExpressPath WML production binding,watson_machine_learning,2021-10-19 09:59:10.161000+00:00,e5565350-3a25-414a-a4da-33ebd7f9ce45


## Remove existing service provider

Multiple service providers for the same engine instance are avaiable in Watson OpenScale. To avoid multiple service providers of used WML instance in the tutorial notebook the following code deletes existing service provder(s) and then adds new one.

In [22]:
SERVICE_PROVIDER_NAME = "Multiclass Headless Provider"
SERVICE_PROVIDER_DESCRIPTION = "Added by tutorial WOS notebook to showcase Custom ML Provider functionality."

In [23]:
service_providers = wos_client.service_providers.list().result.service_providers
for service_provider in service_providers:
    service_instance_name = service_provider.entity.name
    if service_instance_name == SERVICE_PROVIDER_NAME:
        service_provider_id = service_provider.metadata.id
        wos_client.service_providers.delete(service_provider_id)
        print("Deleted existing service_provider for WML instance: {}".format(service_provider_id))

Deleted existing service_provider for WML instance: e15dabb5-ad1e-41d7-a87e-4d84aaab82f0


## Add service provider

Watson OpenScale needs to be bound to the Watson Machine Learning instance to capture payload data into and out of the model.

Note: Here the service provider is created with empty credentials, meaning no endpoint. Just to demonstrate the use case were we don't need an actual end point serving requests.

In [24]:
MLCredentials = {}
added_service_provider_result = wos_client.service_providers.add(
        name=SERVICE_PROVIDER_NAME,
        description=SERVICE_PROVIDER_DESCRIPTION,
        service_type=ServiceTypes.CUSTOM_MACHINE_LEARNING,
        operational_space_id = "production",
        credentials=MLCredentials,
        background_mode=False
    ).result
service_provider_id = added_service_provider_result.metadata.id




 Waiting for end of adding service provider c382d137-76e5-439a-9666-75386c17320d 




active

-----------------------------------------------
 Successfully finished adding service provider 
-----------------------------------------------




In [25]:
print(wos_client.service_providers.get(service_provider_id).result)

{
  "metadata": {
    "id": "c382d137-76e5-439a-9666-75386c17320d",
    "crn": "crn:v1:bluemix:public:aiopenscale:us-south:a/na:00000000-0000-0000-0000-1634636795973656:service_provider:c382d137-76e5-439a-9666-75386c17320d",
    "url": "/v2/service_providers/c382d137-76e5-439a-9666-75386c17320d",
    "created_at": "2021-10-21T18:48:33.576000Z",
    "created_by": "admin"
  },
  "entity": {
    "name": "Multiclass Headless Provider",
    "service_type": "custom_machine_learning",
    "credentials": {
      "secret_id": "69eef050-4a29-4256-a782-555289e3255c"
    },
    "operational_space_id": "production",
    "status": {
      "state": "active"
    }
  }
}


## Subscriptions

Remove existing credit risk subscriptions

This code removes previous subscriptions to the model to refresh the monitors with the new model and new data.

In [84]:
wos_client.subscriptions.show(limit=None)

0,1,2,3,4,5,6,7,8
1e55b01b-5727-4ea4-8c2a-ea281cc764bc,[asset] RC - Customer Segmentation Headless Subscription 70,00000000-0000-0000-0000-1634636795973656,0f7628f6-72df-485a-b5b9-54a3c5d2352f,[asset] RC - Customer Segmentation Headless Subscription 70,c382d137-76e5-439a-9666-75386c17320d,active,2021-10-21 21:10:27.839000+00:00,43d49a21-da50-4d89-95eb-70a582c1b7af
71e87096-558b-45b4-935c-6fee7b095959,[asset] RC - Customer Segmentation Headless Subscription 95,00000000-0000-0000-0000-1634636795973656,fffda38a-cad2-401e-9dbb-4b0b3d6d9a4f,[asset] RC - Customer Segmentation Headless Subscription 95,c382d137-76e5-439a-9666-75386c17320d,active,2021-10-21 21:00:04.559000+00:00,537a8991-9e71-4910-a33d-9b2dca83edf1
9d637f57-1c25-46c5-ac1e-f4b1c23dc8f1,[asset] RC - Customer Segmentation Headless Subscription 94,00000000-0000-0000-0000-1634636795973656,0c458d98-ec0d-44d8-9102-2a770be261b5,[asset] RC - Customer Segmentation Headless Subscription 94,c382d137-76e5-439a-9666-75386c17320d,active,2021-10-21 20:59:19.545000+00:00,bc491653-de23-4c85-af93-613dfafc6d69
850eebd4-0088-426d-bad5-b9424b7d257f,[asset] RC - Customer Segmentation Headless Subscription 93,00000000-0000-0000-0000-1634636795973656,7710b4ff-35e2-43ac-8617-b96c1a24b44e,[asset] RC - Customer Segmentation Headless Subscription 93,c382d137-76e5-439a-9666-75386c17320d,active,2021-10-21 20:58:35.344000+00:00,f85522d3-5827-4459-8df3-2dd1eca96aa8
197417d9-d47d-43bd-aa86-8c12d6c2a6f7,[asset] RC - Customer Segmentation Headless Subscription 92,00000000-0000-0000-0000-1634636795973656,1d73d036-4773-452c-8e94-8a3a19febf5f,[asset] RC - Customer Segmentation Headless Subscription 92,c382d137-76e5-439a-9666-75386c17320d,active,2021-10-21 20:57:50.742000+00:00,2d694646-3ac7-4517-9719-f1fd808eb43b
46722a74-9aad-4b31-b38e-69ca627b6533,[asset] RC - Customer Segmentation Headless Subscription 91,00000000-0000-0000-0000-1634636795973656,6d5a926d-e226-4ffa-b93b-90e4e0cd00c6,[asset] RC - Customer Segmentation Headless Subscription 91,c382d137-76e5-439a-9666-75386c17320d,active,2021-10-21 20:57:06.303000+00:00,4556d8c6-dc25-48a1-a531-be39664aa39d
65b4c591-fe57-4c22-8cd9-cf2e343d0ba3,[asset] RC - Customer Segmentation Headless Subscription 90,00000000-0000-0000-0000-1634636795973656,11b592be-c82e-420c-9153-35314bfc7927,[asset] RC - Customer Segmentation Headless Subscription 90,c382d137-76e5-439a-9666-75386c17320d,active,2021-10-21 20:56:22.461000+00:00,f206be7d-5ac3-4346-af75-cc12c287dcaa
e19cfcb6-47e9-4a59-94be-e87d9315a765,[asset] RC - Customer Segmentation Headless Subscription 89,00000000-0000-0000-0000-1634636795973656,51a7deb6-0689-4da0-be3a-69b9f6914d82,[asset] RC - Customer Segmentation Headless Subscription 89,c382d137-76e5-439a-9666-75386c17320d,active,2021-10-21 20:55:38.226000+00:00,3357a21e-4465-442e-a390-8d7fd7d5d24c
8b6119c2-3da2-4855-8873-52f4aae41e40,[asset] RC - Customer Segmentation Headless Subscription 88,00000000-0000-0000-0000-1634636795973656,ead6a5d6-fe74-4762-af1c-b550ef4098dd,[asset] RC - Customer Segmentation Headless Subscription 88,c382d137-76e5-439a-9666-75386c17320d,active,2021-10-21 20:54:59.536000+00:00,030c36ae-c456-4923-9aa3-858bef5f1312
efa5324b-0a14-450a-9c15-a8d363513fde,[asset] RC - Customer Segmentation Headless Subscription 87,00000000-0000-0000-0000-1634636795973656,42034cac-6ccb-4274-9691-953f4a0825c4,[asset] RC - Customer Segmentation Headless Subscription 87,c382d137-76e5-439a-9666-75386c17320d,active,2021-10-21 20:54:14.813000+00:00,50d6f0c1-ebd0-4f89-8d16-7431f1cc1557


Note: Only first 50 records were displayed. To display more use 'limit' parameter.


In [41]:
import urllib3, requests, json
def generate_cpd_access_token():
    headers={}
    headers["Accept"] = "application/json"
    auth = HTTPBasicAuth(WOS_CREDENTIALS["username"], WOS_CREDENTIALS["password"])
    
    ICP_TOKEN_URL= WOS_CREDENTIALS["url"] + "/v1/preauth/validateAuth"
    
    response = requests.get(ICP_TOKEN_URL, headers=headers, auth=auth, verify=False)
    json_data = response.json()
    icp_access_token = json_data['accessToken']
    return icp_access_token

In [42]:
cpd_access_token = generate_cpd_access_token()

In [43]:
cpd_header = {
    'Content-Type': 'application/json', 
    'Authorization': 'Bearer ' + cpd_access_token
}

In [29]:
print('data_mart_id: '+data_mart_id)
print('service_provider_id: '+service_provider_id)

data_mart_id: 00000000-0000-0000-0000-1634636795973656
service_provider_id: c382d137-76e5-439a-9666-75386c17320d


In [30]:
scoring_request = {
            "fields": ["ID","Gender","Ever_Married","Age","Graduated","Profession","Work_Experience","Spending_Score","Family_Size","Var_1"],
            "values": [
                [ 458989, "Female", "Yes", 36, "Yes", "Engineer", 0.0, "Low", 1.0, "Cat_6" ]
            ]
        }

In [31]:
scoring_response = {
    "predictions": [
        {
            "fields": [
                "prediction",
                "probability"
            ],
            "values": [
                [
                    "A",
                    [
                        0.3929703493872889,
                        0.332374467244737,
                        0.1574284593589472,
                        0.11722672400902688
                    ]
                ]
            ]
        }
    ]
}

In [32]:
feature_columns=["ID","Gender","Ever_Married","Age","Graduated","Profession","Work_Experience","Spending_Score","Family_Size","Var_1"]
cat_features=["Gender","Ever_Married","Graduated","Profession","Spending_Score","Var_1"]

In [33]:
def cleanup_subscription(SUBSCRIPTION_NAME):
    
    '''
    Remove the existing subscription
    '''

    subscriptions = wos_client.subscriptions.list().result.subscriptions
    for subscription in subscriptions:
        if subscription.entity.asset.name == '[asset] ' + SUBSCRIPTION_NAME:
            sub_model_id = subscription.metadata.id
            wos_client.subscriptions.delete(subscription.metadata.id)
            print('Deleted existing subscription for model', sub_model_id)    

In [107]:
def get_data_set_id_by_type(type):
    
    '''
    get the paylaod data set id for a given subscription
    '''
    
    data_set_id = None
    data_sets = wos_client.data_sets.list(type=type, 
                                                    target_target_id=subscription_id, 
                                                    target_target_type=TargetTypes.SUBSCRIPTION).result.data_sets
    print('Printing data sets')
    for data_set in data_sets:
        print(data_set)
    
    data_set_id = data_sets[0].metadata.id
    if data_set_id is None:
        print("Data set not found. Please check subscription status.")
    else:
        print("Data set id:", data_set_id)
        return data_set_id    

In [77]:
def get_payload_data_set_id(subscription_id):
    
    '''
    get the paylaod data set id for a given subscription
    '''
    
    payload_data_set_id = get_data_set_id_by_type(type=DataSetTypes.PAYLOAD_LOGGING)
    return payload_data_set_id

In [45]:
def get_feedback_data_set_id(subscription_id):
    '''
    get the feedback data set id for a given subscription
    '''    
    feedback_dataset_id = get_data_set_id_by_type(type=DataSetTypes.FEEDBACK)
    return feedback_dataset_id

In [36]:
def establish_payload_schema(payload_data_set_id):
    
    '''
    Push a payload record to setup the required schemas in the subscription
    '''
    
    #Note : No scoring is done against the model. The PayloadRecord is constructed with the request and response from the model/deployment.
    from ibm_watson_openscale.supporting_classes.payload_record import PayloadRecord
    import json 

    records_list=[]

    #manual PL logging for custom ml provider
    pl_record = PayloadRecord(request=scoring_request, response=scoring_response)
    records_list.append(pl_record)
    wos_client.data_sets.store_records(data_set_id=payload_data_set_id, request_body=records_list)    

In [81]:
def enable_subscription(SUBSCRIPTION_NAME):
    cleanup_subscription(SUBSCRIPTION_NAME)
    
    import uuid
    asset_id = str(uuid.uuid4())
    asset_name = '[asset] ' + SUBSCRIPTION_NAME
    url = ''

    asset_deployment_id = str(uuid.uuid4())
    asset_deployment_name = asset_name
    
    #This code creates the model subscription in OpenScale using the Python client API. Note that we need to provide the model unique identifier, and some information about the model itself.
    subscription_details = wos_client.subscriptions.add(
            data_mart_id=data_mart_id,
            service_provider_id=service_provider_id,
            asset=Asset(
                asset_id=asset_id,
                name=asset_name,
                url=url,
                asset_type=AssetTypes.MODEL,
                input_data_type=InputDataType.STRUCTURED,
                problem_type=ProblemType.BINARY_CLASSIFICATION
            ),
            deployment=AssetDeploymentRequest(
                deployment_id=asset_deployment_id,
                name=asset_deployment_name,
                deployment_type= DeploymentTypes.ONLINE
            ),
            asset_properties=AssetPropertiesRequest(
                label_column="Segmentation",
                probability_fields=["probability"],
                prediction_field="prediction",
                feature_fields = feature_columns,
                categorical_fields = cat_features
            )
        ).result
    subscription_id = subscription_details.metadata.id
    print('subscription_id: ' + subscription_id)
    
    import time
    time.sleep(15)
    payload_data_set_id = get_payload_data_set_id(subscription_id)
    
    establish_payload_schema(payload_data_set_id)
    
    time.sleep(5)
    pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
    print("Number of records in the payload logging table: {}".format(pl_records_count))    
    
    return subscription_id

In [46]:
def enable_quality_monitoring(subscription_id):
    '''
    Enable quality monitoring
    '''

    target = Target(
            target_type=TargetTypes.SUBSCRIPTION,
            target_id=subscription_id
    )
    parameters = {
        "min_feedback_data_size": 100
    }
    thresholds = [
                    {
                        "metric_id": "accuracy",
                        "type": "lower_limit",
                        "value": .80
                    }
                ]
    quality_monitor_details = wos_client.monitor_instances.create(
        data_mart_id=data_mart_id,
        background_mode=False,
        monitor_definition_id=wos_client.monitor_definitions.MONITORS.QUALITY.ID,
        target=target,
        parameters=parameters,
        thresholds=thresholds
    ).result
    
    quality_monitor_instance_id = quality_monitor_details.metadata.id
    return quality_monitor_instance_id

In [47]:
def store_feedback_record(feedback_dataset_id):
    
    '''
    Store the feedback payload using the data sets API
    '''
    
    DATASETS_STORE_RECORDS_URL =   WOS_CREDENTIALS["url"] + "/openscale/{0}/v2/data_sets/{1}/records".format(data_mart_id, feedback_dataset_id)
    print('DATASETS_STORE_RECORDS_URL: '+DATASETS_STORE_RECORDS_URL)
    response = requests.post(DATASETS_STORE_RECORDS_URL, json=feedback_payload, headers=cpd_header, verify=False)
    json_data = response.json()
    print(json_data)
    
    # Wait for sometime, and make sure the records have reached to data sets related table.
    time.sleep(10)
    DATASETS_STORE_RECORDS_URL =   WOS_CREDENTIALS["url"] + "/openscale/{0}/v2/data_sets/{1}/records?limit={2}&include_total_count={3}".format(data_mart_id, feedback_dataset_id, 1, "true")
    response = requests.get(DATASETS_STORE_RECORDS_URL, headers=cpd_header, verify=False)
    json_data = response.json()
    print(json_data['total_count'])    

In [52]:
def run_quality_monitor(quality_monitor_instance_id):
    run_details = wos_client.monitor_instances.run(monitor_instance_id=quality_monitor_instance_id, background_mode=False).result
    wos_client.monitor_instances.show_metrics(monitor_instance_id=quality_monitor_instance_id)

In [115]:
for i in range(200, 201):
    SUBSCRIPTION_NAME = "RC - Customer Segmentation Headless Subscription " + str(i)
    print('Setting up the subscription: ' + str(SUBSCRIPTION_NAME))
    
    # enable the subscription
    print('Enabling the subscription..')
    subscription_id = enable_subscription(SUBSCRIPTION_NAME)
    print('Subscription enabled: '+ subscription_id)
    
    
    # enable the quality monitor for the given subscription
    print('Enable quality monitor for the subscription..')
    quality_monitor_instance_id = enable_quality_monitoring(subscription_id)
    print('Quality Monitor instance id: '+ quality_monitor_instance_id)
    
    import time
    time.sleep(10)    
    # get the feedback data set id for a given subscription
    feedback_dataset_id = get_feedback_data_set_id(subscription_id)
    
    #store the feedback records for a given feedback data set id
    print('storing the feedback records..')
    store_feedback_record(feedback_dataset_id)
    
    # and then run the quality monitor
    print('Run the quality monitor')
    run_quality_monitor(quality_monitor_instance_id)

Setting up the subscription: RC - Customer Segmentation Headless Subscription 200
Enabling the subscription..
subscription_id: b1a8a2f7-86cc-4d40-9adc-7ad4257c3648
Printing data sets
{
  "metadata": {
    "id": "bd25f400-6ecf-495c-8ff1-36a8b6ab440f",
    "crn": "crn:v1:bluemix:public:aiopenscale:us-south:a/na:00000000-0000-0000-0000-1634636795973656:data_set:bd25f400-6ecf-495c-8ff1-36a8b6ab440f",
    "url": "/v2/data_sets/bd25f400-6ecf-495c-8ff1-36a8b6ab440f",
    "created_at": "2021-10-22T03:03:44.156000Z",
    "created_by": "internal-service",
    "modified_at": "2021-10-22T03:12:17.888000Z",
    "modified_by": "internal-service"
  },
  "entity": {
    "data_mart_id": "00000000-0000-0000-0000-1634636795973656",
    "name": "b50b521a-ec86-4598-964b-349c7298aff2_pl",
    "description": "b50b521a-ec86-4598-964b-349c7298aff2_pl",
    "type": "payload_logging",
    "target": {
      "target_type": "subscription",
      "target_id": "b50b521a-ec86-4598-964b-349c7298aff2"
    },
    "schema

IndexError: list index out of range

In [116]:
subscription_id = 'b1a8a2f7-86cc-4d40-9adc-7ad4257c3648'

In [117]:
payload_data_set_id = get_payload_data_set_id(subscription_id)
print(payload_data_set_id)

establish_payload_schema(payload_data_set_id)

Printing data sets
{
  "metadata": {
    "id": "5863362a-8ca1-41bf-aa80-dda954366172",
    "crn": "crn:v1:bluemix:public:aiopenscale:us-south:a/na:00000000-0000-0000-0000-1634636795973656:data_set:5863362a-8ca1-41bf-aa80-dda954366172",
    "url": "/v2/data_sets/5863362a-8ca1-41bf-aa80-dda954366172",
    "created_at": "2021-10-22T03:14:26.736000Z",
    "created_by": "internal-service"
  },
  "entity": {
    "data_mart_id": "00000000-0000-0000-0000-1634636795973656",
    "name": "b1a8a2f7-86cc-4d40-9adc-7ad4257c3648_pl",
    "description": "b1a8a2f7-86cc-4d40-9adc-7ad4257c3648_pl",
    "type": "payload_logging",
    "target": {
      "target_type": "subscription",
      "target_id": "b1a8a2f7-86cc-4d40-9adc-7ad4257c3648"
    },
    "schema_update_mode": "auto",
    "data_schema": {
      "type": "struct",
      "fields": [
        {
          "metadata": {
            "columnInfo": {
              "columnLength": 128
            },
            "modeling_role": "record-id",
            "p

In [118]:
time.sleep(5)
pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
print("Number of records in the payload logging table: {}".format(pl_records_count))    

Number of records in the payload logging table: 1


In [125]:
# enable the quality monitor for the given subscription
print('Enable quality monitor for the subscription..')
quality_monitor_instance_id = 'eb5a8009-4b20-4125-9876-2b5174bad4d9' #enable_quality_monitoring(subscription_id)
print('Quality Monitor instance id: '+ quality_monitor_instance_id)

import time
time.sleep(10)    
# get the feedback data set id for a given subscription
feedback_dataset_id = get_feedback_data_set_id(subscription_id)

#store the feedback records for a given feedback data set id
print('storing the feedback records..')
store_feedback_record(feedback_dataset_id)

# and then run the quality monitor
print('Run the quality monitor')
run_quality_monitor(quality_monitor_instance_id)

Enable quality monitor for the subscription..
Quality Monitor instance id: eb5a8009-4b20-4125-9876-2b5174bad4d9
Printing data sets


IndexError: list index out of range

Author: Ravi Chamarthy (ravi.chamarthy@in.ibm.com)