# watsonx.governance: Monitor external machine learning provider

1. [Set up the environment](#setup_environment)
1. [Load and explore data](#load_explore_data)
1. [Configure OpenScale](#configure_openscale)
1. [Score the model](#score_model)
1. [Configure monitors](#configure_monitors)

In [None]:
%pip install --upgrade ibm-watson-machine-learning
%pip install --upgrade ibm-watson-openscale
%pip install --upgrade ibm_wos_utils

In [None]:
import uuid
import time
import json
import requests
import pandas as pd

<a id="setup_environment"></a>
## 1. Set up the environment

In [None]:
# Placeholder: replace with the API key used to authenticate the OpenScale
# client (it is passed to IAMAuthenticator when the client is created).
WOS_API_KEY = "WOS_API_KEY"

<a id="load_explore_data"></a>
## 2. Load and explore data

In [None]:
# Load the credit-risk training data from a path relative to the working directory.
df_training = pd.read_csv('../data/credit_risk_training.csv')
# Preview the first rows of the training data.
df_training.head()

<a id="explore_prepare_data"></a>
### 2.1 Construct the scoring payload

In [None]:
def get_scoring_payload(df, no_of_records_to_score=1, cols_to_remove=None):
    """Build a ``{"fields": [...], "values": [...]}`` scoring payload from a DataFrame.

    Args:
        df: Source pandas DataFrame. It is not modified.
        no_of_records_to_score: Number of leading rows to include in the payload.
        cols_to_remove: Optional iterable of column names to leave out of the
            payload (for example the label column). Names that are not
            columns of ``df`` are ignored.

    Returns:
        dict: ``"fields"`` holds the remaining column names in their original
        order and ``"values"`` holds one list of cell values per selected row.
    """
    # ``None`` replaces the mutable ``[]`` default, which would be shared
    # between calls.
    columns_to_drop = [] if cols_to_remove is None else list(cols_to_remove)

    # ``drop`` returns a new frame, so the caller's DataFrame stays untouched;
    # ``errors="ignore"`` skips names that are not present.
    features = df.drop(columns=columns_to_drop, errors="ignore")

    fields = features.columns.tolist()
    # Slice before converting so only the requested rows are materialised.
    values = features.iloc[:no_of_records_to_score].values.tolist()

    return {"fields": fields, "values": values}

In [None]:
# Build a one-record sample payload without the "Risk" column and print it.
payload_scoring = get_scoring_payload(df_training, 1, ["Risk"])
print(payload_scoring)

### 2.2 Function to perform scoring

In [None]:
def custom_ml_scoring(payload_scoring,
                      scoring_url,
                      timeout=60,
                      verify=False):
    """POST a scoring payload to the model endpoint and return the decoded JSON reply.

    Args:
        payload_scoring: JSON-serialisable request body, sent as the JSON
            payload of the request.
        scoring_url: URL of the scoring endpoint.
        timeout: Seconds to wait for the endpoint before giving up, so an
            unresponsive endpoint cannot block the notebook indefinitely.
        verify: Forwarded to ``requests.post`` as ``verify``. Defaults to
            ``False`` to keep the previous behaviour of this function.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    headers = {"Content-Type": "application/json"}

    # NOTE(security): verify=False disables TLS certificate verification.
    # Pass verify=True (or a CA bundle path) for endpoints with valid
    # certificates.
    response = requests.post(url=scoring_url,
                             json=payload_scoring,
                             headers=headers,
                             timeout=timeout,
                             verify=verify)

    # Fail with the HTTP status instead of trying to decode an error page.
    response.raise_for_status()
    return response.json()

### 2.3 Function to perform payload logging

In [None]:
from ibm_watson_openscale.supporting_classes.payload_record import PayloadRecord

def payload_logging(openscale_client, payload_data_set_id, payload_scoring, scoring_response,
                    response_time=460, wait_seconds=10):
    """Store one scoring request/response pair in a payload logging data set.

    Args:
        openscale_client: OpenScale client whose ``data_sets`` API is used.
        payload_data_set_id: Id of the payload logging data set to write to.
        payload_scoring: The request that was sent to the scoring endpoint.
        scoring_response: The response returned by the scoring endpoint.
        response_time: Response time recorded with the payload record
            (converted with ``int``). Defaults to the previously hard-coded 460.
        wait_seconds: Seconds to sleep before reading the record count.
            Defaults to the previously hard-coded 10.

    Returns:
        str: The generated scoring id (a random UUID) attached to the record.
    """
    scoring_id = str(uuid.uuid4())

    record = PayloadRecord(scoring_id=scoring_id,
                           request=payload_scoring,
                           response=scoring_response,
                           response_time=int(response_time))
    openscale_client.data_sets.store_records(data_set_id=payload_data_set_id, request_body=[record])

    # Presumably gives the service time to persist the record before counting -- confirm.
    time.sleep(wait_seconds)
    records_count = openscale_client.data_sets.get_records_count(payload_data_set_id)
    print("Number of records in the payload logging table: {}".format(records_count))
    return scoring_id

<a id="configure_openscale"></a>
## 3. Configure OpenScale

In [None]:
from ibm_watson_openscale import APIClient
from ibm_watson_openscale.utils import *
from ibm_watson_openscale.supporting_classes import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.base_classes.watson_open_scale_v2 import *

In [None]:
# Authenticate with the API key and create the OpenScale client.
# NOTE(review): IAMAuthenticator is not imported explicitly in this notebook;
# presumably one of the star imports above provides it -- confirm.
authenticator = IAMAuthenticator(apikey=WOS_API_KEY)
wos_client = APIClient(authenticator=authenticator)
# Evaluate the client version as the last expression of the cell.
wos_client.version

In [None]:
# Show the data marts visible to this client.
wos_client.data_marts.show()

In [None]:
# Use the first existing data mart; stop with a clear message when there is none.
data_marts = wos_client.data_marts.list().result.data_marts
# A truthiness check also covers a ``None`` result, which ``len()`` would
# reject with a TypeError instead of the intended message.
if not data_marts:
    raise Exception("Missing data mart.")
data_mart_id = data_marts[0].metadata.id
print('Using existing datamart: {}'.format(data_mart_id))

### 3.1 Add custom service provider

In [None]:
# Random 8-character hexadecimal prefix prepended to the resource names below.
PREFIX = uuid.uuid4().hex[:8]

SERVICE_PROVIDER_NAME = f"{PREFIX} External ML Provider"
SERVICE_PROVIDER_DESCRIPTION = f"{PREFIX} Added external WOS provider"

In [None]:
# Register a custom machine learning service provider in OpenScale.
request_headers = { "Content-Type": "application/json" }
# No credentials are supplied for this provider (empty dict).
MLCredentials = {}
added_service_provider_result = wos_client.service_providers.add(
        name=SERVICE_PROVIDER_NAME,
        description=SERVICE_PROVIDER_DESCRIPTION,
        service_type=ServiceTypes.CUSTOM_MACHINE_LEARNING,
        request_headers=request_headers,
        operational_space_id = "production",
        credentials=MLCredentials,
        background_mode=False
    ).result

# Keep the provider id; the subscription created later references it.
service_provider_id = added_service_provider_result.metadata.id

In [None]:
# Report the id of the service provider that was just registered.
print(f"Service provider created with id: {service_provider_id}")

### 3.2 Add subscription

In [None]:
label_column = "Risk"

# Feature columns are every training column except the label.
wos_training_df = df_training.drop(columns=[label_column])
feature_columns = wos_training_df.columns.to_list()

# Use the public ``select_dtypes`` API instead of the private
# ``_get_numeric_data``; "bool" is included because the private helper also
# treats boolean columns as numeric.
num_cols = wos_training_df.select_dtypes(include=["number", "bool"]).columns
# Filter in column order rather than via a set difference, so the list of
# categorical columns is the same on every run.
cat_columns = [col for col in feature_columns if col not in num_cols]

print(feature_columns)
print(cat_columns)

In [None]:
# Headers stored with the subscription's scoring endpoint definition.
# NOTE(review): "X-Wos-Request" is a boolean here, whereas HTTP header values
# are normally strings -- confirm the service accepts this.
scoring_request_headers = { "Content-Type": "application/json", "X-Wos-Request": True }

SUBSCRIPTION_NAME = PREFIX + " " + "External ML - All Monitors"
# Placeholder: replace with the URL of the external model's scoring endpoint.
SCORING_ENDPOINT_URL = "SCORING_ENDPOINT_URL"

# Random ids used to identify the external asset and its deployment.
ASSET_ID = str(uuid.uuid4())
ASSET_DEPLOYMENT_ID = str(uuid.uuid4())

# COS credentials (training data storage)
# The values below are placeholders to be replaced with real COS details.
COS_API_KEY_ID="COS_API_KEY_ID"
COS_RESOURCE_CRN="COS_RESOURCE_CRN"
COS_ENDPOINT="https://s3.us-east.cloud-object-storage.appdomain.cloud"

# Placeholders: bucket and file name of the training data, as used in the
# training data reference of the subscription.
BUCKET_NAME="BUCKET_NAME"
FILE_NAME="FILE_NAME"

# IAM token endpoint passed as "iam_url" in the COS connection.
IAM_URL="https://iam.bluemix.net/oidc/token"

In [None]:
# Create the subscription that ties the external model (asset + deployment)
# to the data mart and the service provider registered earlier.
subscription_details = wos_client.subscriptions.add(
        data_mart_id=data_mart_id,
        service_provider_id=service_provider_id,
        # The asset is described as a structured-data binary classification model.
        asset=Asset(
            asset_id=ASSET_ID,
            name=SUBSCRIPTION_NAME,
            url=SCORING_ENDPOINT_URL,
            asset_type=AssetTypes.MODEL,
            input_data_type=InputDataType.STRUCTURED,
            problem_type=ProblemType.BINARY_CLASSIFICATION
        ),
        # Online deployment reachable at the scoring endpoint URL.
        deployment=AssetDeploymentRequest(
            deployment_id=ASSET_DEPLOYMENT_ID,
            name=SUBSCRIPTION_NAME,
            deployment_type= DeploymentTypes.ONLINE,
            scoring_endpoint=ScoringEndpointRequest(
                url=SCORING_ENDPOINT_URL,
                request_headers=scoring_request_headers
            )
        ),
        # Column roles, plus the COS location of the training data.
        # NOTE(review): "probability" and "prediction" are presumably the field
        # names returned by the scoring endpoint -- confirm against its response.
        asset_properties=AssetPropertiesRequest(
            label_column=label_column,
            probability_fields=["probability"],
            prediction_field="prediction",
            feature_fields = feature_columns,
            categorical_fields=cat_columns,
            training_data_reference=TrainingDataReference(type="cos",
                                                          location=COSTrainingDataReferenceLocation(bucket = BUCKET_NAME,
                                                                                                    file_name = FILE_NAME),
                                                          connection=COSTrainingDataReferenceConnection.from_dict({
                                                                        "resource_instance_id": COS_RESOURCE_CRN,
                                                                        "url": COS_ENDPOINT,
                                                                        "api_key": COS_API_KEY_ID,
                                                                        "iam_url": IAM_URL}))
        )
    ).result
# Keep the subscription id; data set lookups and monitors below target it.
subscription_id = subscription_details.metadata.id

In [None]:
# Report the id of the subscription that was just created.
print(f"Subscription created with id: {subscription_id}")

In [None]:
# Presumably gives the service time to create the subscription's data sets
# before they are looked up -- confirm. (``time`` is imported at the top of
# the notebook.)
time.sleep(10)

payload_data_sets = wos_client.data_sets.list(type=DataSetTypes.PAYLOAD_LOGGING,
                                              target_target_id=subscription_id,
                                              target_target_type=TargetTypes.SUBSCRIPTION).result.data_sets

# Indexing ``[0]`` on an empty result raised IndexError, so the "not found"
# message below could never be printed; check for an empty result first.
payload_data_set_id = payload_data_sets[0].metadata.id if payload_data_sets else None
if payload_data_set_id is None:
    print("Payload data set not found. Please check subscription status.")
else:
    print('Payload data set id: {}'.format(payload_data_set_id))

### 3.3 Update subscription scoring endpoint

NOTE: When working with external models, you are responsible for performing payload logging yourself. Use the `payload_data_set_id` obtained above when deploying your model scoring endpoint, then update the subscription with that endpoint's URL. COMPLETE THESE STEPS BEFORE SETTING UP THE MONITORS!

Python payload logging example: https://github.com/leonardofurnielis/sample-model-container/blob/wos-payload-logging/main.py

In [None]:
# Point the subscription at the scoring endpoint that performs payload logging.
# SCORING_ENDPOINT_URL must hold that endpoint's URL before this cell runs.
# The variable replaces two hard-coded "SCORING_ENDPOINT_URL" literals, so the
# subscription and the scoring cell below always use the same URL.
wos_client.subscriptions.update(
    subscription_id=subscription_id,
    patch_document=[
        {
            "op": "replace",
            "path": "/asset/url",
            "value": SCORING_ENDPOINT_URL,
        },
        {
            "op": "replace",
            "path": "/deployment/scoring_endpoint/url",
            "value": SCORING_ENDPOINT_URL,
        },
    ],
).result

<a id="score_model"></a>
## 4. Score the model

In [None]:
# Build a payload from the first 50 training rows, without the label column.
no_of_records_to_score = 50
payload_scoring = get_scoring_payload(df_training, no_of_records_to_score, [label_column])
print(payload_scoring)

In [None]:
# Send the payload to the scoring endpoint and print the decoded JSON response.
scoring_response = custom_ml_scoring(payload_scoring, SCORING_ENDPOINT_URL)
print(scoring_response)

In [None]:
# Manual payload logging through the OpenScale client, left disabled.
# NOTE(review): presumably disabled because the scoring endpoint is expected to
# log its own payloads (see the note in section 3.3) -- confirm before enabling.
# scoring_id = payload_logging(openscale_client=wos_client,
#                              payload_data_set_id=payload_data_set_id, 
#                              payload_scoring=payload_scoring, 
#                              scoring_response=scoring_response)
# print(scoring_id)

<a id="configure_monitors"></a>
## 5. Configure monitors

### 5.1 Quality monitoring and feedback logging

In [None]:
# The quality monitor is attached to the subscription created earlier.
target = Target(target_type=TargetTypes.SUBSCRIPTION, target_id=subscription_id)
parameters = {"min_feedback_data_size": 50}

# Lower-limit threshold for each quality metric, as (metric id, value) pairs.
quality_metrics = wos_client.monitor_definitions.MONITORS.QUALITY.METRIC
lower_limits = [
    (quality_metrics.AREA_UNDER_ROC, .85),
    (quality_metrics.ACCURACY, .75),
    (quality_metrics.PRECISION, .90),
    (quality_metrics.RECALL, .70),
]
thresholds = [
    {"metric_id": metric_id, "type": "lower_limit", "value": limit}
    for metric_id, limit in lower_limits
]

# Create the quality monitor instance with the thresholds above.
quality_monitor_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.QUALITY.ID,
    target=target,
    parameters=parameters,
    thresholds=thresholds,
).result

In [None]:
# Keep the monitor instance id; it is needed to trigger a monitor run later.
quality_monitor_instance_id = quality_monitor_details.metadata.id
print(f"Quality monitor id: {quality_monitor_instance_id}")

In [None]:
# Load the credit-risk test data used as feedback records for the quality monitor.
df_test = pd.read_csv('../data/credit_risk_test.csv')
# Preview the first rows of the test data.
df_test.head()

In [None]:
no_of_records_to_eval = 50
feedback_columns = df_test.columns.to_list()

# One {column name: value} dict per feedback record, built from the first
# ``no_of_records_to_eval`` rows. ``zip`` pairs each column name with the
# matching cell, replacing the manual ``range(len(...))`` index loop.
feedback_data = [
    dict(zip(feedback_columns, row))
    for row in df_test.values[:no_of_records_to_eval]
]

In [None]:
# Look up the feedback data set that belongs to the subscription.
feedback_dataset = wos_client.data_sets.list(type=DataSetTypes.FEEDBACK,
                                             target_target_id=subscription_id,
                                             target_target_type=TargetTypes.SUBSCRIPTION).result

# Indexing ``[0]`` on an empty result raised IndexError, so the "not found"
# message below could never be printed; check for an empty result first.
feedback_dataset_id = (
    feedback_dataset.data_sets[0].metadata.id if feedback_dataset.data_sets else None
)
if feedback_dataset_id is None:
    print("Feedback data set not found. Please check quality monitor status.")
else:
    print('Feedback data set id: {}'.format(feedback_dataset_id))

Store the feedback data in the feedback dataset

In [None]:
# Write the feedback records built above into the feedback data set.
# NOTE(review): background_mode=False presumably waits for the request to finish -- confirm.
wos_client.data_sets.store_records(feedback_dataset_id, request_body=feedback_data, background_mode=False)

Verify the number of feedback records in the feedback dataset

In [None]:
# Read back the number of records stored in the feedback data set.
wos_client.data_sets.get_records_count(data_set_id=feedback_dataset_id)

Run the quality monitor

In [None]:
# Trigger a run of the quality monitor instance and keep the returned run details.
# NOTE(review): background_mode=False presumably waits for the run to finish -- confirm.
run_details = wos_client.monitor_instances.run(monitor_instance_id=quality_monitor_instance_id, background_mode=False).result