# Trusted AI IBM-Azure usecase: monitoring Azure model

**Build & Deploy** Machine Learning model in **Azure Machine Learning Studio**

**Monitor** the model in **IBM Watson OpenScale**

### Contents

1. [Set up the environment](#setup_environment)
1. [Explore and prepare training data](#explore_prepare_data)
1. [Create train and test dataset](#train_test_set)
1. [Train the model](#train_model)
1. [Save the model in Azure](#save_model)
1. [Create a custom entry script](#custom_score_script)
1. [Deploy and score in Azure](#deploy_model)

In [None]:
%pip install ibm_cloud_sdk_core
%pip install ibm-watson-openscale

In [1]:
import os

import pandas as pd
import numpy as np

<a id="setup_environment"></a>
## 1. Set up the environment

IBM credentials

In [None]:
CLOUD_WOS = True
API_KEY = "***"

DB_CREDENTIALS=None

SCHEMA_NAME = 'azure_dm'
IAM_URL="https://iam.ng.bluemix.net/oidc/token"

Azure credentials

In [203]:
AZURE_ENGINE_CREDENTIALS =  {
    "client_id": "***",
    "client_secret": "***",
    "tenant": "***",
    "subscription_id": "***"
}

<a id="explore_prepare_data"></a>
## 2. Explore and prepare training data

NOTE: read from `/data` directory if running locally

In [None]:
df = pd.read_csv('../data/credit_risk_training.csv')
df.head()

In [None]:
print('Columns: ', list(df.columns))
print('Number of columns: ', len(df.columns))

<a id="train_test_set"></a>
## 3. Create train and test dataset

NOTE: Test dataset (20%) and Training dataset (80%)

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

In [None]:
train_data, test_data = train_test_split(df, test_size=0.2)

In [None]:
features_idx = np.s_[0:-1]
all_records_idx = np.s_[:]
first_record_idx = np.s_[0]

In [None]:
string_fields = [type(fld) is str for fld in train_data.iloc[first_record_idx, features_idx]]
ct = ColumnTransformer([("ohe", OneHotEncoder(), list(np.array(train_data.columns)[features_idx][string_fields]))])
clf_linear = SGDClassifier(loss='log_loss', penalty='l2', max_iter=1000, tol=1e-5)

pipeline = Pipeline([('ct', ct), ('clf_linear', clf_linear)])

<a id="train_model"></a>
## 4. Train the model

Create a Scikit-learn models.

In [None]:
MODEL_NAME = "Scikit Credit Risk Model Azure ML"
MODEL_NAME_SHORT = 'azure_credit_risk_model'
DEPLOYMENT_NAME = "Scikit Credit Risk Deployment Azure ML"

In [None]:
risk_model = pipeline.fit(train_data.drop('Risk', axis=1), train_data.Risk)

In [None]:
from sklearn.metrics import roc_auc_score

predictions = risk_model.predict(test_data.drop('Risk', axis=1))
indexed_preds = [0 if prediction=='No Risk' else 1 for prediction in predictions]

real_observations = test_data.Risk.replace('Risk', 1)
real_observations = real_observations.replace('No Risk', 0).values

auc = roc_auc_score(real_observations, indexed_preds)
print(auc)

In [None]:
import joblib

joblib.dump(risk_model, '../model/' + MODEL_NAME_SHORT+ ".pkl")

<a id="save_model"></a>
## 5. Save the model in Azure

In [None]:
%pip install azure-ai-ml azure-identity

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

In [204]:
os.environ["AZURE_CLIENT_ID"] = AZURE_ENGINE_CREDENTIALS["client_id"]
os.environ["AZURE_TENANT_ID"] = AZURE_ENGINE_CREDENTIALS["tenant"]
os.environ["AZURE_CLIENT_SECRET"] = AZURE_ENGINE_CREDENTIALS["client_secret"]

In [205]:
# authenticate
# https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?view=azureml-api-2&tabs=sdk

credential = DefaultAzureCredential()

# credential.get_token("https://management.azure.com/.default")

# Get a handle to the workspace
az_ml_client = MLClient(
    credential=credential,
    subscription_id="***",
    resource_group_name="watsonx_governance",
    workspace_name="ml_integration",
)

In [206]:
ws = az_ml_client.workspaces.get('ml_integration')
print("LOCATION:", ws.location, "NAME:", ws.name, "RESOURCE_GROUP:", ws.resource_group)

LOCATION: eastus NAME: ml_integration RESOURCE_GROUP: watsonx_governance


In [None]:
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

file_model = Model(
    path='../model/'+MODEL_NAME_SHORT+'.pkl',
    type=AssetTypes.CUSTOM_MODEL,
    name=MODEL_NAME_SHORT,
    description='Random Forest Model to classify credit risk with probability',

)

az_ml_client.models.create_or_update(file_model)

In [162]:
models = az_ml_client.models.list()
for model in models:
    print(model.name)

azure_credit_risk_model


In [163]:
# Let's pick the latest version of the model
latest_model_version = max(
    [int(m.version) for m in az_ml_client.models.list(name=MODEL_NAME_SHORT)]
)
print(f'Latest model is version "{latest_model_version}" ')

Latest model is version "1" 


<a id="custom_score_script"></a>
## 6. Create a custom entry script for the scoring response

In [177]:
%%writefile ../script/azure_score.py


import os
import json
import joblib

import pandas as pd

def init():
    """
    This function is called when the container is initialized/started, typically after create/update of the deployment.
    You can write the logic here to perform init operations like caching the model in memory
    """
    global model
    model_path = os.path.join(str(os.getenv("AZUREML_MODEL_DIR")), "azure_credit_risk_model.pkl") 
    model = joblib.load(model_path)

    print("===> Init complete")

def run(input_payload):
    """
    This function is called for every invocation of the endpoint to perform the actual scoring/prediction.
    In the example we extract the data from the json input and call the scikit-learn model's predict()
    method and return the result back
    """
    print("===> Request received")
    try:
        if type(input_payload) is str:
            dict_data = json.loads(input_payload)
        else:
            dict_data = input_payload

        data = pd.DataFrame.from_dict(dict_data["input"])
        predictions = model.predict(data)
        scores = model.predict_proba(data)
        risk_column = []
        proba_column = []
        proba_vector = []

        for pred, proba in zip(predictions, scores):
            risk_column.append(pred)
            proba_vector.append([proba[0], proba[1]])
            if pred == "No Risk":
                proba_column.append(proba[0])
            else:
                proba_column.append(proba[1])
        data["Scored Labels"] = risk_column
        data["Scored Probabilities"] = proba_column
        data["ProbabilityVector"] = proba_vector

        result = { "output": data.to_dict('records') }
        print("===> Request processed")

        return result
    except Exception as e:
        result = str(e)
        return { "error": result }


Writing ../script/score_azure.py


<a id="create_endpoint"></a>
## 7. Create an online endpoint

In [179]:
import uuid

# Creating a unique name for the endpoint
online_endpoint_name = MODEL_NAME_SHORT + '_' + str(uuid.uuid4())[:8]
online_endpoint_name = online_endpoint_name.replace('_', '-')

print(online_endpoint_name)

azure-credit-risk-model-9535e6d8


In [180]:
# Expect the endpoint creation to take a few minutes
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Environment,
    CodeConfiguration,
)

# create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name, 
    description="This is a sample online endpoint",
    auth_mode="key"
)

endpoint = az_ml_client.online_endpoints.begin_create_or_update(endpoint).result()

print(f"Endpoint {endpoint.name} provisioning state: {endpoint.provisioning_state}")

Endpoint azure-credit-risk-model-9535e6d8 provisioning state: Succeeded


In [181]:
endpoint = az_ml_client.online_endpoints.get(name=online_endpoint_name)

print(
    f'Endpoint "{endpoint.name}" with provisioning state "{endpoint.provisioning_state}" is retrieved'
)

Endpoint "azure-credit-risk-model-9535e6d8" with provisioning state "Succeeded" is retrieved


<a id="create_custom_environment"></a>
## 8. Create a custom environment for python

In [185]:
environment = Environment(
    name="azure-sklearn1.3-env",
    description="Environment created from a Docker image plus Conda environment",
    conda_file="../az-environment/conda.yaml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
)

In [None]:
az_ml_client.environments.create_or_update(environment)

In [196]:
# Let's pick the latest version of the environment
latest_env_version = max(
    [int(m.version) for m in az_ml_client.environments.list(name="azure-sklearn1.3-env")]
)
print(f'Latest model is version "{latest_env_version}" ')

Latest model is version "5" 


In [197]:
environment = az_ml_client.environments.get(name="credit-risk-azure-env", version=latest_env_version)

In [199]:
environment

Environment({'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'credit-risk-azure-env', 'description': 'Environment created from a Docker image plus Conda environment', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': True, 'id': '/subscriptions/9a863ea3-cd25-4a99-a668-e1fae13107d6/resourceGroups/watsonx_governance/providers/Microsoft.MachineLearningServices/workspaces/ml_integration/environments/credit-risk-azure-env/versions/5', 'Resource__source_path': None, 'base_path': '/Users/leonardofurnielis/Documents/github/wml-toolkit/jupyter', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x13ec892a0>, 'serialize': <msrest.serialization.Serializer object at 0x13d734e80>, 'version': '5', 'latest_version': None, 'conda_file': {'channels': ['conda-forge'], 'dependencies': ['python=3.10', 'pip=23.3.1', {'pip': ['pandas==1.5.3', 'numpy==1.23.5', 'scikit-learn==1.3.1', 's

<a id="deploy_model"></a>
## 9. Deploy and score

NOTE: Deploy and score the model deployed at Azure ML Studio

In [209]:
%%time
model = az_ml_client.models.get(name=MODEL_NAME_SHORT, version=latest_model_version)

blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    model=model,
    instance_type="Standard_D2as_v4", # Standard_DS3_v2
    instance_count=1,
    environment=environment,
    code_configuration=CodeConfiguration(
        code="../script", scoring_script="score_azure.py"
    ),
)

blue_deployment = az_ml_client.begin_create_or_update(blue_deployment).result()

Check: endpoint azure-credit-risk-model-9535e6d8 exists
Uploading script (0.0 MBs): 100%|██████████| 1424/1424 [00:00<00:00, 5145.68it/s]




........................................................................................CPU times: user 2.37 s, sys: 424 ms, total: 2.8 s
Wall time: 8min 13s


In [193]:
# test the deployment with some sample data
import json

with open('./input_data.json', 'w') as f:
    json.dump({'input': train_data.drop('Risk', axis=1)[-1:].to_dict('records')}, f)

In [191]:
output = az_ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    deployment_name="blue",
    request_file="./input_data.json",
)

print(json.dumps(output, indent=2))

'{"output": [{"CheckingStatus": "less_0", "LoanDuration": 21, "CreditHistory": "prior_payments_delayed", "LoanPurpose": "furniture", "LoanAmount": 5281, "ExistingSavings": "less_100", "EmploymentDuration": "1_to_4", "InstallmentPercent": 3, "Sex": "male", "OthersOnLoan": "none", "CurrentResidenceDuration": 3, "OwnsProperty": "savings_insurance", "Age": 26, "InstallmentPlans": "none", "Housing": "free", "ExistingCreditsCount": 1, "Job": "skilled", "Dependents": 1, "Telephone": "none", "ForeignWorker": "yes", "Scored Labels": "No Risk", "Scored Probabilities": 0.6860922326149397, "ProbabilityVector": [0.6860922326149397, 0.31390776738506027]}]}'

Delete input_data from filesystem

In [None]:
os.remove('./input_data.json')