# Example 2: Invoke deployed models/functions from Watson Machine Learning

## Use case


A complex/advanced model is developed by the Data Science team and deployed to Watson Machine Learning.

Business users can then visually apply business rules and preprocess the data as required, before finally making a prediction with the deployed model.


# Build a model in Watson Studio and deploy in Watson Machine Learning (Data Scientist persona)

In [3]:
! pip install --quiet optuna

In [23]:
import os
import json
import optuna
import sklearn
import numpy as np
import pandas as pd
import lightgbm as lgb
import ibm_watson_machine_learning

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import make_column_selector
from sklearn.compose import make_column_transformer

import itc_utils.flight_service as itcfs

In [20]:
# Space ID that is set as the client's default space below.
SPACE_ID = "5d163188-8cf0-405c-94e6-077fae0bd306"

# Client credentials: the token and URL are read from environment variables
# (a KeyError is raised if either is unset); the other entries are fixed values.
wml_credentials = {
    "token": os.environ["USER_ACCESS_TOKEN"],
    "url": os.environ["RUNTIME_ENV_APSX_URL"],
    "instance_id": "openshift",
    "version": "4.5"
}

# Create the API client and make SPACE_ID its default space for later calls.
wml_client = ibm_watson_machine_learning.APIClient(wml_credentials)
wml_client.set.default_space(SPACE_ID)

'SUCCESS'

In [21]:
# Request naming the data asset to read. "infer_schema" is passed as an
# interaction property (presumably asks the service to infer column types — confirm).
nb_data_request = {
    "data_name": "mortgage-default.csv",
    "interaction_properties": {"infer_schema": "true"}
}

# Resolve the flight info for the request. Note that get_flight_client() is
# called once here and once more for the read below.
flightInfo = itcfs.get_flight_info(itcfs.get_flight_client(), nb_data_request=nb_data_request)

# Read the data (presumably into a single pandas DataFrame — confirm), drop the
# "ID" column, and preview the first 10 rows.
df = itcfs.read_pandas_and_concat(itcfs.get_flight_client(), flightInfo)
df = df.drop("ID", axis=1)
df.head(10)

Unnamed: 0,Income,AppliedOnline,Residence,YearCurrentAddress,YearsCurrentEmployer,NumberOfCards,CCDebt,Loans,LoanAmount,SalePrice,Location,MortgageDefault
0,43593.0,YES,Owner Occupier,13.0,0.0,1.0,2315.0,0.0,12820.0,180000,130,NO
1,45706.0,YES,Owner Occupier,17.0,16.0,2.0,373.0,1.0,7275.0,145000,100,YES
2,44756.0,YES,Owner Occupier,19.0,6.0,1.0,2117.0,1.0,10760.0,145000,110,NO
3,44202.0,YES,Owner Occupier,8.0,0.0,2.0,748.0,0.0,10455.0,170000,100,NO
4,45715.0,YES,Owner Occupier,8.0,14.0,2.0,772.0,1.0,12985.0,137000,100,NO
5,43800.0,YES,Owner Occupier,0.0,4.0,2.0,725.0,0.0,7340.0,259000,100,NO
6,45049.0,YES,Public Housing,6.0,16.0,2.0,1345.0,1.0,9085.0,280000,110,NO
7,44974.0,YES,Public Housing,14.0,18.0,2.0,2772.0,0.0,9515.0,264000,130,NO
8,44956.0,NO,Owner Occupier,21.0,21.0,2.0,806.0,1.0,8150.0,170000,100,YES
9,56087.0,NO,Public Housing,27.0,18.0,1.0,4818.0,1.0,8910.0,95000,101,NO


In [24]:
# Train the mortgage-default classifier: encode the features, tune LightGBM
# hyperparameters with Optuna, then fit a scikit-learn pipeline that uses the
# tuned parameters.
target = "MortgageDefault"

# Binary label: "YES" -> 1, "NO" -> 0. Every other column is a feature.
y = df[target].map({"YES": 1, "NO": 0})
X = df.drop(target, axis=1)

# One-hot encode every object-dtype column; all other columns pass through unchanged.
ct = make_column_transformer(
    (OneHotEncoder(), make_column_selector(dtype_include=object)),
    remainder="passthrough"
)
X_transformed = ct.fit_transform(X)

# Base LightGBM parameters handed to the Optuna tuner.
params = {
    "objective": "binary",
    "metric": "auc",
    "verbosity": -1,
    "boosting_type": "gbdt",
}
data = optuna.integration.lightgbm.Dataset(X_transformed, label=y)
tuner = optuna.integration.lightgbm.LightGBMTunerCV(
    params,
    data,
    verbose_eval=None,
    callbacks=[lgb.early_stopping(stopping_rounds=10, verbose=False)],
    folds=sklearn.model_selection.KFold(n_splits=3),
    optuna_seed=12345,
)
tuner.run()
best_params = tuner.best_params

# Rename the native LightGBM parameter names to the names used by the
# scikit-learn estimator API, so they can be passed to LGBMClassifier.
param_names = {
    "feature_fraction": "colsample_bytree",
    "lambda_l1": "reg_alpha",
    "bagging_fraction": "subsample",
    "lambda_l2": "reg_lambda",
    "bagging_freq": "subsample_freq",
}
for k, v in param_names.items():
    best_params[v] = best_params.pop(k)

# Fix: the classifier was previously built with default hyperparameters, so the
# tuning above had no effect on the fitted (and later deployed) model.
pipeline = Pipeline(steps=[("transform", ct), ("clf", lgb.LGBMClassifier(**best_params))])

pipeline.fit(X, y)

# NOTE: this scores the pipeline on the same data it was fitted on, so it is a
# training score, not an estimate of performance on unseen data.
pipeline.score(X, y)

1.0

In [11]:
# Store the fitted pipeline in the repository, then create an online
# deployment for it and show the resulting deployment ID.
model_meta = wml_client.repository.ModelMetaNames
software_spec_uid = wml_client.software_specifications.get_uid_by_name("runtime-22.1-py3.9")
meta_props = {
    model_meta.NAME: "mortgage model",
    model_meta.TYPE: "scikit-learn_1.0",
    model_meta.SOFTWARE_SPEC_UID: software_spec_uid,
}

model_details = wml_client.repository.store_model(model=pipeline, meta_props=meta_props)
model_uid = wml_client.repository.get_model_id(model_details)

# meta_props is reused for the deployment configuration.
deployment_meta = wml_client.deployments.ConfigurationMetaNames
meta_props = {
    deployment_meta.NAME: "mortgage model deployment",
    deployment_meta.ONLINE: {},
}
deployment_details = wml_client.deployments.create(model_uid, meta_props=meta_props)
deployment_id = wml_client.deployments.get_id(deployment_details)
deployment_id



#######################################################################################

Synchronous deployment creation for uid: '1384f077-50aa-48d2-b789-a76b27577ce5' started

#######################################################################################


initializing
Note: online_url is deprecated and will be removed in a future release. Use serving_urls instead.
..
ready


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='17ad92c5-cd7a-4914-ad49-14adc5c919ca'
------------------------------------------------------------------------------------------------




'17ad92c5-cd7a-4914-ad49-14adc5c919ca'

In [13]:
# Send a random sample of 10 feature rows to the deployment and show the response.
field_names = X.columns.tolist()
sample_rows = X.sample(10)
payload = {"input_data": [{"fields": field_names, "values": sample_rows}]}
response = wml_client.deployments.score(deployment_id, payload)
response

# Make predictions from the deployed model in SPSS Modeler flow (Power Business User persona)

## Build Syntax in Extension Model node

Copy and paste the code below into the Build Syntax section of the Extension Model node.

In [None]:
import subprocess
import sys

# Install the required packages with the interpreter running this node;
# check_call raises CalledProcessError if pip exits with a non-zero status.
pip_command = [sys.executable, "-m", "pip", "install", "pandas", "ibm-watson-machine-learning", "--no-input"]
subprocess.check_call(pip_command)

## Score Syntax in Extension Model node

Copy and paste the code below into the Score Syntax section of the Extension Model node.

In [None]:
import subprocess
import sys

# Install the required packages with the interpreter running this node;
# check_call raises CalledProcessError if pip exits with a non-zero status.
pip_command = [sys.executable, "-m", "pip", "install", "pandas", "ibm-watson-machine-learning", "--no-input"]
subprocess.check_call(pip_command)

import spss.pyspark.runtime
from pyspark.sql.types import *
from pyspark.sql import SQLContext
import numpy as np
import ibm_watson_machine_learning

# Space set as the client's default below, and the deployment that is scored.
SPACE_ID = "5d163188-8cf0-405c-94e6-077fae0bd306"
DEPLOYMENT_ID = "17ad92c5-cd7a-4914-ad49-14adc5c919ca"

# NOTE(review): the username, password, and cluster URL are embedded here in
# plain text, so anyone who can read this flow can read them. Consider
# supplying them from a secret store or the environment, and rotating this
# password.
wml_credentials = {
    "username": "admin",
    "password": "QJJj0GSt2dxZ",
    "url": "https://cpd-cpd.itzroks-550003aw18-kv78z8-6ccd7f378ae819553d37d5f2ee142bd6-0000.au-syd.containers.appdomain.cloud",
    "instance_id": "openshift",
    "version": "4.5",
}

# Create the API client and make SPACE_ID its default space.
wml_client = ibm_watson_machine_learning.APIClient(wml_credentials)
wml_client.set.default_space(SPACE_ID)

# Runtime context of the extension node and the Spark SQL context obtained from it.
cxt = spss.pyspark.runtime.getContext()
sqlContext = cxt.getSparkSQLContext()

# Names of the two output fields added by this node.
target = "MortgageDefault"
prediction = f"$PRED-{target}"
probability = f"$PROB-{target}"

# Output schema: every input field unchanged, followed by a non-nullable string
# prediction field and a non-nullable float probability field.
fieldList = [StructField(x.name, x.dataType, x.nullable) for x in cxt.getSparkInputSchema()]
fieldList.append(StructField(prediction, StringType(), nullable=False))
fieldList.append(StructField(probability, FloatType(), nullable=False))
outputSchema = StructType(fieldList)
cxt.setSparkOutputSchema(outputSchema)

# Score only when the context is not in "compute data model only" mode.
if not cxt.isComputeDataModelOnly():
    # Convert the input data to pandas and send all rows and columns in one request.
    df = cxt.getSparkInputData().toPandas()
    payload = {"input_data": [{"fields": df.columns.tolist(), "values": df}]}
    response = wml_client.deployments.score(DEPLOYMENT_ID, payload)
    # Split each returned row into two parts; assumes every row holds exactly
    # (predicted label, class probabilities) — TODO confirm against the response.
    pred, prob = zip(*response["predictions"][0]["values"])
    df[prediction] = pred
    # Translate the numeric labels 1/0 to "YES"/"NO"; any other value becomes NaN.
    df[prediction] = df[prediction].map({1: "YES", 0: "NO"})
    # Keep the last entry of each probability row (presumably the probability
    # of class 1 / "YES" — confirm).
    df[probability] = np.array(prob)[:,-1]
    cxt.setSparkOutputData(sqlContext.createDataFrame(df))
