# Integrating Key Performance Indicators and Hyperparameters Into Model Manager
Model Manager (MM) generates certain Key Performance Indicators (KPIs) automatically based on user-created performance definitions. However, we may want to include other KPIs to measure things that are not tracked by MM.

This can be done by performing local tests on models we've passed to MM, then passing up the resulting values as custom KPI values.

For certain Python models, sasctl will also generate a JSON file containing the hyperparameters of the model, making them easily accessible for future use.

### Python Package Imports

In [87]:
# Standard Library
from pathlib import Path
import warnings

# Third Party
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Application Specific
import sasctl.pzmm as pzmm
from sasctl import Session
from sasctl.pzmm.model_parameters import ModelParameters as mp

# Global Package Options
pd.options.mode.chained_assignment = None  # default='warn'
warnings.simplefilter(action='ignore', category=FutureWarning)

### Building the Model
For more information on building models for Model Manager, see the [PZMM Model Import Example](/pzmmModelImportExample.ipynb).

In [88]:
# Read data/hmeq.csv (relative to the working directory) into a DataFrame.
hmeqData = pd.read_csv('data/hmeq.csv', sep=',')

In [89]:
# Target to predict and the columns used as model inputs.
targetColumn = 'BAD'
predictorColumns = [
    'LOAN', 'MORTDUE', 'VALUE', 'YOJ', 'DEROG',
    'DELINQ', 'CLAGE', 'NINQ', 'CLNO', 'DEBTINC',
]

x = hmeqData[predictorColumns]
y = hmeqData[targetColumn]

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size=0.3, random_state=42)

# Replace missing values in each split with that split's own column means.
xTrain = xTrain.fillna(xTrain.mean())
xTest = xTest.fillna(xTest.mean())

In [90]:
# Build and train the decision tree in one step; fit() hands back the estimator.
treeModel = DecisionTreeClassifier(random_state=42).fit(xTrain, yTrain)

In [91]:
# Score the held-out split: class predictions and per-class probabilities.
yTreePredict = treeModel.predict(xTest)
yTreeProba = treeModel.predict_proba(xTest)

In [92]:
# The prefix names the model files; the folder below is where they are all written.
prefix = 'DecisionTreeClassifier'
path = Path.cwd().joinpath('data/hmeqModels/DecisionTreeClassifier/')

# Serialize the trained estimator into that folder.
pzmm.PickleModel.pickle_trained_model(treeModel, prefix, path)

Model DecisionTreeClassifier was successfully pickled and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/DecisionTreeClassifier/DecisionTreeClassifier.pickle.


In [93]:
# Helper object used to write the JSON metadata files next to the pickled model.
J = pzmm.JSONFiles()

# Input variable metadata comes from the predictor columns of the source data.
input_frame = hmeqData[predictorColumns]
J.write_var_json(input_frame, is_input=True, json_path=path)

# A single sample row describes the two output variables
# (presumably only the column names and value types are used; confirm).
output_var = pd.DataFrame(
    data=[[0.5, 'A']],
    columns=['EM_EVENTPROBABILITY', 'EM_CLASSIFICATION'],
)
J.write_var_json(output_var, is_input=False, json_path=path)

# Model-level properties, collected in one place before being written out.
model_property_args = dict(
    model_name=prefix,
    model_desc='',
    target_variable=targetColumn,
    model_type='Classification',
    target_event=1,
    num_target_categories=1,
    event_prob_var='EM_EVENTPROBABILITY',
    json_path=path,
    modeler='sasdemo',
)
J.write_model_properties_json(**model_property_args)

J.write_file_metadata_json(model_prefix=prefix, json_path=path)

inputVar.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/DecisionTreeClassifier/inputVar.json
outputVar.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/DecisionTreeClassifier/outputVar.json
ModelProperties.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/DecisionTreeClassifier/ModelProperties.json
fileMetadata.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/DecisionTreeClassifier/fileMetadata.json


In [94]:
import getpass

# Both values are read with getpass so that neither is echoed into the saved
# notebook output. Explicit prompts are required here: without an argument,
# getpass shows "Password: " for both calls and the user cannot tell which
# value is being requested.
username = getpass.getpass('Username: ')
password = getpass.getpass('Password: ')
host = 'demo.sas.com'

# NOTE(review): protocol='http' sends the credentials unencrypted; switch to
# 'https' if the target server supports it.
sess = Session(host, username, password, protocol='http')

# Probabilities for the training split (the test split was scored earlier).
trainProba = treeModel.predict_proba(xTrain)

# Pair the actual target values with column 1 of the predicted probabilities
# (the BAD=1 class, assuming the model's classes are ordered [0, 1]).
# The index is reset so the target lines up positionally with the new Series.
trainData = pd.concat([yTrain.reset_index(drop=True), pd.Series(data=trainProba[:, 1])], axis=1)
testData = pd.concat([yTest.reset_index(drop=True), pd.Series(data=yTreeProba[:, 1])], axis=1)

# Writes the dmcas_* fit statistic, ROC and lift JSON files into the model folder.
J.calculate_model_statistics(target_value=1, train_data=trainData, test_data=testData, json_path=path)

dmcas_fitstat.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/DecisionTreeClassifier/dmcas_fitstat.json
dmcas_roc.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/DecisionTreeClassifier/dmcas_roc.json
dmcas_lift.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/DecisionTreeClassifier/dmcas_lift.json


In [96]:
from sklearn.tree import DecisionTreeClassifier

# Settings for the import, gathered in one place so they are easy to scan.
import_settings = {
    'model_files': path,
    'model_prefix': prefix,
    'project': 'HMEQModels',
    'input_data': x,
    'predict_method': DecisionTreeClassifier.predict,
    'output_variables': ['BAD'],
    'predict_threshold': .5,
    'target_values': [1],
    'model_file_name': prefix + '.pickle',
}

# Import the model files from `path` into the HMEQModels project.
I = pzmm.ImportModel()
I.import_model(**import_settings)

Model score code was written successfully to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/DecisionTreeClassifier/DecisionTreeClassifierScore.py and uploaded to SAS Model Manager.
All model files were zipped to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/DecisionTreeClassifier.


  warn(f"No project with the name or UUID {project} was found.")


A new project named HMEQModels was created.
Model was successfully imported into SAS Model Manager as DecisionTreeClassifier with the following UUID: a45e99cc-20a0-4d24-99c7-c64bc7447dce.


(<class 'sasctl.core.RestObj'>(headers={'Date': 'Thu, 09 Mar 2023 15:57:16 GMT', 'Content-Type': 'application/vnd.sas.collection+json; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Cache-Control': 'no-cache, no-store, max-age=0, must-revalidate', 'Content-Security-Policy': "default-src 'self'; object-src 'none'; frame-ancestors 'self'; form-action 'self';", 'Expires': '0', 'Pragma': 'no-cache', 'Sas-Service-Response-Flag': 'true', 'Vary': 'Origin', 'X-Content-Type-Options': 'nosniff', 'X-Xss-Protection': '1; mode=block'}, data={'creationTimeStamp': '2023-03-09T15:57:12.718Z', 'createdBy': 'edmdev', 'modifiedTimeStamp': '2023-03-09T15:57:13.828Z', 'modifiedBy': 'edmdev', 'id': 'a45e99cc-20a0-4d24-99c7-c64bc7447dce', 'name': 'DecisionTreeClassifier', 'role': 'plain', 'scoreCodeType': 'python', 'algorithm': 'Classification', 'modeler': 'sasdemo', 'trainCodeType': 'Python', 'tool': 'Python 3', 'toolVersion': '3.8.16', 'version': 2, 'indirectFolderId': '382d37a5-da82-47da-a5f5-ab37b70b3b

### Updating Model and Project Properties
In order to allow for performance definitions to be run in Model Manager, certain properties need to be set for both the model and the project.

In [97]:
from sasctl._services.model_repository import ModelRepository as mr

# Properties to set on the model, applied in this order.
required_model_properties = (
    ('targetEvent', '1'),
    ('targetVariable', 'BAD'),
    ('function', 'Classification'),
    ('targetLevel', 'Binary'),
    ('eventProbVar', 'EM_EVENTPROBABILITY'),
)

# Fetch the stored model, set each property, then push the change back.
model = mr.get_model(prefix)
for property_name, property_value in required_model_properties:
    model[property_name] = property_value

mr.update_model(model)

<class 'sasctl.core.RestObj'>(headers={'Date': 'Thu, 09 Mar 2023 15:57:25 GMT', 'Content-Type': 'application/vnd.sas.models.model+json; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Cache-Control': 'no-cache, no-store, max-age=0, must-revalidate', 'Content-Security-Policy': "default-src 'self'; object-src 'none'; frame-ancestors 'self'; form-action 'self';", 'Etag': 'W/"1678377445192828000"', 'Expires': '0', 'Pragma': 'no-cache', 'Sas-Service-Response-Flag': 'true', 'Vary': 'Origin', 'X-Content-Type-Options': 'nosniff', 'X-Xss-Protection': '1; mode=block'}, data={'creationTimeStamp': '2023-03-09T15:57:12.718Z', 'createdBy': 'edmdev', 'modifiedTimeStamp': '2023-03-09T15:57:25.192Z', 'modifiedBy': 'edmdev', 'id': 'a45e99cc-20a0-4d24-99c7-c64bc7447dce', 'name': 'DecisionTreeClassifier', 'role': 'plain', 'scoreCodeType': 'python', 'algorithm': 'Classification', 'function': 'Classification', 'modeler': 'sasdemo', 'trainCodeType': 'Python', 'eventProbVar': 'EM_EVENTPROBABILITY', 'targetVa

In [98]:
# Re-read the model and fetch the project so both reflect the current server state.
model = mr.get_model(prefix)
project = mr.get_project("HMEQModels")

# The project's variable list is the model's inputs followed by its outputs.
variables = model['inputVariables'] + model['outputVariables']

# First update: target definition, variable list and model function.
project['targetVariable'] = 'BAD'
project['variables'] = variables
project['targetLevel'] = 'Binary'
project['targetEventValue'] = "1"
project['classTargetValues'] = ".5"
project['function'] = 'Classification'

mr.update_project(project)

# Second update: the event probability variable is set on a freshly fetched
# copy of the project.
# NOTE(review): presumably this must happen after the variables above exist
# on the server, so keep the two updates separate and in this order — confirm.
project = mr.get_project("HMEQModels")
project['eventProbabilityVariable'] = 'EM_EVENTPROBABILITY'

mr.update_project(project)

<class 'sasctl.core.RestObj'>(headers={'Date': 'Thu, 09 Mar 2023 15:57:30 GMT', 'Content-Type': 'application/vnd.sas.models.project+json; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Cache-Control': 'no-cache, no-store, max-age=0, must-revalidate', 'Content-Security-Policy': "default-src 'self'; object-src 'none'; frame-ancestors 'self'; form-action 'self';", 'Etag': 'W/"1678377450238474000"', 'Expires': '0', 'Last-Modified': 'Thu, 09 Mar 2023 15:57:30 GMT', 'Pragma': 'no-cache', 'Sas-Service-Response-Flag': 'true', 'Vary': 'Origin', 'X-Content-Type-Options': 'nosniff', 'X-Xss-Protection': '1; mode=block'}, data={'creationTimeStamp': '2023-03-09T15:57:09.443421Z', 'createdBy': 'edmdev', 'modifiedTimeStamp': '2023-03-09T15:57:30.238474Z', 'modifiedBy': 'edmdev', 'version': 0, 'id': 'acc21f66-01f1-4c15-b352-1b85ae353986', 'name': 'HMEQModels', 'function': 'Classification', 'targetVariable': 'BAD', 'targetEventValue': '1', 'classTargetValues': '.5', 'targetLevel': 'binary', 'eventProb

### Hyperparameter Generation
If the hyperparameter json file is not generated automatically, this code block will generate it and add it to Model Manager.

In [99]:
# Generate the hyperparameter JSON for the trained model
# (presumably written into `path`, where it is opened below).
mp.generate_hyperparameters(treeModel, prefix, path)

# Attach the generated file to the model in Model Manager under the same file name.
hyperparameter_file = f'{prefix}Hyperparameters.json'
with (path / hyperparameter_file).open('r') as f:
    mr.add_model_content(prefix, f, hyperparameter_file)

Once the model has been uploaded to Model Manager, custom hyperparameters can be added to the hyperparameter JSON file using the `add_hyperparameters` function. The custom hyperparameters are passed in as keyword arguments.

In [100]:
# Add a custom hyperparameter named `example` with value 1 to the model's
# hyperparameter file; each keyword argument becomes one entry.
mp.add_hyperparameters("DecisionTreeClassifier", example=1)

### Performance Definition
To create a performance definition, we first have to pass up data for the performance definition to run on. All data used for performance definitions should be named using the following format:

{Table Prefix}\_{Time}\_{Time Label}

In [101]:
from sasctl._services.cas_management import CASManagement as cas

# Upload the performance tables numbered 1 through 4, named
# {Table Prefix}_{Time}_{Time Label}.
# The loop variable is deliberately not called `x`: that name holds the
# predictor DataFrame passed as `input_data` when the model is imported, and
# overwriting it with an int would break that cell if it were run again.
for quarter in range(1, 5):
    table_name = f'HMEQPERF_{quarter}_Q{quarter}'
    cas.upload_file(
        file=f'data/{table_name}.csv',
        name=table_name)
    print(quarter)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/dalmoo/opt/anaconda3/envs/yeehaw/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3460, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/vs/np2dp7cs1y7ggk5pl92q_rb40000gn/T/ipykernel_35626/394561866.py", line 4, in <module>
    cas.upload_file(
  File "/Users/dalmoo/opt/anaconda3/envs/yeehaw/lib/python3.8/site-packages/sasctl/_services/cas_management.py", line 433, in upload_file
    tbl = cls.post(
  File "/Users/dalmoo/opt/anaconda3/envs/yeehaw/lib/python3.8/site-packages/sasctl/_services/service.py", line 113, in post
    return cls.request("post", *args, **kwargs)
  File "/Users/dalmoo/opt/anaconda3/envs/yeehaw/lib/python3.8/site-packages/sasctl/_services/service.py", line 93, in request
    return core.request(verb, path, session, format_, **kwargs)
  File "/Users/dalmoo/opt/anaconda3/envs/yeehaw/lib/python3.8/site-packages/sasctl/core.py", line 2033, in request
    raise HTT

After pushing up the data, the performance definition can be created. When the performance definition is run, the KPIs are generated within Model Manager.

In [102]:
from sasctl._services.model_management import ModelManagement as mm

# Create a performance definition for the HMEQModels project over the tables
# whose names start with the given prefix.
# NOTE(review): the tables were uploaded as 'HMEQPERF_*' while the prefix here
# is lower case — presumably matched case-insensitively; confirm.
mm.create_performance_definition(table_prefix='hmeqperf', project='HMEQModels', scoring_required=True)

<class 'sasctl.core.RestObj'>(headers={'Date': 'Thu, 09 Mar 2023 15:57:57 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '1084', 'Set-Cookie': 'sas-ingress-nginx=a48a1d448b830ed19c8a79d63eee7fe6|c3d98640c92f9e7d1a09cbdf02714a7f; Path=/modelManagement/; HttpOnly; SameSite=Lax', 'Cache-Control': 'no-cache, no-store, max-age=0, must-revalidate', 'Content-Security-Policy': "default-src 'self'; object-src 'none'; frame-ancestors 'self'; form-action 'self';", 'Etag': 'W/"1678377475738059000"', 'Expires': '0', 'Last-Modified': 'Thu, 09 Mar 2023 15:57:55 GMT', 'Pragma': 'no-cache', 'Sas-Service-Response-Flag': 'true', 'Vary': 'Origin', 'X-Content-Type-Options': 'nosniff', 'X-Xss-Protection': '1; mode=block'}, data={'casServerId': 'cas-shared-default', 'challengerMonitored': False, 'championMonitored': False, 'createdBy': 'edmdev', 'creationTimeStamp': '2023-03-09T15:57:55.738058Z', 'dataLibrary': 'Public', 'dataPrefix': 'hmeqperf', 'description': 'Performance defini

In [104]:
project = mr.get_project('HMEQModels')

# List only the performance definitions that belong to this project.
performance_definition = mm.list_performance_definitions(filter=f"eq(projectId,'{project.id}')")

# Run the first definition returned; indexing [0] fails if the project has none.
mm.execute_performance_definition(performance_definition[0].id)

<class 'sasctl.core.RestObj'>(headers={'Date': 'Thu, 09 Mar 2023 15:58:11 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Cache-Control': 'no-cache, no-store, max-age=0, must-revalidate', 'Content-Security-Policy': "default-src 'self'; object-src 'none'; frame-ancestors 'self'; form-action 'self';", 'Expires': '0', 'Pragma': 'no-cache', 'Sas-Service-Response-Flag': 'true', 'Vary': 'Origin', 'X-Content-Type-Options': 'nosniff', 'X-Xss-Protection': '1; mode=block'}, data={'code': "options cashost='sas-cas-server-default-client' casport=5570;\ncas _mmcas_;\ncaslib _all_ assign;\n%let _MM_PerfExecutor = 1;\n%let _MM_ProjectUUID = %nrstr(acc21f66-01f1-4c15-b352-1b85ae353986);\n%let _MM_TargetVar = BAD;\n%let _MM_TargetLevel = BINARY;\n%let _MM_PredictedVar = ;\n%let _MM_TargetEvent = 1;\n%let _MM_EventProbVar = EM_EVENTPROBABILITY;\n%let _MM_KeepVars = EM_EVENTPROBABILITY EM_CLASSIFICATION;\n%let _MM_CAKeepVars = LOAN MORTDUE VALUE YOJ DEROG DELINQ 

Once the performance definition is run, it is possible to update the hyperparameter JSON file to include the KPIs that have been generated. This is not a necessary step, but it can be helpful when analyzing which hyperparameters lead to better KPIs.

In [105]:
# Update the hyperparameter JSON file with the KPIs generated for the project.
mp.update_kpis('HMEQModels')

  if pd.__version__ >= StrictVersion("1.0.3"):


### Custom KPIs
It is also possible to generate custom key performance indicators and pass them up to Model Manager. Below, using the same data sets that were used in the SAS performance definition, the Jaccard score is calculated for each data set and then passed up to the KPI table in Model Manager.

In [116]:
from sklearn.metrics import jaccard_score

# One Jaccard score is computed per performance table (1 through 4), together
# with the time label and time key that identify it in the KPI table.
jaccard_scores = []
time_labels = []
time_sks = []

# The loop variable is not called `x`, so the predictor DataFrame `x` used
# when importing the model is left intact.
for quarter in range(1, 5):
    test_data = pd.read_csv(f'data/HMEQPERF_{quarter}_Q{quarter}.csv')
    x_test = test_data[predictorColumns]
    y_test = test_data[targetColumn]
    test_data_predictions = treeModel.predict(x_test)
    jaccard_scores.append(jaccard_score(y_test, test_data_predictions))
    time_labels.append(f'Q{quarter}')
    time_sks.append(quarter)

# The KPI name is repeated once per value so that all four lists line up.
name = ['jaccard'] * len(jaccard_scores)

#TODO: allow option to add multiple of same custom KPI
model = mr.get_model('DecisionTreeClassifier')
mm.create_custom_kpi(
    model=model.id,
    project='HMEQModels',
    kpiName=name,
    kpiValue=jaccard_scores,
    timeLabel=time_labels,
    timeSK=time_sks
)

Uploading custom kpis to SAS Viya...


[]

Once the KPIs have been generated, the hyperparameter file can be updated, and the new KPIs will appear in the file.

In [118]:
import json

# Refresh the hyperparameter file so it includes the newly added KPIs.
mp.update_kpis('HMEQModels')

# Fetch the model's hyperparameter file contents.
hyperparameters = mp.get_hyperparameters('DecisionTreeClassifier')

# Pretty-print the result for inspection.
print(json.dumps(hyperparameters, indent=4))

[
    {
        "hyperparameters": {
            "ccp_alpha": 0.0,
            "class_weight": null,
            "criterion": "gini",
            "max_depth": null,
            "max_features": null,
            "max_leaf_nodes": null,
            "min_impurity_decrease": 0.0,
            "min_impurity_split": null,
            "min_samples_leaf": 1,
            "min_samples_split": 2,
            "min_weight_fraction_leaf": 0.0,
            "presort": "deprecated",
            "random_state": 42,
            "splitter": "best",
            "example": 1
        },
        "kpis": {
            "Q1": {
                "TimeSK": "1",
                "ProjectUUID": "acc21f66-01f1-4c15-b352-1b85ae353986",
                "ModelName": "DecisionTreeClassifier",
                "ModelFlag": "-1",
                "_AUC_": "0.8196751932",
                "_F1_": "0.6459143969",
                "_TPR_": "0.6935933148",
                "_FPR_": "0.0542429285",
                "_MISC_": "0.08115338