# Prediction Studio Python library

To get started, first we initialise an Infinity client.

The user credentials are expected to be available as environment variables (here they are loaded from a `.env` file via `load_dotenv()`), so they are picked up automatically.

In [None]:
from pdstools import Infinity
from dotenv import load_dotenv
import polars as pl

# Show up to 100 characters of string columns when Polars prints a frame.
pl.Config.set_fmt_str_lengths(100)

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# No credentials are passed explicitly; they are expected to come from the
# environment variables loaded above.
# NOTE(review): verify=False presumably turns off TLS certificate verification —
# fine for a demo system, confirm before pointing this at a real environment.
client = Infinity.from_basic_auth(
    verify=False,
    pega_version="24.2",
)

#### Enable logging 

In [None]:
import logging
# Log INFO and above, prefixing each message with a timestamp and its level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

## Overview of Prediction Studio & Monitoring APIs

### Get Repository Information

In [None]:
client.prediction_studio.repository()

### Getting notifications for Prediction Studio

In Prediction Studio, we have notifications as a mechanism to stay up-to-date on any sudden or gradual changes in Prediction Studio Performance.

These are also accessible through the APIs:

In [None]:
notification = client.prediction_studio.get_notifications(return_df=True)
notification

### Listing Predictions from Prediction Studio

Use `list_predictions` to get a list of the Predictions in the system.

Here we pass `return_df=True` to get a nice tabular overview, but you can of course leave it out to get the individual Prediction objects instead.

In [None]:
predictions = client.prediction_studio.list_predictions(return_df=True)
predictions

### Describing a single Prediction

If we want to get more information about an individual Prediction, we can call `.describe()` on it:

In [None]:
prediction = client.prediction_studio.get_prediction(label="Predict Web Propensity")
prediction.describe()

### Get Prediction metrics

We can also retrieve the metrics corresponding to the Prediction by using `get_metric`:

In [None]:
from datetime import date
# Fetch the daily "Lift" metric of the prediction for 2-11 July 2024.
prediction = client.prediction_studio.get_prediction(
    label="Predict Cards Acceptance",
)
prediction.get_metric(
    start_date=date(2024, 7, 2),
    end_date=date(2024, 7, 11),
    metric="Lift",
    frequency="Daily",
)

### Getting notifications for a Prediction

In Prediction Studio, we have notifications as a mechanism to stay up-to-date on any sudden or gradual changes in Prediction performance.

These are also accessible through the APIs:

In [None]:
Predict_action_propensity = client.prediction_studio.get_prediction(label="Predict Action Propensity")
notification = Predict_action_propensity.get_notifications(return_df=True)
notification

### Get list of models from Prediction Studio

In [None]:
models = client.prediction_studio.list_models(return_df=True)
models

### Describe a specific model

In [None]:
model = client.prediction_studio.get_model(model_id="DATA-DECISION-REQUEST-CUSTOMER!OMNIADAPTIVEMODEL")
model.describe()

### Getting notifications for a Model

In Prediction Studio, we have notifications as a mechanism to stay up-to-date on any sudden or gradual changes in Model performance.

These are also accessible through the APIs:

In [None]:
notification = model.get_notifications(return_df=True)
notification

### Trigger ADM datamart export

In [None]:
datamart_export1 = client.prediction_studio.trigger_datamart_export()

In [None]:
datamart_export1.get_export_status()

## Prediction and model management APIs

### Get Champion challenger objects of a prediction
The model management operations below work on the champion-challenger objects of a Prediction, so we start by retrieving them.

In [None]:
prediction = client.prediction_studio.get_prediction(label="Predict Cards Acceptance")
prediction.get_champion_challengers()

In [None]:
testModel_falcons = client.prediction_studio.get_model(label="testModel_falcons")
testModel_falcons.describe()

### Add a conditional model to the prediction

### Get model category information

In [None]:
categories = client.prediction_studio.get_model_categories()
categories

In [None]:
Retention_CC = prediction.add_conditional_model(new_model=testModel_falcons,category="Retention")
Retention_CC

In [None]:
Retention_CC.describe()

#### Exploring further details about this model

In [None]:
Retention_CC.active_model.describe()

### Add a new ADM challenger model to that champion challenger object by copying the active model

In [None]:
from pdstools.infinity.resources.prediction_studio.types import AdmModelType
# Clone the active model into a gradient-boosting challenger labelled
# "Test_model_1" with a challenger response share of 0.8, then display the
# updated champion-challenger object.
Retention_CC.clone_model(
    challenger_response_share=0.8,
    adm_model_type=AdmModelType.GRADIENT_BOOSTING,
    model_label="Test_model_1",
)
Retention_CC

In [None]:
Retention_CC.challenger_model.describe()

In [None]:
# Add a parameterized numeric predictor to the challenger model
# (is_active_model=False targets the non-active model).
# NOTE(review): the predictor is named "Income4" but reads "Customer.Age" —
# confirm this pairing is intended for the demo.
Retention_CC.add_predictor(
    is_active_model=False,
    name="Income4",
    parameterized=True,
    predictor_type="Numeric",
    data_type="Double",
    value="Customer.Age",
)

In [None]:
Retention_CC.challenger_model.describe()

In [None]:
Retention_CC.remove_predictor(is_active_model=False, name="Income4", parameterized=True)

### Introduce a new challenger model to an active model from the Prediction Studio's available models.

In [None]:
prediction.get_champion_challengers()

In [None]:
# Pick the third champion-challenger object of the prediction.
# NOTE(review): the index 2 is hard-coded and presumably specific to the demo
# system — check it against the list printed by the previous cell.
NoContext_CC = prediction.get_champion_challengers()[2]
NoContext_CC

#### Explore the available models for use in the Champion-Challenger setup.

In [None]:
NoContext_CC.list_available_models_to_add(return_df=True)

In [None]:
test_model = client.prediction_studio.get_model(label="Test_model_1")

In [None]:
test_model.describe()

In [None]:
NoContext_CC.add_model(new_model=test_model, challenger_response_share=0.8)
NoContext_CC

In [None]:
NoContext_CC.describe()

In [None]:
NoContext_CC.challenger_model.describe()

### Incorporate a PMML model stored locally as a challenger in the Prediction 

#### Prepare a local model

In [None]:
from pdstools.infinity.resources.prediction_studio.local_model_utils import PMMLModel
pmml_model = PMMLModel(file_path="riskModel.pmml")

#### Deploy model to repository

In [None]:
new_risk_model = client.prediction_studio.upload_model(pmml_model, file_name="riskModel.pmml")
new_risk_model

In [None]:
prediction.get_champion_challengers()

In [None]:
# Pick the third champion-challenger object of the prediction.
# NOTE(review): the index 2 is hard-coded and presumably specific to the demo
# system — check it against the list printed by the previous cell.
RiskModel_cc = prediction.get_champion_challengers()[2]
RiskModel_cc

In [None]:
RiskModel_cc.active_model.describe()

In [None]:
# Map each predictor of the uploaded PMML model to a property, then attach the
# model as a challenger with a response share of 0.6.
# NOTE(review): "DataUsage" is mapped to ".RiskCode" — confirm this is intended.
predictor_mapping = [
    {"predictor": predictor, "property": prop}
    for predictor, prop in (
        ("Gender", ".Gender"),
        ("DataUsage", ".RiskCode"),
        ("Age", ".Age"),
    )
]
RiskModel_cc.add_model(
    new_model=new_risk_model,
    challenger_response_share=0.6,
    predictor_mapping=predictor_mapping,
)

In [None]:
RiskModel_cc.describe()

#### Update champion challenger percentage

In [None]:
RiskModel_cc.update_challenger_response_share(new_challenger_response_share=0.8)

In [None]:
RiskModel_cc.describe()

#### Promote the challenger model to active model

In [None]:
RiskModel_cc.promote_challenger_model()
RiskModel_cc.describe()

#### Remove the challenger model

In [None]:
RiskModel_cc.delete_challenger_model()
RiskModel_cc

### List all the Prediction Changes

In [None]:
prediction.get_staged_changes()

### Deploy the staged changes for CR creation

In [None]:
prediction.deploy_staged_changes(message="Deploying demo changes")

## Create, validate, run and upload ONNX model

### Build a Sklearn pipeline object

In [None]:
from sklearn.ensemble import RandomForestClassifier
import polars as pl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pathlib

# Resolve the dataset relative to the directory two levels above the working
# directory, joining path segments with pathlib instead of string formatting.
basePath = pathlib.Path().resolve().parent.parent
file_path = basePath / "data" / "Churn_CID.csv"
dataset = pl.read_csv(file_path)

# Features are every column except the last two; the target is "outcome".
# NOTE(review): presumably the last two columns are the target plus another
# non-feature column — verify against the CSV.
X = dataset.select(dataset.columns[:-2])
Y = dataset["outcome"]

# Seed the split so the notebook is reproducible, matching the seeded
# classifier below.
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)

# Columns are routed to a transformer by dtype; these two lists are reused
# later to declare the ONNX input types.
categorical_cols = X.select(pl.col(pl.String), pl.col(pl.Boolean)).columns
numerical_cols = X.select(pl.col(pl.Int64), pl.col(pl.Float64)).columns

numerical_transformer = StandardScaler()
# handle_unknown='ignore' lets the encoder accept categories not seen in training.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols),
    ]
)

# Named `classifier` rather than `model` so the Prediction Studio `model`
# object fetched earlier in the notebook is not overwritten.
classifier = RandomForestClassifier(n_estimators=100, random_state=42)

clf = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier),
    ]
)

clf.fit(X_train, y_train)

### Prepare the model input definitions (initial_types)
(Note: Required for converting the pipeline object to ONNX)

In [None]:
from skl2onnx.common.data_types import FloatTensorType, StringTensorType

# Declare one ONNX input per feature column: float tensors for the numerical
# columns followed by string tensors for the categorical ones, each with a
# [None, 1] shape.
initial_types = [
    *((name, FloatTensorType([None, 1])) for name in numerical_cols),
    *((name, StringTensorType([None, 1])) for name in categorical_cols),
]

### Create an ONNX model object

In [None]:
from pdstools.infinity.resources.prediction_studio.local_model_utils import ONNXModel
onnx_model = ONNXModel.from_sklearn_pipeline(clf, initial_types=initial_types)

### Identify the output nodes of the created ONNX model, such as the label node and the score node

In [None]:
import netron
import IPython
import tempfile
# Create the temp file with delete=False and close it immediately: a
# NamedTemporaryFile that is still open cannot be reopened by name on Windows,
# which would make the save below fail there. The file is intentionally left on
# disk so Netron can keep serving it; remove it manually when done.
temp_file = tempfile.NamedTemporaryFile(suffix=".onnx", delete=False)
temp_file.close()
onnx_model.save(temp_file.name)

# netron.start returns the (host, port) address it is serving on; use it
# instead of assuming port 8080, which may already be taken.
netron_host, netron_port = netron.start(temp_file.name, browse=False)
iframe = f'<iframe src="http://{netron_host}:{netron_port}" width="100%" height="600px"></iframe>'
IPython.display.HTML(iframe)

### Add metadata to ONNX model, use previously identified output nodes to populate labelName and scoreName

In [None]:
from pdstools.infinity.resources.prediction_studio.local_model_utils import Metadata, Output, OutcomeType
# Attach binary-outcome metadata to the model: the label and score output node
# names plus the two possible outcome values.
onnx_model.add_metadata(
    Metadata(
        type=OutcomeType.BINARY,
        output=Output(
            label_name="output_label",
            score_name="output_probability",
            possible_values=["Churned", "Loyal"],
        ),
    )
)

### Validate the ONNX model

In [None]:
onnx_model.validate()

### Prepare test data and do a test run of ONNX model

#### Prepare test data

In [None]:
# Action: cast numeric columns to float32 and string or bool columns to utf8
# Reason: model was converted with float32 and utf8 for numeric and string or bool columns respectively
# The casts are driven by numerical_cols / categorical_cols — the same lists
# used to declare the ONNX inputs — so the test data contains exactly the
# inputs the model declares, rather than re-deriving the split from a second,
# slightly different dtype list.
test_df = pl.DataFrame(X_test, schema=X.columns).select(
    [pl.col(col).cast(pl.Float32) for col in numerical_cols]
    + [pl.col(col).cast(pl.Utf8) for col in categorical_cols]
)
# reshape each column to a 2D array
test_data = {col: test_df[col].to_numpy().reshape(-1, 1) for col in test_df.columns}

#### Do test run

In [None]:
onnx_model.run(test_data)

### Upload the ONNX model

In [None]:
new_pipeline_model = client.prediction_studio.upload_model(onnx_model, file_name="churn_model.onnx")
new_pipeline_model

### Get a Prediction and do Champion Challenge

In [None]:
prediction = client.prediction_studio.get_prediction(label="Churn Risk")
prediction.get_champion_challengers()

In [None]:
# DATA-DECISION-REQUEST-CUSTOMER!CHURNRISK
champion_challenger = prediction.get_champion_challengers()[0]
champion_challenger.describe()

In [None]:
# Map each predictor of the uploaded ONNX model to its customer property, attach
# the model as a challenger with a response share of 0.8, and show the result.
predictor_mapping = [
    {"predictor": predictor, "property": prop}
    for predictor, prop in (
        ("InCollections", ".Customer.InCollections"),
        ("CreditScore", ".Customer.CreditScore"),
        ("Age", ".Customer.Age"),
        ("WinScore", ".Customer.WinScore"),
        ("RiskCode", ".Customer.RiskCode"),
        ("AnnualIncome", ".Customer.AnnualIncome"),
        ("NetWealth", ".Customer.NetWealth"),
    )
]
champion_challenger.add_model(
    new_model=new_pipeline_model,
    challenger_response_share=0.8,
    predictor_mapping=predictor_mapping,
)
champion_challenger.describe()

In [None]:
prediction.describe()