# Model comparison

We have now trained a series of basic Regression models (length of stay) and Classification models (risk 1-5).

We can derive an equivalent risk score from a regression model by mapping its predicted Length of Stay onto the risk categories:

Risk Category|Day Range for Risk Category
-----|------
1 - Very low risk|0-6
2 - Low risk|7-10
3 - Normal risk|11-13
4 - Elevated risk|14-15
5 - High risk|>15

For each of the following approaches, we compare the regression and classification versions using side-by-side risk stratification plots:

Model|Regression version|Classification version
---|---|---
Dummy|Mean|Prior
ElasticNet|ElasticNet|LogisticRegression
Decision Tree|DecisionTreeRegressor|DecisionTreeClassifier
Random Forest|RandomForestRegressor|RandomForestClassifier
Catboost|CatBoostRegressor|CatBoostClassifier
XGBoost|XGBRegressor|XGBClassifier


In [None]:
import math
import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn import preprocessing
from sklearn.metrics import f1_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

sys.path.append("../src/")

from utils import risk_score

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

%matplotlib inline
plt.rcParams["figure.figsize"] = [15, 8]

## Load data

In [None]:
features_df = pd.read_parquet("../../data/processed/features.parquet")
features_catboost_df = pd.read_parquet("../../data/processed/features-catboost.parquet")

# Display labels for the five risk categories; the leading digit of each label
# is the numeric risk score it stands for.
risk_labels = [
    "1 - Very Low Risk",
    "2 - Low Risk",
    "3 - Normal Risk",
    "4 - Elevated Risk",
    "5 - High Risk",
]

# add actual risk scores, derived from the true length of stay
features_df["risk"] = [risk_score(los) for los in features_df.LENGTH_OF_STAY]
features_catboost_df["risk"] = [
    risk_score(los) for los in features_catboost_df.LENGTH_OF_STAY
]

# Both columns are targets: "risk" is computed from LENGTH_OF_STAY, so leaving
# either of them in a feature matrix would leak the answer to the model.
target_columns = ["LENGTH_OF_STAY", "risk"]

# separate training and target features
X = features_df.drop(columns=target_columns)
y_reg = features_df["LENGTH_OF_STAY"]
y_clf = features_df["risk"]

# non-one-hot encoded data for catboost
# NOTE(review): "risk" was previously left in X_catboost; confirm the pickled
# catboost models were trained without it.
X_catboost = features_catboost_df.drop(columns=target_columns)
y_catboost_reg = features_catboost_df["LENGTH_OF_STAY"]
y_catboost_clf = features_catboost_df["risk"]

# split data for train/test (same train_size and random_state as the catboost split below)
X_train, X_test, y_train_reg, y_test_reg, y_train_clf, y_test_clf = train_test_split(
    X, y_reg, y_clf, train_size=0.75, random_state=42
)
print(
    X_train.shape,
    X_test.shape,
    y_train_reg.shape,
    y_train_clf.shape,
    y_test_reg.shape,
    y_test_clf.shape,
)

# Scale data for LogReg only using training data, so that no statistics of the
# test set influence the transformation
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_scaled = pd.DataFrame(
    scaler.transform(X_train), index=X_train.index, columns=X_train.columns
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test), index=X_test.index, columns=X_test.columns
)
print(X_train_scaled.shape, X_test_scaled.shape)

# Split the catboost data for train/test
(
    X_train_catboost,
    X_test_catboost,
    y_train_catboost_reg,
    y_test_catboost_reg,
    y_train_catboost_clf,
    y_test_catboost_clf,
) = train_test_split(
    X_catboost, y_catboost_reg, y_catboost_clf, train_size=0.75, random_state=42
)
print(
    X_train_catboost.shape,
    X_test_catboost.shape,
    y_train_catboost_reg.shape,
    y_train_catboost_clf.shape,
    y_test_catboost_reg.shape,
    y_test_catboost_clf.shape,
)

## Load models

In [None]:
# Load the trained regression models, which are stored outside the git tree.
# NOTE: unpickling executes code embedded in the file - only load trusted pickles.
with open("../../models/regression.pickle", "rb") as model_file:
    models_regression = pickle.load(model_file)
# Drop the "final_model" entry, as we are comparing all baseline models.
del models_regression["final_model"]
models_regression

In [None]:
# Load the trained classification models, which are stored outside the git tree.
# NOTE: unpickling executes code embedded in the file - only load trusted pickles.
with open("../../models/classification.pickle", "rb") as model_file:
    models_classification = pickle.load(model_file)

# Drop the "final_model" entry, as we are comparing all baseline models.
del models_classification["final_model"]
models_classification

## Compare predicted risk score (classification) with equivalent-predicted risk score (regression)

Classification -> Risk score

Regression -> Length Of Stay -> Equivalent risk score

In [None]:
def risk_proportions(actual_risk, predicted_risk, n_risks):
    """Return the actual-risk make-up of every predicted-risk bin.

    Args:
        actual_risk: true risk scores (1..n_risks), one per sample.
        predicted_risk: predicted risk scores, aligned with ``actual_risk``.
        n_risks: number of risk categories.

    Returns:
        Array of shape (n_risks, n_risks) where entry ``[a, p]`` is the share
        of samples predicted as risk ``p + 1`` whose actual risk is ``a + 1``.
        Columns of predicted risks that never occur are left at zero.
    """
    actual = np.asarray(actual_risk).ravel()
    predicted = np.asarray(predicted_risk).ravel()
    proportions = np.zeros((n_risks, n_risks))
    for predicted_level in range(1, n_risks + 1):
        actual_in_bin = actual[predicted == predicted_level]
        if actual_in_bin.size == 0:
            # nothing was predicted at this level: avoid dividing by zero
            continue
        for actual_level in range(1, n_risks + 1):
            proportions[actual_level - 1, predicted_level - 1] = np.mean(
                actual_in_bin == actual_level
            )
    return proportions


def plot_risk_breakdown(ax, proportions, labels):
    """Draw one stacked bar per predicted risk, split by actual risk.

    Args:
        ax: matplotlib axes to draw on.
        proportions: array as returned by ``risk_proportions``.
        labels: risk labels, used both as x tick labels and legend entries.
    """
    bottom = np.zeros(len(labels))
    for label, shares in zip(labels, proportions):
        ax.bar(labels, shares, label=label, bottom=bottom, width=0.35)
        # stack the next actual-risk layer on top of the layers drawn so far
        bottom = bottom + shares
    ax.set_xlabel("Predicted risk")
    ax.set_ylabel("Actual risk proportion")


# setup a subplot figure: one row per model, classification on the left and
# regression on the right (squeeze=False keeps axs 2-D even for a single model)
n_models = len(models_classification)
fig, axs = plt.subplots(n_models, 2, squeeze=False)
fig.set_size_inches(15, 7 * n_models)

for i, model_classification in enumerate(models_classification):
    # specify which test sets to use for the classification model and for the
    # corresponding regression model
    if model_classification == "catboost":
        # catboost has non one-hot encoded features
        model_classification_X_test = X_test_catboost
        model_classification_y_test = y_test_catboost_clf
        model_regression_X_test = X_test_catboost
        model_regression_y_test = y_test_catboost_reg
    else:
        # elasticnet is scaled for classification (LogisticRegression), but not
        # for regression (ElasticNet); every other model uses the plain features
        if model_classification == "elastic":
            model_classification_X_test = X_test_scaled
        else:
            model_classification_X_test = X_test
        model_classification_y_test = y_test_clf
        model_regression_X_test = X_test
        model_regression_y_test = y_test_reg

    # prior is being compared to mean
    if model_classification == "prior":
        model_regression = "mean"
    else:
        model_regression = model_classification

    # perform inference; a negative length of stay is meaningless, so clip at 0
    preds_regression = np.clip(
        models_regression[model_regression]["model"].predict(model_regression_X_test),
        0,
        None,
    )
    # flatten in case the classifier returns a column vector instead of a 1-D array
    preds_classification = np.ravel(
        models_classification[model_classification]["model"].predict(
            model_classification_X_test
        )
    )

    # calculate performance metrics
    # (np.sqrt of the MSE instead of the deprecated `squared=False` argument)
    rmse = np.sqrt(mean_squared_error(model_regression_y_test, preds_regression))
    mae = mean_absolute_error(model_regression_y_test, preds_regression)

    f1_score_weighted = f1_score(
        model_classification_y_test, preds_classification, average="weighted"
    )

    # create a prediction dataframe; its first column ("risk") is the actual risk
    predictions_df = pd.DataFrame(
        data=model_classification_y_test.reset_index(drop=True)
    )
    predictions_df["pred_regression_los"] = preds_regression
    # calculate equivalent risk score from regression model
    predictions_df["pred_regression"] = [
        risk_score(los) for los in predictions_df.pred_regression_los
    ]
    predictions_df["pred_classification"] = preds_classification

    #### Predicted vs Actual ####

    # plot predicted vs actual CLASSES for classification
    plot_risk_breakdown(
        axs[i, 0],
        risk_proportions(
            predictions_df.risk, predictions_df.pred_classification, len(risk_labels)
        ),
        risk_labels,
    )
    axs[i, 0].set_title(
        f"classification: {model_classification} - f1 weighted: {round(f1_score_weighted, 2)}"
    )

    # plot predicted vs actual CLASSES for regression
    plot_risk_breakdown(
        axs[i, 1],
        risk_proportions(
            predictions_df.risk, predictions_df.pred_regression, len(risk_labels)
        ),
        risk_labels,
    )
    # one legend per row, placed outside the right-hand plot; reversed so that
    # the legend order matches the top-to-bottom order of the stacked bars
    handles, labels = axs[i, 1].get_legend_handles_labels()
    axs[i, 1].legend(handles[::-1], labels[::-1], bbox_to_anchor=(1.05, 1))
    axs[i, 1].set_title(
        f"regression: {model_regression} - RMSE {round(rmse, 2)} days, MAE {round(mae, 2)} days"
    )

# figure-level layout only needs to be applied once, after all rows are drawn
fig.suptitle("Predicted vs Actual risk")
fig.tight_layout()
fig.subplots_adjust(top=0.95)

## Extensions

* Add number of predictions to bins in plots using e.g. https://stackoverflow.com/questions/30228069/how-to-display-the-value-of-the-bar-on-each-bar-with-pyplot-barh
* Refactor visualisation code