## Import Necessary Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

import model_metrics

print(model_metrics.__version__)

from sklearn.linear_model import LogisticRegression
from model_tuner import Model, loadObjects

from ucimlrepo import fetch_ucirepo
from eda_toolkit import ensure_directory
from sklearn.metrics import roc_curve
from model_metrics import (
    summarize_model_performance,
    show_calibration_curve,
    show_confusion_matrix,
    show_roc_curve,
    show_pr_curve,
    show_lift_chart,
    show_gain_chart,
    plot_threshold_metrics,
)

## Set Model Path

In [None]:
print(f"Model Metrics version: {model_metrics.__version__}")
print(f"Model Metrics authors: {model_metrics.__author__} \n")

## Define base paths
## `base_path` is the parent of the current working directory (one level up
## from the notebook's folder); every other path below is derived from it.
base_path = os.path.join(os.pardir)

## Directory layout under `base_path`:
##   model_files/                         -> data_path
##   model_files/results/                 -> model_path
##   model_files/images/png_images/       -> image_path_png
##   model_files/images/svg_images/       -> image_path_svg
## Each component is passed separately so os.path.join picks the separator.
data_path = os.path.join(base_path, "model_files")
model_path = os.path.join(data_path, "results")
image_path_png = os.path.join(data_path, "images", "png_images")
image_path_svg = os.path.join(data_path, "images", "svg_images")

# Make sure the results and image directories exist before they are used
ensure_directory(model_path)
ensure_directory(image_path_png)
ensure_directory(image_path_svg)

## Load The Model Object and Test Data

In [None]:
# Fetch the dataset with id=2 through ucimlrepo (bound to `adult` here)
adult = fetch_ucirepo(id=2)

# data (as pandas dataframes)
# Only the feature table is kept; it is joined onto the test set further down
X = adult.data.features

# Uncomment to cache the features locally for use when the repo is unreachable
# X.to_csv("../data/X.csv")

### In Case UCI ML Repo is Down



In [None]:
# X = pd.read_csv("../data/X.csv")

### Load Model Objects

In [None]:
# Load the three saved model objects from `model_path`, in a fixed order:
# logistic regression, decision tree, random forest.
model_lr, model_dt, model_rf = (
    loadObjects(os.path.join(model_path, pickle_name))
    for pickle_name in (
        "LogisticRegression.pkl",
        "DecisionTreeClassifier.pkl",
        "RandomForestClassifier.pkl",
    )
)

# Read the held-out features and labels stored as parquet under `data_path`.
X_test, y_test = (
    pd.read_parquet(os.path.join(data_path, parquet_name))
    for parquet_name in ("X_test.parquet", "y_test.parquet")
)

In [None]:
# Preview the first rows of the fetched feature table
X.head()

In [None]:
# Columns taken from the fetched feature table `X` and attached to the
# test set so they can be used as grouping variables later on.
extra_columns = [
    "sex",
    "race",
    "relationship",
    "occupation",
    "workclass",
    "education",
]

# DataFrame.join aligns on the index, so each test row picks up the values
# stored under the same index label in `X`.
X_test_2 = X_test.join(X[extra_columns])

In [None]:
# Preview the test set after the extra columns from `X` were joined on
X_test_2.head()

## Set The Desired Naming Conventions

In [None]:
# Pull the "model" entry out of each loaded object, keeping a fixed order
# (logistic regression, decision tree, random forest).
pipelines_or_models = [
    loaded["model"] for loaded in (model_lr, model_dt, model_rf)
]

# Display titles, index-aligned with `pipelines_or_models`
model_titles = [
    "Logistic Regression",
    "Decision Tree Classifier",
    "Random Forest Classifier",
]

# Title -> threshold. Each model's `.threshold` is a mapping; its first
# stored value is used.
model_thresholds = {
    title: next(iter(estimator.threshold.values()))
    for title, estimator in zip(model_titles, pipelines_or_models)
}

In [None]:
# Display the title -> threshold mapping built in the previous cell
model_thresholds

## ROC AUC Curves

In [None]:
# Re-declares the same three titles already assigned in the
# naming-conventions cell; order matches `pipelines_or_models`.
model_titles = [
    "Logistic Regression",
    "Decision Tree Classifier",
    "Random Forest Classifier",
]

### ROC Curve w/ Model Objects

In [None]:
# ROC curves computed from the model objects and the test set
show_roc_curve(
    # --- inputs ---
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    # --- layout: subplots on a 1 x 3 grid ---
    subplots=True,
    n_rows=1,
    n_cols=3,
    figsize=(12, 4),
    # --- text / number formatting ---
    decimal_places=2,
    text_wrap=20,
    label_fontsize=16,
    tick_fontsize=14,
    # --- line styling ---
    # Disabled example of per-model styling:
    #   curve_kwgs={"Logistic Regression": {"color": "blue", "linewidth": 2}}
    linestyle_kwgs={"color": "red", "linestyle": "--"},
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)

### ROC Curve w/ Probabilities

In [None]:
# Inspect the first entry of the model list (the one taken from `model_lr`)
pipelines_or_models[0]

In [None]:
# Column index 1 of the random forest's predict_proba output on the test set
# (presumably the positive-class probability; confirm against class order)
model_rf["model"].predict_proba(X_test)[:, 1]

In [None]:
# One array per model, in `pipelines_or_models` order: column index 1 of
# predict_proba on the test set (presumably the positive class; confirm).
y_prob = [
    estimator.predict_proba(X_test)[:, 1] for estimator in pipelines_or_models
]

In [None]:
# ROC curves computed from precomputed probabilities instead of model objects
show_roc_curve(
    y_prob=y_prob,
    # Flatten the label frame to a 1-D NumPy array. Going through
    # DataFrame.to_numpy() avoids Series.ravel(), which is deprecated in
    # pandas 2.2.
    y=y_test.to_numpy().ravel(),
    model_title=model_titles,
    decimal_places=2,
    n_cols=3,
    n_rows=1,
    # Disabled example of per-model styling:
    #   curve_kwgs={"Logistic Regression": {"color": "blue", "linewidth": 2}}
    linestyle_kwgs={"color": "red", "linestyle": "--"},
    save_plot=True,
    subplots=True,
    text_wrap=20,
    figsize=(12, 4),
    label_fontsize=16,
    tick_fontsize=14,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)

### ROC Curves: Race Category

In [None]:
# ROC curves broken out by the "race" column of the augmented test set
show_roc_curve(
    # --- inputs ---
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    group_category=X_test_2["race"],
    # --- titles / formatting ---
    title=None,
    decimal_places=2,
    n_cols=3,
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    # Options left disabled in this cell:
    #   overlay=True, title="Custom", title="", n_rows=1,
    #   curve_kwgs={"Logistic Regression": {"color": "blue", "linewidth": 2}},
    #   linestyle_kwgs={"color": "grey", "linestyle": "--"},
    #   figsize=(8, 8), label_fontsize=16, tick_fontsize=16, gridlines=False
)

## Precision-Recall Curves

### Precision-Recall Curve w/ Model Objects

In [None]:
# Precision-recall curves computed from the model objects and the test set
show_pr_curve(
    # --- inputs ---
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    # --- layout ---
    overlay=False,
    subplots=True,
    n_cols=3,
    figsize=(10, 4),
    # --- formatting ---
    decimal_places=3,
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    # Options left disabled in this cell:
    #   x_label="Hello", title="Custom", tick_fontsize=16,
    #   label_fontsize=16, grid=True, gridlines=False
)

### Precision-Recall Curve w/ Probabilities

In [None]:
# Precision-recall curves from precomputed probabilities (no model / X passed)
show_pr_curve(
    # --- inputs ---
    y_prob=y_prob,
    y=y_test,
    model_title=model_titles,
    # --- layout ---
    overlay=False,
    subplots=True,
    n_cols=3,
    figsize=(10, 4),
    # --- formatting ---
    decimal_places=3,
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    # Options left disabled in this cell:
    #   model=pipelines_or_models, X=X_test, x_label="Hello",
    #   title="Custom", tick_fontsize=16, label_fontsize=16,
    #   grid=True, gridlines=False
)

### Precision-Recall Curves: Race Category

In [None]:
# Precision-recall curves broken out by the "race" column of the augmented
# test set
show_pr_curve(
    # --- inputs ---
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    group_category=X_test_2["race"],
    # --- layout ---
    overlay=False,
    subplots=False,
    n_cols=3,
    # --- titles / formatting ---
    title=None,
    decimal_places=2,
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    # Options left disabled in this cell:
    #   title="Custom", title="", n_rows=1,
    #   curve_kwgs={"Logistic Regression": {"color": "blue", "linewidth": 2}},
    #   linestyle_kwgs={"color": "grey", "linestyle": "--"},
    #   figsize=(8, 8), label_fontsize=16, tick_fontsize=16, gridlines=False
)

## Summarize Model Performance

### Summarize Model Performance: Model Objects

In [None]:
# Show the title -> threshold mapping that is passed as `model_threshold` below
model_thresholds

In [None]:
# Performance summary computed from the model objects, using each model's
# own threshold from `model_thresholds`; return_df=True so the result is
# bound to `model_summary`.
model_summary = summarize_model_performance(
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    model_threshold=model_thresholds,
    return_df=True,
    # Options left disabled in this cell:
    #   decimal_places=6, custom_threshold=0.5
)

# Display the summary
model_summary

### Summarize Model Performance: Probabilities

In [None]:
# Same summary, driven by the precomputed probabilities in `y_prob` rather
# than by the model objects. Rebinds `model_summary`.
model_summary = summarize_model_performance(
    y_prob=y_prob,
    y=y_test,
    model_title=model_titles,
    model_threshold=model_thresholds,
    return_df=True,
    # Options left disabled in this cell:
    #   decimal_places=6, custom_threshold=0.5
)

# Display the summary
model_summary

In [None]:
# Display the list of model titles
model_titles

In [None]:
# Display the second title (index 1) in the list
model_titles[1]

## Plot The Calibration Curve

### Calibration Curve w/ Model Objects

In [None]:
# Calibration curves from the model objects, drawn in overlay mode
# (overlay=True, subplots=False) with an empty title string.
show_calibration_curve(
    # --- inputs ---
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    # --- layout ---
    overlay=True,
    subplots=False,
    figsize=(10, 6),
    # --- calibration settings ---
    bins=10,
    show_brier_score=True,
    # --- text / styling ---
    title="",
    text_wrap=40,
    label_fontsize=14,
    tick_fontsize=9,
    linestyle_kwgs={"color": "black"},
    # Options left disabled in this cell:
    #   model_titles=model_titles, title="Calibration Curves",
    #   gridlines=False
)

### Calibration Curve w/ Probabilities

In [None]:
# Calibration curves from precomputed probabilities, drawn in overlay mode
# (overlay=True, subplots=False) with an empty title string.
show_calibration_curve(
    # --- inputs ---
    y_prob=y_prob,
    y=y_test,
    # --- layout ---
    overlay=True,
    subplots=False,
    figsize=(10, 6),
    # --- calibration settings ---
    bins=10,
    show_brier_score=True,
    # --- text / styling ---
    title="",
    text_wrap=40,
    label_fontsize=14,
    tick_fontsize=9,
    linestyle_kwgs={"color": "black"},
    # Options left disabled in this cell:
    #   model_titles=model_titles, title="Calibration Curves",
    #   gridlines=False
)

## Plot The Confusion Matrix

### Confusion Matrix w/ Model Objects

In [None]:
# Show the thresholds that the confusion matrices below are given
model_thresholds

In [None]:
# Display the list of model objects passed to the plotting calls
pipelines_or_models

In [None]:
# Confusion matrices from the model objects, with per-model thresholds and
# the classification report turned off.
show_confusion_matrix(
    # --- inputs ---
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    model_threshold=model_thresholds,
    # --- layout: grid of 1 row x 3 columns ---
    grid=True,
    n_rows=1,
    n_cols=3,
    figsize=(4, 4),
    # --- appearance ---
    cmap="Blues",
    show_colorbar=False,
    text_wrap=35,
    label_fontsize=14,
    tick_fontsize=12,
    inner_fontsize=10,
    # --- reporting ---
    class_report=False,
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    # Options left disabled in this cell:
    #   title="Custom", custom_threshold=0.5, labels=False
)

In [None]:
# Display the title -> threshold mapping again after plotting
model_thresholds

### Confusion Matrix w/ Probabilities

In [None]:
# Hard predictions per model, in `pipelines_or_models` order.
# NOTE(review): `y_pred` is not referenced by any other cell visible in this
# notebook; the confusion-matrix call below uses `y_prob` instead.
y_pred = [estimator.predict(X_test) for estimator in pipelines_or_models]

In [None]:
# Confusion matrices from precomputed probabilities, with per-model
# thresholds and the classification report turned on.
show_confusion_matrix(
    # --- inputs (model=pipelines_or_models left disabled) ---
    y_prob=y_prob,
    y=y_test,
    model_title=model_titles,
    model_threshold=model_thresholds,
    # --- layout: grid of 1 row x 3 columns ---
    grid=True,
    n_rows=1,
    n_cols=3,
    figsize=(4, 4),
    # --- appearance ---
    cmap="Blues",
    show_colorbar=False,
    text_wrap=35,
    label_fontsize=14,
    tick_fontsize=12,
    inner_fontsize=10,
    # --- reporting ---
    class_report=True,
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    # Options left disabled in this cell:
    #   title="Custom", custom_threshold=0.5, labels=False
)

In [None]:
# List the column names of the test feature frame
X_test.columns.to_list()

## Lift Chart

### Lift Chart w/ Model Objects

In [None]:
# Lift charts computed from the model objects and the test set
show_lift_chart(
    # --- inputs ---
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    # --- layout ---
    overlay=False,
    subplots=True,
    n_cols=3,
    figsize=(10, 4),
    # --- line styling ---
    # Disabled example of per-model styling:
    #   curve_kwgs={
    #       "Logistic Regression": {"color": "blue", "linewidth": 2},
    #       "SVM": {"color": "red", "linestyle": "--", "linewidth": 1.5},
    #   }
    linestyle_kwgs={"color": "grey", "linestyle": "--"},
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    # Also disabled: title="Custom"
)

### Lift Chart w/ Probabilities

In [None]:
# Lift charts computed from precomputed probabilities
show_lift_chart(
    # --- inputs ---
    y_prob=y_prob,
    y=y_test,
    model_title=model_titles,
    # --- layout ---
    overlay=False,
    subplots=True,
    n_cols=3,
    figsize=(10, 4),
    # --- line styling ---
    # Disabled example of per-model styling:
    #   curve_kwgs={
    #       "Logistic Regression": {"color": "blue", "linewidth": 2},
    #       "SVM": {"color": "red", "linestyle": "--", "linewidth": 1.5},
    #   }
    linestyle_kwgs={"color": "grey", "linestyle": "--"},
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    # Also disabled: title="Custom"
)

## Gain Chart

### Gain Chart w/ Model Objects

In [None]:
# Gain charts computed from the model objects and the test set
show_gain_chart(
    # --- inputs ---
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    # --- layout ---
    overlay=False,
    subplots=True,
    n_cols=3,
    figsize=(10, 4),
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    # Options left disabled in this cell:
    #   title="Custom",
    #   curve_kwgs={
    #       "Logistic Regression": {"color": "blue", "linewidth": 2},
    #       "SVM": {"color": "red", "linestyle": "--", "linewidth": 1.5},
    #   },
    #   linestyle_kwgs={"color": "black", "linestyle": "-"}
)

### Gain Chart w/ Probabilities

In [None]:
# Gain charts computed from precomputed probabilities
show_gain_chart(
    # --- inputs ---
    y_prob=y_prob,
    y=y_test,
    model_title=model_titles,
    # --- layout ---
    overlay=False,
    subplots=True,
    n_cols=3,
    figsize=(10, 4),
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    # Options left disabled in this cell:
    #   title="Custom",
    #   curve_kwgs={
    #       "Logistic Regression": {"color": "blue", "linewidth": 2},
    #       "SVM": {"color": "red", "linestyle": "--", "linewidth": 1.5},
    #   },
    #   linestyle_kwgs={"color": "black", "linestyle": "-"}
)

In [None]:
# Summary for the first model only (the logistic regression entry),
# evaluated at a hand-picked threshold instead of the stored per-model one.
# Rebinds `model_summary`.
model_summary = summarize_model_performance(
    model=pipelines_or_models[0],
    X=X_test,
    y=y_test,
    # Only one model is passed, so pass only its matching title rather than
    # the full three-entry list.
    model_title=[model_titles[0]],
    return_df=True,
    custom_threshold=0.7811,
)

# Display the summary
model_summary

## Model Threshold Metrics

### Model Threshold Metrics w/ Model Objects

In [None]:
# Threshold metrics for the first model (logistic regression), driven by the
# model object and the test features.
plot_threshold_metrics(
    pipelines_or_models[0],
    X_test,
    y_test=y_test,
    # --- thresholds to mark ---
    baseline_thresh=True,
    model_threshold=model_thresholds["Logistic Regression"],
    # --- metric lookup ---
    lookup_metric="recall",
    lookup_value=0.56,
    # --- styling (an "alpha" entry is left disabled in each dict) ---
    baseline_kwgs={
        "color": "red",
        "linestyle": "--",
        "linewidth": 1,
    },
    threshold_kwgs={
        "color": "green",
        "linestyle": "--",
        "linewidth": 1,
    },
    curve_kwgs={
        "linestyle": "-",
        "linewidth": 1.25,
    },
    # Options left disabled in this cell:
    #   figsize=(6, 6), gridlines=False,
    #   title="Threshold Metrics for Model X", decimal_places=2,
    #   label_fontsize=10, tick_fontsize=8
)

### Model Threshold Metrics w/ Probabilities

In [None]:
# Threshold metrics for logistic regression, driven by its precomputed
# probabilities (first entry of `y_prob`) rather than the model object.
plot_threshold_metrics(
    y_prob=y_prob[0],
    y_test=y_test,
    # --- thresholds to mark ---
    baseline_thresh=False,
    model_threshold=model_thresholds["Logistic Regression"],
    # --- styling (an "alpha" entry is left disabled in each dict) ---
    threshold_kwgs={
        "color": "blue",
        "linestyle": "--",
        "linewidth": 1,
    },
    curve_kwgs={
        "linestyle": "-",
        "linewidth": 1.25,
    },
    # Options left disabled in this cell:
    #   figsize=(6, 6), gridlines=False,
    #   lookup_metric="recall", lookup_value=0.56,
    #   title="Threshold Metrics for Model X", decimal_places=2,
    #   label_fontsize=10, tick_fontsize=8
)

In [None]:
# Threshold metrics for the random forest, driven by its precomputed
# probabilities (third entry of `y_prob`); this variant also passes the
# save_plot flag and image directories.
plot_threshold_metrics(
    y_prob=y_prob[2],
    y_test=y_test,
    # --- thresholds to mark ---
    baseline_thresh=False,
    model_threshold=model_thresholds["Random Forest Classifier"],
    # --- styling ---
    threshold_kwgs={
        "color": "blue",
        "linestyle": "--",
        "linewidth": 1,
    },
    curve_kwgs={
        "linestyle": "-",
        "linewidth": 1.25,
    },
    # --- output ---
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)