## Import Necessary Libraries

In [None]:
import pandas as pd
import os

import model_metrics

print(model_metrics.__version__)

from model_tuner import loadObjects

from eda_toolkit import ensure_directory
from model_metrics import ( 
    summarize_model_performance,
    show_calibration_curve,
    show_confusion_matrix,
    show_roc_curve,
    show_pr_curve,
    show_lift_chart,
    show_gain_chart,
)

## Set Model Path

In [None]:
# Report which model_metrics build this notebook is running against.
print(f"Model Metrics version: {model_metrics.__version__}")
print(f"Model Metrics authors: {model_metrics.__author__} \n")

## Define base paths
## `base_path` is the parent of the current working directory
base_path = os.pardir

## From the parent directory, descend into the folder that holds the saved
## model artifacts. Path components are passed separately so that
## os.path.join inserts the platform's separator between them.
model_path = os.path.join(
    base_path,
    "model_files",
    "single_model_classification_results",
)

## Plot images are written beneath the model folder, split by file format
image_path_png = os.path.join(model_path, "images", "png_images")
image_path_svg = os.path.join(model_path, "images", "svg_images")

## Ensure the model directory and both image output directories exist
ensure_directory(model_path)
ensure_directory(image_path_png)
ensure_directory(image_path_svg)

## Load The Model Object, Test Data, and Validation Data

In [None]:
# Saved model object for the single-model classification run.
# NOTE(review): the artifact is a .pkl file, so it is presumably unpickled on
# load; only point this at files you trust.
model = loadObjects(os.path.join(model_path, "logistic_regression_model.pkl"))

# Test split (features, target) stored as parquet next to the model.
X_test, y_test = (
    pd.read_parquet(os.path.join(model_path, "X_test.parquet")),
    pd.read_parquet(os.path.join(model_path, "y_test.parquet")),
)

# Validation split (features, target) from the same folder.
X_valid, y_valid = (
    pd.read_parquet(os.path.join(model_path, "X_valid.parquet")),
    pd.read_parquet(os.path.join(model_path, "y_valid.parquet")),
)

## Set The Desired Naming Conventions

In [None]:
# Models to evaluate; kept as a list so more models can be appended later.
pipelines_or_models = [model]

# Display titles, one per entry of `pipelines_or_models`, in the same order.
model_titles = ["Logistic Regression"]

## Summarize Model Performance

In [None]:
# Summarize classification performance on the test split and keep the
# returned object (return_df=True) so it can be displayed and reused.
# Other keyword options explored in this notebook but not passed here:
# model_threshold, overall_only, custom_threshold, decimal_places.
model_summary = summarize_model_performance(
    model=pipelines_or_models,
    model_title=model_titles,
    model_type="classification",
    X=X_test,
    y=y_test,
    return_df=True,
)

model_summary

In [None]:
# Same summary as above, but without return_df so the function's default
# output path is used. Optional arguments (model_threshold, return_df,
# overall_only, custom_threshold, decimal_places) are intentionally not passed.
summarize_model_performance(
    model=pipelines_or_models,
    model_title=model_titles,
    model_type="classification",
    X=X_test,
    y=y_test,
)

## Plot The Calibration Curve

In [None]:
# Plot the calibration curve on the test split with overlay disabled
# (overlay=False). The Brier score is shown with 5 decimals.
# Optional title / grid / gridlines arguments are not passed.
show_calibration_curve(
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    overlay=False,
    bins=10,
    show_brier_score=True,
    brier_decimals=5,
    figsize=(6, 4),
    text_wrap=40,
    label_fontsize=14,
    tick_fontsize=9,
    linestyle_kwgs={"color": "black"},
)

## Plot The Confusion Matrix

In [None]:
# Show the model list as the cell output to confirm what will be plotted.
pipelines_or_models

In [None]:
# Confusion matrix on the validation split, computed from the model objects
# and the validation features. The figure is saved to both image folders.
show_confusion_matrix(
    model=pipelines_or_models,
    X=X_valid,
    y=y_valid,
    # The keyword is `model_title` (singular), as in every other model_metrics
    # call in this notebook, including the y_prob-based confusion matrix below.
    # NOTE(review): the previous `model_titles=` spelling was presumably
    # ignored or rejected by the function -- confirm against the installed
    # model_metrics version.
    model_title=model_titles,
    cmap="Blues",
    text_wrap=40,
    # title="Custom",
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
    grid=False,
    # n_cols=3,
    # n_rows=1,
    figsize=(6, 4),
    show_colorbar=True,
    label_fontsize=14,
    tick_fontsize=12,
    inner_fontsize=10,
    class_report=True,
    # custom_threshold=0.5,
    # labels=False,
)

In [None]:
# Second column of predict_proba on the validation features, one array per
# model in the list.
y_prob = [
    estimator.predict_proba(X_valid)[:, 1] for estimator in pipelines_or_models
]

In [None]:
# Show the per-model probability arrays as the cell output.
y_prob

In [None]:
# Confusion matrix on the validation split again, this time driven by the
# precomputed probabilities (y_prob) instead of the model and feature matrix.
# The target is converted to a NumPy array before being passed in.
# Optional title / n_cols / n_rows / custom_threshold / labels are not passed.
show_confusion_matrix(
    y_prob=y_prob,
    y=y_valid.to_numpy(),
    model_title=model_titles,
    class_report=True,
    cmap="Blues",
    show_colorbar=True,
    grid=False,
    figsize=(6, 4),
    text_wrap=40,
    label_fontsize=14,
    tick_fontsize=12,
    inner_fontsize=10,
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)

## Plot ROC AUC Curve

### On Validation Set

In [None]:
# Second column of predict_proba on the validation features, one array per
# model; the list is then shown as the cell output.
y_prob_valid = [
    estimator.predict_proba(X_valid)[:, 1] for estimator in pipelines_or_models
]
y_prob_valid

In [None]:
# ROC curve on the validation split, drawn from precomputed probabilities
# (y_prob) rather than from the model and feature matrix.
# Per-model styling via curve_kwgs / linestyle_kwgs is left unset here.
show_roc_curve(
    y=y_valid,
    y_prob=y_prob_valid,
    model_title=model_titles,
    title=None,
    overlay=False,
    subplots=False,
    n_cols=3,
    figsize=(4, 4),
    decimal_places=3,
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)

### On Test Set

In [None]:
# Second column of predict_proba on the test features, one array per model;
# the list is then shown as the cell output.
y_prob_test = [
    estimator.predict_proba(X_test)[:, 1] for estimator in pipelines_or_models
]
y_prob_test

In [None]:
# ROC curve on the test split, drawn from precomputed probabilities (y_prob)
# rather than from the model and feature matrix.
# Per-model styling via curve_kwgs / linestyle_kwgs is left unset here.
show_roc_curve(
    y=y_test,
    y_prob=y_prob_test,
    model_title=model_titles,
    title=None,
    overlay=False,
    subplots=False,
    n_cols=3,
    figsize=(4, 4),
    decimal_places=3,
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)

## Optimal Operating Point Using Youden's J Statistic

In [None]:
# ROC curve on the test split with the operating point marked. The point is
# selected with the "youden" method and drawn as a red circle marker.
# Per-model styling via curve_kwgs / linestyle_kwgs is left unset here.
show_roc_curve(
    y=y_test,
    y_prob=y_prob_test,
    model_title=model_titles,
    title=None,
    overlay=False,
    subplots=False,
    n_cols=3,
    figsize=(4, 4),
    decimal_places=3,
    show_operating_point=True,
    operating_point_method="youden",
    operating_point_kwgs={"marker": "o", "color": "red"},
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)

## Precision-Recall Curve

In [None]:
# Precision-recall curve on the test split, computed from the model objects.
# Overlay and subplots are both disabled; the figure is saved as PNG and SVG.
show_pr_curve(
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    overlay=False,
    subplots=False,
    figsize=(4, 4),
    decimal_places=3,
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)

## Lift Chart

In [None]:
# Lift chart on the test split, computed from the model objects.
# linestyle_kwgs is set to a grey dashed style; the figure is saved as PNG
# and SVG.
show_lift_chart(
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    overlay=False,
    subplots=False,
    figsize=(4, 4),
    linestyle_kwgs={"color": "grey", "linestyle": "--"},
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)

## Gain Chart

In [None]:
# Gain chart on the test split, computed from the model objects.
# No custom curve or line styling is passed; the figure is saved as PNG
# and SVG.
show_gain_chart(
    model=pipelines_or_models,
    X=X_test,
    y=y_test,
    model_title=model_titles,
    overlay=False,
    subplots=False,
    figsize=(4, 4),
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)

## Make Classification Examples

In [None]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Synthetic classification data: 1,000 samples with 10 features, seeded for
# reproducibility.
X, y = make_classification(n_samples=1000, n_features=10, random_state=42)

# Hold out 20% of the rows for testing.
# NOTE: this rebinds X_test and y_test, which earlier cells loaded from
# parquet; re-run the loading cell before re-running the earlier plots.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the two example classifiers on the training portion.
model1 = LogisticRegression(random_state=3)
model1.fit(X_train, y_train)
model2 = RandomForestClassifier(random_state=3)
model2.fit(X_train, y_train)

# Display titles, in the order the models are passed to the plots below.
# NOTE: this replaces the single-entry model_titles defined earlier.
model_titles = ["Logistic Regression", "Random Forest"]

In [None]:
from model_metrics import summarize_model_performance

# Summarize both example classifiers on the synthetic test split; the value
# returned with return_df=True becomes the cell output.
summarize_model_performance(
    model=[model1, model2],
    model_title=model_titles,
    model_type="classification",
    X=X_test,
    y=y_test,
    return_df=True,
)

In [None]:
from model_metrics import show_roc_curve

# ROC curves for both example classifiers, requested as subplots in a
# 1 x 2 layout. Each model has its own curve colour, linestyle_kwgs is set to
# a red dashed style, and the "youden" operating point is marked with a red
# circle.
show_roc_curve(
    model=[model1, model2],
    X=X_test,
    y=y_test,
    model_title=model_titles,
    subplots=True,
    n_rows=1,
    n_cols=2,
    figsize=(8, 4),
    decimal_places=2,
    curve_kwgs={
        "Logistic Regression": {"color": "blue", "linewidth": 1.5},
        "Random Forest": {"color": "black", "linewidth": 1.5},
    },
    linestyle_kwgs={"color": "red", "linestyle": "--"},
    show_operating_point=True,
    operating_point_method="youden",
    operating_point_kwgs={"marker": "o", "color": "red"},
    save_plot=True,
    image_path_png=image_path_png,
    image_path_svg=image_path_svg,
)