In [None]:
import os
from sklearn import set_config
set_config(display='diagram')
os.chdir("../../")

import pandas as pd

pd.set_option('display.max_columns', 50)


%load_ext kedro.ipython
%reload_kedro .



In [None]:
# `namespace` selects which model artifact is loaded (`f"{namespace}.model_artifact"`)
# and is interpolated into every plot title below.
# NOTE(review): it is never assigned inside this notebook — presumably it is injected
# externally (e.g. as a notebook parameter); confirm. Uncomment the line below to run manually.
# namespace = "xgboost"

In [None]:
# Display names for the two target classes; used below as confusion-matrix axis
# labels and as the column names of the predicted-probability frame.
class_names = ["class_die", "class_survived"]

# Load the model artifact registered in the catalog under the current namespace.
model = catalog.load(f"{namespace}.model_artifact")

## Model metrics 

### All metrics and predictions are out-of-sample, computed with scikit-learn's **cross_val_predict**


In [None]:
# Turn the raw scores stored on the model into a display-friendly mapping:
# "some_metric" -> "Some metric", fraction -> percentage string with two decimals.
metrics = dict(
    (
        score_name.capitalize().replace("_", " "),
        str(round(score_value * 100, 2)) + " [%]",
    )
    for score_name, score_value in model.scores.items()
)
metrics

In [None]:
import plotly.subplots as sp
import plotly.graph_objects as go
import numpy as np


def plot_confusion_matrix(y_true, y_pred, class_names=("class1", "class2"), title=None):
    """
    Create an interactive confusion matrix plot using Plotly.

    Rows of the matrix are the actual classes, columns the predicted classes.
    The set of classes is the sorted union of the values found in ``y_true``
    and ``y_pred``.

    Args:
        y_true (array-like): True classes.
        y_pred (array-like): Predicted classes; must have as many entries as ``y_true``.
        class_names (sequence of str): Axis labels, one per distinct class, in the
            sorted order returned by ``np.unique``. If the number of names does not
            match the number of classes found, a warning is issued and the class
            values themselves are used as labels.
        title (str, optional): Figure title. When omitted, the title is built from
            the notebook-level ``namespace`` variable.

    Returns:
        None

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` have different lengths.
    """
    import warnings

    # Convert to flat arrays so indexing is positional even for pandas objects
    # whose index is not 0..n-1.
    true_labels = np.asarray(y_true).ravel()
    pred_labels = np.asarray(y_pred).ravel()
    n_samples = true_labels.shape[0]
    if pred_labels.shape[0] != n_samples:
        raise ValueError(
            f"y_true and y_pred must have the same length, got {n_samples} and {pred_labels.shape[0]}"
        )

    # Encode every label as its position in the sorted set of classes.
    classes, codes = np.unique(np.concatenate([true_labels, pred_labels]), return_inverse=True)
    codes = np.ravel(codes)
    n_classes = len(classes)

    # Count (actual, predicted) pairs; np.add.at accumulates repeated index pairs.
    confusion_matrix = np.zeros((n_classes, n_classes), dtype=int)
    np.add.at(confusion_matrix, (codes[:n_samples], codes[n_samples:]), 1)

    axis_labels = list(class_names)
    if len(axis_labels) != n_classes:
        warnings.warn(
            f"Got {len(axis_labels)} class names for {n_classes} classes; "
            "falling back to the class values as axis labels.",
            stacklevel=2,
        )
        axis_labels = [str(value) for value in classes]

    if title is None:
        title = f'Confusion Matrix {namespace} model'

    trace = go.Heatmap(
        z=confusion_matrix,
        x=axis_labels,
        y=axis_labels,
        text=confusion_matrix,
        colorscale='Blues',
        showscale=True,
        hoverinfo='text',
        colorbar=dict(title='Count'),
    )

    # Cells darker than half of the maximum count get white text for contrast.
    contrast_threshold = 0.5 * confusion_matrix.max() if confusion_matrix.size else 0

    # Add the count as an annotation to each cell (x = column, y = row position).
    annotations = [
        dict(
            x=col,
            y=row,
            text=str(confusion_matrix[row][col]),
            showarrow=False,
            font=dict(color='white' if confusion_matrix[row][col] > contrast_threshold else 'black'),
        )
        for row in range(n_classes)
        for col in range(n_classes)
    ]

    layout = go.Layout(
        title=title,
        # Category axes keep the positional annotations aligned with the cells
        # even when the labels look numeric.
        xaxis=dict(title='Predicted', type='category'),
        yaxis=dict(title='Actual', type='category'),
        annotations=annotations,
        template='plotly_dark',  # Choose a plotly template (e.g., dark mode)
    )

    fig = go.Figure(data=[trace], layout=layout)
    fig.show()
    

# Predictions and the matching training targets stored on the model artifact;
# both are reused by the plotting cells below.
y_pred, y_true = model.y_pred, model.y_train



## Classifier Confusion Matrix

In [None]:
# Flatten both label containers to 1-D arrays before plotting.
plot_confusion_matrix(
    np.ravel(y_true),
    np.ravel(y_pred),
    class_names=class_names,
)


## Classification report on *out of sample predictions*

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / f1 as a nested dict.
report = classification_report(y_true, y_pred, output_dict=True)

# Every report row except the scalar 'accuracy' entry becomes a heatmap row.
class_labels = list(report.keys())
class_labels.remove('accuracy')

metric_names = ['precision', 'recall', 'f1-score']

# Matrix of rounded metric values: one row per report entry, one column per metric.
metrics_array = np.array(
    [[round(report[row_label][col_name], 2) for col_name in metric_names] for row_label in class_labels],
    dtype=float,
).reshape(len(class_labels), len(metric_names))

# Heatmap of the report values.
fig = go.Figure(
    data=go.Heatmap(
        z=metrics_array,
        x=metric_names,
        y=class_labels,
        colorscale='Bluered',
        colorbar=dict(title='Metric Values'),
        hovertemplate='Class: %{y}<br>Metric: %{x}<br>Value: %{z:.2f}<extra></extra>',
    )
)

fig.update_layout(
    title=f'Classification Report {namespace} model',
    xaxis=dict(title='Metrics'),
    yaxis=dict(title='Class Labels'),
    template='plotly_dark',  # Choose a plotly template (e.g., dark mode)
)

# Write each value into its cell; values above half of the maximum get white text.
text_threshold = 0.5 * metrics_array.max()
for row_label, row_values in zip(class_labels, metrics_array):
    for col_name, cell_value in zip(metric_names, row_values):
        fig.add_annotation(
            x=col_name,
            y=row_label,
            text=str(cell_value),
            showarrow=False,
            font=dict(color='white' if cell_value > text_threshold else 'black'),
        )

fig.show()

## ROC AUC Curve

In [None]:
from sklearn.metrics import roc_curve, auc

# Rename the probability columns on a copy so the DataFrame owned by `model`
# keeps its original column labels.
y_score = model.y_score.copy()
y_score.columns = class_names

# Scores of the second class column, used as the positive-class probability.
# `.to_numpy()` replaces the deprecated `Series.ravel()`.
y_probs = y_score[y_score.columns[1]].to_numpy()

# Compute ROC curve and AUC
fpr, tpr, thresholds = roc_curve(y_true[model.target].to_numpy(), y_probs)
roc_auc = auc(fpr, tpr)

# Create a Plotly figure
fig = go.Figure()

# Add ROC curve to the figure
fig.add_trace(
    go.Scatter(
        x=fpr,
        y=tpr,
        mode='lines',
        name=f'ROC Curve (AUC={roc_auc:.2f})',
        line=dict(width=2),
    )
)

# Customize the layout
fig.update_layout(
    title=f'ROC Curve {namespace} model',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    xaxis_range=[0, 1],
    yaxis_range=[0, 1],
    legend=dict(x=0.7, y=0.2),
    template='plotly_dark',  # Choose a plotly template (e.g., dark mode)
)

# Add diagonal reference line (performance of a random classifier)
fig.add_shape(
    type='line',
    x0=0,
    y0=0,
    x1=1,
    y1=1,
    line=dict(dash='dash', color='gray'),
)

# Show the figure explicitly, like the other plot cells, instead of relying on
# the return value of the last expression being rendered.
fig.show()


## Calibration plot

In [None]:
from sklearn.calibration import calibration_curve

# Observed fraction of positives vs. mean predicted probability over 10 bins.
prob_true, prob_pred = calibration_curve(y_true, y_probs, n_bins=10)

# Build the figure with both traces at once: the diagonal reference first,
# then the model's calibration curve.
fig = go.Figure(
    data=[
        go.Scatter(
            x=[0, 1],
            y=[0, 1],
            mode='lines',
            line=dict(dash='dash'),
            name="Perfectly Calibrated",
        ),
        go.Scatter(
            x=prob_pred,
            y=prob_true,
            mode='lines+markers',
            name=f'{namespace} Calibration',
            line=dict(width=2),
        ),
    ]
)

# Titles, unit-square axes and legend placement.
fig.update_layout(
    title=f'Calibration Curve for {namespace}',
    xaxis_title='Mean Predicted Probability',
    yaxis_title='Fraction of Positives',
    xaxis_range=[0, 1],
    yaxis_range=[0, 1],
    legend=dict(x=0.7, y=0.2),
    template='plotly_dark',  # Choose a plotly template (e.g., dark mode)
)

fig.show()

## Cumulative gain plot 

In [None]:

# Sort the data by predicted probabilities in descending order
sorted_indices = np.argsort(y_probs)[::-1]
# `.to_numpy()` replaces the deprecated `Series.ravel()`; indexing is positional.
y_true_sorted = y_true[model.target].to_numpy()[sorted_indices]

# Calculate cumulative gain: share of all positives captured after examining
# the k highest-scored samples.
cumulative_gain = np.cumsum(y_true_sorted) / np.sum(y_true_sorted)

# Calculate the fraction of the dataset examined in percentage
fraction_examined_percentage = np.arange(1, len(cumulative_gain) + 1) / len(cumulative_gain) * 100

# Calculate the Gain in percentage
cumulative_gain_percentage = cumulative_gain * 100

# Create a Plotly figure
fig = go.Figure()

fig.add_trace(go.Scatter(x=fraction_examined_percentage, y=cumulative_gain_percentage, mode='lines', name='Cumulative Gain Curve'))

# Customize the layout
fig.update_layout(
    title=f'Cumulative Gain Curve for {namespace} Model',
    xaxis_title='Fraction of Dataset Examined (%)',
    yaxis_title='Gain (%)',
    xaxis_range=[0, 100],  # Set the x-axis range to 0-100%
    yaxis_range=[0, 100],  # Set the y-axis range to 0-100%
    legend=dict(x=0.7, y=0.2),
    template='plotly_dark',  # Choose a plotly template (e.g., dark mode)
)

# Show the figure
fig.show()



## Predicted class probabilities by sample index

In [None]:
import plotly.express as px

# Combined label used for both the subtitle and the y-axis title.
class_name_str = "/ ".join(class_names)

# One line per probability column, plotted against the frame's index.
fig = px.line(
    y_score.reset_index(),
    x=y_score.index.name,
    y=[column for column in y_score.columns if "class" in column],
)
fig.update_traces(line_width=3.5)

fig.update_layout(
    template='plotly_dark',  # Choose a plotly template (e.g., dark mode)
    title=dict(
        text=f'{namespace} Model' + "<br>" + f"{class_name_str}",
        y=.93,
        font=dict(family="Courier New", size=32),
    ),
    font_size=20,
    xaxis_title=y_score.index.name,
    yaxis_title=class_name_str,
    boxgroupgap=0,
    # Untitled vertical legend anchored just above the top-right corner of the plot.
    legend=dict(
        title="",
        orientation="v",
        xanchor="right",
        x=1,
        yanchor="bottom",
        y=1.02,
    ),
    # Fixed full-HD canvas with a large top margin for the two-line title.
    width=1920,
    height=1080,
    margin=dict(l=150, r=75, t=270, b=15),
)

fig.show()
