In [None]:
import os
from sklearn import set_config

# Render scikit-learn estimators as diagrams in notebook output.
set_config(display="diagram")
# Move the working directory two levels up (presumably to the Kedro project
# root — confirm). NOTE(review): the path is relative, so re-running this cell
# in the same kernel climbs another two levels.
os.chdir("../../")

import pandas as pd

# Show up to 50 columns when a DataFrame is displayed.
pd.set_option("display.max_columns", 50)


# Load the Kedro IPython extension, then reload the Kedro project located at
# the (just changed) current directory. Presumably this is what provides the
# `catalog` object used further down — confirm.
%load_ext kedro.ipython
%reload_kedro .

## Model namespaces to be evaluated

In [None]:
from project.namespaces import NAMESPACES as namespaces

# Bare expression: display the namespaces as this cell's output.
namespaces

## Metrics Compilation

In [None]:
import plotly.express as px


def get_compile_metric_dataset(namespace, cross_validation_metrics):
    """Build a one-row DataFrame holding the metrics of a single model.

    Args:
        namespace: Label written to the ``model`` column of the returned row.
        cross_validation_metrics: Mapping from metric name to an entry that is
            indexed with ``"value"`` to obtain the number for that metric.

    Returns:
        A single-row DataFrame whose first column is ``model``, followed by one
        column per metric in the mapping's iteration order. The column index
        keeps the name ``"metric"``.
    """
    rows = [
        [name, entry["value"]] for name, entry in cross_validation_metrics.items()
    ]
    long_form = pd.DataFrame(rows, columns=["metric", "value"])

    # Turn the (metric, value) pairs into one wide row with a fresh 0-based index.
    wide = long_form.set_index("metric").T.reset_index(drop=True)
    metric_columns = list(wide.columns)

    # Attach the label column, then reorder so that it comes first.
    return wide.assign(model=namespace)[["model"] + metric_columns]


# Collect one single-row metrics frame per namespace, read from the model
# artifact that the catalog stores under "<namespace>.model_artifact".
dfs_metrics = []
for namespace in namespaces:
    model = catalog.load(f"{namespace}.model_artifact")
    cross_validation_metrics = model.hypertune_results["cross_validation_metrics"]
    dfs_metrics.append(
        get_compile_metric_dataset(namespace, cross_validation_metrics)
    )

# Stack the rows (one per model) under a fresh 0..n-1 index.
df_metrics = pd.concat(dfs_metrics, ignore_index=True)
# Flip the table so that metrics become rows and models become columns.
df_metrics_transpose = df_metrics.set_index("model").transpose()
df_metrics_transpose

## Visualization of all cross-validation metrics

In [None]:
# Reshape the metric-by-model table into long form: one row per
# (metric, model) pair with the columns "metric", "value" and "model".
dfs = []
for col in df_metrics_transpose.columns:
    # Take the model's column as a Series named "value", move the index into
    # a regular column, and tag every row with the model it belongs to.
    data = df_metrics_transpose[col].rename("value").reset_index().assign(model=col)
    dfs.append(data)

df = pd.concat(dfs, ignore_index=True)
fig = px.bar(
    df,
    x="metric",
    y="value",
    color="model",
    barmode="group",
    height=400,
)
fig.show()

## Specific Metrics Visualization

In [None]:
# Names of the metrics to single out in the chart below.
metrics = [
    "accuracy",
    "f1_weighted",
    "precision_weighted",
    "recall_weighted",
    "roc_auc",
]

# Boolean mask selecting the long-form rows whose metric is in the list above.
is_selected = df["metric"].isin(metrics)

fig = px.bar(
    df.loc[is_selected],
    x="metric",
    y="value",
    color="model",
    barmode="group",
    height=400,
)
fig.show()

## Best and worst models by mean of the selected metrics

In [None]:
# Average the selected metrics per model and rank the models from the highest
# to the lowest mean.
# Only the numeric "value" column is aggregated: `df` also carries the string
# "metric" column, and since pandas 2.0 `GroupBy.mean()` no longer drops
# non-numeric columns silently but raises a TypeError on them.
df_mean_metrics = (
    df.loc[df["metric"].isin(metrics), ["model", "value"]]
    .groupby("model")
    .mean()
    .sort_values("value", ascending=False)
)
df_mean_metrics

## Model optimization results

In [None]:
# One bar per model showing its mean metric value; "model" is moved from the
# index into a column so it can be used for both the x axis and the colour.
mean_by_model = df_mean_metrics.reset_index()
fig = px.bar(mean_by_model, x="model", y="value", color="model")
fig.show()