In [1]:
"""Work on confidence thresholds determination."""
# pylint: disable=line-too-long, redefined-outer-name, import-error, pointless-statement, use-dict-literal, expression-not-assigned, unused-import, too-many-lines

In [4]:
from __future__ import annotations

import shutil
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from epi_ml.utils.general_utility import get_valid_filename

## Confidence threshold impact on accuracy

In [None]:
def compute_metrics(
    df: pd.DataFrame,
    threshold: float,
    true_col: str,
    pred_col: str,
    pred_prob_cols: List[str],
    target_class: str | None,
) -> Tuple[float, float, float, float]:
    """
    Compute accuracy, precision, and subset size for a given probability threshold and class.

    Only the rows whose highest predicted probability is >= `threshold` are kept (the "subset").

    Parameters:
    df (pd.DataFrame): The input DataFrame containing the true labels, predicted labels, and predicted probabilities.
    threshold (float): The probability threshold for filtering the DataFrame.
    true_col (str): The column name containing the true labels.
    pred_col (str): The column name containing the predicted labels.
    pred_prob_cols (List[str]): List of column names containing the predicted probabilities.
    target_class (str|None): The class to restrict the computations to. None or "all" means every sample is considered.

    With a target class:
      - accuracy is the share of kept target-class samples (by true label) that are correctly predicted,
      - precision is the share of kept samples predicted as the target class that truly belong to it,
      - subset size is the share of target-class samples (by true label) that are kept.
    Without a target class, accuracy and subset size are computed over all samples and precision is np.nan.

    Returns:
    Tuple[float, float, float, float]: A tuple containing the threshold, the calculated accuracy (%), the calculated precision (%),
                                       and the subset size (%) respectively. All four values are np.nan when no sample
                                       passes the threshold. Individual metrics are np.nan when their denominator is zero.
    """
    use_all_samples = target_class is None or target_class == "all"

    # Reference size: every sample, or only the samples whose true label is the target class.
    # (A boolean mask must be summed; its len() is always the number of rows.)
    if use_all_samples:
        total_size = len(df)
    else:
        total_size = int((df[true_col] == target_class).sum())

    # Filter rows where the max predicted probability is above the threshold
    subset_df = df[df[pred_prob_cols].max(axis=1) >= threshold]

    if len(subset_df) == 0:
        return np.nan, np.nan, np.nan, np.nan

    # Accuracy over the kept samples (restricted to the target class when given)
    is_correct = subset_df[true_col] == subset_df[pred_col]
    if use_all_samples:
        subset_size = len(subset_df)
        correct_preds = int(is_correct.sum())
    else:
        is_target = subset_df[true_col] == target_class
        subset_size = int(is_target.sum())
        correct_preds = int((is_correct & is_target).sum())

    # Explicit guards: an empty denominator yields np.nan instead of a numpy division warning
    accuracy = (correct_preds / subset_size) * 100 if subset_size else np.nan
    subset_size_percent = (subset_size / total_size) * 100 if total_size else np.nan

    # Precision is only defined relative to a target class
    if use_all_samples:
        return threshold, accuracy, np.nan, subset_size_percent

    predicted_as_target = subset_df[pred_col] == target_class
    true_positives = int((is_target & predicted_as_target).sum())
    # true positives + false positives == every kept sample predicted as the target class
    predicted_positives = int(predicted_as_target.sum())

    if predicted_positives == 0:
        precision = np.nan
    else:
        precision = (true_positives / predicted_positives) * 100

    return threshold, accuracy, precision, subset_size_percent

In [None]:
# Prefixes of the metric column names in the dataframes built by evaluate_thresholds.
ACCURACY_NAME = "rec"
PRECISION_NAME = "prec"
SUBSET_SIZE_NAME = "sz"


def evaluate_thresholds(
    df: pd.DataFrame, thresholds: List[float]
) -> Dict[str, pd.DataFrame]:
    """
    Evaluate accuracy, precision and subset size at different probability thresholds, for each class and for all samples.

    The true/predicted label columns are detected automatically: the first column whose name
    contains "true" and the first one whose name contains "pred" (case-insensitive) are used.
    The predicted probability columns are expected to be named after the classes found in the true label column.

    Parameters:
    df (pd.DataFrame): The dataframe containing true labels, predicted labels and predicted probabilities.
    thresholds (list): List of probability thresholds to evaluate.

    Returns:
    Dict[str, pd.DataFrame]: One dataframe per class label, plus one under the key "all".
                             Each dataframe has one row per threshold and the columns "Threshold",
                             "rec_<label> (%)", "prec_<label> (%)" and "sz_<label> (%) (<n>)", where <label> is the
                             class label truncated to 10 characters and <n> is the number of samples
                             considered for that class (true label or predicted label equal to the class).

    Raises:
    ValueError: If the true or predicted label column cannot be detected.
    """
    # Automatic column detection (non-string column names cannot match)
    likely_true_class_cols = [
        col for col in df.columns if isinstance(col, str) and "true" in col.lower()
    ]
    likely_pred_class_cols = [
        col for col in df.columns if isinstance(col, str) and "pred" in col.lower()
    ]

    if not likely_true_class_cols or not likely_pred_class_cols:
        raise ValueError(
            "Could not automatically detect 'True class' or 'Predicted class' columns."
        )

    true_col = likely_true_class_cols[0]
    pred_col = likely_pred_class_cols[0]
    if df[true_col].dtype != object or df[pred_col].dtype != object:
        print(f"{true_col} and {pred_col} are not string columns. Could cause issues.")

    # Missing true labels are not a class: keeping NaN here would make it a (non-existent)
    # probability column name and break every computation.
    pred_prob_cols = df[true_col].dropna().unique().tolist()
    classes = pred_prob_cols + ["all"]

    # Evaluate each threshold over each class
    results_dfs = {}
    for class_label in classes:
        # For a given class, only keep samples that are of this class or predicted as this class
        if class_label == "all":
            filtered_df = df
        else:
            filtered_df = df[
                (df[true_col] == class_label) | (df[pred_col] == class_label)
            ]

        results = [
            compute_metrics(
                filtered_df,
                thresh,
                true_col,
                pred_col,
                pred_prob_cols,
                target_class=class_label,
            )
            for thresh in thresholds
        ]

        # Convert to DataFrame for easier manipulation
        # str() keeps the truncation valid for non-string labels
        short_class_label = str(class_label)[0:10]
        results_df = pd.DataFrame(
            results,
            columns=[
                "Threshold",
                f"{ACCURACY_NAME}_{short_class_label} (%)",
                f"{PRECISION_NAME}_{short_class_label} (%)",
                f"{SUBSET_SIZE_NAME}_{short_class_label} (%) ({filtered_df.shape[0]})",
            ],
        )

        results_dfs[class_label] = results_df

    return results_dfs

In [None]:
def create_thresholds_graph_plotly(threshold_dfs: Dict[str, pd.DataFrame], name: str):
    """
    Return graph of the accuracy, precision and subset size at different probability thresholds for all classes.

    Each dataframe contributes up to three traces sharing one color: accuracy (solid line),
    precision (dotted line, skipped when it is entirely NaN) and subset size (dashed line, secondary Y-axis).

    Parameters:
    threshold_dfs (Dict[str, pd.DataFrame]): A dictionary containing dataframes for each class label and the general case.
                                             Each dataframe needs a "Threshold" column and one column per metric,
                                             named "<metric prefix>_<label> ...".
    name (str): Graph title.

    Returns:
    go.Figure: Plotly figure object with the plotted graph.
    """

    def _metric_column(frame: pd.DataFrame, prefix: str) -> str:
        """Return the name of the column of `frame` holding the metric identified by `prefix`."""
        # Match on the "<prefix>_" start of the name. A plain substring search is ambiguous:
        # "rec" is contained in "prec", and a class label can contain any of the prefixes.
        prefixed = [col for col in frame.columns if str(col).startswith(f"{prefix}_")]
        if prefixed:
            return prefixed[0]
        # Fallback for dataframes that do not follow the "<prefix>_<label>" naming
        return frame.filter(like=prefix).columns[0]

    fig = go.Figure()
    colors = px.colors.qualitative.Dark24
    marker1 = "circle"
    marker2 = "cross-open"
    marker3 = "circle-open"

    for idx, threshold_df in enumerate(threshold_dfs.values()):
        # Colors are reused cyclically when there are more dataframes than palette entries
        color = colors[idx % len(colors)]

        acc_label = _metric_column(threshold_df, ACCURACY_NAME)
        acc_subset = _metric_column(threshold_df, SUBSET_SIZE_NAME)
        prec_label = _metric_column(threshold_df, PRECISION_NAME)

        # Plot accuracy
        fig.add_trace(
            go.Scatter(
                x=threshold_df["Threshold"],
                y=threshold_df[acc_label],
                name=acc_label,
                line=dict(color=color),
                marker_symbol=marker1,
                mode="lines+markers",
            )
        )

        # Plot precision
        prec_vals = threshold_df[prec_label]
        if not prec_vals.isna().all():
            fig.add_trace(
                go.Scatter(
                    x=threshold_df["Threshold"],
                    y=prec_vals,
                    name=prec_label,
                    line=dict(color=color, dash="dot"),
                    marker_symbol=marker2,
                    mode="lines+markers",
                )
            )

        # Plot subset size on secondary Y-axis
        fig.add_trace(
            go.Scatter(
                x=threshold_df["Threshold"],
                y=threshold_df[acc_subset],
                name=acc_subset,
                line=dict(color=color, dash="dash"),
                marker_symbol=marker3,
                yaxis="y2",
                mode="lines+markers",
            )
        )

    # Adjusting the layout
    fig.update_layout(
        title=f"Accuracy and Subset Size at Different Probability Thresholds<br>{name}",
        xaxis_title="Probability Threshold",
        xaxis=dict(
            tickvals=np.linspace(0, 1, 11),
            ticktext=[f"{x:.1f}" for x in np.linspace(0, 1, 11)],
        ),
        yaxis_title="Accuracy (%)",
        yaxis2=dict(title="Subset Size (%)", overlaying="y", side="right"),
        legend=dict(orientation="v", x=1.05, y=1),
        height=1000,
        width=1600,
    )
    # Small margin so that markers at 0 and 1 are not cut off
    fig.update_xaxes(range=[-0.001, 1.001])
    fig.update_traces(line={"width": 1})

    return fig

In [None]:
# Select files for analysis
# The list file holds one results file path per line.
list_path = (
    Path.home() / "Projects/epilap/output/logs/epiatlas-dfreeze-v2.1/10fold_results.list"
)

# Secondary folder where a copy of every generated graph is gathered
output2 = Path.home() / "downloads" / "temp" / "threshold_impact_graph"

# Blank lines are skipped: Path("") is the current directory, not a results file.
with open(list_path, "r", encoding="utf8") as f:
    files = [Path(line.strip()) for line in f if line.strip()]

In [None]:
# Thresholds from 0 to 0.95 in steps of 0.05, plus 0.99
thresholds = list(np.arange(0, 1, 1 / 20)) + [0.99]

# shutil.copy does not create missing folders
output2.mkdir(parents=True, exist_ok=True)

for file in files:
    print(file)
    # compute
    df = pd.read_csv(file, header=0, dtype={"True class": str, "Predicted class": str})
    nb_samples = df.shape[0]
    # Every numeric column is counted as one class
    nb_classes = len(df.select_dtypes(include=[np.number]).columns.tolist())  # type: ignore

    threshold_dfs = evaluate_thresholds(df, thresholds)

    # plot
    # The graph name is built from the two folders above the results file
    name = f"{file.parents[1].name} - {file.parents[0].name} - {nb_classes} classes"
    graph = create_thresholds_graph_plotly(threshold_dfs, f"{name} - n={nb_samples}")

    # save
    filename = f"threshold_impact_graph_full_{get_valid_filename(name)}".replace(
        "_-_", "-"
    )
    print(filename)
    hmtl_name = filename + ".html"
    png_name = filename + ".png"

    # Graphs are written next to the results file...
    out1_html = file.parent / hmtl_name
    out1_png = file.parent / png_name

    graph.write_html(out1_html)
    graph.write_image(out1_png, scale=3)

    # ...and copied to the shared output folder
    shutil.copy(out1_html, output2 / hmtl_name)
    shutil.copy(out1_png, output2 / png_name)