In [None]:
import pandas as pd
import plotly.express as px

In [None]:
# Hard-coded sample data: 50 ground-truth boolean labels, one per observation.
y_true = [False,  True,  True, False, False,  True, False, False, False,
       False, False,  True,  True,  True,  True,  True, False, False,
        True,  True, False,  True, False,  True,  True, False, False,
        True,  True, False,  True,  True,  True,  True, False,  True,
        True, False,  True, False, False,  True,  True, False, False,
       False,  True,  True, False, False]
# 50 scores aligned position-by-position with y_true.
# NOTE(review): presumably the model's predicted probability of the positive
# class -- confirm against whatever produced these numbers.
y_pred_prob = [0.44839687, 0.92633269, 0.65914581, 0.17905397, 0.40417923,
       0.47162528, 0.15476492, 0.59346126, 0.54024882, 0.46245561,
       0.38636323, 0.98602046, 0.15788436, 0.54024882, 0.91074003,
       0.8193212 , 0.3440063 , 0.31334757, 0.78226145, 0.91401247,
       0.52908781, 0.99243193, 0.58230025, 0.7025755 , 0.58230025,
       0.13056516, 0.50801374, 0.29276724, 0.4597397 , 0.15981569,
       0.72814605, 0.78462172, 0.75172519, 0.91401247, 0.09549534,
       0.81102209, 0.48224654, 0.3550919 , 0.68840797, 0.17606575,
       0.28834576, 0.59731907, 0.37894104, 0.15474382, 0.33875763,
       0.37722093, 0.98223886, 0.59346126, 0.17606575, 0.19882789]
# Pair scores and labels in one frame; "y_pred_prob" and "y_true" are the
# column names that calculate_benefits_per_threshold reads.
df = pd.DataFrame(
    {
        "y_pred_prob": y_pred_prob,
        "y_true": y_true,
    }
)


In [None]:
def calculate_benefits_per_threshold(
    df: pd.DataFrame,
    true_postive_benefit: int,
    false_positive_cost: int,
    true_negative_benefit: int,
    false_negative_cost: int,
    number_of_thresholds: int = 1,
) -> pd.DataFrame:
    """Compute the total benefit of classifying at each threshold on a grid.

    For every threshold ``t`` an observation is predicted positive when its
    ``y_pred_prob`` is strictly greater than ``t``. The resulting confusion
    matrix counts are then weighted by the supplied benefits and costs::

        TP * true_postive_benefit + TN * true_negative_benefit
        - FP * false_positive_cost - FN * false_negative_cost

    Args:
        df: Frame with a ``"y_pred_prob"`` column (scores) and a ``"y_true"``
            column (labels compared for equality with the boolean prediction).
            The frame is only read, never modified.
        true_postive_benefit: Amount added per true positive. (The spelling
            is kept as-is so existing keyword callers keep working.)
        false_positive_cost: Amount subtracted per false positive.
        true_negative_benefit: Amount added per true negative.
        false_negative_cost: Amount subtracted per false negative.
        number_of_thresholds: Despite its name, this is the *step* of the
            threshold grid in hundredths: thresholds are ``i / 100`` for
            ``i in range(0, 100, number_of_thresholds)``. The default of 1
            yields the 100 thresholds 0.00, 0.01, ..., 0.99.

    Returns:
        A frame with one row per threshold and the columns ``"Threshold"``
        and ``"Total Benefit"``.

    Raises:
        ValueError: If ``number_of_thresholds`` is smaller than 1. A zero
            step is invalid for ``range`` and a negative one would silently
            produce an empty result.
    """
    if number_of_thresholds < 1:
        raise ValueError(
            "number_of_thresholds is the grid step in hundredths and must be "
            f">= 1, got {number_of_thresholds!r}"
        )

    thresholds = [i / 100 for i in range(0, 100, number_of_thresholds)]

    # Loop-invariant lookups: nothing below mutates the frame, so neither a
    # copy nor a per-iteration column access is needed.
    scores = df["y_pred_prob"]
    labels = df["y_true"]

    benefits = []
    for threshold in thresholds:
        predictions = scores > threshold
        # A prediction is correct when it equals the label; each confusion
        # matrix cell is then "correct or not" crossed with "predicted
        # positive or not".
        correct = predictions == labels
        true_positives = (correct & predictions).sum()
        false_positives = (~correct & predictions).sum()
        true_negatives = (correct & ~predictions).sum()
        false_negatives = (~correct & ~predictions).sum()

        benefit = (
            true_positives * true_postive_benefit
            + true_negatives * true_negative_benefit
            - false_positives * false_positive_cost
            - false_negatives * false_negative_cost
        )
        benefits.append(benefit)

    benefit_per_threshold = pd.DataFrame(
        {"Threshold": thresholds, "Total Benefit": benefits}
    )
    return benefit_per_threshold


In [None]:
# Payoff attached to each confusion-matrix outcome. The key spelling
# ("true_postive_benefit") matches the function's parameter name exactly.
payoffs = dict(
    true_postive_benefit=10,
    false_positive_cost=3,
    true_negative_benefit=1,
    false_negative_cost=5,
)
benefit_per_threshold = calculate_benefits_per_threshold(df=df, **payoffs)

# Kept as the cell's last expression so the notebook renders the figure.
px.line(benefit_per_threshold, x="Threshold", y="Total Benefit")

In [None]:
# Positional index of the first row with the highest total benefit.
best_row_position = benefit_per_threshold["Total Benefit"].argmax()
# Show that row (its threshold and benefit) as the cell output.
benefit_per_threshold.iloc[best_row_position]