In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the raw scores file, keep only the columns this analysis uses, and
# discard rows that lack either the ethnicity label or the outcome flag.
df = (
    pd.read_csv("../data/compas-scores-raw.csv")
    .loc[:, ['Ethnic_Code_Text', 'Sex_Code_Text', 'RawScore', 'DecileScore', 'Recidivism_Within_2years']]
    .dropna(subset=['Ethnic_Code_Text', 'Recidivism_Within_2years'])
)

# Show which ethnicity labels remain after cleaning.
racial_groups = df['Ethnic_Code_Text'].unique()
print("Racial Groups:", racial_groups)

# Cast the outcome flag to int; rows with a missing outcome were dropped
# above, so the column no longer contains NaN at this point.
df = df.astype({'Recidivism_Within_2years': int})

# Function to compute metrics
def compute_metrics(group, threshold=5):
    """Compute confusion-matrix rates for one group of rows.

    A row counts as a positive prediction when its ``DecileScore`` is at or
    above ``threshold``; that prediction is compared with the observed
    ``Recidivism_Within_2years`` label (1 = positive, 0 = negative).

    Args:
        group: DataFrame providing the ``DecileScore`` and
            ``Recidivism_Within_2years`` columns.
        threshold: Lowest decile score treated as a positive prediction.
            Defaults to 5.

    Returns:
        pandas Series indexed by "FPR", "FNR", "TPR" and "TNR". A rate is
        NaN when its denominator is zero (e.g. FPR for a group that has no
        actual negatives).
    """
    actual_pos = group['Recidivism_Within_2years'] == 1
    actual_neg = group['Recidivism_Within_2years'] == 0
    # A missing score compares False here, so it lands in the
    # "predicted negative" bucket.
    pred_pos = group['DecileScore'] >= threshold
    pred_neg = ~pred_pos

    # Series.sum() counts the True entries in one vectorised pass rather
    # than iterating element by element with the built-in sum().
    tp = int((actual_pos & pred_pos).sum())
    fp = int((actual_neg & pred_pos).sum())
    fn = int((actual_pos & pred_neg).sum())
    tn = int((actual_neg & pred_neg).sum())

    def _rate(numerator, denominator):
        # Guard the division so an empty class yields NaN instead of raising.
        return numerator / denominator if denominator > 0 else np.nan

    return pd.Series({
        "FPR": _rate(fp, fp + tn),
        "FNR": _rate(fn, fn + tp),
        "TPR": _rate(tp, tp + fn),
        "TNR": _rate(tn, tn + fp),
    })

# Calculate metrics per race.
# Only the two columns compute_metrics reads are handed to apply(): pandas
# >= 2.2 deprecates running apply() over the grouping column itself, and
# selecting the columns explicitly produces the same table without the
# FutureWarning.
metrics = (
    df.groupby("Ethnic_Code_Text")[["DecileScore", "Recidivism_Within_2years"]]
    .apply(compute_metrics)
)
metrics
