In [1]:
import pandas as pd

In [2]:
# Decision cut-off for the detector's prediction_score: preprocess_results
# flags a row as predicted spoof when its score is strictly below this value.
THRESHOLD = 0.5

In [3]:
def clean_accent_name(accent):
    """Normalise a raw accent/region label into a display name.

    Everything from the first "(" onwards is discarded, runs of whitespace
    are collapsed to single spaces, each word is capitalised (first letter
    upper-case, the rest lower-case), and every "&" is spelled out as "and".

    Example: "middle east & central asia (xyz)" -> "Middle East and Central Asia".

    Args:
        accent: The raw label string.

    Returns:
        The cleaned label string.
    """
    # Keep only the text in front of the first opening parenthesis.
    head = accent.partition("(")[0]
    # split() with no argument also trims leading/trailing whitespace.
    title_cased = " ".join(map(str.capitalize, head.split()))
    return title_cased.replace("&", "and")

def preprocess_results(results_df, threshold=None):
    """Clean region names and flag each row as a false positive or true negative.

    The input frame is copied first, so the DataFrame passed in is left
    untouched and the raw results stay available to the caller.

    Every row is assigned ``actual_label = 1``; a row whose score falls below
    the threshold is marked ``predicted_spoof = 1`` and counted as a false
    positive ("FP"), every other row is counted as a true negative ("TN").

    Args:
        results_df: DataFrame with a string "region" column and a numeric
            "prediction_score" column.
        threshold: Scores strictly below this value are flagged as predicted
            spoof. Defaults to the module-level ``THRESHOLD`` when omitted.

    Returns:
        A new DataFrame with "region" normalised via ``clean_accent_name`` and
        the added columns "predicted_spoof" (0/1), "actual_label" (always 1),
        "FP" and "TN" (booleans).
    """
    if threshold is None:
        # Look the default up at call time so edits to the config cell apply.
        threshold = THRESHOLD

    # Work on a copy: adding columns to the argument would silently alter the
    # caller's frame and make the raw data unrecoverable.
    labelled = results_df.copy()
    labelled["region"] = labelled["region"].apply(clean_accent_name)

    # NOTE: a missing (NaN) score compares False here, so such a row ends up
    # with predicted_spoof == 0 and is counted as a true negative.
    labelled["predicted_spoof"] = (labelled["prediction_score"] < threshold).astype(int)
    labelled["actual_label"] = 1

    labelled["FP"] = (labelled["predicted_spoof"] == 1) & (labelled["actual_label"] == 1)
    labelled["TN"] = (labelled["predicted_spoof"] == 0) & (labelled["actual_label"] == 1)
    return labelled

def compute_region_metrics(results_df):
    """Summarise false-positive and true-negative counts and rates per region.

    Args:
        results_df: DataFrame with "region", "actual_label", "FP" and "TN"
            columns (as produced by ``preprocess_results``).

    Returns:
        DataFrame with one row per region and the columns "region",
        "total_samples", "false_positives", "true_negatives", "FPR" and "TNR",
        where each rate is the corresponding count divided by "total_samples".
    """
    return (
        results_df.groupby("region")
        .agg(
            total_samples=("actual_label", "count"),
            false_positives=("FP", "sum"),
            true_negatives=("TN", "sum"),
        )
        .assign(
            FPR=lambda per_region: per_region["false_positives"] / per_region["total_samples"],
            TNR=lambda per_region: per_region["true_negatives"] / per_region["total_samples"],
        )
        # Turn the group key back into an ordinary "region" column.
        .reset_index()
    )

## Baseline Original Results

In [4]:
# Load the per-sample scores of the original baseline run, label every row,
# then roll the labels up into per-region counts and rates.
tssd_original = pd.read_csv("baseline-original-results/final-results-tssd.csv").pipe(preprocess_results)
region_metrics_original = tssd_original.pipe(compute_region_metrics)
region_metrics_original

Unnamed: 0,region,total_samples,false_positives,true_negatives,FPR,TNR
0,British Isles,100,96,4,0.96,0.04
1,Multiple Accents,100,97,3,0.97,0.03
2,North America,100,91,9,0.91,0.09
3,Other,100,97,3,0.97,0.03
4,South Asia,100,91,9,0.91,0.09
5,Southeast Asia,100,93,7,0.93,0.07
6,Sub-saharan Africa,100,91,9,0.91,0.09


## Baseline Updated Results

In [5]:
# Same pipeline as for the original baseline, applied to the updated run.
tssd_updated = pd.read_csv("baseline-updated-results/final-results-tssd.csv").pipe(preprocess_results)
region_metrics_updated = tssd_updated.pipe(compute_region_metrics)
region_metrics_updated

Unnamed: 0,region,total_samples,false_positives,true_negatives,FPR,TNR
0,British Isles,100,98,2,0.98,0.02
1,Caribbean,100,88,12,0.88,0.12
2,East Asia,100,88,12,0.88,0.12
3,Middle East and Central Asia,92,77,15,0.836957,0.163043
4,North America,100,93,7,0.93,0.07
5,South Asia,100,90,10,0.9,0.1
6,Southeast Asia,100,69,31,0.69,0.31
7,Sub-saharan Africa,100,96,4,0.96,0.04
8,Western Europe,100,84,16,0.84,0.16


## Comparison

In [6]:
# Put the false-positive counts of both runs side by side. The outer join keeps
# regions that appear in only one run (the missing side shows up as NaN).
comparison = (
    region_metrics_original[["region", "false_positives"]]
    .merge(
        region_metrics_updated[["region", "false_positives"]],
        on="region",
        how="outer",
        suffixes=("_original", "_updated"),
    )
    # Leave out the catch-all "Multiple Accents" and "Other" regions.
    .loc[lambda merged: ~merged["region"].isin(["Multiple Accents", "Other"])]
)
comparison

Unnamed: 0,region,false_positives_original,false_positives_updated
0,British Isles,96.0,98.0
2,North America,91.0,93.0
4,South Asia,91.0,90.0
5,Southeast Asia,93.0,69.0
6,Sub-saharan Africa,91.0,96.0
7,Caribbean,,88.0
8,East Asia,,88.0
9,Middle East and Central Asia,,77.0
10,Western Europe,,84.0
