In [1]:
import pandas as pd

In [2]:
# Read the results table from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer=r'final_results.csv')

In [5]:
# Decision cut-off: scores at or above it are flagged as spoof (tune as required)
THRESHOLD = 0.5  # Example threshold

# Binary prediction per row: 1 = Spoof (Fake), 0 = Real
df["predicted_spoof"] = df["prediction_score"].ge(THRESHOLD).astype(int)

# Working assumption of this analysis: every sample is genuine, so the ground
# truth is 0 (Real) for all rows and only FP / TN outcomes can occur.
df["actual_label"] = 0

# Row-level outcome flags
#   FP: flagged as spoof although labelled real
#   TN: passed as real and labelled real
df["FP"] = df["predicted_spoof"].eq(1) & df["actual_label"].eq(0)
df["TN"] = df["predicted_spoof"].eq(0) & df["actual_label"].eq(0)

# Per-region counts: output column -> (source column, aggregation)
aggregations = {
    "total_samples": ("actual_label", "count"),
    "false_positives": ("FP", "sum"),
    "true_negatives": ("TN", "sum"),
}
region_metrics = df.groupby("region").agg(**aggregations)

# Turn the counts into rates relative to the number of samples in each region
region_metrics = region_metrics.assign(
    FPR=lambda counts: counts["false_positives"] / counts["total_samples"],
    TNR=lambda counts: counts["true_negatives"] / counts["total_samples"],
)

In [6]:
# Show the per-region counts and FPR/TNR rates as this cell's output
region_metrics

Unnamed: 0_level_0,total_samples,false_positives,true_negatives,FPR,TNR
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
british isles (uk and ireland),100,59,41,0.59,0.41
multiple accents,100,48,52,0.48,0.52
north america (us and canada),100,69,31,0.69,0.31
other,100,47,53,0.47,0.53
"south asia (india, pakistan, bangladesh, sri lanka, nepal)",100,47,53,0.47,0.53
"southeast asia (singapore, malaysia, the philippines, indonesia, thailand, vietnam, myanmar, cambodia, and others)",100,83,17,0.83,0.17
"sub-saharan africa (all countries south of the sahara desert, excluding south africa)",100,74,26,0.74,0.26


In [7]:
import matplotlib.pyplot as plt
import seaborn as sns

# Expose "region" as a regular column for plotting.
# Guarded so that re-running this cell is a no-op: calling reset_index() a second
# time would otherwise add a spurious "index" column to region_metrics.
if "region" not in region_metrics.columns:
    region_metrics = region_metrics.reset_index()

ModuleNotFoundError: No module named 'matplotlib'

In [None]:


# Set plot style
sns.set(style="whitegrid")

# The plots need "region" as a regular column. Derive a local frame so this cell
# also works when region_metrics is still indexed by region (e.g. the reset-index
# cell did not run).
plot_data = (
    region_metrics
    if "region" in region_metrics.columns
    else region_metrics.reset_index()
)

# region_metrics holds exactly one FPR and one TNR value per region, so a box plot
# would collapse to a single line per region. Horizontal bars show the values.
# Each entry: (metric column, x-axis label, panel title)
panels = [
    ("FPR", "False Positive Rate (FPR)", "FPR by Region"),
    ("TNR", "True Negative Rate (TNR)", "TNR by Region"),
]

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

for ax, (metric, xlabel, title) in zip(axes, panels):
    sns.barplot(data=plot_data, x=metric, y="region", color="C0", ax=ax)
    ax.set_xlim(0, 1)  # both metrics are rates, keep the panels comparable
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Region")
    ax.set_title(title)

# Adjust layout
plt.tight_layout()

# Show the plots
plt.show()