In [12]:
import pandas as pd
import matplotlib.pyplot as plt
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

# Load the COMPAS data and prepare it for AI Fairness 360.
#
# AIF360 dataset classes only accept numeric, NA-free DataFrames, and group
# definitions are matched against numeric protected-attribute values. The
# `race` column is string-valued (e.g. 'Caucasian'), so it is encoded here,
# before any dataset object is built.
label_name = "two_year_recid"
protected_attribute = "race"

try:
    raw_df = pd.read_csv("compas-scores-two-years.csv")
except FileNotFoundError as err:
    # Same exception type as before, but with an actionable message.
    raise FileNotFoundError(
        "compas-scores-two-years.csv was not found in the current working "
        "directory; place the COMPAS CSV next to this notebook or adjust "
        "the path passed to pd.read_csv."
    ) from err

# Numeric codes for the two groups being compared (1.0 = privileged).
# Every other race value is kept in the data under a separate code so the
# classifier is still trained on all rows, as in the original script.
race_codes = {"Caucasian": 1.0, "African-American": 0.0}
other_race_code = 2.0

# Keep numeric columns only, then re-attach the encoded protected attribute.
df = raw_df.select_dtypes(include="number").copy()
df[protected_attribute] = (
    raw_df[protected_attribute].map(race_codes).fillna(other_race_code)
)

# BinaryLabelDataset rejects NA values; drop the columns that contain any.
df = df.dropna(axis="columns")
if label_name not in df.columns:
    raise ValueError(
        f"Label column {label_name!r} is missing, non-numeric, or contains "
        "NA values after preprocessing."
    )

# NOTE(review): every remaining numeric column is used as a model feature.
# This may include identifiers or outcome-derived columns; consider curating
# the list before interpreting the classifier's quality.
features = [
    col for col in df.columns if col not in (label_name, protected_attribute)
]

# Convert to AI Fairness 360 dataset (label 0 is the favorable outcome).
dataset = BinaryLabelDataset(
    df=df,
    label_names=[label_name],
    protected_attribute_names=[protected_attribute],
    favorable_label=0,
    unfavorable_label=1
)

# Group definitions use the numeric codes assigned above.
privileged_groups = [{protected_attribute: race_codes["Caucasian"]}]
unprivileged_groups = [{protected_attribute: race_codes["African-American"]}]

# Hold out 30% of the rows for evaluation. The DataFrame index is split
# alongside the arrays so the held-out rows can be looked up in `df` later.
X, y = df[features].values, df[label_name].values
X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
    X,
    y,
    df.index,
    random_state=42,
    test_size=0.3,
)

# Standardise the features, fit a logistic regression on the training split,
# and predict labels for the held-out split.
clf = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
y_pred = clf.fit(X_train, y_train).predict(X_test)

# Wrap the held-out rows in an AIF360 dataset carrying the true labels.
test_df = df.loc[idx_test].copy()
test_dataset = BinaryLabelDataset(
    favorable_label=0,
    unfavorable_label=1,
    df=test_df,
    label_names=[label_name],
    protected_attribute_names=[protected_attribute],
)

# A deep copy of the ground-truth dataset whose labels are replaced by the
# classifier's predictions; the original test_dataset is left untouched.
classified_dataset = test_dataset.copy(deepcopy=True)
classified_dataset.labels = y_pred

# Compare ground truth against predictions, conditioned on the two groups.
classification_metric = ClassificationMetric(
    test_dataset,
    classified_dataset,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# NOTE(review): AIF360 presumably treats the favorable label as the
# "positive" class. With favorable_label=0 on the datasets, this rate would
# then describe actual recidivists predicted as non-recidivists; confirm
# that this is the quantity intended.
fpr_priv, fpr_unpriv = (
    classification_metric.false_positive_rate(privileged=is_privileged)
    for is_privileged in (True, False)
)

print("False Positive Rate (Privileged):", fpr_priv)
print("False Positive Rate (Unprivileged):", fpr_unpriv)

# Visualization: one bar per group showing its false positive rate.
fig, ax = plt.subplots()
ax.bar(['Privileged (Caucasian)', 'Unprivileged (African-American)'],
       [fpr_priv, fpr_unpriv], color=['blue', 'red'])
ax.set_title("False Positive Rate Disparity in COMPAS Predictions")
ax.set_ylabel("False Positive Rate")

# Save plot as PNG image. This must happen before plt.show(): once the
# figure has been shown it is typically released, and a later savefig call
# would write an empty canvas instead of the chart.
fig.savefig("compas_fpr_disparity.png", dpi=300, bbox_inches='tight')
print("Visualization saved as compas_fpr_disparity.png")

# Show plot
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'compas-scores-two-years.csv'