In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate

In [2]:
# Read the clean and dirty CSV files (both paths are relative file names)
clean_path, dirty_path = '4000_0.05_clean.csv', '4000_0.05_dirty.csv'
clean_df, dirty_df = (pd.read_csv(csv_path) for csv_path in (clean_path, dirty_path))

# Display basic info about the datasets
# print('Clean Data Info:')
# print(clean_df.info())
# print('\nDirty Data Info:')
# print(dirty_df.info())

# Check the first few rows of the datasets
# print('\nClean Data Sample:')
# print(clean_df.head())
# print('\nDirty Data Sample:')
# print(dirty_df.head())

In [3]:
# Prepare Data: target variable y is the binary classification of NATIVITY (whether a person is Native or Foreign-born)
# The sensitive attribute RAC1P (likely race or ethnicity) is separated out to evaluate fairness
def prepare_data(df, is_clean=True):
    """Split a dataset into train/test features, binary target and sensitive attribute.

    Args:
        df: DataFrame containing at least the 'NATIVITY' and 'RAC1P' columns.
        is_clean: When True, the last 8 columns of `df` are discarded first.

    Returns:
        (X_train, X_test, y_train, y_test, s_train, s_test): an 80/20 split with
        random_state=42 of the features (everything except NATIVITY and RAC1P),
        the target (1 where NATIVITY == 1, otherwise 0) and the RAC1P column.
    """
    frame = df.iloc[:, :-8] if is_clean else df
    target = (frame['NATIVITY'] == 1).astype(int)  # 1 if Native, 0 if Foreign-born
    sensitive_attr = frame['RAC1P']
    features = frame.drop(['NATIVITY', 'RAC1P'], axis=1)
    # All three objects are split together so rows stay aligned across them.
    return tuple(train_test_split(features, target, sensitive_attr, test_size=0.2, random_state=42))

In [4]:
# Prepare training and testing sets: data is split into training and testing sets, along with the sensitive attribute.
(X_train_clean, X_test_clean,
 y_train_clean, y_test_clean,
 s_train_clean, s_test_clean) = prepare_data(clean_df, is_clean=True)

(X_train_dirty, X_test_dirty,
 y_train_dirty, y_test_dirty,
 s_train_dirty, s_test_dirty) = prepare_data(dirty_df, is_clean=False)

In [5]:
# Fairness Metrics
def demographic_parity(y_pred, sensitive_attr):
    """Mean prediction (positive-prediction rate for 0/1 labels) per group.

    Args:
        y_pred: Predicted labels (array, list or Series), one per row.
        sensitive_attr: Group membership for the same rows, in the same order.

    Returns:
        dict mapping each distinct group value (in order of first appearance)
        to the mean of `y_pred` over the rows belonging to that group.
    """
    # Work on plain arrays so rows are matched by position. Indexing a pandas
    # Series with a boolean Series aligns on index labels instead, which raises
    # or silently misaligns when y_pred and sensitive_attr carry different indexes.
    preds = np.asarray(y_pred)
    membership = np.asarray(sensitive_attr)
    return {group: preds[membership == group].mean() for group in pd.unique(membership)}

def equalized_odds(y_true, y_pred, sensitive_attr):
    """Per-group true positive rate and false positive rate.

    Args:
        y_true: Ground-truth 0/1 labels, one per row.
        y_pred: Predicted 0/1 labels for the same rows, in the same order.
        sensitive_attr: Group membership for the same rows, in the same order.

    Returns:
        (true_positive_rates, false_positive_rates): two dicts keyed by group
        value (in order of first appearance). A rate whose denominator is zero
        (no positives, resp. no negatives, in the group) is reported as 0.0.
    """
    # Plain arrays give positional matching regardless of any pandas index.
    truth = np.asarray(y_true)
    preds = np.asarray(y_pred)
    membership = np.asarray(sensitive_attr)

    true_positive_rates = {}
    false_positive_rates = {}

    for group in pd.unique(membership):
        in_group = membership == group
        truth_group = truth[in_group]
        preds_group = preds[in_group]

        tp = ((preds_group == 1) & (truth_group == 1)).sum()
        tn = ((preds_group == 0) & (truth_group == 0)).sum()
        fp = ((preds_group == 1) & (truth_group == 0)).sum()
        fn = ((preds_group == 0) & (truth_group == 1)).sum()

        # Fall back to a float 0.0 (not int 0) so every rate has the same type
        # and is formatted consistently downstream.
        true_positive_rates[group] = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        false_positive_rates[group] = fp / (fp + tn) if (fp + tn) > 0 else 0.0

    return true_positive_rates, false_positive_rates

def disparate_impact_ratio(y_pred, sensitive_attr):
    """Ratio of the lowest to the highest per-group mean prediction.

    Args:
        y_pred: Predicted labels, one per row.
        sensitive_attr: Group membership for the same rows, in the same order.

    Returns:
        min_rate / max_rate as a float, or 0.0 when the highest rate is not
        positive (the ratio is undefined in that case).
    """
    # Positional matching: avoids pandas label alignment when y_pred is a Series.
    preds = np.asarray(y_pred)
    membership = np.asarray(sensitive_attr)
    rates = [preds[membership == group].mean() for group in pd.unique(membership)]
    lowest, highest = min(rates), max(rates)
    return lowest / highest if highest > 0 else 0.0

def statistical_parity_difference(y_pred, sensitive_attr):
    """Gap between the highest and lowest per-group mean prediction.

    Args:
        y_pred: Predicted labels, one per row.
        sensitive_attr: Group membership for the same rows, in the same order.

    Returns:
        max_rate - min_rate across all distinct groups.
    """
    # Positional matching: avoids pandas label alignment when y_pred is a Series.
    preds = np.asarray(y_pred)
    membership = np.asarray(sensitive_attr)
    rates = [preds[membership == group].mean() for group in pd.unique(membership)]
    return max(rates) - min(rates)

In [6]:
# Logistic regression: a separately fitted model for each train/test scenario
# (fit() returns the estimator itself, so construction and fitting are chained).
logreg_train_clean_test_clean = LogisticRegression(max_iter=1000).fit(X_train_clean, y_train_clean)
logreg_train_clean_test_dirty = LogisticRegression(max_iter=1000).fit(X_train_clean, y_train_clean)
logreg_train_dirty_test_clean = LogisticRegression(max_iter=1000).fit(X_train_dirty, y_train_dirty)
logreg_train_dirty_test_dirty = LogisticRegression(max_iter=1000).fit(X_train_dirty, y_train_dirty)

# Predictions on the test split named in each variable
y_pred_logreg_train_clean_test_clean = logreg_train_clean_test_clean.predict(X_test_clean)
y_pred_logreg_train_clean_test_dirty = logreg_train_clean_test_dirty.predict(X_test_dirty)
y_pred_logreg_train_dirty_test_clean = logreg_train_dirty_test_clean.predict(X_test_clean)
y_pred_logreg_train_dirty_test_dirty = logreg_train_dirty_test_dirty.predict(X_test_dirty)

In [7]:
# Decision trees: one model per train/test scenario.
# random_state is pinned (same seed as the train/test split) because scikit-learn
# permutes features at each split; without a seed, ties between equally good
# splits are broken differently on every run and the reported metrics drift.
tree_train_clean_test_clean = DecisionTreeClassifier(random_state=42)
tree_train_clean_test_clean.fit(X_train_clean, y_train_clean)
y_pred_tree_train_clean_test_clean = tree_train_clean_test_clean.predict(X_test_clean)

tree_train_clean_test_dirty = DecisionTreeClassifier(random_state=42)
tree_train_clean_test_dirty.fit(X_train_clean, y_train_clean)
y_pred_tree_train_clean_test_dirty = tree_train_clean_test_dirty.predict(X_test_dirty)

tree_train_dirty_test_clean = DecisionTreeClassifier(random_state=42)
tree_train_dirty_test_clean.fit(X_train_dirty, y_train_dirty)
y_pred_tree_train_dirty_test_clean = tree_train_dirty_test_clean.predict(X_test_clean)

tree_train_dirty_test_dirty = DecisionTreeClassifier(random_state=42)
tree_train_dirty_test_dirty.fit(X_train_dirty, y_train_dirty)
y_pred_tree_train_dirty_test_dirty = tree_train_dirty_test_dirty.predict(X_test_dirty)

In [8]:
# Random forests: one model per train/test scenario.
# random_state is pinned (same seed as the train/test split) because bootstrap
# sampling and per-split feature subsampling are random; without a seed the
# reported accuracy and fairness numbers change on every run.
rf_train_clean_test_clean = RandomForestClassifier(random_state=42)
rf_train_clean_test_clean.fit(X_train_clean, y_train_clean)
y_pred_rf_train_clean_test_clean = rf_train_clean_test_clean.predict(X_test_clean)

rf_train_clean_test_dirty = RandomForestClassifier(random_state=42)
rf_train_clean_test_dirty.fit(X_train_clean, y_train_clean)
y_pred_rf_train_clean_test_dirty = rf_train_clean_test_dirty.predict(X_test_dirty)

rf_train_dirty_test_clean = RandomForestClassifier(random_state=42)
rf_train_dirty_test_clean.fit(X_train_dirty, y_train_dirty)
y_pred_rf_train_dirty_test_clean = rf_train_dirty_test_clean.predict(X_test_clean)

rf_train_dirty_test_dirty = RandomForestClassifier(random_state=42)
rf_train_dirty_test_dirty.fit(X_train_dirty, y_train_dirty)
y_pred_rf_train_dirty_test_dirty = rf_train_dirty_test_dirty.predict(X_test_dirty)

In [9]:
# Support vector classifiers (default settings): a separately fitted model for
# each train/test scenario. fit() returns the estimator, so the calls are chained.
svm_train_clean_test_clean = SVC().fit(X_train_clean, y_train_clean)
svm_train_clean_test_dirty = SVC().fit(X_train_clean, y_train_clean)
svm_train_dirty_test_clean = SVC().fit(X_train_dirty, y_train_dirty)
svm_train_dirty_test_dirty = SVC().fit(X_train_dirty, y_train_dirty)

# Predictions on the test split named in each variable
y_pred_svm_train_clean_test_clean = svm_train_clean_test_clean.predict(X_test_clean)
y_pred_svm_train_clean_test_dirty = svm_train_clean_test_dirty.predict(X_test_dirty)
y_pred_svm_train_dirty_test_clean = svm_train_dirty_test_clean.predict(X_test_clean)
y_pred_svm_train_dirty_test_dirty = svm_train_dirty_test_dirty.predict(X_test_dirty)

In [10]:
# Accuracy of each logistic-regression scenario: (label, true labels, predictions)
for scenario_label, y_true_split, y_pred_split in [
    ('Logistic Regression Accuracy (Train: Clean, Test: Clean):', y_test_clean, y_pred_logreg_train_clean_test_clean),
    ('Logistic Regression Accuracy (Train: Clean, Test: Dirty):', y_test_dirty, y_pred_logreg_train_clean_test_dirty),
    ('Logistic Regression Accuracy (Train: Dirty, Test: Clean):', y_test_clean, y_pred_logreg_train_dirty_test_clean),
    ('Logistic Regression Accuracy (Train: Dirty, Test: Dirty):', y_test_dirty, y_pred_logreg_train_dirty_test_dirty),
]:
    print(scenario_label, accuracy_score(y_true_split, y_pred_split))

Logistic Regression Accuracy (Train: Clean, Test: Clean): 0.99875
Logistic Regression Accuracy (Train: Clean, Test: Dirty): 0.9125
Logistic Regression Accuracy (Train: Dirty, Test: Clean): 0.99375
Logistic Regression Accuracy (Train: Dirty, Test: Dirty): 0.93125


In [11]:
# Accuracy of each decision-tree scenario: (label, true labels, predictions)
for scenario_label, y_true_split, y_pred_split in [
    ('Decision Tree Accuracy (Train: Clean, Test: Clean):', y_test_clean, y_pred_tree_train_clean_test_clean),
    ('Decision Tree Accuracy (Train: Clean, Test: Dirty):', y_test_dirty, y_pred_tree_train_clean_test_dirty),
    ('Decision Tree Accuracy (Train: Dirty, Test: Clean):', y_test_clean, y_pred_tree_train_dirty_test_clean),
    ('Decision Tree Accuracy (Train: Dirty, Test: Dirty):', y_test_dirty, y_pred_tree_train_dirty_test_dirty),
]:
    print(scenario_label, accuracy_score(y_true_split, y_pred_split))

Decision Tree Accuracy (Train: Clean, Test: Clean): 1.0
Decision Tree Accuracy (Train: Clean, Test: Dirty): 0.93
Decision Tree Accuracy (Train: Dirty, Test: Clean): 0.945
Decision Tree Accuracy (Train: Dirty, Test: Dirty): 0.895


In [12]:
# Accuracy of each random-forest scenario: (label, true labels, predictions)
for scenario_label, y_true_split, y_pred_split in [
    ('Random Forest Accuracy (Train: Clean, Test: Clean):', y_test_clean, y_pred_rf_train_clean_test_clean),
    ('Random Forest Accuracy (Train: Clean, Test: Dirty):', y_test_dirty, y_pred_rf_train_clean_test_dirty),
    ('Random Forest Accuracy (Train: Dirty, Test: Clean):', y_test_clean, y_pred_rf_train_dirty_test_clean),
    ('Random Forest Accuracy (Train: Dirty, Test: Dirty):', y_test_dirty, y_pred_rf_train_dirty_test_dirty),
]:
    print(scenario_label, accuracy_score(y_true_split, y_pred_split))

Random Forest Accuracy (Train: Clean, Test: Clean): 1.0
Random Forest Accuracy (Train: Clean, Test: Dirty): 0.93125
Random Forest Accuracy (Train: Dirty, Test: Clean): 0.9875
Random Forest Accuracy (Train: Dirty, Test: Dirty): 0.9375


In [13]:
# Accuracy of each SVM scenario: (label, true labels, predictions)
for scenario_label, y_true_split, y_pred_split in [
    ('SVM Accuracy (Train: Clean, Test: Clean):', y_test_clean, y_pred_svm_train_clean_test_clean),
    ('SVM Accuracy (Train: Clean, Test: Dirty):', y_test_dirty, y_pred_svm_train_clean_test_dirty),
    ('SVM Accuracy (Train: Dirty, Test: Clean):', y_test_clean, y_pred_svm_train_dirty_test_clean),
    ('SVM Accuracy (Train: Dirty, Test: Dirty):', y_test_dirty, y_pred_svm_train_dirty_test_dirty),
]:
    print(scenario_label, accuracy_score(y_true_split, y_pred_split))

SVM Accuracy (Train: Clean, Test: Clean): 0.85
SVM Accuracy (Train: Clean, Test: Dirty): 0.84125
SVM Accuracy (Train: Dirty, Test: Clean): 0.85
SVM Accuracy (Train: Dirty, Test: Dirty): 0.84125


### Four Scenarios

Train: Clean, Test: Clean - best-case benchmark, model sees only high-integrity data
Train: Clean, Test: Dirty - measures how robust a model trained on clean data is to real-world bias or corrupted inputs
Train: Dirty, Test: Clean - evaluates if a model trained on biased data still performs well on unbiased data (insight into bias internalization)
Train: Dirty, Test: Dirty - realistic worst-case for fairness, data is perturbed throughout, helps us understand what users would experience under biased conditions

### Model Insights

Logistic regression is highly sensitive to perturbations in the test data, dropping from 99.9% to 91.3% when tested on dirty data. But it still performs reasonably well when trained on dirty data. This indicates that the model isn’t overfitting to spurious correlations, but it does suffer when test data deviates from training.

Decision Trees are slightly more brittle. The performance drop is sharper across the board, suggesting that trees are more likely to learn (and depend on) sensitive attribute relationships that get disrupted in dirty data. They don't generalize as well under perturbation.

Ensembles like Random Forests are more robust than single trees. While there is still a drop from Clean to Dirty, it is less dramatic. They handle noise and biased training slightly better, thanks to averaging over many trees. Best balance of generalization and accuracy.

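SVMs have stable, but lower performance. Interestingly, they’re less sensitive to the dirty/clean switch, but never reach high accuracy. This may indicate underfitting: `SVC()` uses an RBF kernel by default (not a linear one), RBF kernels are sensitive to feature scale, and the features here are not standardized. The SVM fairness table below supports this reading — in the rows shown, the positive-prediction rate is 1.0 for every group, i.e. the model predicts "Native" for everyone, so its accuracy simply reflects the share of positive labels in the test set.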

In [14]:
# Fairness Metrics for Logistic Regression models

# print("Train: Clean, Test: Clean")
# print("Demographic Parity:", demographic_parity(y_pred_logreg_train_clean_test_clean, s_test_clean))
# print("Equalized Odds:", equalized_odds(y_test_clean, y_pred_logreg_train_clean_test_clean, s_test_clean))
# print("Disparate Impact Ratio:", disparate_impact_ratio(y_pred_logreg_train_clean_test_clean, s_test_clean))
# print("Statistical Parity Difference:", statistical_parity_difference(y_pred_logreg_train_clean_test_clean, s_test_clean))

# print("Train: Clean, Test: Dirty")
# print("Demographic Parity:", demographic_parity(y_pred_logreg_train_clean_test_dirty, s_test_dirty))
# print("Equalized Odds:", equalized_odds(y_test_dirty, y_pred_logreg_train_clean_test_dirty, s_test_dirty))
# print("Disparate Impact Ratio:", disparate_impact_ratio(y_pred_logreg_train_clean_test_dirty, s_test_dirty))
# print("Statistical Parity Difference:", statistical_parity_difference(y_pred_logreg_train_clean_test_dirty, s_test_dirty))

# print("Train: Dirty, Test: Clean")
# print("Demographic Parity:", demographic_parity(y_pred_logreg_train_dirty_test_clean, s_test_clean))
# print("Equalized Odds:", equalized_odds(y_test_clean, y_pred_logreg_train_dirty_test_clean, s_test_clean))
# print("Disparate Impact Ratio:", disparate_impact_ratio(y_pred_logreg_train_dirty_test_clean, s_test_clean))
# print("Statistical Parity Difference:", statistical_parity_difference(y_pred_logreg_train_dirty_test_clean, s_test_clean))

# print("Train: Dirty, Test: Dirty")
# print("Demographic Parity:", demographic_parity(y_pred_logreg_train_dirty_test_dirty, s_test_dirty))
# print("Equalized Odds:", equalized_odds(y_test_dirty, y_pred_logreg_train_dirty_test_dirty, s_test_dirty))
# print("Disparate Impact Ratio:", disparate_impact_ratio(y_pred_logreg_train_dirty_test_dirty, s_test_dirty))
# print("Statistical Parity Difference:", statistical_parity_difference(y_pred_logreg_train_dirty_test_dirty, s_test_dirty))

In [15]:
# Fairness Metrics for Logistic Regression

# One entry per train/test scenario: (label, true labels, predictions, sensitive attribute)
results = {
    scenario: {
        "Demographic Parity": demographic_parity(preds, groups),
        "Equalized Odds": equalized_odds(truth, preds, groups),
        "Disparate Impact Ratio": disparate_impact_ratio(preds, groups),
        "Statistical Parity Difference": statistical_parity_difference(preds, groups),
    }
    for scenario, truth, preds, groups in (
        ("Clean/Clean", y_test_clean, y_pred_logreg_train_clean_test_clean, s_test_clean),
        ("Clean/Dirty", y_test_dirty, y_pred_logreg_train_clean_test_dirty, s_test_dirty),
        ("Dirty/Clean", y_test_clean, y_pred_logreg_train_dirty_test_clean, s_test_clean),
        ("Dirty/Dirty", y_test_dirty, y_pred_logreg_train_dirty_test_dirty, s_test_dirty),
    )
}

# Widths of the two left-aligned label columns
col_widths = {"Train/Test": 15, "Fairness Metric": 30}

# Header line followed by a rule (label columns plus 96 characters for the value column)
print('Train/Test'.ljust(col_widths['Train/Test'])
      + 'Fairness Metric'.ljust(col_widths['Fairness Metric'])
      + "Value")
print("-" * (sum(col_widths.values()) + 96))

def format_value(val):
    """Render a metric value as text for the results table.

    Floats are shown with four decimals. Dicts are rendered as
    "{key: value, ...}" and tuples as "(item, ...)", with float entries
    (one level deep, or inside dicts held by a tuple) also shown with four
    decimals. Anything else is converted with str().
    """
    def _render_dict(mapping):
        # Float entries get four decimals; every other entry uses default formatting.
        parts = []
        for key, item in mapping.items():
            if isinstance(item, float):
                parts.append(f"{key}: {item:.4f}")
            else:
                parts.append(f"{key}: {item}")
        return "{" + ", ".join(parts) + "}"

    if isinstance(val, float):
        return f"{val:.4f}"
    if isinstance(val, dict):
        return _render_dict(val)
    if isinstance(val, tuple):
        pieces = []
        for element in val:
            if isinstance(element, dict):
                pieces.append(_render_dict(element))
            elif isinstance(element, float):
                pieces.append(f"{element:.4f}")
            else:
                pieces.append(str(element))
        return "(" + ", ".join(pieces) + ")"
    return str(val)

# Rows
# One output line per (scenario, metric): two padded label columns, then the value
for config, metrics in results.items():
    for metric_name, metric_value in metrics.items():
        formatted = format_value(metric_value)
        print(config.ljust(col_widths['Train/Test'])
              + metric_name.ljust(col_widths['Fairness Metric'])
              + formatted)

Train/Test     Fairness Metric               Value
---------------------------------------------------------------------------------------------------------------------------------------------
Clean/Clean    Demographic Parity            {1: 0.9519, 9: 0.7531, 2: 0.8904, 8: 0.6200, 6: 0.2769, 7: 1.0000, 3: 1.0000, 5: 0.5000}
Clean/Clean    Equalized Odds                ({1: 1.0000, 9: 1.0000, 2: 1.0000, 8: 0.9688, 6: 1.0000, 7: 1.0000, 3: 1.0000, 5: 1.0000}, {1: 0.0000, 9: 0.0000, 2: 0.0000, 8: 0.0000, 6: 0.0000, 7: 0, 3: 0, 5: 0.0000})
Clean/Clean    Disparate Impact Ratio        0.2769
Clean/Clean    Statistical Parity Difference 0.7231
Clean/Dirty    Demographic Parity            {3: 0.7778, 1: 0.9329, 9: 0.7531, 2: 0.8608, 8: 0.6842, 6: 0.4444, 7: 0.7500, 5: 0.7500, 4: 0.5000}
Clean/Dirty    Equalized Odds                ({3: 0.8750, 1: 0.9632, 9: 0.9077, 2: 0.9683, 8: 0.9250, 6: 0.8889, 7: 1.0000, 5: 1.0000, 4: 0.6000}, {3: 0.0000, 1: 0.4667, 9: 0.1250, 2: 0.4375, 8: 0.1176, 6: 0.

In [16]:
# Fairness Metrics for Decision Trees

# One entry per train/test scenario: (label, true labels, predictions, sensitive attribute)
results = {
    scenario: {
        "Demographic Parity": demographic_parity(preds, groups),
        "Equalized Odds": equalized_odds(truth, preds, groups),
        "Disparate Impact Ratio": disparate_impact_ratio(preds, groups),
        "Statistical Parity Difference": statistical_parity_difference(preds, groups),
    }
    for scenario, truth, preds, groups in (
        ("Clean/Clean", y_test_clean, y_pred_tree_train_clean_test_clean, s_test_clean),
        ("Clean/Dirty", y_test_dirty, y_pred_tree_train_clean_test_dirty, s_test_dirty),
        ("Dirty/Clean", y_test_clean, y_pred_tree_train_dirty_test_clean, s_test_clean),
        ("Dirty/Dirty", y_test_dirty, y_pred_tree_train_dirty_test_dirty, s_test_dirty),
    )
}

# Widths of the two left-aligned label columns
col_widths = {"Train/Test": 15, "Fairness Metric": 30}

# Header line followed by a rule (label columns plus 96 characters for the value column)
print('Train/Test'.ljust(col_widths['Train/Test'])
      + 'Fairness Metric'.ljust(col_widths['Fairness Metric'])
      + "Value")
print("-" * (sum(col_widths.values()) + 96))

def format_value(val):
    """Render a metric value as text for the results table.

    Floats are shown with four decimals. Dicts are rendered as
    "{key: value, ...}" and tuples as "(item, ...)", with float entries
    (one level deep, or inside dicts held by a tuple) also shown with four
    decimals. Anything else is converted with str().
    """
    def _render_dict(mapping):
        # Float entries get four decimals; every other entry uses default formatting.
        parts = []
        for key, item in mapping.items():
            if isinstance(item, float):
                parts.append(f"{key}: {item:.4f}")
            else:
                parts.append(f"{key}: {item}")
        return "{" + ", ".join(parts) + "}"

    if isinstance(val, float):
        return f"{val:.4f}"
    if isinstance(val, dict):
        return _render_dict(val)
    if isinstance(val, tuple):
        pieces = []
        for element in val:
            if isinstance(element, dict):
                pieces.append(_render_dict(element))
            elif isinstance(element, float):
                pieces.append(f"{element:.4f}")
            else:
                pieces.append(str(element))
        return "(" + ", ".join(pieces) + ")"
    return str(val)

# Rows
# One output line per (scenario, metric): two padded label columns, then the value
for config, metrics in results.items():
    for metric_name, metric_value in metrics.items():
        formatted = format_value(metric_value)
        print(config.ljust(col_widths['Train/Test'])
              + metric_name.ljust(col_widths['Fairness Metric'])
              + formatted)

Train/Test     Fairness Metric               Value
---------------------------------------------------------------------------------------------------------------------------------------------
Clean/Clean    Demographic Parity            {1: 0.9519, 9: 0.7531, 2: 0.8904, 8: 0.6400, 6: 0.2769, 7: 1.0000, 3: 1.0000, 5: 0.5000}
Clean/Clean    Equalized Odds                ({1: 1.0000, 9: 1.0000, 2: 1.0000, 8: 1.0000, 6: 1.0000, 7: 1.0000, 3: 1.0000, 5: 1.0000}, {1: 0.0000, 9: 0.0000, 2: 0.0000, 8: 0.0000, 6: 0.0000, 7: 0, 3: 0, 5: 0.0000})
Clean/Clean    Disparate Impact Ratio        0.2769
Clean/Clean    Statistical Parity Difference 0.7231
Clean/Dirty    Demographic Parity            {3: 0.8889, 1: 0.9390, 9: 0.7654, 2: 0.8354, 8: 0.7018, 6: 0.3889, 7: 0.7500, 5: 0.7500, 4: 0.3750}
Clean/Dirty    Equalized Odds                ({3: 1.0000, 1: 0.9719, 9: 0.9077, 2: 0.9683, 8: 0.9500, 6: 0.9444, 7: 1.0000, 5: 1.0000, 4: 0.6000}, {3: 0.0000, 1: 0.4333, 9: 0.1875, 2: 0.3125, 8: 0.1176, 6: 0.

In [17]:
# Fairness Metrics for Random Forests

# One entry per train/test scenario: (label, true labels, predictions, sensitive attribute)
results = {
    scenario: {
        "Demographic Parity": demographic_parity(preds, groups),
        "Equalized Odds": equalized_odds(truth, preds, groups),
        "Disparate Impact Ratio": disparate_impact_ratio(preds, groups),
        "Statistical Parity Difference": statistical_parity_difference(preds, groups),
    }
    for scenario, truth, preds, groups in (
        ("Clean/Clean", y_test_clean, y_pred_rf_train_clean_test_clean, s_test_clean),
        ("Clean/Dirty", y_test_dirty, y_pred_rf_train_clean_test_dirty, s_test_dirty),
        ("Dirty/Clean", y_test_clean, y_pred_rf_train_dirty_test_clean, s_test_clean),
        ("Dirty/Dirty", y_test_dirty, y_pred_rf_train_dirty_test_dirty, s_test_dirty),
    )
}

# Widths of the two left-aligned label columns
col_widths = {"Train/Test": 15, "Fairness Metric": 30}

# Header line followed by a rule (label columns plus 96 characters for the value column)
print('Train/Test'.ljust(col_widths['Train/Test'])
      + 'Fairness Metric'.ljust(col_widths['Fairness Metric'])
      + "Value")
print("-" * (sum(col_widths.values()) + 96))

def format_value(val):
    """Render a metric value as text for the results table.

    Floats are shown with four decimals. Dicts are rendered as
    "{key: value, ...}" and tuples as "(item, ...)", with float entries
    (one level deep, or inside dicts held by a tuple) also shown with four
    decimals. Anything else is converted with str().
    """
    def _render_dict(mapping):
        # Float entries get four decimals; every other entry uses default formatting.
        parts = []
        for key, item in mapping.items():
            if isinstance(item, float):
                parts.append(f"{key}: {item:.4f}")
            else:
                parts.append(f"{key}: {item}")
        return "{" + ", ".join(parts) + "}"

    if isinstance(val, float):
        return f"{val:.4f}"
    if isinstance(val, dict):
        return _render_dict(val)
    if isinstance(val, tuple):
        pieces = []
        for element in val:
            if isinstance(element, dict):
                pieces.append(_render_dict(element))
            elif isinstance(element, float):
                pieces.append(f"{element:.4f}")
            else:
                pieces.append(str(element))
        return "(" + ", ".join(pieces) + ")"
    return str(val)

# Rows
# One output line per (scenario, metric): two padded label columns, then the value
for config, metrics in results.items():
    for metric_name, metric_value in metrics.items():
        formatted = format_value(metric_value)
        print(config.ljust(col_widths['Train/Test'])
              + metric_name.ljust(col_widths['Fairness Metric'])
              + formatted)

Train/Test     Fairness Metric               Value
---------------------------------------------------------------------------------------------------------------------------------------------
Clean/Clean    Demographic Parity            {1: 0.9519, 9: 0.7531, 2: 0.8904, 8: 0.6400, 6: 0.2769, 7: 1.0000, 3: 1.0000, 5: 0.5000}
Clean/Clean    Equalized Odds                ({1: 1.0000, 9: 1.0000, 2: 1.0000, 8: 1.0000, 6: 1.0000, 7: 1.0000, 3: 1.0000, 5: 1.0000}, {1: 0.0000, 9: 0.0000, 2: 0.0000, 8: 0.0000, 6: 0.0000, 7: 0, 3: 0, 5: 0.0000})
Clean/Clean    Disparate Impact Ratio        0.2769
Clean/Clean    Statistical Parity Difference 0.7231
Clean/Dirty    Demographic Parity            {3: 0.8889, 1: 0.9390, 9: 0.7778, 2: 0.8354, 8: 0.7018, 6: 0.3889, 7: 0.7500, 5: 0.7500, 4: 0.3750}
Clean/Dirty    Equalized Odds                ({3: 1.0000, 1: 0.9719, 9: 0.9231, 2: 0.9683, 8: 0.9500, 6: 0.9444, 7: 1.0000, 5: 1.0000, 4: 0.6000}, {3: 0.0000, 1: 0.4333, 9: 0.1875, 2: 0.3125, 8: 0.1176, 6: 0.

In [18]:
# Fairness Metrics for SVC

# One entry per train/test scenario: (label, true labels, predictions, sensitive attribute)
results = {
    scenario: {
        "Demographic Parity": demographic_parity(preds, groups),
        "Equalized Odds": equalized_odds(truth, preds, groups),
        "Disparate Impact Ratio": disparate_impact_ratio(preds, groups),
        "Statistical Parity Difference": statistical_parity_difference(preds, groups),
    }
    for scenario, truth, preds, groups in (
        ("Clean/Clean", y_test_clean, y_pred_svm_train_clean_test_clean, s_test_clean),
        ("Clean/Dirty", y_test_dirty, y_pred_svm_train_clean_test_dirty, s_test_dirty),
        ("Dirty/Clean", y_test_clean, y_pred_svm_train_dirty_test_clean, s_test_clean),
        ("Dirty/Dirty", y_test_dirty, y_pred_svm_train_dirty_test_dirty, s_test_dirty),
    )
}

# Widths of the two left-aligned label columns
col_widths = {"Train/Test": 15, "Fairness Metric": 30}

# Header line followed by a rule (label columns plus 96 characters for the value column)
print('Train/Test'.ljust(col_widths['Train/Test'])
      + 'Fairness Metric'.ljust(col_widths['Fairness Metric'])
      + "Value")
print("-" * (sum(col_widths.values()) + 96))

def format_value(val):
    """Render a metric value as text for the results table.

    Floats are shown with four decimals. Dicts are rendered as
    "{key: value, ...}" and tuples as "(item, ...)", with float entries
    (one level deep, or inside dicts held by a tuple) also shown with four
    decimals. Anything else is converted with str().
    """
    def _render_dict(mapping):
        # Float entries get four decimals; every other entry uses default formatting.
        parts = []
        for key, item in mapping.items():
            if isinstance(item, float):
                parts.append(f"{key}: {item:.4f}")
            else:
                parts.append(f"{key}: {item}")
        return "{" + ", ".join(parts) + "}"

    if isinstance(val, float):
        return f"{val:.4f}"
    if isinstance(val, dict):
        return _render_dict(val)
    if isinstance(val, tuple):
        pieces = []
        for element in val:
            if isinstance(element, dict):
                pieces.append(_render_dict(element))
            elif isinstance(element, float):
                pieces.append(f"{element:.4f}")
            else:
                pieces.append(str(element))
        return "(" + ", ".join(pieces) + ")"
    return str(val)

# Rows
# One output line per (scenario, metric): two padded label columns, then the value
for config, metrics in results.items():
    for metric_name, metric_value in metrics.items():
        formatted = format_value(metric_value)
        print(config.ljust(col_widths['Train/Test'])
              + metric_name.ljust(col_widths['Fairness Metric'])
              + formatted)

Train/Test     Fairness Metric               Value
---------------------------------------------------------------------------------------------------------------------------------------------
Clean/Clean    Demographic Parity            {1: 1.0000, 9: 1.0000, 2: 1.0000, 8: 1.0000, 6: 1.0000, 7: 1.0000, 3: 1.0000, 5: 1.0000}
Clean/Clean    Equalized Odds                ({1: 1.0000, 9: 1.0000, 2: 1.0000, 8: 1.0000, 6: 1.0000, 7: 1.0000, 3: 1.0000, 5: 1.0000}, {1: 1.0000, 9: 1.0000, 2: 1.0000, 8: 1.0000, 6: 1.0000, 7: 0, 3: 0, 5: 1.0000})
Clean/Clean    Disparate Impact Ratio        1.0000
Clean/Clean    Statistical Parity Difference 0.0000
Clean/Dirty    Demographic Parity            {3: 1.0000, 1: 1.0000, 9: 1.0000, 2: 1.0000, 8: 1.0000, 6: 1.0000, 7: 1.0000, 5: 1.0000, 4: 1.0000}
Clean/Dirty    Equalized Odds                ({3: 1.0000, 1: 1.0000, 9: 1.0000, 2: 1.0000, 8: 1.0000, 6: 1.0000, 7: 1.0000, 5: 1.0000, 4: 1.0000}, {3: 1.0000, 1: 1.0000, 9: 1.0000, 2: 1.0000, 8: 1.0000, 6: 1.

## Analysis of Results

### Demographic Parity:
In both datasets, prediction rates vary significantly across racial groups. In the clean data, groups 7 and 3 have 100% prediction rate while group 6 has only 28%. The dirty data shows less extreme variations, with rates between 44% and 93%.

### Equalized Odds
In the clean dataset, the true positive rates (TPR) are quite similar for all groups (around 1.0 for most groups), and false positive rates (FPR) are mostly 0. For the dirty dataset, the TPRs and FPRs deviate more between groups, indicating increased bias introduced by the data corruption.

### Disparate Impact Ratio
The clean dataset shows a disparate impact ratio of around 0.28, indicating significant disparity in prediction rates between the most and least favored groups. The dirty dataset shows an improved disparate impact ratio of around 0.48, but still reflects inequality.

### Statistical Parity Difference
The clean dataset has a parity difference of around 0.72, showing a noticeable discrepancy in positive prediction rates between different groups. The dirty dataset shows a smaller but still substantial disparity (0.49), suggesting that while the data corruption may slightly reduce the bias, it does not eliminate it.

## Summary

The dirty data (with functional dependency violations) significantly reduces model accuracy, but interestingly shows improved fairness metrics in some ways. Both datasets show substantial disparities across racial groups: the clean data has perfect classification accuracy but shows greater disparity between groups, the dirty data has lower accuracy but somewhat reduced disparities. Decision Trees are slightly more robust to the data quality issues than Logistic Regression. A potential explanation for these results is that the functional dependency violations in the dirty data may have disrupted some of the patterns that led to the extreme disparities in the clean data, inadvertently reducing the measured disparities while decreasing overall accuracy. This suggests an important trade-off between model accuracy and fairness metrics, highlighting the complex relationship between data quality and algorithmic fairness.

In [19]:
# For decision trees
def plot_feature_importance(model, feature_names, title):
    """Show a bar chart of a model's feature importances, largest first.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        feature_names: Indexable collection of names; position ``i`` labels
            ``model.feature_importances_[i]``.
        title: Title drawn above the chart.

    Returns:
        A list of ``(feature_name, importance)`` tuples for the five most
        important features, in descending order of importance.
    """
    scores = model.feature_importances_
    order = np.argsort(scores)[::-1]  # feature positions, most important first
    positions = range(len(order))
    ranked_names = [feature_names[idx] for idx in order]

    plt.figure(figsize=(10, 6))
    plt.title(title)
    plt.bar(positions, scores[order], align='center')
    plt.xticks(positions, ranked_names, rotation=90)
    plt.tight_layout()
    plt.show()

    # Pair the five leading names with their importance values.
    return list(zip(ranked_names[:5], scores[order[:5]]))

# For logistic regression
def plot_logistic_coefficients(model, feature_names, title):
    """Show a bar chart of a linear model's coefficients, ordered by magnitude.

    Features are ranked by the absolute value of their coefficient, but the
    bars keep the coefficient's sign.

    Args:
        model: Fitted estimator exposing ``coef_``; only its first row
            (``coef_[0]``) is plotted.
        feature_names: Indexable collection of names; position ``i`` labels
            ``model.coef_[0][i]``.
        title: Title drawn above the chart.

    Returns:
        A list of ``(feature_name, coefficient)`` tuples for the five
        coefficients with the largest absolute value, largest first.
    """
    weights = model.coef_[0]
    order = np.argsort(np.abs(weights))[::-1]  # largest |coefficient| first
    positions = range(len(order))
    ranked_names = [feature_names[idx] for idx in order]

    plt.figure(figsize=(10, 6))
    plt.title(title)
    plt.bar(positions, weights[order], align='center')
    plt.xticks(positions, ranked_names, rotation=90)
    plt.tight_layout()
    plt.show()

    # zip() stops after the five leading positions.
    return [(name, weights[idx]) for name, idx in zip(ranked_names, order[:5])]

# Compare which features drive each model on the clean vs. dirty data.
# NOTE(review): the recorded output of this cell is
# "NameError: name 'tree_clean' is not defined", so at least `tree_clean` is not
# bound when this cell runs. The other model names used below may be affected too.
# TODO: confirm the actual variable names of the fitted models and use them here.
# Column labels of the training frames; used as tick labels in the plots.
clean_feature_names = X_train_clean.columns
dirty_feature_names = X_train_dirty.columns

# Decision trees: each call shows a chart and returns the top-5 (feature, importance) pairs.
tree_clean_features = plot_feature_importance(tree_clean, clean_feature_names, "Feature Importance - Clean Data (Decision Tree)")
tree_dirty_features = plot_feature_importance(tree_dirty, dirty_feature_names, "Feature Importance - Dirty Data (Decision Tree)")

# Logistic regression: each call shows a chart and returns the top-5 (feature, coefficient) pairs by |coefficient|.
logreg_clean_features = plot_logistic_coefficients(logreg_clean, clean_feature_names, "Feature Coefficients - Clean Data (Logistic Regression)")
logreg_dirty_features = plot_logistic_coefficients(logreg_dirty, dirty_feature_names, "Feature Coefficients - Dirty Data (Logistic Regression)")

NameError: name 'tree_clean' is not defined

In [None]:
# Evaluate fairness metrics for different prediction thresholds
def fairness_by_threshold(model, X_test, y_test, sensitive_attr, thresholds=None):
    """Evaluate accuracy and fairness metrics over a range of decision thresholds.

    For every threshold, column 1 of ``model.predict_proba(X_test)`` is
    binarised with ``>= threshold``. The resulting predictions are passed to
    the notebook's fairness helpers (``demographic_parity``,
    ``equalized_odds``, ``disparate_impact_ratio``,
    ``statistical_parity_difference``) and to ``accuracy_score``.

    Args:
        model: Fitted classifier exposing ``predict_proba``.
        X_test: Feature matrix to score.
        y_test: True labels for ``X_test``.
        sensitive_attr: Sensitive attribute values, forwarded unchanged to the
            fairness helpers.
        thresholds: Iterable of cut-offs. Defaults to nine evenly spaced
            values from 0.1 to 0.9.

    Returns:
        dict mapping each threshold to a dict with the keys ``'accuracy'``,
        ``'demographic_parity'``, ``'equalized_odds_tpr'``,
        ``'equalized_odds_fpr'``, ``'disparate_impact'`` and
        ``'statistical_parity_diff'``.
    """
    if thresholds is None:
        thresholds = np.linspace(0.1, 0.9, 9)

    # The scores do not depend on the threshold, so compute them once rather
    # than once per threshold.
    # NOTE(review): column 1 is taken to be the positive class, which assumes
    # 0/1 labels as ordered in model.classes_; confirm for other encodings.
    y_prob = model.predict_proba(X_test)[:, 1]

    results = {}

    for threshold in thresholds:
        y_pred = (y_prob >= threshold).astype(int)

        dp = demographic_parity(y_pred, sensitive_attr)
        tpr, fpr = equalized_odds(y_test, y_pred, sensitive_attr)
        di = disparate_impact_ratio(y_pred, sensitive_attr)
        spd = statistical_parity_difference(y_pred, sensitive_attr)

        # Calculate accuracy at this threshold
        acc = accuracy_score(y_test, y_pred)

        results[threshold] = {
            'accuracy': acc,
            'demographic_parity': dp,
            'equalized_odds_tpr': tpr,
            'equalized_odds_fpr': fpr,
            'disparate_impact': di,
            'statistical_parity_diff': spd
        }

    return results

# Sweep the default thresholds for the logistic-regression model of each dataset.
logreg_clean_thresholds = fairness_by_threshold(
    logreg_clean, X_test_clean, y_test_clean, s_test_clean
)
logreg_dirty_thresholds = fairness_by_threshold(
    logreg_dirty, X_test_dirty, y_test_dirty, s_test_dirty
)

# Both sweeps use the same default thresholds, so the clean run's keys index both results.
thresholds = list(logreg_clean_thresholds)
clean_acc = [logreg_clean_thresholds[thr]['accuracy'] for thr in thresholds]
clean_di = [logreg_clean_thresholds[thr]['disparate_impact'] for thr in thresholds]
dirty_acc = [logreg_dirty_thresholds[thr]['accuracy'] for thr in thresholds]
dirty_di = [logreg_dirty_thresholds[thr]['disparate_impact'] for thr in thresholds]

# One line per (dataset, metric): blue = clean, red = dirty; solid = accuracy, dashed = disparate impact.
plt.figure(figsize=(10, 6))
for _values, _fmt, _label in (
    (clean_acc, 'b-', 'Clean Data - Accuracy'),
    (clean_di, 'b--', 'Clean Data - Disparate Impact'),
    (dirty_acc, 'r-', 'Dirty Data - Accuracy'),
    (dirty_di, 'r--', 'Dirty Data - Disparate Impact'),
):
    plt.plot(thresholds, _values, _fmt, label=_label)
plt.xlabel('Classification Threshold')
plt.ylabel('Metric Value')
plt.title('Accuracy vs. Fairness Trade-off')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Function to analyze model performance by group
# 1: White alone, 2: Black or African American alone, 3: American Indian or Alaska Native alone, 4: Chinese alone, 5: Japanese alone
# 6: Other Asian or Pacific Islander alone, 7: Other race alone, 8: Two or more races, 9: Asian Indian alone
def analyze_by_group(y_true, y_pred, sensitive_attr, min_group_size=5):
    """Summarise classifier performance separately for each sensitive group.

    The three inputs are treated as position-aligned sequences of equal
    length, so the result does not depend on pandas index labels.

    Args:
        y_true: True binary labels (0/1).
        y_pred: Predicted binary labels (0/1).
        sensitive_attr: Group membership of each sample.
        min_group_size: Groups with fewer samples than this are left out of
            the result. Defaults to 5.

    Returns:
        dict mapping each retained group value to a dict with the keys
        ``'accuracy'``, ``'group_size'``, ``'percent_positive_predictions'``,
        ``'percent_positive_actual'`` and ``'confusion_matrix'``. The
        confusion matrix is a nested 2x2 list with rows as true label 0/1
        and columns as predicted label 0/1.
    """
    y_true_values = np.asarray(y_true)
    y_pred_values = np.asarray(y_pred)
    group_values = np.asarray(sensitive_attr)

    results = {}

    # pd.unique keeps order of first appearance, like Series.unique().
    for group in pd.unique(group_values):
        mask = group_values == group
        y_true_group = y_true_values[mask]
        y_pred_group = y_pred_values[mask]

        # Skip groups with too few samples
        if len(y_true_group) < min_group_size:
            continue

        results[group] = {
            'accuracy': accuracy_score(y_true_group, y_pred_group),
            'group_size': len(y_true_group),
            'percent_positive_predictions': y_pred_group.mean() * 100,
            'percent_positive_actual': y_true_group.mean() * 100,
            # Fix the label set so a group containing a single class still
            # yields a 2x2 matrix instead of a 1x1 one.
            'confusion_matrix': confusion_matrix(y_true_group, y_pred_group, labels=[0, 1]).tolist()
        }

    return results

# Per-group breakdown for every (model, dataset) combination.
logreg_clean_by_group = analyze_by_group(y_test_clean, y_pred_clean, s_test_clean)
logreg_dirty_by_group = analyze_by_group(y_test_dirty, y_pred_dirty, s_test_dirty)
tree_clean_by_group = analyze_by_group(y_test_clean, y_pred_tree_clean, s_test_clean)
tree_dirty_by_group = analyze_by_group(y_test_dirty, y_pred_tree_dirty, s_test_dirty)

# Only the logistic-regression results are plotted below.
# A group missing from one dataset's results is drawn with accuracy 0.
groups = sorted(logreg_clean_by_group.keys() | logreg_dirty_by_group.keys())
clean_acc_by_group = [logreg_clean_by_group.get(grp, {}).get('accuracy', 0) for grp in groups]
dirty_acc_by_group = [logreg_dirty_by_group.get(grp, {}).get('accuracy', 0) for grp in groups]

# Side-by-side bars: clean data to the left of each tick, dirty data to the right.
x = np.arange(len(groups))
width = 0.35
plt.figure(figsize=(12, 6))
for _offset, _heights, _label in (
    (-width / 2, clean_acc_by_group, 'Clean Data'),
    (width / 2, dirty_acc_by_group, 'Dirty Data'),
):
    plt.bar(x + _offset, _heights, width, label=_label)
plt.xlabel('Race Group')
plt.ylabel('Accuracy')
plt.title('Model Accuracy by Race Group')
plt.xticks(x, groups)
plt.ylim(0, 1.05)
plt.legend()
plt.grid(True, axis='y')
plt.show()

In [None]:
# Function to identify and analyze misclassified examples
def analyze_misclassifications(X_test, y_test, y_pred, sensitive_attr):
    """Summarise where a classifier's predictions disagree with the truth.

    Args:
        X_test: DataFrame of test features.
        y_test: Series of true labels, sharing its index with ``X_test`` and
            ``sensitive_attr``.
        y_pred: Predicted labels in the same row order as ``y_test``. It may
            be an array, a list, or a Series with any index, because it is
            compared positionally.
        sensitive_attr: Series giving the group of each test sample.

    Returns:
        dict with the keys:
            ``'total_misclassified'``: number of wrong predictions.
            ``'error_rate'``: fraction of wrong predictions.
            ``'group_error_counts'``: ``{group: wrong predictions}``,
                containing only groups with at least one error.
            ``'group_error_rates'``: ``{group: wrong / group size}`` for every
                group present in ``sensitive_attr``.
            ``'misclassified_feature_stats'``: ``describe()`` of the
                misclassified rows of ``X_test``.
            ``'overall_feature_stats'``: ``describe()`` of all of ``X_test``.
    """
    # Compare by position. A Series of predictions carrying a different index
    # than y_test would otherwise make the comparison raise.
    misclassified = y_test != np.asarray(y_pred)
    X_misclassified = X_test.loc[misclassified]
    s_misclassified = sensitive_attr[misclassified]

    # Count misclassifications by group
    group_counts = s_misclassified.value_counts().to_dict()
    total_by_group = sensitive_attr.value_counts().to_dict()

    # Calculate error rates by group; groups without errors get a rate of 0.
    error_rates = {
        group: group_counts.get(group, 0) / total
        for group, total in total_by_group.items()
    }

    return {
        'total_misclassified': misclassified.sum(),
        'error_rate': misclassified.mean(),
        'group_error_counts': group_counts,
        'group_error_rates': error_rates,
        # Feature distributions of the misclassified rows vs. the whole test set
        'misclassified_feature_stats': X_misclassified.describe(),
        'overall_feature_stats': X_test.describe()
    }

# Misclassification summary for every (model, dataset) combination.
logreg_clean_errors = analyze_misclassifications(X_test_clean, y_test_clean, y_pred_clean, s_test_clean)
logreg_dirty_errors = analyze_misclassifications(X_test_dirty, y_test_dirty, y_pred_dirty, s_test_dirty)
tree_clean_errors = analyze_misclassifications(X_test_clean, y_test_clean, y_pred_tree_clean, s_test_clean)
tree_dirty_errors = analyze_misclassifications(X_test_dirty, y_test_dirty, y_pred_tree_dirty, s_test_dirty)

# Only the logistic-regression error rates are plotted below.
# A group missing from one dataset is drawn with an error rate of 0.
groups = sorted(
    logreg_clean_errors['group_error_rates'].keys()
    | logreg_dirty_errors['group_error_rates'].keys()
)
clean_errors = [logreg_clean_errors['group_error_rates'].get(grp, 0) for grp in groups]
dirty_errors = [logreg_dirty_errors['group_error_rates'].get(grp, 0) for grp in groups]

# Side-by-side bars: clean data to the left of each tick, dirty data to the right.
x = np.arange(len(groups))
width = 0.35
plt.figure(figsize=(12, 6))
for _offset, _heights, _label in (
    (-width / 2, clean_errors, 'Clean Data'),
    (width / 2, dirty_errors, 'Dirty Data'),
):
    plt.bar(x + _offset, _heights, width, label=_label)
plt.xlabel('Race Group')
plt.ylabel('Error Rate')
plt.title('Error Rates by Race Group')
plt.xticks(x, groups)
plt.legend()
plt.grid(True, axis='y')
plt.show()