In [1]:
import os
import sys

src_path = r"C:\Users\asrit\Documents\516-Project-main\516-Project-main\src"
if src_path not in sys.path:
    sys.path.append(src_path)

In [2]:
import pandas as pd
import numpy as np
import shap

from sklearn.metrics import mean_absolute_error, accuracy_score
from scipy.stats import pearsonr

from data_loader import load_dataset
from modeling import train_smote_forest, evaluate_model, train_random_forest
from fairness_metrics import print_group_rates, disparate_impact, equal_opportunity
from preprocess import apply_reweighing

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from fairlearn.postprocessing import ThresholdOptimizer
from fairlearn.metrics import MetricFrame, selection_rate
from fairlearn.reductions import ExponentiatedGradient, EqualizedOdds

from lime.lime_tabular import LimeTabularExplainer

pip install 'aif360[inFairness]'


In [3]:
# Load data
df = load_dataset("../data/cleaned_dataset.csv")

# Pre-Processing

Using a preprocessing mitigation strategy - AIF360’s Reweighing

In [4]:
# Step 1: Apply AIF360 Reweighing (only used for fairness metrics, not for training if SMOTE is applied)
df = apply_reweighing(df, protected_attr='Gender', label_col='Employment')

gender_map = {0: 'Man', 1: 'NonBinary', 2: 'Woman'}
#age_map = {0: '<35', 1: '>35'}

df['Gender_group'] = df['Gender'].map(gender_map)
df['Age_group'] = df['Age']  


print(df['Gender_group'].unique())
print(df['Gender_group'].dtype)

print(df['Age_group'].unique())
print(df['Age_group'].dtype)


# Step 2: Feature engineering for modeling
features = [
    'Gender_group', 'Age_group', 'EdLevel', 'Gender_encoded', 'Age_encoded',
    'YearsCode', 'YearsCodePro', 'ComputerSkills', 'PreviousSalary'
]
X = pd.get_dummies(df[features], drop_first=True)
y = df['Employment'].astype(int)

print(X.columns)

['Man' 'Woman' 'NonBinary']
object
['<35' '>35']
object
Index(['Gender_encoded', 'Age_encoded', 'YearsCode', 'YearsCodePro',
       'ComputerSkills', 'PreviousSalary', 'Gender_group_NonBinary',
       'Gender_group_Woman', 'Age_group_>35', 'EdLevel_NoHigherEd',
       'EdLevel_Other', 'EdLevel_PhD', 'EdLevel_Undergraduate'],
      dtype='object')


In [5]:
# Step 3: Choose model strategy
use_smote = True  # Toggle this to False to use reweighing instead of SMOTE

if use_smote:
    model, X_test, y_test, y_pred = train_smote_forest(X, y)
else:
    sample_weights = df['instance_weight']
    model, X_test, y_test, y_pred = train_random_forest(X, y, sample_weights)

# Step 4: Evaluate base model performance (before fairness post-processing)
evaluate_model(y_test, y_pred)

# --- 3. Equal Accuracy by Gender Group ---
y_test_aligned = y_test.reset_index(drop=True)
y_pred_aligned = pd.Series(y_pred).reset_index(drop=True)
gender_column = df.loc[y_test.index, 'Gender_group'].reset_index(drop=True)

# --- 1. Pearson's Correlation ---
pearson_corr, _ = pearsonr(y_test, y_pred)
print(f"Pearson Correlation: {round(pearson_corr, 4)}")

# --- 2. Mean Absolute Error ---
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {round(mae, 4)}")

equal_accuracy = {}
for group in sorted(gender_column.unique()):
    group_mask = gender_column == group
    acc = accuracy_score(y_test_aligned[group_mask], y_pred_aligned[group_mask])
    equal_accuracy[group] = acc
    print(f"Accuracy for Gender {group}: {round(acc, 4)}")

equal_accuracy_gap = max(equal_accuracy.values()) - min(equal_accuracy.values())
print(f"Equal Accuracy Gap: {round(equal_accuracy_gap, 4)}")

Accuracy: 0.8051181995553337
Classification Report:
               precision    recall  f1-score   support

           0       0.18      0.19      0.18      2571
           1       0.89      0.89      0.89     19468

    accuracy                           0.81     22039
   macro avg       0.54      0.54      0.54     22039
weighted avg       0.81      0.81      0.81     22039

Pearson Correlation: 0.0743
Mean Absolute Error: 0.1949
Accuracy for Gender Man: 0.8001
Accuracy for Gender NonBinary: 0.8318
Accuracy for Gender Woman: 0.8947
Equal Accuracy Gap: 0.0946


In [6]:
# Step 5: Custom fairness metrics based on fairness-aware predictions
print_group_rates(df, 'Gender')
print_group_rates(df, 'Age')
print_group_rates(df, 'EdLevel')

# Disparate impact comparisons
disparate_impact(df, 2, 0, 'Gender')  # Woman vs Man
disparate_impact(df, 1, 0, 'Gender')  # NonBinary vs Man
disparate_impact(df, '>35', '<35', 'Age')

# Equal opportunity using fairness-aware predictions
equal_opportunity(
    y_test.reset_index(drop=True),
    pd.Series(y_pred), 
    [0, 1, 2],  # Man, NonBinary, Woman
    'Gender',
    df.reset_index(drop=True)
)



Selection Rates by Gender:
0: 0.88
2: 0.91
1: 0.87

Selection Rates by Age:
<35: 0.90
>35: 0.85

Selection Rates by EdLevel:
Master: 0.88
Undergraduate: 0.90
PhD: 0.90
Other: 0.84
NoHigherEd: 0.80

Disparate Impact (2/0): 1.03

Disparate Impact (1/0): 0.98

Disparate Impact (>35/<35): 0.95

Equal Opportunity by group:
0: TPR = 0.89
1: TPR = 0.89
2: TPR = 0.87


# In-processing

In [7]:
def run_consistent_metrics(y_test, y_pred, sensitive_col='Gender_encoded', label_name='Gender'):
    """
    Evaluate predictions using MAE, Pearson correlation, and group-wise accuracy.
    Assumes access to global df and X_test_pmute for index alignment.
    """
    # Align predictions and labels
    y_test = y_test.reset_index(drop=True)
    y_pred = pd.Series(y_pred).reset_index(drop=True)
    
    # Align sensitive attribute values from df using test indices
    sensitive_series = df.loc[y_test.index, sensitive_col].reset_index(drop=True)

    # --- 1. Pearson Correlation ---
    pearson_corr, _ = pearsonr(y_test, y_pred)
    print(f"\nPearson Correlation: {round(pearson_corr, 4)}")

    # --- 2. Mean Absolute Error ---
    mae = mean_absolute_error(y_test, y_pred)
    print(f"Mean Absolute Error: {round(mae, 4)}")

    # --- 3. Group-wise Accuracy ---
    print(f"\nAccuracy by {label_name} group:")
    for group in sorted(sensitive_series.unique()):
        group_mask = sensitive_series == group
        acc = accuracy_score(y_test[group_mask], y_pred[group_mask])
        print(f"{label_name} {group}: {round(acc, 4)}")


In [8]:
from fairlearn.reductions import ExponentiatedGradient, EqualizedOdds
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from fairlearn.metrics import MetricFrame, selection_rate
from imblearn.over_sampling import SMOTE

# --- 1. Split original train and test sets from preprocessed X, y ---
X_train_orig = X.loc[X.index.difference(X_test.index)].copy()
y_train_orig = y.loc[X.index.difference(X_test.index)].copy()
X_test_eg = X_test.copy()
y_test_eg = y_test.copy()

# --- 2. Add Gender_group (protected attr) as column to X ---
X_aug = X_train_orig.copy()
X_aug["Gender_group"] = df.loc[X_train_orig.index, "Gender_group"].values

# --- 3. Combine X + y + protected attr into one DataFrame ---
train_combined = X_aug.copy()
train_combined["Employment"] = y_train_orig.values

# --- 4. One-hot encode Gender_group to allow SMOTE ---
train_encoded = pd.get_dummies(train_combined, columns=["Gender_group"])

# --- 5. Apply SMOTE on features (excluding label) ---
smote = SMOTE(random_state=42)
X_smoted, y_resampled = smote.fit_resample(
    train_encoded.drop(columns=["Employment"]),
    train_encoded["Employment"]
)

# --- 6. Recover protected attribute from one-hot encoded columns ---
gender_cols = [col for col in X_smoted.columns if col.startswith("Gender_group_")]
sensitive_feature_train = X_smoted[gender_cols].idxmax(axis=1).str.replace("Gender_group_", "")

# --- 7. Drop one-hot protected attr columns from training data ---
X_resampled_final = X_smoted.drop(columns=gender_cols)

# --- 8. Train fairness-aware model ---
base_model = LogisticRegression(solver="liblinear", class_weight='balanced')
eg_model = ExponentiatedGradient(
    estimator=base_model,
    constraints=EqualizedOdds(),
    eps=0.01
)
eg_model.fit(X_resampled_final, y_resampled, sensitive_features=sensitive_feature_train)

# --- 9. Drop protected one-hot columns from test set to match training features ---
X_test_eg = X_test_eg.drop(columns=[col for col in X_test_eg.columns if col.startswith("Gender_group_")])
sensitive_feature_test = df.loc[X_test_eg.index, "Gender_group"]
y_pred_eg = eg_model.predict(X_test_eg)

In [9]:
# --- 10. Performance Metrics ---
print("Exponentiated Gradient with SMOTE and Class Weights:")
print("Accuracy:", accuracy_score(y_test_eg, y_pred_eg))
print("Classification Report:")
print(classification_report(y_test_eg, y_pred_eg))

# --- 11. Utility Evaluation ---
evaluate_model(y_test_eg, y_pred_eg)
run_consistent_metrics(y_test_eg, y_pred_eg, sensitive_col='Gender_group', label_name='Gender')
run_consistent_metrics(y_test_eg, y_pred_eg, sensitive_col='Age_group', label_name='Age')
run_consistent_metrics(y_test_eg, y_pred_eg, sensitive_col='EdLevel', label_name='EdLevel')

Exponentiated Gradient with SMOTE and Class Weights:
Accuracy: 0.7763963882208812
Classification Report:
              precision    recall  f1-score   support

           0       0.18      0.25      0.21      2571
           1       0.90      0.85      0.87     19468

    accuracy                           0.78     22039
   macro avg       0.54      0.55      0.54     22039
weighted avg       0.81      0.78      0.79     22039

Accuracy: 0.7763963882208812
Classification Report:
               precision    recall  f1-score   support

           0       0.18      0.25      0.21      2571
           1       0.90      0.85      0.87     19468

    accuracy                           0.78     22039
   macro avg       0.54      0.55      0.54     22039
weighted avg       0.81      0.78      0.79     22039


Pearson Correlation: 0.0854
Mean Absolute Error: 0.2236

Accuracy by Gender group:
Gender Man: 0.7757
Gender NonBinary: 0.7766
Gender Woman: 0.7922

Pearson Correlation: 0.0854
Mean Absol

In [10]:
# --- 12. MetricFrame Fairness ---
for attr, label in [("Gender_group", "Gender"), ("Age_group", "Age"), ("EdLevel", "EdLevel")]:
    mf = MetricFrame(
        metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
        y_true=y_test_eg,
        y_pred=y_pred_eg,
        sensitive_features=df.loc[X_test_eg.index, attr]
    )
    print(f"\nFairness metrics by {label}:")
    print(mf.by_group)


Fairness metrics by Gender:
              accuracy  selection_rate
Gender_group                          
Man           0.777039        0.833714
NonBinary     0.739336        0.815166
Woman         0.778752        0.845029

Fairness metrics by Age:
           accuracy  selection_rate
Age_group                          
<35        0.841863        0.913446
>35        0.653946        0.685074

Fairness metrics by EdLevel:
               accuracy  selection_rate
EdLevel                                
Master         0.875536        0.993205
NoHigherEd     0.291324        0.155251
Other          0.535869        0.520099
PhD            0.845269        0.916880
Undergraduate  0.838215        0.904640


In [11]:
# --- 13. Custom Fairness Metrics ---
print_group_rates(df, 'Gender')
print_group_rates(df, 'Age')
print_group_rates(df, 'EdLevel')

# gender
disparate_impact(df, 'Woman', 'Man', 'Gender_group')      
disparate_impact(df, 'NonBinary', 'Man', 'Gender_group') 

# age
disparate_impact(df, '>35', '<35', 'Age_group')             

# Education level
disparate_impact(df, 'NoHigherEd', 'Master', 'EdLevel')
disparate_impact(df, 'PhD', 'Undergraduate', 'EdLevel')
disparate_impact(df, 'Other', 'Undergraduate', 'EdLevel')




Selection Rates by Gender:
0: 0.88
2: 0.91
1: 0.87

Selection Rates by Age:
<35: 0.90
>35: 0.85

Selection Rates by EdLevel:
Master: 0.88
Undergraduate: 0.90
PhD: 0.90
Other: 0.84
NoHigherEd: 0.80

Disparate Impact (Woman/Man): 1.03

Disparate Impact (NonBinary/Man): 0.98

Disparate Impact (>35/<35): 0.95

Disparate Impact (NoHigherEd/Master): 0.91

Disparate Impact (PhD/Undergraduate): 0.99

Disparate Impact (Other/Undergraduate): 0.93


0.9285316081807473

In [12]:
equal_opportunity(
    y_test_eg.reset_index(drop=True),
    pd.Series(y_pred_eg),
    ['Man', 'NonBinary', 'Woman'],
    'Gender_group',
    df.reset_index(drop=True)
)

df.loc[X_test_eg.index, "predicted_label"] = y_pred_eg

equal_opportunity(
    y_test_eg.reset_index(drop=True),
    pd.Series(y_pred_eg),
    ['<35', '>35'],             # distinct values in Age_group
    'Age_group',
    df.reset_index(drop=True)
)

equal_opportunity(
    y_test_eg.reset_index(drop=True),
    pd.Series(y_pred_eg),
    ['Undergraduate', 'Master', 'PhD', 'Other', 'NoHigherEd'],   # use actual values in your data
    'EdLevel',
    df.reset_index(drop=True)
)



Equal Opportunity by group:
Man: TPR = 0.85
NonBinary: TPR = 0.83
Woman: TPR = 0.85

Equal Opportunity by group:
<35: TPR = 0.85
>35: TPR = 0.84

Equal Opportunity by group:
Undergraduate: TPR = 0.84
Master: TPR = 0.85
PhD: TPR = 0.85
Other: TPR = 0.85
NoHigherEd: TPR = 0.84


# Post Processing

In [13]:
df[['Gender_group', 'Age_group', 'EdLevel']].head()


Unnamed: 0,Gender_group,Age_group,EdLevel
0,Man,<35,Master
1,Man,<35,Undergraduate
2,Man,<35,Master
3,Man,<35,Undergraduate
4,Man,>35,PhD


In [14]:
# --- FULL POST-PROCESSING BLOCK WITH CONSISTENCY AND METRICS ---

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn.metrics import accuracy_score, classification_report, mean_absolute_error
from fairlearn.metrics import MetricFrame, selection_rate
from scipy.stats import pearsonr

# --- 1. 3-Way Split: Train (60%), Validation (10%), Test (30%) ---
X_trainval, X_test_post, y_trainval, y_test_post = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)
X_train_post, X_val_post, y_train_post, y_val_post = train_test_split(
    X_trainval, y_trainval, test_size=0.125, stratify=y_trainval, random_state=42
)

# --- 2. Apply SMOTE to training data ---
smote = SMOTE(random_state=42)
X_train_post_smote, y_train_post_smote = smote.fit_resample(X_train_post, y_train_post)

# --- 3. Train classifier on SMOTE-balanced data ---
model_post = RandomForestClassifier(n_estimators=100, random_state=42)
model_post.fit(X_train_post_smote, y_train_post_smote)

# --- 4. Fit ThresholdOptimizer using validation set ---
sensitive_gender_val = df.loc[X_val_post.index, "Gender_group"]
postprocessor = ThresholdOptimizer(
    estimator=model_post,
    constraints="equalized_odds",
    predict_method="predict_proba"
)
postprocessor.fit(X_val_post, y_val_post, sensitive_features=sensitive_gender_val)

# --- 5. Predict on test set ---
sensitive_gender_test = df.loc[X_test_post.index, "Gender_group"]
y_pred_fair = postprocessor.predict(X_test_post, sensitive_features=sensitive_gender_test)

# --- 6. Utility & Classification Metrics ---
print("📊 Fairness-aware Evaluation (SMOTE + Equalized Odds):")
print("Accuracy:", accuracy_score(y_test_post, y_pred_fair))
print("\nClassification Report:")
print(classification_report(y_test_post, y_pred_fair))

pearson_corr, _ = pearsonr(y_test_post, y_pred_fair)
mae = mean_absolute_error(y_test_post, y_pred_fair)
print(f"Pearson Correlation: {round(pearson_corr, 4)}")
print(f"Mean Absolute Error: {round(mae, 4)}")

# --- 7. Group-wise Accuracy Reporting ---
def run_consistent_metrics(y_true, y_pred, sensitive_col, label_name):
    print(f"\nAccuracy by {label_name} group:")
    groups = df.loc[X_test_post.index, sensitive_col]
    y_true_aligned = y_true.reset_index(drop=True)
    y_pred_aligned = pd.Series(y_pred).reset_index(drop=True)
    for group in sorted(groups.unique()):
        mask = (groups == group).reset_index(drop=True)
        acc = accuracy_score(y_true_aligned[mask], y_pred_aligned[mask])
        print(f"{label_name} {group}: {round(acc, 4)}")

run_consistent_metrics(y_test_post, y_pred_fair, 'Gender_group', 'Gender')
run_consistent_metrics(y_test_post, y_pred_fair, 'Age_group', 'Age')
run_consistent_metrics(y_test_post, y_pred_fair, 'EdLevel', 'EdLevel')

# --- 8. MetricFrame Fairness Evaluation ---
mf_gender = MetricFrame(metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
                        y_true=y_test_post, y_pred=y_pred_fair,
                        sensitive_features=df.loc[X_test_post.index, "Gender_group"])
mf_age = MetricFrame(metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
                     y_true=y_test_post, y_pred=y_pred_fair,
                     sensitive_features=df.loc[X_test_post.index, "Age_group"])
mf_edlevel = MetricFrame(metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
                         y_true=y_test_post, y_pred=y_pred_fair,
                         sensitive_features=df.loc[X_test_post.index, "EdLevel"])

print("\nFairness metrics by Gender:")
print(mf_gender.by_group)
print("\nFairness metrics by Age:")
print(mf_age.by_group)
print("\nFairness metrics by EdLevel:")
print(mf_edlevel.by_group)



📊 Fairness-aware Evaluation (SMOTE + Equalized Odds):
Accuracy: 0.8660556286582876

Classification Report:
              precision    recall  f1-score   support

           0       0.23      0.06      0.10      2576
           1       0.89      0.97      0.93     19463

    accuracy                           0.87     22039
   macro avg       0.56      0.52      0.51     22039
weighted avg       0.81      0.87      0.83     22039

Pearson Correlation: 0.0635
Mean Absolute Error: 0.1339

Accuracy by Gender group:
Gender Man: 0.8649
Gender NonBinary: 0.8616
Gender Woman: 0.8903

Accuracy by Age group:
Age <35: 0.8903
Age >35: 0.8217

Accuracy by EdLevel group:
EdLevel Master: 0.874
EdLevel NoHigherEd: 0.7847
EdLevel Other: 0.7987
EdLevel PhD: 0.8794
EdLevel Undergraduate: 0.889

Fairness metrics by Gender:
              accuracy  selection_rate
Gender_group                          
Man           0.864945        0.968736
NonBinary     0.861575        0.966587
Woman         0.890304       

In [15]:
# --- Selection Rates by Group ---
print("\nSelection Rates by Gender:")
for g in ['Man', 'Woman', 'NonBinary']:
    mask = df.loc[X_test_post.index, 'Gender_group'] == g
    rate = pd.Series(y_pred_fair, index=X_test_post.index)[mask].mean()
    print(f"{g}: {round(rate, 2)}")

print("\nSelection Rates by Age:")
for g in ['<35', '>35']:
    mask = df.loc[X_test_post.index, 'Age_group'] == g
    rate = pd.Series(y_pred_fair, index=X_test_post.index)[mask].mean()
    print(f"{g}: {round(rate, 2)}")

print("\nSelection Rates by EdLevel:")
for g in ['Master', 'Undergraduate', 'PhD', 'Other', 'NoHigherEd']:
    mask = df.loc[X_test_post.index, 'EdLevel'] == g
    rate = pd.Series(y_pred_fair, index=X_test_post.index)[mask].mean()
    print(f"{g}: {round(rate, 2)}")

# --- Disparate Impact ---
print("\nDisparate Impact (Safe Computation):")
disparities = {
    "Woman/Man": ('Woman', 'Man', 'Gender_group'),
    "NonBinary/Man": ('NonBinary', 'Man', 'Gender_group'),
    ">35/<35": ('>35', '<35', 'Age_group'),
    "NoHigherEd/Master": ('NoHigherEd', 'Master', 'EdLevel'),
    "PhD/Undergraduate": ('PhD', 'Undergraduate', 'EdLevel'),
    "Other/Undergraduate": ('Other', 'Undergraduate', 'EdLevel'),
}

for label, (grp1, grp2, col) in disparities.items():
    mask1 = df.loc[X_test_post.index, col] == grp1
    mask2 = df.loc[X_test_post.index, col] == grp2
    
    rate1 = pd.Series(y_pred_fair, index=X_test_post.index)[mask1].mean()
    rate2 = pd.Series(y_pred_fair, index=X_test_post.index)[mask2].mean()
    
    if rate2 == 0:
        di = "Undefined (division by zero)"
    else:
        di = round(rate1 / rate2, 2)
    
    print(f"Disparate Impact ({label}): {di}")

# --- Equal Opportunity by Group ---
def compute_tpr_by_group(y_true, y_pred, group_series, label_name):
    print(f"\nEqual Opportunity by group ({label_name}):")
    for group in sorted(group_series.unique()):
        mask = (group_series == group).reset_index(drop=True)
        true_pos = ((y_true == 1) & (y_pred == 1))[mask].sum()
        actual_pos = (y_true == 1)[mask].sum()
        tpr = round(true_pos / actual_pos, 2) if actual_pos > 0 else 0.0
        print(f"{group}: TPR = {tpr}")

compute_tpr_by_group(
    y_test_post.reset_index(drop=True),
    pd.Series(y_pred_fair),
    df.loc[X_test_post.index, 'Gender_group'],
    'Gender'
)

compute_tpr_by_group(
    y_test_post.reset_index(drop=True),
    pd.Series(y_pred_fair),
    df.loc[X_test_post.index, 'Age_group'],
    'Age'
)

compute_tpr_by_group(
    y_test_post.reset_index(drop=True),
    pd.Series(y_pred_fair),
    df.loc[X_test_post.index, 'EdLevel'],
    'EdLevel'
)



Selection Rates by Gender:
Man: 0.97
Woman: 0.96
NonBinary: 0.97

Selection Rates by Age:
<35: 0.98
>35: 0.95

Selection Rates by EdLevel:
Master: 0.98
Undergraduate: 0.98
PhD: 0.98
Other: 0.93
NoHigherEd: 0.94

Disparate Impact (Safe Computation):
Disparate Impact (Woman/Man): 0.99
Disparate Impact (NonBinary/Man): 1.0
Disparate Impact (>35/<35): 0.96
Disparate Impact (NoHigherEd/Master): 0.96
Disparate Impact (PhD/Undergraduate): 1.0
Disparate Impact (Other/Undergraduate): 0.95

Equal Opportunity by group (Gender):
Man: TPR = 0.97
NonBinary: TPR = 0.97
Woman: TPR = 0.97

Equal Opportunity by group (Age):
<35: TPR = 0.98
>35: TPR = 0.95

Equal Opportunity by group (EdLevel):
Master: TPR = 0.98
NoHigherEd: TPR = 0.95
Other: TPR = 0.93
PhD: TPR = 0.99
Undergraduate: TPR = 0.98


# Explainability

## a) Proxy Mute

In [16]:
# --- 1. Train-Test Split ---
X_train_pmute, X_test_pmute, y_train_pmute, y_test_pmute = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# --- 2. Train Base Model (Logistic Regression with Class Weights) ---
lr_model_base = LogisticRegression(solver="liblinear", class_weight='balanced')
lr_model_base.fit(X_train_pmute, y_train_pmute)

# --- 3. SHAP Explainability (KernelExplainer for probability output) ---
explainer = shap.KernelExplainer(
    lr_model_base.predict_proba,
    X_train_pmute.sample(100, random_state=42)
)
shap_values = explainer.shap_values(X_test_pmute[:100])

# --- 4. Mean Absolute SHAP Importance ---
mean_abs_shap = np.abs(shap_values[1]).mean(axis=0)
shap_summary = pd.DataFrame({
    "feature": X_train_pmute.columns,
    "mean_abs_shap": mean_abs_shap
}).sort_values(by="mean_abs_shap", ascending=False)

  0%|          | 0/100 [00:00<?, ?it/s]

In [17]:
# --- 5. Define Proxy Features to Mute (based on SHAP summary) ---
proxy_features = ['PreviousSalary', 'EdLevel_Undergraduate', 'ComputerSkills']

# --- 6. Muting Proxy Features in Test Set ---
X_test_muted = X_test_pmute.copy()
for col in proxy_features:
    if col in X_test_muted.columns:
        X_test_muted[col] = X_test_muted[col].mean()

# --- 7. Predict on Muted Test Set ---
y_pred_muted = lr_model_base.predict(X_test_muted)

# --- 8. Performance Metrics ---
print("ProxyMute (Revised Proxy List):")
print("Accuracy:", accuracy_score(y_test_pmute, y_pred_muted))
print("\nClassification Report:")
print(classification_report(y_test_pmute, y_pred_muted))

# --- Utility: Pearson + MAE ---
pearson_corr, _ = pearsonr(y_test_pmute, y_pred_muted)
mae = mean_absolute_error(y_test_pmute, y_pred_muted)
print(f"Pearson Correlation: {round(pearson_corr, 4)}")
print(f"Mean Absolute Error: {round(mae, 4)}")



ProxyMute (Revised Proxy List):
Accuracy: 0.6645038341122556

Classification Report:
              precision    recall  f1-score   support

           0       0.16      0.44      0.23      2576
           1       0.90      0.69      0.79     19463

    accuracy                           0.66     22039
   macro avg       0.53      0.57      0.51     22039
weighted avg       0.82      0.66      0.72     22039

Pearson Correlation: 0.0904
Mean Absolute Error: 0.3355


In [18]:
# --- 9. Fairness Evaluation Using MetricFrame ---
# Gender
mf_shap_gender = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test_pmute,
    y_pred=y_pred_muted,
    sensitive_features=df.loc[X_test_pmute.index, "Gender_group"]
)
print("\nFairness metrics by Gender (ProxyMute SHAP):")
print(mf_shap_gender.by_group)

# Age
mf_shap_age = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test_pmute,
    y_pred=y_pred_muted,
    sensitive_features=df.loc[X_test_pmute.index, "Age_encoded"]
)
print("\nFairness metrics by Age (ProxyMute SHAP):")
print(mf_shap_age.by_group)

# EdLevel
mf_shap_edlevel = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test_pmute,
    y_pred=y_pred_muted,
    sensitive_features=df.loc[X_test_pmute.index, "EdLevel"]
)
print("\nFairness metrics by EdLevel (ProxyMute SHAP):")
print(mf_shap_edlevel.by_group)



Fairness metrics by Gender (ProxyMute SHAP):
              accuracy  selection_rate
Gender_group                          
Man           0.656197        0.669013
NonBinary     0.739857        0.749403
Woman         0.801175        0.857982

Fairness metrics by Age (ProxyMute SHAP):
             accuracy  selection_rate
Age_encoded                          
0            0.873675        0.959553
1            0.282508        0.167479

Fairness metrics by EdLevel (ProxyMute SHAP):
               accuracy  selection_rate
EdLevel                                
Master         0.663126        0.666313
NoHigherEd     0.596661        0.630931
Other          0.543199        0.537377
PhD            0.595020        0.570118
Undergraduate  0.712071        0.739421


In [19]:
# --- 10. Consistent Utility Metrics ---
evaluate_model(y_test_pmute, y_pred_muted)

run_consistent_metrics(y_test_pmute, y_pred_muted, sensitive_col='Gender_group', label_name='Gender')
run_consistent_metrics(y_test_pmute, y_pred_muted, sensitive_col='Age_encoded', label_name='Age')
run_consistent_metrics(y_test_pmute, y_pred_muted, sensitive_col='EdLevel', label_name='EdLevel')



Accuracy: 0.6645038341122556
Classification Report:
               precision    recall  f1-score   support

           0       0.16      0.44      0.23      2576
           1       0.90      0.69      0.79     19463

    accuracy                           0.66     22039
   macro avg       0.53      0.57      0.51     22039
weighted avg       0.82      0.66      0.72     22039


Accuracy by Gender group:
Gender Man: 0.6562
Gender NonBinary: 0.7399
Gender Woman: 0.8012

Accuracy by Age group:
Age 0: 0.8737
Age 1: 0.2825

Accuracy by EdLevel group:
EdLevel Master: 0.6631
EdLevel NoHigherEd: 0.5967
EdLevel Other: 0.5432
EdLevel PhD: 0.595
EdLevel Undergraduate: 0.7121


In [20]:
# --- 11. Custom Fairness Metrics ---
print_group_rates(df, 'Gender')
print_group_rates(df, 'Age')
print_group_rates(df, 'EdLevel')

disparate_impact(df, 2, 0, 'Gender')  # Woman vs Man
disparate_impact(df, 1, 0, 'Gender')  # NonBinary vs Man
disparate_impact(df, '>35', '<35', 'Age')

equal_opportunity(
    y_test_pmute.reset_index(drop=True),
    pd.Series(y_pred_muted),
    ["Man", "NonBinary", "Woman"],  # group labels as strings
    'Gender_group',  # string-based column in df
    df.reset_index(drop=True)
)


equal_opportunity(
    y_test_pmute.reset_index(drop=True),
    pd.Series(y_pred_muted),
    ['<35', '>35'],
    'Age_group',
    df.reset_index(drop=True)
)

# Equal Opportunity for Education Level
equal_opportunity(
    y_test_pmute.reset_index(drop=True),
    pd.Series(y_pred_muted),
    ['Undergraduate', 'Master', 'PhD', 'Other', 'NoHigherEd'],
    'EdLevel',
    df.reset_index(drop=True)
)




Selection Rates by Gender:
0: 0.88
2: 0.91
1: 0.87

Selection Rates by Age:
<35: 0.90
>35: 0.85

Selection Rates by EdLevel:
Master: 0.88
Undergraduate: 0.90
PhD: 0.90
Other: 0.84
NoHigherEd: 0.80

Disparate Impact (2/0): 1.03

Disparate Impact (1/0): 0.98

Disparate Impact (>35/<35): 0.95

Equal Opportunity by group:
Man: TPR = 0.70
NonBinary: TPR = 0.64
Woman: TPR = 0.69

Equal Opportunity by group:
<35: TPR = 0.69
>35: TPR = 0.70

Equal Opportunity by group:
Undergraduate: TPR = 0.70
Master: TPR = 0.69
PhD: TPR = 0.66
Other: TPR = 0.69
NoHigherEd: TPR = 0.68


## b) Refined Proxy Mute

In [21]:
# --- 1. Initialize LIME Explainer ---
explainer = LimeTabularExplainer(
    training_data=X_train_pmute.values,
    feature_names=X_train_pmute.columns.tolist(),
    class_names=["Not Employed", "Employed"],
    mode="classification",
    discretize_continuous=False
)

# --- 2. Local Muting: Top 2 Features per Instance ---
X_test_localmute_muted = X_test_pmute.copy()

for i in range(500):  # Apply LIME only to top 500 for speed
    exp = explainer.explain_instance(
        X_test_pmute.iloc[i].values,
        lambda x: lr_model_base.predict_proba(pd.DataFrame(x, columns=X_train_pmute.columns)),
        num_features=2
    )
    top_features = [f[0] for f in exp.as_list()]
    
    for f in top_features:
        f_name = f.split('<')[0].split('>')[0].split('=')[0].strip()
        if f_name in X_test_localmute_muted.columns:
            col_idx = X_test_localmute_muted.columns.get_loc(f_name)
            mean_val = X_train_pmute[f_name].mean()
            col_dtype = X_test_localmute_muted.dtypes[f_name]
            X_test_localmute_muted.iat[i, col_idx] = col_dtype.type(mean_val)

# --- 3. Predict ---
y_pred_lime_localmute = lr_model_base.predict(X_test_localmute_muted)

In [22]:
# --- 4. Performance Metrics ---
print("Refined ProxyMute (LIME, Top 2 Features):")
print("Accuracy:", accuracy_score(y_test_pmute, y_pred_lime_localmute))
print("Classification Report:")
print(classification_report(y_test_pmute, y_pred_lime_localmute))

# --- 5. Fairness: MetricFrame by Group ---
for attr, label in [("Gender_group", "Gender"), ("Age_encoded", "Age"), ("EdLevel", "EdLevel")]:
    mf = MetricFrame(
        metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
        y_true=y_test_pmute,
        y_pred=y_pred_lime_localmute,
        sensitive_features=df.loc[X_test_pmute.index, attr]
    )
    print(f"\nFairness metrics by {label}:")
    print(mf.by_group)

Refined ProxyMute (LIME, Top 2 Features):
Accuracy: 0.6049276282952947
Classification Report:
              precision    recall  f1-score   support

           0       0.16      0.58      0.25      2576
           1       0.92      0.61      0.73     19463

    accuracy                           0.60     22039
   macro avg       0.54      0.59      0.49     22039
weighted avg       0.83      0.60      0.68     22039


Fairness metrics by Gender:
              accuracy  selection_rate
Gender_group                          
Man           0.595223        0.574640
NonBinary     0.625298        0.634845
Woman         0.792360        0.811949

Fairness metrics by Age:
             accuracy  selection_rate
Age_encoded                          
0            0.710062        0.725300
1            0.412926        0.333804

Fairness metrics by EdLevel:
               accuracy  selection_rate
EdLevel                                
Master         0.484334        0.438308
NoHigherEd     0.315466    

In [23]:
# --- 6. Custom Evaluation ---
evaluate_model(y_test_pmute, y_pred_lime_localmute)

run_consistent_metrics(y_test_pmute, y_pred_lime_localmute, sensitive_col='Gender_group', label_name='Gender')
run_consistent_metrics(y_test_pmute, y_pred_lime_localmute, sensitive_col='Age_encoded', label_name='Age')
run_consistent_metrics(y_test_pmute, y_pred_lime_localmute, sensitive_col='EdLevel', label_name='EdLevel')

Accuracy: 0.6049276282952947
Classification Report:
               precision    recall  f1-score   support

           0       0.16      0.58      0.25      2576
           1       0.92      0.61      0.73     19463

    accuracy                           0.60     22039
   macro avg       0.54      0.59      0.49     22039
weighted avg       0.83      0.60      0.68     22039


Accuracy by Gender group:
Gender Man: 0.5952
Gender NonBinary: 0.6253
Gender Woman: 0.7924

Accuracy by Age group:
Age 0: 0.7101
Age 1: 0.4129

Accuracy by EdLevel group:
EdLevel Master: 0.4843
EdLevel NoHigherEd: 0.3155
EdLevel Other: 0.2966
EdLevel PhD: 0.557
EdLevel Undergraduate: 0.7879


In [24]:
# --- 7. Custom Fairness Functions ---
print_group_rates(df, 'Gender')
print_group_rates(df, 'Age')
print_group_rates(df, 'EdLevel')

disparate_impact(df, 2, 0, 'Gender')
disparate_impact(df, 1, 0, 'Gender')
disparate_impact(df, '>35', '<35', 'Age')

equal_opportunity(
    y_test_pmute.reset_index(drop=True),
    pd.Series(y_pred_lime_localmute),
    ["Man", "NonBinary", "Woman"],
    'Gender',
    df.reset_index(drop=True)
)

# EO for Age
equal_opportunity(
    y_test_pmute.reset_index(drop=True),
    pd.Series(y_pred_lime_localmute),
    ['<35', '>35'],
    'Age_group',
    df.reset_index(drop=True)
)

# EO for EdLevel
equal_opportunity(
    y_test_pmute.reset_index(drop=True),
    pd.Series(y_pred_lime_localmute),
    ['Undergraduate', 'Master', 'PhD', 'Other', 'NoHigherEd'],
    'EdLevel',
    df.reset_index(drop=True)
)



Selection Rates by Gender:
0: 0.88
2: 0.91
1: 0.87

Selection Rates by Age:
<35: 0.90
>35: 0.85

Selection Rates by EdLevel:
Master: 0.88
Undergraduate: 0.90
PhD: 0.90
Other: 0.84
NoHigherEd: 0.80

Disparate Impact (2/0): 1.03

Disparate Impact (1/0): 0.98

Disparate Impact (>35/<35): 0.95

Equal Opportunity by group:
Man: TPR = 0.00
NonBinary: TPR = 0.00
Woman: TPR = 0.00

Equal Opportunity by group:
<35: TPR = 0.61
>35: TPR = 0.61

Equal Opportunity by group:
Undergraduate: TPR = 0.61
Master: TPR = 0.61
PhD: TPR = 0.59
Other: TPR = 0.59
NoHigherEd: TPR = 0.61
