In [1]:
import os
import sys

src_path = r"C:\Users\asrit\Documents\516-Project-main\516-Project-main\src"
if src_path not in sys.path:
    sys.path.append(src_path)

In [2]:
import pandas as pd
import numpy as np
import shap

from sklearn.metrics import mean_absolute_error, accuracy_score
from scipy.stats import pearsonr

from data_loader import load_dataset
from modeling import train_smote_forest, evaluate_model, train_random_forest
from fairness_metrics import print_group_rates, disparate_impact, equal_opportunity
from preprocess import apply_reweighing

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from fairlearn.postprocessing import ThresholdOptimizer
from fairlearn.metrics import MetricFrame, selection_rate
from fairlearn.reductions import ExponentiatedGradient, EqualizedOdds

from lime.lime_tabular import LimeTabularExplainer

pip install 'aif360[inFairness]'


In [3]:
# Load data
df = load_dataset("../data/cleaned_dataset.csv")

In [4]:
df_original = pd.read_csv("../data/cleaned_dataset.csv")

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(df_original['Gender'])

for i, label in enumerate(le.classes_):
    print(f"{i} → {label}")


0 → Man
1 → NonBinary
2 → Woman


# Pre-Processing

Using a preprocessing mitigation strategy - AIF360’s Reweighing

In [5]:
# Step 1: Apply AIF360 Reweighing (only used for fairness metrics, not for training if SMOTE is applied)
df = apply_reweighing(df, protected_attr='Gender', label_col='Employment')

# Step 2: Feature engineering for modeling
features = ['Gender_encoded', 'Age_encoded', 'EdLevel', 'YearsCode', 'YearsCodePro', 'ComputerSkills', 'PreviousSalary']
X = pd.get_dummies(df[features], drop_first=True)
y = df['Employment'].astype(int)


In [6]:
# Step 3: Choose model strategy
use_smote = True  # Toggle this to False to use reweighing instead of SMOTE

if use_smote:
    model, X_test, y_test, y_pred = train_smote_forest(X, y)
else:
    sample_weights = df['instance_weight']
    model, X_test, y_test, y_pred = train_random_forest(X, y, sample_weights)

# Step 4: Evaluate base model performance (before fairness post-processing)
evaluate_model(y_test, y_pred)

# --- 3. Equal Accuracy by Gender Group ---
y_test_aligned = y_test.reset_index(drop=True)
y_pred_aligned = pd.Series(y_pred).reset_index(drop=True)
gender_column = df.loc[y_test.index, 'Gender_encoded'].reset_index(drop=True)

# --- 1. Pearson's Correlation ---
pearson_corr, _ = pearsonr(y_test, y_pred)
print(f"Pearson Correlation: {round(pearson_corr, 4)}")

# --- 2. Mean Absolute Error ---
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {round(mae, 4)}")

equal_accuracy = {}
for group in sorted(gender_column.unique()):
    group_mask = gender_column == group
    acc = accuracy_score(y_test_aligned[group_mask], y_pred_aligned[group_mask])
    equal_accuracy[group] = acc
    print(f"Accuracy for Gender {group}: {round(acc, 4)}")



Accuracy: 0.7463133536004356
Classification Report:
               precision    recall  f1-score   support

           0       0.15      0.25      0.19      2571
           1       0.89      0.81      0.85     19468

    accuracy                           0.75     22039
   macro avg       0.52      0.53      0.52     22039
weighted avg       0.81      0.75      0.77     22039

Pearson Correlation: 0.0531
Mean Absolute Error: 0.2537
Accuracy for Gender 0: 0.7403
Accuracy for Gender 1: 0.6706
Accuracy for Gender 2: 0.8977


In [7]:
# Step 5: Custom fairness metrics based on fairness-aware predictions
print_group_rates(df, 'Gender')
print_group_rates(df, 'Age')
print_group_rates(df, 'EdLevel')

# Disparate impact comparisons
disparate_impact(df, 2, 0, 'Gender')  # Woman vs Man
disparate_impact(df, 1, 0, 'Gender')  # NonBinary vs Man
disparate_impact(df, '>35', '<35', 'Age')

# Equal opportunity using fairness-aware predictions
equal_opportunity(
    y_test.reset_index(drop=True),
    pd.Series(y_pred), 
    [0, 1, 2],  # Man, NonBinary, Woman
    'Gender',
    df.reset_index(drop=True)
)



Selection Rates by Gender:
0: 0.88
2: 0.91
1: 0.87

Selection Rates by Age:
<35: 0.90
>35: 0.85

Selection Rates by EdLevel:
Master: 0.88
Undergraduate: 0.90
PhD: 0.90
Other: 0.84
NoHigherEd: 0.80

Disparate Impact (2/0): 1.03

Disparate Impact (1/0): 0.98

Disparate Impact (>35/<35): 0.95

Equal Opportunity by group:
0: TPR = 0.81
1: TPR = 0.82
2: TPR = 0.78


# In-processing

In [8]:
def run_consistent_metrics(y_test, y_pred, sensitive_col='Gender_encoded', label_name='Gender'):
    """
    Evaluate predictions using MAE, Pearson correlation, and group-wise accuracy.
    Assumes access to global df and X_test_pmute for index alignment.
    """
    # Align predictions and labels
    y_test = y_test.reset_index(drop=True)
    y_pred = pd.Series(y_pred).reset_index(drop=True)
    
    # Align sensitive attribute values from df using test indices
    sensitive_series = df.loc[y_test.index, sensitive_col].reset_index(drop=True)

    # --- 1. Pearson Correlation ---
    pearson_corr, _ = pearsonr(y_test, y_pred)
    print(f"\nPearson Correlation: {round(pearson_corr, 4)}")

    # --- 2. Mean Absolute Error ---
    mae = mean_absolute_error(y_test, y_pred)
    print(f"Mean Absolute Error: {round(mae, 4)}")

    # --- 3. Group-wise Accuracy ---
    print(f"\nAccuracy by {label_name} group:")
    for group in sorted(sensitive_series.unique()):
        group_mask = sensitive_series == group
        acc = accuracy_score(y_test[group_mask], y_pred[group_mask])
        print(f"{label_name} {group}: {round(acc, 4)}")


In [9]:
# --- 1. Prepare data: remove sensitive feature from X, pass separately ---
sensitive_feature_test = df.loc[X_test.index, "Gender_encoded"]
sensitive_feature_train = df.loc[X.index.difference(X_test.index), "Gender_encoded"]

X_train_eg = X.loc[X.index.difference(X_test.index)].drop(columns=["Gender_encoded"])
y_train_eg = y.loc[X.index.difference(X_test.index)]

X_test_eg = X_test.drop(columns=["Gender_encoded"])

# --- 2. Train fairness-aware model ---
eg_model = ExponentiatedGradient(
    estimator=LogisticRegression(solver="liblinear", class_weight='balanced'),
    constraints=EqualizedOdds(),
    eps=0.01
)
eg_model.fit(X_train_eg, y_train_eg, sensitive_features=sensitive_feature_train)

# --- 3. Predict ---
y_pred_eg = eg_model.predict(X_test_eg)

# --- 4. Evaluate performance ---
print("Exponentiated Gradient on SMOTE-balanced data:")
print("Accuracy:", accuracy_score(y_test, y_pred_eg))
print("Classification Report:")
print(classification_report(y_test, y_pred_eg))


Exponentiated Gradient on SMOTE-balanced data:
Accuracy: 0.6909569399700531
Classification Report:
              precision    recall  f1-score   support

           0       0.13      0.30      0.19      2571
           1       0.89      0.74      0.81     19468

    accuracy                           0.69     22039
   macro avg       0.51      0.52      0.50     22039
weighted avg       0.80      0.69      0.74     22039



In [10]:
# --- 5. Custom evaluation function (consistent with pre-processing) ---
evaluate_model(y_test, y_pred_eg)

# --- 6. Reusable consistency metrics: Pearson, MAE, group-wise accuracy ---
run_consistent_metrics(y_test, y_pred_eg, sensitive_col='Gender_encoded', label_name='Gender')
run_consistent_metrics(y_test, y_pred_eg, sensitive_col='Age_encoded', label_name='Age')
run_consistent_metrics(y_test, y_pred_eg, sensitive_col='EdLevel', label_name='EdLevel')



Accuracy: 0.6909569399700531
Classification Report:
               precision    recall  f1-score   support

           0       0.13      0.30      0.19      2571
           1       0.89      0.74      0.81     19468

    accuracy                           0.69     22039
   macro avg       0.51      0.52      0.50     22039
weighted avg       0.80      0.69      0.74     22039


Pearson Correlation: 0.032
Mean Absolute Error: 0.309

Accuracy by Gender group:
Gender 0: 0.6912
Gender 1: 0.6489
Gender 2: 0.7037

Pearson Correlation: 0.032
Mean Absolute Error: 0.309

Accuracy by Age group:
Age 0: 0.6902
Age 1: 0.6926

Pearson Correlation: 0.032
Mean Absolute Error: 0.309

Accuracy by EdLevel group:
EdLevel Master: 0.6834
EdLevel NoHigherEd: 0.6819
EdLevel Other: 0.6846
EdLevel PhD: 0.7041
EdLevel Undergraduate: 0.6967


In [11]:
# --- 7. Group fairness metrics ---
# Gender
mf_eg_gender = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test,
    y_pred=y_pred_eg,
    sensitive_features=sensitive_feature_test
)
print("\nFairness metrics by Gender:")
print(mf_eg_gender.by_group)

# Age
mf_eg_age = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test,
    y_pred=y_pred_eg,
    sensitive_features=df.loc[X_test.index, 'Age_encoded']
)
print("\nFairness metrics by Age:")
print(mf_eg_age.by_group)

# EdLevel
mf_eg_edlevel = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test,
    y_pred=y_pred_eg,
    sensitive_features=df.loc[X_test.index, 'EdLevel']
)
print("\nFairness metrics by EdLevel:")
print(mf_eg_edlevel.by_group)


Fairness metrics by Gender:
                accuracy  selection_rate
Gender_encoded                          
0               0.688893        0.736050
1               0.710900        0.734597
2               0.724172        0.763158

Fairness metrics by Age:
             accuracy  selection_rate
Age_encoded                          
0            0.701553        0.746954
1            0.671138        0.719198

Fairness metrics by EdLevel:
               accuracy  selection_rate
EdLevel                                
Master         0.695279        0.751788
NoHigherEd     0.638356        0.714155
Other          0.656463        0.711812
PhD            0.677749        0.716113
Undergraduate  0.704658        0.741090


In [12]:
# --- 8. Custom fairness functions ---
print_group_rates(df, 'Gender')
print_group_rates(df, 'Age')
print_group_rates(df, 'EdLevel')

disparate_impact(df, 2, 0, 'Gender')  # Woman vs Man
disparate_impact(df, 1, 0, 'Gender')  # NonBinary vs Man
disparate_impact(df, '>35', '<35', 'Age')

equal_opportunity(
    y_test.reset_index(drop=True),
    pd.Series(y_pred_eg),
    [0, 1, 2],  # Gender groups
    'Gender',
    df.reset_index(drop=True)
)



Selection Rates by Gender:
0: 0.88
2: 0.91
1: 0.87

Selection Rates by Age:
<35: 0.90
>35: 0.85

Selection Rates by EdLevel:
Master: 0.88
Undergraduate: 0.90
PhD: 0.90
Other: 0.84
NoHigherEd: 0.80

Disparate Impact (2/0): 1.03

Disparate Impact (1/0): 0.98

Disparate Impact (>35/<35): 0.95

Equal Opportunity by group:
0: TPR = 0.74
1: TPR = 0.69
2: TPR = 0.75


# Post Processing

In [13]:
# --- 1. 3-Way Split: Train (60%), Validation (10%), Test (30%) ---
X_trainval, X_test_post, y_trainval, y_test_post = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

X_train_post, X_val_post, y_train_post, y_val_post = train_test_split(
    X_trainval, y_trainval, test_size=0.125, stratify=y_trainval, random_state=42
)

# --- 2. Apply SMOTE to training data ---
smote = SMOTE(random_state=42)
X_train_post_smote, y_train_post_smote = smote.fit_resample(X_train_post, y_train_post)

# --- 3. Train classifier on SMOTE-balanced data ---
model_post = RandomForestClassifier(n_estimators=100, random_state=42)
model_post.fit(X_train_post_smote, y_train_post_smote)

# --- 4. Apply ThresholdOptimizer using validation set ---
sensitive_gender_val = df.loc[X_val_post.index, "Gender_encoded"]

postprocessor = ThresholdOptimizer(
    estimator=model_post,
    constraints="equalized_odds",
    predict_method="predict_proba"
)

postprocessor.fit(
    X_val_post,
    y_val_post,
    sensitive_features=sensitive_gender_val
)

In [14]:
# --- 5. Predict on test set ---
sensitive_gender_test = df.loc[X_test_post.index, "Gender_encoded"]
y_pred_fair = postprocessor.predict(
    X_test_post,
    sensitive_features=sensitive_gender_test
)

# --- 6. Evaluate overall performance ---
print("\nFairness-aware Evaluation (SMOTE + Equalized Odds):")
print("Overall Accuracy:", accuracy_score(y_test_post, y_pred_fair))
print("Classification Report:")
print(classification_report(y_test_post, y_pred_fair))

# --- 7. Custom Evaluation ---
evaluate_model(y_test_post, y_pred_fair)

run_consistent_metrics(y_test_post, y_pred_fair, sensitive_col='Gender_encoded', label_name='Gender')
run_consistent_metrics(y_test_post, y_pred_fair, sensitive_col='Age_encoded', label_name='Age')
run_consistent_metrics(y_test_post, y_pred_fair, sensitive_col='EdLevel', label_name='EdLevel')


Fairness-aware Evaluation (SMOTE + Equalized Odds):
Overall Accuracy: 0.8688688234493398
Classification Report:
              precision    recall  f1-score   support

           0       0.26      0.06      0.10      2576
           1       0.89      0.98      0.93     19463

    accuracy                           0.87     22039
   macro avg       0.57      0.52      0.52     22039
weighted avg       0.81      0.87      0.83     22039

Accuracy: 0.8688688234493398
Classification Report:
               precision    recall  f1-score   support

           0       0.26      0.06      0.10      2576
           1       0.89      0.98      0.93     19463

    accuracy                           0.87     22039
   macro avg       0.57      0.52      0.52     22039
weighted avg       0.81      0.87      0.83     22039


Pearson Correlation: 0.0752
Mean Absolute Error: 0.1311

Accuracy by Gender group:
Gender 0: 0.8679
Gender 1: 0.8511
Gender 2: 0.9008

Pearson Correlation: 0.0752
Mean Absolute Er

In [15]:
# --- 8. MetricFrame Fairness Evaluation ---
mf_post_gender = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test_post,
    y_pred=y_pred_fair,
    sensitive_features=sensitive_gender_test
)
print("\nFairness metrics by Gender:")
print(mf_post_gender.by_group)

mf_post_age = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test_post,
    y_pred=y_pred_fair,
    sensitive_features=df.loc[X_test_post.index, 'Age_encoded']
)
print("\nFairness metrics by Age:")
print(mf_post_age.by_group)

mf_post_edlevel = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test_post,
    y_pred=y_pred_fair,
    sensitive_features=df.loc[X_test_post.index, 'EdLevel']
)
print("\nFairness metrics by EdLevel:")
print(mf_post_edlevel.by_group)


Fairness metrics by Gender:
                accuracy  selection_rate
Gender_encoded                          
0               0.867129        0.970241
1               0.875895        0.980907
2               0.901077        0.977473

Fairness metrics by Age:
             accuracy  selection_rate
Age_encoded                          
0            0.891862        0.981813
1            0.826879        0.950628

Fairness metrics by EdLevel:
               accuracy  selection_rate
EdLevel                                
Master         0.878386        0.981767
NoHigherEd     0.791740        0.931459
Other          0.798100        0.929534
PhD            0.879423        0.982962
Undergraduate  0.891759        0.980401


In [16]:
# --- 9. Custom Fairness Metrics ---
print_group_rates(df, 'Gender')
print_group_rates(df, 'Age')
print_group_rates(df, 'EdLevel')

disparate_impact(df, 2, 0, 'Gender')  # Woman vs Man
disparate_impact(df, 1, 0, 'Gender')  # NonBinary vs Man
disparate_impact(df, '>35', '<35', 'Age')

equal_opportunity(
    y_test_post.reset_index(drop=True),
    pd.Series(y_pred_fair),
    [0, 1, 2],  # Gender groups
    'Gender',
    df.reset_index(drop=True)
)



Selection Rates by Gender:
0: 0.88
2: 0.91
1: 0.87

Selection Rates by Age:
<35: 0.90
>35: 0.85

Selection Rates by EdLevel:
Master: 0.88
Undergraduate: 0.90
PhD: 0.90
Other: 0.84
NoHigherEd: 0.80

Disparate Impact (2/0): 1.03

Disparate Impact (1/0): 0.98

Disparate Impact (>35/<35): 0.95

Equal Opportunity by group:
0: TPR = 0.97
1: TPR = 0.98
2: TPR = 0.99


# Explainability

## a) Proxy Mute

In [17]:
# --- 1. Train-Test Split ---
X_train_pmute, X_test_pmute, y_train_pmute, y_test_pmute = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# --- 2. Train Base Model (Logistic Regression with Class Weights) ---
lr_model_base = LogisticRegression(solver="liblinear", class_weight='balanced')
lr_model_base.fit(X_train_pmute, y_train_pmute)

# --- 3. SHAP Explainability (KernelExplainer for probability output) ---
explainer = shap.KernelExplainer(
    lr_model_base.predict_proba,
    X_train_pmute.sample(100, random_state=42)
)
shap_values = explainer.shap_values(X_test_pmute[:100])

# --- 4. Mean Absolute SHAP Importance ---
mean_abs_shap = np.abs(shap_values[1]).mean(axis=0)
shap_summary = pd.DataFrame({
    "feature": X_train_pmute.columns,
    "mean_abs_shap": mean_abs_shap
}).sort_values(by="mean_abs_shap", ascending=False)

  0%|          | 0/100 [00:00<?, ?it/s]

In [18]:
# --- 5. Define Proxy Features to Mute (based on SHAP summary) ---
proxy_features = ['PreviousSalary', 'EdLevel_Undergraduate', 'ComputerSkills']

# --- 6. Muting Proxy Features in Test Set ---
X_test_muted = X_test_pmute.copy()
for col in proxy_features:
    if col in X_test_muted.columns:
        X_test_muted[col] = X_test_muted[col].mean()

# --- 7. Predict on Muted Test Set ---
y_pred_muted = lr_model_base.predict(X_test_muted)

# --- 8. Performance Metrics ---
print("ProxyMute (Revised Proxy List):")
print("Accuracy:", accuracy_score(y_test_pmute, y_pred_muted))
print("Classification Report:")
print(classification_report(y_test_pmute, y_pred_muted))


ProxyMute (Revised Proxy List):
Accuracy: 0.6712645764326876
Classification Report:
              precision    recall  f1-score   support

           0       0.16      0.43      0.24      2576
           1       0.90      0.70      0.79     19463

    accuracy                           0.67     22039
   macro avg       0.53      0.57      0.51     22039
weighted avg       0.82      0.67      0.73     22039



In [19]:
# --- 9. Fairness Evaluation Using MetricFrame ---
# Gender
mf_shap_gender = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test_pmute,
    y_pred=y_pred_muted,
    sensitive_features=df.loc[X_test_pmute.index, "Gender_encoded"]
)
print("\nFairness metrics by Gender (ProxyMute SHAP):")
print(mf_shap_gender.by_group)

# Age
mf_shap_age = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test_pmute,
    y_pred=y_pred_muted,
    sensitive_features=df.loc[X_test_pmute.index, "Age_encoded"]
)
print("\nFairness metrics by Age (ProxyMute SHAP):")
print(mf_shap_age.by_group)

# EdLevel
mf_shap_edlevel = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    y_true=y_test_pmute,
    y_pred=y_pred_muted,
    sensitive_features=df.loc[X_test_pmute.index, "EdLevel"]
)
print("\nFairness metrics by EdLevel (ProxyMute SHAP):")
print(mf_shap_edlevel.by_group)



Fairness metrics by Gender (ProxyMute SHAP):
                accuracy  selection_rate
Gender_encoded                          
0               0.662799        0.676489
1               0.763723        0.787589
2               0.804114        0.860921

Fairness metrics by Age (ProxyMute SHAP):
             accuracy  selection_rate
Age_encoded                          
0            0.866442        0.949091
1            0.314824        0.208771

Fairness metrics by EdLevel (ProxyMute SHAP):
               accuracy  selection_rate
EdLevel                                
Master         0.676226        0.681891
NoHigherEd     0.601054        0.630053
Other          0.543199        0.534314
PhD            0.626474        0.609436
Undergraduate  0.716169        0.745301


In [20]:
# --- 10. Consistent Utility Metrics ---
evaluate_model(y_test_pmute, y_pred_muted)

run_consistent_metrics(y_test_pmute, y_pred_muted, sensitive_col='Gender_encoded', label_name='Gender')
run_consistent_metrics(y_test_pmute, y_pred_muted, sensitive_col='Age_encoded', label_name='Age')
run_consistent_metrics(y_test_pmute, y_pred_muted, sensitive_col='EdLevel', label_name='EdLevel')



Accuracy: 0.6712645764326876
Classification Report:
               precision    recall  f1-score   support

           0       0.16      0.43      0.24      2576
           1       0.90      0.70      0.79     19463

    accuracy                           0.67     22039
   macro avg       0.53      0.57      0.51     22039
weighted avg       0.82      0.67      0.73     22039


Pearson Correlation: 0.0935
Mean Absolute Error: 0.3287

Accuracy by Gender group:
Gender 0: 0.6717
Gender 1: 0.6277
Gender 2: 0.6789

Pearson Correlation: 0.0935
Mean Absolute Error: 0.3287

Accuracy by Age group:
Age 0: 0.67
Age 1: 0.6739

Pearson Correlation: 0.0935
Mean Absolute Error: 0.3287

Accuracy by EdLevel group:
EdLevel Master: 0.6662
EdLevel NoHigherEd: 0.6667
EdLevel Other: 0.6664
EdLevel PhD: 0.6415
EdLevel Undergraduate: 0.6776


In [21]:
# --- 11. Custom Fairness Metrics ---
print_group_rates(df, 'Gender')
print_group_rates(df, 'Age')
print_group_rates(df, 'EdLevel')

disparate_impact(df, 2, 0, 'Gender')  # Woman vs Man
disparate_impact(df, 1, 0, 'Gender')  # NonBinary vs Man
disparate_impact(df, '>35', '<35', 'Age')

equal_opportunity(
    y_test_pmute.reset_index(drop=True),
    pd.Series(y_pred_muted),
    [0, 1, 2],  # Gender groups
    'Gender',
    df.reset_index(drop=True)
)


Selection Rates by Gender:
0: 0.88
2: 0.91
1: 0.87

Selection Rates by Age:
<35: 0.90
>35: 0.85

Selection Rates by EdLevel:
Master: 0.88
Undergraduate: 0.90
PhD: 0.90
Other: 0.84
NoHigherEd: 0.80

Disparate Impact (2/0): 1.03

Disparate Impact (1/0): 0.98

Disparate Impact (>35/<35): 0.95

Equal Opportunity by group:
0: TPR = 0.70
1: TPR = 0.66
2: TPR = 0.70


## b) Refined Proxy Mute

In [22]:
# --- 1. Initialize LIME Explainer ---
explainer = LimeTabularExplainer(
    training_data=X_train_pmute.values,
    feature_names=X_train_pmute.columns.tolist(),
    class_names=["Not Employed", "Employed"],
    mode="classification",
    discretize_continuous=False
)

# --- 2. Local Muting: Top 2 Features per Instance ---
X_test_localmute_muted = X_test_pmute.copy()

for i in range(500):  # Apply LIME only to top 500 for speed
    exp = explainer.explain_instance(
        X_test_pmute.iloc[i].values,
        lambda x: lr_model_base.predict_proba(pd.DataFrame(x, columns=X_train_pmute.columns)),
        num_features=2
    )
    top_features = [f[0] for f in exp.as_list()]
    
    for f in top_features:
        f_name = f.split('<')[0].split('>')[0].split('=')[0].strip()
        if f_name in X_test_localmute_muted.columns:
            col_idx = X_test_localmute_muted.columns.get_loc(f_name)
            mean_val = X_train_pmute[f_name].mean()
            col_dtype = X_test_localmute_muted.dtypes[f_name]
            X_test_localmute_muted.iat[i, col_idx] = col_dtype.type(mean_val)

# --- 3. Predict ---
y_pred_lime_localmute = lr_model_base.predict(X_test_localmute_muted)

In [23]:
# --- 4. Performance Metrics ---
print("Refined ProxyMute (LIME, Top 2 Features):")
print("Accuracy:", accuracy_score(y_test_pmute, y_pred_lime_localmute))
print("Classification Report:")
print(classification_report(y_test_pmute, y_pred_lime_localmute))

# --- 5. Fairness: MetricFrame by Group ---
for attr, label in [("Gender_encoded", "Gender"), ("Age_encoded", "Age"), ("EdLevel", "EdLevel")]:
    mf = MetricFrame(
        metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
        y_true=y_test_pmute,
        y_pred=y_pred_lime_localmute,
        sensitive_features=df.loc[X_test_pmute.index, attr]
    )
    print(f"\nFairness metrics by {label}:")
    print(mf.by_group)

Refined ProxyMute (LIME, Top 2 Features):
Accuracy: 0.6051544988429602
Classification Report:
              precision    recall  f1-score   support

           0       0.16      0.58      0.25      2576
           1       0.92      0.61      0.73     19463

    accuracy                           0.61     22039
   macro avg       0.54      0.59      0.49     22039
weighted avg       0.83      0.61      0.68     22039


Fairness metrics by Gender:
                accuracy  selection_rate
Gender_encoded                          
0               0.595709        0.575222
1               0.644391        0.653938
2               0.779628        0.795299

Fairness metrics by Age:
             accuracy  selection_rate
Age_encoded                          
0            0.700162        0.711607
1            0.431649        0.359195

Fairness metrics by EdLevel:
               accuracy  selection_rate
EdLevel                                
Master         0.473889        0.428217
NoHigherEd     0.

In [24]:
# --- 6. Custom Evaluation ---
evaluate_model(y_test_pmute, y_pred_lime_localmute)

run_consistent_metrics(y_test_pmute, y_pred_lime_localmute, sensitive_col='Gender_encoded', label_name='Gender')
run_consistent_metrics(y_test_pmute, y_pred_lime_localmute, sensitive_col='Age_encoded', label_name='Age')
run_consistent_metrics(y_test_pmute, y_pred_lime_localmute, sensitive_col='EdLevel', label_name='EdLevel')

Accuracy: 0.6051544988429602
Classification Report:
               precision    recall  f1-score   support

           0       0.16      0.58      0.25      2576
           1       0.92      0.61      0.73     19463

    accuracy                           0.61     22039
   macro avg       0.54      0.59      0.49     22039
weighted avg       0.83      0.61      0.68     22039


Pearson Correlation: 0.1219
Mean Absolute Error: 0.3948

Accuracy by Gender group:
Gender 0: 0.6054
Gender 1: 0.5851
Gender 2: 0.6068

Pearson Correlation: 0.1219
Mean Absolute Error: 0.3948

Accuracy by Age group:
Age 0: 0.6041
Age 1: 0.6074

Pearson Correlation: 0.1219
Mean Absolute Error: 0.3948

Accuracy by EdLevel group:
EdLevel Master: 0.6063
EdLevel NoHigherEd: 0.6155
EdLevel Other: 0.5942
EdLevel PhD: 0.5917
EdLevel Undergraduate: 0.6077


In [25]:
# --- 7. Custom Fairness Functions ---
print_group_rates(df, 'Gender')
print_group_rates(df, 'Age')
print_group_rates(df, 'EdLevel')

disparate_impact(df, 2, 0, 'Gender')
disparate_impact(df, 1, 0, 'Gender')
disparate_impact(df, '>35', '<35', 'Age')

equal_opportunity(
    y_test_pmute.reset_index(drop=True),
    pd.Series(y_pred_lime_localmute),
    [0, 1, 2],
    'Gender',
    df.reset_index(drop=True)
)


Selection Rates by Gender:
0: 0.88
2: 0.91
1: 0.87

Selection Rates by Age:
<35: 0.90
>35: 0.85

Selection Rates by EdLevel:
Master: 0.88
Undergraduate: 0.90
PhD: 0.90
Other: 0.84
NoHigherEd: 0.80

Disparate Impact (2/0): 1.03

Disparate Impact (1/0): 0.98

Disparate Impact (>35/<35): 0.95

Equal Opportunity by group:
0: TPR = 0.61
1: TPR = 0.59
2: TPR = 0.61
