<a href="https://colab.research.google.com/github/mhowe23/EECE-5644-Final-Project/blob/main/preprocessing_Fairface_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# FairFace Preprocessing Pipeline for Bias Mitigation


# Block 1: Import Dependencies

In [None]:
from datasets import load_from_disk
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer, MaxAbsScaler, RobustScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold,train_test_split
from collections import Counter
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn import preprocessing
import imblearn
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
import pprint
from collections import defaultdict
from tkinter import font
import matplotlib.pyplot as plt
import pandas as pd

from aif360.datasets import StandardDataset
from aif360.algorithms.preprocessing import Reweighing
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric

from sklearn.exceptions import ConvergenceWarning
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)


# Helpers

In [None]:
def dataset_to_feature_matrix(dataset, max_samples=None):
    """
    Convert a dataset with an 'image_features' column and a 'race' label
    into a dense feature matrix plus a parallel list of labels.

    Parameters
    ----------
    dataset : sequence of examples
        Must support ``len()`` and integer indexing. Each example is a
        mapping with an 'image_features' vector and a 'race' value.
    max_samples : int or None
        If given, only the first ``min(len(dataset), max_samples)``
        examples are converted; ``None`` converts everything.

    Returns
    -------
    X : numpy array of shape (n, d_feat), dtype float32
    y_str : list with the 'race' value of each converted example, in order

    An empty dataset yields an array of shape (0, 0) and an empty list
    instead of failing on the dimensionality peek.
    """
    total = len(dataset)
    if total == 0:
        # nothing to peek at, so the feature dimensionality is unknown
        return np.zeros((0, 0), dtype=np.float32), []

    n = total if max_samples is None else min(total, max_samples)

    # peek at the first example to get the feature dimensionality
    d_feat = len(dataset[0]["image_features"])
    X = np.zeros((n, d_feat), dtype=np.float32)
    y_str = []

    for i in range(n):
        ex = dataset[i]
        # assignment raises if a row's length differs from d_feat
        X[i] = np.asarray(ex["image_features"], dtype=np.float32)
        y_str.append(ex["race"])

    return X, y_str

# Block 2: Loading Datasets

In [None]:
# Load the pre-built train/test splits from disk.
#
# Active configuration: the 90/10 split. To test with the 70/30 split
# instead, use
#   TRAIN_PATH = "biased_train_70_30_split"
#   TEST_PATH = "balanced_test_70_30_split"
TRAIN_PATH = "biased_train"
TEST_PATH = "balanced_test"

train_ds = load_from_disk(TRAIN_PATH)
test_ds = load_from_disk(TEST_PATH)

# Split sizes first, then the per-race label counts of each split.
n_train, n_test = len(train_ds), len(test_ds)
print("Biased train size:", n_train)
print("Balanced test size:", n_test)

train_race_counts = Counter(train_ds["race"])
test_race_counts = Counter(test_ds["race"])
print("Train race counts:", train_race_counts)
print("Test race counts:", test_race_counts)

# Block 3: Building Feature Matrices

In [None]:
# Optional cap on the number of training examples that get converted
# (None converts the whole training set; e.g. 12000 to cap it).
MAX_TRAIN = None

print("\nConverting train set to feature matrix...")
train_matrix_and_labels = dataset_to_feature_matrix(train_ds, max_samples=MAX_TRAIN)
X_train, y_train_str = train_matrix_and_labels

print("Converting test set to feature matrix...")
test_matrix_and_labels = dataset_to_feature_matrix(test_ds)
X_test, y_test_str = test_matrix_and_labels

print("X_train shape:", X_train.shape)
print("X_test  shape:", X_test.shape)

# Block 4: Encoding Race Labels as Integers

In [None]:
# Build one label vocabulary from both splits, sorted so that the integer id
# assigned to each race does not depend on the order the examples appear in.
all_races = sorted(set(y_train_str).union(y_test_str))
race_to_id = dict(zip(all_races, range(len(all_races))))
id_to_race = dict(enumerate(all_races))

# Integer-encoded label vectors, aligned with X_train / X_test row order.
y_train = np.array(list(map(race_to_id.__getitem__, y_train_str)), dtype=int)
y_test = np.array(list(map(race_to_id.__getitem__, y_test_str)), dtype=int)

print("Races (classes):", all_races)



# Block 5: Different Scaling Methods

In [None]:
# Scalers compared in the experiments, keyed by their class name.
# RobustScaler and Normalizer are currently disabled; add them to the tuple
# below to include them in every experiment loop.
scaling_methods = {
    scaler_cls.__name__: scaler_cls()
    for scaler_cls in (StandardScaler, MinMaxScaler, MaxAbsScaler)
}

# Block 6: Sampling Strategies to Help Fix Imbalances

### 6.1 Random UnderSampling

In [None]:

print("\nUndersampling the majority class to fix imbalance...")
rand_under_sampling = RandomUnderSampler(random_state=42)
resampled_under = rand_under_sampling.fit_resample(X_train, y_train)
X_train_rand_under, y_train_rand_under = resampled_under
print("Train set size with undersampling:", X_train_rand_under.shape)

# Per-class counts after resampling, with the class ids rendered as strings
# for display.
fix_format = dict(Counter(map(str, y_train_rand_under)))
print("Counts for train set class with undersampling:")
pprint.pprint(fix_format)

### 6.2 Random Oversampling

In [None]:
# Random oversampling duplicates examples of the under-represented classes
# (the majority class is left untouched), so the message names the minority
# classes rather than the majority class.
print("\nOversampling the minority classes to fix imbalance...")
rand_over_sampling = RandomOverSampler(random_state=42)
X_train_rand_over, y_train_rand_over = rand_over_sampling.fit_resample(X_train, y_train)
print("Train set size with oversampling:", X_train_rand_over.shape)

# Per-class counts after resampling, with the class ids rendered as strings
# for display.
fix_format = {str(k): v for k, v in Counter(y_train_rand_over).items()}
print("Counts for train set class with oversampling:")
pprint.pprint(fix_format)

### 6.3 SMOTE

In [None]:
print("\nSMOTE Sampling to fix imbalance...")
# Explicit SMOTE settings; the shorter alternative is SMOTE(random_state=42).
smote_sampler = SMOTE(
    sampling_strategy='auto',
    random_state=42,
    k_neighbors=5,
)
resampled_smote = smote_sampler.fit_resample(X_train, y_train)
X_train_smote, y_train_smote = resampled_smote
print("SMOTE sampled train set size:", X_train_smote.shape)

# Per-class counts after resampling, with the class ids rendered as strings
# for display.
fix_format = dict(Counter(map(str, y_train_smote)))
print("Counts for SMOTE sampled train set class:")
pprint.pprint(fix_format)

### 6.4 All Sampling Methods

In [None]:
# Resampling strategies compared in sections 8.2 and 8.3, in evaluation order.
# None stands for "no resampling step".
sampling_methods = dict([
    ("No Sampling", None),
    ("Undersampling", RandomUnderSampler(random_state=42)),
    ("Oversampling", RandomOverSampler(random_state=42)),
    ("SMOTE", SMOTE(random_state=42)),
])

# Block 7: Logistic Regression

In [None]:
print("\nTraining multinomial Logistic Regression on image_features...")

# Baseline classifier configuration. class_weight is deliberately left at its
# default here (add class_weight="balanced" to weight classes inversely to
# their frequency).
baseline_lr_params = dict(
    max_iter=1000,
    multi_class="multinomial",
    solver="lbfgs",
    n_jobs=-1,
    verbose=1,
)
clf = LogisticRegression(**baseline_lr_params)

In [None]:
print("Fitting Logistic Regression...")
clf.fit(X_train, y_train)

# Evaluate the unscaled baseline on the test split.
print("\nEvaluating on balanced test set...")
y_pred = clf.predict(X_test)
overall_acc = accuracy_score(y_test, y_pred)
print("\nOverall test accuracy: {:.3f}".format(overall_acc))

print("\nAccuracy by race:")
test_race_labels = np.asarray(y_test_str)
for race in all_races:
    # positions of the test rows whose ground-truth race is `race`
    idx = np.flatnonzero(test_race_labels == race)
    if idx.size == 0:
        continue
    acc_r = accuracy_score(y_test[idx], y_pred[idx])
    print("  {:20s}  n={:4d}  acc={:.3f}".format(race, idx.size, acc_r))

# Block 8: Pipeline Experiments

#####  1. Logistic Regression with different scaling methods.
#####  2. Logistic Regression with different scaling and sampling methods.
#####  3. Logistic Regression with different scaling and sampling methods using GridSearch (cv).
#####  4. Logistic Regression with balanced weights and different scaling methods.
#####  5. Logistic Regression with balanced weights and different scaling methods using GridSearch (cv).
#####  6. Logistic Regression and Reweighing (AIF360)

## 8.1 Logistic Regression with different scaling methods.

In [None]:
# Section 8.1: logistic regression preceded only by a feature scaler.

acc_results_sc = {}          # scaler -> overall accuracy, and (scaler, race) -> per-race accuracy
race_wise_acc_sc = {}        # (scaler, race) -> accuracy on that race's test rows
race_wise_std_sc = {}        # (scaler, race) -> std of the 0/1 correctness indicator
fairness_metrics_race = {}   # (scaler, race) -> SPD / EOD measured against White

best_fairness = float('inf')
best_model_fairness = None

print("\nLogistic Regression with different scaling methods:")
for scaler_type, scaler in scaling_methods.items():
    pipeline = ImbPipeline([('scaler', scaler), ('classifier', clf)])
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    overall_acc_sc = accuracy_score(y_test, y_pred)

    print(f"\nOverall test accuracy with {scaler_type}: {overall_acc_sc:.3f}")
    print("\nAccuracy by race:")
    acc_results_sc[(scaler_type)] = overall_acc_sc

    for race in all_races:
        idx = [pos for pos, label in enumerate(y_test_str) if label == race]
        if not idx:
            continue
        acc_r = accuracy_score(y_test[idx], y_pred[idx])
        # spread of the per-sample hit/miss indicator within this race
        std_acc = (y_pred[idx] == y_test[idx]).astype(int)
        std_r = np.std(std_acc)
        acc_results_sc[(scaler_type, race)] = acc_r
        race_wise_acc_sc[(scaler_type, race)] = acc_r
        race_wise_std_sc[(scaler_type, race)] = std_r
        print(f"  {race:20s}  n={len(idx):4d}  acc={acc_r:.3f}  std={std_r:.3f}")

    # Fairness of every other race measured against the White test rows.
    idx_race_White = [pos for pos, label in enumerate(y_test_str) if label == 'White']
    y_pred_race_White = y_pred[idx_race_White]
    spd_gaps = []
    eod_gaps = []

    print(f"\n")
    print(f"Fairness metrics for each race compared to White:")

    # True-positive rate on the White rows; identical for every comparison below.
    white_id = race_to_id['White']
    white_truth = y_test[idx_race_White] == white_id
    priv_groups_TPR = np.sum(white_truth & (y_pred_race_White == white_id)) / np.sum(white_truth)

    for race in all_races:
        if race == 'White':
            continue
        idx_race = [pos for pos, label in enumerate(y_test_str) if label == race]
        y_pred_race = y_pred[idx_race]

        # equal opportunity difference: TPR(White) - TPR(race)
        race_id = race_to_id[race]
        race_truth = y_test[idx_race] == race_id
        unpriv_groups_TPR = np.sum(race_truth & (y_pred_race == race_id)) / np.sum(race_truth)
        eod = priv_groups_TPR - unpriv_groups_TPR

        # statistical parity difference
        # NOTE(review): this compares the mean predicted class id of the two
        # groups, not a favorable-outcome rate -- confirm that is intended.
        static_parity_diff = y_pred_race.mean() - y_pred_race_White.mean()

        fairness_metrics_race[(scaler_type, race)] = {
            'statistical_parity_difference': static_parity_diff,
            'equal_opportunity_difference': eod
        }

        print(f"  {race:20s}  statistical_parity_difference: {static_parity_diff:.3f}  equal_opportunity_difference: {eod:.3f}" )
        spd_gaps.append(abs(static_parity_diff))
        eod_gaps.append(abs(eod))

    # One fairness score per scaler: mean |SPD| plus mean |EOD| over the races.
    abs_eod = np.mean(eod_gaps)
    abs_spd = np.mean(spd_gaps)
    fairness = abs_spd + abs_eod
    if fairness < best_fairness:
        best_fairness = fairness
        best_model_fairness = scaler_type

    print()
    print(f"{scaler_type} -> overall fairness: {fairness:.3f}")
    print (f"{scaler_type} -> overall SDP: {abs_spd:.3f}, overall EOD: {abs_eod:.3f}")
    print()
print(f"Model with best fairness based on SPD (statistical parity difference) and EOD (equal opportunity difference): {best_model_fairness} with fairness: {best_fairness:.3f}")


## 8.2 Logistic Regression using different scaling and sampling methods.

In [None]:
# Section 8.2: scaler -> PCA -> (optional) resampler -> logistic regression,
# fitted with the classifier's own hyperparameters (no grid search).

acc_results_sc_samp1 = {}     # (scaler, sampler) -> overall test accuracy
race_wise_acc_sc_samp1 = {}   # (scaler, sampler, race) -> accuracy on that race's test rows
race_wise_std_sc_samp1 = {}   # (scaler, sampler, race) -> std of the 0/1 correctness indicator
# (scaler, sampler, race) -> SPD / EOD against White. The sampler is part of
# the key so results from different samplers do not overwrite each other.
fairness_metrics_race = {}

best_fairness = float('inf')
best_model_fairness = None

for sampler_type, sampler in sampling_methods.items():
    for scaler_type, scaler in scaling_methods.items():
        # 'passthrough' keeps the step layout identical when no sampler is used
        sampler_step = sampler if sampler_type != "No Sampling" else 'passthrough'
        pipeline = ImbPipeline([
            ('scaler', scaler),
            ('pca', PCA(n_components=0.9, random_state=42)),
            ('sampler', sampler_step),
            ('logisticregression', clf),
        ])
        print(f"\nApplying {scaler_type} and {sampler_type}..." )

        # Fit the pipeline
        pipeline.fit(X_train, y_train)

        # Evaluate
        y_pred = pipeline.predict(X_test)
        overall_acc_sc_samp = accuracy_score(y_test, y_pred)
        print("\n Sampling Method: {} | Scaling Method: {} | Overall test accuracy: {:.3f}".format(sampler_type, scaler_type, overall_acc_sc_samp))
        acc_results_sc_samp1[(scaler_type, sampler_type)] = overall_acc_sc_samp

        for race in all_races:
            idx = [i for i, r in enumerate(y_test_str) if r == race]
            if len(idx) == 0:
                continue
            acc_r = accuracy_score(y_test[idx], y_pred[idx])
            # spread of the per-sample hit/miss indicator within this race
            std_acc = (y_pred[idx] == y_test[idx]).astype(int)
            std_r = np.std(std_acc)
            print(f"  {race:20s}  n={len(idx):4d}  acc={acc_r:.3f}  std={std_r:.3f}")
            race_wise_acc_sc_samp1[(scaler_type, sampler_type, race)] = acc_r
            race_wise_std_sc_samp1[(scaler_type, sampler_type, race)] = std_r

        # Fairness of every other race measured against the White test rows.
        idx_race_White = [i for i, r in enumerate(y_test_str) if r == 'White']
        y_pred_race_White = y_pred[idx_race_White]
        spd_gaps = []
        eod_gaps = []

        print(f"\n")
        print(f"Fairness metrics for each race compared to White:")

        # True-positive rate on the White rows; the same for every comparison.
        white_id = race_to_id['White']
        white_truth = y_test[idx_race_White] == white_id
        priv_groups_TPR = np.sum(white_truth & (y_pred_race_White == white_id)) / np.sum(white_truth)

        for race in all_races:
            if race == 'White':
                continue
            idx_race = [i for i, r in enumerate(y_test_str) if r == race]
            y_pred_race = y_pred[idx_race]

            # equal opportunity difference: TPR(White) - TPR(race)
            race_id = race_to_id[race]
            race_truth = y_test[idx_race] == race_id
            unpriv_groups_TPR = np.sum(race_truth & (y_pred_race == race_id)) / np.sum(race_truth)
            eod = priv_groups_TPR - unpriv_groups_TPR

            # statistical parity difference
            # NOTE(review): this compares the mean predicted class id of the
            # two groups, not a favorable-outcome rate -- confirm intended.
            static_parity_diff = y_pred_race.mean() - y_pred_race_White.mean()

            fairness_metrics_race[(scaler_type, sampler_type, race)] = {
                'statistical_parity_difference': static_parity_diff,
                'equal_opportunity_difference': eod
            }
            print(f"{race:20s} statistical parity_difference: {static_parity_diff:.3f},  equal_opportunity_difference: {eod:.3f}")

            spd_gaps.append(abs(static_parity_diff))
            eod_gaps.append(abs(eod))

        # One fairness score per combination: mean |SPD| plus mean |EOD|.
        abs_spd = np.mean(spd_gaps)
        abs_eod = np.mean(eod_gaps)
        fairness = abs_spd + abs_eod

        if fairness < best_fairness:
            best_fairness = fairness
            best_model_fairness = (scaler_type, sampler_type)
        print()
        print(f"{scaler_type},{sampler_type} -> overall_fairness: {fairness:.3f}")
        print(f"{scaler_type},{sampler_type} -> overall SDP: {abs_spd:.3f}, overall EOD: {abs_eod:.3f}")

    print()
print(f"Model with best fairness based on SPD (statistical parity difference) and EOD (equal opportunity difference): {best_model_fairness} with fairness: {best_fairness:.3f}")


## 8.3 Logistic Regression using different scaling and sampling methods + Grid Search.

In [None]:
# Section 8.3: scaler -> PCA -> (optional) resampler -> logistic regression,
# with the regularisation strength C chosen by 3-fold stratified grid search.

acc_results_sc_samp = {}     # (scaler, sampler) -> overall test accuracy
race_wise_acc_sc_samp = {}   # (scaler, sampler, race) -> accuracy on that race's test rows
race_wise_std_sc_samp = {}   # (scaler, sampler, race) -> std of the 0/1 correctness indicator
# (scaler, sampler, race) -> SPD / EOD against White. The sampler is part of
# the key so results from different samplers do not overwrite each other.
fairness_metrics_race = {}

best_fairness = float('inf')
best_model_fairness = None

for sampler_type, sampler in sampling_methods.items():
    for scaler_type, scaler in scaling_methods.items():
        # 'passthrough' keeps the step layout identical when no sampler is used
        sampler_step = sampler if sampler_type != "No Sampling" else 'passthrough'
        pipeline = ImbPipeline([
            ('scaler', scaler),
            ('pca', PCA(n_components=0.9, random_state=42)),
            ('sampler', sampler_step),
            ('logisticregression', clf),
        ])
        print(f"\nApplying {scaler_type} and {sampler_type}..." )

        # GridSearchCV for hyperparameter tuning (selection by CV accuracy)
        grid_search = GridSearchCV(
            estimator=pipeline,
            param_grid={
                'logisticregression__C': [0.1, 1, 10],
                'logisticregression__penalty': ['l2'],
            },
            scoring='accuracy',
            cv=StratifiedKFold(n_splits=3),
            n_jobs=-1,
            verbose=1
        )
        grid_search.fit(X_train, y_train)

        # Evaluate with the estimator selected by the search
        y_pred = grid_search.predict(X_test)
        overall_acc_sc_samp = accuracy_score(y_test, y_pred)
        print("\n Sampling Method: {} | Scaling Method: {} | Overall test accuracy: {:.3f}".format(sampler_type, scaler_type, overall_acc_sc_samp))
        acc_results_sc_samp[(scaler_type, sampler_type)] = overall_acc_sc_samp

        for race in all_races:
            idx = [i for i, r in enumerate(y_test_str) if r == race]
            if len(idx) == 0:
                continue
            acc_r = accuracy_score(y_test[idx], y_pred[idx])
            # spread of the per-sample hit/miss indicator within this race
            std_acc = (y_pred[idx] == y_test[idx]).astype(int)
            std_r = np.std(std_acc)
            print(f"  {race:20s}  n={len(idx):4d}  acc={acc_r:.3f}  std={std_r:.3f}")
            race_wise_acc_sc_samp[(scaler_type, sampler_type, race)] = acc_r
            race_wise_std_sc_samp[(scaler_type, sampler_type, race)] = std_r

        # Fairness of every other race measured against the White test rows.
        idx_race_White = [i for i, r in enumerate(y_test_str) if r == 'White']
        y_pred_race_White = y_pred[idx_race_White]
        spd_gaps = []
        eod_gaps = []

        print(f"\n")
        print(f"Fairness metrics for each race compared to White:")

        # True-positive rate on the White rows; the same for every comparison.
        white_id = race_to_id['White']
        white_truth = y_test[idx_race_White] == white_id
        priv_groups_TPR = np.sum(white_truth & (y_pred_race_White == white_id)) / np.sum(white_truth)

        for race in all_races:
            if race == 'White':
                continue
            idx_race = [i for i, r in enumerate(y_test_str) if r == race]
            y_pred_race = y_pred[idx_race]

            # equal opportunity difference: TPR(White) - TPR(race)
            race_id = race_to_id[race]
            race_truth = y_test[idx_race] == race_id
            unpriv_groups_TPR = np.sum(race_truth & (y_pred_race == race_id)) / np.sum(race_truth)
            eod = priv_groups_TPR - unpriv_groups_TPR

            # statistical parity difference
            # NOTE(review): this compares the mean predicted class id of the
            # two groups, not a favorable-outcome rate -- confirm intended.
            static_parity_diff = y_pred_race.mean() - y_pred_race_White.mean()

            fairness_metrics_race[(scaler_type, sampler_type, race)] = {
                'statistical_parity_difference': static_parity_diff,
                'equal_opportunity_difference': eod
            }
            print(f"  {race:20s}  statistical_parity_difference: {static_parity_diff:.3f}  equal_opportunity_difference: {eod:.3f}")

            spd_gaps.append(abs(static_parity_diff))
            eod_gaps.append(abs(eod))

        # One fairness score per combination: mean |SPD| plus mean |EOD|.
        abs_spd = np.mean(spd_gaps)
        abs_eod = np.mean(eod_gaps)
        fairness = abs_spd + abs_eod

        if fairness < best_fairness:
            best_fairness = fairness
            best_model_fairness = (scaler_type, sampler_type)
        print()
        print(f"{scaler_type},{sampler_type} -> overall fairness: {fairness:.3f}")
        print(f"{scaler_type},{sampler_type} -> overall SDP: {abs_spd:.3f}, overall EOD: {abs_eod:.3f}")

    print()
print(f"Model with best fairness based on SPD (statistical parity difference) and EOD (equal opportunity difference): {best_model_fairness} with fairness: {best_fairness:.3f}")



## 8.4 Logistic Regression with balanced weights and different scaling methods.

In [None]:
# Logistic regression with class_weight="balanced" and no resampling step;
# every other setting matches the baseline classifier.
balanced_lr_params = dict(
    max_iter=1000,
    multi_class="multinomial",
    solver="lbfgs",
    n_jobs=-1,
    class_weight="balanced",
    verbose=1,
)
clf_lr = LogisticRegression(**balanced_lr_params)

In [None]:

# Section 8.4: class_weight="balanced" logistic regression behind
# scaler -> PCA, with no resampling and no hyperparameter search. The pipeline
# is fitted directly, so no GridSearchCV object is built in this section.

acc_results_bw_sc1 = {}      # scaler -> overall test accuracy
race_wise_acc_bw_sc1 = {}    # (scaler, race) -> accuracy on that race's test rows
race_wise_std_bw_sc1 = {}    # (scaler, race) -> std of the 0/1 correctness indicator
fairness_metrics_race = {}   # (scaler, race) -> SPD / EOD measured against White

best_fairness = float('inf')
best_model_fairness = None

for scaler_type, scaler in scaling_methods.items():
    pipeline = ImbPipeline(
        [('scaler', scaler),
        ('pca', PCA(n_components=0.9, random_state=42)),
        ('logisticregression', clf_lr)]
    )
    print(f"\nApplying {scaler_type}...")

    # Fit and evaluate
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    overall_acc_bw_sc = accuracy_score(y_test, y_pred)
    print("\n Balanced weight -> Scaling Method: {} | Overall test accuracy: {:.3f}".format(scaler_type, overall_acc_bw_sc))

    acc_results_bw_sc1[(scaler_type)] = overall_acc_bw_sc
    for race in all_races:
        idx = [i for i, r in enumerate(y_test_str) if r == race]
        if len(idx) == 0:
            continue
        acc_r = accuracy_score(y_test[idx], y_pred[idx])
        # spread of the per-sample hit/miss indicator within this race
        std_acc = (y_pred[idx] == y_test[idx]).astype(int)
        std_r = np.std(std_acc)
        print(f" {race:20s}  n={len(idx):4d}  acc={acc_r:.3f}  std={std_r:.3f}")
        race_wise_acc_bw_sc1[(scaler_type, race)] = acc_r
        race_wise_std_bw_sc1[(scaler_type, race)] = std_r

    # Fairness of every other race measured against the White test rows.
    idx_race_White = [i for i, r in enumerate(y_test_str) if r == 'White']
    y_pred_race_White = y_pred[idx_race_White]
    spd_gaps = []
    eod_gaps = []

    print(f"\n")
    print(f"Fairness metrics for each race compared to White:")

    # True-positive rate on the White rows; the same for every comparison.
    white_id = race_to_id['White']
    white_truth = y_test[idx_race_White] == white_id
    priv_groups_TPR = np.sum(white_truth & (y_pred_race_White == white_id)) / np.sum(white_truth)

    for race in all_races:
        if race == 'White':
            continue
        idx_race = [i for i, r in enumerate(y_test_str) if r == race]
        y_pred_race = y_pred[idx_race]

        # equal opportunity difference: TPR(White) - TPR(race)
        race_id = race_to_id[race]
        race_truth = y_test[idx_race] == race_id
        unpriv_groups_TPR = np.sum(race_truth & (y_pred_race == race_id)) / np.sum(race_truth)
        eod = priv_groups_TPR - unpriv_groups_TPR

        # statistical parity difference
        # NOTE(review): this compares the mean predicted class id of the two
        # groups, not a favorable-outcome rate -- confirm that is intended.
        static_parity_diff = y_pred_race.mean() - y_pred_race_White.mean()

        fairness_metrics_race[(scaler_type, race)] = {
            'statistical_parity_difference': static_parity_diff,
            'equal_opportunity_difference': eod
        }
        print(f"  {race:20s}  statistical_parity_difference: {static_parity_diff:.3f}  equal_opportunity_difference: {eod:.3f}")

        spd_gaps.append(abs(static_parity_diff))
        eod_gaps.append(abs(eod))

    # One fairness score per scaler: mean |SPD| plus mean |EOD| over the races.
    abs_spd = np.mean(spd_gaps)
    abs_eod = np.mean(eod_gaps)
    fairness = abs_spd + abs_eod
    if fairness < best_fairness:
        best_fairness = fairness
        best_model_fairness = scaler_type
    print()
    print(f"{scaler_type} -> overall fairness: {fairness:.3f}")
    print(f"{scaler_type} -> overall SDP: {abs_spd:.3f}, overall EOD: {abs_eod:.3f}")
print()
print(f"Model with best fairness based on SPD (statistical parity difference) and EOD (equal opportunity difference): {best_model_fairness} with fairness: {best_fairness:.3f}")



## 8.5 Logistic Regression with balanced weights and different scaling methods + GridSearch (CV).

In [None]:

# Section 8.5: class_weight="balanced" logistic regression behind
# scaler -> PCA, with C chosen by 3-fold stratified grid search.

acc_results_bw_sc = {}       # scaler -> overall test accuracy
race_wise_acc_bw_sc = {}     # (scaler, race) -> accuracy on that race's test rows
race_wise_std_bw_sc = {}     # (scaler, race) -> std of the 0/1 correctness indicator
fairness_metrics_race = {}   # (scaler, race) -> SPD / EOD measured against White

best_fairness = float('inf')
best_model_fairness = None

for scaler_type, scaler in scaling_methods.items():
    pipeline = ImbPipeline(
        [('scaler', scaler),
        ('pca', PCA(n_components=0.9, random_state=42)),
        ('logisticregression', clf_lr)]
    )
    print(f"\nApplying {scaler_type}..." )

    # Grid search over the pipeline (selection by CV accuracy)
    grid_search = GridSearchCV(
        estimator=pipeline,
        param_grid={'logisticregression__C': [0.1, 1, 10],'logisticregression__penalty': ['l2']},
        scoring='accuracy',
        cv=StratifiedKFold(n_splits=3),
        n_jobs=1,
        verbose=0
    )

    grid_search.fit(X_train, y_train)

    # Evaluate with the estimator selected by the search
    y_pred = grid_search.predict(X_test)
    overall_acc = accuracy_score(y_test, y_pred)
    print("\nBalanced weight -> Scaling Method: {}".format(scaler_type))
    print("Overall test accuracy: {:>2.3f}".format(overall_acc))
    acc_results_bw_sc[(scaler_type)] = overall_acc

    for race in all_races:
        idx = [i for i, r in enumerate(y_test_str) if r == race]
        if len(idx) == 0:
            continue
        acc_r = accuracy_score(y_test[idx], y_pred[idx])
        # spread of the per-sample hit/miss indicator within this race
        std_acc = (y_pred[idx] == y_test[idx]).astype(int)
        std_r = np.std(std_acc)
        print(f"{race:20s}  n={len(idx):4d}  acc={acc_r:>2.3f}  std={std_r:>2.3f}")
        # record both per-race statistics, as the other sections do
        race_wise_acc_bw_sc[(scaler_type, race)] = acc_r
        race_wise_std_bw_sc[(scaler_type, race)] = std_r

    # Fairness of every other race measured against the White test rows.
    idx_race_White = [i for i, r in enumerate(y_test_str) if r == 'White']
    y_pred_race_White = y_pred[idx_race_White]
    spd_gaps = []
    eod_gaps = []

    print(f"\n")
    print(f"Fairness metrics for each race compared to White:")

    # True-positive rate on the White rows; the same for every comparison.
    white_id = race_to_id['White']
    white_truth = y_test[idx_race_White] == white_id
    priv_groups_TPR = np.sum(white_truth & (y_pred_race_White == white_id)) / np.sum(white_truth)

    for race in all_races:
        if race == 'White':
            continue
        idx_race = [i for i, r in enumerate(y_test_str) if r == race]
        y_pred_race = y_pred[idx_race]

        # equal opportunity difference: TPR(White) - TPR(race)
        race_id = race_to_id[race]
        race_truth = y_test[idx_race] == race_id
        unpriv_groups_TPR = np.sum(race_truth & (y_pred_race == race_id)) / np.sum(race_truth)
        eod = priv_groups_TPR - unpriv_groups_TPR

        # statistical parity difference
        # NOTE(review): this compares the mean predicted class id of the two
        # groups, not a favorable-outcome rate -- confirm that is intended.
        static_parity_diff = y_pred_race.mean() - y_pred_race_White.mean()

        fairness_metrics_race[(scaler_type, race)] = {
            'statistical_parity_difference': static_parity_diff,
            'equal_opportunity_difference': eod
        }

        print(f"{race:20s}  statistical_parity_difference: {static_parity_diff:.3f}, equal_opportunity_difference: {eod:.3f}")

        spd_gaps.append(abs(static_parity_diff))
        eod_gaps.append(abs(eod))

    # One fairness score per scaler: mean |SPD| plus mean |EOD| over the races.
    abs_spd = np.mean(spd_gaps)
    abs_eod = np.mean(eod_gaps)
    fairness = abs_spd + abs_eod
    if fairness < best_fairness:
        best_fairness = fairness
        best_model_fairness = scaler_type
    print()
    print(f"{scaler_type} -> overall fairness: {fairness:.3f}")
    print(f"{scaler_type} -> overall SDP: {abs_spd:.3f}, overall EOD: {abs_eod:.3f}")
    print()
print(f"Model with best fairness based on SPD (statistical parity difference) and EOD (equal opportunity difference): {best_model_fairness} with fairness: {best_fairness:.3f}")



## 8.6. Logistic Regression and Reweighing (AIF360).

In [None]:
# Wrap the train/test feature matrices in AIF360 StandardDataset objects so
# the Reweighing pre-processor can be applied to them.

# One DataFrame column per feature dimension, named features_0 ... features_{d-1}.
train_features = pd.DataFrame(X_train,columns=[f'features_{i}' for i in range(X_train.shape[1])])
test_features = pd.DataFrame(X_test,columns=[f'features_{i}' for i in range(X_test.shape[1])])

# Race is not binary, so it is collapsed to a binary protected attribute:
# White -> 0, every other race -> 1.
# NOTE(review): a race missing from this map becomes NaN in 'race_bin';
# StandardDataset may then drop that row, which would misalign later
# indexing by y_test_str -- confirm every label is covered.
race_map = {'Black': 1, 'East Asian': 1, 'Indian': 1, 'Latino_Hispanic': 1, 'Middle Eastern': 1,'Southeast Asian': 1, 'White': 0}

# The bare string below is disabled code kept for reference; it is never executed.
'''
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
df_train = preprocessor.fit_transform(df_train)
df_test = preprocessor.transform(df_test)
'''

# Attach the string race and the integer class id (as float) to each frame.
df_train = train_features.copy()
df_train['race'] = y_train_str
df_train['labels'] = y_train.astype(float)
df_test = test_features.copy()
df_test['race'] = y_test_str
df_test['labels'] = y_test.astype(float)

df_train['race_bin'] = df_train['race'].map(race_map)
df_test['race_bin'] = df_test['race'].map(race_map)

# NOTE(review): White (race_bin == 0) is declared the unprivileged group
# here, while the per-race fairness metrics elsewhere in this notebook use
# White as the privileged reference (priv_groups_TPR) -- confirm the direction.
unprivileged_groups = [{'race_bin': 0.0}]
privileged_groups = [{'race_bin': 1.0}]

# Drop the string column; only numeric columns remain.
# NOTE(review): 'race_bin' stays in the frame, so it presumably also ends up
# in the AIF360 feature matrix used for training -- confirm this is wanted.
df_train = df_train.drop(columns=['race'])
df_test = df_test.drop(columns=['race'])


# AIF360 StandardDataset
# NOTE(review): 'labels' holds multiclass race ids and favorable_classes=[1]
# marks only class id 1 as favorable; StandardDataset appears to collapse
# labels to favorable/unfavorable, which would make the downstream classifier
# binary -- confirm against the AIF360 documentation.
train_aif = StandardDataset(df_train, label_name='labels', favorable_classes=[1],
                            protected_attribute_names=['race_bin'],
                            privileged_classes=[[1.0]])

test_aif = StandardDataset(df_test, label_name='labels', favorable_classes=[1],
                            protected_attribute_names=['race_bin'],
                            privileged_classes=[[1.0]])

In [None]:
# Section 8.6: AIF360 Reweighing followed by a scaler and logistic regression
# trained with the resulting per-instance weights.

# Fit the reweigher on the training set and apply it to both splits.
RW = Reweighing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
train_aif_transf = RW.fit_transform(train_aif)
test_rw = RW.transform(test_aif)

# Extract features, labels and the learned instance weights
X_train_rw = train_aif_transf.features
y_train_rw = train_aif_transf.labels.ravel()
X_test_aif = test_rw.features
y_test_aif = test_rw.labels.ravel()
sample_weights = train_aif_transf.instance_weights

print("\nTraining Logistic Regression with Reweighing...")

lr_rw_clf = LogisticRegression(
    max_iter=1000,
    multi_class="multinomial",
    solver="lbfgs",
    n_jobs=-1,
    verbose=1)

rw_acc_results_sc = {}       # scaler -> overall test accuracy
rw_acc_std_sc = {}           # (scaler, race) -> std of the 0/1 correctness indicator
rw_race_wise_acc = {}        # (scaler, race) -> accuracy on that race's test rows
fairness_metrics_race = {}   # (scaler, race) -> SPD / EOD measured against White

# Track the best (lowest) fairness score across scalers, as the other
# sections do, instead of reporting whichever scaler ran last.
best_fairness = float('inf')
best_model_fairness = None

for scaler_type, scaler in scaling_methods.items():
    pipeline = ImbPipeline([('scaler', scaler), ('classifier', lr_rw_clf)])
    # route the instance weights to the classifier step only
    pipeline.fit(X_train_rw, y_train_rw, classifier__sample_weight=sample_weights)

    # Evaluate
    y_pred = pipeline.predict(X_test_aif)
    overall_acc_rw = accuracy_score(y_test_aif, y_pred)
    print(f"\nOverall test accuracy with Reweighing and {scaler_type}: {overall_acc_rw:.3f}")
    print("\nAccuracy by race:")
    rw_acc_results_sc[(scaler_type)] = overall_acc_rw

    for race in all_races:
        idx = [i for i, r in enumerate(y_test_str) if r == race]
        if len(idx) == 0:
            continue
        acc_r = accuracy_score(y_test_aif[idx], y_pred[idx])
        # correctness is judged against the same labels as acc_r
        std_acc = (y_pred[idx] == y_test_aif[idx]).astype(int)
        std_r = np.std(std_acc)
        rw_race_wise_acc[(scaler_type, race)] = acc_r
        rw_acc_std_sc[(scaler_type, race)] = std_r
        print(f"  {race:20s}  n={len(idx):4d}  acc={acc_r:.3f}  std={std_r:.3f}")

    # Fairness of every other race measured against the White test rows.
    idx_race_white = [i for i, r in enumerate(y_test_str) if r == 'White']
    y_pred_race_white = y_pred[idx_race_white]
    spd_gaps = []
    eod_gaps = []

    print(f"\n")
    print(f"Fairness metrics for each race compared to White:")

    # NOTE(review): the TPRs below compare y_pred with the original race ids
    # in y_test, while the classifier was trained on the AIF360 labels
    # (y_train_rw). If those labels are binarised, these TPRs are not
    # meaningful -- confirm which label space is intended.
    white_id = race_to_id['White']
    white_truth = y_test[idx_race_white] == white_id
    priv_groups_TPR = np.sum(white_truth & (y_pred_race_white == white_id)) / np.sum(white_truth)

    for race in all_races:
        if race == 'White':
            continue
        idx_race = [i for i, r in enumerate(y_test_str) if r == race]
        y_pred_race = y_pred[idx_race]

        # equal opportunity difference: TPR(White) - TPR(race)
        race_id = race_to_id[race]
        race_truth = y_test[idx_race] == race_id
        unpriv_groups_TPR = np.sum(race_truth & (y_pred_race == race_id)) / np.sum(race_truth)
        eod = priv_groups_TPR - unpriv_groups_TPR

        # statistical parity difference (difference of mean predicted label)
        static_parity_diff = y_pred_race.mean() - y_pred_race_white.mean()

        fairness_metrics_race[(scaler_type, race)] = {
            'statistical_parity_difference': static_parity_diff,
            'equal_opportunity_difference': eod
        }
        print(f"{race:20s}  statistical_parity_difference: {static_parity_diff:.3f}  equal_opportunity_difference: {eod:.3f}")

        spd_gaps.append(abs(static_parity_diff))
        eod_gaps.append(abs(eod))

    # One fairness score per scaler: mean |SPD| plus mean |EOD| over the races.
    abs_spd = np.mean(spd_gaps)
    abs_eod = np.mean(eod_gaps)
    fairness = abs_spd + abs_eod
    if fairness < best_fairness:
        best_fairness = fairness
        best_model_fairness = scaler_type
    print()
    print(f"{scaler_type} -> overall fairness: {fairness:.3f}")
    print(f"{scaler_type} -> overall SPD: {abs_spd:.3f}, overall EOD: {abs_eod:.3f}")
print()
print(f"Model with best fairness with Reweighing based on SPD (statistical parity difference) and EOD (equal opportunity difference): {best_fairness:.3f}, Scaler: {best_model_fairness}")

## References
###### https://huggingface.co/datasets/ryanramos/fairface
###### https://codecut.ai/pipeline-gridsearchcv-prevent-data-leakage-when-scaling-the-data-3/#:~:text=May%2023%2C%202023-,Pipeline%20+%20GridSearchCV:%20Prevent%20Data%20Leakage%20when%20Scaling%20the%20Data,previous%20tips%20on%20machine%20learning.
######  https://www.geeksforgeeks.org/machine-learning/data-pre-processing-wit-sklearn-using-standard-and-minmax-scaler/
###### https://www.geeksforgeeks.org/machine-learning/performing-feature-selection-with-gridsearchcv-in-sklearn/
###### https://aif360.readthedocs.io/en/latest/modules/generated/aif360.datasets.StandardDataset.html?utm
###### https://medium.com/ibm-data-ai/fairness-in-machine-learning-pre-processing-algorithms-a670c031fba8
###### https://chatgpt.com/share/692fec2d-c7c4-800d-a270-93fce02447c6
