In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, accuracy_score
from sklearn.inspection import permutation_importance
from fairlearn.metrics import MetricFrame
from sklearn.linear_model import LogisticRegression
from fairlearn.metrics import equalized_odds_difference, demographic_parity_difference, demographic_parity_ratio 
from sklearn.impute import SimpleImputer
from sklearn.model_selection import cross_val_score
from sklearn.utils.class_weight import compute_sample_weight
from aif360.metrics import ClassificationMetric
from aif360.datasets import StandardDataset
from aif360.algorithms.preprocessing import Reweighing 
from aif360.algorithms.inprocessing import AdversarialDebiasing
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

import tensorflow as tf
import warnings


pd.set_option("display.max_columns", None)
warnings.simplefilter(action='ignore', category=FutureWarning)

if tf.__version__.startswith('2'):
    tf.compat.v1.disable_eager_execution


  vect_normalized_discounted_cumulative_gain = vmap(
  monte_carlo_vect_ndcg = vmap(vect_normalized_discounted_cumulative_gain, in_dims=(0,))





In [2]:
df = pd.read_csv('final_project_dataset.csv')
df.describe()

Unnamed: 0,encounter_id,patient_nbr,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,number_outpatient,number_inpatient,number_emergency,num_lab_procedures,number_diagnoses,num_medications,num_procedures
count,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0
mean,167429000.0,54936080.0,2.027694,3.655316,5.701096,4.49019,0.362429,0.600855,0.195086,43.255745,7.424434,16.776035,1.349021
std,101904600.0,38578400.0,1.427585,5.219279,4.064532,2.999667,1.249295,1.207934,0.92041,19.657319,1.924872,8.39713,1.719104
min,12522.0,135.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
25%,88295960.0,23572190.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,32.0,6.0,11.0,0.0
50%,154763700.0,46307830.0,1.0,1.0,7.0,4.0,0.0,0.0,0.0,44.0,8.0,15.0,1.0
75%,232089700.0,88236270.0,3.0,3.0,7.0,6.0,0.0,1.0,0.0,57.0,9.0,21.0,2.0
max,443867200.0,189502600.0,8.0,28.0,25.0,14.0,42.0,21.0,76.0,132.0,16.0,81.0,6.0


In [3]:
df = df[ (df['race'] != '?') & (df['gender'] != '?')]

In [4]:
df.head(2)

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,payer_code,medical_specialty,primary_diagnosis_code,other_diagnosis_codes,number_outpatient,number_inpatient,number_emergency,num_lab_procedures,number_diagnoses,num_medications,num_procedures,ndc_code,max_glu_serum,A1Cresult,change,readmitted
0,2278392,8222157,Caucasian,Female,[0-10),?,6,25,1,1,?,Pediatrics-Endocrinology,250.83,?|?,0,0,0,41,1,1,0,,,,No,NO
1,149190,55629189,Caucasian,Female,[10-20),?,1,1,7,3,?,?,276.0,250.01|255,0,0,0,59,9,18,0,68071-1701,,,Ch,>30


In [5]:
a = df['readmitted'].unique()
print(a)

['NO' '>30' '<30']


Let us map our results to binary values. 0 if the patient hasnt been readmitted else 1

In [6]:
df['readmitted'] = df['readmitted'].apply(lambda x: 0 if x == 'NO' else 1)
#df.replace("?", np.nan, inplace=True)

Lets drop unnecessary columns

In [7]:
df = df.drop(columns=['encounter_id', 'patient_nbr','payer_code'])

In [8]:
print((df['readmitted'].isna()).sum())

0


In [9]:
def convert_weight(value):
    if pd.isna(value) or value == '?':
        return np.nan
    elif '[' in value and ')' in value:
        lower, upper = value.strip('[]()').split('-')
        return (float(lower) + float(upper)) / 2
    elif '>' in value:
        a = value.strip('>')
        return int(a)
    else:
        return float(value)

df['weight'] = df['weight'].apply(convert_weight)

Let us define numerical and categorical columns for processing

In [11]:
df['age'].fillna(df['age'].mode(), inplace=True)


In [12]:
#df['weight'] = df['weight'].apply(lambda x: df['weight'].mean() if ('?' in x) else x).astype(int)

In [13]:
print(df.dtypes)

race                         object
gender                       object
age                          object
weight                      float64
admission_type_id             int64
discharge_disposition_id      int64
admission_source_id           int64
time_in_hospital              int64
medical_specialty            object
primary_diagnosis_code       object
other_diagnosis_codes        object
number_outpatient             int64
number_inpatient              int64
number_emergency              int64
num_lab_procedures            int64
number_diagnoses              int64
num_medications               int64
num_procedures                int64
ndc_code                     object
max_glu_serum                object
A1Cresult                    object
change                       object
readmitted                    int64
dtype: object


In [18]:
categorical = [
    'race',
    'gender',
    'age',
    'medical_specialty',
    'ndc_code',
    'max_glu_serum',
    'A1Cresult',
    'change',
    'other_diagnosis_codes'
]

numerical = [
    'weight',
    'admission_type_id',
    'discharge_disposition_id',
    'admission_source_id',
    'time_in_hospital',
    'number_outpatient',
    'number_inpatient',
    'number_emergency',
    'num_lab_procedures',
    'number_diagnoses',
    'num_medications',
    'num_procedures',
    #'payer_code',
    'primary_diagnosis_code'
]

In [19]:
print(df[categorical].isna().sum())  # Find missing values in categorical features


race                          0
gender                        0
age                           0
medical_specialty             0
ndc_code                  23059
max_glu_serum            133176
A1Cresult                114989
change                        0
other_diagnosis_codes         0
dtype: int64


In [20]:
print(df[numerical].isna().sum())

weight                      136005
admission_type_id                0
discharge_disposition_id         0
admission_source_id              0
time_in_hospital                 0
number_outpatient                0
number_inpatient                 0
number_emergency                 0
num_lab_procedures               0
number_diagnoses                 0
num_medications                  0
num_procedures                   0
primary_diagnosis_code           0
dtype: int64


In [21]:
preprocessor = ColumnTransformer(
    transformers=[
        ("num", Pipeline([
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]), numerical),
        ("cat", Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('encoder', OneHotEncoder(handle_unknown='ignore'))
        ]), categorical)
    ]
)

In [22]:
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(max_iter = 1000,random_state=42))
])

In [23]:
df[numerical] = df[numerical].replace('?', np.nan)
df[categorical] = df[categorical].replace('?', np.nan)

In [24]:
df[numerical] = df[numerical].apply(pd.to_numeric, errors='coerce')

# Handle any remaining missing values (for instance, using the mean or median)
df[numerical] = df[numerical].fillna(df[numerical].mean())

In [None]:
X = df.drop('readmitted', axis=1)
y = df['readmitted']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)






pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)
y_pred_proba = pipeline.predict_proba(X_test)[:, 1]

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("ROC-AUC Score:", roc_auc_score(y_test, y_pred_proba))




# scores = cross_val_score(pipeline, X_train, y_train, cv=10, scoring='roc_auc')
# print(f'Cross-validation ROC-AUC: {scores.mean()} ± {scores.std()}')

Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.75      0.70     22498
           1       0.65      0.52      0.58     19537

    accuracy                           0.65     42035
   macro avg       0.65      0.64      0.64     42035
weighted avg       0.65      0.65      0.64     42035

ROC-AUC Score: 0.6920001688297347


Well that is just a tiny bit better than random guessing.

Lets try again, with a Random Forest Classifier

In [26]:
def get_rf_pipeline():
    #this function returns a random forest classifier pipeline
    numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
    ])

    categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

                                    
    preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical),
        ('cat', categorical_transformer, categorical)
    ])

    pipeline_rf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42))
    ])
    return pipeline_rf

In [None]:
pipeline_rf = get_rf_pipeline()

X = df.drop('readmitted', axis=1)
y = df['readmitted']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

pipeline_rf.fit(X_train, y_train)

y_pred = pipeline_rf.predict(X_test)
y_pred_proba = pipeline_rf.predict_proba(X_test)[:, 1]

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("ROC-AUC Score:", roc_auc_score(y_test, y_pred_proba))

# scores = cross_val_score(pipeline_rf, X_train, y_train, cv=10, scoring='roc_auc')
# print(f'Cross-validation ROC-AUC: {scores.mean()} ± {scores.std()}')

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.85      0.80     22498
           1       0.80      0.68      0.74     19537

    accuracy                           0.77     42035
   macro avg       0.78      0.77      0.77     42035
weighted avg       0.78      0.77      0.77     42035

ROC-AUC Score: 0.8559920800635521


In [46]:
groups = ['gender', 'race']
results = []
privileged = {}
for group in groups:
    privileged[group] = {}

In [29]:
# def evaluate_accuracy(X_test, y_test, y_pred, y_pred_proba, group):
#     results = []
#     for value in X_test[group].unique():
#         mask = X_test[group] == value
#         if mask.sum() > 0:
#             group_y_test = y_test[mask]
#             group_y_pred = y_pred[mask]
#             group_y_pred_proba = y_pred_proba[mask]
#             try:
#                 roc_auc = roc_auc_score(group_y_test, group_y_pred_proba)
#             except ValueError:
#                 roc_auc = float('nan')
#             tn, fp, fn, tp = confusion_matrix(group_y_test, group_y_pred).ravel()

#             results.append({
#                 "Group": group,
#                 "Value": value,
#                 "Accuracy": (group_y_test == group_y_pred).mean(),
#                 "ROC-AUC": roc_auc,
#                 "TP": tp,
#                 "TN": tn,
#                 "FP": fp,
#                 "FN": fn
#             })

#     for result in results:
#         print(result)
    
#     return results

    
# for group in groups:
#     results = evaluate_accuracy(X_test, y_test, y_pred, y_pred_proba, group)

In [47]:
def evaluate_fairness(y_true, y_pred, sensitive_features, group_name):
    eod = equalized_odds_difference(
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=sensitive_features
    )
    
    dpd = demographic_parity_difference(
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_features
    )
    
    di_ratio = demographic_parity_ratio(
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_features
    )
    
    print(f'\n group is {group_name}')
    print(f"Demographic Parity Ratio: {di_ratio:.4f}")
    print(f"Equalized Odds Difference: {eod:.4f}")
    print(f"Demographic Parity Difference: {dpd:.4f}")
    
    
    positive_rates = {}
    for group_value in sensitive_features.unique():
        mask = sensitive_features == group_value
        group_y_pred = y_pred[mask]
        positive_rate = group_y_pred.mean()
        positive_rates[group_value] = positive_rate
        print(f"Subgroup: {group_value}, Positive Prediction Rate: {positive_rate:.4f}")
    
    max_rate = max(positive_rates.values())
    min_rate = min(positive_rates.values())
    
    positive_rates = dict(sorted(positive_rates.items(), key=lambda x: x[1]))
    
    
    values = list(positive_rates.values())
    q1, q3 = np.percentile(values, [25, 75])
    iqr = q3 - q1
    threshold = iqr * 0.5 
        
        
    
    
    for group_value, rate in positive_rates.items():
        
        if rate == min_rate or (rate - min_rate <= threshold):
            print(f"--> Unprivileged Group: {group_value} (Positive Rate: {rate:.4f})")
            
            if 'unprivileged' in privileged[group_name]:
                privileged[group_name]['unprivileged'].append(group_value)
            else:
                privileged[group_name]['unprivileged'] = [group_value]

        elif rate == max_rate:
            
            print(f"--> Privileged Group: {group_value} (Positive Rate: {rate:.4f})")
            if 'privileged' in privileged[group_name]:
                privileged[group_name]['privileged'].append(group_value)
            else:
                privileged[group_name]['privileged'] = [group_value]

In [48]:
for group in groups:
    evaluate_fairness(y_test, y_pred, X_test[group], group)


 group is gender
Demographic Parity Ratio: 0.0000
Equalized Odds Difference: 0.6959
Demographic Parity Difference: 0.4142
Subgroup: Female, Positive Prediction Rate: 0.4142
Subgroup: Male, Positive Prediction Rate: 0.3758
Subgroup: Unknown/Invalid, Positive Prediction Rate: 0.0000
--> Unprivileged Group: Unknown/Invalid (Positive Rate: 0.0000)
--> Privileged Group: Female (Positive Rate: 0.4142)

 group is race
Demographic Parity Ratio: 0.4684
Equalized Odds Difference: 0.1482
Demographic Parity Difference: 0.2171
Subgroup: Caucasian, Positive Prediction Rate: 0.4083
Subgroup: Other, Positive Prediction Rate: 0.3169
Subgroup: AfricanAmerican, Positive Prediction Rate: 0.3695
Subgroup: Hispanic, Positive Prediction Rate: 0.3188
Subgroup: Asian, Positive Prediction Rate: 0.1912
--> Unprivileged Group: Asian (Positive Rate: 0.1912)
--> Privileged Group: Caucasian (Positive Rate: 0.4083)


We can note different results based on gender/race. 

In [None]:

for group in groups:
    
    aif_dict = {}

    for element in privileged[group]['privileged']:    
        aif_dict[element] = 1
    
    for element in privileged[group]['unprivileged']:
        aif_dict[element] = 0


    privileged_class = [key for key, value in aif_dict.items() if value == 1]
    unprivileged_class = [key for key, value in aif_dict.items() if value == 0]

    print(f'privileged classes for group {group} are {privileged_class}')
    print(f'unprivileged classes for group {group} are {unprivileged_class}')
    
    #map column values to 0,1 s, based on whether or not the entry is privileged
    #df[group] = df[group].apply(lambda x: 1 if x in(privileged_class) else 0)


encoder = LabelEncoder()


for col in categorical:
    # Fit and transform the column to encode categorical values
    df[col] = encoder.fit_transform(df[col].astype(str))  # Ensure all categories are considered by converting to string

    # Optionally, print the mapping of original values to encoded labels
    print(f"Encoded {col}: {dict(zip(encoder.classes_, range(len(encoder.classes_))))}")





all_unique_values = set()

# Collect unique values for each column
for col in categorical:
    all_unique_values.update(df[col].astype(str).unique())

encoder.fit(sorted(all_unique_values)) 

privileged classes for group gender are ['Unknown/Invalid']
unprivileged classes for group gender are ['Female']
privileged classes for group race are ['Asian']
unprivileged classes for group race are ['Caucasian']
Encoded race: {'AfricanAmerican': 0, 'Asian': 1, 'Caucasian': 2, 'Hispanic': 3, 'Other': 4}
Encoded gender: {'Female': 0, 'Male': 1, 'Unknown/Invalid': 2}
Encoded age: {'[0-10)': 0, '[10-20)': 1, '[20-30)': 2, '[30-40)': 3, '[40-50)': 4, '[50-60)': 5, '[60-70)': 6, '[70-80)': 7, '[80-90)': 8, '[90-100)': 9}
Encoded medical_specialty: {'AllergyandImmunology': 0, 'Anesthesiology': 1, 'Anesthesiology-Pediatric': 2, 'Cardiology': 3, 'Cardiology-Pediatric': 4, 'DCPTEAM': 5, 'Dentistry': 6, 'Dermatology': 7, 'Emergency/Trauma': 8, 'Endocrinology': 9, 'Endocrinology-Metabolism': 10, 'Family/GeneralPractice': 11, 'Gastroenterology': 12, 'Gynecology': 13, 'Hematology': 14, 'Hematology/Oncology': 15, 'Hospitalist': 16, 'InfectiousDiseases': 17, 'InternalMedicine': 18, 'Nephrology': 19

First we create an AIF360 dataset

In [37]:
categorical

['race',
 'gender',
 'age',
 'medical_specialty',
 'ndc_code',
 'max_glu_serum',
 'A1Cresult',
 'change',
 'other_diagnosis_codes']

In [33]:
for col in df.columns:
    if col not in categorical and col not in numerical:
        print(col)

readmitted


Handle null values

In [None]:
df[numerical] = df[numerical].replace('?', np.nan)
df[categorical] = df[categorical].replace('?', np.nan)

df[numerical] = df[numerical].fillna(df[numerical].mean())
df[categorical] = df[categorical].fillna(df[categorical].mode().iloc[0])

scaler = StandardScaler()
df[numerical] = scaler.fit_transform(df[numerical])


df.head(5)

Unnamed: 0,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,medical_specialty,primary_diagnosis_code,other_diagnosis_codes,number_outpatient,number_inpatient,number_emergency,num_lab_procedures,number_diagnoses,num_medications,num_procedures,ndc_code,max_glu_serum,A1Cresult,change,readmitted
0,2,0,0,-1.8699000000000002e-17,2.769215,4.061454,-1.158213,-1.163319,37,-1.194483,18081,-0.291159,-0.499943,-0.213535,-0.113146,-3.358732,-1.881204,-0.785972,238,2,1,1,0
1,2,0,1,-1.8699000000000002e-17,-0.719669,-0.510775,0.31459,-0.496817,18,-1.071165,1273,-0.291159,-0.499943,-0.213535,0.801255,0.812587,0.145807,-0.785972,238,2,1,0,1
2,0,0,2,-1.8699000000000002e-17,-0.719669,-0.510775,0.31459,-0.830068,18,0.751409,3036,1.297335,0.322302,-0.213535,-1.637148,-0.751658,-0.450373,2.126795,65,2,1,1,0
3,2,1,3,-1.8699000000000002e-17,-0.719669,-0.510775,0.31459,-0.830068,18,-2.384202,2146,-0.291159,-0.499943,-0.213535,0.039254,-0.230243,-0.092665,-0.203419,238,2,1,0,0
4,2,1,4,-1.8699000000000002e-17,-0.719669,-0.510775,0.31459,-1.163319,18,-1.458217,339,-0.291159,-0.499943,-0.213535,0.394854,-1.273072,-1.046552,-0.785972,11,2,1,0,0
5,2,1,4,-1.8699000000000002e-17,-0.719669,-0.510775,0.31459,-1.163319,18,-1.458217,339,-0.291159,-0.499943,-0.213535,0.394854,-1.273072,-1.046552,-0.785972,238,2,1,0,0
6,2,1,5,-1.8699000000000002e-17,-0.021892,-0.510775,-0.912746,-0.496817,18,-0.395049,7065,-0.291159,-0.499943,-0.213535,-0.621147,0.812587,-0.092665,2.709348,185,2,1,1,1
7,2,1,6,-1.8699000000000002e-17,0.675885,-0.510775,-0.912746,-0.163566,18,-0.395049,7202,-0.291159,-0.499943,-0.213535,1.360056,-0.230243,0.503515,-0.203419,143,2,1,0,0
8,2,1,6,-1.8699000000000002e-17,0.675885,-0.510775,-0.912746,-0.163566,18,-0.395049,7202,-0.291159,-0.499943,-0.213535,1.360056,-0.230243,0.503515,-0.203419,105,2,1,0,0
9,2,1,6,-1.8699000000000002e-17,0.675885,-0.510775,-0.912746,-0.163566,18,-0.395049,7202,-0.291159,-0.499943,-0.213535,1.360056,-0.230243,0.503515,-0.203419,183,2,1,0,0


Lets create an aif360 dataset

In [None]:
dataset = StandardDataset(
    df,
    label_name='readmitted',
    favorable_classes=[0],
    protected_attribute_names=['race'],
    privileged_classes=[[2,4,3,0]]
    # categorical_features=categorical_columns,  # Pass all categorical columns
    # features_to_drop=features_to_drop,
    #na_values=na_values
)

In [None]:
df.dtypes

race                          int32
gender                        int32
age                           int32
weight                      float64
admission_type_id           float64
discharge_disposition_id    float64
admission_source_id         float64
time_in_hospital            float64
payer_code                  float64
medical_specialty             int32
primary_diagnosis_code      float64
other_diagnosis_codes         int32
number_outpatient           float64
number_inpatient            float64
number_emergency            float64
num_lab_procedures          float64
number_diagnoses            float64
num_medications             float64
num_procedures              float64
ndc_code                      int32
max_glu_serum                 int32
A1Cresult                     int32
change                        int32
readmitted                    int64
dtype: object

In [None]:
print(df['race'].unique())

[2 0 4 1 3]


In [52]:
privileged_groups = [{'race': 2}, {'race': 0}, {'race': 3}, {'race' : 4}]
unprivileged_groups = [{'race': 1}]

In [53]:
df['race'].isna().sum()

0

PRE PROCESSING MITIGATION TECHNIQUES

First, let us try reweighting our data

In [57]:
reweighing = Reweighing(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups
)

reweighed_data = reweighing.fit_transform(dataset)

X_train, X_test, y_train, y_test = train_test_split(
    reweighed_data.features, reweighed_data.labels.ravel(), test_size=0.3, random_state=42
)

X_train = pd.DataFrame(X_train, columns=X.columns)
X_test = pd.DataFrame(X_test, columns=X.columns)

rf = get_rf_pipeline()
rf.fit(X_train, y_train)




y_pred = rf.predict(X_test)

dataset_test = dataset.copy()
dataset_test.features = X_test
dataset_test.labels = y_test

reweighed_dataset_test = dataset.copy()
reweighed_dataset_test.features = X_test
reweighed_dataset_test.labels = y_pred



y_pred_proba = rf.predict_proba(X_test)[:, 1]

In [58]:
for group in groups:
    evaluate_fairness(y_test, y_pred, X_test[group], group)


 group is gender
Demographic Parity Ratio: 0.0000
Equalized Odds Difference: 0.7065
Demographic Parity Difference: 0.4240
Subgroup: 0.0, Positive Prediction Rate: 0.4240
Subgroup: 1.0, Positive Prediction Rate: 0.3755
Subgroup: 2.0, Positive Prediction Rate: 0.0000
--> Unprivileged Group: 2.0 (Positive Rate: 0.0000)
--> Privileged Group: 0.0 (Positive Rate: 0.4240)

 group is race
Demographic Parity Ratio: 0.3925
Equalized Odds Difference: 0.2584
Demographic Parity Difference: 0.2512
Subgroup: 2.0, Positive Prediction Rate: 0.4134
Subgroup: 0.0, Positive Prediction Rate: 0.3767
Subgroup: 4.0, Positive Prediction Rate: 0.2928
Subgroup: 1.0, Positive Prediction Rate: 0.1623
Subgroup: 3.0, Positive Prediction Rate: 0.3295
--> Unprivileged Group: 1.0 (Positive Rate: 0.1623)
--> Privileged Group: 2.0 (Positive Rate: 0.4134)


Encoded race: {'AfricanAmerican': 0, 'Asian': 1, 'Caucasian': 2, 'Hispanic': 3, 'Other': 4}

Let's try resampling

In [60]:
preprocessor = ColumnTransformer(
    transformers=[
        ("num", Pipeline([
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]), numerical),
        ("cat", Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('encoder', OneHotEncoder(handle_unknown='ignore'))
        ]), categorical)
    ]
)

smote = SMOTE(random_state=42)

pipeline = ImbPipeline([
    ("preprocessor", preprocessor),
    ("resampler", smote),
    ("classifier", RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42))
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

for group in groups:
    evaluate_fairness(y_test,y_pred, X_test[group], group)

Accuracy: 0.767360532889259

 group is gender
Demographic Parity Ratio: 0.0000
Equalized Odds Difference: 0.7080
Demographic Parity Difference: 0.4304
Subgroup: Female, Positive Prediction Rate: 0.4304
Subgroup: Male, Positive Prediction Rate: 0.3808
Subgroup: Unknown/Invalid, Positive Prediction Rate: 0.0000
--> Unprivileged Group: Unknown/Invalid (Positive Rate: 0.0000)
--> Privileged Group: Female (Positive Rate: 0.4304)

 group is race
Demographic Parity Ratio: 0.4402
Equalized Odds Difference: 0.2597
Demographic Parity Difference: 0.2352
Subgroup: Caucasian, Positive Prediction Rate: 0.4201
Subgroup: AfricanAmerican, Positive Prediction Rate: 0.3798
Subgroup: Other, Positive Prediction Rate: 0.2989
Subgroup: Asian, Positive Prediction Rate: 0.1849
Subgroup: Hispanic, Positive Prediction Rate: 0.3272
--> Unprivileged Group: Asian (Positive Rate: 0.1849)
--> Privileged Group: Caucasian (Positive Rate: 0.4201)


How about Adversial Debiasing?

Adversial Debiasing for Race; (Asian=Privileged, White=Unprivileged)

In [65]:
privileged_groups = privileged_groups[0]

In [None]:
def get_adversial_debiasing(privileged_groups, unprivileged_groups,):
    sess = tf.compat.v1.Session()
    tf.compat.v1.disable_eager_execution()

    train_dataset, test_dataset = dataset.split([0.7], shuffle=True, seed=42)


    adversarial_model = AdversarialDebiasing(
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
        scope_name='debiased_classifier',
        sess=sess,
        num_epochs=100,
        batch_size=128,
        classifier_num_hidden_units=100,
        debias=True,
        adversary_loss_weight=0.001
    )

    adversarial_model.fit(train_dataset)
    y_pred_adversarial = adversarial_model.predict(test_dataset).labels
    
    print("Adversarial Debiasing Results:")
    print("Accuracy:", accuracy_score(y_test, y_pred_adversarial))
    print("ROC-AUC:", roc_auc_score(y_test, y_pred_adversarial))
    print("Classification Report:")
    print(classification_report(y_test, y_pred_adversarial))
    
    for group in groups:
        evaluate_fairness(y_test,y_pred_adversarial,X_test[group],group)