In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, accuracy_score
from sklearn.inspection import permutation_importance
from fairlearn.metrics import MetricFrame
from sklearn.linear_model import LogisticRegression
from fairlearn.metrics import equalized_odds_difference, demographic_parity_difference, demographic_parity_ratio 
from sklearn.impute import SimpleImputer
from sklearn.model_selection import cross_val_score
from sklearn.utils.class_weight import compute_sample_weight
from aif360.metrics import ClassificationMetric
from aif360.datasets import StandardDataset
from aif360.algorithms.preprocessing import Reweighing 
from aif360.algorithms.inprocessing import AdversarialDebiasing
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

import tensorflow as tf
import warnings


pd.set_option("display.max_columns", None)
warnings.simplefilter(action='ignore', category=FutureWarning)

if tf.__version__.startswith('2'):
    tf.compat.v1.disable_eager_execution


  vect_normalized_discounted_cumulative_gain = vmap(
  monte_carlo_vect_ndcg = vmap(vect_normalized_discounted_cumulative_gain, in_dims=(0,))





In [2]:
df = pd.read_csv('final_project_dataset.csv')
df.describe()

Unnamed: 0,encounter_id,patient_nbr,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,number_outpatient,number_inpatient,number_emergency,num_lab_procedures,number_diagnoses,num_medications,num_procedures
count,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0,143424.0
mean,167429000.0,54936080.0,2.027694,3.655316,5.701096,4.49019,0.362429,0.600855,0.195086,43.255745,7.424434,16.776035,1.349021
std,101904600.0,38578400.0,1.427585,5.219279,4.064532,2.999667,1.249295,1.207934,0.92041,19.657319,1.924872,8.39713,1.719104
min,12522.0,135.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
25%,88295960.0,23572190.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,32.0,6.0,11.0,0.0
50%,154763700.0,46307830.0,1.0,1.0,7.0,4.0,0.0,0.0,0.0,44.0,8.0,15.0,1.0
75%,232089700.0,88236270.0,3.0,3.0,7.0,6.0,0.0,1.0,0.0,57.0,9.0,21.0,2.0
max,443867200.0,189502600.0,8.0,28.0,25.0,14.0,42.0,21.0,76.0,132.0,16.0,81.0,6.0


Lets filter unknown rows


In [3]:
df = df[ (df['race'] != '?') & (df['gender'] != '?') & (df['gender'] != 'Unknown/Invalid') & (df['race'] != 'Other')] 

In [4]:
df.head(2)

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,payer_code,medical_specialty,primary_diagnosis_code,other_diagnosis_codes,number_outpatient,number_inpatient,number_emergency,num_lab_procedures,number_diagnoses,num_medications,num_procedures,ndc_code,max_glu_serum,A1Cresult,change,readmitted
0,2278392,8222157,Caucasian,Female,[0-10),?,6,25,1,1,?,Pediatrics-Endocrinology,250.83,?|?,0,0,0,41,1,1,0,,,,No,NO
1,149190,55629189,Caucasian,Female,[10-20),?,1,1,7,3,?,?,276.0,250.01|255,0,0,0,59,9,18,0,68071-1701,,,Ch,>30


In [5]:
a = df['readmitted'].unique()
print(a)

['NO' '>30' '<30']


Let us map our results to binary values. 0 if the patient hasnt been readmitted else 1

In [None]:
df['readmitted'] = df['readmitted'].apply(lambda x: 0 if x == 'NO' else 1)

Lets drop unnecessary columns

In [7]:
df = df.drop(columns=['encounter_id', 'patient_nbr','payer_code'])

In [8]:
print((df['readmitted'].isna()).sum())

0


In [9]:
def convert_weight(value):
    if pd.isna(value) or value == '?':
        return np.nan
    elif '[' in value and ')' in value:
        lower, upper = value.strip('[]()').split('-')
        return (float(lower) + float(upper)) / 2
    elif '>' in value:
        a = value.strip('>')
        return int(a)
    else:
        return float(value)

df['weight'] = df['weight'].apply(convert_weight)

Let us define numerical and categorical columns for processing

In [10]:
df['age'].fillna(df['age'].mode(), inplace=True)

In [12]:
print(df.dtypes)

race                         object
gender                       object
age                          object
weight                      float64
admission_type_id             int64
discharge_disposition_id      int64
admission_source_id           int64
time_in_hospital              int64
medical_specialty            object
primary_diagnosis_code       object
other_diagnosis_codes        object
number_outpatient             int64
number_inpatient              int64
number_emergency              int64
num_lab_procedures            int64
number_diagnoses              int64
num_medications               int64
num_procedures                int64
ndc_code                     object
max_glu_serum                object
A1Cresult                    object
change                       object
readmitted                    int64
dtype: object


In [32]:
categorical = [
    'race',
    'gender',
    'age',
    'medical_specialty',
    'ndc_code',
    'max_glu_serum',
    'A1Cresult',
    'change',
    'other_diagnosis_codes',
    'primary_diagnosis_code'
]

numerical = [
    'weight',
    'admission_type_id',
    'discharge_disposition_id',
    'admission_source_id',
    'time_in_hospital',
    'number_outpatient',
    'number_inpatient',
    'number_emergency',
    'num_lab_procedures',
    'number_diagnoses',
    'num_medications',
    'num_procedures'
    #'payer_code',
]

In [36]:
print(df[categorical].isna().sum())


race                      0
gender                    0
age                       0
medical_specialty         0
ndc_code                  0
max_glu_serum             0
A1Cresult                 0
change                    0
other_diagnosis_codes     0
primary_diagnosis_code    0
dtype: int64


In [34]:
print(df[numerical].isna().sum())

weight                      0
admission_type_id           0
discharge_disposition_id    0
admission_source_id         0
time_in_hospital            0
number_outpatient           0
number_inpatient            0
number_emergency            0
num_lab_procedures          0
number_diagnoses            0
num_medications             0
num_procedures              0
dtype: int64


Deal with nulls

In [30]:
df.dtypes

race                         object
gender                       object
age                          object
weight                      float64
admission_type_id             int64
discharge_disposition_id      int64
admission_source_id           int64
time_in_hospital              int64
medical_specialty            object
primary_diagnosis_code       object
other_diagnosis_codes        object
number_outpatient             int64
number_inpatient              int64
number_emergency              int64
num_lab_procedures            int64
number_diagnoses              int64
num_medications               int64
num_procedures                int64
ndc_code                     object
max_glu_serum                object
A1Cresult                    object
change                       object
readmitted                    int64
dtype: object

Handle missing values

In [None]:
df[numerical] = df[numerical].replace('?', np.nan)
df[categorical] = df[categorical].replace('?', np.nan)


df[numerical] = df[numerical].fillna(df[numerical].mean())
df[categorical] = df[categorical].fillna(df[categorical].mode().iloc[0])

In [38]:
preprocessor = ColumnTransformer(
    transformers=[
        ("num", Pipeline([
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]), numerical),
        ("cat", Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('encoder', OneHotEncoder(handle_unknown='ignore'))
        ]), categorical)
    ]
)

In [39]:
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(max_iter = 1000,random_state=42))
])

In [41]:
X = df.drop('readmitted', axis=1)
y = df['readmitted']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)






pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)
y_pred_proba = pipeline.predict_proba(X_test)[:, 1]

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("ROC-AUC Score:", roc_auc_score(y_test, y_pred_proba))




# scores = cross_val_score(pipeline, X_train, y_train, cv=10, scoring='roc_auc')
# print(f'Cross-validation ROC-AUC: {scores.mean()} ± {scores.std()}')

Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.74      0.69     22103
           1       0.64      0.53      0.58     19280

    accuracy                           0.64     41383
   macro avg       0.64      0.64      0.64     41383
weighted avg       0.64      0.64      0.64     41383

ROC-AUC Score: 0.6914921403433154


Well that is just a tiny bit better than random guessing.

Lets try again, with a Random Forest Classifier

In [None]:
def get_rf_pipeline_without_encoding():
    #this function returns a random forest classifier pipeline
    numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
    ])

    categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent'))])

                                    
    preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical),
        ('cat', categorical_transformer, categorical)
    ])

    pipeline_rf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42))
    ])
    return pipeline_rf

In [42]:
def get_rf_pipeline():
    #this function returns a random forest classifier pipeline
    numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
    ])

    categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

                                    
    preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical),
        ('cat', categorical_transformer, categorical)
    ])

    pipeline_rf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42))
    ])
    return pipeline_rf

In [43]:
pipeline_rf = get_rf_pipeline()

X = df.drop('readmitted', axis=1)
y = df['readmitted']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

pipeline_rf.fit(X_train, y_train)

y_pred = pipeline_rf.predict(X_test)
y_pred_proba = pipeline_rf.predict_proba(X_test)[:, 1]

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("ROC-AUC Score:", roc_auc_score(y_test, y_pred_proba))

# scores = cross_val_score(pipeline_rf, X_train, y_train, cv=10, scoring='roc_auc')
# print(f'Cross-validation ROC-AUC: {scores.mean()} ± {scores.std()}')

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.85      0.80     22103
           1       0.80      0.69      0.74     19280

    accuracy                           0.77     41383
   macro avg       0.78      0.77      0.77     41383
weighted avg       0.78      0.77      0.77     41383

ROC-AUC Score: 0.8586105216937


We hereby define our sensitive attributes

In [44]:
groups = ['gender', 'race']
results = []
privileged = {}
for group in groups:
    privileged[group] = {}

Lets check how fair our data is, and which are the most privileged and unprivileged groups.
The class labels which are the least likely to get a positive prediction, are the privileged ones.

In [None]:
def evaluate_fairness(y_true, y_pred, sensitive_features, group_name):
    eod = equalized_odds_difference(
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=sensitive_features
    )
    
    dpd = demographic_parity_difference(
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_features
    )
    
    di_ratio = demographic_parity_ratio(
    y_true=y_true,
    y_pred=y_pred,
    sensitive_features=sensitive_features
    )
    
    print(f'\n group is {group_name}')
    print(f"Demographic Parity Ratio: {di_ratio:.4f}")
    print(f"Equalized Odds Difference: {eod:.4f}")
    print(f"Demographic Parity Difference: {dpd:.4f}")
    
    
    positive_rates = {}
    for group_value in sensitive_features.unique():
        mask = sensitive_features == group_value
        group_y_pred = y_pred[mask]
        positive_rate = group_y_pred.mean()
        positive_rates[group_value] = positive_rate
        print(f"Subgroup: {group_value}, Positive Prediction Rate: {positive_rate:.4f}")
    
    max_rate = max(positive_rates.values())
    min_rate = min(positive_rates.values())
    
    positive_rates = dict(sorted(positive_rates.items(), key=lambda x: x[1]))
    
    
    values = list(positive_rates.values())
    q1, q3 = np.percentile(values, [25, 75])
    iqr = q3 - q1
    threshold = iqr * 0.5 
        
    
    for group_value, rate in positive_rates.items():
        
        if rate == min_rate or (rate - min_rate <= threshold):
            print(f"--> Privileged Group: {group_value} (Positive Rate: {rate:.4f})")
            
            if 'privileged' in privileged[group_name]:
                privileged[group_name]['privileged'].append(group_value)
            else:
                privileged[group_name]['privileged'] = [group_value]

        elif rate == max_rate:
            
            print(f"--> Unprivileged Group: {group_value} (Positive Rate: {rate:.4f})")
            if 'unprivileged' in privileged[group_name]:
                privileged[group_name]['unprivileged'].append(group_value)
            else:
                privileged[group_name]['unprivileged'] = [group_value]

In [46]:
for group in groups:
    evaluate_fairness(y_test, y_pred, X_test[group], group)


 group is gender
Demographic Parity Ratio: 0.8856
Equalized Odds Difference: 0.0425
Demographic Parity Difference: 0.0487
Subgroup: Male, Positive Prediction Rate: 0.3774
Subgroup: Female, Positive Prediction Rate: 0.4262
--> Privileged Group: Male (Positive Rate: 0.3774)
--> Unprivileged Group: Female (Positive Rate: 0.4262)

 group is race
Demographic Parity Ratio: 0.5025
Equalized Odds Difference: 0.1964
Demographic Parity Difference: 0.2066
Subgroup: Caucasian, Positive Prediction Rate: 0.4152
Subgroup: Asian, Positive Prediction Rate: 0.2086
Subgroup: AfricanAmerican, Positive Prediction Rate: 0.3703
Subgroup: Hispanic, Positive Prediction Rate: 0.3262
--> Privileged Group: Asian (Positive Rate: 0.2086)
--> Unprivileged Group: Caucasian (Positive Rate: 0.4152)


We can note different results based on gender/race. 

In [47]:

for group in groups:
    
    aif_dict = {}

    for element in privileged[group]['privileged']:    
        aif_dict[element] = 1
    
    for element in privileged[group]['unprivileged']:
        aif_dict[element] = 0


    privileged_class = [key for key, value in aif_dict.items() if value == 1]
    unprivileged_class = [key for key, value in aif_dict.items() if value == 0]

    print(f'privileged classes for group {group} are {privileged_class}')
    print(f'unprivileged classes for group {group} are {unprivileged_class}')
    
    #map column values to 0,1 s, based on whether or not the entry is privileged
    #df[group] = df[group].apply(lambda x: 1 if x in(privileged_class) else 0)


encoder = LabelEncoder()


for col in categorical:
    
    df[col] = encoder.fit_transform(df[col].astype(str))  # Ensure all categories are considered by converting to string

    print(f"Encoded {col}: {dict(zip(encoder.classes_, range(len(encoder.classes_))))}")



all_unique_values = set()

for col in categorical:
    all_unique_values.update(df[col].astype(str).unique())

encoder.fit(sorted(all_unique_values)) 

privileged classes for group gender are ['Male']
unprivileged classes for group gender are ['Female']
privileged classes for group race are ['Asian']
unprivileged classes for group race are ['Caucasian']
Encoded race: {'AfricanAmerican': 0, 'Asian': 1, 'Caucasian': 2, 'Hispanic': 3}
Encoded gender: {'Female': 0, 'Male': 1}
Encoded age: {'[0-10)': 0, '[10-20)': 1, '[20-30)': 2, '[30-40)': 3, '[40-50)': 4, '[50-60)': 5, '[60-70)': 6, '[70-80)': 7, '[80-90)': 8, '[90-100)': 9}
Encoded medical_specialty: {'AllergyandImmunology': 0, 'Anesthesiology': 1, 'Anesthesiology-Pediatric': 2, 'Cardiology': 3, 'Cardiology-Pediatric': 4, 'DCPTEAM': 5, 'Dentistry': 6, 'Dermatology': 7, 'Emergency/Trauma': 8, 'Endocrinology': 9, 'Endocrinology-Metabolism': 10, 'Family/GeneralPractice': 11, 'Gastroenterology': 12, 'Gynecology': 13, 'Hematology': 14, 'Hematology/Oncology': 15, 'Hospitalist': 16, 'InfectiousDiseases': 17, 'InternalMedicine': 18, 'Nephrology': 19, 'Neurology': 20, 'Neurophysiology': 21, 'Ob

First we create an AIF360 dataset

Handle null values

In [49]:
scaler = StandardScaler()
df[numerical] = scaler.fit_transform(df[numerical])


df.head(5)

Unnamed: 0,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,medical_specialty,primary_diagnosis_code,other_diagnosis_codes,number_outpatient,number_inpatient,number_emergency,num_lab_procedures,number_diagnoses,num_medications,num_procedures,ndc_code,max_glu_serum,A1Cresult,change,readmitted
0,2,0,0,3.143812e-15,2.769862,4.055945,-1.157225,-1.164024,37,123,17919,-0.291435,-0.500797,-0.21332,-0.112624,-3.363731,-1.884,-0.785233,238,2,1,1,0
1,2,0,1,3.143812e-15,-0.718341,-0.511339,0.313759,-0.497334,18,142,1256,-0.291435,-0.500797,-0.21332,0.803133,0.8114,0.144269,-0.785233,238,2,1,0,1
2,0,0,2,3.143812e-15,-0.718341,-0.511339,0.313759,-0.830679,18,453,3003,1.289595,0.319129,-0.21332,-1.638886,-0.754274,-0.45228,2.13205,65,2,1,1,0
3,2,1,3,3.143812e-15,-0.718341,-0.511339,0.313759,-0.830679,18,552,2121,-0.291435,-0.500797,-0.21332,0.040002,-0.232383,-0.094351,-0.201776,238,2,1,0,0
4,2,1,4,3.143812e-15,-0.718341,-0.511339,0.313759,-1.164024,18,53,334,-0.291435,-0.500797,-0.21332,0.39613,-1.276165,-1.04883,-0.785233,11,2,1,0,0


Lets create an aif360 dataset

In [None]:
dataset = StandardDataset(
    df,
    label_name='readmitted',
    favorable_classes=[0],
    protected_attribute_names=['race', 'gender'],
    privileged_classes=[[1,3], [1]]
    # categorical_features=categorical_columns,
    # features_to_drop=features_to_drop,
    #na_values=na_values
)

In [None]:
print(df['race'].unique())

In [61]:
unprivileged_groups = [{'race': 2}, {'race': 0}, {'gender' : 0}]
privileged_groups = [{'race': 1}, {'race': 3}, {'gender' : 1}]

In [58]:
df['race'].isna().sum()

0

PRE PROCESSING MITIGATION TECHNIQUES

First, let us try reweighting our data

In [136]:
reweighing = Reweighing(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups
)

reweighed_data = reweighing.fit_transform(dataset)

X_train, X_test, y_train, y_test = train_test_split(
    reweighed_data.features, reweighed_data.labels.ravel(), test_size=0.3, random_state=42
)

X_train = pd.DataFrame(X_train, columns=X.columns)
X_test = pd.DataFrame(X_test, columns=X.columns)

rf = get_rf_pipeline_without_encoding()
rf.fit(X_train, y_train)




y_pred = rf.predict(X_test)

dataset_test = dataset.copy()
dataset_test.features = X_test
dataset_test.labels = y_test

reweighed_dataset_test = dataset.copy()
reweighed_dataset_test.features = X_test
reweighed_dataset_test.labels = y_pred



y_pred_proba = rf.predict_proba(X_test)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

  self.w_up_fav = n_fav*n_up / (n*n_up_fav)
  self.w_up_unfav = n_unfav*n_up / (n*n_up_unfav)


Accuracy: 0.7692047459101563


In [64]:
for group in groups:
    evaluate_fairness(y_test, y_pred, X_test[group], group)


 group is gender
Demographic Parity Ratio: 0.9278
Equalized Odds Difference: 0.0296
Demographic Parity Difference: 0.0308
Subgroup: 1.0, Positive Prediction Rate: 0.3961
Subgroup: 0.0, Positive Prediction Rate: 0.4269
--> Privileged Group: 1.0 (Positive Rate: 0.3961)
--> Unprivileged Group: 0.0 (Positive Rate: 0.4269)

 group is race
Demographic Parity Ratio: 0.6303
Equalized Odds Difference: 0.1442
Demographic Parity Difference: 0.1560
Subgroup: 2.0, Positive Prediction Rate: 0.4220
Subgroup: 0.0, Positive Prediction Rate: 0.3874
Subgroup: 1.0, Positive Prediction Rate: 0.2660
Subgroup: 3.0, Positive Prediction Rate: 0.3353
--> Privileged Group: 1.0 (Positive Rate: 0.2660)
--> Unprivileged Group: 2.0 (Positive Rate: 0.4220)


Let's try resampling

In [68]:
#redefine x and y after having applied label encoding
X = df.drop('readmitted', axis=1)
y = df['readmitted']

In [69]:
preprocessor = ColumnTransformer(
    transformers=[
        ("num", Pipeline([
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]), numerical),
        ("cat", Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent'))
            #, ('encoder', OneHotEncoder(handle_unknown='ignore'))
        ]), categorical)
    ]
)

smote = SMOTE(random_state=42)

pipeline = ImbPipeline([
    ("preprocessor", preprocessor),
    ("resampler", smote),
    ("classifier", RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42))
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

for group in groups:
    evaluate_fairness(y_test,y_pred, X_test[group], group)

Accuracy: 0.7734577000217481

 group is gender
Demographic Parity Ratio: 0.9346
Equalized Odds Difference: 0.0264
Demographic Parity Difference: 0.0286
Subgroup: 1, Positive Prediction Rate: 0.4095
Subgroup: 0, Positive Prediction Rate: 0.4381
--> Privileged Group: 1 (Positive Rate: 0.4095)
--> Unprivileged Group: 0 (Positive Rate: 0.4381)

 group is race
Demographic Parity Ratio: 0.7198
Equalized Odds Difference: 0.0835
Demographic Parity Difference: 0.1219
Subgroup: 2, Positive Prediction Rate: 0.4350
Subgroup: 0, Positive Prediction Rate: 0.3956
Subgroup: 1, Positive Prediction Rate: 0.3131
Subgroup: 3, Positive Prediction Rate: 0.3412
--> Privileged Group: 1 (Positive Rate: 0.3131)
--> Privileged Group: 3 (Positive Rate: 0.3412)
--> Unprivileged Group: 2 (Positive Rate: 0.4350)


How about Adversial Debiasing?

In [138]:
def get_adversial_debiasing(privileged_groups, unprivileged_groups):
    sess = tf.compat.v1.Session()
    tf.compat.v1.disable_eager_execution()

    train_dataset, test_dataset = dataset.split([0.7], shuffle=True, seed=42)


    adversarial_model = AdversarialDebiasing(
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
        scope_name='debiased_classifier',
        sess=sess,
        num_epochs=100,
        batch_size=128,
        classifier_num_hidden_units=100,
        debias=True,
        adversary_loss_weight=0.0001
    )

    adversarial_model.fit(train_dataset)
    y_pred_adversarial = adversarial_model.predict(test_dataset).labels
    
    print("Adversarial Debiasing Results:")
    print("Accuracy:", accuracy_score(y_test, y_pred_adversarial))
    print("ROC-AUC:", roc_auc_score(y_test, y_pred_adversarial))
    print("Classification Report:")
    print(classification_report(y_test, y_pred_adversarial))
    for group in groups:
        evaluate_fairness(y_test,y_pred_adversarial,X_test[group],group)

Adversial Debiasing for Race; (Asian=Privileged, White=Unprivileged)

In [140]:
privileged_groups = [{'race': 1}]
unprivileged_groups = [{'race': 2}]
tf.compat.v1.reset_default_graph()

get_adversial_debiasing(privileged_groups, unprivileged_groups)

epoch 0; iter: 0; batch classifier loss: 257.878479; batch adversarial loss: 0.378883
epoch 0; iter: 200; batch classifier loss: 42.213440; batch adversarial loss: -0.279931
epoch 0; iter: 400; batch classifier loss: 1.506448; batch adversarial loss: -0.558018
epoch 0; iter: 600; batch classifier loss: 1.131390; batch adversarial loss: -0.902132
epoch 1; iter: 0; batch classifier loss: 1.318234; batch adversarial loss: -0.503950
epoch 1; iter: 200; batch classifier loss: 1.127713; batch adversarial loss: -1.739916
epoch 1; iter: 400; batch classifier loss: 1.381770; batch adversarial loss: -2.151649
epoch 1; iter: 600; batch classifier loss: 1.631709; batch adversarial loss: -2.029321
epoch 2; iter: 0; batch classifier loss: 0.787404; batch adversarial loss: -2.245456
epoch 2; iter: 200; batch classifier loss: 1.094564; batch adversarial loss: -1.269193
epoch 2; iter: 400; batch classifier loss: 0.841635; batch adversarial loss: -1.837111
epoch 2; iter: 600; batch classifier loss: 0.99

Adversial Debiasing for Gender

In [128]:
privileged_groups = [{'gender': 1}]
unprivileged_groups = [{'gender': 2}]

tf.compat.v1.reset_default_graph()

get_adversial_debiasing(privileged_groups, unprivileged_groups)

epoch 0; iter: 0; batch classifier loss: 257.878479; batch adversarial loss: 0.760571
epoch 0; iter: 200; batch classifier loss: 37.428455; batch adversarial loss: 0.724498
epoch 0; iter: 400; batch classifier loss: 2.999765; batch adversarial loss: 0.683402
epoch 0; iter: 600; batch classifier loss: 1.351569; batch adversarial loss: 0.702342
epoch 1; iter: 0; batch classifier loss: 1.115182; batch adversarial loss: 0.674557
epoch 1; iter: 200; batch classifier loss: 1.042283; batch adversarial loss: 0.700829
epoch 1; iter: 400; batch classifier loss: 0.813903; batch adversarial loss: 0.688961
epoch 1; iter: 600; batch classifier loss: 0.928221; batch adversarial loss: 0.689345
epoch 2; iter: 0; batch classifier loss: 0.803135; batch adversarial loss: 0.691285
epoch 2; iter: 200; batch classifier loss: 0.667343; batch adversarial loss: 0.704446
epoch 2; iter: 400; batch classifier loss: 1.239906; batch adversarial loss: 0.696282
epoch 2; iter: 600; batch classifier loss: 0.781200; batc

Let us now use some in-processing techniques

Fairlearn Constraints

First we will use a Demographic Parity Constraint

In [None]:
def get_rf_pipeline():
    #this function returns a random forest classifier pipeline
    numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
    ])

    categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

                                    
    preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical),
        ('cat', categorical_transformer, categorical)
    ])

    pipeline_rf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42))
    ])
    return pipeline_rf

In [130]:
rf = RandomForestClassifier(random_state=42, class_weight='balanced')

preprocessor = ColumnTransformer(
    transformers=[
        ("num", Pipeline([
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]), numerical),
        ("cat", Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent'))
            #,('encoder', OneHotEncoder())
        ]), categorical)
    ]
)

fair_model = ExponentiatedGradient(
    estimator=rf, 
    constraints=DemographicParity(),
)

pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("fair_classifier", fair_model)
])

X = df.drop('readmitted', axis=1)
y = df['readmitted']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

fair_model.fit(X_train, y_train, sensitive_features=X_train[groups])

y_pred = fair_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.7647826402145809


In [131]:
for group in groups:
    evaluate_fairness(y_test, y_pred, X_test[group], group)


 group is gender
Demographic Parity Ratio: 0.9696
Equalized Odds Difference: 0.0140
Demographic Parity Difference: 0.0128
Subgroup: 1, Positive Prediction Rate: 0.4081
Subgroup: 0, Positive Prediction Rate: 0.4209
--> Privileged Group: 1 (Positive Rate: 0.4081)
--> Unprivileged Group: 0 (Positive Rate: 0.4209)

 group is race
Demographic Parity Ratio: 0.9079
Equalized Odds Difference: 0.1352
Demographic Parity Difference: 0.0400
Subgroup: 2, Positive Prediction Rate: 0.4197
Subgroup: 0, Positive Prediction Rate: 0.3964
Subgroup: 1, Positive Prediction Rate: 0.4343
Subgroup: 3, Positive Prediction Rate: 0.3943
--> Privileged Group: 3 (Positive Rate: 0.3943)
--> Privileged Group: 0 (Positive Rate: 0.3964)
--> Unprivileged Group: 1 (Positive Rate: 0.4343)


Here we will be using an equalized odds constraint

In [132]:
rf = RandomForestClassifier(random_state=42, class_weight='balanced')

preprocessor = ColumnTransformer(
    transformers=[
        ("num", Pipeline([
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]), numerical),
        ("cat", Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent'))
            #,('encoder', OneHotEncoder())
        ]), categorical)
    ]
)

fair_model = ExponentiatedGradient(
    estimator=rf, 
    constraints=EqualizedOdds(),
)

pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("fair_classifier", fair_model)
])

X = df.drop('readmitted', axis=1)
y = df['readmitted']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

fair_model.fit(X_train, y_train, sensitive_features=X_train[groups])

y_pred = fair_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.7687214556702028


In [133]:
for group in groups:
    evaluate_fairness(y_test, y_pred, X_test[group], group)


 group is gender
Demographic Parity Ratio: 0.9344
Equalized Odds Difference: 0.0247
Demographic Parity Difference: 0.0279
Subgroup: 1, Positive Prediction Rate: 0.3981
Subgroup: 0, Positive Prediction Rate: 0.4260
--> Privileged Group: 1 (Positive Rate: 0.3981)
--> Unprivileged Group: 0 (Positive Rate: 0.4260)

 group is race
Demographic Parity Ratio: 0.6280
Equalized Odds Difference: 0.1564
Demographic Parity Difference: 0.1576
Subgroup: 2, Positive Prediction Rate: 0.4236
Subgroup: 0, Positive Prediction Rate: 0.3842
Subgroup: 1, Positive Prediction Rate: 0.2660
Subgroup: 3, Positive Prediction Rate: 0.3282
--> Privileged Group: 1 (Positive Rate: 0.2660)
--> Unprivileged Group: 2 (Positive Rate: 0.4236)


Post Processing Mitigation techniques

With equalized odd's fairness constraint

In [135]:
from fairlearn.postprocessing import ThresholdOptimizer


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

base_model = RandomForestClassifier(random_state=42, class_weight='balanced')

base_model.fit(X_train, y_train)

y_pred_test = base_model.predict(X_test)

base_accuracy = accuracy_score(y_test, y_pred_test)
print(f"Base Model Accuracy: {base_accuracy}")


for group in groups:
    print(f"\nEvaluating for sensitive attribute: {group}")

    
    dem_parity_diff_base = demographic_parity_difference(y_test, y_pred_test, sensitive_features=X_test[group])
    eq_odds_diff_base = equalized_odds_difference(y_test, y_pred_test, sensitive_features=X_test[group])
    print(f"Base Model Demographic Parity Difference ({group}): {dem_parity_diff_base}")
    print(f"Base Model Equalized Odds Difference ({group}): {eq_odds_diff_base}")

    postprocessor = ThresholdOptimizer(
        estimator=base_model, 
        constraints="equalized_odds",
        prefit=True
    )

    postprocessor.fit(X_test, y_test, sensitive_features=X_test[group])

    y_pred_fair = postprocessor.predict(X_test, sensitive_features=X_test[group])

    fair_accuracy = accuracy_score(y_test, y_pred_fair)
    print(f"Fair Model Accuracy ({group}): {fair_accuracy}")

    dem_parity_diff_fair = demographic_parity_difference(y_test, y_pred_fair, sensitive_features=X_test[group])
    eq_odds_diff_fair = equalized_odds_difference(y_test, y_pred_fair, sensitive_features=X_test[group])
    print(f"Fair Model Demographic Parity Difference ({group}): {dem_parity_diff_fair}")
    print(f"Fair Model Equalized Odds Difference ({group}): {eq_odds_diff_fair}")

Base Model Accuracy: 0.7702196554140589

Evaluating for sensitive attribute: gender
Base Model Demographic Parity Difference (gender): 0.03135029872315853
Base Model Equalized Odds Difference (gender): 0.030202384642461222
Fair Model Accuracy (gender): 0.7653384239905275
Fair Model Demographic Parity Difference (gender): 0.009934017774833381
Fair Model Equalized Odds Difference (gender): 0.0045877704985210765

Evaluating for sensitive attribute: race
Base Model Demographic Parity Difference (race): 0.15347885516056653
Base Model Equalized Odds Difference (race): 0.13408077629163384
Fair Model Accuracy (race): 0.7270135079622067
Fair Model Demographic Parity Difference (race): 0.07433949569475723
Fair Model Equalized Odds Difference (race): 0.039934116477096104


In [137]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

base_model = RandomForestClassifier(random_state=42, class_weight='balanced')

base_model.fit(X_train, y_train)

y_pred_test = base_model.predict(X_test)

base_accuracy = accuracy_score(y_test, y_pred_test)
print(f"Base Model Accuracy: {base_accuracy}")


for group in groups:
    print(f"\nEvaluating for sensitive attribute: {group}")

    
    dem_parity_diff_base = demographic_parity_difference(y_test, y_pred_test, sensitive_features=X_test[group])
    eq_odds_diff_base = equalized_odds_difference(y_test, y_pred_test, sensitive_features=X_test[group])
    print(f"Base Model Demographic Parity Difference ({group}): {dem_parity_diff_base}")
    print(f"Base Model Equalized Odds Difference ({group}): {eq_odds_diff_base}")

    postprocessor = ThresholdOptimizer(
        estimator=base_model, 
        constraints="demographic_parity",
        prefit=True
    )

    postprocessor.fit(X_test, y_test, sensitive_features=X_test[group])

    y_pred_fair = postprocessor.predict(X_test, sensitive_features=X_test[group])

    fair_accuracy = accuracy_score(y_test, y_pred_fair)
    print(f"Fair Model Accuracy ({group}): {fair_accuracy}")

    dem_parity_diff_fair = demographic_parity_difference(y_test, y_pred_fair, sensitive_features=X_test[group])
    eq_odds_diff_fair = equalized_odds_difference(y_test, y_pred_fair, sensitive_features=X_test[group])
    print(f"Fair Model Demographic Parity Difference ({group}): {dem_parity_diff_fair}")
    print(f"Fair Model Equalized Odds Difference ({group}): {eq_odds_diff_fair}")

Base Model Accuracy: 0.7702196554140589

Evaluating for sensitive attribute: gender
Base Model Demographic Parity Difference (gender): 0.03135029872315853
Base Model Equalized Odds Difference (gender): 0.030202384642461222
Fair Model Accuracy (gender): 0.7631636179107363
Fair Model Demographic Parity Difference (gender): 0.00047423451891581703
Fair Model Equalized Odds Difference (gender): 0.012240665535375617

Evaluating for sensitive attribute: race
Base Model Demographic Parity Difference (race): 0.15347885516056653
Base Model Equalized Odds Difference (race): 0.13408077629163384
Fair Model Accuracy (race): 0.7644926660706087
Fair Model Demographic Parity Difference (race): 0.020508111417202324
Fair Model Equalized Odds Difference (race): 0.15120840100195437
