# Import Libraries

In [1]:
!pip install -q pandas==2.0.3
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import accuracy_score, f1_score


import warnings
warnings.simplefilter(action='ignore', category='PerformanceWarning')

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.
beatrix-jupyterlab 2023.128.151533 requires jupyterlab~=3.6.0, but you have jupyterlab 4.1.6 which is incompatible.
libpysal 4.9.2 requires packaging>=22, but you have packaging 21.3 which is incompatible.
libpysal 4.9.2 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.
mizani 0.11.1 requires pandas>=2.1.0, but you have pandas 2.0.3 which is incompatible.
momepy 0.7.0 requires shapely>=2, but you have shapely 1.8.5.post1 which is incompatible.
osmnx 1.9.2 requires shapely>=2.0, but you have shapely 1.8.5.post1 which is incompatible.
plotnine 0.13.4 requires pandas<3.0.0,>=2.1.0, but you have pandas 2.0.3 which is incompatible.
spopt 0.6.0 requires shapely>=2.0.1, but you have shapely 1.8.5.pos

In [2]:
!pip install -q BlackBoxAuditing 
!pip install -q aif360
import aif360
from aif360.datasets import StandardDataset
from aif360.metrics import ClassificationMetric

2024-05-14 23:12:31.614970: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-14 23:12:31.615119: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-14 23:12:31.780507: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# Prepare Dataset

In [3]:
train_df = pd.read_csv('/kaggle/input/ca-independent-medical-review/Independent_Medical_Review__IMR__Determinations__Trend.csv')
train_df = train_df[['Findings','Patient Gender','Age Range','Type']].dropna().reset_index(drop=True)
train_df['Type'] = train_df['Type'].map({'Urgent Care': 1,'Medical Necessity': 1,'Experimental/Investigational': 0})
train_df = train_df.sample(frac=1,random_state=42).reset_index(drop=True)
train_df

Unnamed: 0,Findings,Patient Gender,Age Range,Type
0,A male enrollee requested Harvoni 90/400 mg fo...,Male,51-64,1
1,A 31-year-old male enrollee has requested adul...,Male,31-40,1
2,A 60-year-old male enrollee has requested Soun...,Male,51-64,0
3,A 38-year-old male enrollee has requested arti...,Male,31-40,0
4,The patient is a 53-year-old female with right...,Female,51-64,1
...,...,...,...,...
18010,A 59-year-old female enrollee has requested fo...,Female,51-64,1
18011,A 56-year-old female enrollee has requested co...,Female,51-64,1
18012,A 57-year-old female enrollee has requested ma...,Female,51-64,1
18013,Nature of Statutory Criteria/Case Summary: An...,Male,31-40,1


In [4]:
from sklearn.preprocessing import LabelEncoder
feats = ['Patient Gender','Age Range']
le = LabelEncoder()
for f in feats:
    le.fit(train_df[f].values.reshape(-1,))
    train_df[f] = le.transform(train_df[f].values.reshape(-1,))

In [5]:
CouVec = TfidfVectorizer(stop_words='english',max_features=50)
CouVec.fit(train_df['Findings'].values)
train_words = pd.DataFrame(CouVec.transform(train_df['Findings']).toarray())
train_words.shape

(18015, 50)

In [6]:
train_words = pd.concat([train_df[['Patient Gender','Age Range','Type']],train_words],axis=1).copy()
train_df = train_words
train_df.columns = train_df.columns.astype(str)

In [7]:
# DataFrame to store the metrics values
metrics_values = pd.DataFrame(columns=['Original','Corr','Expon','Grid','Thres'],index=range(2))

# Original Model

In [8]:
from sklearn.utils.extmath import softmax
from sklearn.linear_model import RidgeClassifier

class RidgeClassifierwithProba(RidgeClassifier):
    def predict_proba(self, X):
        d = self.decision_function(X)
        d_2d = np.c_[-d, d]
        return softmax(d_2d)
    
model = RidgeClassifierwithProba()

In [9]:
# CV and OOF Predictions

X = train_df.drop('Type',axis=1)
y = train_df['Type']

y_pred = pd.Series(0,index=X.index)

scores = []                   
for train_index, test_index in StratifiedKFold(n_splits=5).split(X, y):
    X_Train, X_Test = X.iloc[train_index,:], X.iloc[test_index,:]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]
    
    model.fit(X_Train.values,y_Train.values)

    preds = model.predict(X_Test.values)
    y_pred[test_index] = preds
    
    scores.append(f1_score(y_Test.values,preds))
    print(scores[-1])

print("Mean:",np.mean(scores),"\nSTD: ", np.std(scores),'\n')

0.9798172124904798
0.9830894926847806
0.9834064466908259
0.9788692175899487
0.9817212490479817
Mean: 0.9813807237008033 
STD:  0.0017827861809340511 



# Metrics

In [10]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

for age_cls in range(2):
    for priv_cls in [0,1]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Patient Gender'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred

        import numpy as np

        # Define the groups
        groups = [
            {'Patient Gender': 0},
            {'Patient Gender': 1}
        ]



        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.03297033790725762
Max Average Odds Difference (Ideal value = 0): 0.009108036651532987
Max Equal Opportunity Difference (Ideal value = 0): 0.010906694715780985
Max Theil Index (Ideal value = 0): 0.018832896433778702
Max Generalized Entropy Index (Ideal value = 0): 0.013573528645527832


In [11]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls in range(7):
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Age Range'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred

        import numpy as np

        # Define the groups
        groups = [
            {'Age Range': 0},
            {'Age Range': 1},
            {'Age Range': 2},
            {'Age Range': 3},
            {'Age Range': 4},
            {'Age Range': 5},
            {'Age Range': 6}]


        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.1967133484268908
Max Average Odds Difference (Ideal value = 0): 0.037805277305967736
Max Equal Opportunity Difference (Ideal value = 0): 0.056533447407729476
Max Theil Index (Ideal value = 0): 0.018832896433778702
Max Generalized Entropy Index (Ideal value = 0): 0.013573528645527832


In [12]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls_1 in [0,1]:   # Gender        
        for priv_cls_2 in range(7):   # Ethnicity

            # Convert to AIF360 format
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Type', favorable_classes=[age_cls],
                                             protected_attribute_names=['Patient Gender','Age Range'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Create a dataset for the predictions
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = y_pred

            import numpy as np

            # Define the groups
            groups = [{'Patient Gender':0, 'Age Range':i} for i in range(7)] + [{'Patient Gender':1, 'Age Range':i} for i in range(7)] 

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())
                max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender+Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.19795889853009152
Max Average Odds Difference (Ideal value = 0): 0.060817638109360764
Max Equal Opportunity Difference (Ideal value = 0): 0.1043470980854061
Max Theil Index (Ideal value = 0): 0.018832896433778702
Max Generalized Entropy Index (Ideal value = 0): 0.013573528645527832


# Mitigation Algorithms

# Preprocessing Algorithms:

## 1- Reweighing

In [13]:
import pandas as pd
import numpy as np
from aif360.datasets import StandardDataset
from aif360.algorithms.preprocessing import Reweighing
from sklearn.model_selection import StratifiedKFold

X = train_df.drop('Type', axis=1)
y = train_df['Type']

y_pred_1 = pd.Series(0, index=X.index)

groups = [{'Patient Gender': 0, 'Age Range': i} for i in range(7)] + [{'Patient Gender': 1, 'Age Range': i} for i in range(7)]

scores = []
for train_index, test_index in StratifiedKFold(n_splits=5).split(X.values, y.values):
    X_Train, X_Test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

    # Create a combined DataFrame for X_Train and y_Train
    combined_train = pd.concat([X_Train, y_Train], axis=1)

    # Convert to AIF360 format
    aif360_train = StandardDataset(combined_train,
                                   label_name='Type', favorable_classes=[1],
                                   protected_attribute_names=['Patient Gender', 'Age Range'],
                                   privileged_classes=[[1, i] for i in range(7)])

    # Apply the reweighing algorithm
    priv_group = [{'Patient Gender': 1, 'Age Range': i} for i in range(7)]
    unprivileged_groups = [{'Patient Gender': 0, 'Age Range': i} for i in range(7)]
    
    RW = Reweighing(unprivileged_groups=unprivileged_groups, privileged_groups=priv_group)
    aif360_train = RW.fit_transform(aif360_train)

    # Extract the weights and apply to the training data
    weights = aif360_train.instance_weights

    # Fit the model using the instance weights
    model.fit(X_Train, y_Train, sample_weight=weights)

    preds = model.predict(X_Test)
    y_pred_1[test_index] = preds

    scores.append(accuracy_score(y_Test.values, preds))
    print(scores[-1])

print("Mean:", np.mean(scores), "\nSTD: ", np.std(scores), '\n')


0.9708576186511241
0.9744657230086039
0.9744657230086039
0.9691923397169026
0.9725228975853455
Mean: 0.972300860394116 
STD:  0.0020575912287372233 



In [14]:
y_pred_1.to_csv('y_pred_Reweighing.csv',index=False)

In [15]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

for age_cls in range(2):
    for priv_cls in [0,1]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Patient Gender'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred_1

        import numpy as np

        # Define the groups
        groups = [
            {'Patient Gender': 0},
            {'Patient Gender': 1}
        ]



        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.030485648507126595
Max Average Odds Difference (Ideal value = 0): 0.01284546011288433
Max Equal Opportunity Difference (Ideal value = 0): 0.01749179190915484
Max Theil Index (Ideal value = 0): 0.01943477602976334
Max Generalized Entropy Index (Ideal value = 0): 0.01386931899245455


In [16]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls in range(7):
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Age Range'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred_1

        import numpy as np

        # Define the groups
        groups = [
            {'Age Range': 0},
            {'Age Range': 1},
            {'Age Range': 2},
            {'Age Range': 3},
            {'Age Range': 4},
            {'Age Range': 5},
            {'Age Range': 6}]


        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.19710391960212628
Max Average Odds Difference (Ideal value = 0): 0.04214954672067302
Max Equal Opportunity Difference (Ideal value = 0): 0.06539525141950164
Max Theil Index (Ideal value = 0): 0.01943477602976334
Max Generalized Entropy Index (Ideal value = 0): 0.01386931899245455


In [17]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls_1 in [0,1]:   # Gender        
        for priv_cls_2 in range(7):   # Ethnicity

            # Convert to AIF360 format
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Type', favorable_classes=[age_cls],
                                             protected_attribute_names=['Patient Gender','Age Range'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Create a dataset for the predictions
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = y_pred_1

            import numpy as np

            # Define the groups
            groups = [{'Patient Gender':0, 'Age Range':i} for i in range(7)] + [{'Patient Gender':1, 'Age Range':i} for i in range(7)] 

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())
                max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender+Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.19712611047465162
Max Average Odds Difference (Ideal value = 0): 0.07358810932076318
Max Equal Opportunity Difference (Ideal value = 0): 0.13004630591930533
Max Theil Index (Ideal value = 0): 0.01943477602976334
Max Generalized Entropy Index (Ideal value = 0): 0.01386931899245455


## 2- Disparate Impact Remover

In [18]:
import pandas as pd
import numpy as np
from aif360.datasets import StandardDataset
from aif360.algorithms.preprocessing import DisparateImpactRemover
from sklearn.model_selection import StratifiedKFold

X = train_df.drop('Type', axis=1)
y = train_df['Type']

y_pred_2 = pd.Series(0, index=X.index)

scores = []
for train_index, test_index in StratifiedKFold(n_splits=5).split(X.values, y.values):
    X_Train, X_Test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

    # Create a combined DataFrame for X_Train and y_Train
    combined_train = pd.concat([X_Train, y_Train], axis=1)

    # Convert to AIF360 format
    aif360_train = StandardDataset(combined_train,
                                   label_name='Type', favorable_classes=[1],
                                   protected_attribute_names=['Patient Gender', 'Age Range'],
                                   privileged_classes=[[1, i] for i in range(7)])

    # Apply the DisparateImpactRemover
    di_remover = DisparateImpactRemover(repair_level=1.0)
    aif360_train = di_remover.fit_transform(aif360_train)

    # Extract the transformed features and labels
    X_Train_transformed = aif360_train.features
    y_Train_transformed = aif360_train.labels.ravel()

    # Fit the model using the transformed data
    model.fit(X_Train_transformed, y_Train_transformed)

    # Transform the test set in the same way
    combined_test = pd.concat([X_Test, y_Test], axis=1)
    aif360_test = StandardDataset(combined_test,
                                  label_name='Type', favorable_classes=[1],
                                  protected_attribute_names=['Patient Gender', 'Age Range'],
                                  privileged_classes=[[1, i] for i in range(7)])
    aif360_test = di_remover.fit_transform(aif360_test)

    X_Test_transformed = aif360_test.features
    y_Test_transformed = aif360_test.labels.ravel()

    preds = model.predict(X_Test_transformed)
    y_pred_2[test_index] = preds

    scores.append(accuracy_score(y_Test_transformed, preds))
    print(scores[-1])

print("Mean:", np.mean(scores), "\nSTD: ", np.std(scores), '\n')


0.9689147932278657
0.9744657230086039
0.9758534554537885
0.9675270607826811
0.9744657230086039
Mean: 0.9722453510963087 
STD:  0.003353606986842776 



In [19]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

for age_cls in range(2):
    for priv_cls in [0,1]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Patient Gender'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred_2

        import numpy as np

        # Define the groups
        groups = [
            {'Patient Gender': 0},
            {'Patient Gender': 1}
        ]



        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.03600398454988585
Max Average Odds Difference (Ideal value = 0): 0.0028617153177400113
Max Equal Opportunity Difference (Ideal value = 0): 0.0076186969926840264
Max Theil Index (Ideal value = 0): 0.019419013745947955
Max Generalized Entropy Index (Ideal value = 0): 0.013892589464757986


In [20]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls in range(7):
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Age Range'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred_2

        import numpy as np

        # Define the groups
        groups = [
            {'Age Range': 0},
            {'Age Range': 1},
            {'Age Range': 2},
            {'Age Range': 3},
            {'Age Range': 4},
            {'Age Range': 5},
            {'Age Range': 6}]


        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.1972886031776003
Max Average Odds Difference (Ideal value = 0): 0.04483817315098201
Max Equal Opportunity Difference (Ideal value = 0): 0.07131519830945487
Max Theil Index (Ideal value = 0): 0.019419013745947955
Max Generalized Entropy Index (Ideal value = 0): 0.013892589464757986


In [21]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls_1 in [0,1]:   # Gender        
        for priv_cls_2 in range(7):   # Ethnicity

            # Convert to AIF360 format
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Type', favorable_classes=[age_cls],
                                             protected_attribute_names=['Patient Gender','Age Range'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Create a dataset for the predictions
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = y_pred_2

            import numpy as np

            # Define the groups
            groups = [{'Patient Gender':0, 'Age Range':i} for i in range(7)] + [{'Patient Gender':1, 'Age Range':i} for i in range(7)] 

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())
                max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender+Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.1981946147031162
Max Average Odds Difference (Ideal value = 0): 0.0667227975818659
Max Equal Opportunity Difference (Ideal value = 0): 0.11615741703041638
Max Theil Index (Ideal value = 0): 0.019419013745947955
Max Generalized Entropy Index (Ideal value = 0): 0.013892589464757986


## 3- Learning Fair Representations (LFR)

In [22]:
# import pandas as pd
# import numpy as np
# from aif360.datasets import StandardDataset
# from aif360.algorithms.preprocessing import LFR
# from sklearn.model_selection import StratifiedKFold

# X = train_df.drop('Type', axis=1)
# y = train_df['Type']

# y_pred_3 = pd.Series(0, index=X.index)

# scores = []
# for train_index, test_index in StratifiedKFold(n_splits=5).split(X.values, y.values):
#     X_Train, X_Test = X.iloc[train_index, :], X.iloc[test_index, :]
#     y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

#     # Create a combined DataFrame for X_Train and y_Train
#     combined_train = pd.concat([X_Train, y_Train], axis=1)

#     # Convert to AIF360 format
#     aif360_train = StandardDataset(combined_train,
#                                    label_name='Type', favorable_classes=[1],
#                                    protected_attribute_names=['Patient Gender', 'Age Range'],
#                                    privileged_classes=[[1, i] for i in range(7)])

#     # Apply the LFR algorithm
#     lfr = LFR(unprivileged_groups=[{'Patient Gender': 0}], privileged_groups=[{'Patient Gender': 1}],seed=42,k=4,verbose=0)
#     lfr.fit(aif360_train)
#     aif360_train_transformed = lfr.transform(aif360_train)

#     # Extract the transformed features and labels
#     X_Train_transformed = aif360_train_transformed.features
#     y_Train_transformed = aif360_train_transformed.labels.ravel()

#     # Fit the model using the transformed data
#     model.fit(X_Train_transformed, y_Train_transformed)

#     # Convert the test data to AIF360 format without the label
#     combined_test = X_Test.copy()
#     combined_test['Type'] = y_Test
#     aif360_test = StandardDataset(combined_test,
#                                   label_name='Type', favorable_classes=[1],
#                                   protected_attribute_names=['Patient Gender', 'Age Range'],
#                                   privileged_classes=[[1, i] for i in range(7)])

#     # Transform the test set using the LFR model fitted on the training set
#     aif360_test_transformed = lfr.transform(aif360_test)

#     X_Test_transformed = aif360_test_transformed.features

#     preds = model.predict(X_Test_transformed)
#     y_pred_3[test_index] = preds

#     scores.append(accuracy_score(y_Test.values, preds))
#     print(scores[-1])

# print("Mean:", np.mean(scores), "\nSTD: ", np.std(scores), '\n')


In [23]:
# y_pred_3.to_csv('y_pred_LFR.csv',index=False)

In [24]:
# import pandas as pd
# import numpy as np

# # Initialize variables to store maximum values of each metric
# max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# for age_cls in range(2):
#     for priv_cls in [0,1]:
#         # Convert to AIF360 format
#         aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
#                                          label_name='Type', favorable_classes=[age_cls],
#                                          protected_attribute_names=['Patient Gender'],
#                                          privileged_classes=[[priv_cls]])

#         # Create a dataset for the predictions
#         aif360_pred_dataset = aif360_dataset.copy()
#         aif360_pred_dataset.labels = y_pred_3

#         import numpy as np

#         # Define the groups
#         groups = [
#             {'Patient Gender': 0},
#             {'Patient Gender': 1}
#         ]



#         # Iterate through the groups
#         for i, privileged_group in enumerate(groups):
#             # Define unprivileged groups as all other groups
#             unprivileged_groups = groups[:i] + groups[i+1:]

#             # Calculate metrics
#             metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
#                                           unprivileged_groups=unprivileged_groups,
#                                           privileged_groups=[privileged_group])

#             # Update maximum values
#             max_spd = max(max_spd, metric.statistical_parity_difference())
#             max_aod = max(max_aod, metric.average_odds_difference())
#             max_eod = max(max_eod, metric.equal_opportunity_difference())
#             max_theil = max(max_theil, metric.theil_index())
#             max_gei = max(max_gei, metric.generalized_entropy_index())
#             max_di = max(max_di, metric.disparate_impact())

# # Print maximum values
# print("Gender Bias Identification:")
# print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
# print("Max Average Odds Difference (Ideal value = 0):", max_aod)
# print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
# print("Max Theil Index (Ideal value = 0):", max_theil)
# print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [25]:
# import pandas as pd
# import numpy as np

# # Initialize variables to store maximum values of each metric
# max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# # Find Max value for metrics by looping into all unique_values for the target and for the privileged class
# for age_cls in range(2):
#     for priv_cls in range(7):
#         # Convert to AIF360 format
#         aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
#                                          label_name='Type', favorable_classes=[age_cls],
#                                          protected_attribute_names=['Age Range'],
#                                          privileged_classes=[[priv_cls]])

#         # Create a dataset for the predictions
#         aif360_pred_dataset = aif360_dataset.copy()
#         aif360_pred_dataset.labels = y_pred_3

#         import numpy as np

#         # Define the groups
#         groups = [
#             {'Age Range': 0},
#             {'Age Range': 1},
#             {'Age Range': 2},
#             {'Age Range': 3},
#             {'Age Range': 4},
#             {'Age Range': 5},
#             {'Age Range': 6}]


#         # Iterate through the groups
#         for i, privileged_group in enumerate(groups):
#             # Define unprivileged groups as all other groups
#             unprivileged_groups = groups[:i] + groups[i+1:]

#             # Calculate metrics
#             metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
#                                           unprivileged_groups=unprivileged_groups,
#                                           privileged_groups=[privileged_group])

#             # Update maximum values
#             max_spd = max(max_spd, metric.statistical_parity_difference())
#             max_aod = max(max_aod, metric.average_odds_difference())
#             max_eod = max(max_eod, metric.equal_opportunity_difference())
#             max_theil = max(max_theil, metric.theil_index())
#             max_gei = max(max_gei, metric.generalized_entropy_index())
#             max_di = max(max_di, metric.disparate_impact())

# # Print maximum values
# print("Ethnicity Bias Identification:")
# print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
# print("Max Average Odds Difference (Ideal value = 0):", max_aod)
# print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
# print("Max Theil Index (Ideal value = 0):", max_theil)
# print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [26]:
# import pandas as pd
# import numpy as np


# # Initialize variables to store maximum values of each metric
# max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# # Find Max value for metrics by looping into all unique_values for the target and for the privileged class
# for age_cls in range(2):
#     for priv_cls_1 in [0,1]:   # Gender        
#         for priv_cls_2 in range(7):   # Ethnicity

#             # Convert to AIF360 format
#             aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
#                                              label_name='Type', favorable_classes=[age_cls],
#                                              protected_attribute_names=['Patient Gender','Age Range'],
#                                              privileged_classes=[[priv_cls_1], [priv_cls_2]])

#             # Create a dataset for the predictions
#             aif360_pred_dataset = aif360_dataset.copy()
#             aif360_pred_dataset.labels = y_pred_3

#             import numpy as np

#             # Define the groups
#             groups = [{'Patient Gender':0, 'Age Range':i} for i in range(7)] + [{'Patient Gender':1, 'Age Range':i} for i in range(7)] 

#             # Iterate through the groups
#             for i, privileged_group in enumerate(groups):
#                 # Define unprivileged groups as all other groups
#                 unprivileged_groups = groups[:i] + groups[i+1:]

#                 # Calculate metrics
#                 metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
#                                               unprivileged_groups=unprivileged_groups,
#                                               privileged_groups=[privileged_group])

#                 # Update maximum values
#                 max_spd = max(max_spd, metric.statistical_parity_difference())
#                 max_aod = max(max_aod, metric.average_odds_difference())
#                 max_eod = max(max_eod, metric.equal_opportunity_difference())
#                 max_theil = max(max_theil, metric.theil_index())
#                 max_gei = max(max_gei, metric.generalized_entropy_index())
#                 max_di = max(max_di, metric.disparate_impact())

# # Print maximum values
# print("Gender+Ethnicity Bias Identification:")
# print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
# print("Max Average Odds Difference (Ideal value = 0):", max_aod)
# print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
# print("Max Theil Index (Ideal value = 0):", max_theil)
# print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

# In-processing Algorithms:

## AdversarialDebiasing

In [27]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

In [28]:
import pandas as pd
import numpy as np
from aif360.datasets import StandardDataset
from aif360.algorithms.inprocessing import AdversarialDebiasing
from sklearn.model_selection import StratifiedKFold

# Function to convert dataframe to AIF360 StandardDataset
def convert_to_dataset(df, label_name, favorable_classes, protected_attribute_names, privileged_classes):
    return StandardDataset(df,
                           label_name=label_name,
                           favorable_classes=favorable_classes,
                           protected_attribute_names=protected_attribute_names,
                           privileged_classes=privileged_classes)

X = train_df.drop('Type', axis=1)
y = train_df['Type']

y_pred_4 = pd.Series(0, index=X.index)

scores = []

for train_index, test_index in StratifiedKFold(n_splits=5).split(X.values, y.values):
    X_Train, X_Test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

    # Create a combined DataFrame for X_Train and y_Train
    combined_train = pd.concat([X_Train, y_Train], axis=1)

    # Convert to AIF360 format
    aif360_train = convert_to_dataset(combined_train,
                                      label_name='Type', favorable_classes=[1],
                                      protected_attribute_names=['Patient Gender', 'Age Range'],
                                      privileged_classes=[[1, i] for i in range(7)])

    # Apply the AdversarialDebiasing algorithm
    tf.reset_default_graph()
    sess = tf.Session()
    adv_debiasing = AdversarialDebiasing(privileged_groups=[{'Patient Gender': 1}],
                                         unprivileged_groups=[{'Patient Gender': 0}],
                                         scope_name='adv_debiasing',
                                         debias=True,
                                         sess=sess)
    adv_debiasing.fit(aif360_train)

    # Transform the training data
    aif360_train_pred = adv_debiasing.predict(aif360_train)
    X_Train_transformed = aif360_train_pred.features
    y_Train_transformed = aif360_train_pred.labels.ravel()

    # Train your model using the debiased training data
    model.fit(X_Train_transformed, y_Train_transformed)

    # Get predictions from your model
    preds = model.predict(X_Test.values)
    y_pred_4[test_index] = preds

    scores.append(accuracy_score(y_Test.values, preds))
    print(scores[-1])

sess.close()

print("Mean:", np.mean(scores), "\nSTD: ", np.std(scores), '\n')

epoch 0; iter: 0; batch classifier loss: 0.697595; batch adversarial loss: 0.709872
epoch 1; iter: 0; batch classifier loss: 0.248158; batch adversarial loss: 0.721172
epoch 2; iter: 0; batch classifier loss: 0.152113; batch adversarial loss: 0.690555
epoch 3; iter: 0; batch classifier loss: 0.083501; batch adversarial loss: 0.698332
epoch 4; iter: 0; batch classifier loss: 0.056352; batch adversarial loss: 0.690404
epoch 5; iter: 0; batch classifier loss: 0.088809; batch adversarial loss: 0.708901
epoch 6; iter: 0; batch classifier loss: 0.054993; batch adversarial loss: 0.704442
epoch 7; iter: 0; batch classifier loss: 0.034658; batch adversarial loss: 0.673003
epoch 8; iter: 0; batch classifier loss: 0.011596; batch adversarial loss: 0.675597
epoch 9; iter: 0; batch classifier loss: 0.058036; batch adversarial loss: 0.688501
epoch 10; iter: 0; batch classifier loss: 0.016414; batch adversarial loss: 0.691310
epoch 11; iter: 0; batch classifier loss: 0.045363; batch adversarial loss:

In [29]:
y_pred_4.to_csv('y_pred_AD.csv',index=False)

In [30]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

for age_cls in range(2):
    for priv_cls in [0,1]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Patient Gender'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred_4

        import numpy as np

        # Define the groups
        groups = [
            {'Patient Gender': 0},
            {'Patient Gender': 1}
        ]



        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.03247258470046133
Max Average Odds Difference (Ideal value = 0): 0.009786128684587711
Max Equal Opportunity Difference (Ideal value = 0): 0.01197450037515102
Max Theil Index (Ideal value = 0): 0.01918802884823289
Max Generalized Entropy Index (Ideal value = 0): 0.013725876540357411


In [31]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls in range(7):
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Age Range'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred_4

        import numpy as np

        # Define the groups
        groups = [
            {'Age Range': 0},
            {'Age Range': 1},
            {'Age Range': 2},
            {'Age Range': 3},
            {'Age Range': 4},
            {'Age Range': 5},
            {'Age Range': 6}]


        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.19726739915331287
Max Average Odds Difference (Ideal value = 0): 0.0408168455630907
Max Equal Opportunity Difference (Ideal value = 0): 0.06266611499540664
Max Theil Index (Ideal value = 0): 0.01918802884823289
Max Generalized Entropy Index (Ideal value = 0): 0.013725876540357411


In [32]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls_1 in [0,1]:   # Gender        
        for priv_cls_2 in range(7):   # Ethnicity

            # Convert to AIF360 format
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Type', favorable_classes=[age_cls],
                                             protected_attribute_names=['Patient Gender','Age Range'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Create a dataset for the predictions
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = y_pred_4

            import numpy as np

            # Define the groups
            groups = [{'Patient Gender':0, 'Age Range':i} for i in range(7)] + [{'Patient Gender':1, 'Age Range':i} for i in range(7)] 

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())
                max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender+Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.19760421263486116
Max Average Odds Difference (Ideal value = 0): 0.06835083251480223
Max Equal Opportunity Difference (Ideal value = 0): 0.11885955795745862
Max Theil Index (Ideal value = 0): 0.01918802884823289
Max Generalized Entropy Index (Ideal value = 0): 0.013725876540357411


# Postprocessing Algorithms:

## 1- Equalized Odds Postprocessing

In [33]:
import pandas as pd
import numpy as np
from aif360.datasets import StandardDataset
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# Function to convert dataframe to AIF360 StandardDataset
def convert_to_dataset(df, label_name, favorable_classes, protected_attribute_names, privileged_classes):
    return StandardDataset(df,
                           label_name=label_name,
                           favorable_classes=favorable_classes,
                           protected_attribute_names=protected_attribute_names,
                           privileged_classes=privileged_classes)

# Preprocessing the data
X = train_df.drop('Type', axis=1)
y = train_df['Type']

y_pred_5 = pd.Series(0, index=X.index)

scores = []

for train_index, test_index in StratifiedKFold(n_splits=5).split(X.values, y.values):
    X_Train, X_Test = X.values[train_index], X.values[test_index]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

    # Train your model on the original data
    model.fit(X_Train, y_Train)
    
    # Make predictions on the training and test sets
    y_train_pred = model.predict(X_Train)
    y_test_pred = model.predict(X_Test)
    
    # Combine predictions with training data
    combined_train = pd.DataFrame(X_Train, columns=X.columns)
    combined_train['Type'] = y_Train.values
    combined_train['pred'] = y_train_pred

    # Convert to AIF360 format for the training set
    aif360_train = convert_to_dataset(combined_train,
                                      label_name='Type', favorable_classes=[1],
                                      protected_attribute_names=['Patient Gender'],
                                      privileged_classes=[[1]])
    
    # Convert to AIF360 format for the predicted training set
    combined_train_pred = combined_train.copy()
    combined_train_pred['Type'] = combined_train['pred']
    aif360_train_pred = convert_to_dataset(combined_train_pred,
                                           label_name='Type', favorable_classes=[1],
                                           protected_attribute_names=['Patient Gender'],
                                           privileged_classes=[[1]])
    
    # Apply Calibrated Equalized Odds Postprocessing using only the training set
    cpp = CalibratedEqOddsPostprocessing(privileged_groups=[{'Patient Gender': 1}],
                                         unprivileged_groups=[{'Patient Gender': 0}],
                                         cost_constraint="weighted")
    cpp = cpp.fit(aif360_train, aif360_train_pred)
    
    # Apply the post-processing to the test predictions
    combined_test = pd.DataFrame(X_Test, columns=X.columns)
    combined_test['pred'] = y_test_pred
    aif360_test_pred = convert_to_dataset(combined_test,
                                          label_name='pred', favorable_classes=[1],
                                          protected_attribute_names=['Patient Gender'],
                                          privileged_classes=[[1]])
    
    aif360_test_pred = cpp.predict(aif360_test_pred)
    
    # Get the final predictions
    preds = aif360_test_pred.labels.ravel()
    y_pred_5[test_index] = preds

    scores.append(accuracy_score(y_Test.values, preds))
    print(scores[-1])

print("Mean:", np.mean(scores), "\nSTD: ", np.std(scores), '\n')


0.9703025256730502
0.9752983624757147
0.9747432694976409
0.9680821537607549
0.9733555370524563
Mean: 0.9723563696919234 
STD:  0.0027509323770999565 



In [34]:
y_pred_5.to_csv('y_pred_EOP.csv',index=False)

In [35]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

for age_cls in range(2):
    for priv_cls in [0,1]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Patient Gender'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred_5

        import numpy as np

        # Define the groups
        groups = [
            {'Patient Gender': 0},
            {'Patient Gender': 1}
        ]



        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.03213869198060243
Max Average Odds Difference (Ideal value = 0): 0.010403880077618832
Max Equal Opportunity Difference (Ideal value = 0): 0.01336282510868736
Max Theil Index (Ideal value = 0): 0.019342018780042932
Max Generalized Entropy Index (Ideal value = 0): 0.013837018489957793


In [36]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls in range(7):
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Age Range'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred_5

        import numpy as np

        # Define the groups
        groups = [
            {'Age Range': 0},
            {'Age Range': 1},
            {'Age Range': 2},
            {'Age Range': 3},
            {'Age Range': 4},
            {'Age Range': 5},
            {'Age Range': 6}]


        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.19603617531681944
Max Average Odds Difference (Ideal value = 0): 0.03669835752175789
Max Equal Opportunity Difference (Ideal value = 0): 0.05440624043049058
Max Theil Index (Ideal value = 0): 0.019342018780042932
Max Generalized Entropy Index (Ideal value = 0): 0.013837018489957793


In [37]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls_1 in [0,1]:   # Gender        
        for priv_cls_2 in range(7):   # Ethnicity

            # Convert to AIF360 format
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Type', favorable_classes=[age_cls],
                                             protected_attribute_names=['Patient Gender','Age Range'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Create a dataset for the predictions
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = y_pred_5

            import numpy as np

            # Define the groups
            groups = [{'Patient Gender':0, 'Age Range':i} for i in range(7)] + [{'Patient Gender':1, 'Age Range':i} for i in range(7)] 

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())
                max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender+Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.19730456505796018
Max Average Odds Difference (Ideal value = 0): 0.05973878678464788
Max Equal Opportunity Difference (Ideal value = 0): 0.10226852814152754
Max Theil Index (Ideal value = 0): 0.019342018780042932
Max Generalized Entropy Index (Ideal value = 0): 0.013837018489957793


## 2- Reject Option Classification

In [38]:
import pandas as pd
import numpy as np
from aif360.datasets import StandardDataset
from aif360.algorithms.postprocessing import RejectOptionClassification
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# Function to convert dataframe to AIF360 StandardDataset
def convert_to_dataset(df, label_name, favorable_classes, protected_attribute_names, privileged_classes):
    return StandardDataset(df,
                           label_name=label_name,
                           favorable_classes=favorable_classes,
                           protected_attribute_names=protected_attribute_names,
                           privileged_classes=privileged_classes)

# Preprocessing the data
X = train_df.drop('Type', axis=1)
y = train_df['Type']

y_pred_6 = pd.Series(0, index=X.index)

scores = []

for train_index, test_index in StratifiedKFold(n_splits=5).split(X.values, y.values):
    X_Train, X_Test = X.values[train_index], X.values[test_index]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

    # Train your model on the original data
    model.fit(X_Train, y_Train)
    
    # Make predictions on the training and test sets
    y_train_pred = model.predict(X_Train)
    y_test_pred = model.predict(X_Test)
    
    # Combine predictions with training data
    combined_train = pd.DataFrame(X_Train, columns=X.columns)
    combined_train['Type'] = y_Train.values
    combined_train['pred'] = y_train_pred
    combined_train['scores'] = model.predict_proba(X_Train)[:, 1]  # Get probability scores

    # Convert to AIF360 format for the training set
    aif360_train = convert_to_dataset(combined_train,
                                      label_name='Type', favorable_classes=[1],
                                      protected_attribute_names=['Patient Gender'],
                                      privileged_classes=[[1]])
    
    # Convert to AIF360 format for the predicted training set
    combined_train_pred = combined_train.copy()
    combined_train_pred['Type'] = combined_train['pred']
    aif360_train_pred = convert_to_dataset(combined_train_pred,
                                           label_name='Type', favorable_classes=[1],
                                           protected_attribute_names=['Patient Gender'],
                                           privileged_classes=[[1]])
    
    # Apply Reject Option Classification using only the training set
    roc = RejectOptionClassification(privileged_groups=[{'Patient Gender': 1}],
                                     unprivileged_groups=[{'Patient Gender': 0}],
                                     low_class_thresh=0.01, high_class_thresh=0.99,
                                     num_class_thresh=100, num_ROC_margin=50, metric_name="Average odds difference")
    roc = roc.fit(aif360_train, aif360_train_pred)
    
    # Apply the post-processing to the test predictions
    combined_test = pd.DataFrame(X_Test, columns=X.columns)
    combined_test['pred'] = y_test_pred
    combined_test['scores'] = model.predict_proba(X_Test)[:, 1]  # Get probability scores
    aif360_test_pred = convert_to_dataset(combined_test,
                                          label_name='pred', favorable_classes=[1],
                                          protected_attribute_names=['Patient Gender'],
                                          privileged_classes=[[1]])
    
    aif360_test_pred = roc.predict(aif360_test_pred)
    
    # Get the final predictions
    preds = aif360_test_pred.labels.ravel()
    y_pred_6[test_index] = preds

    scores.append(accuracy_score(y_Test.values, preds))
    print(scores[-1])

print("Mean:", np.mean(scores), "\nSTD: ", np.std(scores), '\n')


0.9705800721620872
0.9752983624757147
0.9758534554537885
0.9691923397169026
0.9733555370524563
Mean: 0.9728559533721898 
STD:  0.0026000640418829615 



In [39]:
y_pred_6.to_csv('y_pred_ROC.csv',index=False)

In [40]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

for age_cls in range(2):
    for priv_cls in [0,1]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Patient Gender'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred_6

        import numpy as np

        # Define the groups
        groups = [
            {'Patient Gender': 0},
            {'Patient Gender': 1}
        ]



        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.03297033790725762
Max Average Odds Difference (Ideal value = 0): 0.009108036651532987
Max Equal Opportunity Difference (Ideal value = 0): 0.010906694715780985
Max Theil Index (Ideal value = 0): 0.018832896433778702
Max Generalized Entropy Index (Ideal value = 0): 0.013573528645527832


In [41]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls in range(7):
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Type', favorable_classes=[age_cls],
                                         protected_attribute_names=['Age Range'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = y_pred_6

        import numpy as np

        # Define the groups
        groups = [
            {'Age Range': 0},
            {'Age Range': 1},
            {'Age Range': 2},
            {'Age Range': 3},
            {'Age Range': 4},
            {'Age Range': 5},
            {'Age Range': 6}]


        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.1967133484268908
Max Average Odds Difference (Ideal value = 0): 0.037805277305967736
Max Equal Opportunity Difference (Ideal value = 0): 0.056533447407729476
Max Theil Index (Ideal value = 0): 0.018832896433778702
Max Generalized Entropy Index (Ideal value = 0): 0.013573528645527832


In [42]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf


# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(2):
    for priv_cls_1 in [0,1]:   # Gender        
        for priv_cls_2 in range(7):   # Ethnicity

            # Convert to AIF360 format
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Type', favorable_classes=[age_cls],
                                             protected_attribute_names=['Patient Gender','Age Range'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Create a dataset for the predictions
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = y_pred_6

            import numpy as np

            # Define the groups
            groups = [{'Patient Gender':0, 'Age Range':i} for i in range(7)] + [{'Patient Gender':1, 'Age Range':i} for i in range(7)] 

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())
                max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

Gender+Ethnicity Bias Identification:
Max Statistical Parity Difference (Ideal value = 0): 0.19795889853009152
Max Average Odds Difference (Ideal value = 0): 0.060817638109360764
Max Equal Opportunity Difference (Ideal value = 0): 0.1043470980854061
Max Theil Index (Ideal value = 0): 0.018832896433778702
Max Generalized Entropy Index (Ideal value = 0): 0.013573528645527832
