# Import Libraries

In [None]:
!pip install -q BlackBoxAuditing 
!pip install -q aif360

import numpy as np
import pandas as pd
import tensorflow as tf
tf.compat.v1.enable_eager_execution()
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score
import keras
from keras.layers import *
from keras.models import *
from keras.optimizers import Adam
from aif360.datasets import StandardDataset
from aif360.metrics import ClassificationMetric

import warnings
warnings.simplefilter(action='ignore', category='PerformanceWarning')

# Prepare Dataset

In [None]:
import os
import cv2
import numpy as np
import pandas as pd

def extract_info(image_name: str, path: str) -> dict:
    """
    Parse a file name of the form ``<age>_<gender>_<ethnicity>_...`` and load
    the matching image as a 64x64 grayscale array.

    Parameters
    ----------
    image_name : str
        File name of the image (not a full path).
    path : str
        Directory that contains ``image_name``.

    Returns
    -------
    dict or None
        Dictionary with the keys ``"Age"``, ``"Gender"``, ``"Ethnicity"`` and
        ``"Image"``. ``None`` is returned when the name has fewer than four
        underscore-separated parts, when the age or ethnicity field is not an
        integer, when the ethnicity code is outside 0-4, or when the image
        file cannot be read.
    """
    # Split the image name by underscore
    parts = image_name.split("_")

    # Check if the image name is valid
    if len(parts) < 4:
        return None

    # Malformed numeric fields mark the file as invalid instead of aborting
    # the whole directory scan with a ValueError.
    try:
        age = int(parts[0])
        ethnicity_code = int(parts[2])
    except ValueError:
        return None

    # Any gender field other than "0" is labelled "Female".
    gender = "Male" if parts[1] == "0" else "Female"
    ethnicity_map = {0: "White", 1: "Black", 2: "Asian", 3: "Indian", 4: "Others"}
    ethnicity = ethnicity_map.get(ethnicity_code)

    # Check if the ethnicity is valid
    if ethnicity is None:
        return None

    # Flag 0 reads the file as a single-channel grayscale image. imread
    # signals failure by returning None, which resize cannot handle.
    image = cv2.imread(os.path.join(path, image_name), 0)
    if image is None:
        return None
    image = cv2.resize(image, dsize=(64, 64))

    # Create a dictionary with the extracted features and image data
    return {"Age": age, "Gender": gender, "Ethnicity": ethnicity, "Image": image}

# Set the path to the directory containing the images
path = "/kaggle/input/utkface-new/UTKFace/"

# List all image files in the directory. os.listdir returns entries in
# arbitrary order, so sort them: the row order of the DataFrame feeds the
# seeded train/test split and must be the same on every run.
image_files = sorted(f for f in os.listdir(path) if f.endswith(".jpg"))

# Extract the features from all image names
features_list = [extract_info(image_name, path) for image_name in image_files]

# Filter out the files that extract_info rejected (it returns None for them)
features_list = [features for features in features_list if features is not None]

# Create a DataFrame with the extracted features
df = pd.DataFrame(features_list)

# Preview only the first rows; every row carries a full image array
df.head()


In [None]:
# Divide every image array by 255.
# NOTE: the column is overwritten in place, so running this cell a second
# time divides the pixel values again.
df['Image'] = df['Image'].div(255)

# 1st Model: For Feature Extraction

In [None]:
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, concatenate
from keras.models import Model

# Age-regression CNN on 64x64 single-channel images. A later cell taps an
# intermediate layer of this model to obtain image features.
inputs_cnn = Input(shape=(64, 64, 1))

# Layers are instantiated in exactly the order in which they are applied:
# convolutional feature extractor first, then the dense regression head.
cnn_layers = [
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1),
]

# Chain everything except the output unit, then attach the single linear output.
x = inputs_cnn
for cnn_layer in cnn_layers[:-1]:
    x = cnn_layer(x)
outputs_dense = cnn_layers[-1](x)

cnn_model = Model(inputs=inputs_cnn, outputs=outputs_dense)

In [None]:
# Separate the regression target (Age) from the remaining columns and hold
# out 20% of the rows with a fixed seed.
X, y = df.drop(columns='Age'), df['Age'].copy()
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
)

# The 'Image' column holds one array per row; stack them into a single
# array per split so they can be fed to the CNN.
x_train_images, x_test_images = (
    np.stack(part['Image'].values) for part in (x_train, x_test)
)

In [None]:
# Fit the age regressor; mean absolute error is both the loss and the
# reported metric.
optimizer = Adam()
cnn_model.compile(
    optimizer=optimizer,
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
)

# The held-out split is passed as validation data, so it is evaluated after
# every epoch.
validation_pair = (x_test_images, y_test.values)
history = cnn_model.fit(
    x_train_images,
    y_train.values,
    validation_data=validation_pair,
    epochs=25,
    batch_size=128,
)

## Extracting the CNN Features

In [None]:
# Sub-model that stops at the layer eight positions from the end of
# cnn_model. NOTE(review): with the layer stack defined for cnn_model
# (input layer included) this should be the Flatten layer - confirm.
feature_layer = cnn_model.get_layer(index=-8)
intermediate_layer_model = Model(inputs=cnn_model.input,
                                 outputs=feature_layer.output)

# Run both splits through the truncated network, training split first.
intermediate_output, intermediate_output_test = (
    intermediate_layer_model.predict(images)
    for images in (x_train_images, x_test_images)
)
intermediate_output.shape

In [None]:
# The raw images are no longer needed once the CNN features exist: drop the
# column from both splits in place and release the stacked arrays.
for split_frame in (x_train, x_test):
    split_frame.drop(columns='Image', inplace=True)
del x_train_images
del x_test_images

## Reducing the Dimensionality of the CNN Features

In [None]:
from sklearn.decomposition import PCA
# Number of principal components kept from the CNN features. The dense model
# defined later takes 20 inputs (these components plus Gender and Ethnicity),
# so its input size has to be changed together with this value.
p_comp = 18
pca = PCA(n_components=p_comp, random_state=42)

# Column names are derived from p_comp so they cannot drift out of sync with
# the number of components.
pca_columns = [f'PCA{i}' for i in range(p_comp)]

# Fit on the training features only, then apply the same projection to the
# held-out features.
x_train[pca_columns] = pca.fit_transform(intermediate_output)
x_test[pca_columns] = pca.transform(intermediate_output_test)
del intermediate_output, intermediate_output_test

# 2nd Model: Predicting Age Group (CNN features + Gender and Ethnicity)

In [None]:
# Stack the training rows on top of the held-out rows and renumber from 0,
# so that X and y share one positional index for cross-validation.
X = pd.concat([x_train, x_test], ignore_index=True)
y = pd.concat([y_train, y_test], ignore_index=True)

In [None]:
# Replace the string categories with integer codes. One encoder instance is
# refitted per column, so afterwards it only remembers the last column.
le = LabelEncoder()
feats = ['Gender', 'Ethnicity']
for column in feats:
    X[column] = le.fit_transform(X[column])

In [None]:
# Merge "Asian" and "Others" because they have low number of samples:
# code 3 is folded into code 0, every other code is kept as it is.
# NOTE(review): presumably 0 = Asian and 3 = Others after label encoding
# (alphabetical order of the five ethnicity names) - confirm.
ethnicity_mapping = {code: (0 if code == 3 else code) for code in range(5)}

# Apply the mapping and store the result as float32
X['Ethnicity'] = X['Ethnicity'].map(ethnicity_mapping).to_numpy().astype('float32')

In [None]:
# Converting Age to Age_group
def age_group(age):
    """
    Map an age to one of four group codes.

    Returns 0 for ages in [0, 18), 1 for ages below 30 that are not in group 0
    (this includes negative ages), 2 for ages in [30, 80) and 3 otherwise.
    """
    if 0 <= age < 18:
        return 0
    if age < 30:
        return 1
    if age < 80:
        return 2
    return 3
    
# Replace the raw ages with their group codes and plot the class balance.
# NOTE: y is overwritten, so running this cell twice would bucket the group
# codes themselves.
age_series = y.squeeze()
y = age_series.map(age_group)
sns.histplot(y)

# Original Model

In [None]:
# Age-group classifier: 20 input features -> 16 -> 8 -> 4-way softmax, with
# dropout after each hidden layer.
inputs_dense = Input(shape=(20,))
hidden_stack = [
    Dense(16, activation='relu', kernel_initializer='glorot_uniform'),
    Dropout(0.5),
    Dense(8, activation='relu', kernel_initializer='glorot_uniform'),
    Dropout(0.5),
]
x = inputs_dense
for hidden_layer in hidden_stack:
    x = hidden_layer(x)
outputs_dense = Dense(4, activation='softmax', kernel_initializer='glorot_uniform')(x)
dense_model = Model(inputs=inputs_dense, outputs=outputs_dense)

# Keep the freshly initialised weights so every CV fold can start from them
initial_weights = dense_model.get_weights()

In [None]:
# CV and OOF Predictions
# The frame is float-initialised: it receives softmax probabilities, and an
# integer frame would depend on pandas silently upcasting on assignment.
y_pred = pd.DataFrame(0.0,columns=[0,1,2,3],index=X.index)

scores = []
for train_index, test_index in StratifiedKFold(n_splits=5).split(X, y):
    X_Train, X_Test = X.iloc[train_index,:], X.iloc[test_index,:]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

    # A new Model wraps the same layers, so the weights have to be reset
    # explicitly to give every fold the same starting point.
    dense_model = Model(inputs=inputs_dense, outputs=outputs_dense)
    dense_model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    # Reinitialize the weights
    dense_model.set_weights(initial_weights)

    dense_model.fit(X_Train.values.astype('float32'), y_Train.values.astype('float32'),
                    epochs=25, batch_size=64, verbose=0)
    preds = dense_model(X_Test.values.astype('float32')).numpy().squeeze()
    # test_index holds row positions, so the out-of-fold block is written by position
    y_pred.iloc[test_index,:] = preds

    scores.append(accuracy_score(y_Test.values.astype('float32'),np.argmax(preds,axis=1)))
    print(scores[-1])

print("Mean:",np.mean(scores),"\nSTD: ", np.std(scores),'\n')

# Metrics

In [None]:
import pandas as pd
import numpy as np


# Track the largest value of each fairness metric seen over all favorable age
# classes and all choices of privileged group.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Hard class prediction per row (argmax over the class-probability columns)
pred_classes = np.argmax(y_pred.values, axis=1)

# Every group is used once as the privileged group; the rest are unprivileged
groups = [
    {'Gender': 0},
    {'Gender': 1}
]

# NOTE(review): range(3) never treats age class 3 as the favorable class -
# confirm this is intended.
for age_cls in range(3):
    # NOTE(review): the metrics below are driven by the explicit group dicts,
    # so iterating privileged_classes may not change the results - confirm
    # before simplifying.
    for priv_cls in [0,1]:
        # Convert to AIF360 format; favorable_classes=[age_cls] makes the
        # labels one-vs-rest for this age class.
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Gender'],
                                         privileged_classes=[[priv_cls]])

        # The predictions must use the dataset's favorable/unfavorable label
        # encoding, not raw class indices: favorable exactly where age_cls
        # was predicted.
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = np.where(
            pred_classes == age_cls,
            aif360_dataset.favorable_label,
            aif360_dataset.unfavorable_label,
        ).astype(float).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np

# Track the largest value of each fairness metric seen over all favorable age
# classes and all choices of privileged group.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Hard class prediction per row (argmax over the class-probability columns)
pred_classes = np.argmax(y_pred.values, axis=1)

# Every group is used once as the privileged group; the rest are unprivileged
groups = [
    {'Ethnicity': 0.0},
    {'Ethnicity': 1.0},
    {'Ethnicity': 2.0},
    {'Ethnicity': 4.0}]

# Find Max value for metrics by looping over the favorable age classes and the privileged class.
# NOTE(review): range(3) never treats age class 3 as the favorable class -
# confirm this is intended.
for age_cls in range(3):
    # NOTE(review): the metrics below are driven by the explicit group dicts,
    # so iterating privileged_classes may not change the results - confirm
    # before simplifying.
    for priv_cls in [0.0,1.0,2.0,4.0]:
        # Convert to AIF360 format; favorable_classes=[age_cls] makes the
        # labels one-vs-rest for this age class.
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Ethnicity'],
                                         privileged_classes=[[priv_cls]])

        # The predictions must use the dataset's favorable/unfavorable label
        # encoding, not raw class indices: favorable exactly where age_cls
        # was predicted.
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = np.where(
            pred_classes == age_cls,
            aif360_dataset.favorable_label,
            aif360_dataset.unfavorable_label,
        ).astype(float).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np


# Track the largest value of each fairness metric seen over all favorable age
# classes and all choices of privileged group.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Hard class prediction per row (argmax over the class-probability columns)
pred_classes = np.argmax(y_pred.values, axis=1)

# Every (Gender, Ethnicity) combination is used once as the privileged group;
# the rest are unprivileged.
groups = [
    {'Gender': 0, 'Ethnicity': 0.0},
    {'Gender': 0, 'Ethnicity': 1.0},
    {'Gender': 0, 'Ethnicity': 2.0},
    {'Gender': 1, 'Ethnicity': 0.0},
    {'Gender': 1, 'Ethnicity': 1.0},
    {'Gender': 1, 'Ethnicity': 2.0},
    {'Gender': 0, 'Ethnicity': 4.0},
    {'Gender': 1, 'Ethnicity': 4.0}
]

# Find Max value for metrics by looping over the favorable age classes and the privileged classes.
# NOTE(review): range(3) never treats age class 3 as the favorable class -
# confirm this is intended.
for age_cls in range(3):
    # NOTE(review): the metrics below are driven by the explicit group dicts,
    # so iterating privileged_classes may not change the results - confirm
    # before simplifying.
    for priv_cls_1 in [0,1]:   # Gender
        for priv_cls_2 in [0.0,1.0,2.0,4.0]:   # Ethnicity

            # Convert to AIF360 format; favorable_classes=[age_cls] makes the
            # labels one-vs-rest for this age class.
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Age', favorable_classes=[age_cls],
                                             protected_attribute_names=['Gender','Ethnicity'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # The predictions must use the dataset's favorable/unfavorable
            # label encoding, not raw class indices: favorable exactly where
            # age_cls was predicted.
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = np.where(
                pred_classes == age_cls,
                aif360_dataset.favorable_label,
                aif360_dataset.unfavorable_label,
            ).astype(float).reshape(-1, 1)

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())
                max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

# Mitigation Algorithms

# Preprocessing Algorithms:

## 1- Reweighing

In [None]:
from aif360.algorithms.preprocessing import Reweighing
from aif360.datasets import StandardDataset
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm

# Model Definition
def create_model():
    """
    Build the uncompiled age-group classifier.

    Architecture: 20 input features, two ReLU hidden layers (16 and 8 units)
    each followed by 50% dropout, and a 4-way softmax output.

    Returns
    -------
    Model
        A new, uncompiled Keras model with freshly created layers.
    """
    inputs_dense = Input(shape=(20,))
    hidden = inputs_dense
    for units in (16, 8):
        hidden = Dense(units, activation='relu', kernel_initializer='glorot_uniform')(hidden)
        hidden = Dropout(0.5)(hidden)
    outputs_dense = Dense(4, activation='softmax', kernel_initializer='glorot_uniform')(hidden)
    return Model(inputs=inputs_dense, outputs=outputs_dense)

# Save Initial Weights so that every fold starts from the same initialisation
dense_model = create_model()
initial_weights = dense_model.get_weights()

# CV and OOF Predictions
# Float-initialised: the frame receives softmax probabilities, and an integer
# frame would depend on pandas silently upcasting on assignment.
y_pred_1 = pd.DataFrame(0.0, columns=[0, 1, 2, 3], index=X.index)
scores = []

# Define the groups: every observed (Gender, Ethnicity) combination
groups = [
    {'Gender': 0, 'Ethnicity': 0.0},
    {'Gender': 0, 'Ethnicity': 1.0},
    {'Gender': 0, 'Ethnicity': 2.0},
    {'Gender': 1, 'Ethnicity': 0.0},
    {'Gender': 1, 'Ethnicity': 1.0},
    {'Gender': 1, 'Ethnicity': 2.0},
    {'Gender': 0, 'Ethnicity': 4.0},
    {'Gender': 1, 'Ethnicity': 4.0}
]

# Stratified K-Fold (total is passed because split() is a generator without a length)
for train_index, test_index in tqdm(StratifiedKFold(n_splits=5).split(X, y), total=5):
    X_Train, X_Test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

    # Initialize weights array; the per-group reweighing factors are multiplied into it
    weights = np.ones(len(y_Train))

    for priv_group in groups:
        # Determine unprivileged groups: every group except the current one
        unprivileged_groups = [group for group in groups if group != priv_group]

        # Convert to AIF360 format. privileged_classes needs one list per
        # protected attribute, in the same order as protected_attribute_names.
        aif360_train = StandardDataset(pd.concat([X_Train, y_Train], axis=1),
                                       label_name='Age', favorable_classes=[1],
                                       protected_attribute_names=['Gender', 'Ethnicity'],
                                       privileged_classes=[[priv_group['Gender']],
                                                           [priv_group['Ethnicity']]])

        # Apply the reweighing algorithm
        RW = Reweighing(unprivileged_groups=unprivileged_groups, privileged_groups=[priv_group])
        aif360_train = RW.fit_transform(aif360_train)

        # Update weights
        weights *= aif360_train.instance_weights

    # Normalize weights so that their mean is 1
    weights /= np.mean(weights)

    # Initialize and compile model
    dense_model = create_model()
    dense_model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    dense_model.set_weights(initial_weights)

    # Train the model with the reweighted dataset
    dense_model.fit(X_Train.astype('float32'), y_Train.astype('float32'), sample_weight=weights, epochs=25, batch_size=64, verbose=0)

    # Make predictions and evaluate
    preds = dense_model.predict(X_Test.astype('float32')).squeeze()
    # test_index holds row positions, so the out-of-fold block is written by position
    y_pred_1.iloc[test_index, :] = preds
    scores.append(accuracy_score(y_Test.values.astype('float32'), np.argmax(preds, axis=1)))

print("Mean:", np.mean(scores), "\nSTD: ", np.std(scores), '\n')



In [None]:
# Persist the out-of-fold class probabilities (one column per class, no index column).
y_pred_1.to_csv('y_pred_Reweighing.csv',index=False)

In [None]:
# Reload the saved out-of-fold predictions and score them against all labels.
# The score is computed from the loaded frame rather than from the last
# fold's leftover variables, so the cell also works on a fresh kernel.
# NOTE(review): assumes the CSV rows are in the same order as y - confirm.
y_pred_1 = pd.read_csv('/kaggle/input/aif360-preds/y_pred_Reweighing.csv')
accuracy_score(y.values, np.argmax(y_pred_1.values, axis=1))

#### Metrics

In [None]:
import pandas as pd
import numpy as np


# Track the largest value of each fairness metric seen over all favorable age
# classes and all choices of privileged group.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Hard class prediction per row (argmax over the class-probability columns)
pred_classes = np.argmax(y_pred_1.values, axis=1)

# Every group is used once as the privileged group; the rest are unprivileged
groups = [
    {'Gender': 0},
    {'Gender': 1}
]

# NOTE(review): range(3) never treats age class 3 as the favorable class -
# confirm this is intended.
for age_cls in range(3):
    # NOTE(review): the metrics below are driven by the explicit group dicts,
    # so iterating privileged_classes may not change the results - confirm
    # before simplifying.
    for priv_cls in [0,1]:
        # Convert to AIF360 format; favorable_classes=[age_cls] makes the
        # labels one-vs-rest for this age class.
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Gender'],
                                         privileged_classes=[[priv_cls]])

        # The predictions must use the dataset's favorable/unfavorable label
        # encoding, not raw class indices: favorable exactly where age_cls
        # was predicted.
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = np.where(
            pred_classes == age_cls,
            aif360_dataset.favorable_label,
            aif360_dataset.unfavorable_label,
        ).astype(float).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np

# Track the largest value of each fairness metric seen over all favorable age
# classes and all choices of privileged group.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Hard class prediction per row (argmax over the class-probability columns)
pred_classes = np.argmax(y_pred_1.values, axis=1)

# Every group is used once as the privileged group; the rest are unprivileged
groups = [
    {'Ethnicity': 0.0},
    {'Ethnicity': 1.0},
    {'Ethnicity': 2.0},
    {'Ethnicity': 4.0}]

# Find Max value for metrics by looping over the favorable age classes and the privileged class.
# NOTE(review): range(3) never treats age class 3 as the favorable class -
# confirm this is intended.
for age_cls in range(3):
    # NOTE(review): the metrics below are driven by the explicit group dicts,
    # so iterating privileged_classes may not change the results - confirm
    # before simplifying.
    for priv_cls in [0.0,1.0,2.0,4.0]:
        # Convert to AIF360 format; favorable_classes=[age_cls] makes the
        # labels one-vs-rest for this age class.
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Ethnicity'],
                                         privileged_classes=[[priv_cls]])

        # The predictions must use the dataset's favorable/unfavorable label
        # encoding, not raw class indices: favorable exactly where age_cls
        # was predicted.
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = np.where(
            pred_classes == age_cls,
            aif360_dataset.favorable_label,
            aif360_dataset.unfavorable_label,
        ).astype(float).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np


# Track the largest value of each fairness metric seen over all favorable age
# classes and all choices of privileged group.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Hard class prediction per row (argmax over the class-probability columns)
pred_classes = np.argmax(y_pred_1.values, axis=1)

# Every (Gender, Ethnicity) combination is used once as the privileged group;
# the rest are unprivileged.
groups = [
    {'Gender': 0, 'Ethnicity': 0.0},
    {'Gender': 0, 'Ethnicity': 1.0},
    {'Gender': 0, 'Ethnicity': 2.0},
    {'Gender': 1, 'Ethnicity': 0.0},
    {'Gender': 1, 'Ethnicity': 1.0},
    {'Gender': 1, 'Ethnicity': 2.0},
    {'Gender': 0, 'Ethnicity': 4.0},
    {'Gender': 1, 'Ethnicity': 4.0}
]

# Find Max value for metrics by looping over the favorable age classes and the privileged classes.
# NOTE(review): range(3) never treats age class 3 as the favorable class -
# confirm this is intended.
for age_cls in range(3):
    # NOTE(review): the metrics below are driven by the explicit group dicts,
    # so iterating privileged_classes may not change the results - confirm
    # before simplifying.
    for priv_cls_1 in [0,1]:   # Gender
        for priv_cls_2 in [0.0,1.0,2.0,4.0]:   # Ethnicity

            # Convert to AIF360 format; favorable_classes=[age_cls] makes the
            # labels one-vs-rest for this age class.
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Age', favorable_classes=[age_cls],
                                             protected_attribute_names=['Gender','Ethnicity'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # The predictions must use the dataset's favorable/unfavorable
            # label encoding, not raw class indices: favorable exactly where
            # age_cls was predicted.
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = np.where(
                pred_classes == age_cls,
                aif360_dataset.favorable_label,
                aif360_dataset.unfavorable_label,
            ).astype(float).reshape(-1, 1)

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())
                max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

## 2- Disparate Impact Remover

In [None]:
from aif360.algorithms.preprocessing import DisparateImpactRemover
from aif360.datasets import StandardDataset
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm

# Model Definition
def create_model():
    """
    Return a new, uncompiled dense classifier for the four age groups.

    The network maps 20 input features through ReLU layers of 16 and 8 units
    (each followed by dropout of 0.5) to a 4-unit softmax output.
    """
    inputs_dense = Input(shape=(20,))
    activations = inputs_dense
    for width in (16, 8):
        dense_layer = Dense(width, activation='relu', kernel_initializer='glorot_uniform')
        activations = Dropout(0.5)(dense_layer(activations))
    outputs_dense = Dense(4, activation='softmax', kernel_initializer='glorot_uniform')(activations)
    model = Model(inputs=inputs_dense, outputs=outputs_dense)
    return model

# Save Initial Weights so that every fold starts from the same initialisation
dense_model = create_model()
initial_weights = dense_model.get_weights()

# CV and OOF Predictions
# Float-initialised: the frame receives softmax probabilities, and an integer
# frame would depend on pandas silently upcasting on assignment.
y_pred_2 = pd.DataFrame(0.0, columns=[0, 1, 2, 3], index=X.index)
scores = []

# Define the groups: every observed (Gender, Ethnicity) combination
groups = [
    {'Gender': 0, 'Ethnicity': 0.0},
    {'Gender': 0, 'Ethnicity': 1.0},
    {'Gender': 0, 'Ethnicity': 2.0},
    {'Gender': 1, 'Ethnicity': 0.0},
    {'Gender': 1, 'Ethnicity': 1.0},
    {'Gender': 1, 'Ethnicity': 2.0},
    {'Gender': 0, 'Ethnicity': 4.0},
    {'Gender': 1, 'Ethnicity': 4.0}
]

# total is passed because split() is a generator without a length
for train_index, test_index in tqdm(StratifiedKFold(n_splits=5).split(X, y), total=5):
    X_Train, X_Test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

    # Numeric accumulator for the repaired features, summed over the groups
    # and averaged afterwards.
    transformed_X_Train = pd.DataFrame(0.0, index=X_Train.index, columns=X_Train.columns)

    # NOTE(review): DisparateImpactRemover below is built without a sensitive
    # attribute and is not given the group definitions, so each pass may
    # produce the same repair - confirm whether this loop is needed.
    for priv_group in tqdm(groups):
        # Determine unprivileged groups: every group except the current one
        unprivileged_groups = [group for group in groups if group != priv_group]

        # Convert to AIF360 format. privileged_classes needs one list per
        # protected attribute, in the same order as protected_attribute_names.
        aif360_train = StandardDataset(pd.concat([X_Train, y_Train], axis=1),
                                       label_name='Age', favorable_classes=[1],
                                       protected_attribute_names=['Gender', 'Ethnicity'],
                                       privileged_classes=[[priv_group['Gender']],
                                                           [priv_group['Ethnicity']]])

        # Apply Disparate Impact Remover
        DIR = DisparateImpactRemover(repair_level=1.0)
        aif360_train = DIR.fit_transform(aif360_train)

        # Update the transformed features
        repaired_features = pd.DataFrame(aif360_train.features, columns=X.columns, index=X_Train.index)
        transformed_X_Train = transformed_X_Train.add(repaired_features, fill_value=0)

    # Average the transformed features
    transformed_X_Train /= len(groups)

    # Initialize and compile model
    dense_model = create_model()
    dense_model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    dense_model.set_weights(initial_weights)

    # Train the model with the transformed dataset
    dense_model.fit(transformed_X_Train.astype('float32'), y_Train.astype('float32'), epochs=25, batch_size=64, verbose=0)

    # Make predictions and evaluate.
    # NOTE(review): the model is trained on repaired features but predicts on
    # the unrepaired X_Test - confirm this is intended.
    preds = dense_model.predict(X_Test.astype('float32')).squeeze()
    # test_index holds row positions, so the out-of-fold block is written by position
    y_pred_2.iloc[test_index, :] = preds
    scores.append(accuracy_score(y_Test.values.astype('float32'), np.argmax(preds, axis=1)))

print("Mean:", np.mean(scores), "\nSTD: ", np.std(scores), '\n')

In [None]:
# Persist the out-of-fold class probabilities (one column per class, no index column).
y_pred_2.to_csv('y_pred_DisparateImpactRemover.csv',index=False)

In [None]:
# Reload the saved out-of-fold predictions and score them against all labels.
# The score is computed from the loaded frame rather than from the last
# fold's leftover variables, so the cell also works on a fresh kernel.
# NOTE(review): assumes the CSV rows are in the same order as y - confirm.
y_pred_2 = pd.read_csv('/kaggle/input/aif360-preds/y_pred_DisparateImpactRemover.csv')
accuracy_score(y.values, np.argmax(y_pred_2.values, axis=1))

### Metrics

In [None]:
import pandas as pd
import numpy as np


# Track the largest value of each fairness metric seen over all favorable age
# classes and all choices of privileged group.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Hard class prediction per row (argmax over the class-probability columns)
pred_classes = np.argmax(y_pred_2.values, axis=1)

# Every group is used once as the privileged group; the rest are unprivileged
groups = [
    {'Gender': 0},
    {'Gender': 1}
]

# NOTE(review): range(3) never treats age class 3 as the favorable class -
# confirm this is intended.
for age_cls in range(3):
    # NOTE(review): the metrics below are driven by the explicit group dicts,
    # so iterating privileged_classes may not change the results - confirm
    # before simplifying.
    for priv_cls in [0,1]:
        # Convert to AIF360 format; favorable_classes=[age_cls] makes the
        # labels one-vs-rest for this age class.
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Gender'],
                                         privileged_classes=[[priv_cls]])

        # The predictions must use the dataset's favorable/unfavorable label
        # encoding, not raw class indices: favorable exactly where age_cls
        # was predicted.
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = np.where(
            pred_classes == age_cls,
            aif360_dataset.favorable_label,
            aif360_dataset.unfavorable_label,
        ).astype(float).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Define the groups (loop-invariant); each one takes a turn as the privileged group
groups = [
    {'Ethnicity': 0.0},
    {'Ethnicity': 1.0},
    {'Ethnicity': 2.0},
    {'Ethnicity': 4.0}]

# Hard class prediction per sample: index of the highest-scoring column
pred_classes = np.argmax(y_pred_2.values, axis=1)

# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
# NOTE(review): class 3 is never used as the favorable class although other prediction
# frames in this notebook have four class columns (0-3) - confirm this is intended.
for age_cls in range(3):
    for priv_cls in [0.0,1.0,2.0,4.0]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Ethnicity'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions.
        # StandardDataset re-encodes the true labels as favorable/unfavorable, so the
        # predicted class ids must be encoded the same way; assigning raw class ids
        # would make ClassificationMetric compare them against the binary labels.
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = np.where(pred_classes == age_cls,
                                              aif360_dataset.favorable_label,
                                              aif360_dataset.unfavorable_label).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Define the groups (loop-invariant); each one takes a turn as the privileged group
groups = [
    {'Gender': 0, 'Ethnicity': 0.0},
    {'Gender': 0, 'Ethnicity': 1.0},
    {'Gender': 0, 'Ethnicity': 2.0},
    {'Gender': 1, 'Ethnicity': 0.0},
    {'Gender': 1, 'Ethnicity': 1.0},
    {'Gender': 1, 'Ethnicity': 2.0},
    {'Gender': 0, 'Ethnicity': 4.0},
    {'Gender': 1, 'Ethnicity': 4.0}
]

# Hard class prediction per sample: index of the highest-scoring column
pred_classes = np.argmax(y_pred_2.values, axis=1)

# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
# NOTE(review): class 3 is never used as the favorable class although other prediction
# frames in this notebook have four class columns (0-3) - confirm this is intended.
for age_cls in range(3):
    for priv_cls_1 in [0,1]:   # Gender
        for priv_cls_2 in [0.0,1.0,2.0,4.0]:   # Ethnicity

            # Convert to AIF360 format
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Age', favorable_classes=[age_cls],
                                             protected_attribute_names=['Gender','Ethnicity'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Create a dataset for the predictions.
            # StandardDataset re-encodes the true labels as favorable/unfavorable, so the
            # predicted class ids must be encoded the same way; assigning raw class ids
            # would make ClassificationMetric compare them against the binary labels.
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = np.where(pred_classes == age_cls,
                                                  aif360_dataset.favorable_label,
                                                  aif360_dataset.unfavorable_label).reshape(-1, 1)

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())
                max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

## 3- Learning Fair Representations (LFR)

In [None]:
from aif360.algorithms.preprocessing import LFR
from aif360.datasets import StandardDataset
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm

# Model Definition
def create_model():
    """Build the uncompiled dense classifier used for the LFR experiment.

    Architecture: 20 input features -> Dense(16, relu) -> Dropout(0.5)
    -> Dense(8, relu) -> Dropout(0.5) -> Dense(4, softmax).

    Returns:
        A Keras ``Model``; the caller is responsible for compiling it.
    """
    inputs_dense = Input(shape=(20,))
    hidden = inputs_dense
    # Two identical relu + dropout stages that differ only in width
    for units in (16, 8):
        hidden = Dense(units, activation='relu', kernel_initializer='glorot_uniform')(hidden)
        hidden = Dropout(0.5)(hidden)
    outputs_dense = Dense(4, activation='softmax', kernel_initializer='glorot_uniform')(hidden)
    return Model(inputs=inputs_dense, outputs=outputs_dense)


# Initialize model and weights
# The weights of one freshly built model are captured so that every fold
# starts training from the same initialization.
dense_model = create_model()
initial_weights = dense_model.get_weights()

# CV and OOF Predictions
# Float-initialised so the predicted probabilities are stored without a dtype upcast.
y_pred_3 = pd.DataFrame(0.0, columns=[0, 1, 2, 3], index=X.index)
scores = []

# Define the groups
groups = [
                {'Gender': 0, 'Ethnicity': 0.0},
                {'Gender': 0, 'Ethnicity': 1.0},
                {'Gender': 0, 'Ethnicity': 2.0},
                {'Gender': 1, 'Ethnicity': 0.0},
                {'Gender': 1, 'Ethnicity': 1.0},
                {'Gender': 1, 'Ethnicity': 2.0},
                {'Gender': 0, 'Ethnicity': 4.0},
                {'Gender': 1, 'Ethnicity': 4.0}
            ]

# Stratified K-Fold
for train_index, test_index in tqdm(StratifiedKFold(n_splits=5).split(X, y)):
    X_Train, X_Test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

    # Initialize a DataFrame to hold the transformed features
    transformed_X_Train = pd.DataFrame(index=X_Train.index, columns=X_Train.columns)

    for priv_group in tqdm(groups):
        # Determine unprivileged groups
        unprivileged_groups = [group for group in groups if group != priv_group]

        # Apply LFR for each pair of unprivileged and privileged groups
        # NOTE(review): AIF360's LFR appears to work on a single protected attribute per
        # group - confirm that the two-attribute groups used here behave as intended.
        for unpriv_group in unprivileged_groups:
            # Convert to AIF360 format.
            # privileged_classes needs one list per protected attribute, in the same
            # order as protected_attribute_names.
            aif360_train = StandardDataset(pd.concat([X_Train, y_Train], axis=1),
                                           label_name='Age', favorable_classes=[2],
                                           protected_attribute_names=['Gender', 'Ethnicity'],
                                           privileged_classes=[[priv_group['Gender']],
                                                               [priv_group['Ethnicity']]])

            # Apply Learning Fair Representations
            lfr = LFR(unprivileged_groups=[unpriv_group], privileged_groups=[priv_group],seed=42,k=4,verbose=0)
            lfr = lfr.fit(aif360_train)
            aif360_train = lfr.transform(aif360_train)

            # Update the transformed features
            transformed_X_Train = transformed_X_Train.add(pd.DataFrame(aif360_train.features, columns=X.columns, index=X_Train.index), fill_value=0)

    # Average the transformed features over all ordered (privileged, unprivileged) pairs
    transformed_X_Train /= len(groups) * (len(groups) - 1)

    # Initialize and compile model
    dense_model = create_model()
    dense_model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    dense_model.set_weights(initial_weights)

    # Train the model with the transformed dataset
    dense_model.fit(transformed_X_Train.astype('float32'), y_Train.astype('float32'), epochs=25, batch_size=64, verbose=0)

    # Make predictions and evaluate
    # NOTE(review): the model is trained on LFR-transformed features but predicts on the
    # untransformed X_Test - confirm this train/test mismatch is intended.
    preds = dense_model.predict(X_Test.astype('float32')).squeeze()
    # test_index holds positions (from StratifiedKFold), so write positionally;
    # .loc would treat them as index labels and misalign on a non-default index.
    y_pred_3.iloc[test_index, :] = preds
    scores.append(accuracy_score(y_Test.values.astype('float32'), np.argmax(preds, axis=1)))

print("Mean:", np.mean(scores), "\nSTD: ", np.std(scores), '\n')

In [None]:
# Persist the out-of-fold LFR predictions (one column per class) without the index column.
y_pred_3.to_csv('y_pred_LFR.csv',index=False)

### Metrics

In [None]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Define the groups (loop-invariant); each one takes a turn as the privileged group
groups = [
    {'Gender': 0},
    {'Gender': 1}
]

# Hard class prediction per sample: index of the highest-scoring column
pred_classes = np.argmax(y_pred_3.values, axis=1)

# NOTE(review): class 3 is never used as the favorable class although y_pred_3 has
# four class columns (0-3) - confirm this is intended.
for age_cls in range(3):
    for priv_cls in [0,1]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Gender'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions.
        # StandardDataset re-encodes the true labels as favorable/unfavorable, so the
        # predicted class ids must be encoded the same way; assigning raw class ids
        # would make ClassificationMetric compare them against the binary labels.
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = np.where(pred_classes == age_cls,
                                              aif360_dataset.favorable_label,
                                              aif360_dataset.unfavorable_label).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Define the groups (loop-invariant); each one takes a turn as the privileged group
groups = [
    {'Ethnicity': 0.0},
    {'Ethnicity': 1.0},
    {'Ethnicity': 2.0},
    {'Ethnicity': 4.0}]

# Hard class prediction per sample: index of the highest-scoring column
pred_classes = np.argmax(y_pred_3.values, axis=1)

# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
# NOTE(review): class 3 is never used as the favorable class although y_pred_3 has
# four class columns (0-3) - confirm this is intended.
for age_cls in range(3):
    for priv_cls in [0.0,1.0,2.0,4.0]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Ethnicity'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions.
        # StandardDataset re-encodes the true labels as favorable/unfavorable, so the
        # predicted class ids must be encoded the same way; assigning raw class ids
        # would make ClassificationMetric compare them against the binary labels.
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = np.where(pred_classes == age_cls,
                                              aif360_dataset.favorable_label,
                                              aif360_dataset.unfavorable_label).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())
            max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Define the groups (loop-invariant); each one takes a turn as the privileged group
groups = [
    {'Gender': 0, 'Ethnicity': 0.0},
    {'Gender': 0, 'Ethnicity': 1.0},
    {'Gender': 0, 'Ethnicity': 2.0},
    {'Gender': 1, 'Ethnicity': 0.0},
    {'Gender': 1, 'Ethnicity': 1.0},
    {'Gender': 1, 'Ethnicity': 2.0},
    {'Gender': 0, 'Ethnicity': 4.0},
    {'Gender': 1, 'Ethnicity': 4.0}
]

# Hard class prediction per sample: index of the highest-scoring column
pred_classes = np.argmax(y_pred_3.values, axis=1)

# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
# NOTE(review): class 3 is never used as the favorable class although y_pred_3 has
# four class columns (0-3) - confirm this is intended.
for age_cls in range(3):
    for priv_cls_1 in [0,1]:   # Gender
        for priv_cls_2 in [0.0,1.0,2.0,4.0]:   # Ethnicity

            # Convert to AIF360 format
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Age', favorable_classes=[age_cls],
                                             protected_attribute_names=['Gender','Ethnicity'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Create a dataset for the predictions.
            # StandardDataset re-encodes the true labels as favorable/unfavorable, so the
            # predicted class ids must be encoded the same way; assigning raw class ids
            # would make ClassificationMetric compare them against the binary labels.
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = np.where(pred_classes == age_cls,
                                                  aif360_dataset.favorable_label,
                                                  aif360_dataset.unfavorable_label).reshape(-1, 1)

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())
                max_di = max(max_di, metric.disparate_impact())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

# In-processing Algorithms:

### 1- Adversarial Debiasing

In [None]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from aif360.algorithms.inprocessing import AdversarialDebiasing
from aif360.datasets import StandardDataset
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from tensorflow.keras.layers import Dense, Dropout, Input
from tqdm import tqdm

# Model Definition for Adversarial Debiasing
def create_adv_model():
    """Build an uncompiled dense binary classifier.

    Architecture: 20 input features -> Dense(16, relu) -> Dropout(0.5)
    -> Dense(8, relu) -> Dropout(0.5) -> Dense(1, sigmoid).

    NOTE(review): the cross-validation loop in this cell does not call this
    function - confirm whether it is still needed.

    Returns:
        A ``tf.keras.Model``; the caller is responsible for compiling it.
    """
    inputs_dense = Input(shape=(20,))
    hidden = inputs_dense
    # Two identical relu + dropout stages that differ only in width
    for units in (16, 8):
        hidden = Dense(units, activation='relu', kernel_initializer='glorot_uniform')(hidden)
        hidden = Dropout(0.5)(hidden)
    outputs_dense = Dense(1, activation='sigmoid', kernel_initializer='glorot_uniform')(hidden)  # Binary output
    return tf.keras.Model(inputs=inputs_dense, outputs=outputs_dense)

# CV and OOF Predictions
# One column per class; each column receives the 0/1 output of that class's one-vs-rest model.
y_pred_4 = pd.DataFrame(0, columns=[0, 1, 2, 3], index=X.index)
scores = []

# Define the groups
groups = [
    {'Gender': 0},
    {'Gender': 1},
]
priv_group = groups[0]
unprivileged_groups = [groups[1]]

# Unique values of the target
unique_targets = y.unique()

for train_index, test_index in tqdm(StratifiedKFold(n_splits=5).split(X, y)):
    X_Train, X_Test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

    # One-vs-rest: train a separate binary debiased model per target class
    for target_value in unique_targets:
        # Each model gets a fresh graph/session so variable scopes do not collide
        tf.reset_default_graph()
        sess = tf.Session()
        try:
            # Convert the target to binary for the current class
            y_Train_binary = (y_Train == target_value).astype(int)
            y_Test_binary = (y_Test == target_value).astype(int)

            # Convert to AIF360 format
            aif360_train = StandardDataset(pd.concat([X_Train, y_Train_binary], axis=1),
                                           label_name='Age', favorable_classes=[1],
                                           protected_attribute_names=['Gender'],
                                           privileged_classes=[[priv_group['Gender']]])

            aif360_test = StandardDataset(pd.concat([X_Test, y_Test_binary], axis=1),
                                          label_name='Age', favorable_classes=[1],
                                          protected_attribute_names=['Gender'],
                                          privileged_classes=[[priv_group['Gender']]])

            # Initialize and train Adversarial Debiasing model
            adv_model = AdversarialDebiasing(privileged_groups=[priv_group],
                                             unprivileged_groups=unprivileged_groups,
                                             scope_name='adv_debiasing',adversary_loss_weight=0.01,
                                             sess=sess,
                                             num_epochs=10,
                                             batch_size=32)
            adv_model.fit(aif360_train)

            # Make predictions and evaluate
            preds = adv_model.predict(aif360_test).labels
            # test_index holds positions, not index labels: address the rows through
            # the fold's own index so the write is correct for any index on X.
            # NOTE(review): hard 0/1 labels are stored, so the later argmax resolves ties
            # (including "no class fired") to the lowest class id - consider storing scores.
            y_pred_4.loc[X_Test.index, target_value] = preds[:, 0]
            scores.append(accuracy_score(y_Test_binary.values, preds[:, 0]))
        finally:
            # Close TensorFlow session even if training or prediction fails
            sess.close()

accuracy_score(y,np.argmax(y_pred_4.values,axis=1))

In [None]:
# Persist the out-of-fold adversarial-debiasing predictions (one column per class) without the index column.
y_pred_4.to_csv('y_pred_AD.csv',index=False)

### Metrics

In [None]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Define the groups (loop-invariant); each one takes a turn as the privileged group
groups = [
    {'Gender': 0},
    {'Gender': 1}
]

# Hard class prediction per sample: index of the highest-scoring column
pred_classes = np.argmax(y_pred_4.values, axis=1)

# NOTE(review): class 3 is never used as the favorable class although y_pred_4 has
# four class columns (0-3) - confirm this is intended.
for age_cls in range(3):
    for priv_cls in [0,1]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Gender'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions.
        # StandardDataset re-encodes the true labels as favorable/unfavorable, so the
        # predicted class ids must be encoded the same way; assigning raw class ids
        # would make ClassificationMetric compare them against the binary labels.
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = np.where(pred_classes == age_cls,
                                              aif360_dataset.favorable_label,
                                              aif360_dataset.unfavorable_label).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np

# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Define the groups (loop-invariant); each one takes a turn as the privileged group
groups = [
    {'Ethnicity': 0.0},
    {'Ethnicity': 1.0},
    {'Ethnicity': 2.0},
    {'Ethnicity': 4.0}]

# Hard class prediction per sample: index of the highest-scoring column
pred_classes = np.argmax(y_pred_4.values, axis=1)

# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
# NOTE(review): class 3 is never used as the favorable class although y_pred_4 has
# four class columns (0-3) - confirm this is intended.
for age_cls in range(3):
    for priv_cls in [0.0,1.0,2.0,4.0]:
        # Convert to AIF360 format
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Ethnicity'],
                                         privileged_classes=[[priv_cls]])

        # Create a dataset for the predictions.
        # StandardDataset re-encodes the true labels as favorable/unfavorable, so the
        # predicted class ids must be encoded the same way; assigning raw class ids
        # would make ClassificationMetric compare them against the binary labels.
        aif360_pred_dataset = aif360_dataset.copy()
        aif360_pred_dataset.labels = np.where(pred_classes == age_cls,
                                              aif360_dataset.favorable_label,
                                              aif360_dataset.unfavorable_label).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np


# Initialize variables to store maximum values of each metric
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Define the groups (loop-invariant); each one takes a turn as the privileged group
groups = [
    {'Gender': 0, 'Ethnicity': 0.0},
    {'Gender': 0, 'Ethnicity': 1.0},
    {'Gender': 0, 'Ethnicity': 2.0},
    {'Gender': 1, 'Ethnicity': 0.0},
    {'Gender': 1, 'Ethnicity': 1.0},
    {'Gender': 1, 'Ethnicity': 2.0},
    {'Gender': 0, 'Ethnicity': 4.0},
    {'Gender': 1, 'Ethnicity': 4.0}
]

# Hard class prediction per sample: index of the highest-scoring column
pred_classes = np.argmax(y_pred_4.values, axis=1)

# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
# NOTE(review): class 3 is never used as the favorable class although y_pred_4 has
# four class columns (0-3) - confirm this is intended.
for age_cls in range(3):
    for priv_cls_1 in [0,1]:   # Gender
        for priv_cls_2 in [0.0,1.0,2.0,4.0]:   # Ethnicity

            # Convert to AIF360 format
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Age', favorable_classes=[age_cls],
                                             protected_attribute_names=['Gender','Ethnicity'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Create a dataset for the predictions.
            # StandardDataset re-encodes the true labels as favorable/unfavorable, so the
            # predicted class ids must be encoded the same way; assigning raw class ids
            # would make ClassificationMetric compare them against the binary labels.
            aif360_pred_dataset = aif360_dataset.copy()
            aif360_pred_dataset.labels = np.where(pred_classes == age_cls,
                                                  aif360_dataset.favorable_label,
                                                  aif360_dataset.unfavorable_label).reshape(-1, 1)

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

# Postprocessing Algorithms:

### 1- Calibrated Equalized Odds Postprocessing

In [None]:
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from aif360.algorithms.postprocessing import EqOddsPostprocessing,CalibratedEqOddsPostprocessing
from aif360.datasets import StandardDataset
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from tqdm import tqdm
import tensorflow.keras.backend as K

# Define your deep learning model
def create_model(input_dim=100):
    """Build the uncompiled dense binary classifier used before post-processing.

    Note: this redefinition shadows the ``create_model`` defined in the LFR cell.

    Args:
        input_dim: Number of input features. Defaults to 100 (the previously
            hard-coded width); callers should pass the real feature count.

    Returns:
        A Keras ``Model`` with a single sigmoid output; the caller compiles it.
    """
    inputs_dense = Input(shape=(input_dim,))
    x = Dense(128, activation='relu', kernel_initializer='glorot_uniform')(inputs_dense)
    x = Dropout(0.5)(x)
    x = Dense(64, activation='relu', kernel_initializer='glorot_uniform')(x)
    x = Dropout(0.5)(x)
    x = Dense(32, activation='relu', kernel_initializer='glorot_uniform')(x)
    x = Dropout(0.5)(x)
    outputs_dense = Dense(1, activation='sigmoid', kernel_initializer='glorot_uniform')(x)  # Change for binary output
    model = Model(inputs=inputs_dense, outputs=outputs_dense)
    return model

privileged_groups = [{'Gender': 1}]
unprivileged_groups = [{'Gender': 0}]

n_classes = 4  # Number of classes
scores = []
# Row i / column c holds the post-processed 0/1 decision of the class-c one-vs-rest model
y_pred_all_classes = np.zeros((len(y), n_classes))

for class_index in range(n_classes):
    for train_index, test_index in tqdm(StratifiedKFold(n_splits=5).split(X, y)):
        # The fold indices are positions, so select positionally (.iloc), not by label
        X_Train, X_Test = X.iloc[train_index, :], X.iloc[test_index, :]
        y_Train, y_Test = y.iloc[train_index], y.iloc[test_index]

        # Convert to binary classification for the current class
        y_Train_binary = (y_Train == class_index).astype(int)
        y_Test_binary = (y_Test == class_index).astype(int)

        # Initialize and train the model (input width taken from the data)
        dense_model = create_model(input_dim=X_Train.shape[1])
        dense_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])  # Use binary_crossentropy
        dense_model.fit(X_Train, y_Train_binary, epochs=25, batch_size=64, verbose=0)

        # Predict using the standard model
        standard_preds = dense_model.predict(X_Test).flatten()  # Flatten for binary output

        # Convert to AIF360 format for postprocessing
        aif360_test = StandardDataset(pd.concat([X_Test[['Gender']], pd.DataFrame(y_Test_binary, columns=['Age'], index=X_Test.index)], axis=1),
                                      label_name='Age', favorable_classes=[1],
                                      protected_attribute_names=['Gender'],
                                      privileged_classes=[[1]])

        # Prediction dataset: same rows as the ground truth, with the model's
        # probabilities as scores and their 0.5-thresholded version as labels.
        # Passing raw probabilities through StandardDataset(favorable_classes=[1])
        # would mark only values exactly equal to 1 as favorable and discard the scores.
        aif360_preds = aif360_test.copy(deepcopy=True)
        aif360_preds.scores = standard_preds.reshape(-1, 1)
        aif360_preds.labels = np.where(standard_preds >= 0.5,
                                       aif360_test.favorable_label,
                                       aif360_test.unfavorable_label).reshape(-1, 1)

        # Apply Equalized Odds Postprocessing
        # NOTE(review): the post-processor is fitted and applied on the same test fold,
        # which leaks test labels into the predictions - consider a held-out validation split.
        eq_odds = CalibratedEqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=42)
        eq_odds = eq_odds.fit(aif360_test, aif360_preds)
        transformed_preds = eq_odds.predict(aif360_preds).labels

        # Store the predictions for the current class
        y_pred_all_classes[test_index, class_index] = transformed_preds.squeeze()

# Convert the predictions for each class into final multi-class predictions
y_pred_final = np.argmax(y_pred_all_classes, axis=1)
overall_accuracy = accuracy_score(y, y_pred_final)

print("Overall Accuracy:", overall_accuracy)


In [None]:
# Persist the final post-processed class predictions as a single-column CSV without the index column.
pd.DataFrame(y_pred_final).to_csv('y_pred_EOP.csv',index=False)

In [None]:
import pandas as pd
import numpy as np


# Running maxima of each fairness metric over every (favorable age class,
# privileged group) combination evaluated below. max_di is initialised for
# parity with the original cell but is not updated here.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Every gender value takes a turn as the privileged group (loop-invariant, so
# defined once instead of on every iteration).
groups = [
    {'Gender': 0},
    {'Gender': 1}
]

for age_cls in range(3):
    # NOTE(review): the metric below receives its groups explicitly, so iterating
    # over priv_cls presumably repeats the same computation for a numeric
    # 'Gender' column — confirm before simplifying.
    for priv_cls in [0, 1]:
        # Ground-truth dataset in AIF360 format with `age_cls` as the favorable outcome.
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Gender'],
                                         privileged_classes=[[priv_cls]])

        # Prediction dataset: identical rows, labels replaced by the model output.
        # StandardDataset re-encodes the label column as favorable/unfavorable, so the
        # raw class ids in y_pred_final must be encoded the same way (and shaped
        # (n, 1)); otherwise the metric compares predictions against the wrong class.
        aif360_pred_dataset = aif360_dataset.copy()
        pred_is_favorable = np.asarray(y_pred_final).ravel() == age_cls
        aif360_pred_dataset.labels = np.where(
            pred_is_favorable,
            aif360_dataset.favorable_label,
            aif360_dataset.unfavorable_label,
        ).astype(float).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np

# Running maxima of each fairness metric over every (favorable age class,
# privileged group) combination evaluated below. max_di is initialised for
# parity with the original cell but is not updated here.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Every ethnicity code takes a turn as the privileged group (loop-invariant, so
# defined once instead of on every iteration).
groups = [
    {'Ethnicity': 0.0},
    {'Ethnicity': 1.0},
    {'Ethnicity': 2.0},
    {'Ethnicity': 4.0}]

# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(3):
    # NOTE(review): the metric below receives its groups explicitly, so iterating
    # over priv_cls presumably repeats the same computation for a numeric
    # 'Ethnicity' column — confirm before simplifying.
    for priv_cls in [0.0, 1.0, 2.0, 4.0]:
        # Ground-truth dataset in AIF360 format with `age_cls` as the favorable outcome.
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Ethnicity'],
                                         privileged_classes=[[priv_cls]])

        # Prediction dataset: identical rows, labels replaced by the model output.
        # StandardDataset re-encodes the label column as favorable/unfavorable, so the
        # raw class ids in y_pred_final must be encoded the same way (and shaped
        # (n, 1)); otherwise the metric compares predictions against the wrong class.
        aif360_pred_dataset = aif360_dataset.copy()
        pred_is_favorable = np.asarray(y_pred_final).ravel() == age_cls
        aif360_pred_dataset.labels = np.where(
            pred_is_favorable,
            aif360_dataset.favorable_label,
            aif360_dataset.unfavorable_label,
        ).astype(float).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np


# Running maxima of each fairness metric over every (favorable age class,
# privileged group) combination evaluated below. max_di is initialised for
# parity with the original cell but is not updated here.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Every (gender, ethnicity) pair takes a turn as the privileged group
# (loop-invariant, so defined once instead of on every iteration).
groups = [
    {'Gender': 0, 'Ethnicity': 0.0},
    {'Gender': 0, 'Ethnicity': 1.0},
    {'Gender': 0, 'Ethnicity': 2.0},
    {'Gender': 1, 'Ethnicity': 0.0},
    {'Gender': 1, 'Ethnicity': 1.0},
    {'Gender': 1, 'Ethnicity': 2.0},
    {'Gender': 0, 'Ethnicity': 4.0},
    {'Gender': 1, 'Ethnicity': 4.0}
]

# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(3):
    # NOTE(review): the metric below receives its groups explicitly, so iterating
    # over priv_cls_1 / priv_cls_2 presumably repeats the same computation for
    # numeric protected columns — confirm before simplifying.
    for priv_cls_1 in [0, 1]:   # Gender
        for priv_cls_2 in [0.0, 1.0, 2.0, 4.0]:   # Ethnicity

            # Ground-truth dataset in AIF360 format with `age_cls` as the favorable outcome.
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Age', favorable_classes=[age_cls],
                                             protected_attribute_names=['Gender','Ethnicity'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Prediction dataset: identical rows, labels replaced by the model output.
            # StandardDataset re-encodes the label column as favorable/unfavorable, so
            # the raw class ids in y_pred_final must be encoded the same way (and
            # shaped (n, 1)); otherwise the metric compares against the wrong class.
            aif360_pred_dataset = aif360_dataset.copy()
            pred_is_favorable = np.asarray(y_pred_final).ravel() == age_cls
            aif360_pred_dataset.labels = np.where(
                pred_is_favorable,
                aif360_dataset.favorable_label,
                aif360_dataset.unfavorable_label,
            ).astype(float).reshape(-1, 1)

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

### 2- Reject Option Classification

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from aif360.datasets import StandardDataset
from aif360.algorithms.postprocessing import RejectOptionClassification
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from tqdm import tqdm

# Dense network used as the per-class binary classifier
def create_model():
    """Build an uncompiled Keras model mapping 100 input features to one sigmoid output.

    The network stacks three ReLU Dense layers (128, 64 and 32 units), each
    followed by Dropout(0.5), and ends in a single sigmoid unit.

    Returns:
        Model: the uncompiled functional-API model.
    """
    features_in = Input(shape=(100,))

    # Hidden stack: Dense -> Dropout, repeated for each layer width.
    hidden = features_in
    for width in (128, 64, 32):
        hidden = Dense(width, activation='relu', kernel_initializer='glorot_uniform')(hidden)
        hidden = Dropout(0.5)(hidden)

    # Single sigmoid unit for the binary output.
    probability = Dense(1, activation='sigmoid', kernel_initializer='glorot_uniform')(hidden)
    return Model(inputs=features_in, outputs=probability)

# Gender == 1 is treated as the privileged group by the post-processor.
privileged_groups = [{'Gender': 1}]
unprivileged_groups = [{'Gender': 0}]

# NOTE(review): the evaluation cells iterate over range(3) age classes while this
# cell uses 4 — confirm the actual number of classes in `y`.
n_classes = 4  # Number of classes
scores = []
# One column per class; each holds the post-processed one-vs-rest label for that class.
y_pred_all_classes = np.zeros((len(y), n_classes))

for class_index in range(n_classes):
    for train_index, test_index in tqdm(StratifiedKFold(n_splits=5).split(X, y)):
        X_Train, X_Test = X.loc[train_index, :], X.loc[test_index, :]
        y_Train, y_Test = y[train_index], y[test_index]

        # Convert to binary classification for the current class
        y_Train_binary = (y_Train == class_index).astype(int)
        y_Test_binary = (y_Test == class_index).astype(int)

        # Initialize and train the model
        dense_model = create_model()
        dense_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])  # Use binary_crossentropy
        dense_model.fit(X_Train, y_Train_binary, epochs=25, batch_size=64, verbose=0)

        # Predicted probability of the current class for every test row
        standard_preds = dense_model.predict(X_Test).flatten()  # Flatten for binary output

        # Ground truth in AIF360 format for postprocessing
        aif360_test = StandardDataset(pd.concat([X_Test[['Gender']], pd.DataFrame(y_Test_binary, columns=['Age'], index=X_Test.index)], axis=1),
                                      label_name='Age', favorable_classes=[1],
                                      protected_attribute_names=['Gender'],
                                      privileged_classes=[[1]])

        # Predictions in AIF360 format. Passing the probabilities as the label column of
        # a StandardDataset would have them binarized against favorable_classes, losing
        # the scores RejectOptionClassification thresholds on. Instead, copy the test
        # dataset and attach the probabilities as scores, with labels thresholded at 0.5.
        aif360_preds = aif360_test.copy(deepcopy=True)
        aif360_preds.scores = standard_preds.astype(np.float64).reshape(-1, 1)
        aif360_preds.labels = np.where(
            aif360_preds.scores > 0.5,
            aif360_preds.favorable_label,
            aif360_preds.unfavorable_label,
        ).astype(np.float64)

        # Apply Reject Option Classification
        # (the stray second comma after metric_name was a SyntaxError and is removed)
        roc = RejectOptionClassification(unprivileged_groups=unprivileged_groups,
                                         privileged_groups=privileged_groups,
                                         low_class_thresh=0.01, high_class_thresh=0.99,
                                         num_class_thresh=100, num_ROC_margin=50,
                                         metric_name="Statistical parity difference",
                                         metric_ub=0.05, metric_lb=-0.05)
        roc = roc.fit(aif360_test, aif360_preds)
        transformed_preds = roc.predict(aif360_preds).labels

        # Store the predictions for the current class
        y_pred_all_classes[test_index, class_index] = transformed_preds.squeeze()

# Convert the predictions for each class into final multi-class predictions.
# argmax returns the first (lowest) class index when several columns tie.
y_pred_final = np.argmax(y_pred_all_classes, axis=1)
overall_accuracy = accuracy_score(y, y_pred_final)

print("Overall Accuracy:", overall_accuracy)


In [None]:
# Write the Reject-Option post-processed predictions to disk (single column, no index).
pd.DataFrame(data=y_pred_final).to_csv(path_or_buf='y_pred_ROC.csv', index=False)

In [None]:
import pandas as pd
import numpy as np


# Running maxima of each fairness metric over every (favorable age class,
# privileged group) combination evaluated below. max_di is initialised for
# parity with the original cell but is not updated here.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Every gender value takes a turn as the privileged group (loop-invariant, so
# defined once instead of on every iteration).
groups = [
    {'Gender': 0},
    {'Gender': 1}
]

for age_cls in range(3):
    # NOTE(review): the metric below receives its groups explicitly, so iterating
    # over priv_cls presumably repeats the same computation for a numeric
    # 'Gender' column — confirm before simplifying.
    for priv_cls in [0, 1]:
        # Ground-truth dataset in AIF360 format with `age_cls` as the favorable outcome.
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Gender'],
                                         privileged_classes=[[priv_cls]])

        # Prediction dataset: identical rows, labels replaced by the model output.
        # StandardDataset re-encodes the label column as favorable/unfavorable, so the
        # raw class ids in y_pred_final must be encoded the same way (and shaped
        # (n, 1)); otherwise the metric compares predictions against the wrong class.
        aif360_pred_dataset = aif360_dataset.copy()
        pred_is_favorable = np.asarray(y_pred_final).ravel() == age_cls
        aif360_pred_dataset.labels = np.where(
            pred_is_favorable,
            aif360_dataset.favorable_label,
            aif360_dataset.unfavorable_label,
        ).astype(float).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())

# Print maximum values
print("Gender Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np

# Running maxima of each fairness metric over every (favorable age class,
# privileged group) combination evaluated below. max_di is initialised for
# parity with the original cell but is not updated here.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Every ethnicity code takes a turn as the privileged group (loop-invariant, so
# defined once instead of on every iteration).
groups = [
    {'Ethnicity': 0.0},
    {'Ethnicity': 1.0},
    {'Ethnicity': 2.0},
    {'Ethnicity': 4.0}]

# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(3):
    # NOTE(review): the metric below receives its groups explicitly, so iterating
    # over priv_cls presumably repeats the same computation for a numeric
    # 'Ethnicity' column — confirm before simplifying.
    for priv_cls in [0.0, 1.0, 2.0, 4.0]:
        # Ground-truth dataset in AIF360 format with `age_cls` as the favorable outcome.
        aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                         label_name='Age', favorable_classes=[age_cls],
                                         protected_attribute_names=['Ethnicity'],
                                         privileged_classes=[[priv_cls]])

        # Prediction dataset: identical rows, labels replaced by the model output.
        # StandardDataset re-encodes the label column as favorable/unfavorable, so the
        # raw class ids in y_pred_final must be encoded the same way (and shaped
        # (n, 1)); otherwise the metric compares predictions against the wrong class.
        aif360_pred_dataset = aif360_dataset.copy()
        pred_is_favorable = np.asarray(y_pred_final).ravel() == age_cls
        aif360_pred_dataset.labels = np.where(
            pred_is_favorable,
            aif360_dataset.favorable_label,
            aif360_dataset.unfavorable_label,
        ).astype(float).reshape(-1, 1)

        # Iterate through the groups
        for i, privileged_group in enumerate(groups):
            # Define unprivileged groups as all other groups
            unprivileged_groups = groups[:i] + groups[i+1:]

            # Calculate metrics
            metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=[privileged_group])

            # Update maximum values
            max_spd = max(max_spd, metric.statistical_parity_difference())
            max_aod = max(max_aod, metric.average_odds_difference())
            max_eod = max(max_eod, metric.equal_opportunity_difference())
            max_theil = max(max_theil, metric.theil_index())
            max_gei = max(max_gei, metric.generalized_entropy_index())

# Print maximum values
print("Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)

In [None]:
import pandas as pd
import numpy as np


# Running maxima of each fairness metric over every (favorable age class,
# privileged group) combination evaluated below. max_di is initialised for
# parity with the original cell but is not updated here.
max_spd = max_aod = max_eod = max_theil = max_gei = max_di = -np.inf

# Every (gender, ethnicity) pair takes a turn as the privileged group
# (loop-invariant, so defined once instead of on every iteration).
groups = [
    {'Gender': 0, 'Ethnicity': 0.0},
    {'Gender': 0, 'Ethnicity': 1.0},
    {'Gender': 0, 'Ethnicity': 2.0},
    {'Gender': 1, 'Ethnicity': 0.0},
    {'Gender': 1, 'Ethnicity': 1.0},
    {'Gender': 1, 'Ethnicity': 2.0},
    {'Gender': 0, 'Ethnicity': 4.0},
    {'Gender': 1, 'Ethnicity': 4.0}
]

# Find Max value for metrics by looping into all unique_values for the target and for the privileged class
for age_cls in range(3):
    # NOTE(review): the metric below receives its groups explicitly, so iterating
    # over priv_cls_1 / priv_cls_2 presumably repeats the same computation for
    # numeric protected columns — confirm before simplifying.
    for priv_cls_1 in [0, 1]:   # Gender
        for priv_cls_2 in [0.0, 1.0, 2.0, 4.0]:   # Ethnicity

            # Ground-truth dataset in AIF360 format with `age_cls` as the favorable outcome.
            aif360_dataset = StandardDataset(df=pd.concat([X, y], axis=1),
                                             label_name='Age', favorable_classes=[age_cls],
                                             protected_attribute_names=['Gender','Ethnicity'],
                                             privileged_classes=[[priv_cls_1], [priv_cls_2]])

            # Prediction dataset: identical rows, labels replaced by the model output.
            # StandardDataset re-encodes the label column as favorable/unfavorable, so
            # the raw class ids in y_pred_final must be encoded the same way (and
            # shaped (n, 1)); otherwise the metric compares against the wrong class.
            aif360_pred_dataset = aif360_dataset.copy()
            pred_is_favorable = np.asarray(y_pred_final).ravel() == age_cls
            aif360_pred_dataset.labels = np.where(
                pred_is_favorable,
                aif360_dataset.favorable_label,
                aif360_dataset.unfavorable_label,
            ).astype(float).reshape(-1, 1)

            # Iterate through the groups
            for i, privileged_group in enumerate(groups):
                # Define unprivileged groups as all other groups
                unprivileged_groups = groups[:i] + groups[i+1:]

                # Calculate metrics
                metric = ClassificationMetric(aif360_dataset, aif360_pred_dataset,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=[privileged_group])

                # Update maximum values
                max_spd = max(max_spd, metric.statistical_parity_difference())
                max_aod = max(max_aod, metric.average_odds_difference())
                max_eod = max(max_eod, metric.equal_opportunity_difference())
                max_theil = max(max_theil, metric.theil_index())
                max_gei = max(max_gei, metric.generalized_entropy_index())

# Print maximum values
print("Gender+Ethnicity Bias Identification:")
print("Max Statistical Parity Difference (Ideal value = 0):", max_spd)
print("Max Average Odds Difference (Ideal value = 0):", max_aod)
print("Max Equal Opportunity Difference (Ideal value = 0):", max_eod)
print("Max Theil Index (Ideal value = 0):", max_theil)
print("Max Generalized Entropy Index (Ideal value = 0):", max_gei)