## Exploring Fairness in Machine Learning Models: AI Fairness 360

Both metrics suggest that the model exhibits significant bias against the unprivileged group (likely females if sex is the protected attribute), favoring the privileged group (likely males). This highlights the need for bias mitigation techniques to improve fairness in the model's predictions.

* Statistical Parity Difference of -0.185 indicates a bias where the unprivileged group is receiving 18.5% fewer favorable outcomes compared to the privileged group.

* Disparate Impact of 0.322 indicates that the unprivileged group is receiving favorable outcomes at only 32.2% the rate of the privileged group.

In [1]:
!pip install aif360
!pip install scikit-learn
!pip install pandas
!pip install numpy

Collecting aif360
  Downloading aif360-0.6.1-py3-none-any.whl (259 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.7/259.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: aif360
Successfully installed aif360-0.6.1


In [2]:
import pandas as pd
import numpy as np

# Load the Adult Income Dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation',
                'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income']

data = pd.read_csv(url, header=None, names=column_names, na_values=' ?')
data


pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[inFairness]'
pip install 'aif360[Reductions]'


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
32557,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,>50K
32558,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
32559,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [3]:
!pip install aif360[Reductions] aif360[inFairness]

Collecting fairlearn~=0.7 (from aif360[Reductions])
  Downloading fairlearn-0.10.0-py3-none-any.whl (234 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m234.1/234.1 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting skorch (from aif360[Reductions])
  Downloading skorch-1.0.0-py3-none-any.whl (239 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.4/239.4 kB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting inFairness>=0.2.2 (from aif360[Reductions])
  Downloading inFairness-0.2.3-py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.8/45.8 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting POT>=0.8.0 (from inFairness>=0.2.2->aif360[Reductions])
  Downloading POT-0.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (835 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m835.4/835.4 kB[0m [31m35.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-

In [12]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32561 entries, 0 to 32560
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             32561 non-null  int64 
 1   workclass       32561 non-null  object
 2   fnlwgt          32561 non-null  int64 
 3   education       32561 non-null  object
 4   education-num   32561 non-null  int64 
 5   marital-status  32561 non-null  object
 6   occupation      32561 non-null  object
 7   relationship    32561 non-null  object
 8   race            32561 non-null  int64 
 9   sex             32561 non-null  int64 
 10  capital-gain    32561 non-null  int64 
 11  capital-loss    32561 non-null  int64 
 12  hours-per-week  32561 non-null  int64 
 13  native-country  32561 non-null  object
 14  income          32561 non-null  int64 
dtypes: int64(9), object(6)
memory usage: 3.7+ MB


In [16]:
!pip install BlackBoxAuditing

Collecting BlackBoxAuditing
  Downloading BlackBoxAuditing-0.1.54.tar.gz (2.6 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/2.6 MB[0m [31m4.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.6/2.6 MB[0m [31m37.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: BlackBoxAuditing
  Building wheel for BlackBoxAuditing (setup.py) ... [?25l[?25hdone
  Created wheel for BlackBoxAuditing: filename=BlackBoxAuditing-0.1.54-py2.py3-none-any.whl size=1394753 sha256=2ec41104a2e318401894517b4af59d1247e10eac9fb5f9a16aaf10cec671e85f
  Stored in directory: /root/.cache/pip/wheels/c0/4f/b1/80e

In [33]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric

# Load the Adult Income Dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation',
                'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income']

data = pd.read_csv(url, header=None, names=column_names, na_values=' ?')
data = data.dropna().copy()  # Ensure we're working with a full copy to avoid SettingWithCopyWarning

# Preprocess the dataset using .loc to avoid SettingWithCopyWarning
data.loc[:, 'income'] = data['income'].apply(lambda x: 1 if x == ' >50K' else 0)
data.loc[:, 'sex'] = data['sex'].apply(lambda x: 1 if x == ' Male' else 0)

# Ensure data types are consistent
data.loc[:, 'income'] = data['income'].astype(int)
data.loc[:, 'sex'] = data['sex'].astype(int)

# Verify data types
print(f"Data types:\n{data.dtypes}")

# Check for class distribution in the protected attribute
print("Sex distribution in the dataset:")
print(data['sex'].value_counts())

# Split the data
X = data.drop(columns=['income'])
y = data['income']

# Include 'sex' in the split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
sex_train = X_train['sex'].copy()
sex_test = X_test['sex'].copy()

# One-hot encode categorical features
encoder = OneHotEncoder(drop='first', sparse_output=False)
X_train_encoded = encoder.fit_transform(X_train.select_dtypes(include=['object']))
X_test_encoded = encoder.transform(X_test.select_dtypes(include=['object']))

X_train_encoded_df = pd.DataFrame(X_train_encoded, columns=encoder.get_feature_names_out(X_train.select_dtypes(include=['object']).columns))
X_test_encoded_df = pd.DataFrame(X_test_encoded, columns=encoder.get_feature_names_out(X_train.select_dtypes(include=['object']).columns))

X_train_final = pd.concat([X_train.select_dtypes(exclude=['object']).reset_index(drop=True), X_train_encoded_df], axis=1)
X_test_final = pd.concat([X_test.select_dtypes(exclude=['object']).reset_index(drop=True), X_test_encoded_df], axis=1)

# Standardize numerical features
scaler = StandardScaler()
X_train_final = scaler.fit_transform(X_train_final)
X_test_final = scaler.transform(X_test_final)

# Ensure labels are integers
y_train = y_train.astype(int)
y_test = y_test.astype(int)

# Check data integrity before training
print("Data integrity check:")
print(f"X_train_final shape: {X_train_final.shape}, y_train shape: {y_train.shape}")
print(f"X_test_final shape: {X_test_final.shape}, y_test shape: {y_test.shape}")
print(f"y_train unique values: {y_train.unique()}")
print(f"y_test unique values: {y_test.unique()}")
print(f"y_train data type: {y_train.dtype}")
print(f"y_test data type: {y_test.dtype}")

# Train a baseline RandomForest model
model = RandomForestClassifier(random_state=42)
model.fit(X_train_final, y_train)
baseline_predictions = model.predict(X_test_final)

# Evaluate baseline model
def evaluate_model(predictions, y_test, label):
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions, zero_division=1)
    recall = recall_score(y_test, predictions, zero_division=1)
    f1 = f1_score(y_test, predictions, zero_division=1)

    print(f"{label} model performance:")
    print(f'Accuracy: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1 Score: {f1}')
    print("\n")

evaluate_model(baseline_predictions, y_test, 'Baseline')

# Convert to AIF360 BinaryLabelDataset
def to_bld(X, y, sex):
    df = pd.DataFrame(X, columns=encoder.get_feature_names_out(X_train.select_dtypes(include=['object']).columns).tolist() + X_train.select_dtypes(exclude=['object']).columns.tolist())
    df['income'] = y.values
    df['sex'] = sex.values  # Ensure 'sex' is included as a column
    return BinaryLabelDataset(favorable_label=1,
                              unfavorable_label=0,
                              df=df,
                              label_names=['income'],
                              protected_attribute_names=['sex'])

train_bld = to_bld(X_train_final, y_train, sex_train)
test_bld = to_bld(X_test_final, y_test, sex_test)

# Debug: Check predictions distribution
print("Predictions distribution in the test set:")
print(pd.Series(baseline_predictions).value_counts())

# Evaluate fairness metrics for baseline model
test_bld_pred = test_bld.copy()
test_bld_pred.labels = baseline_predictions.reshape(-1, 1)

metric_orig = ClassificationMetric(test_bld, test_bld_pred,
                                   unprivileged_groups=[{'sex': 0}],
                                   privileged_groups=[{'sex': 1}])

print("Baseline model fairness metrics:")
print("Statistical parity difference: ", metric_orig.statistical_parity_difference())
print("Disparate impact: ", metric_orig.disparate_impact())
print("\n")



Data types:
age                int64
workclass         object
fnlwgt             int64
education         object
education-num      int64
marital-status    object
occupation        object
relationship      object
race              object
sex               object
capital-gain       int64
capital-loss       int64
hours-per-week     int64
native-country    object
income            object
dtype: object
Sex distribution in the dataset:
sex
1    20380
0     9782
Name: count, dtype: int64
Data integrity check:
X_train_final shape: (21113, 96), y_train shape: (21113,)
X_test_final shape: (9049, 96), y_test shape: (9049,)
y_train unique values: [0 1]
y_test unique values: [0 1]
y_train data type: int64
y_test data type: int64
Baseline model performance:
Accuracy: 0.8511437727925738
Precision: 0.7343345416882444
Recall: 0.6296625222024866
F1 Score: 0.6779823093473584


Predictions distribution in the test set:
0    7118
1    1931
Name: count, dtype: int64
Baseline model fairness metrics:
Statisti

In [41]:
from aif360.algorithms.preprocessing import Reweighing

# Reweighing
rw = Reweighing(unprivileged_groups=[{'sex': 0}],
                privileged_groups=[{'sex': 1}])
train_rw = rw.fit_transform(train_bld)

model_rw = RandomForestClassifier(random_state=42)
model_rw.fit(train_rw.features, train_rw.labels.ravel(), sample_weight=train_rw.instance_weights)
rw_predictions = model_rw.predict(X_test_final_df)

# Evaluate the reweighed model
evaluate_model(rw_predictions, y_test, 'Reweighing')
rw_fairness_metrics = evaluate_fairness(rw_predictions, test_bld, 'Reweighing')
print(rw_fairness_metrics)




Reweighing model performance:
Accuracy: 0.8493756216156482
Precision: 0.7275985663082437
Recall: 0.6309946714031972
F1 Score: 0.6758620689655173


{'model': 'Reweighing', 'accuracy': 0.8493756216156482, 'precision': 0.7275985663082437, 'recall': 0.6309946714031972, 'f1': 0.6758620689655173, 'statistical_parity_diff': -0.18706651382314976, 'disparate_impact': 0.3219252737879457}


In [35]:
from aif360.algorithms.preprocessing import DisparateImpactRemover

# Apply Disparate Impact Remover
dir = DisparateImpactRemover(repair_level=1.0)
train_dir = dir.fit_transform(train_bld)
test_dir = dir.fit_transform(test_bld)

model_dir = RandomForestClassifier(random_state=42)
model_dir.fit(train_dir.features, train_dir.labels.ravel())
dir_predictions = model_dir.predict(test_dir.features)

# Evaluate the DIR model
evaluate_model(dir_predictions, y_test, 'DIR')
dir_fairness_metrics = evaluate_fairness(dir_predictions, test_bld, 'DIR')
print(dir_fairness_metrics)


DIR model performance:
Accuracy: 0.8468339042988176
Precision: 0.7343073593073594
Recall: 0.6025754884547069
F1 Score: 0.6619512195121952


{'model': 'DIR', 'accuracy': 0.8468339042988176, 'precision': 0.7343073593073594, 'recall': 0.6025754884547069, 'f1': 0.6619512195121952, 'statistical_parity_diff': -0.17403262603987377, 'disparate_impact': 0.33087831389925876}


The performance of the Learning Fair Representations (LFR) model indicates a significant issue. The model has an accuracy of 75.1%, which is misleading because the precision is 1.0 but recall is 0.0, leading to an F1 score of 0.0. This implies that the model is not identifying any positive cases correctly (i.e., not predicting anyone as earning more than 50K).

The fairness metrics also indicate issues:

Statistical Parity Difference: 0.0, suggesting no difference in treatment between the groups, but this is because the model is not making any positive predictions.
Disparate Impact: NaN, indicating a division by zero or other invalid operation, likely due to the lack of positive predictions.

In [36]:
from aif360.algorithms.preprocessing import LFR

# Apply Learning Fair Representations
lfr = LFR(unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}])
train_lfr = lfr.fit_transform(train_bld)
test_lfr = lfr.transform(test_bld)

model_lfr = RandomForestClassifier(random_state=42)
model_lfr.fit(train_lfr.features, train_lfr.labels.ravel())
lfr_predictions = model_lfr.predict(test_lfr.features)

# Evaluate the LFR model
evaluate_model(lfr_predictions, y_test, 'LFR')
lfr_fairness_metrics = evaluate_fairness(lfr_predictions, test_bld, 'LFR')
print(lfr_fairness_metrics)


LFR model performance:
Accuracy: 0.751132721847718
Precision: 1.0
Recall: 0.0
F1 Score: 0.0


{'model': 'LFR', 'accuracy': 0.751132721847718, 'precision': 1.0, 'recall': 0.0, 'f1': 0.0, 'statistical_parity_diff': 0.0, 'disparate_impact': nan}


  return metric_fun(privileged=False) / metric_fun(privileged=True)


In [42]:
# Collect all results
results = []
results.append(evaluate_fairness(baseline_predictions, test_bld, 'Baseline'))
results.append(rw_fairness_metrics)
results.append(dir_fairness_metrics)

# Convert results to a DataFrame for easier plotting
results_df = pd.DataFrame(results)

# Plot performance metrics
fig, axs = plt.subplots(2, 2, figsize=(14, 10))

metrics = ['accuracy', 'precision', 'recall', 'f1']
for ax, metric in zip(axs.flatten(), metrics):
    results_df.plot(x='model', y=metric, kind='bar', ax=ax, legend=False)
    ax.set_title(f'{metric.capitalize()} by Model')
    ax.set_ylabel(metric.capitalize())

plt.tight_layout()
plt.show()

# Plot fairness metrics
fig, axs = plt.subplots(1, 2, figsize=(14, 5))

fairness_metrics = ['statistical_parity_diff', 'disparate_impact']
for ax, metric in zip(axs, fairness_metrics):
    results_df.plot(x='model', y=metric, kind='bar', ax=ax, legend=False)
    ax.set_title(f'{metric.replace("_", " ").capitalize()} by Model')
    ax.set_ylabel(metric.replace("_", " ").capitalize())

plt.tight_layout()
plt.show()

In [49]:
results_df

Unnamed: 0,model,accuracy,precision,recall,f1,statistical_parity_diff,disparate_impact
0,Baseline,0.851144,0.734335,0.629663,0.677982,-0.185007,0.321789
1,Reweighing,0.849376,0.727599,0.630995,0.675862,-0.187067,0.321925
2,DIR,0.846834,0.734307,0.602575,0.661951,-0.174033,0.330878
