In [1]:
#step 1: Installing the required libraries
pip install aif360 scikit-learn pandas matplotlib

Collecting aif360
  Downloading aif360-0.6.1-py3-none-any.whl.metadata (5.0 kB)
Downloading aif360-0.6.1-py3-none-any.whl (259 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.7/259.7 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: aif360
Successfully installed aif360-0.6.1


In [3]:
# step 2 : loading the dataset
!mkdir -p data/raw/german/
!wget -O data/raw/german/german.data https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data
!wget -O data/raw/german/german.doc https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.doc

--2025-06-25 20:48:52--  https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘data/raw/german/german.data’

          data/raw/     [<=>                 ]       0  --.-KB/s               data/raw/german/ger     [ <=>                ]  77.92K  --.-KB/s    in 0.03s   

2025-06-25 20:48:52 (2.99 MB/s) - ‘data/raw/german/german.data’ saved [79793]

--2025-06-25 20:48:52--  https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.doc
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘data/raw/german/german.doc’

dat

In [5]:
# Step 3: Prepare Dataset
import pandas as pd
from aif360.datasets import StructuredDataset

columns = ['status', 'duration', 'credit_history', 'purpose', 'credit_amount', 'savings', 'employment',
           'installment_commitment', 'personal_status', 'other_parties', 'residence_since', 'property_magnitude',
           'age', 'other_payment_plans', 'housing', 'existing_credits', 'job', 'num_dependents',
           'own_telephone', 'foreign_worker', 'class']

df = pd.read_csv("data/raw/german/german.data", sep=' ', header=None, names=columns)
df['class'] = df['class'].map({1: 1, 2: 0})  # 1=favorable
df['age_binary'] = (df['age'] >= 25).astype(int)
df_encoded = pd.get_dummies(df.drop(columns=['age']), drop_first=True)
df_encoded['age_binary'] = df['age_binary']
df_encoded['class'] = df['class']

dataset_orig = StructuredDataset(
    df=df_encoded,
    label_names=['class'],
    protected_attribute_names=['age_binary']
)


In [7]:
#Step 4: converting structured dataset to binary
from aif360.datasets import BinaryLabelDataset

# Convert to BinaryLabelDataset
dataset_orig = dataset_orig.convert_to_dataframe()[0]
dataset_orig = BinaryLabelDataset(df=dataset_orig,
                                  label_names=['class'],
                                  protected_attribute_names=['age_binary'])


In [8]:
# Step 5: Check Bias Metrics
from aif360.metrics import BinaryLabelDatasetMetric

metric = BinaryLabelDatasetMetric(dataset_orig,
                                  privileged_groups=[{'age_binary': 1}],
                                  unprivileged_groups=[{'age_binary': 0}])

print("Statistical parity difference:", metric.statistical_parity_difference())
print("Disparate impact:", metric.disparate_impact())


Statistical parity difference: -0.12854990969960323
Disparate impact: 0.8212484098784929


In [9]:
# Step 6: Bias Mitigation (Reweighing)
from aif360.algorithms.preprocessing import Reweighing

RW = Reweighing(unprivileged_groups=[{'age_binary': 0}],
                privileged_groups=[{'age_binary': 1}])
dataset_transf = RW.fit_transform(dataset_orig)

metric_transf = BinaryLabelDatasetMetric(dataset_transf,
                                         privileged_groups=[{'age_binary': 1}],
                                         unprivileged_groups=[{'age_binary': 0}])

print("After Reweighing - Statistical parity difference:", metric_transf.statistical_parity_difference())
print("After Reweighing - Disparate impact:", metric_transf.disparate_impact())


After Reweighing - Statistical parity difference: 1.1102230246251565e-16
After Reweighing - Disparate impact: 1.0000000000000002
