# **Load all necessary packages**

In [1]:
# Install the AIF360 toolkit into the kernel's environment.
%pip install aif360
# Manual step: download the two German credit files from the links below and
# place them at /content/german.data and /content/german.doc, which is where
# the `mv` commands further down read them from.
# https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data
# https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.doc
# Clone the AIF360 repository; its examples/ directory is moved into the
# working directory by the last command of this cell.
!git clone https://github.com/Trusted-AI/AIF360
# Move the downloaded files into the installed package's data/raw/german folder.
# NOTE(review): the destination hard-codes a python3.6 dist-packages path and the
# source assumes a /content directory — confirm both match the running environment.
! mv /content/german.data /usr/local/lib/python3.6/dist-packages/aif360/datasets/../data/raw/german/german.data
! mv /content/german.doc  /usr/local/lib/python3.6/dist-packages/aif360/datasets/../data/raw/german/german.doc
# Move the cloned examples into the current directory; presumably this is what
# makes `common_utils` importable below — verify.
! mv AIF360/examples/* .

%matplotlib inline
import sys
import numpy as np
from tqdm import tqdm
import pickle
from aif360.datasets import BinaryLabelDataset
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.algorithms.preprocessing.reweighing import Reweighing
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from IPython.display import Markdown, display
import matplotlib.pyplot as plt
from common_utils import compute_metrics
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score

# Display names of the fairness metrics referenced in this notebook.
all_metrics = [
    "Statistical parity difference",
    "Average odds difference",
    "Equal opportunity difference",
]

Note: you may need to restart the kernel to use updated packages.
fatal: destination path 'AIF360' already exists and is not an empty directory.
mv: cannot stat '/content/german.data': No such file or directory
mv: cannot stat '/content/german.doc': No such file or directory
mv: cannot stat 'AIF360/examples/*': No such file or directory


0.1.36ubuntu1 is an invalid version and will not be supported in a future release
0.23ubuntu1 is an invalid version and will not be supported in a future release
2022-05-06 00:58:55.440546: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-06 00:58:55.440605: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Load the German credit dataset with 'age' as the only protected attribute.
# The dataset also contains a protected attribute for "sex", which is not
# considered in this evaluation.
dataset_orig = GermanDataset(
    protected_attribute_names=['age'],
    privileged_classes=[lambda x: x >= 25],  # age >= 25 is considered privileged
    features_to_drop=['personal_status', 'sex']  # ignore sex-related attributes
)

# Group definitions used by the fairness metrics: 'age' == 1 is the privileged
# group and 'age' == 0 the unprivileged one.
privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

# Fixed seed so the shuffled splits are identical on every Restart & Run All.
SPLIT_SEED = 42

# Split once into 70% train and 30% hold-out, then halve the hold-out into
# validation and test. (An earlier, immediately overwritten train/test split
# was removed because its results were never used.)
dataset_orig_train, dataset_orig_vt = dataset_orig.split(
    [0.7], shuffle=True, seed=SPLIT_SEED)
dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split(
    [0.5], shuffle=True, seed=SPLIT_SEED)

# Print some basic facts about the training partition.
display(Markdown("#### Training Dataset shape"))
print(dataset_orig_train.features.shape)
display(Markdown("#### Favorable and unfavorable labels"))
print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)
display(Markdown("#### Protected attribute names"))
print(dataset_orig_train.protected_attribute_names)
display(Markdown("#### Privileged and unprivileged protected attribute values"))
print(dataset_orig_train.privileged_protected_attributes,
      dataset_orig_train.unprivileged_protected_attributes)
display(Markdown("#### Dataset feature names"))
print(dataset_orig_train.feature_names)

# Convert the full (unsplit) dataset to a DataFrame and preview the first rows.
df, dict_df = dataset_orig.convert_to_dataframe()
print("Shape: ", df.shape)
df.head(10).values

#### Training Dataset shape

(700, 57)


#### Favorable and unfavorable labels

1.0 2.0


#### Protected attribute names

['age']


#### Privileged and unprivileged protected attribute values

[array([1.])] [array([0.])]


#### Dataset feature names

['month', 'credit_amount', 'investment_as_income_percentage', 'residence_since', 'age', 'number_of_credits', 'people_liable_for', 'status=A11', 'status=A12', 'status=A13', 'status=A14', 'credit_history=A30', 'credit_history=A31', 'credit_history=A32', 'credit_history=A33', 'credit_history=A34', 'purpose=A40', 'purpose=A41', 'purpose=A410', 'purpose=A42', 'purpose=A43', 'purpose=A44', 'purpose=A45', 'purpose=A46', 'purpose=A48', 'purpose=A49', 'savings=A61', 'savings=A62', 'savings=A63', 'savings=A64', 'savings=A65', 'employment=A71', 'employment=A72', 'employment=A73', 'employment=A74', 'employment=A75', 'other_debtors=A101', 'other_debtors=A102', 'other_debtors=A103', 'property=A121', 'property=A122', 'property=A123', 'property=A124', 'installment_plans=A141', 'installment_plans=A142', 'installment_plans=A143', 'housing=A151', 'housing=A152', 'housing=A153', 'skill_level=A171', 'skill_level=A172', 'skill_level=A173', 'skill_level=A174', 'telephone=A191', 'telephone=A192', 'foreign_wor

array([[6.000e+00, 1.169e+03, 4.000e+00, 4.000e+00, 1.000e+00, 2.000e+00,
        1.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00,
        0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00,
        1.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00,
        0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 1.000e+00,
        0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00,
        1.000e+00, 1.000e+00, 0.000e+00, 1.000e+00],
       [4.800e+01, 5.951e+03, 2.000e+00, 2.000e+00, 0.000e+00, 1.000e+00,
        1.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+

# **Applying Logistic Regression on Original Dataset**

In [3]:
# Fit a class-balanced logistic regression on the standardised training
# features of the original dataset, then show its training-set predictions.
scale_orig = StandardScaler()
X_train = scale_orig.fit_transform(dataset_orig_train.features)
y_train = dataset_orig_train.labels.ravel()
w_train = dataset_orig_train.instance_weights.ravel()

lmod = LogisticRegression(class_weight='balanced')
# Use the flattened weights prepared above; previously w_train was computed
# but the dataset attribute was passed to fit() directly.
lmod.fit(X_train, y_train, sample_weight=w_train)
lmod.predict(X_train)

# Optional: persist and reload the fitted model (kept disabled).
# The same file name is used for both saving and loading.
# import joblib
# joblib.dump(lmod, "lmod_bias_model.pkl")
# model = joblib.load("lmod_bias_model.pkl")
# model.predict(X_train)


array([1., 2., 1., 1., 2., 2., 2., 1., 2., 1., 2., 1., 2., 2., 1., 2., 2.,
       1., 2., 1., 2., 1., 1., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 2.,
       1., 1., 1., 2., 1., 2., 1., 1., 1., 1., 1., 2., 2., 1., 2., 1., 2.,
       1., 2., 1., 1., 2., 2., 1., 2., 2., 2., 2., 1., 1., 2., 1., 2., 1.,
       1., 2., 2., 2., 1., 2., 1., 1., 1., 1., 2., 2., 2., 2., 1., 1., 2.,
       2., 2., 1., 1., 1., 1., 2., 2., 2., 1., 2., 1., 1., 1., 1., 1., 2.,
       2., 1., 2., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 2., 1., 1.,
       2., 2., 1., 2., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., 2., 2.,
       2., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1., 2., 2., 2., 2., 2., 2.,
       1., 2., 2., 2., 1., 2., 1., 2., 1., 1., 1., 2., 2., 2., 2., 1., 1.,
       1., 1., 1., 1., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1.,
       1., 2., 1., 2., 1., 2., 2., 2., 1., 1., 1., 1., 1., 1., 2., 1., 2.,
       1., 1., 1., 1., 1., 2., 2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 1.,
       1., 1., 1., 2., 1.

## **Saving the Model**


