In [1]:
import openml, fairlib
import fairlib as fl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import KBinsDiscretizer
from keras.models import Sequential
from keras.layers import Dense
from fairlib.inprocessing import Fauci

INFO:fairlib:Using Keras backend: TENSORFLOW
INFO:fairlib:fairlib loaded


In [2]:
dataset = openml.datasets.get_dataset(179)
X, y, _, names = dataset.get_data(target=dataset.default_target_attribute)

INFO:openml.datasets.dataset:pickle write adult


In [3]:
imputer = SimpleImputer(strategy='most_frequent')
X_imputed = imputer.fit_transform(X)

In [4]:
X_discretized = X_imputed.copy()
for col in X.columns:
    if X[col].dtype == 'category':
        le = LabelEncoder()
        X_discretized[:, X.columns.get_loc(col)] = le.fit_transform(X_discretized[:, X.columns.get_loc(col)])


In [5]:
X = fairlib.DataFrame(X_discretized, columns=names)
y = y.apply(lambda x: x == ">50K").astype(int)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [7]:
X_train['income'] = y_train

In [8]:
X_train

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capitalgain,capitalloss,hoursperweek,native-country,income
5551,3,1,277024.0,11,9,5,10,1,2,1,0,0,2,38,0
36721,4,4,164616.0,10,16,2,9,0,4,1,4,0,2,38,1
2638,3,3,159755.0,8,11,2,9,0,4,1,0,0,3,38,1
36214,3,3,260052.0,9,13,0,3,4,4,1,4,0,2,38,1
27010,2,4,188615.0,9,13,2,11,0,4,1,0,0,3,38,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11284,1,3,200117.0,9,13,2,3,0,1,1,0,2,3,38,1
44732,0,3,90896.0,11,9,4,6,3,4,0,0,0,2,38,0
38158,0,3,370057.0,11,9,0,0,1,4,0,0,0,2,38,0
860,0,3,216284.0,1,7,4,0,3,4,0,0,0,0,38,0


In [9]:
fauci_train_dataset = fl.DataFrame(X_train)

In [10]:
fauci_train_dataset.targets = "income"
fauci_train_dataset.sensitive = 'sex' # fauci currently supports only one sensitive attribute

In [11]:
model = Sequential()
model.add(Dense(32, activation='relu'))  # Second hidden layer
model.add(Dense(1, activation='sigmoid'))  # Output layer for classification

In [12]:
inprocessing = Fauci(model, loss='binary_crossentropy', regularizer='sp', optimizer='adam', metrics=['accuracy'])

In [13]:
inprocessing.fit(fauci_train_dataset, converting_to_type=float, epochs=10, batch_size=32, validation_split=0.3)

Epoch 1/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6444 - loss: 85.7257 - val_accuracy: 0.7601 - val_loss: 108.8396
Epoch 2/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6432 - loss: 76.5861 - val_accuracy: 0.7601 - val_loss: 77.5062
Epoch 3/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6424 - loss: 56.1298 - val_accuracy: 0.7601 - val_loss: 187.0828
Epoch 4/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6510 - loss: 68.7884 - val_accuracy: 0.7601 - val_loss: 169.5655
Epoch 5/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6440 - loss: 98.0908 - val_accuracy: 0.7586 - val_loss: 7.9686
Epoch 6/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6414 - loss: 61.2038 - val_accuracy: 0.7621 - val_loss: 34.2699
Epoch 7/10
[1

<keras.src.callbacks.history.History at 0x165f734a0>

In [14]:
y_pred = inprocessing.predict(X_test.astype(float))

[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297us/step


In [15]:
y_pred = (y_pred > 0.5).astype(int)

In [16]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.7640526119865988

In [17]:
X_test['income'] = y_pred

In [18]:
spd_dataset = fl.DataFrame(X_test)
spd_dataset.targets = "income"
spd_dataset.sensitive = 'sex'

spd_dataset.statistical_parity_difference()

(income=1, sex=1) -> 0.03169839931944696
(income=1, sex=0) -> -0.03169839931944696

## Test With un-fair model

In [19]:
model = Sequential()
model.add(Dense(32, activation='relu'))  # Second hidden layer
model.add(Dense(1, activation='sigmoid'))  # Output layer for classification

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [21]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [22]:
X_train = X_train.astype(float)
y_train = y_train.astype(float)

In [23]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.3)

Epoch 1/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 495us/step - accuracy: 0.6219 - loss: 324.5220 - val_accuracy: 0.7601 - val_loss: 204.3399
Epoch 2/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 427us/step - accuracy: 0.6407 - loss: 52.1193 - val_accuracy: 0.7601 - val_loss: 55.1216
Epoch 3/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 431us/step - accuracy: 0.6321 - loss: 61.8301 - val_accuracy: 0.7564 - val_loss: 5.6382
Epoch 4/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 425us/step - accuracy: 0.6446 - loss: 49.9472 - val_accuracy: 0.2400 - val_loss: 52.1989
Epoch 5/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 425us/step - accuracy: 0.6503 - loss: 35.2348 - val_accuracy: 0.7616 - val_loss: 31.3728
Epoch 6/10
[1m716/716[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 422us/step - accuracy: 0.6476 - loss: 61.5969 - val_accuracy: 0.7601 - val_loss: 130.5771
Ep

<keras.src.callbacks.history.History at 0x1663fed50>

In [24]:
y_pred = model.predict(X_test.astype(float))
y_pred = (y_pred > 0.5).astype(int)

[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236us/step


In [25]:
accuracy_score(y_test, y_pred)

0.5351780617942673

In [26]:
X_test['income'] = y_pred
spd_dataset = fl.DataFrame(X_test)
spd_dataset.targets = "income"
spd_dataset.sensitive = 'sex'

spd_dataset.statistical_parity_difference()

(income=1, sex=1) -> 0.4262214411553074
(income=1, sex=0) -> -0.4262214411553074