In [1]:
#Imports some of the libraries that we will be using
import pandas as pd
import numpy as np
import math
import warnings
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn import metrics
from sklearn import tree
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# Suppress every warning for the rest of the session.
# NOTE(review): with no category/module filter this also hides warnings raised by
# the libraries used below — consider narrowing the filter.
warnings.filterwarnings('ignore')


In [2]:
from load_dataset import load
from classifier import NeuralNetwork, Logistic_Regression, SVM
from utils import *
from metrics import *  # include fairness and corresponding derivatives
from expl import explanation_candidate_generation, get_top_k_expl
from influence import *

['.git', '.idea', '.ipynb_checkpoints', 'acs_income', 'classifier.py', 'compas', 'config.json', 'expl.py', 'functions.py', 'german.data', 'german_if.ipynb', 'german_income', 'influence.py', 'influence_functions.ipynb', 'load_dataset.py', 'loss.py', 'metrics.py', 'test.ipynb', 'time.ipynb', 'utils.py', '__pycache__']


In [3]:
# --- Experiment configuration -------------------------------------------------
# Dataset key passed to load() and to the fairness helpers.
dataset = "german"
# Name of the classifier class that later cells instantiate.
clf_name = "Logistic_Regression"
# Index of the fairness metric under study:
# 0 = statistical parity, 1 = TPR parity, 2 = predictive parity.
metric = 0
# Support thresholds; not used in the cells shown here.
# NOTE(review): presumably consumed by the explanation-generation step — confirm.
support = 0.05
support_small = 0.3

In [4]:
# Load the train/test split for the configured dataset via the project's load() helper.
# NOTE(review): the returned container types are not visible here; later cells pass
# them to pd.concat, so they are presumably pandas objects — confirm in load_dataset.py.
X_train, X_test, y_train, y_test = load(dataset)

In [5]:
# How many stacked copies of each split to build; with 1 the data is unchanged
# apart from the index being reset.
duplicates = 1


def make_duplicates(x, d):
    """Return `x` concatenated with itself `d` times along axis 0, with a fresh index."""
    stacked = pd.concat([x for _ in range(d)], axis=0)
    return stacked.reset_index(drop=True)


X_train, X_test = make_duplicates(X_train, duplicates), make_duplicates(X_test, duplicates)
y_train, y_test = make_duplicates(y_train, duplicates), make_duplicates(y_test, duplicates)

In [6]:
# Sanity check: number of training rows after the duplication step.
len(X_train)

800

In [7]:
import copy
from sklearn.preprocessing import StandardScaler

# Snapshot the unscaled features first: the fairness helpers below are given
# X_test_orig alongside the scaled X_test.
# NOTE: X_train / X_test are rebound to the scaler output below, so re-running this
# cell on its own would snapshot the already-scaled data as the "*_orig" copies.
X_train_orig, X_test_orig = copy.deepcopy(X_train), copy.deepcopy(X_test)

# Fit the scaler on the training split only, then apply the same transform to test.
sc = StandardScaler()
X_train, X_test = sc.fit_transform(X_train), sc.transform(X_test)

In [8]:
# Resolve the configured classifier through an explicit name -> class registry
# rather than eval(): an unexpected clf_name now fails with a clear message and
# can never execute arbitrary code.
_classifier_types = {
    'Logistic_Regression': Logistic_Regression,
    'SVM': SVM,
    'NeuralNetwork': NeuralNetwork,
}
if clf_name not in _classifier_types:
    raise ValueError(
        f"Unknown clf_name {clf_name!r}; expected one of {sorted(_classifier_types)}"
    )

# Untrained instance, used in this cell only to count parameters and choose the loss.
cl = _classifier_types[clf_name](input_size=X_train.shape[-1])

# Length of whatever convert_grad_to_ndarray returns for the model's parameter list.
# NOTE(review): presumably the flattened parameter count — confirm in utils/influence.
num_params = len(convert_grad_to_ndarray(list(cl.parameters())))

# Pick the loss matching the model type. The isinstance order is kept as-is so that
# any subclass relationship between the classifier classes resolves the same way.
if isinstance(cl, Logistic_Regression):
    loss_func = logistic_loss_torch
elif isinstance(cl, SVM):
    loss_func = svm_loss_torch
elif isinstance(cl, NeuralNetwork):
    loss_func = nn_loss_torch
else:
    raise NotImplementedError

In [9]:
# Resolve the configured classifier through an explicit name -> class registry
# rather than eval(): an unexpected clf_name now fails with a clear message and
# can never execute arbitrary code.
_classifier_types = {
    'Logistic_Regression': Logistic_Regression,
    'SVM': SVM,
    'NeuralNetwork': NeuralNetwork,
}
if clf_name not in _classifier_types:
    raise ValueError(
        f"Unknown clf_name {clf_name!r}; expected one of {sorted(_classifier_types)}"
    )
clf = _classifier_types[clf_name](input_size=X_train.shape[-1])

# Train on the scaled training split.
clf.fit(X_train, y_train)

# Model outputs on both splits (only the test outputs are used in this cell).
y_pred_test = clf.predict_proba(X_test)
y_pred_train = clf.predict_proba(X_train)

# Baseline fairness on the test split. The fourth argument selects the metric
# (0 = statistical parity, 1 = TPR parity, 2 = predictive parity); the unscaled
# features X_test_orig are what computeFairness receives.
spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
print("Initial statistical parity: ", spd_0)

tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
print("Initial TPR parity: ", tpr_parity_0)

predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
print("Initial predictive parity: ", predictive_parity_0)

# Baseline test accuracy.
accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)

Initial statistical parity:  -0.09527579839341338
Initial TPR parity:  -0.07785149414194836
Initial predictive parity:  -0.10136869475949661
Initial accuracy:  0.755


In [10]:
# Baseline value of the fairness metric selected by `metric`
# (0 = statistical parity, 1 = TPR parity, 2 = predictive parity).
metric_val = (spd_0, tpr_parity_0, predictive_parity_0)[metric]

# Result of the project's get_del_F_del_theta helper for the trained model on the test split.
# NOTE(review): by its name, the gradient of the chosen fairness metric with respect
# to the model parameters — confirm in influence.py.
del_F_del_theta = get_del_F_del_theta(
    clf, X_test_orig, X_test, y_test, dataset, metric
)

In [11]:
# Hessian information for the training set from the project's helper; the recorded
# progress bar shows one iteration per training row (800).
# NOTE(review): whether this is a per-point stack or an aggregate is not visible here
# — confirm in influence.py.
hessian_all_points = get_hessian_all_points(clf, X_train, y_train, loss_func)

  0%|          | 0/800 [00:00<?, ?it/s]

100%|██████████| 800/800 [00:11<00:00, 67.52it/s]


In [12]:
# Result of the project's get_del_L_del_theta helper on the training data.
# NOTE(review): by its name, loss gradients with respect to the model parameters
# (presumably one per training point) — confirm in influence.py.
del_L_del_theta = get_del_L_del_theta(clf, X_train, y_train, loss_func)

In [13]:
# get_hinv_v returns two values from the Hessian data and the fairness gradient.
# NOTE(review): by their names, hinv is the inverse Hessian and hinv_v is that inverse
# applied to del_F_del_theta — confirm in influence.py.
hinv_v, hinv = get_hinv_v(hessian_all_points, del_F_del_theta)

In [14]:
# Product of hinv_v (transposed) with the fairness gradient; the recorded output below
# shows the result is a single scalar.
# NOTE(review): del_L_del_theta (computed earlier) is not used here. If a per-training-
# point influence on fairness is intended, the loss gradients would normally appear
# in this product — confirm this scalar is the intended quantity.
inf = np.matmul(np.transpose(hinv_v), del_F_del_theta)

In [15]:
# Display the value computed in the previous cell.
inf

0.9357628711602708

In [16]:
# Disabled debug prints; uncomment one to inspect that intermediate quantity.
# delta fairness function
#print(del_F_del_theta)
# hessian for each datapoint
#print(hessian_all_points)
# del_L_del_theta for each datapoint
#print(del_L_del_theta)
# Shape of `inf` (the product of hinv_v and del_F_del_theta), not of the inverse
# Hessian; an empty tuple means it is a scalar.
print(inf.shape)

()
