In [1]:
import pandas as pd

In [2]:
import tensorflow as tf

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

In [4]:
from sklearn.preprocessing import StandardScaler

In [5]:
from keras.models import Model
from keras.layers import Dense, Input
from keras import regularizers
from keras.losses import binary_crossentropy

Using TensorFlow backend.


In [6]:
import keras.backend as K

In [7]:
import numpy as np

# Краткий обзор датасета

In [8]:
compass_df = pd.read_csv("/home/noomkcalb/Документы/mfti_nlp/final-project/fairness/compass/propublicaCompassRecividism_data_fairml.csv/propublica_data_for_fairml.csv")

In [9]:
compass_df.shape

(6172, 12)

In [10]:
compass_df.columns

Index(['Two_yr_Recidivism', 'Number_of_Priors', 'score_factor',
       'Age_Above_FourtyFive', 'Age_Below_TwentyFive', 'African_American',
       'Asian', 'Hispanic', 'Native_American', 'Other', 'Female',
       'Misdemeanor'],
      dtype='object')

In [11]:
compass_df["Two_yr_Recidivism"].value_counts()

0    3363
1    2809
Name: Two_yr_Recidivism, dtype: int64

- Age Category
- Gender
- Race (White/Black)
- Priors Count
- Recidive - target

In [12]:
# compass_df["Race_Bias"] = ((compass_df["Asian"] + compass_df["African_American"] + compass_df["Hispanic"]) > 0).astype(int)
compass_df["Race_Bias"] = (compass_df["African_American"] > 0.5).astype(int)

In [13]:
compass_df.drop(columns=["score_factor"], inplace=True)

In [14]:
scaler = StandardScaler()
compass_df["Number_of_Priors"] = scaler.fit_transform(compass_df["Number_of_Priors"].values.reshape(-1, 1))



In [15]:
compass_df.head(5)

Unnamed: 0,Two_yr_Recidivism,Number_of_Priors,Age_Above_FourtyFive,Age_Below_TwentyFive,African_American,Asian,Hispanic,Native_American,Other,Female,Misdemeanor,Race_Bias
0,0,-0.684413,1,0,0,0,0,0,1,0,0,0
1,1,-0.684413,0,0,1,0,0,0,0,0,0,1
2,1,0.158866,0,1,1,0,0,0,0,0,0,1
3,0,-0.684413,0,0,0,0,0,0,1,0,1,0
4,1,2.267065,0,0,0,0,0,0,0,0,0,0


In [16]:
X = compass_df.drop(columns=["Two_yr_Recidivism"])
y = compass_df["Two_yr_Recidivism"]

In [17]:
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3)

In [18]:
train_X.shape

(4320, 11)

In [19]:
test_X.shape

(1852, 11)

In [20]:
fixed_train_X = train_X.drop(columns="Race_Bias")
fixed_test_X = test_X.drop(columns="Race_Bias")

# Классификация логрегом (для сравнения)

In [21]:
from sklearn.linear_model import LogisticRegression

In [22]:
from sklearn.model_selection import GridSearchCV

In [23]:
grid = {'C': np.linspace(0.001, 0.01, 10)}
model = LogisticRegression(solver="lbfgs")
search = GridSearchCV(model, grid, cv=3, scoring='accuracy')

In [24]:
search.fit(fixed_train_X, train_y)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': array([0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
       0.01 ])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [25]:
search.best_estimator_.coef_

array([[ 0.62491025, -0.37326138,  0.43216632,  0.23265812, -0.02589623,
        -0.00882629,  0.00341589, -0.02991352, -0.19451785, -0.19670986]])

In [26]:
search.best_estimator_

LogisticRegression(C=0.009000000000000001, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='warn', n_jobs=None, penalty='l2', random_state=None,
          solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)

In [27]:
search.best_score_

0.6685185185185185

# Классификация логрегом
(Один нейрон с сигмоидной функцией активации)

In [45]:
def roc_auc(X):
    return roc_auc_score(X["real"], X["prediction"])

def subgroup_roc_auc(X):
    target_X = X[X["African_American"] > 0.5]
    return roc_auc_score(target_X["real"], target_X["prediction"])

def bpsn_roc_auc(X):
    target_X = X[((X["African_American"] > 0.5) & (X["real"] < 0.5)) | ((X["African_American"] < 0.5) & (X["real"] > 0.5))]
    return roc_auc_score(target_X["real"], target_X["prediction"])

def bnsp_roc_auc(X):
    target_X = X[((X["African_American"] < 0.5) & (X["real"] < 0.5)) | ((X["African_American"] > 0.5) & (X["real"] > 0.5))]
    return roc_auc_score(target_X["real"], target_X["prediction"])

In [46]:
s = tf.Session()

In [62]:
input_features = tf.placeholder(tf.float64, shape=(None, fixed_train_X.shape[1]))
labels = tf.placeholder(tf.float64, shape=(None, ))
W = tf.Variable(0.001 * np.random.randn(fixed_train_X.shape[1], ))

In [63]:
output = tf.reduce_sum(tf.multiply(input_features, W), axis=1)

In [64]:
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=output))

In [65]:
optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)

In [66]:
s.run(tf.global_variables_initializer())

In [67]:
s.run(W)

array([ 6.31899884e-04,  4.01608272e-04, -1.27745328e-05,  1.07448002e-03,
        6.69104437e-04, -1.57690105e-03, -1.10465598e-03,  5.65508213e-04,
       -9.38519609e-04,  7.22237730e-04])

In [68]:
for i in range(0, 1000):
    loss_value, _ = s.run([loss, optimizer], {input_features: fixed_train_X, labels: train_y})
    if (i % 50 == 0):
        print(loss_value)

0.6930481122547945
0.6226399731515996
0.6146771250957466
0.6135109405323601
0.6133802453241755
0.6133711315367998
0.6133707759052841
0.6133707693095624
0.6133707692690135
0.6133707692689767
0.6133707692689762
0.6133707692689757
0.6133707692689759
0.6133707692689754
0.6133707692689756
0.6133707692689755
0.6133707692689757
0.6133707692689757
0.6133707692689757
0.6133707692689757


In [69]:
fixed_test_X.columns

Index(['Number_of_Priors', 'Age_Above_FourtyFive', 'Age_Below_TwentyFive',
       'African_American', 'Asian', 'Hispanic', 'Native_American', 'Other',
       'Female', 'Misdemeanor'],
      dtype='object')

In [70]:
s.run(W)

array([ 0.78420594, -0.69186416,  0.65146485,  0.12324298, -0.77926445,
       -0.11137015,  0.12457262, -0.1658482 , -0.37199488, -0.29794066])

In [71]:
view_X = fixed_test_X.copy()
view_X["prediction"] = s.run(output, {input_features: fixed_test_X})
view_X["real"] = test_y

In [72]:
accuracy_score(view_X["real"], view_X["prediction"] > 0.5)

0.6279697624190065

In [73]:
roc_auc(view_X)

0.7281123053486798

In [74]:
subgroup_roc_auc(view_X)

0.7228121974062256

In [75]:
# Это значение необходимо улучшить
bpsn_roc_auc(view_X)

0.5137711622381137

In [76]:
bnsp_roc_auc(view_X)

0.8830461287380598

# Добавление штрафа

Начать в 15:00

# Добавление компонента лосса

### Обучение модели

In [104]:
train_bias = train_X["Race_Bias"]
test_bias = test_X["Race_Bias"]

In [105]:
input_features = tf.placeholder(tf.float64, shape=(None, fixed_train_X.shape[1]))

target_labels = tf.placeholder(tf.float64, shape=(None, ))
bias_labels = tf.placeholder(tf.float64, shape=(None, ))

W = tf.Variable(0.001 * np.random.randn(fixed_train_X.shape[1], ))

In [106]:
output = tf.reduce_sum(tf.multiply(input_features, W), axis=1)

In [107]:
target_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=target_labels, logits=output))

In [108]:
bias_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=bias_labels, logits=output))

In [109]:
optimizer = tf.train.AdamOptimizer(0.01)#.minimize(loss)

In [110]:
# target_gradients = {variable: gradient for gradient, variable in optimizer.compute_gradients(target_loss) if gradient is not None}

In [111]:
# bias_gradients = {variable: gradient for gradient, variable in optimizer.compute_gradients(bias_loss) if gradient is not None}

$$ \nabla w = \nabla_{J}w - \frac{\nabla_{J}w*\nabla_{H}w}{|\nabla_{H}w|} \nabla_{H}w $$

In [112]:
# full_gradients = []
# for variable in target_gradients:
#     bias_length = tf.norm(bias_gradients[variable])
#     target_bias_projection = tf.multiply(target_gradients[variable], bias_gradients[variable])
#     full_gradients.append((target_gradients[variable] - (target_bias_projection / bias_length), variable))

In [113]:
# applicator = optimizer.apply_gradients(full_gradients)

In [126]:
kappa = 0.12

In [127]:
# WHY IS THIS WORKING!!!!!!!!!!!!!
applicator = optimizer.minimize(- kappa*bias_loss + (1-kappa) * target_loss)

In [128]:
s.run(tf.global_variables_initializer())

In [129]:
fixed_test_X.columns

Index(['Number_of_Priors', 'Age_Above_FourtyFive', 'Age_Below_TwentyFive',
       'African_American', 'Asian', 'Hispanic', 'Native_American', 'Other',
       'Female', 'Misdemeanor'],
      dtype='object')

In [130]:
s.run(W)

array([-4.22996347e-04, -9.38630373e-06,  3.99616319e-04,  4.74512106e-04,
        4.91864744e-04, -3.35599322e-04, -5.25673581e-04,  3.18635713e-04,
        2.01564206e-04, -1.40102538e-04])

In [131]:
gradients_history = []
for i in range(0, 1000):
#     target_gradients_vector, bias_gradients_vector, full_gradients_vector
    target_loss_value, bias_loss_value, _ = s.run([target_loss, bias_loss, applicator], {
        input_features: fixed_train_X, 
        target_labels: train_y, 
        bias_labels: train_bias
    })
    if (i % 50 == 0):
        print(target_loss_value, bias_loss_value)

0.6931951157472857 0.6930619795784023
0.6282007766425577 0.6891407244418474
0.6215553558096408 0.7196994135069842
0.6217531964492723 0.7371835298794188
0.6224881811589429 0.7453600940951739
0.6228873103529401 0.7486585725287758
0.6230324107180631 0.7497583819833784
0.6230727702625015 0.7500566977271222
0.6230815315947911 0.7501210464574956
0.6230829822336638 0.7501316869447264
0.6230831533704911 0.7501329419793847
0.6230831653130481 0.7501330295582695
0.6230831654075657 0.7501330302513991
0.6230831653512469 0.7501330298383955
0.6230831653491977 0.7501330298233657
0.6230831653494721 0.7501330298253787
0.623083165349478 0.7501330298254211
0.6230831653494766 0.7501330298254099
0.6230831653494763 0.75013302982541
0.6230831653494763 0.75013302982541


In [132]:
s.run(W)

array([ 0.92587252, -0.63230007,  0.90441226, -0.34257244, -0.66613433,
        0.11008336,  0.42095192,  0.04911594, -0.32447672, -0.22058114])

In [133]:
view_X = fixed_test_X.copy()
view_X["prediction"] = s.run(output, {input_features: fixed_test_X})
view_X["real"] = test_y

In [134]:
roc_auc(view_X)

0.7250162209433536

In [135]:
subgroup_roc_auc(view_X)

0.7210978300553995

In [136]:
bpsn_roc_auc(view_X)

0.688483858438621

In [137]:
bnsp_roc_auc(view_X)

0.7555640890254244