In [1]:
import pandas as pd

In [2]:
import tensorflow as tf

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

In [4]:
from sklearn.preprocessing import StandardScaler

In [5]:
from keras.models import Model
from keras.layers import Dense, Input
from keras import regularizers
from keras.losses import binary_crossentropy

Using TensorFlow backend.


In [6]:
import keras.backend as K

In [7]:
import numpy as np

# Краткий обзор датасета

In [8]:
compass_df = pd.read_csv("/home/noomkcalb/Документы/mfti_nlp/final-project/fairness/compass/propublicaCompassRecividism_data_fairml.csv/propublica_data_for_fairml.csv")

In [9]:
compass_df.shape

(6172, 12)

In [10]:
compass_df.columns

Index(['Two_yr_Recidivism', 'Number_of_Priors', 'score_factor',
       'Age_Above_FourtyFive', 'Age_Below_TwentyFive', 'African_American',
       'Asian', 'Hispanic', 'Native_American', 'Other', 'Female',
       'Misdemeanor'],
      dtype='object')

In [11]:
compass_df["Two_yr_Recidivism"].value_counts()

0    3363
1    2809
Name: Two_yr_Recidivism, dtype: int64

- Age Category
- Gender
- Race (White/Black)
- Priors Count
- Recidive - target

In [12]:
compass_df["Race_Bias"] = ((compass_df["Asian"] + compass_df["African_American"] + compass_df["Hispanic"]) > 0).astype(int)

In [13]:
compass_df.drop(columns=["score_factor"], inplace=True)

In [14]:
scaler = StandardScaler()
compass_df["Number_of_Priors"] = scaler.fit_transform(compass_df["Number_of_Priors"].values.reshape(-1, 1))



In [15]:
compass_df.head(5)

Unnamed: 0,Two_yr_Recidivism,Number_of_Priors,Age_Above_FourtyFive,Age_Below_TwentyFive,African_American,Asian,Hispanic,Native_American,Other,Female,Misdemeanor,Race_Bias
0,0,-0.684413,1,0,0,0,0,0,1,0,0,0
1,1,-0.684413,0,0,1,0,0,0,0,0,0,1
2,1,0.158866,0,1,1,0,0,0,0,0,0,1
3,0,-0.684413,0,0,0,0,0,0,1,0,1,0
4,1,2.267065,0,0,0,0,0,0,0,0,0,0


In [16]:
X = compass_df.drop(columns=["Two_yr_Recidivism"])
y = compass_df["Two_yr_Recidivism"]

In [17]:
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3)

In [18]:
train_X.shape

(4320, 11)

In [19]:
test_X.shape

(1852, 11)

In [20]:
fixed_train_X = train_X.drop(columns="Race_Bias")
fixed_test_X = test_X.drop(columns="Race_Bias")

# Классификация логрегом (для сравнения)

In [21]:
from sklearn.linear_model import LogisticRegression

In [22]:
from sklearn.model_selection import GridSearchCV

In [23]:
grid = {'C': np.linspace(0.001, 0.01, 10)}
model = LogisticRegression(solver="lbfgs")
search = GridSearchCV(model, grid, cv=3, scoring='accuracy')

In [24]:
search.fit(fixed_train_X, train_y)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': array([0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
       0.01 ])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [25]:
search.best_estimator_.coef_

array([[ 0.64078428, -0.41152458,  0.47540335,  0.17757996, -0.02977347,
        -0.05518527, -0.01032121, -0.04559138, -0.16565973, -0.23135935]])

In [26]:
search.best_estimator_

LogisticRegression(C=0.01, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

In [27]:
search.best_score_

0.6650462962962963

# Классификация логрегом
(Один нейрон с сигмоидной функцией активации)

In [28]:
def roc_auc(X):
    return roc_auc_score(X["real"], X["prediction"])

def subgroup_roc_auc(X):
    target_X = X[X["African_American"] > 0.5]
    return roc_auc_score(target_X["real"], target_X["prediction"])

def bpsn_roc_auc(X):
    target_X = X[((X["African_American"] > 0.5) & (X["real"] < 0.5)) | ((X["African_American"] < 0.5) & (X["real"] > 0.5))]
    return roc_auc_score(target_X["real"], target_X["prediction"])

def bnsp_roc_auc(X):
    target_X = X[((X["African_American"] < 0.5) & (X["real"] < 0.5)) | ((X["African_American"] > 0.5) & (X["real"] > 0.5))]
    return roc_auc_score(target_X["real"], target_X["prediction"])

In [29]:
s = tf.Session()

In [30]:
input_features = tf.placeholder(tf.float64, shape=(None, fixed_train_X.shape[1]))
labels = tf.placeholder(tf.float64, shape=(None, ))
W = tf.Variable(0.001 * np.random.randn(fixed_train_X.shape[1], ))

Instructions for updating:
Colocations handled automatically by placer.


In [31]:
output = tf.reduce_sum(tf.multiply(input_features, W), axis=1)

In [32]:
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=output))

In [33]:
optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)

Instructions for updating:
Use tf.cast instead.


In [34]:
s.run(tf.global_variables_initializer())

In [35]:
s.run(W)

array([-0.00202679, -0.00016279, -0.00116221,  0.00032478,  0.00064287,
       -0.00024866, -0.00089542,  0.00133754,  0.00156174,  0.00075724])

In [36]:
for i in range(0, 1000):
    loss_value, _ = s.run([loss, optimizer], {input_features: fixed_train_X, labels: train_y})
    if (i % 50 == 0):
        print(loss_value)

0.6935301049370373
0.6219123227452658
0.6133259304677249
0.6120306638450035
0.6118793429867788
0.6118680541170926
0.6118675719724037
0.6118675617722038
0.6118675616937596
0.6118675616937074
0.6118675616937068
0.6118675616937066
0.6118675616937062
0.6118675616937067
0.6118675616937068
0.6118675616937069
0.6118675616937069
0.6118675616937069
0.6118675616937069
0.6118675616937069


In [37]:
fixed_test_X.columns

Index(['Number_of_Priors', 'Age_Above_FourtyFive', 'Age_Below_TwentyFive',
       'African_American', 'Asian', 'Hispanic', 'Native_American', 'Other',
       'Female', 'Misdemeanor'],
      dtype='object')

In [38]:
s.run(W)

array([ 0.79677386, -0.70637239,  0.72373113,  0.05630165, -0.84133978,
       -0.21337521, -1.05483896, -0.21050486, -0.28836074, -0.31529698])

In [39]:
view_X = fixed_test_X.copy()
view_X["prediction"] = s.run(output, {input_features: fixed_test_X})
view_X["real"] = test_y

In [40]:
accuracy_score(view_X["real"], view_X["prediction"] > 0.5)

0.6349892008639308

In [41]:
roc_auc(view_X)

0.7154918736770965

In [42]:
subgroup_roc_auc(view_X)

0.7136292141628714

In [43]:
# Это значение необходимо улучшить
bpsn_roc_auc(view_X)

0.4932274206270953

In [44]:
bnsp_roc_auc(view_X)

0.8641857152396002

# Добавление компонента лосса

### Обучение модели

In [45]:
train_bias = train_X["Race_Bias"]
test_bias = test_X["Race_Bias"]

In [46]:
input_features = tf.placeholder(tf.float64, shape=(None, fixed_train_X.shape[1]))

target_labels = tf.placeholder(tf.float64, shape=(None, ))
bias_labels = tf.placeholder(tf.float64, shape=(None, ))

W = tf.Variable(0.001 * np.random.randn(fixed_train_X.shape[1], ))

In [47]:
output = tf.reduce_sum(tf.multiply(input_features, W), axis=1)

In [48]:
target_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=target_labels, logits=output))

In [49]:
bias_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=bias_labels, logits=output))

In [50]:
optimizer = tf.train.AdamOptimizer(0.01)#.minimize(loss)

### Вариант с изменением градиента (не сработал)

In [51]:
# target_gradients = {variable: gradient for gradient, variable in optimizer.compute_gradients(target_loss) if gradient is not None}

In [52]:
# bias_gradients = {variable: gradient for gradient, variable in optimizer.compute_gradients(bias_loss) if gradient is not None}

$$ \nabla w = \nabla_{J}w - \frac{\nabla_{J}w*\nabla_{H}w}{|\nabla_{H}w|} \nabla_{H}w $$

In [53]:
# full_gradients = []
# for variable in target_gradients:
#     bias_length = tf.norm(bias_gradients[variable])
#     target_bias_projection = tf.multiply(target_gradients[variable], bias_gradients[variable])
#     full_gradients.append((target_gradients[variable] - (target_bias_projection / bias_length), variable))

In [54]:
# applicator = optimizer.apply_gradients(full_gradients)

### Вариант с изменением лосса

In [96]:
# ПОЧЕМУ ЭТО РАБОТАЕТ?!?!?!?
kappa = 0.1
applicator = optimizer.minimize(- kappa*bias_loss + target_loss)

In [97]:
s.run(tf.global_variables_initializer())

In [98]:
fixed_test_X.columns

Index(['Number_of_Priors', 'Age_Above_FourtyFive', 'Age_Below_TwentyFive',
       'African_American', 'Asian', 'Hispanic', 'Native_American', 'Other',
       'Female', 'Misdemeanor'],
      dtype='object')

In [99]:
s.run(W)

array([ 0.00090099, -0.00030851,  0.00078369, -0.00026781,  0.00047223,
       -0.00021082, -0.00024544,  0.00036428,  0.00022639, -0.00034155])

In [100]:
gradients_history = []
for i in range(0, 1000):
#     target_gradients_vector, bias_gradients_vector, full_gradients_vector
    target_loss_value, bias_loss_value, _ = s.run([target_loss, bias_loss, applicator], {
        input_features: fixed_train_X, 
        target_labels: train_y, 
        bias_labels: train_bias
    })
    if (i % 50 == 0):
        print(target_loss_value, bias_loss_value)

0.6929893891077443 0.6931265768429603
0.624300059170241 0.7244740659371466
0.6171135111493525 0.763184642688744
0.6167789518829043 0.7811229225835818
0.6172595767213577 0.7895654502249573
0.6175424129495806 0.7929221253551961
0.6176523591481443 0.7941060634176242
0.6176911791615729 0.7945138015924333
0.6177061849609792 0.7946692963404669
0.617712941819002 0.7947383140371329
0.6177162083956735 0.7947713266211656
0.6177177577122505 0.7947868933546315
0.6177184511325032 0.7947938413121403
0.6177187410864731 0.7947967431080735
0.6177188541015479 0.794797873581978
0.6177188950861625 0.794798283468324
0.6177189088792461 0.7947984214034756
0.6177189131727638 0.7947984643390485
0.6177189144038561 0.7947984766500068
0.6177189147273731 0.7947984798851767


In [101]:
s.run(W)

array([ 0.8922647 , -0.68256511,  0.9137429 , -0.27398426, -1.47863165,
       -0.63369014, -0.92524877, -0.06512947, -0.24963107, -0.26936003])

In [102]:
view_X = fixed_test_X.copy()
view_X["prediction"] = s.run(output, {input_features: fixed_test_X})
view_X["real"] = test_y

In [103]:
roc_auc(view_X)

0.7122195210092315

In [104]:
subgroup_roc_auc(view_X)

0.7124797988091021

### BPSN была увеличена, при этом BNSP упала

In [105]:
bpsn_roc_auc(view_X)

0.6055203608755669

In [106]:
bnsp_roc_auc(view_X)

0.7909368114135636