In [1]:
import numpy as np
import tensorflow as tf
from adult_modified import preprocess_adult_data
from sklearn import linear_model
import classifier as cl
import utils
import time
import multiprocessing as mp
import dill


seed = 1
dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed = seed)

# Split the feature matrix column-wise: the first 39 columns are kept as the
# unprotected features, everything from column 39 onwards as the protected ones.
x_unprotected_train = dataset_orig_train.features[:, :39]
x_protected_train = dataset_orig_train.features[:, 39:]
x_unprotected_test = dataset_orig_test.features[:, :39]
x_protected_test = dataset_orig_test.features[:, 39:]

# Flatten the labels to 1-D.
y_train = dataset_orig_train.labels.reshape((-1,))
y_test = dataset_orig_test.labels.reshape((-1,))





## Running linear regression to get sensitive directions

# Regress the protected columns on the unprotected ones (no intercept) and keep
# the fitted coefficient matrix; its rows are consumed as directions downstream.
protected_regression = linear_model.LinearRegression(fit_intercept = False).fit(
    x_unprotected_train, x_protected_train
)
sensetive_directions = protected_regression.coef_

def projection_matrix(sensetive_directions, strength = 0.99):
    """Build a matrix that shrinks vectors along the given sensitive directions.

    Starting from the ``d x d`` identity, ``strength * v v^T`` is subtracted
    for every row ``v`` of ``sensetive_directions`` after the row has been
    scaled to unit L2 norm.  For a single direction the component along it is
    therefore multiplied by ``1 - strength``.  The result is an exact
    orthogonal projection only when ``strength`` is 1 and the directions are
    mutually orthogonal.

    Args:
        sensetive_directions: array-like of shape ``(n, d)``, one direction
            per row.  A single 1-D direction of length ``d`` is also accepted.
        strength: fraction of each direction to remove.  The default of 0.99
            reproduces the previously hard-coded value.

    Returns:
        ``numpy.ndarray`` of shape ``(d, d)``.
    """
    directions = np.atleast_2d(np.asarray(sensetive_directions))
    d = directions.shape[1]
    mx = np.identity(d)
    for vector in directions:
        norm = np.linalg.norm(vector, ord=2)
        if norm == 0:
            # A zero row carries no direction; normalising it would be 0/0
            # and would turn the whole matrix into NaN.
            continue
        unit = vector / norm
        # Scale one factor before the outer product so the floating-point
        # result matches the original `(0.99 * v) @ v.T` evaluation order.
        mx = mx - np.outer(strength * unit, unit)
    return mx




unprotected_directions = projection_matrix(sensetive_directions)

# Casting to tensors: labels become int32 and then one-hot rows of depth 2;
# the features and the projection matrix become float32 tensors.
y_train = y_train.astype('int32')
y_test = y_test.astype('int32')
x_unprotected_train = tf.cast(x_unprotected_train, dtype = tf.float32)
x_unprotected_test = tf.cast(x_unprotected_test, dtype = tf.float32)
y_train = tf.one_hot(y_train, 2)
y_test = tf.one_hot(y_test, 2)
unprotected_directions = tf.cast(unprotected_directions, dtype = tf.float32)
inv_unprotected_direction = tf.linalg.inv(unprotected_directions)

init_graph = utils.ClassifierGraph(50, 2)
# use for unfair algo: pass the raw features instead of the projected ones
#graph = cl.Classifier(init_graph, x_unprotected_train, y_train, x_unprotected_test, y_test, num_steps = 1000)
# for fair algo: both feature sets are multiplied by unprotected_directions first
graph = cl.Classifier(
    init_graph,
    tf.matmul(x_unprotected_train, unprotected_directions),
    y_train,
    tf.matmul(x_unprotected_test, unprotected_directions),
    y_test,
    num_steps = 1000,
)



Done step 200

Done step 400

Done step 600

Done step 800

Done step 1000



In [2]:
def sample_perturbation(data_point, learning_rate = 1e-3, num_steps = 20):
    """Perturb one example by repeated gradient ascent on the classifier loss.

    Each step moves the point along the loss gradient with respect to the
    input, right-multiplied by the module-level ``inv_unprotected_direction``
    and scaled by ``learning_rate``.

    Args:
        data_point: ``(x, y)`` pair; both are reshaped to a single row.
        learning_rate: step size applied to every update.
        num_steps: number of ascent steps.

    Returns:
        The perturbed point as a NumPy array with one row.
    """
    features, label = data_point
    perturbed = tf.reshape(features, (1, -1))
    target = tf.reshape(label, (1, -1))
    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            # Plain tensors are not tracked automatically, so watch explicitly.
            tape.watch(perturbed)
            # NOTE(review): `graph` was fitted on features multiplied by
            # unprotected_directions, but is evaluated on the raw point here;
            # confirm this is intended.
            loss = utils.EntropyLoss(target, graph(perturbed))

        ascent_direction = tf.matmul(tape.gradient(loss, perturbed), inv_unprotected_direction)
        perturbed = perturbed + learning_rate * ascent_direction
    return perturbed.numpy()


In [3]:
# Smoke test: run the perturbation on the first test example and display the result.
x = x_unprotected_test[0]
y = y_test[0]
data = x, y
sample_perturbation(data)

array([[-1.1753671e+00, -1.6067826e+00, -1.4705397e-01, -2.1894576e-01,
         1.5996068e+00,  9.3263538e-05, -2.2186493e-04,  9.9947041e-01,
        -4.8863363e-05, -3.3794309e-04, -2.0201206e-04, -1.7743303e-04,
        -5.9206248e-04, -2.8444476e-05, -1.0421117e-04, -2.1349395e-06,
         9.9932384e-01, -4.0368436e-04,  8.1799990e-05, -2.8746232e-04,
        -4.4900592e-04, -3.0661109e-04, -4.9690076e-05, -3.0351541e-04,
        -1.9107449e-04, -4.2180295e-04,  9.9956208e-01, -2.5411073e-05,
        -6.1848514e-05, -6.3593514e-05, -1.0655816e-04,  1.7099055e-04,
        -1.8060386e-04, -1.8011966e-04, -6.4957544e-04, -3.2210827e-04,
        -4.7512303e-04,  9.9959230e-01,  3.0132543e-04]], dtype=float32)

In [4]:
# Rebind x and y as single-row 2-D tensors, the same reshape sample_perturbation
# applies internally, so the steps below can be replayed by hand.
x = tf.reshape(x, (1, -1))
y = tf.reshape(y, (1, -1))

In [5]:
# One gradient evaluation at the unperturbed point, mirroring a single
# iteration of the loop inside sample_perturbation.
with tf.GradientTape() as g:
    g.watch(x)  # x is a plain tensor, so it has to be watched explicitly
    prob = graph(x)
    loss = utils.EntropyLoss(y, prob)

gradient = g.gradient(loss, x)

In [6]:
# Same expression as inv_unprotected_direction from the setup cell, recomputed
# under a short name for inspection.
u = tf.linalg.inv(unprotected_directions)

In [7]:
# Display the inverse of the projection matrix.
u

<tf.Tensor: shape=(39, 39), dtype=float32, numpy=
array([[ 9.9996185e-01,  1.1376848e-04,  5.8734498e-05, ...,
         2.9607913e-03,  2.7763366e-05, -2.2696673e-03],
       [ 1.1376851e-04,  9.9987900e-01, -3.1136515e-05, ...,
        -4.3144813e-03, -1.4073697e-03, -1.4989179e-03],
       [ 5.8734506e-05, -3.1136544e-05,  1.0000046e+00, ...,
        -1.5802447e-03, -9.1745204e-04, -1.9624338e-03],
       ...,
       [ 2.9607913e-03, -4.3144813e-03, -1.5802443e-03, ...,
         8.6362684e-01, -2.9521117e-02,  5.2355272e-03],
       [ 2.7763735e-05, -1.4073701e-03, -9.1745215e-04, ...,
        -2.9521113e-02,  1.0080533e+00,  5.1919676e-02],
       [-2.2696671e-03, -1.4989178e-03, -1.9624340e-03, ...,
         5.2355272e-03,  5.1919688e-02,  1.1783721e+00]], dtype=float32)>

In [8]:
# The update direction sample_perturbation uses, before scaling by learning_rate.
tf.matmul(gradient, u)

<tf.Tensor: shape=(1, 39), dtype=float32, numpy=
array([[ 0.01043551,  0.05923065,  0.07095308,  0.03998211,  0.05019232,
        -0.0172134 , -0.01468189, -0.0669503 , -0.02355065, -0.04954261,
        -0.02172976, -0.01249817, -0.08480156,  0.05365053, -0.00941733,
        -0.00760272, -0.04413841, -0.02914601,  0.01829208, -0.01286143,
        -0.01241331, -0.03239373,  0.03819576, -0.02832818, -0.05086481,
        -0.09576268, -0.06183094,  0.07220908,  0.01977082,  0.02160866,
         0.00141751,  0.00646481, -0.02405381, -0.1152992 , -0.03177928,
        -0.07902008, -0.07768835, -0.08926576,  0.07077864]],
      dtype=float32)>

In [9]:
# Display the unperturbed (reshaped) point for comparison with the perturbed output above.
x

<tf.Tensor: shape=(1, 39), dtype=float32, numpy=
array([[-1.1755865 , -1.6070586 , -0.14741328, -0.219069  ,  1.5994685 ,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ,  0.        ]],
      dtype=float32)>