In [1]:
import numpy as np
import tensorflow as tf
from adult_modified import preprocess_adult_data
from sklearn import linear_model
import utils
import classifier as cl

In [2]:
# Seed forwarded to the data-preparation helper (presumably it controls the
# train/test split -- confirm in adult_modified.preprocess_adult_data).
seed = 1
# The helper returns two dataset objects; later cells read their `.features`
# and `.labels` attributes.
dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed = seed)

In [3]:
# Column split of the feature matrices: columns [0, 39) are used as the
# unprotected inputs, columns [39, end) as the protected attributes.
x_unprotected_train = dataset_orig_train.features[:, :39]
x_protected_train = dataset_orig_train.features[:, 39:]
x_unprotected_test = dataset_orig_test.features[:, :39]
x_protected_test = dataset_orig_test.features[:, 39:]

# Labels flattened to 1-D and stored as int32.
y_train = np.array(dataset_orig_train.labels.reshape((-1,)), dtype='int32')
y_test = np.array(dataset_orig_test.labels.reshape((-1,)), dtype='int32')

# Regress the protected columns on the unprotected ones, without an
# intercept. The fitted coefficient matrix is what the next step iterates
# over row by row (spelling of `sensetive_directions` kept: later code uses it).
protected_regression = linear_model.LinearRegression(fit_intercept=False)
protected_regression.fit(x_unprotected_train, x_protected_train)
sensetive_directions = protected_regression.coef_

def projection_matrix(sensetive_directions, strength=0.99):
    """Build a matrix that shrinks the components along the given directions.

    Starting from the identity, each row ``v`` of ``sensetive_directions`` is
    scaled to unit L2 norm and ``strength * v v^T`` is subtracted. The rows
    are processed one after another and are not orthogonalised against each
    other first.

    Parameters
    ----------
    sensetive_directions : array-like, shape (n, d) or (d,)
        One direction per row. A 1-D input is treated as a single direction.
    strength : float, optional
        Fraction of each direction that is removed. The default of 0.99
        reproduces the previously hard-coded factor.

    Returns
    -------
    numpy.ndarray, shape (d, d)

    Notes
    -----
    Rows with zero norm are skipped. Normalising them would divide by zero
    and turn every entry of the result into NaN.
    """
    directions = np.atleast_2d(np.asarray(sensetive_directions))
    d = directions.shape[1]
    mx = np.identity(d)
    for vector in directions:
        norm = np.linalg.norm(vector, ord=2)
        if norm == 0:
            # Nothing to remove for a null direction.
            continue
        vector = (vector / norm).reshape((-1, 1))
        # Same evaluation order as before: (strength * v) @ v.T
        mx = mx - strength * vector @ vector.T
    return mx

# Matrix built from the regression coefficients above; despite its name this
# is a single square matrix, which later cells right-multiply displacements by.
unprotected_directions = projection_matrix(sensetive_directions)


In [4]:
# Convert the feature arrays to float32 tensors for the TensorFlow code below.
x_unprotected_train = tf.cast(x_unprotected_train, dtype=tf.float32)
x_unprotected_test = tf.cast(x_unprotected_test, dtype=tf.float32)

# One-hot encode the labels with depth 2.
# NOTE: y_train / y_test are rebound to their own encoding, so running this
# cell a second time feeds the already encoded labels back into tf.one_hot.
y_train = tf.one_hot(y_train, 2)
y_test = tf.one_hot(y_test, 2)

unprotected_directions = tf.cast(unprotected_directions, dtype=tf.float32)


In [5]:
# Build the classifier graph and pass it straight to cl.Classifier together
# with the train/test tensors; the object that comes back is bound to `graph`
# and is called as `graph(x)` by later cells.
# (Meaning of the 50 and 2 arguments is defined in utils -- TODO confirm.)
graph = cl.Classifier(
    utils.ClassifierGraph(50, 2),
    x_unprotected_train,
    y_train,
    x_unprotected_test,
    y_test,
    num_steps=1000,
)

# Repeats the float32 cast from the previous cell.
unprotected_directions = tf.cast(unprotected_directions, dtype=tf.float32)

Done step 200

Done step 400

Done step 600

Done step 800

Done step 1000



In [6]:
# Settings for the perturbation loop further down:
# `regularizer` weights the squared-displacement penalty in the loss,
# `learning_rate` is the length of each normalised gradient step.
regularizer = 1e-2
learning_rate = 1e-3
# First training example and its one-hot label.
x = x_unprotected_train[0, :]
y = y_train[0, : ]

In [7]:
# Give the sample and its label a leading axis of size 1.
x = tf.reshape(x, (1, -1))
y = tf.reshape(y, (1, -1))
# Keep a handle on the unperturbed point; the loop below rebinds `x`.
x_start = x

# Display the starting point.
x_start

<tf.Tensor: id=71216, shape=(1, 39), dtype=float32, numpy=
array([[ 1.0228531 , -0.04535909, -0.14741328, -0.219069  ,  0.67812765,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ,  0.        ,  0.        ,  0.        ]],
      dtype=float32)>

In [8]:
# Gradient ascent on the sample itself: each step increases
# utils.EntropyLoss(y, graph(x)) minus `regularizer` times the squared size of
# the displacement (x - x_start) after it is multiplied by
# unprotected_directions.
for _ in range(100):
    with tf.GradientTape() as g:
        # x is a plain tensor rather than a tf.Variable, so it has to be
        # watched explicitly for the tape to record operations on it.
        g.watch(x)
        purturb = tf.linalg.matmul(x - x_start, unprotected_directions)
        prob = graph(x)
        loss = utils.EntropyLoss(y, prob) - regularizer * tf.reduce_sum(purturb**2)

    gradient = g.gradient(loss, x)
    # Fixed-length step along the gradient direction. divide_no_nan returns 0
    # where the norm is 0, so an exactly-zero gradient leaves x unchanged
    # instead of turning every coordinate into NaN.
    x = x + learning_rate * tf.math.divide_no_nan(gradient, tf.linalg.norm(gradient, ord = 2))

In [9]:
# Display the point reached after the ascent loop above.
x

<tf.Tensor: id=79018, shape=(1, 39), dtype=float32, numpy=
array([[ 1.0349126e+00, -2.8684437e-02, -1.2339086e-01, -2.1087645e-01,
         6.9033527e-01,  6.0225818e-03, -7.0167417e-03,  9.8234510e-01,
         4.2951531e-03, -1.0878822e-02,  2.1276853e-04, -1.4931154e-03,
         9.8200589e-01, -2.6740716e-03,  2.8400466e-02, -1.7670636e-03,
        -2.5012206e-02,  3.1955547e-03, -3.3968575e-02, -1.5382072e-02,
        -1.6312398e-02, -1.8414931e-02,  4.7395905e-03, -5.8301375e-03,
        -1.7799774e-02, -1.6227551e-02, -2.1124270e-02, -1.1296313e-02,
         5.2130218e-03, -8.4327469e-03,  9.9663967e-01, -6.3510393e-03,
        -2.5058609e-02,  1.0285573e-02,  9.6709698e-01, -1.3559571e-02,
        -1.8522402e-02, -2.5062757e-02,  1.1961920e-02]], dtype=float32)>

In [10]:
# Display the starting point again for comparison (the loop rebinds x only).
x_start

<tf.Tensor: id=71216, shape=(1, 39), dtype=float32, numpy=
array([[ 1.0228531 , -0.04535909, -0.14741328, -0.219069  ,  0.67812765,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ,  0.        ,  0.        ,  0.        ]],
      dtype=float32)>

In [11]:
# Same perturbed point, converted from a tensor to a NumPy array.
x.numpy()

array([[ 1.0349126e+00, -2.8684437e-02, -1.2339086e-01, -2.1087645e-01,
         6.9033527e-01,  6.0225818e-03, -7.0167417e-03,  9.8234510e-01,
         4.2951531e-03, -1.0878822e-02,  2.1276853e-04, -1.4931154e-03,
         9.8200589e-01, -2.6740716e-03,  2.8400466e-02, -1.7670636e-03,
        -2.5012206e-02,  3.1955547e-03, -3.3968575e-02, -1.5382072e-02,
        -1.6312398e-02, -1.8414931e-02,  4.7395905e-03, -5.8301375e-03,
        -1.7799774e-02, -1.6227551e-02, -2.1124270e-02, -1.1296313e-02,
         5.2130218e-03, -8.4327469e-03,  9.9663967e-01, -6.3510393e-03,
        -2.5058609e-02,  1.0285573e-02,  9.6709698e-01, -1.3559571e-02,
        -1.8522402e-02, -2.5062757e-02,  1.1961920e-02]], dtype=float32)

In [12]:
# Size of the first axis of the training tensor (one row per training example).
x_unprotected_train.shape[0]

36177

In [13]:
def sample_purturbation(x, y, regularizer = 1e-2, learning_rate = 1e-4, num_steps = 20):
    """Run a short gradient-ascent search for a perturbed copy of ``x``.

    Each step moves ``x`` a fixed distance ``learning_rate`` along the
    gradient of

        utils.EntropyLoss(y, graph(x))
            - regularizer * sum(((x - x_start) @ unprotected_directions) ** 2)

    where ``x_start`` is the input point. The function reads the
    notebook-level names ``graph``, ``utils`` and ``unprotected_directions``,
    so those must be defined before it is called.

    Parameters
    ----------
    x : tensor
        A single sample; it is reshaped to ``(1, -1)``.
    y : tensor
        The label for ``x``; it is reshaped to ``(1, -1)``.
    regularizer : float
        Weight of the squared-displacement penalty.
    learning_rate : float
        Length of every normalised gradient step.
    num_steps : int
        Number of ascent steps.

    Returns
    -------
    tensor of shape ``(1, d)``
        The point reached after ``num_steps`` steps.
    """
    x = tf.reshape(x, (1, -1))
    y = tf.reshape(y, (1, -1))
    x_start = x
    for _ in range(num_steps):
        with tf.GradientTape() as g:
            # x is a plain tensor here, so the tape must be told to track it.
            g.watch(x)
            purturb = tf.linalg.matmul(x - x_start, unprotected_directions)
            prob = graph(x)
            loss = utils.EntropyLoss(y, prob) - regularizer * tf.reduce_sum(purturb**2)

        gradient = g.gradient(loss, x)
        # divide_no_nan returns 0 where the norm is 0, so an exactly-zero
        # gradient leaves x where it is instead of producing NaN coordinates.
        x = x + learning_rate * tf.math.divide_no_nan(gradient, tf.linalg.norm(gradient, ord = 2))
    return x





In [14]:
# Rebind x and y to the first training example and its label (no batch axis;
# sample_purturbation reshapes them itself).
x, y = x_unprotected_train[0], y_train[0]

In [15]:
# Display the selected sample.
x

<tf.Tensor: id=79022, shape=(39,), dtype=float32, numpy=
array([ 1.0228531 , -0.04535909, -0.14741328, -0.219069  ,  0.67812765,
        0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        1.        ,  0.        ,  0.        ,  0.        ,  1.        ,
        0.        ,  0.        ,  0.        ,  0.        ], dtype=float32)>

In [16]:
# Display the one-hot label of the selected sample.
y

<tf.Tensor: id=79026, shape=(2,), dtype=float32, numpy=array([1., 0.], dtype=float32)>

In [17]:
import time

# Time one call on a single example. perf_counter is the monotonic,
# high-resolution clock meant for measuring elapsed intervals; time.time()
# follows the wall clock and can jump when the system time is adjusted.
start_time = time.perf_counter()
sample_purturbation(x, y)
end_time = time.perf_counter()

In [18]:
# Elapsed time, in seconds, of the single call timed in the previous cell.
end_time - start_time

0.2862379550933838

In [19]:
# Time the perturbation search over the first 30 training examples.
# perf_counter is used because it is monotonic and intended for interval
# timing, unlike the wall-clock time.time().
# NOTE: the loop rebinds the notebook-level x and y; after this cell they
# hold the last of the 30 examples.
start_time = time.perf_counter()
for x, y in zip(x_unprotected_train[:30], y_train[:30]):
    sample_purturbation(x, y)
end_time = time.perf_counter()

In [20]:
# Elapsed time, in seconds, of the 30-example loop in the previous cell.
end_time - start_time

7.752289772033691

In [23]:
# NOTE(review): importing this module runs module-level code -- the output
# below shows a printed matrix, training progress lines and a timing line.
# The star import may also rebind names already defined in this notebook;
# check the module's contents before relying on earlier definitions.
from adversarial_sample import *

[[ 9.99928687e-01  5.15363856e-05  3.47176646e-06 ...  2.20238180e-03
   1.03024868e-03  1.86161822e-03]
 [ 5.15363856e-05  9.99914640e-01 -3.42820389e-05 ... -2.58575815e-03
  -4.51321975e-04  4.80510264e-04]
 [ 3.47176646e-06 -3.42820389e-05  9.99978850e-01 ... -7.63695793e-04
   1.43468950e-04  1.11507031e-03]
 ...
 [ 2.20238180e-03 -2.58575815e-03 -7.63695793e-04 ...  9.11442685e-01
  -2.57592549e-02 -1.97681295e-02]
 [ 1.03024868e-03 -4.51321975e-04  1.43468950e-04 ... -2.57592549e-02
   9.83329245e-01 -3.80214060e-02]
 [ 1.86161822e-03  4.80510264e-04  1.11507031e-03 ... -1.97681295e-02
  -3.80214060e-02  8.82116015e-01]]
Done step 200

Done step 400

Done step 600

Done step 800

Done step 1000

Time taken 0.2494730806350708
