In [1]:
import numpy as np
import tensorflow as tf
from adult_modified import preprocess_adult_data
from sklearn import linear_model
import classifier as cl
import utils
import time
import multiprocessing as mp
import random
import matplotlib.pyplot as plt
import scipy
plt.ioff()  # disable matplotlib interactive mode


# Seed TensorFlow and NumPy, and hand the same seed to the preprocessing step.
seed = 1
tf.random.set_seed(seed)
np.random.seed(seed)
dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed = seed)

# Feature columns [0, 39) form the unprotected block; columns [39, end) form
# the protected block.
x_unprotected_train = dataset_orig_train.features[:, :39]
x_protected_train = dataset_orig_train.features[:, 39:]
x_unprotected_test = dataset_orig_test.features[:, :39]
x_protected_test = dataset_orig_test.features[:, 39:]

# Labels flattened to one dimension.
y_train = dataset_orig_train.labels.reshape((-1,))
y_test = dataset_orig_test.labels.reshape((-1,))





## Running linear regression to get sensitive directions

# Regress the protected columns on the unprotected ones (no intercept) and use
# the fitted coefficient matrix as the raw sensitive directions.
protected_regression = linear_model.LinearRegression(fit_intercept = False)
protected_regression.fit(x_unprotected_train, x_protected_train)
sensetive_directions = protected_regression.coef_

# Orthonormal basis of the span of the raw directions, one direction per row.
sensetive_directions = scipy.linalg.orth(sensetive_directions.T).T

# Normalise every row once.  The previous per-row loop
# `while np.linalg.norm(s) != 1` compared a float for exact equality and could
# spin forever if the norm settled one ULP away from 1.0 (or was NaN).
sensetive_directions = sensetive_directions / np.linalg.norm(
    sensetive_directions, axis = 1, keepdims = True
)





# Projection matrices derived from the (still NumPy) sensitive directions,
# converted straight to float32 tensors.
unprotected_directions = tf.cast(utils.projection_matrix(sensetive_directions), dtype = tf.float32)
protected_directions = tf.cast(utils.projection_matrix2(sensetive_directions), dtype = tf.float32)

# Casting to tensors
x_unprotected_train = tf.cast(x_unprotected_train, dtype = tf.float32)
x_unprotected_test = tf.cast(x_unprotected_test, dtype = tf.float32)
sensetive_directions = tf.cast(sensetive_directions, dtype = tf.float32)

# Labels: int32 class ids turned into one-hot rows over 2 classes.
y_train = tf.one_hot(y_train.astype('int32'), 2)
y_test = tf.one_hot(y_test.astype('int32'), 2)


init_graph = utils.ClassifierGraph(50, 2)
# Classifier fitted on the raw unprotected features (the "unfair" variant).
graph = cl.Classifier(init_graph, x_unprotected_train, y_train, num_steps = 1000)
# Fair variant, kept for reference:
#graph = cl.Classifier(init_graph, tf.matmul(x_unprotected_train, unprotected_directions),
#                        y_train, tf.matmul(x_unprotected_test, unprotected_directions), y_test, num_steps = 10000)





Done step 200

Done step 400

Done step 600

Done step 800

Done step 1000



In [12]:
def sample_perturbation(data_point, regularizer = 5e-0, learning_rate = 5e-2, num_steps = 200):
    """Perturb one example by gradient ascent and return the loss ratio.

    Starting from the given input, takes ``num_steps`` steps of
    ``x <- x + learning_rate * d(objective)/dx`` where the objective is
    ``utils.EntropyLoss(y, graph(x))`` minus ``regularizer`` times the squared
    norm of ``utils.unprotected_direction(x - x_start, sensetive_directions)``.

    Reads the module-level ``graph`` and ``sensetive_directions``.

    Args:
        data_point: pair ``(x, y)``; each element is reshaped to a single row.
        regularizer: weight of the squared-norm penalty.
        learning_rate: step size of each ascent step.
        num_steps: number of ascent steps.

    Returns:
        ``EntropyLoss`` at the perturbed input divided by ``EntropyLoss`` at
        the starting input, converted with ``.numpy()``.
    """
    features, label = data_point
    origin = tf.reshape(features, (1, -1))
    label = tf.reshape(label, (1, -1))

    current = origin
    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            tape.watch(current)
            prob = graph(current)
            displacement = utils.unprotected_direction(current - origin, sensetive_directions)
            objective = utils.EntropyLoss(label, prob) - regularizer * tf.norm(displacement)**2

        # The raw gradient is used for the step; a projected alternative would be
        # utils.protected_direction(gradient, sensetive_directions).
        current = current + learning_rate * tape.gradient(objective, current)

    perturbed_loss = utils.EntropyLoss(label, graph(current))
    baseline_loss = utils.EntropyLoss(label, graph(origin))
    return (perturbed_loss / baseline_loss).numpy()




# Evaluate the first 10 test points in a pool with one worker per reported CPU.
cpus = mp.cpu_count()
print(f'Number of cpus : {cpus}')
start_time = time.time()
with mp.Pool(processes = cpus) as pool:
    perturbed_test_samples = pool.map(
        sample_perturbation,
        zip(x_unprotected_test[:10], y_test[:10]),
    )

Number of cpus : 8


In [13]:
# Display the loss ratios collected from the worker pool (one per test point).
perturbed_test_samples

[1.260227,
 3.0681648,
 1.060196,
 7.3872895,
 7.708021,
 5.423564,
 1.4314591,
 15.38068,
 4.16826,
 9.205591]

In [14]:
import numpy as np
import tensorflow as tf
from adult_modified import preprocess_adult_data
from sklearn import linear_model
import classifier as cl
import utils
import time
import multiprocessing as mp
import random
import matplotlib.pyplot as plt
import scipy
plt.ioff()  # disable matplotlib interactive mode


# Seed TensorFlow and NumPy, and hand the same seed to the preprocessing step.
seed = 1
tf.random.set_seed(seed)
np.random.seed(seed)
dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed = seed)

# Feature columns [0, 39) form the unprotected block; columns [39, end) form
# the protected block.
x_unprotected_train = dataset_orig_train.features[:, :39]
x_protected_train = dataset_orig_train.features[:, 39:]
x_unprotected_test = dataset_orig_test.features[:, :39]
x_protected_test = dataset_orig_test.features[:, 39:]

# Labels flattened to one dimension.
y_train = dataset_orig_train.labels.reshape((-1,))
y_test = dataset_orig_test.labels.reshape((-1,))





## Running linear regression to get sensitive directions

# Regress the protected columns on the unprotected ones (no intercept) and use
# the fitted coefficient matrix as the raw sensitive directions.
protected_regression = linear_model.LinearRegression(fit_intercept = False)
protected_regression.fit(x_unprotected_train, x_protected_train)
sensetive_directions = protected_regression.coef_

# Orthonormal basis of the span of the raw directions, one direction per row.
sensetive_directions = scipy.linalg.orth(sensetive_directions.T).T

# Normalise every row once.  The previous per-row loop
# `while np.linalg.norm(s) != 1` compared a float for exact equality and could
# spin forever if the norm settled one ULP away from 1.0 (or was NaN).
sensetive_directions = sensetive_directions / np.linalg.norm(
    sensetive_directions, axis = 1, keepdims = True
)





# Projection matrices derived from the (still NumPy) sensitive directions,
# converted straight to float32 tensors.
unprotected_directions = tf.cast(utils.projection_matrix(sensetive_directions), dtype = tf.float32)
protected_directions = tf.cast(utils.projection_matrix2(sensetive_directions), dtype = tf.float32)

# Casting to tensors
x_unprotected_train = tf.cast(x_unprotected_train, dtype = tf.float32)
x_unprotected_test = tf.cast(x_unprotected_test, dtype = tf.float32)
sensetive_directions = tf.cast(sensetive_directions, dtype = tf.float32)

# Labels: int32 class ids turned into one-hot rows over 2 classes.
y_train = tf.one_hot(y_train.astype('int32'), 2)
y_test = tf.one_hot(y_test.astype('int32'), 2)

init_graph = utils.ClassifierGraph(50, 2)
# Unfair variant, kept for reference:
#graph = cl.Classifier(init_graph, x_unprotected_train, y_train, x_unprotected_test, y_test, num_steps = 10000)
# Fair variant: the training inputs are first passed through
# utils.unprotected_direction with the sensitive directions.
graph = cl.Classifier(
    init_graph,
    utils.unprotected_direction(x_unprotected_train, sensetive_directions),
    y_train,
    num_steps = 1000,
)



def sample_perturbation(data_point, regularizer = 5e0, learning_rate = 5e-2, num_steps = 200):
    """Perturb one example by gradient ascent and return the loss ratio (fair model).

    Same procedure as the unfair variant, except that every input handed to
    ``graph`` is first passed through
    ``utils.unprotected_direction(., sensetive_directions)``.  The objective is
    ``utils.EntropyLoss(y, graph(projected x))`` minus ``regularizer`` times the
    squared norm of ``utils.unprotected_direction(x - x_start, sensetive_directions)``.

    Reads the module-level ``graph`` and ``sensetive_directions``.

    Args:
        data_point: pair ``(x, y)``; each element is reshaped to a single row.
        regularizer: weight of the squared-norm penalty.
        learning_rate: step size of each ascent step.
        num_steps: number of ascent steps.

    Returns:
        ``EntropyLoss`` at the perturbed input divided by ``EntropyLoss`` at
        the starting input, converted with ``.numpy()``.
    """
    features, label = data_point
    origin = tf.reshape(features, (1, -1))
    label = tf.reshape(label, (1, -1))

    current = origin
    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            tape.watch(current)
            prob = graph(utils.unprotected_direction(current, sensetive_directions))
            displacement = utils.unprotected_direction(current - origin, sensetive_directions)
            objective = utils.EntropyLoss(label, prob) - regularizer * tf.norm(displacement)**2

        # The raw gradient is used for the step; a projected alternative would be
        # utils.protected_direction(gradient, sensetive_directions).
        current = current + learning_rate * tape.gradient(objective, current)

    perturbed_loss = utils.EntropyLoss(
        label, graph(utils.unprotected_direction(current, sensetive_directions))
    )
    baseline_loss = utils.EntropyLoss(
        label, graph(utils.unprotected_direction(origin, sensetive_directions))
    )
    return (perturbed_loss / baseline_loss).numpy()




# Evaluate the first 10 test points in a pool with one worker per reported CPU.
cpus = mp.cpu_count()
print(f'Number of cpus : {cpus}')
start_time = time.time()
with mp.Pool(processes = cpus) as pool:
    perturbed_test_samples = pool.map(
        sample_perturbation,
        zip(x_unprotected_test[:10], y_test[:10]),
    )

# Show the collected loss ratios as the cell output.
perturbed_test_samples

Done step 200

Done step 400

Done step 600

Done step 800

Done step 1000

Number of cpus : 8
tf.Tensor(
[[ 0.10568471  0.22098015  0.23347273  0.04557371  0.15213545  0.11486526
  -0.15622678 -0.10134134  0.08558103 -0.14717892  0.06092646  0.0703868
  -0.19816823  0.22133996  0.1802274   0.03834645 -0.2007695   0.04507047
   0.1469216  -0.01513197  0.0834027  -0.11244565  0.18484515 -0.1281626
  -0.12175724 -0.10823195 -0.2001048   0.0594045  -0.0779786   0.04756334
   0.06056025  0.15712681 -0.10743533  0.28483444 -0.07648704 -0.15540151
  -0.18481846 -0.2033757   0.02307328]], shape=(1, 39), dtype=float32)
tf.Tensor(
[[ 1.4259930e-03  6.8340288e-03  4.4478518e-03  1.6720010e-03
   2.9367425e-03  4.5243156e-05 -2.1931308e-03 -8.7418472e-03
  -1.0599549e-04  1.1674287e-03  8.6992473e-04  2.2309592e-03
  -2.4469178e-03  4.2135278e-03  4.3956639e-04  2.7072502e-03
  -5.6952895e-03 -1.2988755e-03  2.2560454e-03 -3.8427387e-03
   4.1464665e-03 -2.3031712e-03  4.2821201e-03 -2.3240484e-0

[1.0486113,
 2.220094,
 1.0155952,
 4.5169597,
 5.713907,
 1.3127115,
 1.1291466,
 1.4196779,
 2.8324146,
 5.152049]

In [1]:
import numpy as np
import tensorflow as tf
from adult_modified import preprocess_adult_data
from sklearn import linear_model
import classifier as cl
import utils
import time
import multiprocessing as mp
import random
import matplotlib.pyplot as plt
import scipy
plt.ioff()  # disable matplotlib interactive mode


# Seed TensorFlow and NumPy, and hand the same seed to the preprocessing step.
seed = 1
tf.random.set_seed(seed)
np.random.seed(seed)
dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed = seed)

# Feature columns [0, 39) form the unprotected block; columns [39, end) form
# the protected block.
x_unprotected_train = dataset_orig_train.features[:, :39]
x_protected_train = dataset_orig_train.features[:, 39:]
x_unprotected_test = dataset_orig_test.features[:, :39]
x_protected_test = dataset_orig_test.features[:, 39:]

# Labels flattened to one dimension.
y_train = dataset_orig_train.labels.reshape((-1,))
y_test = dataset_orig_test.labels.reshape((-1,))






# Casting to tensors
x_unprotected_train = tf.cast(x_unprotected_train, dtype = tf.float32)
x_unprotected_test = tf.cast(x_unprotected_test, dtype = tf.float32)

# Labels: int32 class ids turned into one-hot rows over 2 classes.
y_train = tf.one_hot(y_train.astype('int32'), 2)
y_test = tf.one_hot(y_test.astype('int32'), 2)


init_graph = utils.ClassifierGraph(50, 2)
# Classifier fitted on the raw unprotected features (the "unfair" variant).
graph = cl.Classifier(init_graph, x_unprotected_train, y_train, num_steps = 1200)


Done step 200

Done step 400

Done step 600

Done step 800

Done step 1000

Done step 1200



In [2]:
# Run the trained classifier on the whole unprotected test matrix.
a = graph(x_unprotected_test)

In [4]:
# Sum the classifier output across axis 1 (one sum per test row); the recorded
# output shows every row summing to 1.
tf.reduce_sum(a, axis = 1)

<tf.Tensor: shape=(9045,), dtype=float32, numpy=array([1., 1., 1., ..., 1., 1., 1.], dtype=float32)>

In [2]:
# Inspect the `model` attribute of the classifier object (the recorded output
# shows a Keras Sequential instance).
graph.model

<tensorflow.python.keras.engine.sequential.Sequential at 0x142e44390>

In [1]:
import numpy as np
import tensorflow as tf
from adult_modified import preprocess_adult_data
from sklearn import linear_model
import utils
import time
import multiprocessing as mp
import random
import matplotlib.pyplot as plt
import scipy
plt.ioff()  # disable matplotlib interactive mode


# Seed TensorFlow and NumPy, and hand the same seed to the preprocessing step.
seed = 1
tf.random.set_seed(seed)
np.random.seed(seed)
dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed = seed)

# Feature columns [0, 39) form the unprotected block; columns [39, end) form
# the protected block.
x_unprotected_train = dataset_orig_train.features[:, :39]
x_protected_train = dataset_orig_train.features[:, 39:]
x_unprotected_test = dataset_orig_test.features[:, :39]
x_protected_test = dataset_orig_test.features[:, 39:]

# Labels flattened to one dimension.
y_train = dataset_orig_train.labels.reshape((-1,))
y_test = dataset_orig_test.labels.reshape((-1,))





## Running logistic regression to get sensitive directions

# One logistic regression (with intercept) per protected column, for columns 0
# and 1 of the protected block; each fitted coefficient vector becomes one raw
# sensitive direction.
# NOTE(review): these regressions are fitted on the *test* split, whereas the
# linear-regression version of this step in the earlier cells fits on the
# train split -- confirm that this is intentional.
protected_regression = linear_model.LogisticRegression(fit_intercept = True)
sensetive_directions = []
for protected_column in (0, 1):
    protected_regression.fit(x_unprotected_test, x_protected_test[:, protected_column])
    sensetive_directions.append(protected_regression.coef_.reshape((-1,)))
sensetive_directions = np.array(sensetive_directions)

# Orthonormal basis of the span of the raw directions, one direction per row.
sensetive_directions = scipy.linalg.orth(sensetive_directions.T).T

# Normalise every row once.  The previous per-row loop
# `while np.linalg.norm(s) != 1` compared a float for exact equality and could
# spin forever if the norm settled one ULP away from 1.0 (or was NaN).
sensetive_directions = sensetive_directions / np.linalg.norm(
    sensetive_directions, axis = 1, keepdims = True
)





# Projection matrices derived from the (still NumPy) sensitive directions,
# converted straight to float32 tensors.
unprotected_directions = tf.cast(utils.projection_matrix(sensetive_directions), dtype = tf.float32)
protected_directions = tf.cast(utils.projection_matrix2(sensetive_directions), dtype = tf.float32)

# Casting to tensors
x_unprotected_train = tf.cast(x_unprotected_train, dtype = tf.float32)
x_unprotected_test = tf.cast(x_unprotected_test, dtype = tf.float32)
sensetive_directions = tf.cast(sensetive_directions, dtype = tf.float32)

# Labels: int32 class ids turned into one-hot rows over 2 classes.
y_train = tf.one_hot(y_train.astype('int32'), 2)
y_test = tf.one_hot(y_test.astype('int32'), 2)


# Load a previously saved Keras model from the path 'graph' instead of training.
graph = tf.keras.models.load_model('graph')

def sample_perturbation(data_point, regularizer = 20, learning_rate = 3e-2, num_steps = 200):
    """Perturb one example by gradient ascent and return the loss ratio.

    Starting from the given input, takes ``num_steps`` steps of
    ``x <- x + learning_rate * d(objective)/dx`` where the objective is
    ``utils.EntropyLoss(y, graph(x))`` minus ``regularizer`` times the squared
    norm of ``utils.unprotected_direction(x - x_start, sensetive_directions)``.
    Prints ``Done`` once the ratio has been computed.

    Reads the module-level ``graph`` and ``sensetive_directions``.

    Args:
        data_point: pair ``(x, y)``; each element is reshaped to a single row.
        regularizer: weight of the squared-norm penalty.
        learning_rate: step size of each ascent step.
        num_steps: number of ascent steps.

    Returns:
        ``EntropyLoss`` at the perturbed input divided by ``EntropyLoss`` at
        the starting input, converted with ``.numpy()``.
    """
    features, label = data_point
    origin = tf.reshape(features, (1, -1))
    label = tf.reshape(label, (1, -1))

    current = origin
    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            tape.watch(current)
            prob = graph(current)
            displacement = utils.unprotected_direction(current - origin, sensetive_directions)
            objective = utils.EntropyLoss(label, prob)  - regularizer * tf.norm(displacement)**2

        # The raw gradient is used for the step; a projected alternative would be
        # utils.protected_direction(gradient, sensetive_directions).
        current = current + learning_rate * tape.gradient(objective, current)

    perturbed_loss = utils.EntropyLoss(label, graph(current))
    baseline_loss = utils.EntropyLoss(label, graph(origin))
    ratio = perturbed_loss / baseline_loss
    print('Done')

    return ratio.numpy()




cpus = mp.cpu_count()
print(f'Number of cpus : {cpus}')
start_time = time.time()
perturbed_test_samples = []
# Serial alternative, kept for reference:
# for data in zip(x_unprotected_test[:20], y_test[:20]):
#     perturbed_test_samples.append(sample_perturbation(data))
# Evaluate the first 20 test points with a fixed pool of 5 worker processes.
# NOTE(review): the recorded output of this cell shows no result after the CPU
# count -- if the pool stalls, fall back to the serial loop above.
with mp.Pool(processes = 5) as pool:
    perturbed_test_samples = pool.map(
        sample_perturbation,
        zip(x_unprotected_test[:20], y_test[:20]),
    )

Number of cpus : 8


In [53]:
# Take the first test example and its one-hot label for a manual, single-point run.
x = x_unprotected_test[0]
y = y_test[0]

In [54]:
# Inline, single-point copy of the sample_perturbation loop with the constants
# written out (200 steps, penalty weight 20, step size 3e-2).
# Gotcha: x is overwritten by the loop, so re-running this cell without first
# re-running the cell that sets x makes x_start the already perturbed point.
x = tf.reshape(x, (1, -1))
y = tf.reshape(y, (1, -1))
x_start = x
    #x += tf.cast(np.random.normal(size=(1, 39)), dtype = tf.float32)*1e-9
for _ in range(200):
    with tf.GradientTape() as g:
        g.watch(x)
        prob = graph(x)
        perturb = utils.unprotected_direction(x-x_start, sensetive_directions)
        # Objective: entropy loss minus 20 times the squared norm of `perturb`.
        loss = utils.EntropyLoss(y, prob)  - 20 * tf.norm(perturb)**2

    gradient = g.gradient(loss, x)
    # Gradient-ascent step on the objective using the raw gradient.
    x = x + 3e-2* gradient#utils.protected_direction(gradient, sensetive_directions)

# Ratio of the loss at the perturbed point to the loss at the starting point.
return_loss = utils.EntropyLoss(y, graph(x)) / utils.EntropyLoss(y, graph(x_start))

In [55]:
# Display the loss ratio from the manual single-point run (a scalar tensor).
return_loss

<tf.Tensor: shape=(), dtype=float32, numpy=1.0159092>

In [33]:
# Run the loaded model on the whole unprotected test matrix.
a = graph(x_unprotected_test)

In [34]:
# Sum the model output across axis 1 (one sum per test row); the recorded
# output shows sums of 1 up to float32 rounding (e.g. 0.99999994).
tf.reduce_sum(a, axis = 1)

<tf.Tensor: shape=(9045,), dtype=float32, numpy=
array([1.        , 1.        , 1.        , ..., 1.        , 1.        ,
       0.99999994], dtype=float32)>

In [1]:
# Display y_test.  Needs a cell that defines y_test to have run in the current
# kernel; the recorded output is a NameError from running it on a fresh kernel.
y_test


NameError: name 'y_test' is not defined

In [6]:
import numpy as np
# Half-open [start, end) index ranges that split the test rows into consecutive
# chunks; the last chunk is clipped to the row count.  Both arrays are derived
# from the two constants below instead of three hand-synchronised literals.
N_TEST_SAMPLES = 9045  # row count used as the final end index
CHUNK_SIZE = 200
start = np.arange(0, N_TEST_SAMPLES, CHUNK_SIZE)
end = np.minimum(start + CHUNK_SIZE, N_TEST_SAMPLES)

In [7]:
# Display the chunk start indices (0, 200, ..., 9000).
start

array([   0,  200,  400,  600,  800, 1000, 1200, 1400, 1600, 1800, 2000,
       2200, 2400, 2600, 2800, 3000, 3200, 3400, 3600, 3800, 4000, 4200,
       4400, 4600, 4800, 5000, 5200, 5400, 5600, 5800, 6000, 6200, 6400,
       6600, 6800, 7000, 7200, 7400, 7600, 7800, 8000, 8200, 8400, 8600,
       8800, 9000])

In [8]:
# Display the chunk end indices (200, 400, ..., 9000, 9045).
end

array([ 200,  400,  600,  800, 1000, 1200, 1400, 1600, 1800, 2000, 2200,
       2400, 2600, 2800, 3000, 3200, 3400, 3600, 3800, 4000, 4200, 4400,
       4600, 4800, 5000, 5200, 5400, 5600, 5800, 6000, 6200, 6400, 6600,
       6800, 7000, 7200, 7400, 7600, 7800, 8000, 8200, 8400, 8600, 8800,
       9000, 9045])