In [1]:
import numpy as np
import tensorflow as tf
from adult_modified import preprocess_adult_data
from sklearn import linear_model
import model
import utils
import time
import multiprocessing as mp
import random
import matplotlib.pyplot as plt
import scipy
# Keep matplotlib in non-interactive mode for this session.
plt.ioff()

# Use one shared seed for TensorFlow, NumPy and the preprocessing step.
seed = 1
tf.random.set_seed(seed)
np.random.seed(seed)
dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed=seed)

# Split each feature matrix at column 39: columns before it go to the
# "unprotected" arrays, columns from 39 onward go to the "protected" arrays.
x_unprotected_train = dataset_orig_train.features[:, :39]
x_protected_train = dataset_orig_train.features[:, 39:]
x_unprotected_test = dataset_orig_test.features[:, :39]
x_protected_test = dataset_orig_test.features[:, 39:]

# Flatten the label arrays to one dimension.
y_train = dataset_orig_train.labels.reshape((-1,))
y_test = dataset_orig_test.labels.reshape((-1,))

# Cast the unprotected features to float32 tensors. The labels are left as the
# flat arrays built above (no integer cast and no one-hot encoding is applied).
x_unprotected_train = tf.cast(x_unprotected_train, dtype=tf.float32)
x_unprotected_test = tf.cast(x_unprotected_test, dtype=tf.float32)

# Score the test features with the callable exposed as model.model and take
# the arg-max over axis 1 as the predicted class index.
graph = model.model
prob = graph(x_unprotected_test)
y_pred = tf.argmax(prob, axis=1).numpy()

In [2]:
# Display the predicted class indices (arg-max of the model output on the test features).
y_pred

array([0, 0, 0, ..., 0, 0, 0])

In [3]:
# Display the flattened test labels.
y_test

array([0., 0., 0., ..., 0., 0., 0.])

In [4]:
# Display the test split returned by preprocess_adult_data.
# NOTE(review): this prints the full dataset repr; a smaller preview may be enough.
dataset_orig_test

               instance weights  features                             \
                                                                       
                                      age education-num capital-gain   
instance names                                                         
13237                       1.0 -1.175586     -1.607059    -0.147413   
23644                       1.0  0.264770     -0.435784    -0.147413   
178                         1.0 -1.554628     -0.435784    -0.147413   
16741                       1.0 -0.341696     -0.435784    -0.147413   
28620                       1.0 -0.948162     -0.435784    -0.147413   
...                         ...       ...           ...          ...   
47211                       1.0 -1.099778      1.125916    -0.147413   
35106                       1.0  0.264770      1.125916    -0.147413   
5661                        1.0 -1.478820     -2.387908    -0.147413   
13155                       1.0 -1.630436     -1.607059    -0.14

In [10]:
# Pull feature columns 39 and 40 of the test split into separate 1-D arrays.
# They are named gender and race here -- presumably the protected attributes;
# confirm against the column order produced by preprocess_adult_data.
gender, race = (dataset_orig_test.features[:, column] for column in (39, 40))

In [11]:
# Display the values read from feature column 39 of the test split.
gender

array([1., 1., 1., ..., 1., 0., 0.])

In [2]:
import numpy as np
# Reset NumPy's global RNG so the seed list below is reproducible.
np.random.seed(1)
# Draw ten integer seeds from [0, 100000) for the Adult data processing runs.
seeds = np.random.randint(low=0, high=100000, size=10)

In [3]:
# Display the ten generated seeds.
seeds

array([98539, 77708,  5192, 98047, 50057, 73349, 21440, 98448, 20609,
       49100])

In [1]:
import json
# Load the saved ensemble parameters from disk. JSON text is UTF-8, so the
# encoding is stated explicitly instead of relying on the platform default.
with open('./models/data_5192.txt', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [2]:
# Display the parsed contents of the JSON file.
data

{'ens_weights': [0.010803723570691603,
  0.9197313399673761,
  0.06946493646193297],
 'coefs': [[0.21777141478992051,
   -0.027299767430933004,
   0.3442915144907627,
   0.027579741429418887,
   -0.05290625993730652,
   1.0090856904788823,
   0.6366576753224581,
   0.22400712845643617,
   -0.4225848146313691,
   -0.5978489423190935,
   0.6276459359901265,
   -1.6418093776982465,
   -0.18191138217336908,
   -1.9001714632601627,
   0.33888351135664485,
   0.6255134614685023,
   0.2637065706004156,
   0.5584563742113735,
   0.13067622340314772,
   0.8390569341315941,
   -1.5620814838683108,
   -0.93462352408953,
   0.4181269486110605,
   -1.4715193492952878,
   -0.321339007432083,
   0.6735703384936026,
   0.6277512164239468,
   1.0168662938338793,
   0.3428309620547239,
   0.25153330931616813,
   0.5062221470113151,
   0.5913151340787777,
   -1.1425566236630909,
   -3.694690744961524,
   -0.023016960521797823,
   0.8165595847473007,
   0.22142926155825018,
   1.4124798035372133,
   1.102

In [3]:
import tensorflow as tf
def graph(x):
    """Return two-column class probabilities from the weighted logistic ensemble.

    The ensemble is read from the module-level ``data`` dict. Each member is
    described by one entry of ``data['coefs']`` and ``data['intercepts']``, and
    its probability is scaled by the matching entry of ``data['ens_weights']``
    before being summed into the result.

    Args:
        x: Rank-2 tensor of input rows. It is multiplied by float32
            coefficient columns, so it is expected to be float32 and its
            second dimension must equal the length of each coefficient vector.

    Returns:
        A float32 tensor of shape ``(n, 2)`` whose columns are ``[1 - p, p]``,
        where ``p`` is the weighted sum of the members' sigmoid outputs.
    """
    # Unpacking into two names also rejects inputs that are not rank-2.
    n, _ = x.shape
    prob = tf.zeros([n, 1], dtype=tf.float32)
    for coef, intercept, weight in zip(data['coefs'], data['intercepts'], data['ens_weights']):
        # Turn the coefficient list into a column vector for the matrix product.
        coef = tf.reshape(tf.cast(coef, dtype=tf.float32), [-1, 1])
        model_logit = x @ coef + tf.cast(intercept, dtype=tf.float32)
        # tf.sigmoid is the numerically stable form of exp(z) / (1 + exp(z));
        # the explicit ratio overflows to inf / inf = NaN for large positive z.
        model_prob = tf.sigmoid(model_logit)
        prob += model_prob * tf.cast(weight, dtype=tf.float32)
    return tf.concat([1 - prob, prob], axis=1)

In [5]:
import numpy as np
# Call graph on 100 rows of standard-normal noise with 39 columns, cast to float32,
# and display the resulting (100, 2) probability tensor.
graph(tf.cast(np.random.normal(size = (100, 39)), dtype = tf.float32))

<tf.Tensor: shape=(100, 2), dtype=float32, numpy=
array([[5.26580811e-02, 9.47341919e-01],
       [9.84162748e-01, 1.58372801e-02],
       [4.37700748e-01, 5.62299252e-01],
       [8.61591697e-02, 9.13840830e-01],
       [9.82932806e-01, 1.70671642e-02],
       [8.67677689e-01, 1.32322341e-01],
       [5.40668726e-01, 4.59331244e-01],
       [9.10726666e-01, 8.92733410e-02],
       [1.00920618e-01, 8.99079382e-01],
       [1.24382913e-01, 8.75617087e-01],
       [2.67193377e-01, 7.32806623e-01],
       [7.76839852e-01, 2.23160133e-01],
       [9.79957044e-01, 2.00429577e-02],
       [9.05347407e-01, 9.46526080e-02],
       [1.40269399e-01, 8.59730601e-01],
       [9.83048022e-01, 1.69519745e-02],
       [2.95132399e-02, 9.70486760e-01],
       [1.84688568e-02, 9.81531143e-01],
       [6.94157958e-01, 3.05842012e-01],
       [9.22045231e-01, 7.79547542e-02],
       [6.29081130e-02, 9.37091887e-01],
       [2.91490555e-02, 9.70850945e-01],
       [9.46654677e-01, 5.33452928e-02],
       