In [1]:
import numpy as np
import random

In [2]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

In [3]:
def norm_x(x):
    for col in range(x.shape[1]):
        vals = x[:,col]
        vals = (vals-np.mean(vals))/np.std(vals)
        x[:,col] = vals
    return x

In [41]:
def sampleChania(Chania, n_points):
    data = Chania[:,0:8]
    order = np.argsort(np.random.random(data.shape[0]))
    x = data[order][:n_points, 1:5]   
    trueusers = data[order][:n_points,0]
    for u in range(len(trueusers)):
        trueusers[u] = int(trueusers[u])
    n_clusters = 5
    x = norm_x(x)
    return tf.constant(x, dtype=tf.float32), tf.constant(trueusers, dtype=tf.float32)

### Sample dataset

In [5]:
Chania = np.genfromtxt('processedChania.csv', delimiter=',')

### Create privitizer and adversary

In [6]:
def privatizer(x):
    with tf.variable_scope("priv", reuse=tf.AUTO_REUSE):
        l1 = tf.layers.dense(x, 16, activation=tf.nn.relu)
        l2 = tf.layers.dense(l1, x.shape[1].value)
    return l2


def adversary(y):
    with tf.variable_scope("ad", reuse=tf.AUTO_REUSE):
        l1 = tf.layers.dense(y, 16, activation=tf.nn.relu)
        # num users = 5
        l2 = tf.layers.dense(l1, 5, activation=tf.nn.softmax)
    return l2

### Signal Map Error

In [7]:
def signal_map_error(x, y):
    with tf.variable_scope("priv", reuse=tf.AUTO_REUSE):

        # build model from input
        inputmodel = keras.Sequential([
            keras.layers.Dense(10, activation=tf.nn.relu),
            keras.layers.Dense(10, activation=tf.nn.relu),
            keras.layers.Dense(1)        
        ])
        optimizer = tf.train.FtrlOptimizer(0.001)
        inputmodel.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
        inputmodel.fit(x[:,2:], x[:,1], steps_per_epoch=1, epochs=100, verbose=0)   

        # build model from output
        outputmodel = keras.Sequential([
            keras.layers.Dense(10, activation=tf.nn.relu),
            keras.layers.Dense(10, activation=tf.nn.relu),
            keras.layers.Dense(1)        
        ])
        outputmodel.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
        outputmodel.fit(y[:,2:], y[:,1], steps_per_epoch=1, epochs=100, verbose=0)  
        
        # compare inputmodel(input) and outputmodel(input)
        map_error = tf.reduce_mean(tf.squared_difference(inputmodel(x[:,2:]), outputmodel(x[:,2:])))

    return map_error

### Classification Error

In [8]:
def classification_error(z, trueusers):
    with tf.variable_scope("ad", reuse=tf.AUTO_REUSE):
        class_error = tf.reduce_mean(tf.keras.backend.sparse_categorical_crossentropy(trueusers, z))
    return class_error

### Privatizer Loss

In [9]:
def privatizer_loss(x, y, trueusers, threshold):
    with tf.variable_scope("priv", reuse=tf.AUTO_REUSE):
        penalty = 1
        
        c_e = classification_error(z, trueusers)
        s_m_e = signal_map_error(x, y)
        
        zero = tf.constant(0, dtype=tf.float32)
        loss = -1*c_e + penalty*tf.math.maximum(zero, threshold-s_m_e)
        
    return loss        

### GAP

In [34]:
# initialize privatizer and adversary
# for i in range(n):
#     choose sample points
#     y = privatizer(x)
#     z = adversary(y)
#     for k in range(100):
#         train adversary
#     train privatizer

In [42]:
# initialize 
threshold = tf.constant(0.5, dtype=tf.float32)
x, trueusers = sampleChania(Chania, n_points=100)
y = privatizer(x)
z = adversary(y)
n = 10

# initialize loss variables
class_error = classification_error(z, trueusers)
priv_loss = privatizer_loss(x, y, trueusers, threshold)

# initialize session
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for i in range(n):

    # sample points
    x, trueusers = sampleChania(Chania, n_points=100)
    
    # run data through privatizer
    y = privatizer(x)
    
    # run data through adversary
    z = adversary(y)

    ad_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="ad")
    ad_train = tf.train.GradientDescentOptimizer(0.01).minimize(class_error, var_list = ad_vars)
    
    priv_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="priv")
    priv_train = tf.train.GradientDescentOptimizer(0.01).minimize(priv_loss, var_list = priv_vars)
    
    for k in range(100):
        
        # train adversary
        _, a_loss = sess.run((ad_train, class_error))
        
    # train privatizer
    _, p_loss = sess.run((priv_train, priv_loss))
    
    print("Iterations: %d\t Adversary loss: %.4f\t Privatizer loss: %.4f"%(i, a_loss,p_loss))

Iterations: 0	 Adversary loss: 1.2536	 Privatizer loss: -0.7539
Iterations: 1	 Adversary loss: 1.1004	 Privatizer loss: -0.6024
Iterations: 2	 Adversary loss: 1.0093	 Privatizer loss: -0.5123
Iterations: 3	 Adversary loss: 0.9477	 Privatizer loss: -0.4516
Iterations: 4	 Adversary loss: 0.9013	 Privatizer loss: -0.4061
Iterations: 5	 Adversary loss: 0.8666	 Privatizer loss: -0.3725
Iterations: 6	 Adversary loss: 0.8418	 Privatizer loss: -0.3491
Iterations: 7	 Adversary loss: 0.8248	 Privatizer loss: -0.3337
Iterations: 8	 Adversary loss: 0.8132	 Privatizer loss: -0.3241
Iterations: 9	 Adversary loss: 0.8070	 Privatizer loss: -0.3202
