In [48]:
import os 
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
import tensorflow as tf
import numpy as np
rng = np.random
from sklearn.linear_model import LinearRegression
from privacy.analysis import privacy_ledger
from privacy.analysis.rdp_accountant import compute_rdp
from privacy.analysis.rdp_accountant import get_privacy_spent
from privacy.optimizers import dp_optimizer
from tqdm import tqdm_notebook as tqdm

orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))

In [63]:
datadir = "~/Sail/DP_LinearRegression"
df = pd.read_csv(
    f"{datadir}/First_Results.csv")

# Data Loader 


In [50]:
def dataloader(n,p,mu,sigma): 
    X = np.random.choice(3,size = (n,p), p = [0.7,0.25,0.05])
    betha = np.random.normal(mu,sigma,p)
    A = betha.dot(X.T)
    h = np.var(A)
    while h > 1: 
        betha = np.random.normal(mu,sigma,p)
        A = betha.dot(X.T)
        h = np.var(A)
    noise = np.random.normal(0,1-h,n)
    Y = A + noise
    return X, Y 

def feature_normalize(dataset):
    mu = np.mean(dataset,axis=0)
    sigma = np.std(dataset,axis=0)
    return (dataset - mu)/sigma

def append_bias_reshape(X,Y):
    n_training_samples = X.shape[0]
    n_dim = X.shape[1]
    # We add a line of ones to take into account the intercept
    X = np.reshape(np.c_[np.ones(n_training_samples),X],[n_training_samples,n_dim + 1])
    Y = np.reshape(Y,[n_training_samples,1])
    return X,Y

In [51]:
def Exp(n,p,mu,sigma,learning_rate,training_epochs,l2_norm_clip,noise_multiplier,num_microbatches = 1): 
    A,B = dataloader(n,p,mu,sigma)
    
    B = np.reshape(B,[A.shape[0],1])
    A = normalized_features = feature_normalize(A)
    rnd_indices = np.random.rand(len(A)) < 0.80
    train_x = A[rnd_indices]
    train_y = B[rnd_indices]
    test_x = A[~rnd_indices]
    test_y = B[~rnd_indices]
    m = train_x.shape[0]
    n = train_x.shape[1]
    m_test = test_x.shape[0]
    n_test = test_x.shape[1]
    
    #Variable 
    X = tf.placeholder(tf.float32, [m, n])
    Y = tf.placeholder(tf.float32, [m, 1])
    X_test = tf.placeholder(tf.float32, [m_test, n])
    Y_test = tf.placeholder(tf.float32, [m_test, 1])

    # weights
    W = tf.Variable(tf.zeros([n, 1], dtype=np.float32), name="weight")
    b = tf.Variable(tf.zeros([1], dtype=np.float32), name="bias")
    # Construct a linear model
    pred = tf.add(tf.matmul(X, W), b)
    pred_test = tf.add(tf.matmul(X_test, W), b)
    # Mean squared error
    cost = tf.reduce_sum(tf.square(pred - Y)) / (2*m) #Loss 
    # Gradient descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    #R^2
    total_error = tf.reduce_sum(tf.square(tf.subtract(Y_test, tf.reduce_mean(Y_test))))
    unexplained_error = tf.reduce_sum(tf.square(tf.subtract(Y_test, pred_test)))
    R_squared = tf.subtract(tf.cast(1,dtype = 'float32'), tf.math.divide(unexplained_error, total_error))
    R_squared_adj = 1 - (1-R_squared**2)*((n_test - 1)/(n_test -(p+1)))
    #DP 
    optimizer_DP = dp_optimizer.DPGradientDescentGaussianOptimizer(
        l2_norm_clip,
        noise_multiplier,
        num_microbatches,
        learning_rate = learning_rate
        )
    train_op = optimizer_DP.minimize(loss=cost)
    
    with tf.Session() as sess:
        
        init = tf.global_variables_initializer()
        sess.run(init)
        for epoch in range(training_epochs):
            sess.run(train_op, feed_dict={X: train_x, Y: train_y})   
        training_cost = sess.run(cost, feed_dict={X: train_x, Y: train_y})
        
        Variance = sess.run(total_error,feed_dict={Y_test: test_y})
        SSE = sess.run(unexplained_error, feed_dict={X_test: test_x, Y_test: test_y} )
        R_2_DP = sess.run(R_squared,feed_dict={X_test: test_x, Y_test: test_y})
        
        rdp = compute_rdp(1,noise_multiplier,steps=training_epochs,orders=orders)
        epsilon = get_privacy_spent(orders, rdp, target_delta= (1e-6))[0]
        R_2_adj_DP = 1 - (1-R_2_DP**2)*((m_test - 1)/(m_test - (p + 1)))
        sess.close()
        
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        for epoch in range(training_epochs):
            sess.run(optimizer, feed_dict={X: train_x, Y: train_y})
            
        training_cost = sess.run(cost, feed_dict={X: train_x, Y: train_y})
        Variance = sess.run(total_error,feed_dict={Y_test: test_y})
        SSE = sess.run(unexplained_error, feed_dict={X_test: test_x, Y_test: test_y})
        R_2 = sess.run(R_squared,feed_dict={X_test: test_x, Y_test: test_y})
        R_2_adj = 1 - (1 - R_2**2) * ((m_test - 1) / (m_test - (p + 1)))
        sess.close()
        
    return R_2,R_2_adj,R_2_DP,R_2_adj_DP,epsilon

In [28]:
%time ans = Exp(3000,300,mu =0.01,sigma = 0.1,learning_rate = 0.025,training_epochs = 600,\
            l2_norm_clip = 0.4,noise_multiplier = 0.5,num_microbatches = 1)
print(ans)

CPU times: user 10.8 s, sys: 1.31 s, total: 12.1 s
Wall time: 9.83 s
(0.9985274, 0.9943403535011518, 0.7933877, 0.2874308846965097, 1458.1551055796426)


In [52]:
keys = ['N','P','R_2','R_2_adj','R_2_DP','R_2_adj_DP','epsilon']

d = {'N':[],'P':[],'R_2':[],'R_2_adj':[],'R_2_DP':[],'R_2_adj_DP':[],'epsilon':[]}

N = [i for i in range (1000,10000,1000)]
P = [50]

for p in tqdm(P):
        
    for n in tqdm(N): 
        
        ans = Exp(n,p,mu =0.01,sigma = 0.1,learning_rate = 0.025,training_epochs = 700,\
            l2_norm_clip = 0.4,noise_multiplier = 0.5,num_microbatches = 1)
        
        for i,k in zip(ans,keys[2:]):
            d[k].append(i)
        d['N'].append(n)
        d['P'].append(p)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=9), HTML(value='')))

In [66]:
df1 = pd.DataFrame(d)
df2 = pd.concat([df1,df])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


In [11]:
df = df.drop(["index","Unnamed: 0"], axis = 1)

In [68]:
df2 = df2.sort_values(['P'])

In [70]:
df2 = df2.drop([17,13], axis = 0).drop(['Unnamed: 0'], axis = 1)

In [80]:
df2 = df2.drop(['level_0','index'], axis = 1)

In [87]:
df2

Unnamed: 0,N,P,R_2,R_2_DP,R_2_adj,R_2_adj_DP,epsilon
0,100,10,-0.064606,-0.048517,-2.485391,-2.491761,1678.155106
1,10000,10,-0.267697,-0.349098,-0.959825,-0.853831,1678.155106
2,100,10,-0.295457,-0.275067,-1.738115,-1.773015,1678.155106
3,1000,10,0.045841,0.049567,-0.05042,-0.050045,1678.155106
4,9000,50,0.18425,0.163755,0.006011,-0.001327,1678.155106
5,8000,50,0.148088,0.139863,-0.008827,-0.01127,1678.155106
6,1000,50,0.196575,0.201879,-0.265586,-0.262803,1678.155106
7,6000,50,0.263374,0.235547,0.028798,0.014309,1678.155106
8,2000,50,0.287974,0.255239,-0.040003,-0.060169,1678.155106
9,7000,50,0.180181,0.145559,-0.002846,-0.014536,1678.155106


In [86]:
df2.to_csv("Second_result.csv")

In [83]:
df3 = df2.style.apply(lambda x: ['background: lightgreen' if x['R_2'] > 0.98 else '' for i in x], 
               axis=1)

In [84]:
df3

Unnamed: 0,N,P,R_2,R_2_DP,R_2_adj,R_2_adj_DP,epsilon
0,100,10,-0.0646056,-0.0485168,-2.48539,-2.49176,1678.16
1,10000,10,-0.267697,-0.349098,-0.959825,-0.853831,1678.16
2,100,10,-0.295457,-0.275067,-1.73812,-1.77301,1678.16
3,1000,10,0.0458412,0.0495668,-0.0504196,-0.0500454,1678.16
4,9000,50,0.18425,0.163755,0.00601131,-0.00132726,1678.16
5,8000,50,0.148088,0.139863,-0.00882683,-0.0112698,1678.16
6,1000,50,0.196575,0.201879,-0.265586,-0.262803,1678.16
7,6000,50,0.263374,0.235547,0.0287979,0.0143091,1678.16
8,2000,50,0.287974,0.255239,-0.0400029,-0.0601687,1678.16
9,7000,50,0.180181,0.145559,-0.00284624,-0.0145357,1678.16
