In [1]:
#External libraries
import pandas as pd
import numpy as np
import time

In [2]:
#Import torch
import torch
import torch.nn as nn
import torch.utils.data as data_utils

In [3]:
! pip install python-dp


Collecting python-dp
  Downloading python_dp-1.1.1-cp37-cp37m-manylinux1_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 19.4 MB/s eta 0:00:01
[?25hInstalling collected packages: python-dp
Successfully installed python-dp-1.1.1
You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.[0m


In [4]:
import pydp as dp  # by convention our package is to be imported as dp (for Differential Privacy!)
from pydp.algorithms.laplacian import BoundedMean

In [5]:
#Set a manual seed to maintain consistency
torch.manual_seed(0)

<torch._C.Generator at 0x7f8140a2c970>

<h2>Data Loading and Processing</h2>

In [6]:
!apt-get update
!apt-get install wget

Hit:1 http://deb.debian.org/debian buster InRelease
Get:2 http://deb.debian.org/debian buster-updates InRelease [51.9 kB]
Get:3 http://security.debian.org/debian-security buster/updates InRelease [65.4 kB]
Get:4 http://security.debian.org/debian-security buster/updates/main amd64 Packages [316 kB]
Fetched 433 kB in 1s (560 kB/s)    
Reading package lists... Done
Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  wget
0 upgraded, 1 newly installed, 0 to remove and 1 not upgraded.
Need to get 902 kB of archives.
After this operation, 3335 kB of additional disk space will be used.
Get:1 http://deb.debian.org/debian buster/main amd64 wget amd64 1.20.1-1.1 [902 kB]
Fetched 902 kB in 0s (42.9 MB/s)
debconf: delaying package configuration, since apt-utils is not installed
Selecting previously unselected package wget.
(Reading database ... 20003 files and directories currently installed.)
Preparing to 

In [7]:
#Download Boston housing dataset
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data

--2022-02-16 17:44:18--  https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 49082 (48K) [application/x-httpd-php]
Saving to: ‘housing.data’


2022-02-16 17:44:19 (297 KB/s) - ‘housing.data’ saved [49082/49082]



In [8]:
#Import dataset and add headers
dataset=pd.read_csv("housing.data",delim_whitespace=True,
                    names=["crim","zn","indus",
                           "chas","nox","rm",
                           "age","dis","rad",
                           "tax","ptratio","black",
                           "lstat","medv"])

In [9]:
#Visualize and look at columns and rows of dataset
dataset.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [10]:
#Visualize and look at columns and rows of dataset
dataset.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [11]:
#Split data into features and target variables
features = dataset.drop("medv",axis=1)
targets = dataset["medv"]
print (min(targets))

5.0


In [12]:
#Normalize features
features = features.apply(
    lambda x: (x - x.mean()) / x.std()
)

In [13]:
#Convert features and targets into torch tensors
features = torch.tensor(features.values.astype(np.float32)) 
targets = torch.tensor(targets.values.astype(np.float32))

In [14]:
# Arguments for training
batch_size = 16
epochs = 300
train_test_split = 0.8
lr = 0.001

In [15]:
#Split dataset into train and test
train_indices=int(len(features)*train_test_split)

train_x = features[:train_indices]
train_y = targets[:train_indices]

test_x = features[train_indices+1:]
test_y = targets[train_indices+1:]

In [16]:
#Divide dataset into batches
def get_batches(X, y):
    batches = []
    for index in range(0,len(train_x)+1,batch_size):
        batches.append((X[index:index+batch_size],y[index:index+batch_size]))
    
    return batches

<h1>Plaintext Training</h1>

In [17]:
#Import syft
import syft as sy
sy.logger.remove()

In [18]:
#Define Linear regression model
class LinearSyNet(sy.Module):
    def __init__(self, torch_ref):
        super(LinearSyNet, self).__init__(torch_ref=torch_ref)
        self.fc1 = self.torch_ref.nn.Linear(13,1)

    def forward(self, x):
        x = self.fc1(x)
        return x

In [19]:
"""xtest=[]
xtest.append(np.array_split(test_x, nom))

ytest=[]
ytest.append(np.array_split(test_y, nom))"""

'xtest=[]\nxtest.append(np.array_split(test_x, nom))\n\nytest=[]\nytest.append(np.array_split(test_y, nom))'

In [20]:
print(len(train_x))
print(len(train_x[:404]))

404
404


In [21]:
def Train(nom,m):
    #Training Loop
    train_batches=get_batches(train_x,train_y)


    for i in range(nom):

        model1 = LinearSyNet(torch)
        criterion = torch.nn.MSELoss(reduction='mean') 
        optimizer = torch.optim.SGD(model1.parameters(), lr=lr)
        print("model: ", i)
        for epoch in range(epochs):
          running_loss = 0.0
          for index in range(0,len(train_batches)):
            # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
            optimizer.zero_grad()

            # get output from the model, given the inputs
            outputs = model1(train_batches[index][0]).reshape([-1])

            # get loss for the predicted output
            loss = criterion(outputs,train_batches[index][1])
            running_loss += loss
            # get gradients w.r.t to parameters
            loss.backward()

            # update parameters
            optimizer.step()

          test_accuracy = criterion(model1(test_x).reshape([-1]),test_y)
          if((epoch%50)==0):
             b =0
             print(f"Epoch {epoch}/{epochs}  Running Loss : {running_loss.item()/batch_size} and test loss: {test_accuracy.item()}")

        m.append(model1)
        return criterion

<h1>Encrypted Inference</h1>

In [22]:
#SyMPC imports required for encrypted inference
import sympc
from sympc.session import Session
from sympc.session import SessionManager
from sympc.tensor import MPCTensor
from sympc.protocol import Falcon,FSS

In [23]:
def get_clients(n_parties):
  #Generate required number of syft clients and return them.

  parties=[]
  for index in range(n_parties): 
      parties.append(sy.VirtualMachine(name = "worker"+str(index)).get_root_client())

  return parties

In [24]:
def split_send(data,session):
    """Splits data into number of chunks equal to number of parties and distributes it to respective 
       parties.
    """
    data_pointers = []
    
    split_size = int(len(data)/len(session.parties))+1
    for index in range(0,len(session.parties)):
        ptr=data[index*split_size:index*split_size+split_size].share(session=session)
        data_pointers.append(ptr)
        
    return data_pointers

In [25]:
def private_mean(result,lower,upper,privacy_budget: float) -> float:
    x = BoundedMean(privacy_budget,0,lower,upper, dtype="float")
    return x.quick_result(list(result))

In [26]:
import statistics

In [27]:
def transpose(a):
    b=[]
    for i in range(len(a[0])):
        b.append([])
        for j in range(3):
            b[i].append(a[j][i])
    return(b)

In [28]:
def private_mean(result,lower,upper,privacy_budget: float) -> float:
    x = BoundedMean(privacy_budget,0,lower,upper, dtype="float")
    return x.quick_result(list(result))

In [29]:
#from ._bounded_algorithms import Median
from pydp.algorithms.laplacian import Median

In [30]:
def private_median(result,lower,upper,privacy_budget: float) -> float:
    x = Median(privacy_budget,0,lower,upper, dtype="float")
    return x.quick_result(list(result))

In [37]:
import threading

In [49]:
def inf(i,mpc_model,pointers,results,all_predictions):
        
    for j in range(len(pointers[0])):
        encrypted_results = mpc_model[i](pointers[i][j])
        plaintext_results = encrypted_results.reconstruct()
        results.append(plaintext_results)
    prediction = torch.cat(results).reshape([-1])
    all_predictions.append(prediction)

In [58]:
def inference(n_clients,nom,privacy_budget,protocol=None):

  m=[]
  #criterion=Train(nom,m)
  train_batches=get_batches(train_x,train_y)


  for i in range(nom):

    model1 = LinearSyNet(torch)
    criterion = torch.nn.MSELoss(reduction='mean') 
    optimizer = torch.optim.SGD(model1.parameters(), lr=lr)
    #print("model: ", i)
    for epoch in range(epochs):
      running_loss = 0.0
      for index in range(0,len(train_batches)):
        # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
        optimizer.zero_grad()

        # get output from the model, given the inputs
        outputs = model1(train_batches[index][0]).reshape([-1])

        # get loss for the predicted output
        loss = criterion(outputs,train_batches[index][1])
        running_loss += loss
        # get gradients w.r.t to parameters
        loss.backward()

        # update parameters
        optimizer.step()

      test_accuracy = criterion(model1(test_x).reshape([-1]),test_y)
      if((epoch%50)==0):
         b =0
         #print(f"Epoch {epoch}/{epochs}  Running Loss : {running_loss.item()/batch_size} and test loss: {test_accuracy.item()}")

    m.append(model1)
    
  #print("models trained")
  # Get VM clients 
  parties=get_clients(n_clients)

  # Setup the session for the computation
  if(protocol):
     session = Session(parties = parties,protocol = protocol)
  else:
     session = Session(parties = parties)
        
  SessionManager.setup_mpc(session)
  pointers=[]
  mpc_model=[]

  
  for i in range(nom):
        #Split data and send data to clients
        pointers.append(split_send(test_x,session))

        #Encrypt model 
        mpc_model.append(m[i].share(session))
  
        #Encrypt test data
        #test_data=MPCTensor(secret=test_x, session = session)
  all_predictions=[]
  ap=[]
    
    
  start_time = time.time()
  for i in range(nom):
        results = []
        inf(i,mpc_model,pointers,results,all_predictions)
        """t1 = threading.Thread(target=inf, args=(i,mpc_model,pointers,results,all_predictions,))
        t1.start()
        t1.join() """ 
  


  transMatt=transpose(all_predictions)
  mean=[]

  for i in range(len(transMatt)):
        for j in range(len(transMatt[0])):
            transMatt[i][j]=transMatt[i][j].item()
   
  for i in transMatt:
        x1 = private_median(i,min(i)-5,max(i)+5,privacy_budget)
        mean.append(x1)
  
  end_time=time.time()
  print("Inference time: ",str(end_time-start_time),"seconds")
  print("MSE Loss mean-private: ",criterion(torch.Tensor(mean),test_y).item()) 
  
      

In [None]:
#secure nodes, nom
for i in range(3,11):
    print("secure nodes: ",i)
    for j in range(3,21):
        print("models: ",j)
        predictions=inference(i,j,0.8)

secure nodes:  3
models:  3
Inference time:  1.6923036575317383 seconds
MSE Loss mean-private:  29.227087020874023
models:  4
