In [1]:
#External libraries
import pandas as pd
import numpy as np
import time

In [2]:
#Import torch
import torch
import torch.nn as nn
import torch.utils.data as data_utils

In [3]:
#Set a manual seed to maintain consistency
torch.manual_seed(0)

<torch._C.Generator at 0x7f42500469d0>

<h2>Data Loading and Processing</h2>

In [4]:
!apt-get update
!apt-get install wget

Hit:1 http://deb.debian.org/debian buster InRelease
Hit:2 http://deb.debian.org/debian buster-updates InRelease
Hit:3 http://security.debian.org/debian-security buster/updates InRelease
Reading package lists... Done                 
Reading package lists... Done
Building dependency tree       
Reading state information... Done
wget is already the newest version (1.20.1-1.1).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.


In [5]:
#Download Boston housing dataset
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data

--2022-01-17 17:40:58--  https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 49082 (48K) [application/x-httpd-php]
Saving to: ‘housing.data.1’


2022-01-17 17:40:59 (322 KB/s) - ‘housing.data.1’ saved [49082/49082]



In [6]:
#Import dataset and add headers
dataset=pd.read_csv("housing.data",delim_whitespace=True,
                    names=["crim","zn","indus",
                           "chas","nox","rm",
                           "age","dis","rad",
                           "tax","ptratio","black",
                           "lstat","medv"])

In [7]:
#Visualize and look at columns and rows of dataset
dataset.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [8]:
#Visualize and look at columns and rows of dataset
dataset.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [9]:
#Split data into features and target variables
features = dataset.drop("medv",axis=1)
targets = dataset["medv"]

In [10]:
#Normalize features
features = features.apply(
    lambda x: (x - x.mean()) / x.std()
)

In [11]:
#Convert features and targets into torch tensors
features = torch.tensor(features.values.astype(np.float32)) 
targets = torch.tensor(targets.values.astype(np.float32))

In [12]:
# Arguments for training
batch_size = 16
epochs = 300
train_test_split = 0.8
lr = 0.001

In [13]:
#Split dataset into train and test
train_indices=int(len(features)*train_test_split)

train_x = features[:train_indices]
train_y = targets[:train_indices]

test_x = features[train_indices+1:]
test_y = targets[train_indices+1:]

In [14]:
#Divide dataset into batches
def get_batches(X, y):
    batches = []
    for index in range(0,len(train_x)+1,batch_size):
        batches.append((X[index:index+batch_size],y[index:index+batch_size]))
    
    return batches

<h1>Plaintext Training</h1>

In [15]:
#Import syft
import syft as sy
sy.logger.remove()

In [16]:
#Define Linear regression model
class LinearSyNet(sy.Module):
    def __init__(self, torch_ref):
        super(LinearSyNet, self).__init__(torch_ref=torch_ref)
        self.fc1 = self.torch_ref.nn.Linear(13,1)

    def forward(self, x):
        x = self.fc1(x)
        return x

In [18]:
"""xtest=[]
xtest.append(np.array_split(test_x, nom))

ytest=[]
ytest.append(np.array_split(test_y, nom))"""

'xtest=[]\nxtest.append(np.array_split(test_x, nom))\n\nytest=[]\nytest.append(np.array_split(test_y, nom))'

In [19]:
print(len(train_x))
print(len(train_x[:404]))

404
404


In [20]:
def Train(nom,m):
    #Training Loop
    train_batches=get_batches(train_x,train_y)


    for i in range(nom):

        model1 = LinearSyNet(torch)
        criterion = torch.nn.MSELoss(reduction='mean') 
        optimizer = torch.optim.SGD(model1.parameters(), lr=lr)
        print("model: ", i)
        for epoch in range(epochs):
          running_loss = 0.0
          for index in range(0,len(train_batches)):
            # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
            optimizer.zero_grad()

            # get output from the model, given the inputs
            outputs = model1(train_batches[index][0]).reshape([-1])

            # get loss for the predicted output
            loss = criterion(outputs,train_batches[index][1])
            running_loss += loss
            # get gradients w.r.t to parameters
            loss.backward()

            # update parameters
            optimizer.step()

          test_accuracy = criterion(model1(test_x).reshape([-1]),test_y)
          if((epoch%50)==0):
             b =0
             print(f"Epoch {epoch}/{epochs}  Running Loss : {running_loss.item()/batch_size} and test loss: {test_accuracy.item()}")

        m.append(model1)
        return criterion

<h1>Encrypted Inference</h1>

In [21]:
#SyMPC imports required for encrypted inference
import sympc
from sympc.session import Session
from sympc.session import SessionManager
from sympc.tensor import MPCTensor
from sympc.protocol import Falcon,FSS

In [22]:
def get_clients(n_parties):
  #Generate required number of syft clients and return them.

  parties=[]
  for index in range(n_parties): 
      parties.append(sy.VirtualMachine(name = "worker"+str(index)).get_root_client())

  return parties

In [23]:
def split_send(data,session):
    """Splits data into number of chunks equal to number of parties and distributes it to respective 
       parties.
    """
    data_pointers = []
    
    split_size = int(len(data)/len(session.parties))+1
    for index in range(0,len(session.parties)):
        ptr=data[index*split_size:index*split_size+split_size].share(session=session)
        data_pointers.append(ptr)
        
    return data_pointers

In [26]:
def inference(n_clients,nom,protocol=None):

  m=[]
  #criterion=Train(nom,m)
  train_batches=get_batches(train_x,train_y)


  for i in range(nom):

    model1 = LinearSyNet(torch)
    criterion = torch.nn.MSELoss(reduction='mean') 
    optimizer = torch.optim.SGD(model1.parameters(), lr=lr)
    print("model: ", i)
    for epoch in range(epochs):
      running_loss = 0.0
      for index in range(0,len(train_batches)):
        # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
        optimizer.zero_grad()

        # get output from the model, given the inputs
        outputs = model1(train_batches[index][0]).reshape([-1])

        # get loss for the predicted output
        loss = criterion(outputs,train_batches[index][1])
        running_loss += loss
        # get gradients w.r.t to parameters
        loss.backward()

        # update parameters
        optimizer.step()

      test_accuracy = criterion(model1(test_x).reshape([-1]),test_y)
      if((epoch%50)==0):
         b =0
         print(f"Epoch {epoch}/{epochs}  Running Loss : {running_loss.item()/batch_size} and test loss: {test_accuracy.item()}")

    m.append(model1)
  # Get VM clients 
  parties=get_clients(n_clients)

  # Setup the session for the computation
  if(protocol):
     session = Session(parties = parties,protocol = protocol)
  else:
     session = Session(parties = parties)
        
  SessionManager.setup_mpc(session)
  pointers=[]
  mpc_model=[]
  for i in range(nom):
        #Split data and send data to clients
        pointers.append(split_send(test_x,session))

        #Encrypt model 
        mpc_model.append(m[i].share(session))

        #Encrypt test data
        #test_data=MPCTensor(secret=test_x, session = session)
  for i in range(nom):
        #Perform inference and measure time taken
        start_time = time.time()

        results = []
        #print(len(pointers[1]))
        for j in range(len(pointers[0])):
            encrypted_results = mpc_model[i](pointers[i][j])
            plaintext_results = encrypted_results.reconstruct()
            results.append(plaintext_results)

        end_time = time.time()

        print(f"Time for inference: {end_time-start_time}s")

        predictions = torch.cat(results).reshape([-1])

        #Calculate Loss
        #print("MSE Loss: ",criterion(predictions,ytest[0][i]).item())
        print("MSE Loss: ",criterion(predictions,test_y).item())

        return predictions

In [27]:
predictions=inference(3,3,Falcon("semi-honest"))

model:  0
Epoch 0/300  Running Loss : 1009.952392578125 and test loss: 329.2429504394531
Epoch 50/300  Running Loss : 52.914222717285156 and test loss: 108.63790893554688
Epoch 100/300  Running Loss : 43.10272216796875 and test loss: 48.36807632446289
Epoch 150/300  Running Loss : 41.11921691894531 and test loss: 30.31987190246582
Epoch 200/300  Running Loss : 40.1476936340332 and test loss: 23.24506378173828
Epoch 250/300  Running Loss : 39.5960578918457 and test loss: 20.739559173583984
model:  1
Epoch 0/300  Running Loss : 965.5076293945312 and test loss: 332.89654541015625
Epoch 50/300  Running Loss : 52.6095085144043 and test loss: 105.93646240234375
Epoch 100/300  Running Loss : 42.9736328125 and test loss: 47.29835510253906
Epoch 150/300  Running Loss : 41.02622604370117 and test loss: 29.851165771484375
Epoch 200/300  Running Loss : 40.07943344116211 and test loss: 23.064735412597656
Epoch 250/300  Running Loss : 39.54554748535156 and test loss: 20.703411102294922
model:  2
Epo

In [29]:
predictions=inference(4,3)

model:  0
Epoch 0/300  Running Loss : 970.595703125 and test loss: 334.8712463378906
Epoch 50/300  Running Loss : 52.29546356201172 and test loss: 104.12493896484375
Epoch 100/300  Running Loss : 42.81524658203125 and test loss: 46.40382385253906
Epoch 150/300  Running Loss : 40.9397087097168 and test loss: 29.40231704711914
Epoch 200/300  Running Loss : 40.02814483642578 and test loss: 22.859432220458984
Epoch 250/300  Running Loss : 39.51382064819336 and test loss: 20.632125854492188
model:  1
Epoch 0/300  Running Loss : 967.79638671875 and test loss: 346.83319091796875
Epoch 50/300  Running Loss : 52.417213439941406 and test loss: 102.06861114501953
Epoch 100/300  Running Loss : 42.78068542480469 and test loss: 45.04148864746094
Epoch 150/300  Running Loss : 40.91387176513672 and test loss: 28.64797592163086
Epoch 200/300  Running Loss : 40.014122009277344 and test loss: 22.461275100708008
Epoch 250/300  Running Loss : 39.50718307495117 and test loss: 20.428884506225586
model:  2
Ep