In [1]:
#External libraries
import pandas as pd
import numpy as np
import time

In [2]:
#Import torch
import torch
import torch.nn as nn
import torch.utils.data as data_utils

In [3]:
! pip install python-dp


Collecting python-dp
  Downloading python_dp-1.1.1-cp37-cp37m-manylinux1_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 23.5 MB/s eta 0:00:01
[?25hInstalling collected packages: python-dp
Successfully installed python-dp-1.1.1
You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.[0m


In [4]:
import pydp as dp  # by convention our package is to be imported as dp (for Differential Privacy!)
from pydp.algorithms.laplacian import BoundedMean

In [5]:
#Set a manual seed to maintain consistency
torch.manual_seed(0)

<torch._C.Generator at 0x7ff090a2b970>

<h2>Data Loading and Processing</h2>

In [6]:
!apt-get update
!apt-get install wget

Hit:1 http://deb.debian.org/debian buster InRelease
Get:2 http://deb.debian.org/debian buster-updates InRelease [51.9 kB]
Get:3 http://security.debian.org/debian-security buster/updates InRelease [65.4 kB]
Get:4 http://security.debian.org/debian-security buster/updates/main amd64 Packages [314 kB]
Fetched 431 kB in 1s (566 kB/s)    
Reading package lists... Done
Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  wget
0 upgraded, 1 newly installed, 0 to remove and 0 not upgraded.
Need to get 902 kB of archives.
After this operation, 3335 kB of additional disk space will be used.
Get:1 http://deb.debian.org/debian buster/main amd64 wget amd64 1.20.1-1.1 [902 kB]
Fetched 902 kB in 0s (26.3 MB/s)
debconf: delaying package configuration, since apt-utils is not installed
Selecting previously unselected package wget.
(Reading database ... 20003 files and directories currently installed.)
Preparing to 

In [7]:
#Download Boston housing dataset
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data

--2022-02-05 09:01:53--  https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 49082 (48K) [application/x-httpd-php]
Saving to: ‘housing.data’


2022-02-05 09:01:54 (327 KB/s) - ‘housing.data’ saved [49082/49082]



In [8]:
#Import dataset and add headers
dataset=pd.read_csv("housing.data",delim_whitespace=True,
                    names=["crim","zn","indus",
                           "chas","nox","rm",
                           "age","dis","rad",
                           "tax","ptratio","black",
                           "lstat","medv"])

In [9]:
#Visualize and look at columns and rows of dataset
dataset.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [10]:
#Visualize and look at columns and rows of dataset
dataset.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [11]:
#Split data into features and target variables
features = dataset.drop("medv",axis=1)
targets = dataset["medv"]
print (min(targets))

5.0


In [12]:
#Normalize features
features = features.apply(
    lambda x: (x - x.mean()) / x.std()
)

In [13]:
#Convert features and targets into torch tensors
features = torch.tensor(features.values.astype(np.float32)) 
targets = torch.tensor(targets.values.astype(np.float32))

In [14]:
# Arguments for training
batch_size = 16
epochs = 300
train_test_split = 0.8
lr = 0.001

In [15]:
#Split dataset into train and test
train_indices=int(len(features)*train_test_split)

train_x = features[:train_indices]
train_y = targets[:train_indices]

test_x = features[train_indices+1:]
test_y = targets[train_indices+1:]

In [16]:
#Divide dataset into batches
def get_batches(X, y):
    batches = []
    for index in range(0,len(train_x)+1,batch_size):
        batches.append((X[index:index+batch_size],y[index:index+batch_size]))
    
    return batches

<h1>Plaintext Training</h1>

In [17]:
#Import syft
import syft as sy
sy.logger.remove()

In [18]:
#Define Linear regression model
class LinearSyNet(sy.Module):
    def __init__(self, torch_ref):
        super(LinearSyNet, self).__init__(torch_ref=torch_ref)
        self.fc1 = self.torch_ref.nn.Linear(13,1)

    def forward(self, x):
        x = self.fc1(x)
        return x

In [19]:
"""xtest=[]
xtest.append(np.array_split(test_x, nom))

ytest=[]
ytest.append(np.array_split(test_y, nom))"""

'xtest=[]\nxtest.append(np.array_split(test_x, nom))\n\nytest=[]\nytest.append(np.array_split(test_y, nom))'

In [20]:
print(len(train_x))
print(len(train_x[:404]))

404
404


In [21]:
def Train(nom,m):
    #Training Loop
    train_batches=get_batches(train_x,train_y)


    for i in range(nom):

        model1 = LinearSyNet(torch)
        criterion = torch.nn.MSELoss(reduction='mean') 
        optimizer = torch.optim.SGD(model1.parameters(), lr=lr)
        print("model: ", i)
        for epoch in range(epochs):
          running_loss = 0.0
          for index in range(0,len(train_batches)):
            # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
            optimizer.zero_grad()

            # get output from the model, given the inputs
            outputs = model1(train_batches[index][0]).reshape([-1])

            # get loss for the predicted output
            loss = criterion(outputs,train_batches[index][1])
            running_loss += loss
            # get gradients w.r.t to parameters
            loss.backward()

            # update parameters
            optimizer.step()

          test_accuracy = criterion(model1(test_x).reshape([-1]),test_y)
          if((epoch%50)==0):
             b =0
             print(f"Epoch {epoch}/{epochs}  Running Loss : {running_loss.item()/batch_size} and test loss: {test_accuracy.item()}")

        m.append(model1)
        return criterion

<h1>Encrypted Inference</h1>

In [22]:
#SyMPC imports required for encrypted inference
import sympc
from sympc.session import Session
from sympc.session import SessionManager
from sympc.tensor import MPCTensor
from sympc.protocol import Falcon,FSS

In [23]:
def get_clients(n_parties):
  #Generate required number of syft clients and return them.

  parties=[]
  for index in range(n_parties): 
      parties.append(sy.VirtualMachine(name = "worker"+str(index)).get_root_client())

  return parties

In [24]:
def split_send(data,session):
    """Splits data into number of chunks equal to number of parties and distributes it to respective 
       parties.
    """
    data_pointers = []
    
    split_size = int(len(data)/len(session.parties))+1
    for index in range(0,len(session.parties)):
        ptr=data[index*split_size:index*split_size+split_size].share(session=session)
        data_pointers.append(ptr)
        
    return data_pointers

In [25]:
def private_mean(result,lower,upper,privacy_budget: float) -> float:
    x = BoundedMean(privacy_budget,0,lower,upper, dtype="float")
    return x.quick_result(list(result))

In [26]:
import statistics

In [27]:
def transpose(a):
    b=[]
    for i in range(len(a[0])):
        b.append([])
        for j in range(3):
            b[i].append(a[j][i])
    return(b)

In [37]:
def inference(n_clients,nom,privacy_budget,protocol=None):

  m=[]
  #criterion=Train(nom,m)
  train_batches=get_batches(train_x,train_y)


  for i in range(nom):

    model1 = LinearSyNet(torch)
    criterion = torch.nn.MSELoss(reduction='mean') 
    optimizer = torch.optim.SGD(model1.parameters(), lr=lr)
    #print("model: ", i)
    for epoch in range(epochs):
      running_loss = 0.0
      for index in range(0,len(train_batches)):
        # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
        optimizer.zero_grad()

        # get output from the model, given the inputs
        outputs = model1(train_batches[index][0]).reshape([-1])

        # get loss for the predicted output
        loss = criterion(outputs,train_batches[index][1])
        running_loss += loss
        # get gradients w.r.t to parameters
        loss.backward()

        # update parameters
        optimizer.step()

      test_accuracy = criterion(model1(test_x).reshape([-1]),test_y)
      if((epoch%50)==0):
         b =0
         #print(f"Epoch {epoch}/{epochs}  Running Loss : {running_loss.item()/batch_size} and test loss: {test_accuracy.item()}")

    m.append(model1)
    
  print("models trained")
  # Get VM clients 
  parties=get_clients(n_clients)

  # Setup the session for the computation
  if(protocol):
     session = Session(parties = parties,protocol = protocol)
  else:
     session = Session(parties = parties)
        
  SessionManager.setup_mpc(session)
  pointers=[]
  mpc_model=[]

  
  for i in range(nom):
        #Split data and send data to clients
        pointers.append(split_send(test_x,session))

        #Encrypt model 
        mpc_model.append(m[i].share(session))
  
        #Encrypt test data
        #test_data=MPCTensor(secret=test_x, session = session)
  all_predictions=[]
  ap=[]
  for i in range(nom):
        #Perform inference and measure time taken
        #start_time = time.time()

        results = []
        
        for j in range(len(pointers[0])):
            encrypted_results = mpc_model[i](pointers[i][j])
            plaintext_results = encrypted_results.reconstruct()
            results.append(plaintext_results)
        prediction = torch.cat(results).reshape([-1])   
        #print("Model: ", i )
        all_predictions.append(prediction)  
  
  transMatt=transpose(all_predictions)
  

  for i in range(len(transMatt)):
        for j in range(len(transMatt[0])):
            transMatt[i][j]=transMatt[i][j].item()
  mean=[]
  for i in transMatt:
        mean.append(statistics.mean(i))
      

  print("MSE Loss mean-simple: ",criterion(torch.Tensor(mean),test_y).item()) 
  
      

In [38]:
#secure nodes, nom
predictions=inference(3,3,0.1)

models trained
MSE Loss mean-simple:  20.242773056030273


all predictions:
    
    [tensor([ 2.5567,  4.9530, 19.2169, 12.0431, 19.0516, 11.7997, 16.3122, -1.2812,
         8.8086, -9.0112,  8.7289, 13.4905,  4.6239,  1.1151, 15.0841, 19.8134,
        18.3768, 17.5226, 12.9248, 14.0460,  9.2614, 15.6686, 12.3981, 14.4161,
        13.1036, 17.8804, 18.4760, 21.7828, 18.1430, 16.3232, 13.8728, 15.1361,
         8.7141,  4.1364, 12.4649, 11.2762, 17.4115, 19.2144, 18.2252, 11.0774,
        12.4953, 18.2854, 18.3429, 17.4301, 17.5314, 17.8769, 20.0457, 18.9907,
        23.3727, 16.3478, 17.0667, 13.6818, 13.7399, 17.7715, 18.8536, 20.0962,
        20.9220, 20.3113, 23.3999, 20.3261, 18.0829, 15.1238, 16.1493, 15.3859,
        17.1720, 19.6973, 22.4276, 22.2922, 26.0291, 15.0852, 15.0750, 20.1848,
         9.3712, 18.2995, 20.8878, 22.9516, 27.0709, 28.7826, 20.6696, 19.3281,
        22.4489, 19.2649, 20.9872, 12.7470,  8.8350,  3.8092, 14.9937, 17.4703,
        20.7737, 20.7735, 16.7450, 13.5249, 19.3021, 21.5879, 18.3616, 20.7143,
        24.5523, 23.1080, 29.0611, 27.4530, 23.1467]), 
    tensor([ 2.5338,  4.9599, 19.2389, 12.1007, 19.1551, 11.9326, 16.4683, -1.1239,
         8.8834, -8.9085,  8.8733, 13.6394,  4.7198,  1.2237, 15.2115, 19.8330,
        18.3964, 17.5683, 13.0893, 14.2062,  9.4060, 15.8197, 12.5193, 14.5313,
        13.2345, 18.0169, 18.6172, 21.9093, 18.2452, 16.4190, 13.9718, 15.2576,
         8.8488,  4.2569, 12.4558, 11.2626, 17.4062, 19.2041, 18.2197, 11.1252,
        12.6190, 18.3045, 18.3321, 17.4236, 17.5622, 18.0178, 20.0582, 18.9898,
        23.3789, 16.4853, 17.1891, 13.8161, 13.8718, 17.8075, 18.8423, 20.1431,
        20.9166, 20.3006, 23.3880, 20.3248, 18.1045, 15.2637, 16.1978, 15.4105,
        17.1833, 19.7167, 22.4548, 22.3133, 26.0509, 15.1181, 15.1379, 20.2040,
         9.4026, 18.3203, 20.9013, 22.9736, 27.0947, 28.8064, 20.6886, 19.3495,
        22.4619, 19.2807, 21.0017, 12.7734,  8.8858,  3.8730, 15.0247, 17.4931,
        20.7660, 20.7684, 16.7420, 13.5228, 19.2949, 21.5828, 18.3566, 20.7105,
        24.5383, 23.0900, 29.0451, 27.4370, 23.1260]), 
    tensor([ 2.5597,  4.9578, 19.2220, 12.0693, 19.1000, 11.8728, 16.3860, -1.2014,
         8.8477, -8.9524,  8.7954, 13.5571,  4.6718,  1.1812, 15.1403, 19.8118,
        18.3747, 17.5389, 12.9992, 14.1202,  9.3264, 15.7409, 12.4588, 14.4652,
        13.1590, 17.9409, 18.5384, 21.8369, 18.1788, 16.3582, 13.9100, 15.1849,
         8.7715,  4.1906, 12.4513, 11.2644, 17.3968, 19.1953, 18.2105, 11.0918,
        12.5453, 18.2810, 18.3241, 17.4140, 17.5335, 17.9314, 20.0367, 18.9759,
        23.3605, 16.4007, 17.1119, 13.7320, 13.7894, 17.7737, 18.8325, 20.1029,
        20.9030, 20.2903, 23.3757, 20.3129, 18.0812, 15.1767, 16.1628, 15.3917,
        17.1721, 19.6938, 22.4274, 22.2907, 26.0294, 15.0923, 15.0986, 20.1839,
         9.3824, 18.3015, 20.8890, 22.9502, 27.0687, 28.7794, 20.6648, 19.3253,
        22.4386, 19.2570, 20.9835, 12.8473,  8.9484,  3.9300, 15.0950, 17.5669,
        20.7952, 20.7982, 16.7710, 13.5466, 19.3190, 21.6092, 18.3826, 20.7342,
        24.5578, 23.1117, 29.0636, 27.4546, 23.1469])]

all predictions[0]:

tensor([ 2.5379,  4.9758, 19.2461, 12.0896, 19.1124, 11.8773, 16.3956, -1.1937,
         8.8650, -8.9728,  8.7872, 13.5492,  4.6651,  1.1527, 15.1230, 19.8235,
        18.3887, 17.5644, 13.0077, 14.1351,  9.3257, 15.7483, 12.4455, 14.4641,
        13.1608, 17.9593, 18.5546, 21.8574, 18.1852, 16.3652, 13.9032, 15.1793,
         8.7613,  4.1701, 12.4571, 11.2619, 17.4034, 19.2095, 18.2178, 11.0914,
        12.5369, 18.2899, 18.3355, 17.4297, 17.5446, 17.9337, 20.0513, 18.9913,
        23.3709, 16.4023, 17.1152, 13.7390, 13.7943, 17.7849, 18.8501, 20.1142,
        20.9211, 20.3065, 23.3984, 20.3310, 18.0951, 15.1949, 16.1952, 15.4142,
        17.1941, 19.7305, 22.4775, 22.3227, 26.0487, 15.1300, 15.1244, 20.2108,
         9.4046, 18.3283, 20.9125, 22.9912, 27.1134, 28.8242, 20.7040, 19.3517,
        22.4711, 19.2957, 21.0121, 12.7470,  8.8435,  3.8213, 14.9942, 17.4653,
        20.7509, 20.7452, 16.7119, 13.5071, 19.2834, 21.5673, 18.3456, 20.6984,
        24.5254, 23.0856, 29.0411, 27.4331, 23.1262])

In [None]:
a=[[1,2,3,5,6,1,5,6,7,8],[1,8,9,10,11,1,8,9,11,12]]
b=np.transpose(a)
c=[]
for i in range(len(b)):
    c.append(sum(b[i]) / len(b[i]))

    
print(b)
print()
print(len(a[0]),"a")
print(len(b[0]),"b")
print(len(c),"c")
print()
print(c.item())

In [32]:
a = [1, 2, 3]
b = torch.Tensor(a)
print(b[0].item())

1.0


In [None]:
x = torch.randn(2, 3)

print(type(x))
torch.transpose(x, -1, 0)