# Multi-Layer Perceptron Attack
TJ Kim, 12 Nov 2019

We build a multi-layer perceptron model to classify adult.csv data. Then we perform an equation solving attack. Then, a uniform query attack will be performed, as well as an adaptive retraining attack.

## Importing Data
First we import CSV data as pandas and divide to test and training set.

In [1]:
import re
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# Import Dataset
filename = "iris.csv"
df = pd.read_csv(filename, sep='\s*,\s*',engine = 'python')

# Split training/test data to input and output
cols = ['sepal length','sepal width', 'petal length', 'petal width']
y_val = 'class'

# Separate each dataset into training, testing, and query data
total_rest,total_test = train_test_split(df, test_size=0.4, shuffle=True)
total_train,queries = train_test_split(total_rest, test_size = 0.3, shuffle = True)

# Separate each sub dataset to input and output
total_train_data = total_train.loc[:,total_train.columns != y_val]
total_train_label = total_train.loc[:,total_train.columns == y_val]
total_test_data = total_train.loc[:,total_test.columns != y_val]
total_test_label = total_train.loc[:,total_test.columns == y_val]
query_data = queries.loc[:,queries.columns != y_val]
query_label = queries.loc[:,queries.columns == y_val]

# One Hot Encode Output Data
total_train_label = pd.get_dummies(total_train_label,prefix=['class'])
total_test_label = pd.get_dummies(total_test_label,prefix=['class'])
query_label = pd.get_dummies(query_label,prefix=['class'])

## Build Run MultiLayer Perceptron
To have easy access to the weight vectors associated with every layer, we will use pytorch.
Our perceptron will have an input layer, a dense layer, and ELU layer, another dense layer, and a loss layer.

In [2]:
import torch
import torch.nn as nn
import math

axes_max = total_train_data.values.max(axis=0)

X = torch.from_numpy(total_train_data.values/axes_max).float()
y = torch.from_numpy(total_train_label.values).float()

In [3]:
class Neural_Network(nn.Module):
    def __init__(self, ):
        super(Neural_Network, self).__init__()
        # parameters
        # TODO: parameters can be parameterized instead of declaring them here
        self.inputSize = 4
        self.outputSize = 3
        self.hiddenSize = 5
        
        # weights
        mu1, sigma1 = 0, math.sqrt(2.0/(self.inputSize+self.hiddenSize))
        mu2, sigma2 = 0,math.sqrt(2.0/(self.outputSize+self.hiddenSize))
        
        self.W1 = torch.from_numpy(np.random.normal(mu1,sigma1,[self.inputSize,self.hiddenSize])).float()
        self.W2 = torch.from_numpy(np.random.normal(mu2,sigma2,[self.hiddenSize,self.outputSize])).float()
        
    def forward(self, X):
        alpha = 0.9
        self.z1 = torch.matmul(X, self.W1) # 3 X 3 ".dot" does not broadcast in PyTorch
        #self.z2 = self.ELU(self.z1,alpha) # activation function
        self.z3 = torch.matmul(self.z1, self.W2)
        o = self.sigmoid(self.z3) # final activation function
        return o
        
    def sigmoid(self, s):
        return 1 / (1 + torch.exp(-s))
    
    def sigmoidPrime(self, s):
        # derivative of sigmoid
        return s * (1 - s)
    
    def ELU(self,s,alpha):
        temp_out = s.clone()
        
        temp_out[temp_out<=0] = alpha*(np.exp(temp_out[temp_out<=0]-1))
        
        return temp_out
    
    def ELUPrime(self,s,alpha):

        temp_out2 = torch.from_numpy(np.ones(s.shape)).float()
        temp_out2[s<=0] = alpha * torch.exp(s[s<=0])

        return temp_out2
        
    def backward(self, X, y, o):
        lr_rate = 0.01
        alpha = 0.9
        self.o_error = y - o # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o) # derivative of sig to error
        self.z2_error = torch.matmul(self.o_delta, torch.t(self.W2))
        #self.z2_delta = self.z2_error * self.ELUPrime(self.z2,alpha)
        self.W1 += lr_rate * torch.matmul(torch.t(X), self.z2_error)
        self.W2 += lr_rate * torch.matmul(torch.t(self.z1), self.o_delta)
        
    def train(self, X, y):
        # forward + backward pass for training
        o = self.forward(X)
        self.backward(X, y, o)
        
    def saveWeights(self, model):
        # we will use the PyTorch internal storage functions
        torch.save(model, "NN")
        # you can reload model with all the weights and so forth with:
        # torch.load("NN")
        
    def predict(self, X_query, mute):
        
        if mute is False:
            #print ("Predicted data based on trained weights: ")
            print ("Input: \n" + str(X_query))
            print ("Confidence: \n" + str(self.forward(X_query).numpy()))
        
        conf = self.forward(X_query).numpy()
        prediction = np.zeros(conf.shape)
        if conf.ndim == 1:
            b = np.argmax(conf)
            prediction[b] = 1
        else:
            for i in range(conf.shape[0]):
                b = np.argmax(conf[i,:])
                prediction[i,b] = 1

        return prediction, conf
        

Build instance of model we just built.

In [4]:
j = 3
NN = Neural_Network()
for i in range(1000):  # trains the NN 1,000 times
    print ("#" + str(i) + " Loss: " + str(torch.mean((y - NN(X))**2).detach().item()))  # mean sum squared loss
    NN.train(X, y)
NN.saveWeights(NN)
print("true: \n",y[j])
NN.predict(X[j,:],False)

#0 Loss: 0.28785791993141174
#1 Loss: 0.2760718762874603
#2 Loss: 0.2664417326450348
#3 Loss: 0.25849664211273193
#4 Loss: 0.2518538534641266
#5 Loss: 0.24621109664440155
#6 Loss: 0.24133603274822235
#7 Loss: 0.2370520532131195
#8 Loss: 0.23322732746601105
#9 Loss: 0.22976239025592804
#10 Loss: 0.226583331823349
#11 Loss: 0.22363436222076416
#12 Loss: 0.22087326645851135
#13 Loss: 0.21826781332492828
#14 Loss: 0.2157929241657257
#15 Loss: 0.21342922747135162
#16 Loss: 0.21116089820861816
#17 Loss: 0.20897528529167175
#18 Loss: 0.20686151087284088
#19 Loss: 0.2048104852437973
#20 Loss: 0.20281434059143066
#21 Loss: 0.2008659392595291
#22 Loss: 0.1989588588476181
#23 Loss: 0.19708740711212158
#24 Loss: 0.19524651765823364
#25 Loss: 0.19343145191669464
#26 Loss: 0.1916380524635315
#27 Loss: 0.18986277282238007
#28 Loss: 0.18810242414474487
#29 Loss: 0.18635444343090057
#30 Loss: 0.184616819024086
#31 Loss: 0.18288786709308624
#32 Loss: 0.181166410446167
#33 Loss: 0.17945191264152527
#34 L

#408 Loss: 0.0819849744439125
#409 Loss: 0.08194337040185928
#410 Loss: 0.08190185576677322
#411 Loss: 0.0818604826927185
#412 Loss: 0.08181917667388916
#413 Loss: 0.08177800476551056
#414 Loss: 0.08173692971467972
#415 Loss: 0.08169595897197723
#416 Loss: 0.08165508508682251
#417 Loss: 0.08161431550979614
#418 Loss: 0.08157366514205933
#419 Loss: 0.08153311163187027
#420 Loss: 0.08149264007806778
#421 Loss: 0.08145227283239365
#422 Loss: 0.08141199499368668
#423 Loss: 0.08137182146310806
#424 Loss: 0.0813317596912384
#425 Loss: 0.08129175752401352
#426 Loss: 0.08125190436840057
#427 Loss: 0.08121208101511002
#428 Loss: 0.0811724066734314
#429 Loss: 0.08113276958465576
#430 Loss: 0.08109323680400848
#431 Loss: 0.08105381578207016
#432 Loss: 0.08101446181535721
#433 Loss: 0.08097522705793381
#434 Loss: 0.08093604445457458
#435 Loss: 0.08089695870876312
#436 Loss: 0.08085796982049942
#437 Loss: 0.08081906288862228
#438 Loss: 0.08078022301197052
#439 Loss: 0.08074147254228592
#440 Loss: 0

#829 Loss: 0.06942799687385559
#830 Loss: 0.06940837949514389
#831 Loss: 0.06938882917165756
#832 Loss: 0.06936931610107422
#833 Loss: 0.06934984773397446
#834 Loss: 0.06933043897151947
#835 Loss: 0.06931108981370926
#836 Loss: 0.06929179280996323
#837 Loss: 0.06927253305912018
#838 Loss: 0.06925331801176071
#839 Loss: 0.06923419237136841
#840 Loss: 0.0692150816321373
#841 Loss: 0.06919603049755096
#842 Loss: 0.0691770538687706
#843 Loss: 0.06915810704231262
#844 Loss: 0.06913921982049942
#845 Loss: 0.0691203698515892
#846 Loss: 0.06910157948732376
#847 Loss: 0.0690828412771225
#848 Loss: 0.06906416267156601
#849 Loss: 0.0690455287694931
#850 Loss: 0.06902694702148438
#851 Loss: 0.06900840997695923
#852 Loss: 0.06898993253707886
#853 Loss: 0.06897149980068207
#854 Loss: 0.06895312666893005
#855 Loss: 0.06893480569124222
#856 Loss: 0.06891652941703796
#857 Loss: 0.06889829784631729
#858 Loss: 0.06888013333082199
#859 Loss: 0.06886200606822968
#860 Loss: 0.06884393095970154
#861 Loss: 0.

  "type " + obj.__name__ + ". It won't be checked "


(array([1., 0., 0.]),
 array([9.9020445e-01, 2.2456807e-01, 4.0706502e-07], dtype=float32))

Test NN model on validation data.

In [5]:
from sklearn.metrics import accuracy_score

X_test = torch.from_numpy(total_test_data.values/axes_max).float()
y_test = torch.from_numpy(total_test_label.values).float()
preds = np.zeros(X_test.shape[0])
confs = np.zeros(X_test.shape[0])
mute = True;


preds, confs = NN.predict(X_test,mute)
    
acc = accuracy_score(total_test_label,preds)
acc

0.9523809523809523

## Equation Solving Attack

Assuming that we have knowledge of the network shape, as well as the activation functions, we are able to use returned confidence intervals to perform an equation solving attack by building a system of equations based on the dependencies of consecutive layers.

After literature search, an equation solving attack seems impossible given the product of weights between different layers.