# Multi-Layer Perceptron Attack
TJ Kim, 12 Nov 2019

We build a multi-layer perceptron model to classify adult.csv data. Then we perform an equation solving attack. Then, a uniform query attack will be performed, as well as an adaptive retraining attack.

## Importing Data
First we import CSV data as pandas and divide to test and training set.

In [114]:
import re
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# Import Dataset
filename = "iris.csv"
df = pd.read_csv(filename, sep='\s*,\s*',engine = 'python')

# Split training/test data to input and output
cols = ['sepal length','sepal width', 'petal length', 'petal width']
y_val = 'class'

# Separate each dataset into training, testing, and query data
total_rest,total_test = train_test_split(df, test_size=0.4, shuffle=True)
total_train,queries = train_test_split(total_rest, test_size = 0.3, shuffle = True)

# Separate each sub dataset to input and output
total_train_data = total_train.loc[:,total_train.columns != y_val]
total_train_label = total_train.loc[:,total_train.columns == y_val]
total_test_data = total_train.loc[:,total_test.columns != y_val]
total_test_label = total_train.loc[:,total_test.columns == y_val]
query_data = queries.loc[:,queries.columns != y_val]
query_label = queries.loc[:,queries.columns == y_val]

# One Hot Encode Output Data
total_train_label = pd.get_dummies(total_train_label,prefix=['class'])
total_test_label = pd.get_dummies(total_test_label,prefix=['class'])
query_label = pd.get_dummies(query_label,prefix=['class'])

## Build Run MultiLayer Perceptron
To have easy access to the weight vectors associated with every layer, we will use pytorch.
Our perceptron will have an input layer, a dense layer, and ELU layer, another dense layer, and a loss layer.

In [115]:
import torch
import torch.nn as nn
import math

axes_max = total_train_data.values.max(axis=0)

X = torch.from_numpy(total_train_data.values/axes_max).float()
y = torch.from_numpy(total_train_label.values).float()

In [162]:
class Neural_Network(nn.Module):
    def __init__(self, ):
        super(Neural_Network, self).__init__()
        # parameters
        # TODO: parameters can be parameterized instead of declaring them here
        self.inputSize = 4
        self.outputSize = 3
        self.hiddenSize = 5
        
        # weights
        mu1, sigma1 = 0, math.sqrt(2.0/(self.inputSize+self.hiddenSize))
        mu2, sigma2 = 0,math.sqrt(2.0/(self.outputSize+self.hiddenSize))
        
        self.W1 = torch.from_numpy(np.random.normal(mu1,sigma1,[self.inputSize,self.hiddenSize])).float()
        self.W2 = torch.from_numpy(np.random.normal(mu2,sigma2,[self.hiddenSize,self.outputSize])).float()
        
    def forward(self, X):
        alpha = 0.9
        self.z1 = torch.matmul(X, self.W1) # 3 X 3 ".dot" does not broadcast in PyTorch
        #self.z2 = self.ELU(self.z1,alpha) # activation function
        self.z3 = torch.matmul(self.z1, self.W2)
        o = self.sigmoid(self.z3) # final activation function
        return o
        
    def sigmoid(self, s):
        return 1 / (1 + torch.exp(-s))
    
    def sigmoidPrime(self, s):
        # derivative of sigmoid
        return s * (1 - s)
    
    def ELU(self,s,alpha):
        temp_out = s.clone()
        
        temp_out[temp_out<=0] = alpha*(np.exp(temp_out[temp_out<=0]-1))
        
        return temp_out
    
    def ELUPrime(self,s,alpha):

        temp_out2 = torch.from_numpy(np.ones(s.shape)).float()
        temp_out2[s<=0] = alpha * torch.exp(s[s<=0])

        return temp_out2
        
    def backward(self, X, y, o):
        lr_rate = 0.01
        alpha = 0.9
        self.o_error = y - o # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o) # derivative of sig to error
        self.z2_error = torch.matmul(self.o_delta, torch.t(self.W2))
        #self.z2_delta = self.z2_error * self.ELUPrime(self.z2,alpha)
        self.W1 += lr_rate * torch.matmul(torch.t(X), self.z2_error)
        self.W2 += lr_rate * torch.matmul(torch.t(self.z1), self.o_delta)
        
    def train(self, X, y):
        # forward + backward pass for training
        o = self.forward(X)
        self.backward(X, y, o)
        
    def saveWeights(self, model):
        # we will use the PyTorch internal storage functions
        torch.save(model, "NN")
        # you can reload model with all the weights and so forth with:
        # torch.load("NN")
        
    def predict(self, X_query, mute):
        
        if mute is False:
            #print ("Predicted data based on trained weights: ")
            print ("Input: \n" + str(X_query))
            print ("Confidence: \n" + str(self.forward(X_query).numpy()))
        
        conf = self.forward(X_query).numpy()
        prediction = np.zeros(conf.shape)
        if conf.ndim == 1:
            b = np.argmax(conf)
            prediction[b] = 1
        else:
            for i in range(conf.shape[0]):
                b = np.argmax(conf[i,:])
                prediction[i,b] = 1

        return prediction, conf
        

Build instance of model we just built.

In [163]:
j = 3
NN = Neural_Network()
for i in range(1000):  # trains the NN 1,000 times
    print ("#" + str(i) + " Loss: " + str(torch.mean((y - NN(X))**2).detach().item()))  # mean sum squared loss
    NN.train(X, y)
NN.saveWeights(NN)
print("true: \n",y[j])
NN.predict(X[j,:],False)

#0 Loss: 0.2470022737979889
#1 Loss: 0.23690590262413025
#2 Loss: 0.23183780908584595
#3 Loss: 0.2288280427455902
#4 Loss: 0.2267027199268341
#5 Loss: 0.22500962018966675
#6 Loss: 0.22355343401432037
#7 Loss: 0.22223767638206482
#8 Loss: 0.2210085690021515
#9 Loss: 0.21983298659324646
#10 Loss: 0.21868902444839478
#11 Loss: 0.21756118535995483
#12 Loss: 0.21643811464309692
#13 Loss: 0.21531124413013458
#14 Loss: 0.21417361497879028
#15 Loss: 0.21301990747451782
#16 Loss: 0.21184562146663666
#17 Loss: 0.2106470763683319
#18 Loss: 0.2094210833311081
#19 Loss: 0.20816494524478912
#20 Loss: 0.20687632262706757
#21 Loss: 0.20555320382118225
#22 Loss: 0.20419400930404663
#23 Loss: 0.20279723405838013
#24 Loss: 0.20136196911334991
#25 Loss: 0.19988715648651123
#26 Loss: 0.19837234914302826
#27 Loss: 0.1968173086643219
#28 Loss: 0.19522204995155334
#29 Loss: 0.1935870200395584
#30 Loss: 0.19191284477710724
#31 Loss: 0.19020062685012817
#32 Loss: 0.18845166265964508
#33 Loss: 0.1866676956415176

#482 Loss: 0.0709332749247551
#483 Loss: 0.07089370489120483
#484 Loss: 0.07085415720939636
#485 Loss: 0.07081472128629684
#486 Loss: 0.0707753524184227
#487 Loss: 0.07073606550693512
#488 Loss: 0.07069683820009232
#489 Loss: 0.07065766304731369
#490 Loss: 0.07061857730150223
#491 Loss: 0.07057956606149673
#492 Loss: 0.07054062187671661
#493 Loss: 0.07050175964832306
#494 Loss: 0.07046293467283249
#495 Loss: 0.07042419910430908
#496 Loss: 0.07038553059101105
#497 Loss: 0.07034694403409958
#498 Loss: 0.07030842453241348
#499 Loss: 0.07026997953653336
#500 Loss: 0.07023157179355621
#501 Loss: 0.07019325345754623
#502 Loss: 0.07015498727560043
#503 Loss: 0.0701168030500412
#504 Loss: 0.07007867842912674
#505 Loss: 0.07004063576459885
#506 Loss: 0.07000266015529633
#507 Loss: 0.06996472924947739
#508 Loss: 0.06992688030004501
#509 Loss: 0.06988910585641861
#510 Loss: 0.06985137611627579
#511 Loss: 0.06981372088193893
#512 Loss: 0.06977612525224686
#513 Loss: 0.06973859667778015
#514 Loss: 

#963 Loss: 0.058286841958761215
#964 Loss: 0.058271750807762146
#965 Loss: 0.058256689459085464
#966 Loss: 0.05824165418744087
#967 Loss: 0.05822666734457016
#968 Loss: 0.05821170657873154
#969 Loss: 0.05819679796695709
#970 Loss: 0.05818188935518265
#971 Loss: 0.05816704407334328
#972 Loss: 0.058152228593826294
#973 Loss: 0.0581374391913414
#974 Loss: 0.05812269449234009
#975 Loss: 0.05810798332095146
#976 Loss: 0.05809330195188522
#977 Loss: 0.05807863920927048
#978 Loss: 0.05806403607130051
#979 Loss: 0.05804947018623352
#980 Loss: 0.05803492292761803
#981 Loss: 0.058020394295454025
#982 Loss: 0.0580059215426445
#983 Loss: 0.05799148604273796
#984 Loss: 0.05797707661986351
#985 Loss: 0.05796268582344055
#986 Loss: 0.05794835090637207
#987 Loss: 0.057934049516916275
#988 Loss: 0.05791977047920227
#989 Loss: 0.05790551006793976
#990 Loss: 0.057891324162483215
#991 Loss: 0.05787713825702667
#992 Loss: 0.05786299705505371
#993 Loss: 0.05784890428185463
#994 Loss: 0.05783481150865555
#99

(array([1., 0., 0.]),
 array([9.3582666e-01, 7.4832760e-02, 1.3464138e-05], dtype=float32))

Test NN model on validation data.

In [164]:
from sklearn.metrics import accuracy_score

X_test = torch.from_numpy(total_test_data.values/axes_max).float()
y_test = torch.from_numpy(total_test_label.values).float()
preds = np.zeros(X_test.shape[0])
confs = np.zeros(X_test.shape[0])
mute = True;


preds, confs = NN.predict(X_test,mute)
    
acc = accuracy_score(total_test_label,preds)
acc

0.9841269841269841

## Equation Solving Attack

Assuming that we have knowledge of the network shape, as well as the activation functions, we are able to use returned confidence intervals to perform an equation solving attack by building a system of equations based on the dependencies of consecutive layers.

