In [1]:
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so code that draws from np.random (e.g. the random
# weight initialisation in NN) produces the same numbers on every fresh run.
np.random.seed(1)

In [2]:
# Load the raw dataset; the path is resolved relative to the notebook's working directory.
df = pd.read_csv("../input_data/data.csv")

In [3]:
# Separate the 'xAttack' label column from the remaining feature columns.
y = df['xAttack']
X = df.drop(columns='xAttack')

In [4]:
# Standardise every feature column to zero mean and unit variance.
X = np.asarray(X)
feature_mean = np.mean(X, axis=0)
feature_std = np.std(X, axis=0)
# A constant column has std == 0 and dividing by it would fill the column with
# NaN; divide such columns by 1 instead so they simply become all zeros.
feature_std = np.where(feature_std == 0, 1.0, feature_std)
X = (X - feature_mean) / feature_std

In [5]:
# Sanity check: (number of samples, number of features) of the standardised array.
X.shape

(125973, 29)

In [6]:
def traintestvalidatesplit(data, train_fraction=0.8):
    """Split features and targets into a leading training part and a trailing validation part.

    The split is purely positional (nothing is shuffled): the first
    ``int(n * train_fraction)`` rows become the training set and the
    remaining rows become the validation set.

    Parameters
    ----------
    data : sequence
        ``data[0]`` holds the features (must expose ``.shape[0]`` and support
        slicing) and ``data[1]`` holds the matching targets.
    train_fraction : float, default 0.8
        Fraction of the rows assigned to the training part.

    Returns
    -------
    tuple of two lists
        ``[x_train, y_train], [x_val, y_val]``.

    Raises
    ------
    ValueError
        If ``train_fraction`` is not within ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            "train_fraction must be between 0 and 1, got {!r}".format(train_fraction)
        )
    x, y = data[0], data[1]
    n = x.shape[0]
    k = int(n * train_fraction)  # index of the first validation row
    return [x[:k], y[:k]], [x[k:], y[k:]]

In [7]:
def linear(x):
    """Identity activation: hand the input back unchanged (same object)."""
    identity = x
    return identity

In [8]:
def linear_derivative(x):
    """Slope of the identity activation: 1.0 at every position of ``x``."""
    slope = 1.0
    # Both branches carry the same constant; np.where is only used so that the
    # result takes the shape of ``x``.
    is_positive = x > 0
    return np.where(is_positive, slope, slope)

In [9]:
def sigmoid(x):
    """Logistic function ``1 / (1 + e**-x)``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [10]:
def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This equals the sigmoid's derivative when ``x`` is already a sigmoid
    *output* (an activation value), not the raw pre-activation input.
    """
    complement = 1 - x
    return x * complement

In [11]:
def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Positive entries pass through unchanged; zero and negative entries
    become 0.0.
    """
    return np.maximum(x, 0.0)

In [12]:
def relu_derivative(x):
    """Step function: 1.0 where ``x`` is strictly positive, 0.0 elsewhere."""
    is_positive = x > 0
    return np.where(is_positive, 1.0, 0.0)

In [13]:
def tanh(x):
    """Hyperbolic tangent computed as ``2 / (1 + e**(-2x)) - 1``, element-wise."""
    scaled_logistic = 2 / (1 + np.exp(-2 * x))
    return scaled_logistic - 1

In [14]:
def tanh_derivative(x):
    """Return ``1 - x**2``.

    This equals the derivative of tanh when ``x`` is already a tanh *output*
    (an activation value), not the raw pre-activation input.
    """
    squared = x * x
    return 1 - squared

In [15]:
class NN:
    """Fully connected feed-forward network trained one sample at a time.

    The network has no bias terms and applies the same activation to every
    layer, including the output layer. Weights are drawn from a standard
    normal distribution through NumPy's global RNG.
    """

    def __init__(self,layerSizes,activation,learningrate = 0.01):
        """Allocate the layer buffers and randomly initialise the weights.

        Parameters
        ----------
        layerSizes : sequence of int
            Number of units in each layer, input layer first.
        activation : str
            One of ``'sigmoid'``, ``'relu'``, ``'tanh'`` or ``'linear'``.
        learningrate : float, default 0.01
            Factor applied to the gradient in ``backpropogation``.

        Raises
        ------
        ValueError
            If ``activation`` is not one of the registered names.
        """
        self.shape = layerSizes
        self.activation = activation
        self.learningrate = learningrate
        # name -> [activation function, derivative evaluated on the activation output]
        self.activations = {'sigmoid':[sigmoid,sigmoid_derivative],
                           'relu':[relu,relu_derivative],
                           'tanh':[tanh,tanh_derivative],
                           'linear':[linear,linear_derivative]}
        # Fail here with a clear message rather than with a bare KeyError on
        # the first forward pass.
        if activation not in self.activations:
            raise ValueError(
                "unknown activation {!r}; expected one of {}".format(
                    activation, sorted(self.activations)
                )
            )
        # One 1-D buffer per layer; forwardpass overwrites buffers 1..n-1 in place.
        self.layers = [np.ones(size) for size in self.shape]
        # weights[i] has one row per unit of layer i and one column per unit of layer i+1.
        self.weights = [
            np.random.randn(self.layers[i].size, self.layers[i + 1].size)
            for i in range(len(self.layers) - 1)
        ]
        # Gradient used for each weight matrix in the previous update (momentum term).
        self.derivative = [0] * len(self.weights)

    def forwardpass(self,data):
        """Propagate one sample through the network.

        ``data`` replaces the input-layer buffer; every later layer buffer is
        overwritten in place with ``activation(previous_layer . weights)``.

        Returns
        -------
        The output-layer buffer itself (not a copy), so its contents change on
        the next call.
        """
        activate = self.activations[self.activation][0]
        self.layers[0] = data
        for i in range(1,len(self.shape)):
            self.layers[i][...] = activate(np.dot(self.layers[i-1],self.weights[i-1]))
        return self.layers[-1]


    def backpropogation(self, target, momentum=0.1):
        """Apply one weight update using the activations left by ``forwardpass``.

        Parameters
        ----------
        target : array-like
            Desired output for the sample most recently passed to ``forwardpass``.
        momentum : float, default 0.1
            Factor applied to the previous update's gradient, which is added
            to the current step.

        Returns
        -------
        Mean squared difference between ``target`` and the output layer.
        """
        slope = self.activations[self.activation][1]
        error = target - self.layers[-1]
        # Output-layer delta; hidden-layer deltas are prepended below so that
        # weight_deltas[i] ends up belonging to weights[i].
        weight_deltas = [error*slope(self.layers[-1])]

        for i in range(len(self.shape)-2,0,-1):
            weight_delta = np.dot(weight_deltas[0],self.weights[i].T)*slope(self.layers[i])
            weight_deltas.insert(0,weight_delta)

        for i in range(len(self.weights)):
            # Promote the 1-D vectors to row matrices so the product below is an
            # outer product with the same shape as weights[i].
            layer = np.atleast_2d(self.layers[i])
            weight_delta = np.atleast_2d(weight_deltas[i])
            der = np.dot(layer.T,weight_delta)
            self.weights[i] += self.learningrate*der + momentum*self.derivative[i]
            self.derivative[i] = der

        return (error**2).mean()

In [16]:
def trainMLP(network,samples, epochs=10, momentum=0.1):
    """Train ``network`` with one weight update per sample for ``epochs`` passes.

    Parameters
    ----------
    network : object
        Must provide ``forwardpass(sample)`` and
        ``backpropogation(target, momentum)``; the latter returns the
        sample's error.
    samples : sequence
        ``samples[0]`` holds the inputs (indexable by row, with ``.shape[0]``)
        and ``samples[1]`` the matching targets.
    epochs : int, default 10
        Number of full passes over the samples.
    momentum : float, default 0.1
        Passed through to ``network.backpropogation``.

    Returns
    -------
    error_set : list
        Mean per-sample error of every epoch, in order.
    final_error
        The last entry of ``error_set``, or ``None`` when no epoch was run.

    Raises
    ------
    ValueError
        If ``samples`` contains no rows.
    """
    inputs, targets = samples[0], samples[1]
    n = inputs.shape[0]
    if n == 0:
        raise ValueError("samples must contain at least one row")

    error_set = []
    for i in range(epochs):
        print('Epoch: ', i+1)
        error = 0
        for j in range(n):
            # The forward pass stores the activations that backpropogation reads.
            network.forwardpass(inputs[j])
            error += network.backpropogation(targets[j], momentum)
        error_set.append(error/n)
        print('Training error',error/n)

    # With epochs <= 0 the loop never runs, so there is no final error to report.
    final_error = error_set[-1] if error_set else None
    return error_set, final_error

In [17]:
# Autoencoder-style setup: the input and output layers both have X.shape[1]
# units with a 14-unit hidden layer in between, tanh activation, learning rate 0.1.
# NOTE(review): tanh outputs lie in (-1, 1) while the standardised targets are
# not limited to that range — confirm the output activation is intended.
nn = NN([X.shape[1],14,X.shape[1]],'tanh',0.1)
epochs = 100
# The network is trained to reproduce its own input (targets are the inputs).
# NOTE(review): the saved output stops at epoch 23 of 100, so the run may have
# been interrupted — confirm before relying on nn.weights.
error_set, finalerror = trainMLP(nn,[X,X],epochs)

Epoch:  1


  


Training error 0.7786268105938058
Epoch:  2
Training error 0.7960363673271034
Epoch:  3
Training error 0.7995105627133938
Epoch:  4
Training error 0.8062014209028044
Epoch:  5
Training error 0.7965804093261678
Epoch:  6
Training error 0.8156787089778409
Epoch:  7
Training error 0.812937904196821
Epoch:  8
Training error 0.7709936085284226
Epoch:  9
Training error 0.7694184370997309
Epoch:  10
Training error 0.756600764097854
Epoch:  11
Training error 0.7658273358453733
Epoch:  12
Training error 0.7648781643118375
Epoch:  13
Training error 0.7483377355644321
Epoch:  14
Training error 0.7336940360142689
Epoch:  15
Training error 0.7289486116477437
Epoch:  16
Training error 0.7279297721779345
Epoch:  17
Training error 0.7237578798080379
Epoch:  18
Training error 0.7267322518084447
Epoch:  19
Training error 0.7234479745177119
Epoch:  20
Training error 0.7167966136763305
Epoch:  21
Training error 0.7417335308252123
Epoch:  22
Training error 0.740562369861287
Epoch:  23
Training error 0.7289

In [18]:
# Project every sample onto the hidden units using the first weight matrix.
# NOTE(review): the network's activation is not applied here, so this is the
# hidden layer's pre-activation rather than its output — confirm this is intended.
reduced_data = pd.DataFrame(np.dot(X,nn.weights[0]))

In [19]:
# Attach the labels to the reduced features. DataFrame.insert mutates the frame
# in place and raises if the column already exists, so the guard keeps this
# cell safe to re-run.
# NOTE(review): the features occupy positions 0-13, so loc=13 puts 'class'
# before the last feature column rather than at the end — confirm this layout
# is what downstream readers expect.
if 'class' not in reduced_data.columns:
    reduced_data.insert(loc=13, column='class', value=y)
reduced_data.shape

(125973, 15)

In [20]:
# Persist the reduced features together with the label column; the row index is not written.
reduced_data.to_csv("../input_data/reducedData_b.csv", index=False)