# Imports and data preprocessing

In [79]:
import pandas as pd
import numpy as np
import math

train_raw = pd.read_csv("./bank-note/train.csv", header=None).values
train_cols = train_raw.shape[1]
train_rows = train_raw.shape[0]
train_x = np.copy(train_raw)
train_x[:,train_cols - 1] = 1 # augment the bias 1
train_y = train_raw[:, train_cols - 1]
train_y[train_y > 0] = 1      # map 1 -> 1
train_y[train_y == 0] = -1    # map 0 -> -1

test_raw = pd.read_csv("./bank-note/test.csv", header=None).values
test_cols = test_raw.shape[1]
test_rows = test_raw.shape[0]
test_x = np.copy(test_raw)
test_x[:,test_cols - 1] = 1
test_y = test_raw[:, test_cols - 1]
test_y[test_y > 0] = 1
test_y[test_y == 0] = -1

# NN Implementation

In [80]:
def sigmoid(x):
    return 1 / (1 + math.exp(-x))

def loss(y, a):
    return 0.5 * (y-a)**2
###### forward pass
## given an augmented example x(n, 1) and the corresponsing weight matrix W(n1, n) 
## returns the next level's activations (n1, 1)
## does not apply sigmoid if is_last_layer = true
def forward_step(x, w, is_last_layer):
    activations = np.dot(w,x)
    if is_last_layer:
        return activations
    return [sigmoid(z) for z in activations]

## given an augmented example x(n, 1) 
## and the list of weight matrices W[w0, w1,w2] corresponsing to the weights each layer should be multiplied
## returns the final output and activations of the layers
def forward_pass(x, W):
    a = x
    activations = [x]
    for layer in range(len(W)-1):
        a = forward_step(a, W[layer], False)   # compute the activations of layes[0,n-2]
        activations.append(a)
    activations.append(forward_step(a, W[len(W)-1], True))       # compute the last layer output
    return activations

###### backward pass
## given an augmented example x(n, 1)
## given the true label y
## given the list of layers weight matrices W
## given list of layer activations
## returns DW [Dw0, dw1, ...] that is the derivatives of L with respect to weights of each layer
def backward_pass(x,y, W,A):
    DA = y - A[len(A)-1] # derivatives L with respect to the activations of this layer, initially DA  = dL/dy = y - y*
    DW = []              # list of derivatives with repect to the weights of all layers
    for layer in reversed(range(len(W))): #0,1,2
#         if layer == (len(W) - 1): ## if the last layer no sigmoid derivation
            A = np.reshape(np.array(A[layer]), (1,-1))               # activations of this layer
            A_next = np.reshape(np.array(A[layer+1]), (1,-1))        # activations of next layer
            t = A_next * (1 - A_next) * np.reshape(DA, (-1,1))       # sigmoid derivative
            DW.insert(0,np.matmul(t, a))
#             DA = np.matmul(np.transpose(W[layer]),t)
            
#             DA[-1] = 0 # drop the bias derivative
# #         else:
# #             DW.insert(0,np.matmul(np.reshape(DA, (-1,1)), np.reshape(np.array(A[layer]), (1,-1))))
# #             DA = np.matmul(np.transpose(W[layer]),DA)
# #             DA[-1] = 0 # drop the bias derivative
    return DW
        
###### Stocastic Gradient Decent
# def SGD(X,Y,learningRate, T):
#     rows = X.shape[0]
#     cols = X.shape[1]
#     w = np.zeros(cols)                              # 1. Initialize w = 0 ∈ ℜn
#     indices = np.arange(rows)
    
#     for epoch in range(T):                          # 2. For epoch = 1 … T:
#         np.random.shuffle(indices)                       #1. Shuffle the data
#         x = X[indices,:]
#         y = Y[indices]
#         for i in range(rows):                            #2. For each training example (xi, yi) ∈ D:
#             if np.sum(x[i] * w) * y[i] <= 0:                  #If yi wTxi ≤ 0, update w ← w + r yi xi
#                 w = w + learningRate * y[i] * x[i]
#     return w                                        # 3. Return w


# Main

In [81]:
##### NN Architechture ####
## You can add more layers. Also can change the size of each layer
layerOneSize = 3
layerTwoSize = 2
LayerSizes = [train_cols, layerOneSize, layerTwoSize, 1]

## randomly initializes the weight matrix of each layer
W = [np.random.rand(LayerSizes[layer+1], LayerSizes[layer]) for layer in range(len(LayerSizes)-1)]
for w in W:               # set the bias weight columns to 0
    w[:,-1 ] = 0
###########################

###### run the >> forward_pass << and get the activations for each layer ####
A = forward_pass(train_x[0], W)
# print(activations)
###########################

###### run the >> backward_pass << and get the derivations for each layer ####
DW = backward_pass(train_x[0],train_y[0], W, A)
# print(DW)
###########################



IndexError: list index out of range