# Deep Learning: NN from Scratch



In [None]:
from google.colab import files
files.upload()

# Part 1:
Create a one-hidden-layer neural network model from scratch. The model is feed-forward and is trained with back-propagation. A generic initialise method handles any number of input, output and hidden nodes: the input and output counts are derived from the data, while the number of hidden nodes is a constant set before training. After the node weights are initialised randomly, each training row is fed forward through the network: every node takes the weighted sum of its inputs plus a bias and passes it through the activation function tanh(x), which gives the output of that node. The error of each forward propagation is calculated by comparing the expected output with the actual output. Stochastic gradient descent is implemented in back-propagation to update the weights using the learning rate and the error, improving the output with each epoch. Once the model is trained, the test data is forward-propagated through it to get a classification prediction. scikit-learn's accuracy_score compares the actual test labels with the predicted ones.

In [0]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from random import random
import math

#function to create initial Neural Network 
#takes in parameters for inputs, hidden nodes and outputs
def init_NN(nodes_in, nodes_h, nodes_out):
    """Build a randomly initialised network with a single hidden layer.

    Every node is a dict whose "w" entry holds one weight per incoming
    connection followed by one extra trailing weight used as the bias.
    All weights are drawn from random().

    Returns a two-element list: [hidden_layer, output_layer].
    """
    def make_layer(node_count, fan_in):
        #fan_in + 1 weights per node: the extra (last) weight is the bias
        return [{"w": [random() for _ in range(fan_in + 1)]}
                for _ in range(node_count)]

    #hidden layer is fed by the inputs, output layer is fed by the hidden layer
    hidden_layer = make_layer(nodes_h, nodes_in)
    output_layer = make_layer(nodes_out, nodes_h)
    return [hidden_layer, output_layer]


#tanh activiate weights function

def tanh(x):
    """Return the hyperbolic tangent of x, computed with np.tanh."""
    activated = np.tanh(x)
    return activated

#tanh derivative activate weights function for gradiant descent 

def grad_tanh(x):
    """Return 1 - x squared.

    This is the tanh derivative expressed in terms of the tanh OUTPUT:
    cost() calls it with a node's stored activation, not the raw sum.
    """
    squared = np.square(x)
    return (1 - squared)

#function to forward propagate through network

def forward_prop(init, inputs):
    """Feed one row through every layer and return the last layer's outputs.

    Side effect: each node's activation is stored on the node under 'o'.
    A node only reads as many incoming values as it has non-bias weights,
    so a trailing label on the row is ignored by the first layer.
    """
    signal = inputs
    for layer in init:
        activations = []
        for node in layer:
            weights = node['w']
            #the last weight is the bias, so it seeds the weighted sum
            total = weights[-1]
            for idx, weight in enumerate(weights[:-1]):
                total += weight * signal[idx]
            #squash the weighted sum with the tanh activation
            node['o'] = tanh(total)
            activations.append(node['o'])
        #this layer's outputs are the next layer's inputs
        signal = activations
    return signal

#cost function to determine the error of each node for gradient descent

def cost(init, output):
    """Back-propagate the error and store each node's delta under 'd'.

    Layers are visited from the output layer back to the first layer.
    The output layer's error is (expected - actual output); an earlier
    layer's error is the sum over the next layer's nodes of
    (weight from this node * that node's delta). Each error is then
    multiplied by grad_tanh of the node's stored output 'o'.

    forward_prop must have run first so every node has an 'o' entry.
    """
    last = len(init) - 1
    for depth in range(last, -1, -1):
        layer = init[depth]
        if depth == last:
            #output layer: expected value minus the node's actual output
            errors = [output[pos] - node['o'] for pos, node in enumerate(layer)]
        else:
            downstream = init[depth + 1]
            errors = []
            for pos in range(len(layer)):
                weighted = 0.0
                for nxt in downstream:
                    weighted += (nxt['w'][pos] * nxt['d'])
                errors.append(weighted)
        #delta = error * derivative of the activation at the node's output
        for node, err in zip(layer, errors):
            node['d'] = err * grad_tanh(node['o'])
            
#back propagte through the neural network updating the weights with SGD

def back_prop(init, data, learning, epoch, output):
    """Train the network in place with stochastic gradient descent.

    init     -- network as built by init_NN (list of layers of node dicts)
    data     -- rows of feature values whose last element is the integer
                class label (used as an index into the one-hot target)
    learning -- learning rate
    epoch    -- number of passes over data
    output   -- number of output nodes / classes

    For every row: forward propagate, compute the deltas with cost(), then
    move every weight by learning * delta * incoming value. The trailing
    bias weight of each node is moved by learning * delta (its incoming
    value is implicitly 1); previously the bias was never updated and kept
    its random initial value for the whole of training.

    Prints the summed squared error once per epoch.
    """
    #repeat feed forward and backwards for each epoch
    for e in range(epoch):
        epoch_error = 0
        for inputs in data:
            result = forward_prop(init, inputs)
            #one-hot encode the class label held in the last column
            outputs = [0] * output
            outputs[inputs[-1]] = 1
            cost(init, outputs)
            epoch_error += sum((target - got) ** 2 for target, got in zip(outputs, result))
            for depth, layer in enumerate(init):
                if depth == 0:
                    #first layer is fed by the row's features (label dropped)
                    update_input = inputs[:-1]
                else:
                    #later layers are fed by the previous layer's outputs
                    update_input = [node['o'] for node in init[depth - 1]]
                for node in layer:
                    step = learning * node['d']
                    for cell, value in enumerate(update_input):
                        node['w'][cell] += step * value
                    #the bias is the last weight and has a constant input of 1
                    node['w'][-1] += step
        print("epochs: ", e," learning rate: ", learning, "error: ",epoch_error)
                        
#method to predict the y values of each row in test returning max from forward propagation of the model  

def predict(init, row):
    """Return the index of the output node with the largest activation for row.

    Ties resolve to the lowest index.
    """
    scores = forward_prop(init, row)
    strongest = max(scores)
    return scores.index(strongest)

#method to train and test the Neural Network

def train_NN(train, test, learning, epoch, hidden):
    """Build, train and apply a one-hidden-layer network.

    train/test are lists of rows whose last element is the label slot.
    The input count is the row length minus the label; the output count is
    the number of distinct labels found in train.

    Returns the list of predicted class indices, one per test row.
    """
    #every column except the trailing label is an input
    n_inputs = len(train[0]) - 1
    #one output node per distinct label value, e.g. 0/1
    n_outputs = len({row[-1] for row in train})
    network = init_NN(n_inputs, hidden, n_outputs)
    back_prop(network, train, learning, epoch, n_outputs)
    return [predict(network, row) for row in test]

# Part 2:
Test the neural network on simple CSV file circles500.csv dataset.

In [None]:

#use panads to read csv skipping first line column names
df = pd.read_csv('circles500.csv', header = None, skiprows=1)

#scale every column to the 0-1 range to normalise the inputs
scaler = MinMaxScaler()
data = pd.DataFrame(scaler.fit_transform(df))

#split the SCALED data into training and test sets (66% / 34%)
#previously the raw df was split here and the scaled frame was never used
train, test = train_test_split(data, test_size=0.34)

#set learning rate, number of epochs and number of hidden nodes in model
learning = 0.1
epoch = 1000
hidden_nodes = 5

#convert the dataframes to lists of rows
train = train.values.tolist()
test = test.values.tolist()

#the label is the last column: convert it from float to int because
#back_prop uses it as an index into the one-hot target
#(assumes the scaled labels are 0/1; round() guards against float noise)
for row in train:
    row[-1] = int(round(row[-1]))

#keep the actual test labels, then blank them in the rows so the
#prediction step cannot see them
y_test = [int(round(row[-1])) for row in test]
for row in test:
    row[-1] = None

#get the predicted labels
y_pred = train_NN(train, test, learning, epoch, hidden_nodes)

#get the accuracy of the model
pred_acc = accuracy_score(y_test, y_pred)
print("Prediction Accuracy = ", pred_acc)

# Part 3:
Test the neural network on a more difficult dataset: classifying frogs versus deer from the CIFAR data, as given. This dataset proved very slow to run, taking approximately 10 minutes per epoch. We weren't able to figure out the problem, and therefore the results are lower than they should be.

In [None]:

# This function taken from the CIFAR website
def unpickle(file):
    """Load and return the pickled object stored at path `file`.

    The payload is decoded with encoding='bytes', so the keys of a loaded
    CIFAR batch dict are bytes (e.g. b'data', b'labels').
    NOTE: pickle can execute arbitrary code; only load trusted files.
    """
    import pickle
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# Load the first CIFAR training batch from the local file "data_batch_1".
data_dict = unpickle("data_batch_1") # Keys: {b'batch_label', b'labels', b'data', b'filenames' }

# Raw image rows of the batch.
# NOTE(review): data1 is not referenced again in the visible code - confirm it is still needed.
data1 = data_dict[b'data']

""" This function converts the raw array of 3072 floats describing each image into a structure where the colour of each pixel
in an image is represented by its r,g,b (in this order) value for every pixel in the image in question.
The input into the function is an array of 3072 values (uint8 according to the CIFAR documentation).
This assumes that the raw data coming from the CIFAR data is in the format where all the r values come first, followed by
all the g values and then all the b values """
def convert_rgb(dataplusindex):
    """Turn one flat 3072-value image row into a (32, 32, 3) array.

    The flat row is read as three consecutive 32x32 channel planes and
    rearranged so the last axis holds the three channel values of a pixel,
    in plane order.

    The input is no longer modified: the original assigned to `.shape`,
    which reshaped the caller's array object in place. np.reshape returns
    a new view (or a copy) and also accepts plain sequences.
    """
    channels_first = np.reshape(dataplusindex, (3, 32, 32))
    #move the channel axis last: (channel, row, col) -> (row, col, channel)
    return channels_first.transpose([1, 2, 0])


"""This function converts the R,G,B pixels into greyscale pixels. There are a few methods for doing this.
If the data is ordered correctly as R,G,B it is appropriate to use Method 2 (link below) which multiplies each component colour by
a certain constant, and then sums the total.
If not, one can use a standard average of the R,G,B, Method 1 (link below)"""    
def convert_greyscale(rgb_pic):
    """Flatten an image of r,g,b pixels into a 1-D array of greyscale values.

    Each pixel becomes 0.299*r + 0.587*g + 0.114*b rounded to 2 decimals
    ("Method 2" of
    https://www.prasannakumarr.in/journal/color-to-grayscale-python-image-processing),
    which assumes the channel order is r, g, b. The alternative "Method 1"
    from the same page is a plain average of the three channels.
    Pixels are emitted row by row.
    """
    luma_weights = [0.299, 0.587, 0.114]
    greys = [
        round(np.dot(pixel, luma_weights), 2)
        for pixel_row in rgb_pic
        for pixel in pixel_row
    ]
    return np.array(greys)

# Converting the data into format [greyscale list, classification].
# Each data instance will be added to a larger list.

#Frog is class 6 in the CIFAR data
#Deer is class 4 in the CIFAR data

# Choosing Frog to be class 1 for our implementation
# Choosing Deer to be class 0 for our implementation

# Map the two CIFAR classes we keep onto our own labels: deer (4) -> 0, frog (6) -> 1
cifar_to_label = {4: 0, 6: 1}

Frog_Deer_classified_list = []
for i, cifar_class in enumerate(data_dict[b'labels']):
    # Skip every image that is neither a deer nor a frog
    if cifar_class not in cifar_to_label:
        continue
    rgb_pic = convert_rgb(data_dict[b'data'][i])
    # Greyscale pixels followed by the class label as the last element
    greyscale_arr = np.append(convert_greyscale(rgb_pic), cifar_to_label[cifar_class])
    Frog_Deer_classified_list.append(greyscale_arr)


#Now converting each numpy array to a plain list for inputting into pandas dataframe
new_Frog_Deer_classified_list = [arr.tolist() for arr in Frog_Deer_classified_list]

# Creating Pandas Data Frame from the filtered data
# Each row of the data frame contains 1025 entries (1025 columns), these are 1024 greyscale pixels and a classification (1/Frog or 0/Deer)
df_train = pd.DataFrame(new_Frog_Deer_classified_list)
#print(df_train)



data_dict_test = unpickle("test_batch") # Keys: {b'batch_label', b'labels', b'data', b'filenames' }

data_test = data_dict_test[b'data']

# Converting the data into format [greyscale list, classification].
# Each data instance will be added to a larger list.

#Frog is class 6 in the CIFAR data
#Deer is class 4 in the CIFAR data

# Choosing Frog to be class 1 for our implementation
# Choosing Deer to be class 0 for our implementation

Frog_Deer_classified_list_test = []
for i in range(len(data_dict_test[b'labels'])):
    if data_dict_test[b'labels'][i] == 4 or data_dict_test[b'labels'][i] ==6: #Filtering for Frog and Deer data, CIFAR classes 4 and 6
        # Take the pixels from the TEST batch. This previously read
        # data_dict (the training batch), pairing test labels with
        # unrelated training images.
        rgb_pic_test = convert_rgb(data_dict_test[b'data'][i])
        greyscale_arr_test = convert_greyscale(rgb_pic_test)
        if data_dict_test[b'labels'][i] == 4:
            greyscale_arr_test = np.append(greyscale_arr_test,0)
            Frog_Deer_classified_list_test.append(greyscale_arr_test)

        if data_dict_test[b'labels'][i] == 6:
            greyscale_arr_test = np.append(greyscale_arr_test,1)
            Frog_Deer_classified_list_test.append(greyscale_arr_test)


#Now converting list from numpy array to list for inputting into pandas dataframe
new_Frog_Deer_classified_list_test = []
for element in Frog_Deer_classified_list_test:
    x = element.tolist()
    new_Frog_Deer_classified_list_test.append(x)

# Creating Pandas Data Frame from the filtered data
# Each row of the data frame contains 1025 entries (1025 columns), these are 1024 greyscale pixels and a classification (1/Frog or 0/Deer)
df_test = pd.DataFrame(new_Frog_Deer_classified_list_test)
df_test




In [None]:

#set learning rate, number of epochs and number of hidden layers in model
learning = 0.1
epoch = 20
hidden_nodes = 12

#turn both dataframes into plain lists of rows
train = df_train.values.tolist()
test = df_test.values.tolist()

#the label sits in the last column as a float; store it as an int
for record in train:
    record[-1] = int(record[-1])

for record in test:
    record[-1] = int(record[-1])

#remember the actual test labels before they are blanked
y_test = [record[-1] for record in test]

#blank the label slot of every test row so it can be relabelled by prediction
for record in test:
    record[-1] = None

#train the model and get the predicted labels
y_pred1 = train_NN(train, test, learning, epoch, hidden_nodes)

#get the accuracy of the model
x_new = accuracy_score(y_test, y_pred1)
print("Prediction accuracy of model", x_new)

# Part 4a:


Implement 1st Enhancement - Added a second hidden layer to make a 2-hidden-layer Neural Network


Part i: Test added layer on circles500.csv

Part ii: Test on images 

In [None]:
#part i
#function to create initial Neural Network 
#takes in parameters for inputs, hidden nodes and outputs
def init_NN2(nodes_in, nodes_h, nodes_h2,nodes_out):
    """Build a randomly initialised network with two hidden layers.

    Every node is a dict whose "w" entry holds one weight per node of the
    preceding layer plus one extra trailing weight used as the bias, all
    drawn from random().

    Returns [hidden_layer_1, hidden_layer_2, output_layer].
    """
    #sizes of consecutive layers, starting with the input width
    sizes = [nodes_in, nodes_h, nodes_h2, nodes_out]
    init = []
    for fan_in, width in zip(sizes, sizes[1:]):
        #fan_in + 1 weights per node: the extra (last) weight is the bias
        layer = [{"w": [random() for _ in range(fan_in + 1)]}
                 for _ in range(width)]
        init.append(layer)
    return init


#tanh activiate weights function
def tanh(x):
    """Return the hyperbolic tangent of x, computed with np.tanh."""
    activated = np.tanh(x)
    return activated

#tanh derivative activate weights function for gradiant descent 
def grad_tanh(x):
    """Return 1 - x squared.

    This is the tanh derivative expressed in terms of the tanh OUTPUT:
    cost() calls it with a node's stored activation, not the raw sum.
    """
    squared = np.square(x)
    return (1 - squared)

#function to forward propagate through network
def forward_prop(init, inputs):
    """Feed one row through every layer and return the last layer's outputs.

    Side effect: each node's activation is stored on the node under 'o'.
    A node only reads as many incoming values as it has non-bias weights,
    so a trailing label on the row is ignored by the first layer.
    """
    signal = inputs
    for layer in init:
        activations = []
        for node in layer:
            weights = node['w']
            #the last weight is the bias, so it seeds the weighted sum
            total = weights[-1]
            for idx, weight in enumerate(weights[:-1]):
                total += weight * signal[idx]
            #squash the weighted sum with the tanh activation
            node['o'] = tanh(total)
            activations.append(node['o'])
        #this layer's outputs are the next layer's inputs
        signal = activations
    return signal

#cost function to determine the error of each node for gradient descent 
def cost(init, output):
    """Back-propagate the error and store each node's delta under 'd'.

    Layers are visited from the output layer back to the first layer.
    The output layer's error is (expected - actual output); an earlier
    layer's error is the sum over the next layer's nodes of
    (weight from this node * that node's delta). Each error is then
    multiplied by grad_tanh of the node's stored output 'o'.

    forward_prop must have run first so every node has an 'o' entry.
    """
    last = len(init) - 1
    for depth in range(last, -1, -1):
        layer = init[depth]
        if depth == last:
            #output layer: expected value minus the node's actual output
            errors = [output[pos] - node['o'] for pos, node in enumerate(layer)]
        else:
            downstream = init[depth + 1]
            errors = []
            for pos in range(len(layer)):
                weighted = 0.0
                for nxt in downstream:
                    weighted += (nxt['w'][pos] * nxt['d'])
                errors.append(weighted)
        #delta = error * derivative of the activation at the node's output
        for node, err in zip(layer, errors):
            node['d'] = err * grad_tanh(node['o'])
            
#back propagte through the neural network updating the weights with SGD
def back_prop(init, data, learning, epoch, output):
    """Train the network in place with stochastic gradient descent.

    init     -- network as built by init_NN2 (list of layers of node dicts)
    data     -- rows of feature values whose last element is the integer
                class label (used as an index into the one-hot target)
    learning -- learning rate
    epoch    -- number of passes over data
    output   -- number of output nodes / classes

    Fixes relative to the previous version:
    * the trailing bias weight of each node is now updated
      (by learning * delta; its incoming value is implicitly 1) -
      before, biases kept their random initial values;
    * the squared error is reset at the start of every epoch instead of
      accumulating across all epochs;
    * the progress line is printed once per epoch, not once per row.
    """
    #repeat feed forward and backwards for each epoch
    for e in range(epoch):
        sum_error = 0
        for inputs in data:
            result = forward_prop(init, inputs)
            #one-hot encode the class label held in the last column
            outputs = [0] * output
            outputs[inputs[-1]] = 1
            cost(init, outputs)

            sum_error += sum((target - got) ** 2 for target, got in zip(outputs, result))
            for depth, layer in enumerate(init):
                if depth == 0:
                    #first layer is fed by the row's features (label dropped)
                    update_input = inputs[:-1]
                else:
                    #later layers are fed by the previous layer's outputs
                    update_input = [node['o'] for node in init[depth - 1]]
                for node in layer:
                    step = learning * node['d']
                    for cell, value in enumerate(update_input):
                        node['w'][cell] += step * value
                    #the bias is the last weight and has a constant input of 1
                    node['w'][-1] += step
        print("epochs: ", e," learning rate: ", learning, "error: ",sum_error)

                        
#method to predict the y values of each row in test returning max from forward propagation of the model               
def predict(init, row):
    """Return the index of the output node with the largest activation for row.

    Ties resolve to the lowest index.
    """
    scores = forward_prop(init, row)
    strongest = max(scores)
    return scores.index(strongest)

#method to train and test the Neural Network
def train_NN2(train, test, learning, epoch, hidden, hidden2):
    """Build, train and apply a network with two hidden layers.

    train/test are lists of rows whose last element is the label slot.
    The input count is the row length minus the label; the output count is
    the number of distinct labels found in train.

    Returns the list of predicted class indices, one per test row.
    """
    #every column except the trailing label is an input
    n_inputs = len(train[0]) - 1
    #one output node per distinct label value, e.g. 0/1
    n_outputs = len({row[-1] for row in train})
    network = init_NN2(n_inputs, hidden, hidden2, n_outputs)
    back_prop(network, train, learning, epoch, n_outputs)
    return [predict(network, row) for row in test]
df = pd.read_csv('circles500.csv', header = None, skiprows=1)

#scale every column to the 0-1 range to normalise the inputs
scaler = MinMaxScaler()
data = pd.DataFrame(scaler.fit_transform(df))

#split the SCALED data; previously the raw df was split and the scaled
#frame was never used
train, test = train_test_split(data, test_size=0.34)


learning = 0.1
epoch = 10000
hidden_nodes = 100
hidden_nodes2 = 50

train = train.values.tolist()
test = test.values.tolist()

#the label is the last column: store it as an int because back_prop uses
#it as an index into the one-hot target
#(assumes the scaled labels are 0/1; round() guards against float noise)
for row in train:
    row[-1] = int(round(row[-1]))

#keep the actual labels of THIS split before blanking them
actual = [int(round(row[-1])) for row in test]

#remove the label for test set so can relabel with prediction
for row in test:
    row[-1] = None

#get the predicted labels
y_pred = train_NN2(train, test, learning, epoch, hidden_nodes,hidden_nodes2)

#get the accuracy of the model against the labels of this split
#(this previously used y_test, which is left over from an earlier cell
#and belongs to a different test set)
accuracy_score(actual, y_pred)

In [0]:
#PART ii
#set learning rate, number of epochs and number of hidden layers in model
learning = 0.1
epoch = 10
hidden_nodes = 124
hidden_nodes2 = 50

#convert the dataframes to lists of rows
#(the empty-list pre-assignments of train/test and the misspelt, unused
#`ypred1 = []` were dead code and have been dropped)
train = df_train.values.tolist()
test = df_test.values.tolist()

#the label sits in the last column as a float; store it as an int
for row in train:
    row[-1] = int(row[-1])

for row in test:
    row[-1] = int(row[-1])

#get the actual output label from the test set
y_test = [row[-1] for row in test]

#remove the label for test set so can relabel with prediction
for row in test:
    row[-1] = None

#get the predicted labels
y_pred1 = train_NN2(train, test, learning, epoch, hidden_nodes, hidden_nodes2)

#get the accuracy of the model
accuracy_score(y_test, y_pred1)

# Part 4b:


For 2nd enhancement I compared the accuracy of the Neural Network using different activation functions to calculate weights, different from our original activation, which was tanh(x). 
The activation functions I used were:
- Sigmoid Function
- ArcTan

References :
- https://towardsdatascience.com/activation-functions-neural-networks-1cbd9f8d91d6

# Using Sigmoid Activation Function

In [0]:
#Network using Sigmoid Function
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from random import random
import math

    
#function to create initial Neural Network 
#takes in parameters for inputs, hidden nodes and outputs
def init_NN(nodes_in, nodes_h, nodes_out):
    """Build a randomly initialised network with a single hidden layer.

    Every node is a dict whose "w" entry holds one weight per incoming
    connection followed by one extra trailing weight used as the bias.
    All weights are drawn from random().

    Returns a two-element list: [hidden_layer, output_layer].
    """
    def make_layer(node_count, fan_in):
        #fan_in + 1 weights per node: the extra (last) weight is the bias
        return [{"w": [random() for _ in range(fan_in + 1)]}
                for _ in range(node_count)]

    #hidden layer is fed by the inputs, output layer is fed by the hidden layer
    hidden_layer = make_layer(nodes_h, nodes_in)
    output_layer = make_layer(nodes_out, nodes_h)
    return [hidden_layer, output_layer]

def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e**-x) of the scalar x.

    Evaluated in a numerically stable way: the exponential is only ever
    taken of a non-positive number, so large negative inputs return 0.0
    instead of raising OverflowError from math.exp.
    """
    if x >= 0:
        return 1/(1+math.exp(-x))
    #for negative x use the equivalent form e**x / (1 + e**x)
    exp_x = math.exp(x)
    return exp_x/(1+exp_x)

#sigmoid derivative activate weights function for gradiant descent 
def derivative_sigmoid(x):
    """Return x * (1 - x).

    This is the sigmoid derivative expressed in terms of the sigmoid
    OUTPUT: cost() calls it with a node's stored activation.
    """
    complement = 1 - x
    return x * complement

#function to forward propagate through network
def forward_prop(init, inputs):
    """Feed one row through every layer and return the last layer's outputs.

    Side effect: each node's activation is stored on the node under 'o'.
    A node only reads as many incoming values as it has non-bias weights,
    so a trailing label on the row is ignored by the first layer.
    """
    signal = inputs
    for layer in init:
        activations = []
        for node in layer:
            weights = node['w']
            #the last weight is the bias, so it seeds the weighted sum
            total = weights[-1]
            for idx, weight in enumerate(weights[:-1]):
                total += weight * signal[idx]
            #squash the weighted sum with the sigmoid activation
            node['o'] = sigmoid(total)
            activations.append(node['o'])
        #this layer's outputs are the next layer's inputs
        signal = activations
    return signal

#cost function to determine the error of each node for gradient descent 
def cost(init, output):
    """Back-propagate the error and store each node's delta under 'd'.

    Layers are visited from the output layer back to the first layer.
    The output layer's error is (expected - actual output); an earlier
    layer's error is the sum over the next layer's nodes of
    (weight from this node * that node's delta). Each error is then
    multiplied by derivative_sigmoid of the node's stored output 'o'.

    forward_prop must have run first so every node has an 'o' entry.
    """
    last = len(init) - 1
    for depth in range(last, -1, -1):
        layer = init[depth]
        if depth == last:
            #output layer: expected value minus the node's actual output
            errors = [output[pos] - node['o'] for pos, node in enumerate(layer)]
        else:
            downstream = init[depth + 1]
            errors = []
            for pos in range(len(layer)):
                weighted = 0.0
                for nxt in downstream:
                    weighted += (nxt['w'][pos] * nxt['d'])
                errors.append(weighted)
        #delta = error * derivative of the activation at the node's output
        for node, err in zip(layer, errors):
            node['d'] = err * derivative_sigmoid(node['o'])
            
#back propagte through the neural network updating the weights with SGD
def back_prop(init, data, learning, epoch, output):
    """Train the network in place with stochastic gradient descent.

    init     -- network as built by init_NN (list of layers of node dicts)
    data     -- rows of feature values whose last element is the integer
                class label (used as an index into the one-hot target)
    learning -- learning rate
    epoch    -- number of passes over data
    output   -- number of output nodes / classes

    For every row: forward propagate, compute the deltas with cost(), then
    move every weight by learning * delta * incoming value. The trailing
    bias weight of each node is moved by learning * delta (its incoming
    value is implicitly 1); previously the bias was never updated and kept
    its random initial value for the whole of training.
    """
    #repeat feed forward and backwards for each epoch
    for _ in range(epoch):
        for inputs in data:
            #forward pass stores every node's output under 'o'
            forward_prop(init, inputs)
            #one-hot encode the class label held in the last column
            outputs = [0] * output
            outputs[inputs[-1]] = 1
            cost(init, outputs)
            for depth, layer in enumerate(init):
                if depth == 0:
                    #first layer is fed by the row's features (label dropped)
                    update_input = inputs[:-1]
                else:
                    #later layers are fed by the previous layer's outputs
                    update_input = [node['o'] for node in init[depth - 1]]
                for node in layer:
                    step = learning * node['d']
                    for cell, value in enumerate(update_input):
                        node['w'][cell] += step * value
                    #the bias is the last weight and has a constant input of 1
                    node['w'][-1] += step
                        
#method to predict the y values of each row in test returning max from forward propagation of the model               
def predict(init, row):
    """Return the index of the output node with the largest activation for row.

    Ties resolve to the lowest index.
    """
    scores = forward_prop(init, row)
    strongest = max(scores)
    return scores.index(strongest)

#method to train and test the Neural Network
def train_NN(train, test, learning, epoch, hidden):
    """Build, train and apply a one-hidden-layer network.

    train/test are lists of rows whose last element is the label slot.
    The input count is the row length minus the label; the output count is
    the number of distinct labels found in train.

    Returns the list of predicted class indices, one per test row.
    """
    #every column except the trailing label is an input
    n_inputs = len(train[0]) - 1
    #one output node per distinct label value, e.g. 0/1
    n_outputs = len({row[-1] for row in train})
    network = init_NN(n_inputs, hidden, n_outputs)
    back_prop(network, train, learning, epoch, n_outputs)
    return [predict(network, row) for row in test]

In [0]:
# This function taken from the CIFAR website
def unpickle(file):
    """Load and return the pickled object stored at path `file`.

    The payload is decoded with encoding='bytes', so the keys of a loaded
    CIFAR batch dict are bytes (e.g. b'data', b'labels').
    NOTE: pickle can execute arbitrary code; only load trusted files.
    """
    import pickle
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# Load the first CIFAR training batch from the local file "data_batch_1".
data_dict = unpickle("data_batch_1") # Keys: {b'batch_label', b'labels', b'data', b'filenames' }

# Raw image rows of the batch.
# NOTE(review): data1 is not referenced again in the visible code - confirm it is still needed.
data1 = data_dict[b'data']

""" This function converts the raw array of 3072 floats describing each image into a structure where the colour of each pixel
in an image is represented by its r,g,b (in this order) value for every pixel in the image in question.
The input into the function is an array of 3072 values (uint8 according to the CIFAR documentation).
This assumes that the raw data coming from the CIFAR data is in the format where all the r values come first, followed by
all the g values and then all the b values """
def convert_rgb(dataplusindex):
    """Turn one flat 3072-value image row into a (32, 32, 3) array.

    The flat row is read as three consecutive 32x32 channel planes and
    rearranged so the last axis holds the three channel values of a pixel,
    in plane order.

    The input is no longer modified: the original assigned to `.shape`,
    which reshaped the caller's array object in place. np.reshape returns
    a new view (or a copy) and also accepts plain sequences.
    """
    channels_first = np.reshape(dataplusindex, (3, 32, 32))
    #move the channel axis last: (channel, row, col) -> (row, col, channel)
    return channels_first.transpose([1, 2, 0])


"""This function converts the R,G,B pixels into greyscale pixels. There are a few methods for doing this.
If the data is ordered correctly as R,G,B it is appropriate to use Method 2 (link below) which multiplies each component colour by
a certain constant, and then sums the total.
If not, one can use a standard average of the R,G,B, Method 1 (link below)"""    
def convert_greyscale(rgb_pic):
    """Flatten an image of r,g,b pixels into a 1-D array of greyscale values.

    Each pixel becomes 0.299*r + 0.587*g + 0.114*b rounded to 2 decimals
    ("Method 2" of
    https://www.prasannakumarr.in/journal/color-to-grayscale-python-image-processing),
    which assumes the channel order is r, g, b. The alternative "Method 1"
    from the same page is a plain average of the three channels.
    Pixels are emitted row by row.
    """
    luma_weights = [0.299, 0.587, 0.114]
    greys = [
        round(np.dot(pixel, luma_weights), 2)
        for pixel_row in rgb_pic
        for pixel in pixel_row
    ]
    return np.array(greys)

# Converting the data into format [greyscale list, classification].
# Each data instance will be added to a larger list.

#Frog is class 6 in the CIFAR data
#Deer is class 4 in the CIFAR data

# Choosing Frog to be class 1 for our implementation
# Choosing Deer to be class 0 for our implementation

# Map the two CIFAR classes we keep onto our own labels: deer (4) -> 0, frog (6) -> 1
cifar_to_label = {4: 0, 6: 1}

Frog_Deer_classified_list = []
for i, cifar_class in enumerate(data_dict[b'labels']):
    # Skip every image that is neither a deer nor a frog
    if cifar_class not in cifar_to_label:
        continue
    rgb_pic = convert_rgb(data_dict[b'data'][i])
    # Greyscale pixels followed by the class label as the last element
    greyscale_arr = np.append(convert_greyscale(rgb_pic), cifar_to_label[cifar_class])
    Frog_Deer_classified_list.append(greyscale_arr)


#Now converting each numpy array to a plain list for inputting into pandas dataframe
new_Frog_Deer_classified_list = [arr.tolist() for arr in Frog_Deer_classified_list]

# Creating Pandas Data Frame from the filtered data
# Each row of the data frame contains 1025 entries (1025 columns), these are 1024 greyscale pixels and a classification (1/Frog or 0/Deer)
df_train = pd.DataFrame(new_Frog_Deer_classified_list)
#print(df_train)



data_dict_test = unpickle("test_batch") # Keys: {b'batch_label', b'labels', b'data', b'filenames' }

data_test = data_dict_test[b'data']

# Converting the test data into format [greyscale list, classification].
# Each data instance will be added to a larger list.

# CIFAR class ids we keep, mapped to our own labels: Deer (4) -> 0, Frog (6) -> 1
cifar_to_binary_test = {4: 0, 6: 1}

Frog_Deer_classified_list_test = []
# Labels and pixels are walked together from the TEST batch so every picture
# is paired with its own label (the pixels used to be read from data_dict,
# i.e. the training batch, which mismatched test images and test labels).
for cifar_label, raw_pixels in zip(data_dict_test[b'labels'], data_test):
    # skip every picture that is neither a deer nor a frog
    if cifar_label not in cifar_to_binary_test:
        continue
    greyscale_arr_test = convert_greyscale(convert_rgb(raw_pixels))
    # the classification is stored as the last entry, after the greyscale pixels
    Frog_Deer_classified_list_test.append(
        np.append(greyscale_arr_test, cifar_to_binary_test[cifar_label]))

#Now converting list from numpy array to list for inputting into pandas dataframe
new_Frog_Deer_classified_list_test = [
    sample.tolist() for sample in Frog_Deer_classified_list_test
]

# Creating Pandas Data Frame from the filtered data
# Each row of the data frame contains 1025 entries (1025 columns), these are 1024 greyscale pixels and a classification (1/Frog or 0/Deer)
df_test = pd.DataFrame(new_Frog_Deer_classified_list_test)
#df_test


#set learning rate, number of epochs and number of hidden nodes in model
learning = 0.1
epoch = 10
hidden_nodes = 50

#convert the dataframes to plain lists of rows (values followed by the label)
train_sigmoid = df_train.values.tolist()
test_sigmoid = df_test.values.tolist()

#the label of each training row is converted to an integer from a float
for sample in train_sigmoid:
    sample[-1] = int(sample[-1])

#keep the actual test labels aside (as integers), then blank the label in
#each test row so it can be relabelled with the prediction
y_test_sigmoid = []
for sample in test_sigmoid:
    y_test_sigmoid.append(int(sample[-1]))
    sample[-1] = None
    
#get the predicted labels
#y_pred_sigmoid = train_NN(train_sigmoid, test_sigmoid, learning, epoch, hidden_nodes)


In [0]:
# Train and evaluate the Sigmoid network 10 times and report the average accuracy
accuracy_scores_sigmoid = []
for _ in range(10):
    y_pred_sigmoid = train_NN(train_sigmoid, test_sigmoid, learning, epoch, hidden_nodes)
    #accuracy of this run: predicted test labels against the actual ones
    accuracy_scores_sigmoid.append(accuracy_score(y_test_sigmoid, y_pred_sigmoid))

average_score = sum(accuracy_scores_sigmoid) / len(accuracy_scores_sigmoid)
print("Average network accuracy over 10 iterations with Sigmoid", average_score)

# Using ArcTan Activation Function

In [0]:
#Network using ArcTan Function
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from random import random
import math as m


#function to create initial Neural Network 
#takes in parameters for inputs, hidden nodes and outputs
def init_NN(nodes_in, nodes_h, nodes_out):
    """Build a randomly initialised network with one hidden layer.

    Every node is a dict whose "w" entry lists one weight per incoming
    connection followed by one extra trailing weight for the bias. All
    weights are drawn with random().

    Args:
        nodes_in: number of input values fed to each hidden node.
        nodes_h: number of nodes in the hidden layer.
        nodes_out: number of nodes in the output layer.

    Returns:
        A two element list: [hidden layer, output layer], each layer being
        a list of node dicts.
    """
    def make_layer(node_count, fan_in):
        #fan_in incoming weights plus one bias weight for every node
        return [{"w": [random() for _ in range(fan_in + 1)]}
                for _ in range(node_count)]

    #hidden layer first, then the output layer which is fed by the hidden nodes
    hidden_layer = make_layer(nodes_h, nodes_in)
    output_layer = make_layer(nodes_out, nodes_h)
    return [hidden_layer, output_layer]

def ArcTan(x):
    """Arctangent activation function: return atan(x) in radians."""
    activation = math.atan(x)
    return activation

def derivative_ArcTan(x):
    """Return the slope of atan at x, i.e. 1 / (x^2 + 1).

    x is the value that is fed INTO atan (the pre-activation sum), not the
    value atan returns.
    """
    denominator = x**2 + 1
    return 1/denominator

#function to forward propagate through network
def forward_prop(init, inputs):
    """Feed one row through the network and return the last layer's outputs.

    As a side effect every node stores its activation under node['o'].

    Args:
        init: the network, a list of layers, each layer a list of node dicts
            holding their weights under 'w' (bias weight last).
        inputs: the values fed to the first layer; only the first
            len(weights)-1 entries are read, so extra trailing entries
            are ignored.

    Returns:
        List with the activation of every node of the final layer.
    """
    signal = inputs
    for layer in init:
        #activations of this layer, they become the inputs of the next one
        activations = []
        for node in layer:
            weights = node['w']
            #the last weight is the bias, so the weighted sum starts from it
            weighted_sum = weights[-1]
            for idx, weight in enumerate(weights[:-1]):
                weighted_sum += weight * signal[idx]
            #arctan activation on the weighted sum gives the node output
            node['o'] = ArcTan(weighted_sum)
            activations.append(node['o'])
        signal = activations
    return signal

#cost function to determine the error of each node for gradient descent 
def cost(init, output):
    """Back-propagate the error and store each node's delta under node['d'].

    Layers are visited from the last one to the first. Every node must
    already hold its activation under node['o'] (set by the forward pass).

    Args:
        init: the network, a list of layers, each layer a list of node dicts
            with weights under 'w' and activation under 'o'.
        output: expected value for every node of the last layer.

    Returns:
        None; the deltas are written into the node dicts.
    """
    last_row = len(init) - 1
    for row in reversed(range(len(init))):
        layer = init[row]
        errors = []
        if row != last_row:
            #hidden node: its error is the sum over the next layer of
            #(weight coming from this node * delta of the receiving node)
            for cell in range(len(layer)):
                node_error = 0.0
                for node in init[row + 1]:
                    node_error += (node['w'][cell] * node['d'])
                errors.append(node_error)
        else:
            #output node: error is expected value - produced output
            for cell, node in enumerate(layer):
                errors.append(output[cell] - node['o'])
        for node, error in zip(layer, errors):
            #delta = error * slope of the activation at this node.
            #node['o'] is atan(z), so the slope 1/(1+z^2) has to be written
            #in terms of the output: 1/(1+tan(o)^2) = cos(o)^2.
            #Plugging the output straight into 1/(1+x^2) gives the wrong slope.
            node['d'] = error * math.cos(node['o']) ** 2
            
#back propagte through the neural network updating the weights with SGD
def back_prop(init, data, learning, epoch, output):
    """Train the network in place with stochastic gradient descent.

    For every epoch and every row: forward propagate, compute the node
    deltas with cost(), then update all weights of all layers.

    Args:
        init: the network, a list of layers of node dicts (weights under
            'w', bias weight last).
        data: training rows; the last entry of a row is its integer class
            label, everything before it is fed to the network.
        learning: learning rate applied to every weight update.
        epoch: number of passes over data.
        output: number of output nodes (length of the one-hot target).

    Returns:
        None; the weights in init are modified.
    """
    for _ in range(epoch):
        for inputs in data:
            #forward pass stores every node's output in node['o']
            forward_prop(init, inputs)
            #one-hot encode the label of the row as the expected outputs
            outputs = [0] * output
            outputs[inputs[-1]] = 1
            cost(init, outputs)
            #update weights based on the new deltas
            for depth, layer in enumerate(init):
                if depth == 0:
                    #first layer is fed by the row itself, without its label
                    update_input = inputs[:-1]
                else:
                    #later layers are fed by the outputs of the previous layer
                    update_input = [node['o'] for node in init[depth - 1]]
                for node in layer:
                    #SGD step: learning rate * delta of the node * input value
                    step = learning * node['d']
                    for cell, value in enumerate(update_input):
                        node['w'][cell] += step * value
                    #the bias weight sees a constant input of 1; without this
                    #line the bias keeps its random initial value forever
                    node['w'][-1] += step
                        
#method to predict the y values of each row in test returning max from forward propagation of the model               
def predict(init, row):
    """Return the predicted class of row.

    The row is forward propagated and the index of the largest output
    activation is returned (the first one if several are equal).
    """
    scores = forward_prop(init, row)
    best_score = max(scores)
    return scores.index(best_score)

#method to train and test the Neural Network
def train_NN(train, test, learning, epoch, hidden):
    """Create a network, train it on train and predict every row of test.

    Args:
        train: training rows, label stored as the last entry of each row.
        test: rows to classify.
        learning: learning rate passed to back_prop.
        epoch: number of epochs passed to back_prop.
        hidden: number of nodes in the hidden layer.

    Returns:
        List with one predicted class index per row of test.
    """
    #every column except the label is an input of the network
    n_inputs = len(train[0]) - 1
    #one output node per distinct label found in the training rows, ie. 0/1
    n_outputs = len({sample[-1] for sample in train})
    #fresh network with random weights, then trained in place
    network = init_NN(n_inputs, hidden, n_outputs)
    back_prop(network, train, learning, epoch, n_outputs)
    return [predict(network, sample) for sample in test]

In [0]:
# This function taken from the CIFAR website
def unpickle(file):
    """Load and return the pickled object stored in the file at path file.

    The pickle is read with encoding='bytes'.
    NOTE: pickle can execute arbitrary code while loading, only use this on
    files from a trusted source.
    """
    import pickle
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

data_dict = unpickle("data_batch_1") # Keys: {b'batch_label', b'labels', b'data', b'filenames' }

data1 = data_dict[b'data']

# CIFAR class ids we keep, mapped to our own labels: Deer (4) -> 0, Frog (6) -> 1
cifar_to_binary_train = {4: 0, 6: 1}

Frog_Deer_classified_list = []
for cifar_label, raw_pixels in zip(data_dict[b'labels'], data1):
    # skip every picture that is neither a deer nor a frog
    if cifar_label not in cifar_to_binary_train:
        continue
    grey_pixels = convert_greyscale(convert_rgb(raw_pixels))
    # the classification is stored as the last entry, after the greyscale pixels
    Frog_Deer_classified_list.append(
        np.append(grey_pixels, cifar_to_binary_train[cifar_label]))

# numpy arrays -> plain Python lists so pandas builds one column per entry
new_Frog_Deer_classified_list = [
    sample.tolist() for sample in Frog_Deer_classified_list
]

# Creating Pandas Data Frame from the filtered data
# Each row of the data frame contains 1025 entries (1025 columns), these are 1024 greyscale pixels and a classification (1/Frog or 0/Deer)
df_train = pd.DataFrame(new_Frog_Deer_classified_list)
#print(df_train)



data_dict_test = unpickle("test_batch") # Keys: {b'batch_label', b'labels', b'data', b'filenames' }

data_test = data_dict_test[b'data']

# Converting the test data into format [greyscale list, classification].
# Each data instance will be added to a larger list.

# CIFAR class ids we keep, mapped to our own labels: Deer (4) -> 0, Frog (6) -> 1
cifar_to_binary_test = {4: 0, 6: 1}

Frog_Deer_classified_list_test = []
# Labels and pixels are walked together from the TEST batch so every picture
# is paired with its own label (the pixels used to be read from data_dict,
# i.e. the training batch, which mismatched test images and test labels).
for cifar_label, raw_pixels in zip(data_dict_test[b'labels'], data_test):
    # skip every picture that is neither a deer nor a frog
    if cifar_label not in cifar_to_binary_test:
        continue
    greyscale_arr_test = convert_greyscale(convert_rgb(raw_pixels))
    # the classification is stored as the last entry, after the greyscale pixels
    Frog_Deer_classified_list_test.append(
        np.append(greyscale_arr_test, cifar_to_binary_test[cifar_label]))

#Now converting list from numpy array to list for inputting into pandas dataframe
new_Frog_Deer_classified_list_test = [
    sample.tolist() for sample in Frog_Deer_classified_list_test
]

# Creating Pandas Data Frame from the filtered data
# Each row of the data frame contains 1025 entries (1025 columns), these are 1024 greyscale pixels and a classification (1/Frog or 0/Deer)
df_test = pd.DataFrame(new_Frog_Deer_classified_list_test)
#df_test


#set learning rate, number of epochs and number of hidden nodes in model
learning = 0.1
epoch = 10
hidden_nodes = 50

#convert the dataframes to plain lists of rows (values followed by the label)
train_ArcTan = df_train.values.tolist()
test_ArcTan = df_test.values.tolist()

#the label of each training row is converted to an integer from a float
for sample in train_ArcTan:
    sample[-1] = int(sample[-1])

#keep the actual test labels aside (as integers), then blank the label in
#each test row so it can be relabelled with the prediction
y_test_ArcTan = []
for sample in test_ArcTan:
    y_test_ArcTan.append(int(sample[-1]))
    sample[-1] = None
    
#get the predicted labels
#y_pred_ArcTan = train_NN(train_ArcTan, test_ArcTan, learning, epoch, hidden_nodes)


In [None]:
# Train and evaluate the ArcTan network 10 times and report the average accuracy
accuracy_scores_ArcTan = []
for _ in range(10):
    #every call builds a new randomly initialised network and trains it
    y_pred_ArcTan = train_NN(train_ArcTan, test_ArcTan, learning, epoch, hidden_nodes)
    #accuracy of this run: predicted test labels against the actual ones
    accuracy_scores_ArcTan.append(accuracy_score(y_test_ArcTan, y_pred_ArcTan))

average_score = sum(accuracy_scores_ArcTan) / len(accuracy_scores_ArcTan)
print("Average network accuracy over 10 iterations with ArcTan", average_score)