In [None]:
import numpy

In [None]:
class NeuralNet:
    """Fully connected feed-forward network trained with plain gradient descent.

    The network is described by a list of layer dictionaries, each holding a
    "layer_size" and an "activation" entry. Entry 0 describes the input layer;
    every later entry gets a weight matrix ``W<layer>`` and a bias column
    ``b<layer>`` stored in ``self.parameters``.
    """

    def __init__(self, nn_architecure):
        """Store the architecture and initialise the parameters.

        Args:
            nn_architecure: sequence of dicts with "layer_size" and
                "activation" keys. The (misspelled) parameter name is kept so
                that existing keyword callers keep working.
        """
        # Use the argument itself; reading a same-named global here would
        # silently ignore whatever architecture the caller passed in.
        self.nn_architecture = nn_architecure
        self.nb_layers = len(self.nn_architecture)

        # Random (standard normal) weights and zero biases, keyed W1, W2, ...
        # and b1, b2, ...
        self.parameters = {}
        for layer in range(1, self.nb_layers):
            layer_size = self.nn_architecture[layer]["layer_size"]
            previous_size = self.nn_architecture[layer - 1]["layer_size"]
            self.parameters['W' + str(layer)] = numpy.random.randn(layer_size, previous_size)
            self.parameters['b' + str(layer)] = numpy.zeros((layer_size, 1))

    def forward_pass(self, inputs):
        """Propagate ``inputs`` through every layer.

        Args:
            inputs: activations of the input layer; its first dimension must
                match the input layer size (this file feeds (784, 1) columns).

        Returns:
            A tuple ``(output, forward_memory)`` where ``output`` is the last
            layer's activation and ``forward_memory`` maps 'A0', 'Z1', 'A1',
            ... to the values computed along the way.
        """
        forward_memory = {'A0': inputs}
        current_A = inputs
        for layer in range(1, self.nb_layers):
            W, b = self.parameters['W' + str(layer)], self.parameters['b' + str(layer)]
            activation = self.nn_architecture[layer]["activation"]

            Z, current_A = linear_activation(W, b, current_A, activation)

            # Keep every pre-activation and activation for the backward pass.
            forward_memory['Z' + str(layer)] = Z
            forward_memory['A' + str(layer)] = current_A

        # Returning current_A (not a loop-local) keeps this well defined even
        # when the architecture only contains the input layer.
        return current_A, forward_memory

    def backward_pass(self, output, target, forward_memory):
        """Compute the parameter gradients for one example.

        Args:
            output: network output returned by ``forward_pass``.
            target: desired output; reshaped to ``output``'s shape.
            forward_memory: the memory dictionary returned by ``forward_pass``.

        Returns:
            Dict mapping 'dW<layer>' and 'db<layer>' to the gradients.
        """
        grads = {}
        # Match the target to the output's shape instead of assuming exactly
        # ten output units.
        target = numpy.reshape(target, numpy.shape(output))
        # Initial error signal: output minus target.
        prev_dA = output - target

        # Walk the layers from the output back to the first hidden layer.
        for layer in reversed(range(1, self.nb_layers)):
            W = self.parameters['W' + str(layer)]
            activation = self.nn_architecture[layer]["activation"]

            A, Z = forward_memory['A' + str(layer - 1)], forward_memory['Z' + str(layer)]

            dA, dW, db = linear_activation_backwards(A, Z, W, activation, prev_dA)

            grads['dW' + str(layer)] = dW
            grads['db' + str(layer)] = db

            prev_dA = dA

        return grads

    def update_params(self, grads, learning_rate):
        """Take one gradient-descent step on every weight and bias.

        Args:
            grads: dict with 'dW<layer>' and 'db<layer>' entries for each layer.
            learning_rate: step size multiplying each gradient.
        """
        for layer in range(1, self.nb_layers):
            w_key, b_key = 'W' + str(layer), 'b' + str(layer)
            self.parameters[w_key] = self.parameters[w_key] - learning_rate * grads['d' + w_key]
            self.parameters[b_key] = self.parameters[b_key] - learning_rate * grads['d' + b_key]
    
    

In [None]:
#Helper functions used by NeuralNet (defined at module level, outside the class)

#linear_activation: takes a layer's weights, bias and input activations and returns that layer's pre-activation Z and activation A

def linear_activation(W, b, current_A, activation):
    """Run one layer forward: linear step followed by the activation.

    Args:
        W: weight matrix of the layer.
        b: bias of the layer.
        current_A: activations coming from the previous layer.
        activation: "sigmoid" or "relu".

    Returns:
        A tuple ``(Z, new_A)``: the value returned by ``linear_forward`` and
        the result of applying the activation function to it.

    Raises:
        ValueError: if ``activation`` is not a supported name.
    """
    # Validate first: an unknown name used to fall through both branches and
    # fail later with an unhelpful UnboundLocalError on new_A.
    if activation not in ("sigmoid", "relu"):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'sigmoid' or 'relu'"
        )

    Z = linear_forward(W, b, current_A)
    new_A = sigmoid(Z) if activation == "sigmoid" else relu(Z)

    return Z, new_A

#linear_forward is just the straightforward linear step Z = WA + b
def linear_forward(W, b, current_A):
    """Return the linear part of a layer, ``dot(W, current_A) + b``."""
    weighted_input = numpy.dot(W, current_A)
    return weighted_input + b

def linear_activation_backwards(A, Z, W, activation, prev_dA):
    """Back-propagate an error signal through one layer.

    Args:
        A: activations that were fed INTO this layer during the forward pass.
        Z: this layer's pre-activation values from the forward pass.
        W: this layer's weight matrix.
        activation: "sigmoid" or "relu".
        prev_dA: error signal arriving from the layer above.

    Returns:
        A tuple ``(dA, dW, db)``: the error signal to hand to the layer below,
        and the gradients for this layer's weights and bias.

    Raises:
        ValueError: if ``activation`` is not a supported name.
    """
    if activation == "sigmoid":
        activation_slope = back_sigmoid(Z)
    elif activation == "relu":
        activation_slope = back_relu(Z)
    else:
        # Previously an unknown name left dZ unbound and failed further down.
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'sigmoid' or 'relu'"
        )

    # Chain rule: derivative of the activation times the incoming error.
    dZ = activation_slope * prev_dA

    # Every other derivative follows from dZ.
    dA, dW = linear_backwards(A, W, dZ)
    db = dZ

    return dA, dW, db
    

def linear_backwards(A, W, dZ):
    """Return ``(dA, dW)`` for the linear step given its output gradient ``dZ``.

    ``dA`` is ``dot(W.T, dZ)`` and ``dW`` is ``dot(dZ, A.T)``.
    """
    weight_grad = numpy.dot(dZ, A.T)
    input_grad = numpy.dot(W.T, dZ)
    return input_grad, weight_grad

In [None]:
#Cost function: half the sum of squared errors (called MSE in this notebook)
def MSE(yhat, y):
    """Return half the sum of squared errors between ``yhat`` and ``y``.

    Args:
        yhat: predicted values (NumPy array).
        y: target values, broadcastable against ``yhat``.

    Returns:
        A Python float, ``0.5 * sum((yhat - y) ** 2)`` over every entry.
    """
    # Flatten before the dot product so the result is a true scalar. The old
    # form produced a (1, 1) array for column vectors (converting that with
    # float() is deprecated in recent NumPy) and failed for any other shape.
    error = (yhat - y).ravel()
    return float((1/2) * numpy.dot(error, error))

def MSE_prime(yhat, y):
    """Return the difference ``yhat - y``."""
    error = yhat - y
    return error

In [None]:
#This list of dictionaries defines the whole network architecture: one entry per layer, each giving the layer size and the activation function
# One (size, activation) pair per layer, input layer first.
nn_architecture = [
    {"layer_size": size, "activation": activation_name}
    for size, activation_name in (
        (784, "none"),
        (300, "sigmoid"),
        (100, "relu"),
        (10, "sigmoid"),
    )
]

In [None]:
#activation functions

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1 + numpy.exp(-x)
    return 1 / denominator

def back_sigmoid(x):
    """Derivative of the sigmoid at ``x``: ``sigmoid(x) * (1 - sigmoid(x))``."""
    activated = sigmoid(x)
    return activated * (1 - activated)

#ReLU activation and its derivative
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return numpy.maximum(floor, x)
    
def back_relu(x):
    """Derivative of ReLU: 1 where ``x > 0`` and 0 elsewhere.

    Args:
        x: pre-activation values (array-like).

    Returns:
        A NEW array with the same shape and dtype as ``x``. The argument is
        left untouched; the earlier in-place version overwrote the caller's
        array, corrupting the cached pre-activations it was handed.
    """
    x = numpy.asarray(x)
    return (x > 0).astype(x.dtype)


In [None]:
#Create minibatches

def create_minibatches(minibatch_size, all_inputs, all_targets):
    """Shuffle the dataset and cut it into minibatches.

    Args:
        minibatch_size: maximum number of examples per minibatch (> 0).
        all_inputs: sequence of input examples.
        all_targets: sequence of targets, paired with ``all_inputs`` by index.

    Returns:
        A list of ``(X_mini, Y_mini)`` tuples of lists. Every minibatch is
        non-empty; only the last one may hold fewer than ``minibatch_size``
        examples.

    Raises:
        ValueError: if ``minibatch_size`` is not positive.
    """
    if minibatch_size <= 0:
        raise ValueError("minibatch_size must be a positive integer")

    data = list(zip(all_inputs, all_targets))
    numpy.random.shuffle(data)

    minibatches = []
    # Stepping by the batch size never yields an empty trailing slice, unlike
    # iterating over `len // size + 1` batches, which appended ([], []) whenever
    # the size divided the dataset exactly (or the dataset was empty).
    for start in range(0, len(data), minibatch_size):
        minibatch = data[start:start + minibatch_size]
        X_mini = [example for example, _ in minibatch]
        Y_mini = [target for _, target in minibatch]
        minibatches.append((X_mini, Y_mini))

    return minibatches
        
    

In [None]:
# Read both CSV files fully into memory as lists of lines. The `with` blocks
# close the file handles as soon as the lines are read; previously they stayed
# open for the life of the kernel.
# NOTE(review): these are absolute, machine-specific paths - adjust before
# running elsewhere.
with open(r"C:\Users\oscar\Documents\CS\ML\Make your own NN\mnist_train.csv") as training_data_file:
    training_data_list = training_data_file.readlines()
with open(r"C:\Users\oscar\Documents\CS\ML\Make your own NN\mnist_test.csv") as test_data_file:
    test_data_list = test_data_file.readlines()

In [None]:
def _parse_mnist_record(record):
    """Convert one CSV line (label followed by 784 pixel values) to arrays.

    Returns:
        ``(inputs, targets)``: ``inputs`` is a (784, 1) column with the pixel
        values rescaled from 0..255 into 0.01..1.0; ``targets`` is a length-10
        vector of 0.01 with 0.99 at the label's index.
    """
    # Split the record at the commas; the first value is the label.
    values = record.split(',')
    # Scale and shift the pixels. numpy.asarray(..., dtype=float) stands in
    # for numpy.asfarray, which no longer exists in NumPy 2.x.
    inputs = (numpy.asarray(values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    inputs = numpy.reshape(inputs, (784, 1))
    # Target output values: all 0.01, except the desired label which is 0.99.
    targets = numpy.zeros(10) + 0.01
    targets[int(values[0])] = 0.99
    return inputs, targets


all_inputs = []
all_targets = []
all_test_inputs = []
all_test_targets = []

for record in training_data_list:
    if not record.strip():
        continue  # ignore blank lines instead of failing on int('')
    inputs, targets = _parse_mnist_record(record)
    all_inputs.append(inputs)
    all_targets.append(targets)

# Same thing for the test data. Each test target is built from that test
# record's own label; the earlier loop read the label from the last TRAINING
# record, so every test example received the same target.
for record in test_data_list:
    if not record.strip():
        continue
    inputs, targets = _parse_mnist_record(record)
    all_test_inputs.append(inputs)
    all_test_targets.append(targets)

In [None]:
#Gradient Descent function with minibatches

def train(net, inputs, targets, learning_rate, nb_iterations, batch_size):
    """Train ``net`` with minibatch gradient descent.

    Args:
        net: object exposing ``forward_pass``, ``backward_pass`` and
            ``update_params`` (a NeuralNet).
        inputs: sequence of training inputs.
        targets: sequence of training targets, paired with ``inputs`` by index.
        learning_rate: step size applied to the mean minibatch gradient.
        nb_iterations: number of passes over the whole dataset.
        batch_size: number of examples per minibatch.

    Returns:
        None. ``net`` is updated in place.
    """
    for iteration in range(nb_iterations):
        # Use the data passed by the caller; the global dataset was read here
        # before, which made the `inputs`/`targets` arguments dead.
        minibatches = create_minibatches(batch_size, inputs, targets)

        for X_mini, Y_mini in minibatches:
            if not X_mini:
                # No gradients to apply; update_params would raise KeyError.
                continue

            # Sum the per-example gradients over the minibatch.
            batch_grads = {}
            for example_input, example_target in zip(X_mini, Y_mini):
                output, memo = net.forward_pass(example_input)
                grads = net.backward_pass(output, example_target, memo)

                for key, grad in grads.items():
                    if key in batch_grads:
                        batch_grads[key] = grad + batch_grads[key]
                    else:
                        batch_grads[key] = grad

            # Divide by the number of examples actually summed so that a
            # shorter final minibatch is averaged correctly as well.
            net.update_params(batch_grads, learning_rate / len(X_mini))

    return

In [None]:
# Seed NumPy's global RNG before building the network so the random weight
# initialisation (and the minibatch shuffling that follows) is reproducible
# when the notebook is restarted and run from the top.
SEED = 0
numpy.random.seed(SEED)
net = NeuralNet(nn_architecture)


In [None]:
# Training hyperparameters, named so they are easy to find and tune.
LEARNING_RATE = 0.01
NB_ITERATIONS = 2
BATCH_SIZE = 64

train(
    net,
    all_inputs,
    all_targets,
    learning_rate=LEARNING_RATE,
    nb_iterations=NB_ITERATIONS,
    batch_size=BATCH_SIZE,
)

In [None]:
#Calculate network accuracy on training data
# Count the training examples whose strongest output unit matches the
# strongest entry of the target vector.
c = sum(
    1
    for sample, label in zip(all_inputs, all_targets)
    if numpy.argmax(net.forward_pass(sample)[0]) == numpy.argmax(label)
)
print('accuracy', c/len(all_inputs))

In [None]:
#Calculate network accuracy on test data
# Count the test examples whose strongest output unit matches the strongest
# entry of the target vector.
c = sum(
    1
    for sample, label in zip(all_test_inputs, all_test_targets)
    if numpy.argmax(net.forward_pass(sample)[0]) == numpy.argmax(label)
)
print('accuracy', c/(len(all_test_inputs)))

In [None]:
# Smoke test: one forward pass and one backward pass on the first training
# example, keeping the results around for inspection.
output, memo = net.forward_pass(all_inputs[0])
grads = net.backward_pass(output, all_targets[0], memo)