In [21]:
%matplotlib inline
from keras.datasets import mnist
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [58]:
class Neural_Network:
    def __init__(self, net):
        self.layers = len(net)
        self.net = net
        self.biases = [np.around(np.random.randn(1,k), decimals=10) for k in net[1:]]
        self.weights = [np.around(np.random.randn(j,k), decimals=10) for k,j in zip(net[:-1], net[1:])]
        self.c = []
        self.iterate = 0
        
    def get_wb(self):
        return self.weights, self.biases
    
    def SDG(self, data , epochs = 15, mini_batch_size = 500, lr = 0.03, testing_data=False):
        if testing_data:
            return self.evaluation(data)
        batches = [data[i:i+mini_batch_size] for i in range(0,len(data),mini_batch_size)]
        for x in range(epochs):
            np.random.shuffle(data)
            for batch in batches:
                self.mini_batch_update(batch,lr)
            print("epochs: {}/{}".format(x, epochs))
                
    def mini_batch_update(self, batch, lr):
        delta_w = [np.zeros(w.shape) for w in self.weights]
        delta_b = [np.zeros(b.shape) for b in self.biases]
        for i in range(len(batch)):
            input_node = batch[i][0]
            output_node = batch[i][1]
            d_w, d_b = self.gradient_descent(input_node, output_node)  
            delta_w = [np.around((w1+w2), decimals=10) for w1,w2 in zip(delta_w,d_w)]
            delta_b = [np.around((b1+b2), decimals=10) for b1,b2 in zip(delta_b, d_b)]
        self.weights = [np.around((w-((lr/len(batch))*d_w)), decimals=10) for w,d_w in zip(self.weights,delta_w)]
        self.biases = [np.around((b-((lr/len(batch))*d_b)), decimals=10) for b, d_b in zip(self.biases, delta_b)]    

    def gradient_descent(self, x, y):
        # Take in a single training example
        # back propagate
        d_w = [np.zeros(w.shape) for w in self.weights]
        d_b = [np.zeros(b.shape) for b in self.biases]
        z_nodes, a_nodes = self.feedforward(x)
        output_e = self.delta_cost(a_nodes[-1], y)*self.d_sigmoid(z_nodes[-1])
        output_e = np.around(output_e, decimals=10)
        d_w[-1] = np.around(np.dot(output_e.transpose(), a_nodes[-2]), decimals=10)
        d_b[-1] = output_e
        
        for l in range(2,self.layers):
            '''
            if l ==3:
                print("deez nut: ", output_e)
                print("dot: ",  np.dot(output_e, self.weights[-l+1]))
                print("d_sigmoid: ", self.d_sigmoid(z_nodes[-l]))
                print("e after: ", np.dot(output_e, self.weights[-l+1])*self.d_sigmoid(z_nodes[-l]))
            '''
            output_e = np.dot(output_e, self.weights[-l+1])*self.d_sigmoid(z_nodes[-l])
            output_e = np.around(output_e, decimals=10)
            d_w[-l] = np.around(np.dot(output_e.transpose(), a_nodes[-l-1]), decimals=10)
            d_b[-l] = output_e
            '''
            if np.isnan(d_w[-l]).any():
                print("layer: ", -l)
                print("z_nodes: ", z_nodes[-l])
                print("weight: ", self.weights[-l+1])
                print("output_e: ", output_e)
                print("a_nodes: ", a_nodes[-l-1])
                print("d_w: ",d_w[-l])
                return -1
            '''
        self.iterate += 1
        return d_w, d_b 
                  
    def feedforward(self, input_node):
        # Take in a training example
        # output the z and a for each layer
        node = np.expand_dims(input_node,0)
        z = []
        a = [node]
        for w,b in zip(self.weights, self.biases):
            node = np.around((np.dot(node, w.transpose())+b), decimals=10)
            z.append(node)
            node = np.around(self.sigmoid(node), decimals=10)
            a.append(node)
            #if np.isnan(node).any():
                #print("a: {}".format(node))
        return z, a
    
    def cost(self, output_node, label):
        co = 1/2*(output_node-label)**2
        if (self.iterate%100) == 0:
            print(co)
        self.c.append(co.mean())
    
    def get_cost(self):
        return self.c
    
    def delta_cost(self,output_node, label):
        error = (output_node-label)
        return error
    
    def sigmoid(self, z):
        a = np.around((1/(1+np.exp(-z))), decimals=10)
        return a
    
    def d_sigmoid(self, z):
        d_a = np.around((np.exp(-z)/((1+np.exp(-z))**2)), decimals=10)
        return d_a
    
    def plot_cost(self):
        print("len: {}", len(self.c))
        plt.plot(len(self.c), self.c, 'ro')
        plt.show()
        
    def evaluation(self, data):
        correct = 0
        wrong = 0
        for x,y in data:
            z,a = self.feedforward(x)
            y_prime = a[-1].squeeze()
            
            if np.allclose(y_prime, y):
                correct +=1
            else:
                wrong += 1
        accuracy = correct/len(data)
        print("correct prediction: {} Accuracy: {}".format(correct, accuracy))
            

In [9]:
(train_X, train_Y), (test_X, test_Y) = mnist.load_data()

In [49]:
train_x = np.zeros((train_X.shape[0], train_X.shape[1]**2))
train_y = np.zeros((train_Y.shape[0], 10))
test_x = np.zeros((test_X.shape[0], test_X.shape[1]**2)) 
test_y = np.zeros((test_Y.shape[0], 10))


for i in range(len(train_x)):
    x = np.array([train_X[i].ravel()])
    x = np.ma.log(x)
    x = x.filled(0)
    train_x[i] = x

for i in range(len(train_Y)):
    temp = np.zeros(10)
    temp[train_Y[i]] = 1
    train_y[i] = temp
training_data = [[x,y] for x,y in zip(train_x,train_y)]


for i in range(len(test_x)):
    test_x[i] = np.array([test_X[i].ravel()])

for i in range(len(test_Y)):
    temp = np.zeros(10)
    temp[test_Y[i]] = 1
    test_y[i] = temp
test_data = [[x,y] for x,y in zip(test_x,test_y)]

In [51]:
network = [784,25,25,10]
epochs = 15
mini_batch_size = 500
learing_rate = 0.01

In [59]:
handwritten_digit_network = Neural_Network(network)

handwritten_digit_network.SDG(training_data, epochs, mini_batch_size, learing_rate)

epochs: 0/15
epochs: 1/15
epochs: 2/15
epochs: 3/15
epochs: 4/15
epochs: 5/15
epochs: 6/15
epochs: 7/15
epochs: 8/15
epochs: 9/15
epochs: 10/15
epochs: 11/15
epochs: 12/15
epochs: 13/15
epochs: 14/15


In [60]:
w, b = handwritten_digit_network.get_wb()

In [62]:
for i in range(len(test_x)):
    x = np.array([test_X[i].ravel()])
    x = np.ma.log(x)
    x = x.filled(0)
    test_x[i] = x

for i in range(len(test_Y)):
    temp = np.zeros(10)
    temp[test_Y[i]] = 1
    test_y[i] = temp
test_data = [[x,y] for x,y in zip(test_x,test_y)]

In [74]:
len(test_data[:100])

100

In [76]:
correct = 0
for i in range(len(test_data[:1000])):
    x = test_data[i][0]
    y = test_data[i][1]
    a,z = handwritten_digit_network.feedforward(x)
    x_out = a[-1]
    if np.argmax(x_out) == np.argmax(y):
        correct +=1
print("accuracy: ", correct, len(test_data[:1000]))

accuracy:  87 1000


In [8]:
for data in training_data:
    z,a = nn.feedforward(data[0])
    output_e = nn.delta_cost(a[-1], data[1])*nn.d_sigmoid(z[-1])
    d_w = np.dot(output_e.transpose(), a[-2])
    d_b = output_e
    
    for l in range(2,4):
        if np.isnan(d_w).any():
            print(d_w)
        output_e = np.dot(output_e, w[-l+1])*nn.d_sigmoid(z[-l])
        d_w = np.dot(output_e.transpose(), a[-l-1])
        d_b = output_e

NameError: name 'training_data' is not defined

In [38]:
e = np.array([[ 0.050496   ,-0.03165766 , 0.0716049 ,  0.09488568 , 0.02535765,  0.00241863,
  -0.0178089,  -0.00044482,  0.00025129,  0.00255088, -0.05742943, -0.00036819
   0.05017959, -0.02962547, -0.00251379,  0.00545067 -0.00060663  0.0028027
   0.01085913 -0.00759827 -0.03224925  0.0344678  -0.01500107  0.0034105
   0.00917969]])

SyntaxError: invalid syntax (702631590.py, line 1)