In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import random

In [2]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise.

    Evaluated in an overflow-safe form: the exponential is only ever taken of
    a non-positive number, so very negative inputs no longer overflow
    ``np.exp`` (which emits a RuntimeWarning) and simply go to 0.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy.ndarray
        Element-wise sigmoid of ``z``, same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) lies in (0, 1], so it cannot overflow for any finite z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

def sigmoid_prime(z):
    """Derivative of the sigmoid, ``sigmoid(z) * (1 - sigmoid(z))``, element-wise.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    Same type/shape as ``sigmoid(z)``.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    s = sigmoid(z)
    return s*(1-s)

def vectorized_result(j, num_classes=10):
    """Return a one-hot column vector for class index ``j``.

    Parameters
    ----------
    j : int
        Row to set to 1.0. Indexing follows NumPy rules, so a negative ``j``
        counts from the end.
    num_classes : int, optional
        Length of the vector. Defaults to 10.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_classes, 1)`` that is zero everywhere except
        for a 1.0 in row ``j``.

    Raises
    ------
    IndexError
        If ``j`` is out of bounds for ``num_classes`` (raised by NumPy).
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e

class NeuralNetwork:
    """Fully connected network: input -> sigmoid hidden layer -> sigmoid output layer.

    Parameters are learned with mini-batch gradient descent. The output error
    term used in ``backprop`` is ``(prediction - target) * sigmoid'(z)``, i.e.
    the gradient of the squared-error cost ``0.5 * ||prediction - target||**2``.
    """

    def __init__(self, layers):
        """Initialise weights and biases with standard-normal random values.

        Parameters
        ----------
        layers : sequence of int
            ``[n_inputs, n_hidden, n_outputs]``. Only the first three entries
            are read.
        """
        n_in, n_hidden, n_out = layers[0], layers[1], layers[2]

        # Biases are column vectors, one row per neuron of the layer.
        self.h_biases = np.random.randn(n_hidden, 1)
        self.o_biases = np.random.randn(n_out, 1)

        # Weight matrices are (neurons in this layer, neurons in previous layer).
        self.h_weights = np.random.randn(n_hidden, n_in)
        self.o_weights = np.random.randn(n_out, n_hidden)

    def forward_propagation(self, x):
        """Return the output-layer activations for the input column vector ``x``."""
        a = sigmoid(np.dot(self.h_weights, x) + self.h_biases)

        output = sigmoid(np.dot(self.o_weights, a) + self.o_biases)

        return output

    def update_mini_batch(self, batch, l_rate):
        """Apply one gradient-descent step using the mean gradient over ``batch``.

        Parameters
        ----------
        batch : sequence of (x, y) pairs
            ``x`` is an input column vector, ``y`` the target column vector.
        l_rate : float
            Learning rate.

        An empty batch leaves the parameters untouched (previously this raised
        ZeroDivisionError).
        """
        if len(batch) == 0:
            return

        # Running sums of the per-example gradients.
        o_b = np.zeros(self.o_biases.shape)
        h_b = np.zeros(self.h_biases.shape)

        o_w = np.zeros(self.o_weights.shape)
        h_w = np.zeros(self.h_weights.shape)

        for x, y in batch:
            o_del_b, h_del_b, o_del_w, h_del_w = self.backprop(x, y)

            o_b = o_b + o_del_b
            h_b = h_b + h_del_b
            o_w = o_w + o_del_w
            h_w = h_w + h_del_w

        # Dividing by the batch size turns the summed gradient into a mean.
        step = l_rate/len(batch)
        self.o_weights = self.o_weights - step*o_w
        self.h_weights = self.h_weights - step*h_w
        self.o_biases = self.o_biases - step*o_b
        self.h_biases = self.h_biases - step*h_b

    def backprop(self, x, y):
        """Compute the gradients of the cost for a single example.

        Parameters
        ----------
        x : numpy.ndarray
            Input column vector.
        y : numpy.ndarray
            Target column vector.

        Returns
        -------
        tuple
            ``(o_del_b, h_del_b, o_del_w, h_del_w)``: gradients for the output
            biases, hidden biases, output weights and hidden weights, each
            with the same shape as the parameter it belongs to.
        """
        # Forward pass, keeping the pre-activations needed for the derivatives.
        z_h = np.dot(self.h_weights, x) + self.h_biases
        a_h = sigmoid(z_h)

        z_o = np.dot(self.o_weights, a_h) + self.o_biases
        predicted = sigmoid(z_o)

        # Error term of the output layer.
        delta = (predicted - y) * sigmoid_prime(z_o)

        o_del_b = delta
        o_del_w = np.dot(delta, a_h.transpose())

        # Propagate the error back through the output weights to the hidden layer.
        delta = np.dot(self.o_weights.transpose(), delta) * sigmoid_prime(z_h)

        h_del_b = delta
        h_del_w = np.dot(delta, x.transpose())

        return (o_del_b, h_del_b, o_del_w, h_del_w)

    def fit(self, train_data, epochs, mini_batch_size, learning_rate):
        """Train with mini-batch gradient descent, reshuffling every epoch.

        Parameters
        ----------
        train_data : iterable of (x, y) pairs
            Training examples. The caller's container is NOT modified:
            shuffling happens on an internal copy.
        epochs : int
            Number of passes over the data.
        mini_batch_size : int
            Number of examples per update; must be >= 1.
        learning_rate : float
            Step size forwarded to ``update_mini_batch``.

        Raises
        ------
        ValueError
            If ``mini_batch_size`` is smaller than 1.
        """
        if mini_batch_size < 1:
            raise ValueError("mini_batch_size must be >= 1")

        # Private copy: random.shuffle works in place and would otherwise
        # reorder the list owned by the caller.
        data = list(train_data)
        n = len(data)
        for i in range(epochs):
            random.shuffle(data)
            batches = [data[j:j+mini_batch_size] for j in range(0, n, mini_batch_size)]
            for batch in batches:
                self.update_mini_batch(batch, learning_rate)
            print("epoch {} completed".format(i))

    def accuracy(self, test_data):
        """Return the NUMBER of correctly classified examples (a count, not a ratio).

        Parameters
        ----------
        test_data : iterable of (x, y) pairs
            ``x`` is an input column vector and ``y`` the class label, which is
            compared against the index of the largest output activation.
        """
        test_results = [(np.argmax(self.forward_propagation(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)
            

In [3]:
# as_frame=False requests plain NumPy arrays. Recent scikit-learn releases
# return a pandas DataFrame by default, and iterating a DataFrame yields
# column labels rather than rows, which breaks the per-sample reshape below.
X, y = fetch_openml('mnist_784', return_X_y=True, as_frame=False)
# Class labels as integers so they can be compared with np.argmax output.
y = y.astype(int)
# Divide by 255 so pixel values (assumed to be in 0..255) land in [0, 1].
X = (X/255).astype('float32')
# One (784, 1) column vector per image, the shape NeuralNetwork expects.
X = [np.reshape(x, (784, 1)) for x in X]

In [4]:
# Hold out 20% of the samples for evaluation; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=7,
)

In [5]:
# Replace each training label with its one-hot column vector.
# NOTE: this overwrites y_train, so the cell must not be re-run on its own
# without re-running the train/test split first.
y_train = list(map(vectorized_result, y_train))

In [8]:
# Seed both random generators so the run is reproducible:
# NeuralNetwork.__init__ draws its parameters from np.random, and
# NeuralNetwork.fit shuffles the training data with the stdlib `random` module.
SEED = 7
np.random.seed(SEED)
random.seed(SEED)

# 784 inputs (one per pixel), 100 hidden units, 10 outputs (one per class).
network = NeuralNetwork([784, 100, 10])
train_data = list(zip(X_train, y_train))
network.fit(train_data, epochs=30, mini_batch_size=10, learning_rate=3.0)

epoch 0 completed
epoch 1 completed
epoch 2 completed
epoch 3 completed
epoch 4 completed
epoch 5 completed
epoch 6 completed
epoch 7 completed
epoch 8 completed
epoch 9 completed
epoch 10 completed
epoch 11 completed
epoch 12 completed
epoch 13 completed
epoch 14 completed
epoch 15 completed
epoch 16 completed
epoch 17 completed
epoch 18 completed
epoch 19 completed
epoch 20 completed
epoch 21 completed
epoch 22 completed
epoch 23 completed
epoch 24 completed
epoch 25 completed
epoch 26 completed
epoch 27 completed
epoch 28 completed
epoch 29 completed


In [9]:
# Pair every held-out image with its integer label.
test_data = list(zip(X_test, y_test))
# network.accuracy returns the count of correct predictions, so divide by
# the number of test samples to get a percentage.
n_correct = network.accuracy(test_data)
print('Accuracy: {} %'.format((n_correct/len(test_data))*100))

Accuracy: 96.59285714285714 %
