In [548]:
import numpy as np
import matplotlib.pyplot as plt
from mnist import MNIST
import _pickle as cPickle
import gzip
import random
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    return np.maximum(0, x)

def relu_diff(x):
    """Derivative of ReLU as a mask: true where ``x`` is strictly positive.

    The result is a boolean (array), which acts as 0/1 when multiplied
    with numeric arrays.
    """
    return 0 < x

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

def sigmoid_diff(x):
    """Derivative of the logistic function: ``s * (1 - s)`` with ``s = sigmoid(x)``.

    ``sigmoid(x)`` is evaluated once and reused, instead of being computed
    twice for the two factors.
    """
    s = sigmoid(x)
    return s * (1 - s)

def tanh(x):
    """Hyperbolic tangent activation, applied element-wise."""
    activated = np.tanh(x)
    return activated

def tanh_diff(x):
    """Derivative of tanh: ``1 - tanh(x)**2``, applied element-wise."""
    t = np.tanh(x)
    return 1.0 - t ** 2

def one_hot_encoding(ys, outputs=10):
    """Convert integer class labels into one-hot rows.

    Args:
        ys: sequence (list or array) of integer class indices; each index
            must be a valid column index for a row of width ``outputs``.
        outputs: number of classes, i.e. the width of each encoded row.

    Returns:
        Float array of shape ``(len(ys), outputs)`` that is zero everywhere
        except for a 1 at column ``ys[i]`` of row ``i``.

    Raises:
        IndexError: if a label is out of range for ``outputs``.
    """
    n_samples = len(ys)
    lbl = np.zeros((n_samples, outputs))
    if n_samples:
        # One fancy-indexed assignment sets every (row, label) cell at once
        # instead of looping over the samples in Python.
        lbl[np.arange(n_samples), ys] = 1
    return lbl

def show_image(norm_pixels, label=None):
    """Display pixel data as a 28x28 grayscale image.

    The pixel values are multiplied by 255 and cast to ``uint8`` before
    plotting. When ``label`` is given, the index of its largest entry is
    shown in the figure title.
    """
    has_label = label is not None
    if has_label:
        digit = np.argmax(label)
        plt.title('Label is {label}'.format(label=digit))

    scaled = norm_pixels*255
    img = np.array(scaled, dtype='uint8').reshape((28,28))
    plt.imshow(img, cmap='gray')
    plt.show()

class BatchGenerator():
    """Serves shuffled mini-batches from a gzipped, pickled MNIST file.

    Attributes:
        training_data: list of ``(input, one-hot label)`` pairs; shuffled
            in place whenever the generator is reset.
        validation_data: tuple ``(inputs, one-hot labels)``.
        test_data: tuple ``(inputs, one-hot labels)``.
        batch_size: number of samples returned per ``batch()`` call.
        batch_index: offset of the next batch inside ``training_data``.
    """

    def __init__(self, batch_size, path='./data/mnist.pkl.gz'):
        """Load the three data splits, one-hot encode labels and shuffle.

        Args:
            batch_size: number of training samples per batch.
            path: location of the gzipped pickle holding the
                ``(training, validation, test)`` splits, each an
                ``(inputs, labels)`` pair.
        """
        # NOTE: unpickling can execute arbitrary code -- only load files
        # from a trusted source.
        # The context manager closes the gzip handle even if loading fails.
        with gzip.open(path, 'rb') as mnist:
            self.training_data, self.validation_data, self.test_data = \
                cPickle.load(mnist, encoding='iso-8859-1')

        # Training samples are stored pairwise so they can be shuffled together.
        self.training_data = list(zip(self.training_data[0],one_hot_encoding(self.training_data[1])))
        self.validation_data = (self.validation_data[0],one_hot_encoding(self.validation_data[1]))
        self.test_data = (self.test_data[0],one_hot_encoding(self.test_data[1]))

        self.batch_size = batch_size
        self.batch_index = 0
        self.reset()

    def _shuffle(self):
        """Shuffle the training pairs in place and rewind to the first batch."""
        self.batch_index = 0
        random.shuffle(self.training_data)

    def batch(self):
        """Return the next ``(x_batch, y_batch)`` pair as numpy arrays.

        The final batch of a pass may hold fewer than ``batch_size`` samples.
        Once the end of the training data is reached the data is reshuffled
        and the next call starts a new pass.
        """
        x_batch, y_batch = zip(*self.training_data[self.batch_index:self.batch_index+self.batch_size])

        self.batch_index += self.batch_size

        if self.batch_index >= len(self.training_data):
            self.reset()

        return np.array(x_batch), np.array(y_batch)

    def get_epochs(self):
        """Return the number of ``batch()`` calls that make up one full pass.

        The count is rounded up because the last batch of a pass may be
        partial.
        """
        n_samples = len(self.training_data)
        return (n_samples + self.batch_size - 1) // self.batch_size

    def reset(self):
        """Reshuffle the training data and restart from the first batch."""
        self._shuffle()
        self.batch_index = 0
    
class NeuralNetwork():
    """Fully connected feed-forward network with ReLU after every layer.

    Training is plain gradient descent. The output error term
    ``(activation - target) * relu'(z)`` is the gradient of a half
    squared-error loss; gradients are summed (not averaged) over a batch.
    """

    def __init__(self, layers):
        """Create randomly initialised weights and biases.

        Args:
            layers: sequence of layer widths, input layer first and output
                layer last (e.g. ``[784, 60, 10]``).
        """
        # layers
        self.layers = layers

        # number of layers
        self.n_layers = len(layers)

        # init weights: one (fan_in, fan_out) matrix per pair of adjacent
        # layers, filled with small uniform values in [0, 0.005)
        self.weights = []
        for i in range(self.n_layers - 1):
            self.weights.append(0.005*np.random.rand(layers[i], layers[i + 1]))

        # init biases: one vector per non-input layer
        self.biases = [0.005*np.random.rand(b) for b in layers[1:]]

    def _forward_pass(self, input, verbose=False):
        """Propagate ``input`` through every layer.

        Args:
            input: array whose last dimension matches the first layer width.
            verbose: currently unused.

        Returns:
            Dict with two lists: ``'inputs'`` holds the pre-activation of
            each layer, ``'activations'`` holds ``input`` followed by the
            ReLU output of each layer.
        """
        inputs = []
        activations = [input]
        for i in range(self.n_layers - 1):
            inputs.append(activations[i].dot(self.weights[i]) + self.biases[i])
            activations.append(relu(inputs[-1]))
        return {'inputs': inputs, 'activations': activations}

    def _backward_pass(self, forward, y):
        """Back-propagate the error for one batch.

        Args:
            forward: dict returned by ``_forward_pass`` for a 2-D batch.
            y: target outputs with the same shape as the last activation.

        Returns:
            ``(delta_w, delta_b)``: lists of gradients shaped like
            ``self.weights`` and ``self.biases``, summed over the batch.
        """
        delta_w = [np.zeros(w.shape) for w in self.weights]
        delta_b = [np.zeros(b.shape) for b in self.biases]

        # error output layer
        error = (forward['activations'][-1] - y) * relu_diff(forward['inputs'][-1])
        delta_w[-1] += forward['activations'][-2].transpose().dot(error)
        delta_b[-1] += np.sum(error, axis=0)

        # error hidden layers, walking from the last hidden layer backwards
        for l in range(self.n_layers - 3, -1, -1):
            error = error.dot(self.weights[l + 1].transpose())*relu_diff(forward['inputs'][l])
            delta_w[l] += forward['activations'][l].transpose().dot(error)
            delta_b[l] += np.sum(error, axis=0)

        return delta_w, delta_b

    def train(self, batch_generator, learning_rate = 0.02, epochs=60000):
        """Run gradient-descent updates on batches from ``batch_generator``.

        Args:
            batch_generator: object whose ``batch()`` method returns an
                ``(x_batch, y_batch)`` pair of 2-D arrays.
            learning_rate: step size applied to the summed gradients.
            epochs: number of update steps; one batch is drawn per step, so
                this counts iterations rather than passes over the data.
        """
        for e in range(epochs):
            # get batch
            x_batch, y_batch = batch_generator.batch()

            # get deltas
            delta_w, delta_b = self._backward_pass(self._forward_pass(x_batch), y_batch)

            #update weights and biases
            for l in range(self.n_layers - 1):
                self.weights[l] -= learning_rate * delta_w[l]
                self.biases[l] -= learning_rate * delta_b[l]

    def inference(self, input):
        """Predict the class index (position of the largest output).

        Args:
            input: a single sample (1-D, or 2-D with one row) or a 2-D batch
                with one sample per row.

        Returns:
            A scalar index for a single sample. For a batch with several
            rows, an array holding one index per row (a plain ``argmax``
            over the whole batch would return a meaningless flat index).
        """
        output = self._forward_pass(input)['activations'][-1]
        if output.ndim == 2 and output.shape[0] > 1:
            return np.argmax(output, axis=1)
        return np.argmax(output)
    


In [549]:
# Seed both RNGs so a fresh Restart & Run All reproduces the same run:
# `random` drives the shuffling of the training data and `np.random`
# drives the initial weights and biases.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

batch_generator = BatchGenerator(1)
nn = NeuralNetwork([784,60,10])
nn.train(batch_generator)

x_test, y_test = batch_generator.test_data

# Classify the test samples one at a time and collect the matching true
# class (position of the 1 in the one-hot label).
predictions = []
true_labels = []
for sample, target in zip(x_test, y_test):
    predictions.append(nn.inference(np.atleast_2d(sample)))
    true_labels.append(np.argmax(target))


print(confusion_matrix(true_labels,predictions))
print(classification_report(true_labels,predictions))

[[ 970    0    1    1    0    0    4    2    2    0]
 [   0 1123    4    4    0    1    1    0    2    0]
 [   9    1  988    6    9    0    2   10    7    0]
 [   1    0   14  971    0    4    0   10    2    8]
 [   0    0    4    0  964    0    3    1    0   10]
 [   8    2    0   24    2  834    9    6    2    5]
 [   6    2    3    0    8    5  933    0    1    0]
 [   2   12   19    1    5    1    1  976    2    9]
 [  11    4    6   16   14    4   11   11  890    7]
 [   4    6    0   12   50    3    1    9    0  924]]
             precision    recall  f1-score   support

          0       0.96      0.99      0.97       980
          1       0.98      0.99      0.98      1135
          2       0.95      0.96      0.95      1032
          3       0.94      0.96      0.95      1010
          4       0.92      0.98      0.95       982
          5       0.98      0.93      0.96       892
          6       0.97      0.97      0.97       958
          7       0.95      0.95      0.95  