## MNIST neural network

#### Fully Connected Layer (Linear Layer)

In [1]:
import numpy as np

class Linear():
    """Fully connected layer that maps ``X`` to ``np.dot(X, W) + b``.

    Attributes:
        W: weight matrix of shape (in_size, out_size), sampled from a standard
            normal distribution and scaled by 0.01.
        b: bias row of shape (1, out_size), initialised to zeros.
        params: ``[W, b]``; the list holds the very same array objects, so
            in-place updates made through it change the layer.
        gradW, gradB, gradInput: gradients stored by the latest ``backward``
            call (``None`` until then).
    """

    def __init__(self, in_size, out_size):
        weights = np.random.randn(in_size, out_size) * 0.01
        bias = np.zeros((1, out_size))
        self.W = weights
        self.b = bias
        self.params = [weights, bias]
        self.gradW = self.gradB = self.gradInput = None

    def forward(self, X):
        """Cache ``X`` for the backward pass and return ``np.dot(X, W) + b``."""
        self.X = X
        projected = np.dot(X, self.W)
        self.output = projected + self.b
        return self.output

    def backward(self, nextgrad):
        """Back-propagate ``nextgrad`` (gradient w.r.t. this layer's output).

        Returns:
            A pair ``(gradInput, [gradW, gradB])``: the gradient w.r.t. the
            cached input, followed by the parameter gradients in the same
            order as ``params``.
        """
        grad_w = np.dot(self.X.T, nextgrad)
        # Summing over axis 0 adds up the bias gradient across the batch rows.
        grad_b = np.sum(nextgrad, axis=0)
        grad_in = np.dot(nextgrad, self.W.T)
        self.gradW, self.gradB, self.gradInput = grad_w, grad_b, grad_in
        return grad_in, [grad_w, grad_b]

#### Rectified Linear Activation Layer (ReLU)

In [2]:
class ReLU():
    """Element-wise rectifier layer; it carries no trainable parameters."""

    def __init__(self):
        self.params = []
        self.gradInput = None

    def forward(self, X):
        """Return ``np.maximum(X, 0)`` and cache it for the backward pass."""
        rectified = np.maximum(X, 0)
        self.output = rectified
        return rectified

    def backward(self, nextgrad):
        """Zero the incoming gradient wherever the cached output is <= 0.

        Returns:
            ``(gradInput, [])``; the empty list stands for "no parameter
            gradients". ``nextgrad`` itself is left unmodified.
        """
        blocked = self.output <= 0
        passed = nextgrad.copy()
        passed[blocked] = 0
        self.gradInput = passed
        return passed, []

#### Defining the softmax function

In [3]:
def softmax(x):
    """Return the softmax of ``x`` taken along axis 1 (one distribution per row).

    Each row's maximum is subtracted before exponentiating, which leaves the
    result unchanged but keeps ``np.exp`` from overflowing on large scores.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    weights = np.exp(x - row_max)
    totals = np.sum(weights, axis=1, keepdims=True)
    return weights / totals

In [4]:
class CrossEntropy:
    """Softmax followed by mean negative log-likelihood for integer labels.

    ``X`` holds one row of raw class scores per sample and ``y`` holds the
    integer class index of each sample (it is used to index the columns of
    the softmax output).
    """

    def forward(self, X, y):
        """Return the cross-entropy loss averaged over the whole batch.

        The softmax probabilities are kept in ``self.p`` and the batch size
        in ``self.m``.
        """
        self.m = y.shape[0]
        self.p = softmax(X)
        # Probability assigned to the correct class of every sample.
        cross_entropy = -np.log(self.p[np.arange(self.m), y])
        # Average over all samples, not just the first one.
        loss = np.sum(cross_entropy) / self.m
        return loss

    def backward(self, X, y):
        """Return the gradient of the mean loss w.r.t. the scores ``X``.

        The batch size is read from ``y`` so this method does not depend on
        ``forward`` having been called first.
        """
        m = y.shape[0]
        grad = softmax(X)
        # d(loss)/d(scores) = softmax(scores) - one_hot(y), averaged over m.
        grad[np.arange(m), y] -= 1
        grad /= m
        return grad

#### Loading the MNIST datasets

In [5]:
from keras.datasets import mnist
from keras.utils import np_utils

# Load MNIST as two (features, targets) pairs: one for training, one for testing.
(train_features,train_targets),(test_features,test_targets) = mnist.load_data()

ModuleNotFoundError: No module named 'keras'

In [6]:
# Flatten every image into one feature vector. The number of samples is read
# from the array itself rather than hard-coded, and -1 lets NumPy infer the
# per-image length.
train_features = train_features.reshape(train_features.shape[0], -1)
print(train_features.shape)
test_features = test_features.reshape(test_features.shape[0], -1)
print(test_features.shape)

# Normalize input from 0-255 to 0-1
train_features = train_features / 255.0
test_features = test_features / 255.0

print(train_targets.shape)
print(test_targets.shape)

# Names used by the rest of the notebook: the test split doubles as the
# validation set.
X_train = train_features
y_train = train_targets

x_val = test_features
y_val = test_targets

NameError: name 'train_features' is not defined

In [None]:
# Visualize the first 10 images in the dataset and their labels
%matplotlib inline
import matplotlib.pyplot as plt
# Draw the first 10 training images in a single row, without axes.
plt.figure(figsize = (10, 1))
for i in range(10):
    plt.subplot(1, 10, i+1)
    # Each sample is a flat vector; reshape it back to 28x28 for display.
    plt.imshow(X_train[i].reshape(28,28), cmap = 'gray')
    plt.axis('off')
plt.show()
# The labels are wrapped in a 1-tuple so the array is formatted as one value.
print('Label for each of above image : %s' % (y_train[0:10],))

#### Here we define the container neural network class that enables the forward propagation and backward propagation of the entire network. Note how this class enables us to add layers of different types and also correctly pass gradients using the chain rule.

In [None]:
class NN():
    """Sequential container that chains layers and a loss function.

    Layers must expose ``params``, ``forward(X)`` and ``backward(nextgrad)``,
    where ``backward`` returns ``(grad_wrt_input, param_grads)``. The loss
    object must expose ``forward(X, y)`` and ``backward(X, y)``.

    Attributes:
        layers: the layers in the order they were added.
        params: one entry per layer, each being that layer's ``params`` list.
        loss_func: the loss object used by ``train_step``.
        grads: per-layer parameter gradients from the latest ``backward``
            call, stored from the LAST layer to the FIRST.
    """

    def __init__(self, lossfunc = None):
        """Create an empty network.

        Args:
            lossfunc: loss object; when omitted a new ``CrossEntropy`` is
                created for this network, so that networks never share one
                stateful loss instance.
        """
        self.params = []
        self.layers = []
        self.loss_func = CrossEntropy() if lossfunc is None else lossfunc
        self.grads = []

    def add_layer(self, layer):
        """Append ``layer`` to the network and register its parameters."""
        self.layers.append(layer)
        self.params.append(layer.params)

    def forward(self, X):
        """Feed ``X`` through every layer in order and return the result."""
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backward(self, nextgrad):
        """Propagate ``nextgrad`` from the last layer back to the first.

        Returns:
            ``self.grads``: the parameter gradients of each layer, ordered
            from the last layer to the first.
        """
        self.clear_grad_params()
        for layer in reversed(self.layers):
            nextgrad, grad = layer.backward(nextgrad)
            self.grads.append(grad)
        return self.grads

    def train_step(self, X, y):
        """Run one forward and backward pass; parameters are not updated.

        Returns:
            ``(loss, grads)`` with ``grads`` as returned by ``backward``.
        """
        out = self.forward(X)
        loss = self.loss_func.forward(out, y)
        nextgrad = self.loss_func.backward(out, y)
        grads = self.backward(nextgrad)
        return loss, grads

    def predict(self, X):
        """Return the index of the highest network output in each row."""
        X = self.forward(X)
        return np.argmax(X, axis = 1)

    def predict_score(self, X):
        """Return the raw network output for ``X``."""
        X = self.forward(X)
        return X

    # Plural spelling kept as an alias of predict_score.
    predict_scores = predict_score

    def clear_grad_params(self):
        """Discard the gradients stored by the previous backward pass."""
        self.grads = []

#### Defining the update function (SGD with momentum)

In [None]:
def update_params(velocity, params, grads, learning_rate = 0.01, mu = 0.9):
    """Apply one SGD-with-momentum step to ``params`` in place.

    Args:
        velocity: per-layer lists of momentum buffers, in layer order; the
            buffers are replaced with their updated values.
        params: per-layer lists of parameters, in layer order; each entry is
            updated with ``-=``.
        grads: per-layer lists of gradients in REVERSE layer order (they are
            reversed here before being paired with ``params``).
        learning_rate: step size applied to each gradient.
        mu: momentum coefficient applied to the previous velocity.
    """
    grads_in_layer_order = reversed(grads)
    for layer_v, layer_p, layer_g in zip(velocity, params, grads_in_layer_order):
        for idx, grad in enumerate(layer_g):
            layer_v[idx] = mu * layer_v[idx] + learning_rate * grad
            layer_p[idx] -= layer_v[idx]

#### Define a function which gives us the minibatches (both the data points and the corresponding labels)

In [None]:
# Get minibatches
def minibatch(X, y, minibatch_size):
    """Shuffle ``X`` and ``y`` together and split them into minibatches.

    Args:
        X: array whose first axis indexes the samples.
        y: array of labels with the same first-axis length as ``X``.
        minibatch_size: maximum number of samples per batch; the final batch
            is smaller when the sample count is not a multiple of it.

    Returns:
        A list of ``(X_batch, y_batch)`` tuples; an empty list when ``X`` has
        no samples.
    """
    n = X.shape[0]
    # One shared permutation keeps every sample aligned with its label.
    permutation = np.random.permutation(n)
    X = X[permutation]
    y = y[permutation]
    return [
        (X[start:start + minibatch_size], y[start:start + minibatch_size])
        for start in range(0, n, minibatch_size)
    ]

#### The training loop

In [None]:
def _predict_in_batches(net, X, minibatch_size):
    """Return ``net.predict`` for all rows of ``X``, one slice at a time."""
    chunks = [net.predict(X[start:start + minibatch_size])
              for start in range(0, X.shape[0], minibatch_size)]
    if not chunks:
        return np.array([], dtype = 'int64')
    return np.concatenate(chunks)


def _mean_or_nan(values):
    """Return the arithmetic mean of ``values``, or NaN when it is empty."""
    return sum(values) / float(len(values)) if values else float('nan')


def train(net, X_train, y_train, minibatch_size, epoch, learning_rate, mu = 0.9, x_val = None, y_val = None):
    """Train ``net`` with minibatch SGD + momentum and report metrics per epoch.

    Args:
        net: network exposing ``params``, ``train_step``, ``forward``,
            ``predict`` and ``loss_func``.
        X_train, y_train: training samples and their integer labels.
        minibatch_size: number of samples per minibatch.
        epoch: number of passes over the training minibatches.
        learning_rate: step size forwarded to ``update_params``.
        mu: momentum coefficient forwarded to ``update_params``.
        x_val, y_val: optional validation samples and labels; when either is
            ``None`` the validation metrics are reported as NaN.

    Returns:
        The trained ``net`` (the same object, updated in place), after all
        epochs have run.
    """
    has_val = x_val is not None and y_val is not None
    # The data is shuffled and split once; every epoch reuses these batches.
    minibatches = minibatch(X_train, y_train, minibatch_size)
    minibatches_val = minibatch(x_val, y_val, minibatch_size) if has_val else []

    # Momentum buffers are created once so the velocity carries over from
    # one epoch to the next.
    velocity = [[np.zeros_like(param) for param in param_layer]
                for param_layer in net.params]

    for _ in range(epoch):
        # Iterate over minibatches
        loss_batch = []
        for x_mini, y_mini in minibatches:
            loss, grads = net.train_step(x_mini, y_mini)
            loss_batch.append(loss)
            update_params(velocity, net.params, grads, learning_rate = learning_rate, mu = mu)

        # Validation only needs the forward pass; no gradients are computed.
        val_loss_batch = []
        for x_mini_val, y_mini_val in minibatches_val:
            val_scores = net.forward(x_mini_val)
            val_loss_batch.append(net.loss_func.forward(val_scores, y_mini_val))

        # Accuracy of model at end of epoch after all mini batch updates
        train_acc = check_accuracy(y_train, _predict_in_batches(net, X_train, minibatch_size))
        if has_val:
            val_acc = check_accuracy(y_val, _predict_in_batches(net, x_val, minibatch_size))
        else:
            val_acc = float('nan')

        mean_train_loss = _mean_or_nan(loss_batch)
        mean_val_loss = _mean_or_nan(val_loss_batch)

        print("Loss = {0} | Training accuracy = {1} | Val loss = {2} | Val accuracy = {3}".format(mean_train_loss, train_acc, mean_val_loss, val_acc))

    return net

#### Checking accuracy of model

In [None]:
def check_accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``."""
    hits = y_pred == y_true
    return np.mean(hits)

#### Invoking all that we have created now

In [None]:
from random import shuffle

# Input size
input_dim = X_train.shape[1]

# Hyperparameter
iteration = 10          # number of training epochs
learning_rate = 0.01
hidden_nodes = 10
output_nodes = 10       # one output per class

# Neural network: Linear -> ReLU -> Linear
nn = NN()
nn.add_layer(Linear(input_dim, hidden_nodes))
nn.add_layer(ReLU())
nn.add_layer(Linear(hidden_nodes, output_nodes))

# The epoch count comes from the `iteration` hyperparameter defined above.
nn = train(nn, X_train, y_train, minibatch_size = 200, epoch = iteration, \
           learning_rate = learning_rate, x_val = x_val, y_val= y_val)

#### Forward propagate a single image and show its prediction

In [None]:
plt.imshow(x_val[0].reshape(28,28))

# Predict the score for each class; `scores` is the full network output and
# `prediction` is its first row.
scores = nn.predict_score(x_val[0])
prediction = scores[0]

print(scores)
print(prediction)

In [None]:
# Index of the largest entry of `prediction`, i.e. the highest-scoring class.
np.argmax(prediction)

In [None]:
# Predicted class for the first validation image, obtained through nn.predict.
predict_class = nn.predict(x_val[0])[0]
predict_class

In [None]:
# Original class: the label stored for the first validation image, shown for
# comparison with the prediction.
y_val[0]