## MNIST hand-written digit classification

![](pics/mnist.png)

Digits range from 0 to 9. The training set consists of 60,000 images and the test set consists of 10,000 images.

## Task 1: One layer SoftMax Classifier

### The Forward Pass of SoftMax Classifier

![](pics/formula1.png)

In [1]:
import re
import os
import glob
import random
import numpy as np
from PIL import Image
from tqdm import tqdm_notebook

In [2]:
def _read_flat_image(path):
    """Read one image file and return its pixels as a ``(1, 784)`` row."""
    # The context manager closes the file handle right away instead of
    # leaving tens of thousands of open files to the garbage collector.
    with Image.open(path) as img:
        return np.array(img).reshape((1, 784))


def _load_split(folder, label_pattern):
    """Load every ``*.png`` in *folder* as pixel rows plus their digit labels."""
    files = glob.glob(os.path.join(folder, '*.png'))
    if not files:
        # np.concatenate would otherwise fail with an opaque
        # "need at least one array to concatenate" ValueError.
        raise ValueError('no .png images found in %r' % (folder,))
    labels = np.array([float(label_pattern.search(name).groups()[0])
                       for name in files])
    rows = [_read_flat_image(name) for name in tqdm_notebook(files, leave=False)]
    # np.float64 replaces the np.float alias, which was removed in NumPy 1.24.
    data = np.concatenate(rows, axis=0).astype(np.float64)
    return data, labels


def load_data(train_path='/train', test_path='test/'):
    """Load the training and test images and their labels from two folders.

    Every ``*.png`` file in each folder is read, flattened to 784 values and
    stacked row-wise. The label of an image is the single digit that follows
    ``num`` in its file name (e.g. ``...num7.png`` -> ``7.0``).

    Args:
        train_path: folder holding the training images.
        test_path: folder holding the test images.

    Returns:
        ``(train_data, train_id, test_data, test_id)`` where the ``*_data``
        arrays are float64 with one 784-value row per image and the ``*_id``
        arrays hold the matching labels as floats.

    Raises:
        ValueError: if a folder contains no ``.png`` files.
        AttributeError: if a file name does not contain ``num<digit>.png``.
    """
    # The dot is escaped so that only a literal ".png" suffix matches.
    pattern = re.compile(r'num(\d)\.png')
    train_data, train_id = _load_split(train_path, pattern)
    test_data, test_id = _load_split(test_path, pattern)
    return train_data, train_id, test_data, test_id

In [3]:
# Absolute locations of the train/test image folders on the author's machine;
# point these at wherever the dataset lives locally.
train_path = r'C:\Users\GS65\Desktop\assignment2\train' 
test_path = r'C:\Users\GS65\Desktop\assignment2\test'

In [4]:
# Read both image folders into memory: flattened pixel rows plus digit labels.
train_data, train_id, test_data, test_id = load_data(train_path,test_path)

HBox(children=(IntProgress(value=0, max=60000), HTML(value='')))



HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))



In [5]:
def softmax(scores):
    """Return the row-wise softmax of a 2-D score matrix.

    Args:
        scores: array of shape ``(n_samples, n_classes)``.

    Returns:
        A new array of the same shape in which every row is non-negative
        and sums to 1. Rows that contain NaN come back as all zeros.
    """
    scores = np.asarray(scores)
    # Softmax is invariant to subtracting a per-row constant. Shifting by the
    # row maximum keeps every exponent <= 0, so np.exp can no longer overflow
    # to inf (which previously collapsed the whole row to zeros).
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Only NaN input can still produce NaN here; map it to 0 as before.
    return np.nan_to_num(probs)

In [6]:
def loss_compute(probs, labels, reg=0, W=0):
    """Mean cross-entropy of *probs* against *labels*, plus optional L2 term.

    Args:
        probs: ``(n_samples, n_classes)`` array of class probabilities.
        labels: array of true class indices (cast to int64 for indexing).
        reg: regularization strength; when falsy no penalty is added.
        W: weights entering the ``0.5 * reg * sum(W * W)`` penalty.

    Returns:
        The average negative log-probability of the true classes, with the
        penalty added when ``reg`` is non-zero.
    """
    sample_idx = np.arange(probs.shape[0])
    true_class = labels.astype('int64')
    # Probability the model assigned to the correct class of each sample.
    picked = probs[sample_idx, true_class]
    # nan_to_num keeps the loss finite when a picked probability is 0.
    neg_log = np.nan_to_num(-np.log(picked))
    mean_nll = np.sum(neg_log) / neg_log.shape[0]
    if not reg:
        return mean_nll
    penalty = 0.5 * reg * np.sum(W * W)
    return mean_nll + penalty

In [7]:
def One_layer_classifier(x_train, y_train, epochs, mini_batch_size, step, reg=0):
    """Train a single-layer softmax classifier with mini-batch gradient descent.

    Args:
        x_train: 2-D array, one sample per row.
        y_train: array of class labels (0-9), one per row of ``x_train``.
        epochs: number of passes over the training data.
        mini_batch_size: number of samples per gradient step.
        step: learning rate.
        reg: L2 regularization strength applied to the weights.

    Returns:
        ``(w, b)``: weights of shape ``(n_features, 10)`` and bias of shape
        ``(1, 10)``.

    Side effects:
        Reseeds NumPy's global RNG with 42 and prints the full training loss
        every 100th epoch (starting with epoch 0).
    """
    np.random.seed(42)
    n, n_features = x_train.shape
    w = 0.01 * np.random.randn(n_features, 10)
    b = np.zeros((1, 10))
    labels = np.asarray(y_train).astype('int64')

    for i in range(epochs):
        # Shuffle sample indices with the seeded NumPy RNG so that runs are
        # reproducible, and slice the arrays directly instead of rebuilding
        # each batch from a Python list of (x, y) tuples. This also drops the
        # former hard-coded reshape to 784 features.
        order = np.random.permutation(n)
        for start in range(0, n, mini_batch_size):
            batch_idx = order[start:start + mini_batch_size]
            x_batch = x_train[batch_idx]
            y_batch = labels[batch_idx]
            m = x_batch.shape[0]

            # forward
            scores = np.dot(x_batch, w) + b
            probs = softmax(scores)

            # backward: gradient of the mean cross-entropy w.r.t. the scores
            dscores = probs
            dscores[np.arange(m), y_batch] -= 1
            dscores = dscores / m
            dw = np.dot(x_batch.T, dscores)
            dw += reg * w
            db = np.sum(dscores, axis=0, keepdims=True)

            # NOTE(review): the gradients are already averaged over the batch
            # above, so dividing by m again scales the effective learning rate
            # (and the regularization gradient) by 1/m. Kept as-is because the
            # hyperparameters used with this function were tuned against it.
            w += -step * dw / m
            b += -step * db / m

        # compute loss
        if i % 100 == 0:
            t_scores = np.dot(x_train, w) + b
            t_probs = softmax(t_scores)
            loss = loss_compute(t_probs, y_train, reg, w)
            print ("iteration %d: loss %f" % (i, loss))
    return w, b

In [8]:
def accuracy(w, b, x_test, y_test):
    """Print and return the accuracy of a linear classifier on a labelled set.

    Args:
        w: weight matrix of shape ``(n_features, n_classes)``.
        b: bias row broadcast onto the scores.
        x_test: 2-D array of samples to classify, one per row.
        y_test: true class labels, one per row of ``x_test``.

    Returns:
        The fraction of rows whose highest-scoring class equals the label.
    """
    scores = np.dot(x_test, w) + b
    predict = np.argmax(scores, axis=1)
    acc = float(np.mean(predict == y_test))
    # The data passed in is the evaluation set, so the message says "test"
    # rather than the former, misleading "training".
    print('test accuracy: %.2f' % acc)
    return acc

In [9]:
# Train the one-layer classifier for 1000 epochs with mini-batches of 100 and
# step size 0.005, without regularization, then score it on the test split.
w, b = One_layer_classifier(train_data, train_id, 1000, 100, 0.005, reg=0)
accuracy(w, b, test_data, test_id)                            

iteration 0: loss 1.291193
iteration 100: loss 0.481483
iteration 200: loss 0.458341
iteration 300: loss 0.361681
iteration 400: loss 0.545539
iteration 500: loss 0.387182
iteration 600: loss 0.450068
iteration 700: loss 0.361206
iteration 800: loss 0.406088
iteration 900: loss 0.516441
training accuracy: 0.89


In [10]:
# Same training run, now with regularization strength 0.5 on the weights,
# followed by scoring on the test split.
w, b = One_layer_classifier(train_data, train_id, 1000, 100, 0.005, reg=0.5)
accuracy(w, b, test_data, test_id)                            

iteration 0: loss 1.312432
iteration 100: loss 0.577875
iteration 200: loss 0.490529
iteration 300: loss 0.400090
iteration 400: loss 0.602941
iteration 500: loss 0.476072
iteration 600: loss 0.523681
iteration 700: loss 0.440059
iteration 800: loss 0.528997
iteration 900: loss 0.397138
training accuracy: 0.83


## Task 2: Two Layer neural network

### The Forward Pass of a Two layer neural network

![](pics/formula2.png)

In [9]:
class Network:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Attributes are ``sizes`` (the layer widths passed to the constructor) and
    the parallel lists ``w`` and ``b`` holding one weight matrix and one bias
    row per layer transition. The forward and backward passes use exactly the
    first two entries of ``w`` and ``b``.
    """

    def __init__(self, sizes):
        """Create small random weights and biases for the given layer widths.

        Args:
            sizes: layer widths, e.g. ``[784, 100, 10]``.
        """
        self.sizes = sizes
        # Biases are drawn before weights; keep this order so a seeded run
        # produces the same initial parameters.
        self.b = [0.01 * np.random.randn(1, y) for y in sizes[1:]]
        self.w = [0.01 * np.random.randn(x, y) for x, y in zip(sizes[:-1], sizes[1:])]

    def _forward(self, x):
        """Return ``(hidden_layer, scores)`` for a batch of input rows."""
        hidden_layer = np.maximum(0, np.dot(x, self.w[0]) + self.b[0])
        scores = np.dot(hidden_layer, self.w[1]) + self.b[1]
        return hidden_layer, scores

    def loss_compute(self, x_train, labels, reg=0):
        """Mean cross-entropy on ``(x_train, labels)`` plus optional L2 term.

        Args:
            x_train: 2-D array of samples, one per row.
            labels: array of true class indices (cast to int64 for indexing).
            reg: regularization strength; when falsy no penalty is added.

        Returns:
            The average negative log-probability of the true classes, plus
            ``0.5 * reg * sum(w * w)`` over both weight matrices when ``reg``
            is non-zero.
        """
        _, scores = self._forward(x_train)
        probs = softmax(scores)
        L = probs[np.arange(probs.shape[0]), labels.astype('int64')]
        # nan_to_num keeps the loss finite when a picked probability is 0.
        L = np.nan_to_num(-np.log(L))
        data_loss = np.sum(L) / L.shape[0]
        if reg:
            reg_loss1 = 0.5 * reg * np.sum(self.w[0] * self.w[0])
            reg_loss2 = 0.5 * reg * np.sum(self.w[1] * self.w[1])
            reg_loss = reg_loss1 + reg_loss2
            return (data_loss + reg_loss)
        else:
            return data_loss

    def SGD(self, x_train, y_train, epochs, mini_batch_size, step, reg=0):
        """Train the network in place with mini-batch gradient descent.

        Args:
            x_train: 2-D array of samples, one per row.
            y_train: array of class labels, one per row of ``x_train``.
            epochs: number of passes over the training data.
            mini_batch_size: number of samples per gradient step.
            step: learning rate.
            reg: L2 regularization strength applied to both weight matrices.

        Side effects:
            Updates ``self.w`` and ``self.b`` and prints the full training
            loss every 10th epoch (starting with epoch 0).
        """
        n = len(x_train)
        labels = np.asarray(y_train).astype('int64')
        for i in range(epochs):
            # Shuffle indices with NumPy's RNG so a seeded run is reproducible,
            # and slice the arrays directly rather than rebuilding each batch
            # from a list of (x, y) tuples. This also drops the former
            # hard-coded reshape to 784 features.
            order = np.random.permutation(n)

            for start in range(0, n, mini_batch_size):
                batch_idx = order[start:start + mini_batch_size]
                x_batch = x_train[batch_idx]
                y_batch = labels[batch_idx]
                m = x_batch.shape[0]

                # forward
                hidden_layer, scores = self._forward(x_batch)
                probs = softmax(scores)

                # backward: gradient of the mean cross-entropy w.r.t. scores
                dscores = probs
                dscores[np.arange(m), y_batch] -= 1
                dscores /= m

                dw2 = np.dot(hidden_layer.T, dscores)
                db2 = np.sum(dscores, axis=0, keepdims=True)
                dhidden = np.dot(dscores, (self.w[1]).T)
                # ReLU passes no gradient where the unit was inactive.
                dhidden[hidden_layer <= 0] = 0
                dw = np.dot(x_batch.T, dhidden)
                db = np.sum(dhidden, axis=0, keepdims=True)
                dw2 += reg * self.w[1]
                dw += reg * self.w[0]

                # update
                # NOTE(review): gradients are already batch-averaged above, so
                # dividing by m again scales the effective learning rate and
                # regularization gradient by 1/m. Kept as-is because existing
                # hyperparameters were tuned against this behavior.
                self.w[0] += -step * dw / m
                self.b[0] += -step * db / m
                self.w[1] += -step * dw2 / m
                self.b[1] += -step * db2 / m

            # compute loss
            if i % 10 == 0:
                loss = self.loss_compute(x_train, y_train, reg)
                print ("iteration %d: loss %f" % (i, loss))

    def accuracy(self, x_test, y_test):
        """Print and return the accuracy of the network on a labelled set.

        Args:
            x_test: 2-D array of samples to classify, one per row.
            y_test: true class labels, one per row of ``x_test``.

        Returns:
            The fraction of rows whose highest-scoring class equals the label.
        """
        _, scores = self._forward(x_test)
        predict = np.argmax(scores, axis=1)
        acc = float(np.mean(predict == y_test))
        # The data passed in is the evaluation set, so the message says "test"
        # rather than the former, misleading "training".
        print('test accuracy: %.2f' % acc)
        return acc

In [10]:
# Seed NumPy so the random weight initialization is repeatable.
np.random.seed(42)
# 784 inputs -> 100 hidden units -> 10 output classes.
net = Network([784, 100, 10])
# 100 epochs, mini-batches of 30, step size 0.05, regularization 0.05.
net.SGD(train_data, train_id, 100, 30, 0.05, 0.05)
net.accuracy(test_data, test_id)

iteration 0: loss 0.297819
iteration 10: loss 0.095195
iteration 20: loss 0.098444
iteration 30: loss 0.089829
iteration 40: loss 0.088809
iteration 50: loss 0.085916
iteration 60: loss 0.080518
iteration 70: loss 0.083492
iteration 80: loss 0.092804
iteration 90: loss 0.080891
training accuracy: 0.98
