In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py

  from ._conv import register_converters as _register_converters


In [2]:
# Read data, which has a size of N * 784 and N * 1
# The raw string keeps the Windows-style relative path byte-identical while
# avoiding the invalid "\M" escape sequence; the context manager closes the
# HDF5 file once every dataset has been copied into memory via [:].
with h5py.File(r"..\MNISTdata.hdf5",'r') as MNIST:
    x_train = np.float32(MNIST['x_train'][:])
    x_test = np.float32(MNIST['x_test'][:])
    # Labels are stored as a column; take column 0 to get a flat label vector
    y_train = np.int32(MNIST['y_train'][:,0])
    y_test = np.int32(MNIST['y_test'][:,0])

In [17]:
# Build activation functions
def relu(x):
    """Return x with its non-positive entries replaced by zero.

    The mask `x > 0` is multiplied element-wise with x, so positive entries
    pass through unchanged and all others become zero.
    """
    positive = x > 0
    return x * positive

# Input a m * n matrix, output a m * n matrix whose rows are transformed and normalized
def softmax(X):
    """Row-wise softmax of a 2-D array.

    Parameters
    ----------
    X : array of shape (m, n)
        One row of scores per sample.

    Returns
    -------
    Array of shape (m, n) whose rows are non-negative and each sum to 1.
    """
    # Softmax is invariant to adding a constant to every entry of a row, so
    # shifting each row by its maximum leaves the result unchanged while
    # keeping np.exp from overflowing to inf (which would produce NaN rows).
    shifted = X - np.max(X, axis=1, keepdims=True)
    Xexp = np.exp(shifted)
    return Xexp / np.sum(Xexp, axis=1, keepdims=True)

In [18]:
# Initialize the parameters
def param_init(lx,ly):
    """Create the initial weight matrix and bias vector.

    Parameters
    ----------
    lx : int
        Number of rows of the weight matrix.
    ly : int
        Number of columns of the weight matrix and length of the bias.

    Returns
    -------
    (weights, biases) : weights is an (lx, ly) array of standard-normal draws
    divided by sqrt(lx); biases is a length-ly array of zeros.
    """
    gaussian = np.random.randn(lx, ly)
    weights = gaussian / np.sqrt(lx)
    biases = np.zeros(ly)
    return weights, biases

In [19]:
# Build the forward step
def forward_prop(X,W1,b1):
    """Compute the network output for the input rows in X.

    The inputs are multiplied by W1, offset by b1, and the resulting scores
    are passed through `softmax`, which normalizes each row.

    Returns the softmax output, one row per row of X.
    """
    # Input to output layer: affine scores followed by the softmax activation
    return softmax(X.dot(W1) + b1)

In [20]:
# Build the back-propagation step
def back_prop(W1,b1,A1,X,Y,alpha):
    """Apply one parameter update and return the new weights and bias.

    Parameters
    ----------
    W1 : array (lx, ly)   current weights
    b1 : array (ly,) or (1, ly)   current bias
    A1 : array (batch, ly)   network output for X
    X  : array (batch, lx)   input rows
    Y  : array (batch, ly)   targets, same shape as A1
    alpha : float   step size

    Returns
    -------
    (W1n, b1n) : the updated weights and bias.
    """
    # Per-sample output error; presumably the negative gradient of a softmax
    # cross-entropy loss w.r.t. the pre-activations -- confirm against the
    # intended loss, which is not defined in this notebook section.
    err = Y - A1
    W1n = W1 + alpha * X.T.dot(err)
    # Sum the error over the sample axis so the bias keeps its own shape.
    # Adding the unreduced (batch, ly) error would broadcast the bias up to
    # the batch shape, which is wrong for any batch larger than one sample.
    b1n = b1 + alpha * err.sum(axis=0)

    return W1n,b1n

In [21]:
X = x_train
Y = y_train
OrigAlpha = 0.01

# One-hot encode the labels: one row per sample, one column per distinct label in Y
Y_oh = np.array(pd.get_dummies(np.squeeze(Y)))

# Indicate number of units per layer
N = X.shape[0] # Number of samples
lx = X.shape[1] # Number of features
ly = Y_oh.shape[1] # Number of classes

# Initialize the parameters
W1,b1 = param_init(lx,ly)

num_epochs = 10
for epoch in range(num_epochs):
    # Step-wise learning-rate decay: epochs 0-5 use OrigAlpha, 6-10 use a
    # tenth of it, 11-15 a hundredth, anything later a thousandth. With
    # num_epochs = 10 only the first two tiers are reached.
    if epoch <= 5:
        alpha = OrigAlpha
    elif epoch <= 10:
        alpha = OrigAlpha * 1e-1
    elif epoch <= 15:
        alpha = OrigAlpha * 1e-2
    else:
        alpha = OrigAlpha * 1e-3
    total_cor = 0
    for n in range(N):
        # Draw one training sample uniformly at random (with replacement);
        # the list index [[r]] keeps a leading axis of length 1.
        r = np.random.randint(N)
        x_samp = X[[r]]
        y_samp = Y_oh[[r]]

        A1 = forward_prop(x_samp,W1,b1)
        # Score the prediction before this sample's update is applied
        pred = np.argmax(A1)
        if pred==Y[r]:
            total_cor += 1
        W1,b1 = back_prop(W1,b1,A1,x_samp,y_samp,alpha)
    # Fraction of this epoch's sampled points that were classified correctly.
    # np.float was only an alias for the builtin float and was removed in
    # NumPy 1.24, so the builtin is used directly.
    print(total_cor / float(N))

0.9003
0.9163
0.92055
0.9227166666666666
0.92175
0.9257166666666666
0.9299
0.93075
0.9324833333333333
0.9312833333333334
