In [1]:
# This Python notebook builds a neural network with one hidden layer from scratch to classify the MNIST dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py

  from ._conv import register_converters as _register_converters


In [2]:
# Read data, which has a size of N * 784 (features) and N * 1 (labels).
# The path uses a forward slash: "\M" inside a normal string literal is an
# invalid escape sequence, and "/" is accepted on both Windows and POSIX.
MNIST = h5py.File("../MNISTdata.hdf5", 'r')
# [:] copies each dataset into memory so the arrays stay usable after the file is closed
x_train = np.float32(MNIST['x_train'][:])
x_test = np.float32(MNIST['x_test'][:])
# Labels are stored as a column; keep column 0 to obtain a flat label vector
y_train = np.int32(MNIST['y_train'][:,0])
y_test = np.int32(MNIST['y_test'][:,0])

In [3]:
# Print the name of every top-level entry in MNISTdata.hdf5, then close the file
for dataset_key in MNIST.keys():
    dataset = MNIST[dataset_key]
    print(dataset.name)
MNIST.close()

/x_test
/x_train
/y_test
/y_train


In [3]:
# Build the activation functions
def relu(x):
    """Rectified linear unit, applied element-wise: max(x, 0).

    Uses np.maximum rather than ``x * (x > 0)`` so that ``-inf`` maps to 0
    (the product form gives ``-inf * 0 = nan``) and negative inputs map to
    ``0.0`` rather than ``-0.0``.

    Args:
        x: scalar or array-like of numbers.

    Returns:
        Array (or NumPy scalar) shaped like x with negative entries replaced by 0.
    """
    return np.maximum(x, 0)

# Input an m * n matrix, output an m * n matrix whose rows are exponentiated and normalized to sum to 1
def softmax(X):
    """Row-wise softmax of a 2-D array.

    The row maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps np.exp from overflowing to inf
    (and the ratio from becoming nan) when the logits are large.

    Args:
        X: array of shape (m, n).

    Returns:
        Array of shape (m, n); every row is non-negative and sums to 1.
    """
    shifted = X - np.max(X, axis=1, keepdims=True)
    Xexp = np.exp(shifted)
    return Xexp / np.sum(Xexp, axis=1, keepdims=True)

In [12]:
# Build normalize function
def layer_norm(A):
    """Min-max rescale the whole array A into the range [0, 1].

    The minimum and maximum are taken over every element of A (no axis is
    given), so this is a global min-max scaling.

    Args:
        A: numeric array.

    Returns:
        (scaled, scale) where scaled = (A - min) / (max - min) and
        scale = 1 / (max - min), the constant factor applied to A.
        If A is constant (max == min), scaled is all zeros and scale is 0.0
        instead of the nan / inf a division by zero would produce.
    """
    mi = np.min(A)
    rag = np.max(A) - mi
    shifted = A - mi
    if rag == 0:
        # Constant input: every shifted entry is already 0. Dividing by 1
        # only gives the same floating-point result type as the normal path.
        return shifted / 1, 0.0
    return shifted / rag, 1 / rag

In [26]:
# Initialize the parameters
def param_init(lx,lh,ly,rng=None):
    """Create the initial weights and biases of the network.

    Args:
        lx: number of input features.
        lh: number of hidden units.
        ly: number of output classes.
        rng: optional random source exposing ``standard_normal(size)``
            (e.g. ``np.random.RandomState(seed)``). When None, NumPy's global
            random state is used, which draws the same stream as
            ``np.random.randn``.

    Returns:
        (W1, b1, W2, b2) with shapes (lx, lh), (lh,), (lh, ly), (ly,).
        Weights are scaled standard-normal draws; biases are zeros.
    """
    if rng is None:
        rng = np.random

    W1 = rng.standard_normal((lx, lh)) / np.sqrt(lx)
    # Scaling the draws down keeps the initial logits small; unscaled weights may need more epochs.
    # NOTE(review): W2 is divided by sqrt(lx), not by its own fan-in sqrt(lh) — confirm this is intended.
    W2 = rng.standard_normal((lh, ly)) / np.sqrt(lx)
    b1 = np.zeros(lh)
    b2 = np.zeros(ly)

    return W1,b1,W2,b2

In [27]:
# Forward pass of the network
# Model: S1 = XW1 + b1 → A1 = relu(S1) → S2 = A1W2 + b2 → A2 = softmax(S2)
def forward_prop(X,W1,b1,W2,b2):
    """Run a batch through the hidden layer and the output layer.

    Args:
        X: input batch, shape (n, lx).
        W1, b1: hidden-layer weights (lx, lh) and bias.
        W2, b2: output-layer weights (lh, ly) and bias.

    Returns:
        (S1, A1, A2): hidden pre-activation (n, lh), hidden activation
        (n, lh) and the softmax output (n, ly).
    """
    # Input -> hidden: affine map, then ReLU
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)

    # Hidden -> output: affine map, then row-wise softmax
    logits = np.dot(hidden_act, W2) + b2
    probs = softmax(logits)

    return hidden_pre, hidden_act, probs

In [47]:
# Build the back-propagation step
def back_prop(W1,b1,W2,b2,S1,A1,A2,X,Y,alpha):
    """Apply one gradient step for the softmax / cross-entropy network.

    The error signal is Y - A2, so the parameters are moved by
    ``+ alpha * delta``.

    The hidden-layer error is kept per sample, shape (n, lh), and masked with
    each sample's own ReLU derivative before it is reduced over the batch.
    Averaging over the batch before masking gives the right gradient only
    when the batch holds a single sample.

    Args:
        W1, b1, W2, b2: current parameters.
        S1: hidden pre-activations, shape (n, lh).
        A1: hidden activations, shape (n, lh).
        A2: softmax outputs, shape (n, ly).
        X: input batch, shape (n, lx).
        Y: one-hot targets, shape (n, ly).
        alpha: learning rate.

    Returns:
        (W1n, b1n, W2n, b2n): updated parameters, each with the same shape as
        the corresponding input parameter.
    """
    batch_size = X.shape[0]

    # Output layer
    A2Del = Y - A2 # Shape: (n * ly)
    W2Del = np.dot(A1.T, A2Del) / batch_size # Shape: (lh * ly)
    b2Del = np.mean(A2Del, axis=0) # Length: ly

    # Hidden layer: back-propagate per sample, then gate by the ReLU derivative
    A1Del = np.dot(A2Del, W2.T) # Shape: (n * lh)
    S1Del = A1Del * (S1 > 0) # Shape: (n * lh)
    W1Del = np.dot(X.T, S1Del) / batch_size # Shape: (lx * lh)
    b1Del = np.mean(S1Del, axis=0) # Length: lh

    # Reshape the bias deltas so the biases keep the shape they came in with
    b2n = b2 + alpha * b2Del.reshape(np.shape(b2)) # Length ly
    W2n = W2 + alpha * W2Del # Shape (lh * ly)
    b1n = b1 + alpha * b1Del.reshape(np.shape(b1)) # Length lh
    W1n = W1 + alpha * W1Del # Shape (lx * lh)

    return W1n,b1n,W2n,b2n

In [48]:
# Build the complete Neural Network
def TwoLayer_NN_train(X, Y, NumHidden = 100, batch_size = 20, OrigAlpha = 0.01, num_epochs = 10, seed = None):
    """Train the one-hidden-layer network with mini-batch gradient steps.

    Every epoch performs N // batch_size updates. Each update draws a fresh
    random batch of distinct row indices, so over an epoch some rows can be
    drawn more than once and others not at all.

    Args:
        X: training inputs, shape (N, lx).
        Y: integer class labels, one per row of X (flat or column-shaped).
        NumHidden: number of hidden units.
        batch_size: number of samples per update.
        OrigAlpha: initial learning rate; it is divided by 10 after epochs
            5, 10 and 15.
        num_epochs: number of epochs to run.
        seed: optional integer; when given, NumPy's global random state is
            seeded so initialisation and batch sampling are reproducible.

    Returns:
        (W1, b1, W2, b2): the trained parameters.
    """
    if seed is not None:
        np.random.seed(seed)

    # Flatten the labels so a column-shaped Y does not broadcast in the accuracy check
    labels = np.asarray(Y).reshape(-1)
    # Recode Y as One-Hot
    # NOTE(review): accuracy compares argmax column indices with raw labels,
    # which assumes labels are the integers 0..ly-1 — confirm for other datasets.
    Y_oh = np.asarray(pd.get_dummies(labels), dtype=float)

    # Indicate number of units per layer
    N = X.shape[0] # Number of samples
    lx = X.shape[1] # Number of features
    ly = Y_oh.shape[1] # Number of classes
    lh = NumHidden # Number of hidden units

    # Initialize the parameters
    W1,b1,W2,b2 = param_init(lx,lh,ly)

    iteration = N // batch_size
    # Samples actually evaluated per epoch; differs from N when batch_size does not divide N
    seen = iteration * batch_size

    # Run num_epochs training epochs, reporting the training accuracy after each one
    for epoch in range(num_epochs):
        # Step decay of the learning rate
        if epoch <= 5:
            alpha = OrigAlpha
        elif epoch <= 10:
            alpha = OrigAlpha * 1e-1
        elif epoch <= 15:
            alpha = OrigAlpha * 1e-2
        else:
            alpha = OrigAlpha * 1e-3
        total_cor = 0
        for n in range(iteration):
            r = np.random.choice(N, size=batch_size, replace=False)
            # Index with the array itself: X[[r]] is read as a 2-D index by
            # current NumPy and would add a leading axis to the batch.
            x_samps = X[r]
            y_samps = Y_oh[r]
            # Forward
            S1, A1, A2 = forward_prop(x_samps,W1,b1,W2,b2)
            pred = np.argmax(A2,axis=1)
            total_cor += np.sum(pred == labels[r])
            # Backward
            W1,b1,W2,b2 = back_prop(W1,b1,W2,b2,S1,A1,A2,x_samps,y_samps,alpha)
        # max(..., 1) avoids a division by zero when batch_size > N (no updates run)
        print("Training Accuracy: ",total_cor / float(max(seen, 1)))
    return W1,b1,W2,b2

In [53]:
# Train on the full training set, one sample per update
W1, b1, W2, b2 = TwoLayer_NN_train(
    x_train,
    y_train,
    NumHidden=100,
    batch_size=1,
    OrigAlpha=0.01,
    num_epochs=10,
)

Training Accuracy:  0.9292333333333334
Training Accuracy:  0.9709333333333333
Training Accuracy:  0.9774666666666667
Training Accuracy:  0.98245
Training Accuracy:  0.98575
Training Accuracy:  0.9873
Training Accuracy:  0.9935833333333334
Training Accuracy:  0.99565
Training Accuracy:  0.9955166666666667
Training Accuracy:  0.9964666666666666


In [54]:
# Predict the class of every row of X with a trained network
def predict_NN(X,W1,b1,W2,b2):
    """Return, for each row of X, the index of the most probable class.

    Args:
        X: input rows to classify.
        W1, b1, W2, b2: network parameters passed to forward_prop.

    Returns:
        1-D array of argmax indices over the network's output probabilities.
    """
    _, _, class_probs = forward_prop(X,W1,b1,W2,b2)
    # The column with the largest probability is the predicted class
    return class_probs.argmax(axis=1)

In [55]:
# Accuracy on the training set: fraction of predictions that match the labels
y_predtrain = predict_NN(x_train, W1, b1, W2, b2)
np.mean(y_predtrain == y_train)

0.9964166666666666

In [56]:
# Accuracy on the held-out test set: fraction of predictions that match the labels
y_predtest = predict_NN(x_test, W1, b1, W2, b2)
np.mean(y_predtest == y_test)

0.9805