# IE 534 Deep Learning - Homework 1 (Code)
## Hanwen Hu
## NetId: hanwenh3

-----

In [1]:
# This notebook builds a neural network with one hidden layer from scratch (NumPy only) to classify the MNIST dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py

  from ._conv import register_converters as _register_converters


In [2]:
# Read data: the image arrays are N * 784 and the stored label arrays are N * 1
# (column 0 of the labels is taken so that y_* end up as flat length-N vectors).
# The path uses a forward slash: the previous "..\M" depended on an invalid escape
# sequence and only resolved on Windows. The `with` block closes the HDF5 file once
# the datasets have been copied into in-memory NumPy arrays by the `[:]` reads.
with h5py.File("../MNISTdata.hdf5", 'r') as MNIST:
    x_train = np.float32(MNIST['x_train'][:])
    x_test = np.float32(MNIST['x_test'][:])
    y_train = np.int32(MNIST['y_train'][:,0])
    y_test = np.int32(MNIST['y_test'][:,0])

In [3]:
# Build activation functions
def relu(x):
    """Rectified linear unit: multiply x element-wise by the mask (x > 0)."""
    positive_mask = x > 0
    return x * positive_mask

# Input an m * n matrix, output an m * n matrix whose rows are exponentiated and normalized
def softmax(X):
    """Row-wise softmax of a 2-D array.

    Parameters
    ----------
    X : array of shape (m, n)
        One row of scores per sample.

    Returns
    -------
    array of shape (m, n)
        exp(X) normalised so that every row sums to 1.
    """
    # Subtracting each row's maximum does not change the softmax value, but it
    # keeps np.exp from overflowing to inf (inf / inf turned whole rows into nan).
    shifted = X - np.max(X, axis=1, keepdims=True)
    Xexp = np.exp(shifted)
    return Xexp / np.sum(Xexp,axis=1,keepdims=True)

In [4]:
# Initialize the parameters
def param_init(lx,lh,ly):
    """Draw initial weights and zero biases for the two dense layers.

    Parameters
    ----------
    lx, lh, ly : int
        Number of input features, hidden units and output classes.

    Returns
    -------
    W1 : (lx, lh) array, standard-normal draws divided by sqrt(lx)
    b1 : (lh,) array of zeros
    W2 : (lh, ly) array, standard-normal draws divided by sqrt(lh)
    b2 : (ly,) array of zeros

    Uses NumPy's global random state (np.random.randn).
    """
    W1 = np.random.randn(lx,lh) / np.sqrt(lx)
    # Each layer is scaled by its own fan-in; W2 receives lh inputs, so it is
    # divided by sqrt(lh) (it was previously divided by sqrt(lx) like W1).
    W2 = np.random.randn(lh,ly) / np.sqrt(lh)
    b1 = np.zeros(lh)
    b2 = np.zeros(ly)

    return W1,b1,W2,b2

In [5]:
# Forward pass of the network
# Model: S1 = XW1 + b1 → A1 = relu(S1) → S2 = A1W2 + b2 → A2 = softmax(S2)
def forward_prop(X,W1,b1,W2,b2):
    """Push the rows of X through the hidden layer and the output layer.

    Returns the tuple (S1, A1, A2): the hidden pre-activation, the hidden
    activation relu(S1), and the softmax output of the second layer. Each
    has one row per row of X.
    """
    # Input -> hidden: (n * lx).dot(lx * lh) = (n * lh)
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)

    # Hidden -> output: (n * lh).dot(lh * ly) = (n * ly), then row-wise softmax
    class_probs = softmax(np.dot(hidden_act, W2) + b2)

    return hidden_pre, hidden_act, class_probs

In [6]:
# Build the back-propagation step
def back_prop(W1,b1,W2,b2,S1,A1,A2,X,Y,alpha):
    """Apply one gradient step to all four parameters and return the new values.

    Parameters
    ----------
    W1, b1, W2, b2 : current parameters, shapes (lx, lh), (lh,), (lh, ly), (ly,)
    S1, A1, A2     : values returned by the forward pass for X, shapes
                     (n, lh), (n, lh), (n, ly)
    X : (n, lx) array of inputs (2-D, one row per sample; n may be 1)
    Y : (n, ly) array of targets, same shape as A2
    alpha : step size

    Returns
    -------
    (W1n, b1n, W2n, b2n) with the same shapes as the corresponding inputs.

    Notes
    -----
    The step direction is Y - A2 propagated backwards and is *added* to the
    parameters. Assuming A2 = softmax(S2) and a cross-entropy loss, this is
    ordinary gradient descent on that loss.
    """
    out_delta = Y - A2                                  # (n, ly)
    W2Del = np.dot(A1.T, out_delta)                     # (lh, ly), summed over samples
    # Back through relu: only units whose pre-activation was positive pass signal
    hid_delta = np.dot(out_delta, W2.T) * (S1 > 0)      # (n, lh)
    W1Del = np.dot(X.T, hid_delta)                      # (lx, lh), summed over samples

    # Reduce the bias terms over the sample axis, exactly as the dot products do
    # for the weights. Without this the biases silently changed shape from (l,)
    # to (1, l) after the first step and became per-row (n, l) for mini-batches.
    b2Del = out_delta.sum(axis=0)                       # (ly,)
    b1Del = hid_delta.sum(axis=0)                       # (lh,)

    b2n = b2 + alpha * b2Del
    W2n = W2 + alpha * W2Del
    b1n = b1 + alpha * b1Del
    W1n = W1 + alpha * W1Del

    return W1n,b1n,W2n,b2n

In [10]:
# Build the complete Neural Network
def TwoLayer_NN_train(X, Y, NumHidden = 100, OrigAlpha = 0.01, num_epochs = 10):
    """Train the one-hidden-layer network with single-sample stochastic updates.

    Parameters
    ----------
    X : (N, lx) array of inputs, one row per sample
    Y : length-N array of class labels
        NOTE(review): the accuracy check compares argmax indices with Y[r], so
        labels are assumed to be the integers 0..K-1 -- confirm for other data.
    NumHidden : number of hidden units
    OrigAlpha : initial step size; reduced by a factor of 10 after epochs 5, 10 and 15
    num_epochs : number of passes, each consisting of N randomly drawn samples

    Returns
    -------
    (W1, b1, W2, b2) : the trained parameters.

    Prints the running training accuracy once per epoch. Sampling and
    initialisation use NumPy's global random state.
    """
    # Recode Y as One-Hot
    Y_oh = np.array(pd.get_dummies(np.squeeze(Y)))

    # Indicate number of units per layer
    N = X.shape[0] # Number of samples
    lx = X.shape[1] # Number of features
    ly = Y_oh.shape[1] # Number of classes
    lh = NumHidden # Number of hidden units

    # Initialize the parameters
    W1,b1,W2,b2 = param_init(lx,lh,ly)

    # Run num_epochs passes over the data, reporting the accuracy after each one
    for epoch in range(num_epochs):
        # Step-wise learning-rate decay
        if epoch <= 5:
            alpha = OrigAlpha
        elif epoch <= 10:
            alpha = OrigAlpha * 1e-1
        elif epoch <= 15:
            alpha = OrigAlpha * 1e-2
        else:
            alpha = OrigAlpha * 1e-3
        total_cor = 0
        for _ in range(N):
            # Draw one sample with replacement; [[r]] keeps the row 2-D (1 * lx)
            r = np.random.randint(N)
            x_samp = X[[r]]
            y_samp = Y_oh[[r]]
            # Forward
            S1, A1, A2 = forward_prop(x_samp,W1,b1,W2,b2)
            pred = np.argmax(A2)
            if pred==Y[r]:
                total_cor += 1
            # Backward
            W1,b1,W2,b2 = back_prop(W1,b1,W2,b2,S1,A1,A2,x_samp,y_samp,alpha)
        # The builtin float replaces np.float, an alias removed in NumPy 1.24
        # that now raises AttributeError.
        print("Training Accuracy: ",total_cor / float(N))
    return W1,b1,W2,b2

In [11]:
# Seed NumPy's global RNG first: weight initialisation (np.random.randn) and the
# per-step sample draws (np.random.randint) both use it, so an unseeded run gives
# different accuracies every time the notebook is executed.
np.random.seed(0)
W1,b1,W2,b2 = TwoLayer_NN_train(x_train, y_train, NumHidden=100, OrigAlpha=0.01, num_epochs=10)

Training Accuracy:  0.92965
Training Accuracy:  0.96965
Training Accuracy:  0.9761666666666666
Training Accuracy:  0.9817
Training Accuracy:  0.9854833333333334
Training Accuracy:  0.98785
Training Accuracy:  0.99345
Training Accuracy:  0.99495
Training Accuracy:  0.9962
Training Accuracy:  0.9965833333333334


In [12]:
# Predict a class index for every row of X with the given network parameters
def predict_NN(X,W1,b1,W2,b2):
    """Return, for each row of X, the index of the largest output probability."""
    # Only the third value of the forward pass (the softmax output) is needed here
    _, _, class_probs = forward_prop(X,W1,b1,W2,b2)
    return class_probs.argmax(axis=1)

In [13]:
# Training-set accuracy: number of matching predictions divided by the sample count
y_predtrain = predict_NN(x_train,W1,b1,W2,b2)
(y_predtrain == y_train).sum() / len(x_train)

0.9965666666666667

In [14]:
# Test-set accuracy: number of matching predictions divided by the sample count
y_predtest = predict_NN(x_test,W1,b1,W2,b2)
(y_predtest == y_test).sum() / len(x_test)

0.98