In [1]:
# Import Module
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py
import datetime

  from ._conv import register_converters as _register_converters


In [2]:
# Read data: the image arrays have a size of N * 784 and the label arrays N * 1.
# The path uses a forward slash because "\M" is an invalid escape sequence and a
# backslash only acts as a path separator on Windows.
# The context manager closes the HDF5 file as soon as the four arrays have been
# read; after this cell MNIST is a closed file handle.
with h5py.File("../MNISTdata.hdf5", 'r') as MNIST:
    x_train = np.float32(MNIST['x_train'][:])
    x_test = np.float32(MNIST['x_test'][:])
    # Labels are stored as a column; keep only column 0 so they become 1-D.
    y_train = np.int32(MNIST['y_train'][:,0])
    y_test = np.int32(MNIST['y_test'][:,0])

In [3]:
# View every flat sample as a 28 * 28 image; the sample count on axis 0 is kept
x_trainnew = x_train.reshape((x_train.shape[0], 28, 28))
x_testnew = x_test.reshape((x_test.shape[0], 28, 28))

In [4]:
# Build activate functions
def relu(x):
    """Rectified linear unit: keep the positive part of x, zero elsewhere.

    Evaluated as ``x * (x > 0)``, i.e. x multiplied by its own positivity mask.
    """
    positive_mask = x > 0
    return x * positive_mask

# Input a m * n matrix, output a m * n matrix whose rows are exponentiated and normalized
def softmax(X):
    """Row-wise softmax of a 2-D array.

    Parameters
    ----------
    X : array of shape (m, n)
        One row of scores per sample.

    Returns
    -------
    array of shape (m, n)
        Each row holds non-negative values that sum to 1.
    """
    # Subtracting the row maximum leaves the result mathematically unchanged
    # (the factor exp(-max) cancels in the ratio) but keeps np.exp from
    # overflowing to inf for large scores or underflowing every entry to 0,
    # both of which would otherwise produce nan rows.
    shifted = X - np.max(X, axis=1, keepdims=True)
    Xexp = np.exp(shifted)
    return Xexp / np.sum(Xexp, axis=1, keepdims=True)

In [5]:
# Initialize the parameters
def param_init(input_size, kernel_size, output_size):
    """Draw random starting parameters for the one-convolution-layer network.

    Parameters
    ----------
    input_size : sequence of 2 ints
        Height and width of one input image.
    kernel_size : sequence of 3 ints
        Kernel height, kernel width and number of channels.
    output_size : int
        Number of output units.

    Returns
    -------
    K : array (kernel height, kernel width, channels)
        Convolution kernels.
    W : array (output_size, hidden height, hidden width, channels)
        Output-layer weights; the hidden height/width are
        ``input - kernel + 1`` along each image axis.
    b : array (output_size,)
        Output-layer bias, all zeros.
    """
    k_rows, k_cols, n_channels = kernel_size[0], kernel_size[1], kernel_size[2]
    hidden_rows = input_size[0] - k_rows + 1
    hidden_cols = input_size[1] - k_cols + 1

    # Standard-normal draws are shrunk by the larger image dimension.
    scale = max(input_size)

    # K is drawn before W so the global RNG is consumed in a fixed order.
    K = np.random.randn(k_rows, k_cols, n_channels) / scale
    W = np.random.randn(output_size, hidden_rows, hidden_cols, n_channels) / scale
    b = np.zeros(output_size)

    return K, W, b

In [6]:
# Build the forward step
# Model: Z = X * K → H = relu(Z) → U = WH + b → Yhat = softmax(U)
def Convolution(image, kernel):
    """Slide every kernel channel over a 2-D image without padding (stride 1).

    Parameters
    ----------
    image : array of shape (d1, d2)
    kernel : array of shape (k1, k2, C)
        C separate k1 * k2 kernels stacked on the last axis.

    Returns
    -------
    array of shape (d1 - k1 + 1, d2 - k2 + 1, C)
        Entry [a, b, c] is the sum of the elementwise product between the
        k1 * k2 image window whose top-left corner is (a, b) and
        kernel[:, :, c]. The kernel is not flipped.
    """
    img_rows, img_cols = image.shape
    ker_rows, ker_cols, n_channels = kernel.shape
    out_rows = img_rows - ker_rows + 1
    out_cols = img_cols - ker_cols + 1

    conv = np.zeros((out_rows, out_cols, n_channels))
    for row, col in np.ndindex(out_rows, out_cols):
        # The same image window is reused for every channel at this position.
        window = image[row:row + ker_rows, col:col + ker_cols]
        for ch in range(n_channels):
            conv[row, col, ch] = np.sum(window * kernel[:, :, ch])
    return conv

def forward_prop(X,K,W,b):
    """Forward pass for a single image: convolution, relu, dense layer, softmax.

    Parameters
    ----------
    X : 2-D image array
    K : kernels, shape (k1, k2, C)
    W : output weights, shape (ly, hidden height, hidden width, C)
    b : output bias, length ly

    Returns
    -------
    Z : convolution output, same shape as one slice W[i]
    H : relu(Z), same shape as Z
    Yhat : softmax of the output scores, shape (1, ly)
    """
    # Input -> hidden layer
    Z = Convolution(X, K)
    H = relu(Z)

    # Hidden layer -> output: one weighted sum over the whole hidden volume per
    # output unit (W broadcasts against H along its leading axis).
    scores = np.sum(W * H, axis=(1, 2, 3)) + b
    # softmax normalises along axis 1, so the scores become a single row.
    U = scores.reshape(1, W.shape[0])
    Yhat = softmax(U)

    return Z, H, Yhat

In [7]:
# Build the back-propagation step
def back_prop(K,W,b,Z,H,Yhat,X,Y,alpha):
    """Take one gradient step for a single sample and return the new parameters.

    The step direction is built from ``Y - Yhat`` and added to the parameters.
    For a softmax output with a one-hot Y this is an ascent step on the
    log-likelihood, equivalently a descent step on the cross-entropy.

    Parameters
    ----------
    K, W, b : current kernels, output weights and output bias
    Z, H, Yhat : values returned by forward_prop for this sample
    X : the 2-D input image
    Y : one-hot target, shape (1, ly)
    alpha : step size

    Returns
    -------
    (Kn, Wn, bn) : updated kernels, weights and bias as new arrays; the
    arrays passed in are not modified.
    """
    # Output-score error, flattened to length ly; it is also the bias step.
    grad_b = np.squeeze(Y - Yhat)
    # Outer product with the hidden activations: shape (ly,) + H.shape.
    grad_W = np.tensordot(grad_b, H, axes=0)
    # Push the error back through W by contracting over the ly axis: shape H.shape.
    grad_H = np.tensordot(grad_b, W, axes=1)
    # relu passes the signal only where its input was positive.
    grad_Z = grad_H * (Z > 0)
    # Treat grad_Z as the kernel bank and slide it over the image, which
    # yields an array with the shape of K: (lk[0], lk[1], C).
    grad_K = Convolution(X, grad_Z)

    return K + alpha * grad_K, W + alpha * grad_W, b + alpha * grad_b

In [8]:
# Build the complete Neural Network
def TwoLayer_CNN_train(X, Y, ChannelSize = (3,3), NumChannel = 1, OrigAlpha = 0.01, num_epochs = 10):
    """Train the convolution -> relu -> dense -> softmax network one sample at a time.

    Parameters
    ----------
    X : array of shape (N, height, width)
        Training images.
    Y : array of N labels, shape (N,) or (N, 1)
        Class label of every image. Labels may be any values that compare
        with ``==``; they do not have to be 0..C-1.
    ChannelSize : pair of ints
        Height and width of each convolution kernel.
    NumChannel : int
        Number of convolution kernels.
    OrigAlpha : float
        Initial step size. It is used for epochs 0-5, then multiplied by
        1e-1 for epochs 6-10, by 1e-2 for epochs 11-15 and by 1e-3 afterwards.
    num_epochs : int
        Number of epochs; every epoch performs N updates on samples drawn
        uniformly at random with replacement.

    Returns
    -------
    K, W, b : trained kernels, output weights and output bias. Output unit j
    corresponds to the j-th smallest distinct label in Y.

    Progress, the time elapsed since training started, and the running
    training accuracy of each epoch are printed.
    """
    # Flatten the labels once so the one-hot encoding and the accuracy check
    # both work on the same 1-D vector.
    labels = np.asarray(Y).reshape(-1)
    # Sorted distinct labels: column j of the one-hot matrix stands for classes[j].
    classes = np.unique(labels)
    # Recode Y as One-Hot
    Y_oh = (labels[:, None] == classes[None, :]).astype(float)

    # Indicate number of units per layer
    N = X.shape[0] # Number of samples
    xsize = X.shape[1:] # Size of every sample
    ksize = (ChannelSize[0],ChannelSize[1],NumChannel) # Size of the kernel bank
    ysize = Y_oh.shape[1] # Number of classes

    # Initialize the parameters
    K,W,b = param_init(xsize,ksize,ysize)

    # Run num_epochs training epochs, reporting the accuracy after each one
    tim = datetime.datetime.now()
    for epoch in range(num_epochs):
        # Step-wise learning-rate decay
        if epoch <= 5:
            alpha = OrigAlpha
        elif epoch <= 10:
            alpha = OrigAlpha * 1e-1
        elif epoch <= 15:
            alpha = OrigAlpha * 1e-2
        else:
            alpha = OrigAlpha * 1e-3
        total_cor = 0
        trial_time = int(N)
        for n in range(trial_time):
            # Draw one sample with replacement
            r = np.random.randint(N)
            x_samp = X[r]
            y_samp = Y_oh[[r]] # Keep a leading axis: shape (1, ysize)
            # Forward
            Z, H, Yhat = forward_prop(x_samp,K,W,b)
            # Translate the winning output unit back into a label before
            # comparing, so the accuracy stays right for labels other than 0..C-1.
            pred = classes[np.argmax(Yhat)]
            total_cor += int(pred == labels[r])
            # Backward
            K,W,b = back_prop(K,W,b,Z,H,Yhat,x_samp,y_samp,alpha)
            if n%10000==0:
                print("Epoch: {}, Trial: {}".format(epoch,n))
                tim_b = datetime.datetime.now()
                print("Cost Time: ", tim_b-tim)
        print("Training Accuracy: ",total_cor / trial_time)
    return K,W,b

In [10]:
# Seed NumPy's global generator before training; the training routine draws
# its random samples from it.
np.random.seed(123)
# Train with five kernels of the default ChannelSize for up to ten epochs.
K,W,b = TwoLayer_CNN_train(
    X=x_trainnew,
    Y=y_train,
    NumChannel=5,
    OrigAlpha=0.01,
    num_epochs=10,
)

Epoch: 0, Trial: 0
Cost Time:  0:00:00.073800
Epoch: 0, Trial: 10000
Cost Time:  0:04:19.075462
Epoch: 0, Trial: 20000
Cost Time:  0:04:10.694176
Epoch: 0, Trial: 30000
Cost Time:  0:04:08.995468
Epoch: 0, Trial: 40000
Cost Time:  0:04:04.848908
Epoch: 0, Trial: 50000
Cost Time:  0:04:18.398255
Training Accuracy:  0.9222333333333333
Epoch: 1, Trial: 0
Cost Time:  0:04:31.576354
Epoch: 1, Trial: 10000
Cost Time:  0:04:13.101966
Epoch: 1, Trial: 20000
Cost Time:  0:05:03.165314
Epoch: 1, Trial: 30000
Cost Time:  0:05:05.647999
Epoch: 1, Trial: 40000
Cost Time:  0:04:55.290188
Epoch: 1, Trial: 50000
Cost Time:  0:04:43.278963
Training Accuracy:  0.9540333333333333
Epoch: 2, Trial: 0
Cost Time:  0:03:50.139090
Epoch: 2, Trial: 10000
Cost Time:  0:03:47.571942
Epoch: 2, Trial: 20000
Cost Time:  0:03:58.935516
Epoch: 2, Trial: 30000
Cost Time:  0:03:52.517104
Epoch: 2, Trial: 40000
Cost Time:  0:03:54.477148
Epoch: 2, Trial: 50000
Cost Time:  0:03:53.506373
Training Accuracy:  0.962083333333

In [12]:
# For a given neural network, predict an input X
def predict_NN(X,K,W,b):
    """Classify one image with the trained parameters K, W, b.

    Returns a length-1 integer array holding the index of the output unit
    with the highest softmax probability.
    """
    _, _, class_probs = forward_prop(X, K, W, b)
    # forward_prop returns one row of probabilities; pick its largest column.
    return np.argmax(class_probs, axis=1)

In [13]:
# Accuracy on the train set: classify every image and count the matches
N = len(x_trainnew)
total_cor = 0
for n, image in enumerate(x_trainnew):
    hit = predict_NN(image, K, W, b) == y_train[n]
    total_cor += 1 if hit else 0
total_cor / N

0.9888333333333333

In [14]:
# Accuracy on the test set: classify every image and count the matches
N = len(x_testnew)
total_cor = 0
for n, image in enumerate(x_testnew):
    hit = predict_NN(image, K, W, b) == y_test[n]
    total_cor += 1 if hit else 0
total_cor / N

0.9734