In [1]:
import numpy as np

In [2]:
from keras.datasets import mnist

(X_train, Y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape((len(X_train), np.prod(X_train.shape[1:])))
X_test = X_test.reshape((len(X_test), np.prod(X_test.shape[1:])))

In [3]:
X_train = X_train / X_train.max()
X_test = X_test / X_test.max()

y_train = np.zeros((Y_train.size, int(Y_train.max()) + 1))
y_train[np.arange(Y_train.size), Y_train.astype(np.int)] = 1.0

X_train = X_train.T
y_train = y_train.T
X_test = X_test.T

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(784, 60000) (10, 60000)
(784, 10000) (10000,)


In [4]:
class MyMLP():
    def __init__(self, numClass, X_train, y_train, batch_size, hidden_size):
        self.X_train = X_train
        self.y_train = y_train
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        
        self.W1 = np.random.randn(self.hidden_size, self.X_train.shape[0]) * np.sqrt(
            2. / self.X_train.shape[0])
        self.b1 = np.zeros((self.hidden_size, 1))
        self.W2 = np.random.randn(numClass, self.hidden_size)* np.sqrt(2. / self.hidden_size)
        self.b2 = np.zeros((numClass, 1))
    
    def softmax(self, X):
        exponent = np.exp(X)
        return exponent / np.sum(exponent, axis=0)

    def ReLU(self, X):
        return np.maximum(0, X)

    def gradReLU(self, X):
        return (X > 0) * 1
    
    def feed_forward(self, X):
        self.Z1 = np.matmul(self.W1, X) + self.b1
        self.A1 = self.ReLU(self.Z1)
        self.Z2 = np.matmul(self.W2, self.A1) + self.b2
        self.A2 = self.softmax(self.Z2)
        
    def crossEntropyLoss(self, y, y_hat, lamda):
        m = y.shape[1]
        loss = -(1 / m) * np.sum(y*np.log(y_hat)) + lamda / (2*m) * (np.sum(
            self.W1**2) + np.sum(self.W2**2))

        return loss
    
    def back_prop(self, X, y): # to backpropagate the error
        m = X.shape[1]
        self.dZ2 = self.A2 - y
        self.dW2 = (1./m) * np.matmul(self.dZ2, self.A1.T)
        self.db2 = (1./m) * np.sum(self.dZ2, axis=1, keepdims=True)
        self.dA1 = np.matmul(self.W2.T, self.dZ2)
        self.dZ1 = self.dA1 * self.gradReLU(self.Z1)
        self.dW1 = (1./m) * np.matmul(self.dZ1, X.T)
        self.db1 = (1./m) * np.sum(self.dZ1)
        
    def update(self, lr, lamda):
        self.W2 = self.W2 - lr * self.dW2 - (self.W2*lamda*lr) / self.batch_size
        self.b2 = self.b2 - lr * self.db2
        self.W1 = self.W1 - lr * self.dW1 - (self.W1*lamda*lr) / self.batch_size
        self.b1 = self.b1 - lr * self.db1
        
    def predict(self, X):
        Z1 = np.matmul(self.W1, X) + self.b1
        A1 = self.ReLU(Z1)
        Z2 = np.matmul(self.W2, A1) + self.b2
        A2 = self.softmax(Z2)

        return np.argmax(A2, axis=0)
    
    def train(self, epochs, lamda, lr):
        for epoch in range(epochs):
            splitIndex = np.random.permutation(self.X_train.shape[1])[:self.batch_size]
            X = X_train[:, splitIndex]
            y = y_train[:, splitIndex]
            
            self.feed_forward(X)

            loss = self.crossEntropyLoss(y, self.A2, lamda)
            print('Epoch {} \tTrain Loss: {:.4f}'.format(epoch + 1, loss))
            
            self.back_prop(X, y)
            self.update(lr, lamda)

In [8]:
numClass = 10
epochs = 1000
batch_size = 1000
hidden_size = 512
lr, lamda = 1, 2

In [9]:
model = MyMLP(numClass, X_train, y_train, batch_size, hidden_size)
model.train(epochs, lamda, lr)

Epoch 1 	Train Loss: 3.4069
Epoch 2 	Train Loss: 3.0018
Epoch 3 	Train Loss: 3.0426
Epoch 4 	Train Loss: 2.7175
Epoch 5 	Train Loss: 2.9511
Epoch 6 	Train Loss: 2.4118
Epoch 7 	Train Loss: 2.3852
Epoch 8 	Train Loss: 1.9909
Epoch 9 	Train Loss: 2.3536
Epoch 10 	Train Loss: 2.5766
Epoch 11 	Train Loss: 2.6794
Epoch 12 	Train Loss: 3.0046
Epoch 13 	Train Loss: 2.6035
Epoch 14 	Train Loss: 2.2470
Epoch 15 	Train Loss: 1.9382
Epoch 16 	Train Loss: 1.7520
Epoch 17 	Train Loss: 2.3773
Epoch 18 	Train Loss: 3.4125
Epoch 19 	Train Loss: 2.7377
Epoch 20 	Train Loss: 2.4722
Epoch 21 	Train Loss: 2.1325
Epoch 22 	Train Loss: 2.0716
Epoch 23 	Train Loss: 2.0968
Epoch 24 	Train Loss: 1.8480
Epoch 25 	Train Loss: 1.8302
Epoch 26 	Train Loss: 1.9495
Epoch 27 	Train Loss: 2.1580
Epoch 28 	Train Loss: 1.8652
Epoch 29 	Train Loss: 1.8307
Epoch 30 	Train Loss: 1.6016
Epoch 31 	Train Loss: 1.4916
Epoch 32 	Train Loss: 1.4790
Epoch 33 	Train Loss: 1.5537
Epoch 34 	Train Loss: 1.6845
Epoch 35 	Train Loss: 1

Epoch 278 	Train Loss: 0.6047
Epoch 279 	Train Loss: 0.5630
Epoch 280 	Train Loss: 0.6043
Epoch 281 	Train Loss: 0.5943
Epoch 282 	Train Loss: 0.5777
Epoch 283 	Train Loss: 0.5837
Epoch 284 	Train Loss: 0.5847
Epoch 285 	Train Loss: 0.5877
Epoch 286 	Train Loss: 0.6285
Epoch 287 	Train Loss: 0.6222
Epoch 288 	Train Loss: 0.5825
Epoch 289 	Train Loss: 0.5818
Epoch 290 	Train Loss: 0.5885
Epoch 291 	Train Loss: 0.6047
Epoch 292 	Train Loss: 0.5650
Epoch 293 	Train Loss: 0.5628
Epoch 294 	Train Loss: 0.5913
Epoch 295 	Train Loss: 0.6159
Epoch 296 	Train Loss: 0.5903
Epoch 297 	Train Loss: 0.5664
Epoch 298 	Train Loss: 0.5642
Epoch 299 	Train Loss: 0.5580
Epoch 300 	Train Loss: 0.5923
Epoch 301 	Train Loss: 0.5510
Epoch 302 	Train Loss: 0.5518
Epoch 303 	Train Loss: 0.5683
Epoch 304 	Train Loss: 0.5530
Epoch 305 	Train Loss: 0.5517
Epoch 306 	Train Loss: 0.5697
Epoch 307 	Train Loss: 0.5747
Epoch 308 	Train Loss: 0.5844
Epoch 309 	Train Loss: 0.5501
Epoch 310 	Train Loss: 0.5496
Epoch 311 

Epoch 552 	Train Loss: 0.3794
Epoch 553 	Train Loss: 0.3607
Epoch 554 	Train Loss: 0.3649
Epoch 555 	Train Loss: 0.3707
Epoch 556 	Train Loss: 0.3463
Epoch 557 	Train Loss: 0.3334
Epoch 558 	Train Loss: 0.3440
Epoch 559 	Train Loss: 0.3508
Epoch 560 	Train Loss: 0.3592
Epoch 561 	Train Loss: 0.3247
Epoch 562 	Train Loss: 0.3211
Epoch 563 	Train Loss: 0.3418
Epoch 564 	Train Loss: 0.3524
Epoch 565 	Train Loss: 0.3349
Epoch 566 	Train Loss: 0.3513
Epoch 567 	Train Loss: 0.3387
Epoch 568 	Train Loss: 0.3489
Epoch 569 	Train Loss: 0.3505
Epoch 570 	Train Loss: 0.3260
Epoch 571 	Train Loss: 0.3301
Epoch 572 	Train Loss: 0.3642
Epoch 573 	Train Loss: 0.3319
Epoch 574 	Train Loss: 0.3549
Epoch 575 	Train Loss: 0.3740
Epoch 576 	Train Loss: 0.3671
Epoch 577 	Train Loss: 0.3624
Epoch 578 	Train Loss: 0.3901
Epoch 579 	Train Loss: 0.3733
Epoch 580 	Train Loss: 0.3887
Epoch 581 	Train Loss: 0.3331
Epoch 582 	Train Loss: 0.3221
Epoch 583 	Train Loss: 0.3360
Epoch 584 	Train Loss: 0.3218
Epoch 585 

Epoch 826 	Train Loss: 0.2699
Epoch 827 	Train Loss: 0.2764
Epoch 828 	Train Loss: 0.2570
Epoch 829 	Train Loss: 0.2747
Epoch 830 	Train Loss: 0.2791
Epoch 831 	Train Loss: 0.2621
Epoch 832 	Train Loss: 0.2581
Epoch 833 	Train Loss: 0.2686
Epoch 834 	Train Loss: 0.2882
Epoch 835 	Train Loss: 0.2636
Epoch 836 	Train Loss: 0.2723
Epoch 837 	Train Loss: 0.2590
Epoch 838 	Train Loss: 0.2665
Epoch 839 	Train Loss: 0.2770
Epoch 840 	Train Loss: 0.2958
Epoch 841 	Train Loss: 0.2544
Epoch 842 	Train Loss: 0.2723
Epoch 843 	Train Loss: 0.2748
Epoch 844 	Train Loss: 0.2802
Epoch 845 	Train Loss: 0.2541
Epoch 846 	Train Loss: 0.2676
Epoch 847 	Train Loss: 0.2877
Epoch 848 	Train Loss: 0.2786
Epoch 849 	Train Loss: 0.2691
Epoch 850 	Train Loss: 0.2879
Epoch 851 	Train Loss: 0.2689
Epoch 852 	Train Loss: 0.2791
Epoch 853 	Train Loss: 0.3068
Epoch 854 	Train Loss: 0.2825
Epoch 855 	Train Loss: 0.2812
Epoch 856 	Train Loss: 0.2771
Epoch 857 	Train Loss: 0.2930
Epoch 858 	Train Loss: 0.2738
Epoch 859 

In [10]:
y_pred = model.predict(X_test)
accuracy = sum(y_pred == y_test) * 1 / len(y_test)

print('Best Model\nBatch size: {}, Hidden Size: {}, Learning Rate: {}'.format(
    batch_size, hidden_size, lr))
print('\nTest Accuracy of Overall:   {:.2f}%'.format(accuracy))

Best Model
Batch size: 1000, Hidden Size: 512, Learning Rate: 1

Test Accuracy of Overall:   0.97%
