In [2]:
import numpy as np
x_train = np.genfromtxt("mnist_small_train_in.txt", delimiter=',', dtype=int)
y_train = np.genfromtxt("mnist_small_train_out.txt", delimiter=',', dtype=int)
x_val = np.genfromtxt("mnist_small_test_in.txt", delimiter=',', dtype=int)
y_val = np.genfromtxt("mnist_small_test_out.txt", delimiter=',', dtype=int)

In [3]:
print(x_train.shape, y_train.shape)
print(x_val.shape, y_val.shape)

(6006, 784) (6006,)
(1004, 784) (1004,)


In [4]:
def sigmoid(x, der=False):
    if der:
        return (np.exp(-x))/((np.exp(-x)+1)**2)
    return 1/(1 + np.exp(-x))

In [5]:
def softmax(x):
    exps = np.exp(x - x.max())
    return exps / np.sum(exps, axis=0)

In [20]:
class Neuralnetwork:
    def __init__(self, size, epochs=200, l_rate=1):
        self.size = size
        self.epochs = epochs
        self.l_rate = l_rate
        self.params = self.initialization()
    
    def initialization(self):
        input_layer = self.size[0]
        hidden_layer = self.size[1]
        output_layer = self.size[2]
        params = {
            'W1': np.random.randn(hidden_layer, input_layer) * np.sqrt(1. / input_layer),
            'B1': np.zeros((hidden_layer,1)) * np.sqrt(1. / input_layer),
            'W2': np.random.randn(output_layer, hidden_layer)* np.sqrt(1. / hidden_layer),
            'B2': np.zeros((output_layer,1)) * np.sqrt(1. / hidden_layer),
        }
        return params
    
    def forward(self, x_train):
        params = self.params
        params['A1'] = params['W1'] @ x_train.T + params['B1']
        params['Z1'] = sigmoid(params['A1'])
        params['A2'] = params['W2'] @ params['Z1'] + params['B2']
        params['Z2'] = softmax(params['A2'])
        return params['Z2']
    
    def backward(self, x_train, y_train, output):
        params, n, momentan, change_weight = self.params, x_train.shape[0], .8, {}
        error = output - self.one_hot_encoding(y_train)
        error_A2 = error @ params['Z1'].T
        error_A1 = np.array([x*y for x, y in zip(params['W2'].T @ error, sigmoid(params['Z1'], der=True))])
        change_weight['W2'] = error_A2 / n 
        change_weight['B2'] = error @ np.ones((n,1)) / n
        change_weight['W1'] = error_A1 @ x_train / n
        change_weight['B1'] = error_A1 @ np.ones((n,1)) / n
        
        #optimization parameters with momentan
        change_weight['W2'] = momentan*params['W2'] + (1-momentan)*change_weight['W2']
        change_weight['B2'] = momentan*params['B2'] + (1-momentan)*change_weight['B2']
        change_weight['W1'] = momentan*params['W1'] + (1-momentan)*change_weight['W1']
        change_weight['B1'] = momentan*params['B1'] + (1-momentan)*change_weight['B1']
        
        params['W2'] -= change_weight['W2'] * self.l_rate
        params['B2'] -= change_weight['B2'] * self.l_rate
        params['W1'] -= change_weight['W1'] * self.l_rate
        params['B1'] -= change_weight['B1'] * self.l_rate
        return params
    
    def one_hot_encoding(self, y):
        lst1 = []
        for i in range(10):
            lst2 = []
            for j in range(y.shape[0]):
                if j == y[i]:
                    lst2.append(1)
                else:
                    lst2.append(0)
            lst1.append(lst2)
        return np.array(lst1)     
    
    def accuracy(self, x_val, y_val):
        predict = []
        output = self.forward(x_val).T
        for i in range(y_val.shape[0]):
            pred = np.argmax(output[i])
            #print(pred, y_val[i])
            predict.append(pred!=y_val[i])
        return sum(i for i in predict)/y_val.shape[0]
    
    def train(self, x_train, y_train, x_val, y_val):
        for i in range(self.epochs):
            output = self.forward(x_train)
            weight = self.backward(x_train, y_train, output)
            accur = self.accuracy(x_val, y_val)
            print('Epoch:{0}, Misclassification Rate"{1:.0%}'.format(i+1, accur))
            
nn = Neuralnetwork([784,300,10])
nn.train(x_train, y_train, x_val, y_val)

Epoch:1, Misclassification Rate"90%
Epoch:2, Misclassification Rate"90%
Epoch:3, Misclassification Rate"90%
Epoch:4, Misclassification Rate"90%
Epoch:5, Misclassification Rate"90%
Epoch:6, Misclassification Rate"90%
Epoch:7, Misclassification Rate"90%
Epoch:8, Misclassification Rate"90%
Epoch:9, Misclassification Rate"90%
Epoch:10, Misclassification Rate"90%
Epoch:11, Misclassification Rate"90%
Epoch:12, Misclassification Rate"90%
Epoch:13, Misclassification Rate"90%
Epoch:14, Misclassification Rate"90%
Epoch:15, Misclassification Rate"90%
Epoch:16, Misclassification Rate"90%
Epoch:17, Misclassification Rate"90%
Epoch:18, Misclassification Rate"90%
Epoch:19, Misclassification Rate"90%
Epoch:20, Misclassification Rate"90%
Epoch:21, Misclassification Rate"90%
Epoch:22, Misclassification Rate"90%
Epoch:23, Misclassification Rate"90%
Epoch:24, Misclassification Rate"90%
Epoch:25, Misclassification Rate"90%
Epoch:26, Misclassification Rate"90%
Epoch:27, Misclassification Rate"90%
Epoch:28, 