In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [0]:
def relu(inputs):
    """Rectified linear unit applied element-wise.

    Every entry of ``inputs`` that is below zero is replaced by zero; all
    other entries are returned unchanged.
    """
    floor = 0
    return np.maximum(inputs, floor)

# output probability distribution function
def softmax(inputs):
    """Row-wise softmax of a 2-D array of scores.

    Each row of ``inputs`` is mapped to a probability distribution
    (non-negative entries that sum to 1 along axis 1).

    The row maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged (the factor cancels in the ratio) but
    keeps ``np.exp`` from overflowing to ``inf`` on large scores, which would
    otherwise produce ``nan`` probabilities.
    """
    inputs = np.asarray(inputs)
    shifted = inputs - np.max(inputs, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=1, keepdims=True)

# loss
def cross_entropy(inputs, y, eps=1e-12):
    """Mean cross-entropy between predicted probabilities and targets.

    Parameters
    ----------
    inputs : 2-D array
        Predicted probabilities, one row per sample.
    y : 2-D array
        Targets, one row per sample; the target class of each row is taken
        as ``argmax`` along axis 1 (e.g. one-hot rows).
    eps : float, optional
        Lower bound applied to the selected probabilities before taking the
        logarithm, so a probability that underflowed to 0 yields a large
        finite loss instead of ``inf``.

    Returns
    -------
    float
        ``-mean(log(p_target))`` over the rows of ``inputs``.
    """
    indices = np.argmax(y, axis=1).astype(int)
    # pick, for every row, the probability assigned to its target class
    probability = inputs[np.arange(len(inputs)), indices]
    log = np.log(np.maximum(probability, eps))
    loss = -1.0 * np.sum(log) / len(log)
    return loss

# L2 regularization
def L2_regularization(la, weight1, weight2):
    """L2 penalty over both weight matrices.

    Returns ``0.5 * la * sum(weight1**2) + 0.5 * la * sum(weight2**2)``,
    where ``la`` is the regularization strength.
    """
    half_lambda = 0.5 * la
    penalties = [half_lambda * np.sum(w * w) for w in (weight1, weight2)]
    return penalties[0] + penalties[1]

In [0]:
class Network:
    """Fully connected network with one ReLU hidden layer and a softmax output.

    Training uses mini-batch gradient descent on the cross-entropy loss plus
    an L2 penalty on both weight matrices. Batches are consumed in the order
    given; the data is not shuffled.
    """

    def __init__(self, 
                 num_nodes_in_layers, 
                 batch_size,
                 num_epochs,
                 learning_rate, 
                 weights_file,
                 l2_lambda = 0.01
                 ):
        """Store the hyper-parameters and randomly initialise the parameters.

        Parameters
        ----------
        num_nodes_in_layers : sequence of int
            Layer widths; entries 0, 1 and 2 are the input, hidden and output
            sizes respectively.
        batch_size : int
            Number of consecutive samples per gradient step.
        num_epochs : int
            Number of full passes over the training data.
        learning_rate : float
            Step size of the gradient-descent update.
        weights_file : str
            Stored on the instance only; nothing in this class reads or
            writes that file.
        l2_lambda : float, optional
            L2 regularization strength, used both in the reported loss and in
            the weight gradients. Defaults to 0.01.
        """

        self.num_nodes_in_layers = num_nodes_in_layers
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        self.weights_file = weights_file
        self.l2_lambda = l2_lambda

        # build the network
        #         w1/b1    w2/b2   
        # inputs ---> hidden ---> output
        #    x     z1  a1     z2  a2=y
        # weights are drawn from a standard normal, biases start at zero
        self.weight1 = np.random.normal(0, 1, [self.num_nodes_in_layers[0], self.num_nodes_in_layers[1]])
        self.bias1 = np.zeros((1, self.num_nodes_in_layers[1]))
        self.weight2 = np.random.normal(0, 1, [self.num_nodes_in_layers[1], self.num_nodes_in_layers[2]])
        self.bias2 = np.zeros((1, self.num_nodes_in_layers[2]))
        # regularized loss of every processed batch, appended by train()
        self.loss = []

    def _forward(self, inputs):
        """Forward pass: return ``(hidden activations, class probabilities)``."""
        z1 = np.dot(inputs, self.weight1) + self.bias1
        a1 = relu(z1)
        z2 = np.dot(a1, self.weight2) + self.bias2
        return a1, softmax(z2)

    def train(self, inputs, labels, verbose = True):
        """Fit the network with mini-batch gradient descent.

        Parameters
        ----------
        inputs : 2-D array
            Training samples, one row per sample.
        labels : 2-D array
            Targets, one row per sample, with the same layout as the softmax
            output (they are subtracted from it element-wise), e.g. one-hot.
        verbose : bool, optional
            When True (default) a progress line is printed after every batch.

        The regularized loss of each batch is appended to ``self.loss``.
        """

        for epoch in range(self.num_epochs): # training begin
            # `iteration` is the index of the first sample of the batch
            for iteration in range(0, len(inputs), self.batch_size):

                # batch input
                inputs_batch = inputs[iteration:iteration+self.batch_size]
                labels_batch = labels[iteration:iteration+self.batch_size]

                # forward pass
                a1, y = self._forward(inputs_batch)

                # calculate loss
                loss = cross_entropy(y, labels_batch)
                loss += L2_regularization(self.l2_lambda, self.weight1, self.weight2)
                self.loss.append(loss)

                # backward pass
                # gradient of the mean cross-entropy w.r.t. the pre-softmax scores
                delta_y = (y - labels_batch) / y.shape[0]
                delta_hidden_layer = np.dot(delta_y, self.weight2.T)
                delta_hidden_layer[a1 <= 0] = 0 # derivatives of relu

                # backpropagation
                weight2_gradient = np.dot(a1.T, delta_y) # forward * backward
                bias2_gradient = np.sum(delta_y, axis = 0, keepdims = True)

                weight1_gradient = np.dot(inputs_batch.T, delta_hidden_layer)
                bias1_gradient = np.sum(delta_hidden_layer, axis = 0, keepdims = True)

                # L2 regularization (derivative of 0.5 * lambda * sum(w^2))
                weight2_gradient += self.l2_lambda * self.weight2
                weight1_gradient += self.l2_lambda * self.weight1

                # stochastic gradient descent
                self.weight1 -= self.learning_rate * weight1_gradient #update weight and bias
                self.bias1 -= self.learning_rate * bias1_gradient
                self.weight2 -= self.learning_rate * weight2_gradient
                self.bias2 -= self.learning_rate * bias2_gradient

                if verbose:
                    print('=== Epoch: {:d}/{:d}\tIteration:{:d}\tLoss: {:.2f} '.format(epoch+1, self.num_epochs, iteration+1, loss))
        # NOTE: the trained parameters are not persisted anywhere;
        # self.weights_file is currently unused.

    def test(self, inputs, labels):
        """Print and return the classification accuracy on ``inputs``.

        Parameters
        ----------
        inputs : 2-D array
            Samples to classify, one row per sample.
        labels : 1-D array
            Class indices, compared against the arg-max of the predicted
            probabilities.

        Returns
        -------
        float
            Fraction of correctly classified samples, in [0, 1].
        """
        _, probs = self._forward(inputs)
        acc = float(np.sum(np.argmax(probs, 1) == labels)) / float(len(labels))
        print('Test accuracy: {:.2f}%'.format(acc*100))
        return acc

In [4]:
!pip install mnist
import mnist


# load data
num_classes = 10
train_images = mnist.train_images() #[60000, 28, 28]
train_labels = mnist.train_labels()
test_images = mnist.test_images()
test_labels = mnist.test_labels()

print("Training...")

# data processing
X_train = train_images.reshape(train_images.shape[0], train_images.shape[1]*train_images.shape[2]).astype('float32') #flatten 28x28 to 784x1 vectors, [60000, 784]
x_train = X_train / 255 #normalization
y_train = np.eye(num_classes)[train_labels] #convert label to one-hot

X_test = test_images.reshape(test_images.shape[0], test_images.shape[1]*test_images.shape[2]).astype('float32') #flatten 28x28 to 784x1 vectors, [60000, 784]
x_test = X_test / 255 #normalization
y_test = test_labels

net = Network(
                 num_nodes_in_layers = [784, 20, 10], 
                 batch_size = 1,
                 num_epochs = 5,
                 learning_rate = 0.001, 
                 weights_file = 'weights.pkl'
             )

net.train(x_train, y_train)


print("Testing...")
net.test(x_test, y_test)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
=== Epoch: 5/5	Iteration:55004	Loss: 0.48 
=== Epoch: 5/5	Iteration:55005	Loss: 0.48 
=== Epoch: 5/5	Iteration:55006	Loss: 0.50 
=== Epoch: 5/5	Iteration:55007	Loss: 0.46 
=== Epoch: 5/5	Iteration:55008	Loss: 0.54 
=== Epoch: 5/5	Iteration:55009	Loss: 0.42 
=== Epoch: 5/5	Iteration:55010	Loss: 0.82 
=== Epoch: 5/5	Iteration:55011	Loss: 0.43 
=== Epoch: 5/5	Iteration:55012	Loss: 2.76 
=== Epoch: 5/5	Iteration:55013	Loss: 0.43 
=== Epoch: 5/5	Iteration:55014	Loss: 0.62 
=== Epoch: 5/5	Iteration:55015	Loss: 0.47 
=== Epoch: 5/5	Iteration:55016	Loss: 1.53 
=== Epoch: 5/5	Iteration:55017	Loss: 0.44 
=== Epoch: 5/5	Iteration:55018	Loss: 0.53 
=== Epoch: 5/5	Iteration:55019	Loss: 0.68 
=== Epoch: 5/5	Iteration:55020	Loss: 1.00 
=== Epoch: 5/5	Iteration:55021	Loss: 0.42 
=== Epoch: 5/5	Iteration:55022	Loss: 0.50 
=== Epoch: 5/5	Iteration:55023	Loss: 1.64 
=== Epoch: 5/5	Iteration:55024	Loss: 0.43 
=== Epoch: 5/5	Iteration:55025	L