All code is based on the "Neural Networks and Deep Learning" course by DeepLearning.AI.

In [None]:
# 2 Layer NN(ReLU, Softmax) with no batch
import sys
import numpy as np
import tensorflow as tf

# Load MNIST, divide the pixel values by 255 and flatten every image so that
# each sample becomes one column of the resulting matrix.
(images_train, labels_train), (images_test, labels_test) = tf.keras.datasets.mnist.load_data()
images_train = (images_train / 255.0).reshape(len(images_train), -1).T
images_test = (images_test / 255.0).reshape(len(images_test), -1).T

def one_hot_encoding(data, num_classes=None):
    """Return a one-hot matrix with one column per sample.

    Args:
        data: sequence of non-negative integer class labels.
        num_classes: number of rows of the result. Defaults to
            ``max(data) + 1`` so that row ``k`` always stands for label ``k``,
            even when the smallest label present is not 0.

    Returns:
        Float array of shape ``(num_classes, len(data))`` where entry
        ``[label, i]`` is 1 for sample ``i`` and every other entry is 0.

    Raises:
        ValueError: if a label is negative or not smaller than ``num_classes``.
    """
    labels = np.asarray(data, dtype=int).ravel()
    if num_classes is None:
        num_classes = int(labels.max()) + 1 if labels.size else 0
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            'labels must lie in [0, {}), got range [{}, {}]'.format(
                num_classes, labels.min(), labels.max()))
    encoded = np.zeros((num_classes, labels.size))
    # One vectorised assignment replaces the per-sample Python loop.
    encoded[labels, np.arange(labels.size)] = 1
    return encoded

# Replace the integer label vectors by their one-hot matrices.
labels_train, labels_test = map(one_hot_encoding, (labels_train, labels_test))

class NeuralNetwork_2():
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Samples are stored column-wise: ``features`` has shape ``(n0, m)`` and
    ``labels`` is a one-hot matrix of shape ``(n2, m)``. This variant uses the
    whole data set for every parameter update.
    """

    def __init__(self, n0, n1, n2):
        """Create random parameters for layer sizes n0 (input), n1 (hidden), n2 (output)."""
        # Weights are uniform in [0, 0.01), biases uniform in [0, 1).
        self.weight1 = np.random.rand(n1, n0) * 0.01
        self.weight2 = np.random.rand(n2, n1) * 0.01
        self.bias1 = np.random.rand(n1)
        self.bias2 = np.random.rand(n2)

    def linear_hypo(self, w, x, b):
        """Return ``w @ x`` with the bias vector ``b`` added to every column."""
        # Transposing puts the unit axis last so the 1-D bias broadcasts.
        return (np.matmul(w, x).T + b).T

    def sigmoid(self, z):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-z))

    def relu(self, z):
        """Element-wise ``max(z, 0)``."""
        return np.maximum(z, 0)

    def softmax(self, z):
        """Column-wise softmax.

        The column maximum is subtracted first; the result is mathematically
        unchanged, but ``exp`` can no longer overflow into ``inf / inf = nan``.
        """
        shifted = z - np.max(z, axis=0, keepdims=True)
        exponentials = np.exp(shifted)
        return exponentials / exponentials.sum(axis=0, keepdims=True)

    def hypothesis(self, func, lh):
        """Apply the activation called ``func`` ('sigmoid', 'relu', 'softmax') to ``lh``.

        An unknown name terminates the program through ``sys.exit``.
        """
        if func == 'sigmoid':
            return self.sigmoid(lh)
        elif func == 'relu':
            return self.relu(lh)
        elif func == 'softmax':
            return self.softmax(lh)
        else:
            sys.exit('Error in hypothesis: There is no {} function'.format(func))

    def cost(self, y, hypo):
        """Mean cross-entropy between one-hot ``y`` and predictions ``hypo``."""
        # Clipping keeps log() finite; otherwise 0 * log(0) turns the cost into nan.
        log_probs = np.log(np.clip(hypo, 1e-12, 1.0))
        per_sample = np.multiply(y, log_probs).sum(axis=0)
        return -np.average(per_sample)

    def train(self, features, labels, learning_rate, EPOCHS):
        """Run full-batch gradient descent and print the cost about ten times.

        Args:
            features: array of shape ``(n0, m)``.
            labels: one-hot array of shape ``(n2, m)``.
            learning_rate: step size applied to every gradient.
            EPOCHS: index of the last iteration; ``EPOCHS + 1`` updates are
                performed so that iteration ``EPOCHS`` itself is reported.
        """
        # Integer interval of at least 1: avoids the float modulo and the
        # division by zero that EPOCHS / 10 caused for EPOCHS == 0.
        report_every = max(1, EPOCHS // 10)
        for epoch in range(EPOCHS + 1):
            # layer1: relu, layer2: softmax, cost: cross-entropy
            z1 = self.linear_hypo(self.weight1, features, self.bias1)
            layer1 = self.hypothesis('relu', z1)
            z2 = self.linear_hypo(self.weight2, layer1, self.bias2)
            layer2 = self.hypothesis('softmax', z2)

            # gradient
            # NOTE(review): dw1/dw2 are summed over the samples while db1/db2
            # are averaged, so the weight step is m times larger than the bias
            # step. Left as is because the learning rates passed by callers
            # depend on this scaling.
            dz2 = layer2 - labels
            dw2 = np.matmul(dz2, layer1.T)
            db2 = np.average(dz2, axis=1)

            # relu gradient: 1 where z1 > 0, else 0
            dz1 = np.multiply(np.matmul(self.weight2.T, dz2), np.where(z1 > 0, 1, 0))
            dw1 = np.matmul(dz1, features.T)
            db1 = np.average(dz1, axis=1)

            self.weight2 = self.weight2 - dw2 * learning_rate
            self.weight1 = self.weight1 - dw1 * learning_rate
            self.bias2 = self.bias2 - db2 * learning_rate
            self.bias1 = self.bias1 - db1 * learning_rate

            if epoch % report_every == 0:
                # Cost of the predictions made before this iteration's update.
                error = self.cost(labels, layer2)
                print("iter: {:4} error: {:10.4f}".format(epoch, error))
# Input size is the number of feature rows; 5 hidden units, 10 output classes.
model = NeuralNetwork_2(n0=len(images_train), n1=5, n2=10)
model.train(images_train, labels_train, learning_rate=0.001, EPOCHS=100)


iter:    0 error:     2.3499
iter:   10 error:     2.3504
iter:   20 error:     2.3503
iter:   30 error:     2.3502
iter:   40 error:     2.3501
iter:   50 error:     2.3500
iter:   60 error:     2.3499
iter:   70 error:     2.3499
iter:   80 error:     2.3498
iter:   90 error:     2.3497
iter:  100 error:     2.3496


In [None]:
# 2 Layer NN(ReLU, Softmax) with batch
import sys
import numpy as np
import tensorflow as tf

# Load MNIST, flatten every image into one column and divide the pixel values by 255.
(images_train, labels_train), (images_test, labels_test) = tf.keras.datasets.mnist.load_data()
images_train = images_train.reshape(images_train.shape[0], -1).T / 255.0
images_test = images_test.reshape(images_test.shape[0], -1).T / 255.0

def one_hot_encoding(data, num_classes=None):
    """Return a one-hot matrix with one column per sample.

    Args:
        data: sequence of non-negative integer class labels.
        num_classes: number of rows of the result. Defaults to
            ``max(data) + 1`` so that row ``k`` always stands for label ``k``,
            even when the smallest label present is not 0.

    Returns:
        Float array of shape ``(num_classes, len(data))`` where entry
        ``[label, i]`` is 1 for sample ``i`` and every other entry is 0.

    Raises:
        ValueError: if a label is negative or not smaller than ``num_classes``.
    """
    labels = np.asarray(data, dtype=int).ravel()
    if num_classes is None:
        num_classes = int(labels.max()) + 1 if labels.size else 0
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            'labels must lie in [0, {}), got range [{}, {}]'.format(
                num_classes, labels.min(), labels.max()))
    encoded = np.zeros((num_classes, labels.size))
    # One vectorised assignment replaces the per-sample Python loop.
    encoded[labels, np.arange(labels.size)] = 1
    return encoded

# Replace the integer label vectors by their one-hot matrices.
labels_train, labels_test = (one_hot_encoding(raw) for raw in (labels_train, labels_test))

class NeuralNetwork_2():
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Samples are stored column-wise: ``features`` has shape ``(n0, m)`` and
    ``labels`` is a one-hot matrix of shape ``(n2, m)``. This variant trains
    with mini-batches taken as consecutive column slices.
    """

    def __init__(self, n0, n1, n2):
        """Create random parameters for layer sizes n0 (input), n1 (hidden), n2 (output)."""
        # Weights are uniform in [0, 0.01), biases uniform in [0, 1).
        self.weight1 = np.random.rand(n1, n0) * 0.01
        self.weight2 = np.random.rand(n2, n1) * 0.01
        self.bias1 = np.random.rand(n1)
        self.bias2 = np.random.rand(n2)

    def linear_hypo(self, w, x, b):
        """Return ``w @ x`` with the bias vector ``b`` added to every column."""
        # Transposing puts the unit axis last so the 1-D bias broadcasts.
        return (np.matmul(w, x).T + b).T

    def sigmoid(self, z):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-z))

    def relu(self, z):
        """Element-wise ``max(z, 0)``."""
        return np.maximum(z, 0)

    def softmax(self, z):
        """Column-wise softmax.

        The column maximum is subtracted first; the result is mathematically
        unchanged, but ``exp`` can no longer overflow into ``inf / inf = nan``.
        """
        shifted = z - np.max(z, axis=0, keepdims=True)
        exponentials = np.exp(shifted)
        return exponentials / exponentials.sum(axis=0, keepdims=True)

    def hypothesis(self, func, lh):
        """Apply the activation called ``func`` ('sigmoid', 'relu', 'softmax') to ``lh``.

        An unknown name terminates the program through ``sys.exit``.
        """
        if func == 'sigmoid':
            return self.sigmoid(lh)
        elif func == 'relu':
            return self.relu(lh)
        elif func == 'softmax':
            return self.softmax(lh)
        else:
            sys.exit('Error in hypothesis: There is no {} function'.format(func))

    def cost(self, y, hypo):
        """Mean cross-entropy between one-hot ``y`` and predictions ``hypo``."""
        # Clipping keeps log() finite; otherwise 0 * log(0) turns the cost into nan.
        log_probs = np.log(np.clip(hypo, 1e-12, 1.0))
        per_sample = np.multiply(y, log_probs).sum(axis=0)
        return -np.average(per_sample)

    def _forward(self, features):
        """Forward pass; returns ``(z1, layer1, layer2)`` with softmax outputs in ``layer2``."""
        z1 = self.linear_hypo(self.weight1, features, self.bias1)
        layer1 = self.hypothesis('relu', z1)
        z2 = self.linear_hypo(self.weight2, layer1, self.bias2)
        layer2 = self.hypothesis('softmax', z2)
        return z1, layer1, layer2

    def train(self, features, labels, learning_rate, EPOCHS, batch_size, verbose=False):
        """Train with mini-batch gradient descent.

        Args:
            features: array of shape ``(n0, m)``.
            labels: one-hot array of shape ``(n2, m)``.
            learning_rate: step size applied to every gradient.
            EPOCHS: index of the last pass; ``EPOCHS + 1`` passes over the
                data are performed.
            batch_size: number of columns per update. A trailing batch that is
                smaller than ``batch_size`` is used as well instead of being
                dropped.
            verbose: when true, print the cost of the last batch about ten
                times during training. Off by default.
        """
        n_samples = features.shape[1]
        report_every = max(1, EPOCHS // 10)
        for epoch in range(EPOCHS + 1):
            error = None
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                features_batch = features[:, start:stop]
                labels_batch = labels[:, start:stop]
                # layer1: relu, layer2: softmax, cost: cross-entropy
                z1, layer1, layer2 = self._forward(features_batch)

                # gradient
                # NOTE(review): dw1/dw2 are summed over the batch while
                # db1/db2 are averaged, so the weight step scales with the
                # batch size. Left as is because the learning rates passed by
                # callers depend on this scaling.
                dz2 = layer2 - labels_batch
                dw2 = np.matmul(dz2, layer1.T)
                db2 = np.average(dz2, axis=1)

                # relu gradient: 1 where z1 > 0, else 0
                dz1 = np.multiply(np.matmul(self.weight2.T, dz2), np.where(z1 > 0, 1, 0))
                dw1 = np.matmul(dz1, features_batch.T)
                db1 = np.average(dz1, axis=1)

                self.weight2 = self.weight2 - dw2 * learning_rate
                self.weight1 = self.weight1 - dw1 * learning_rate
                self.bias2 = self.bias2 - db2 * learning_rate
                self.bias1 = self.bias1 - db1 * learning_rate

                if verbose:
                    error = self.cost(labels_batch, layer2)
            if verbose and error is not None and epoch % report_every == 0:
                print("iter: {:4} error: {:10.4f}".format(epoch, error))

    def test_accuracy(self, features, labels):
        """Print the percentage of columns whose predicted class equals the labelled class."""
        _, _, probabilities = self._forward(features)
        predicted = np.argmax(probabilities, axis=0)
        expected = np.argmax(labels, axis=0)
        prob = np.average(predicted == expected) * 100
        print("Test Accuracy: {:10.4f}".format(prob))
# Input size is the number of feature rows; 5 hidden units, 10 output classes.
model = NeuralNetwork_2(n0=images_train.shape[0], n1=5, n2=10)
model.train(images_train, labels_train, learning_rate=0.01, EPOCHS=100, batch_size=100)
model.test_accuracy(features=images_test, labels=labels_test)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Test Accuracy:    11.3500


# Find the best model settings, evaluated by test accuracy

In [None]:
# Test Accuracy comparison with changing learning_rate - 0.0001 is the best
# A fresh model is trained for every candidate so the runs do not share weights.
for learning_rate in (0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001):
    model = NeuralNetwork_2(len(images_train), 5, 10)
    model.train(images_train, labels_train, learning_rate, 100, 50)
    model.test_accuracy(images_test, labels_test)

Test Accuracy:    10.2800
Test Accuracy:    86.4100
Test Accuracy:    88.7300
Test Accuracy:    89.4500
Test Accuracy:    86.4200
Test Accuracy:    12.1900


In [None]:
# Test Accuracy comparison with changing batch_size - 100 is the best
# A fresh model is trained for every candidate so the runs do not share weights.
for batch_size in (10, 50, 100, 1000):
    model = NeuralNetwork_2(len(images_train), 5, 10)
    model.train(images_train, labels_train, 0.0001, 100, batch_size)
    model.test_accuracy(images_test, labels_test)

Test Accuracy:    89.5300
Test Accuracy:    89.5200
Test Accuracy:    89.7100
Test Accuracy:    89.6400


In [None]:
# Test Accuracy comparison with changing number of hidden layer nodes - 100 is the best
# A fresh model is trained for every candidate so the runs do not share weights.
# NOTE: the 1000-node run needs too much time to calculate.
for hidden_nodes in (20, 50, 100, 1000):
    model = NeuralNetwork_2(len(images_train), hidden_nodes, 10)
    model.train(images_train, labels_train, 0.0001, 100, 100)
    model.test_accuracy(images_test, labels_test)

Test Accuracy:    95.2200
Test Accuracy:    95.9300
Test Accuracy:    96.3200
Test Accuracy:    95.4800


In [None]:
# Test Accuracy comparison with changing EPOCHS - as many as possible
# 100 nodes of hidden layer's calculation takes too much time so decreased it to 20
# A fresh model is trained for every candidate so the runs do not share weights.
for epochs in (10, 100, 1000):
    model = NeuralNetwork_2(len(images_train), 20, 10)
    model.train(images_train, labels_train, 0.0001, epochs, 100)
    model.test_accuracy(images_test, labels_test)

Test Accuracy:    90.9200
Test Accuracy:    94.8000
Test Accuracy:    95.8800


In [None]:
# 3 Layer NN(ReLU, ReLU, Softmax) with batch
import sys
import numpy as np
import tensorflow as tf

# Load MNIST, divide the pixel values by 255 and store each image as one
# flattened column (rows * columns values per sample).
(images_train, labels_train), (images_test, labels_test) = tf.keras.datasets.mnist.load_data()
images_train = np.reshape(
    images_train / 255.0,
    (images_train.shape[0], images_train.shape[1] * images_train.shape[2]),
).T
images_test = np.reshape(
    images_test / 255.0,
    (images_test.shape[0], images_test.shape[1] * images_test.shape[2]),
).T

def one_hot_encoding(data, num_classes=None):
    """Return a one-hot matrix with one column per sample.

    Args:
        data: sequence of non-negative integer class labels.
        num_classes: number of rows of the result. Defaults to
            ``max(data) + 1`` so that row ``k`` always stands for label ``k``,
            even when the smallest label present is not 0.

    Returns:
        Float array of shape ``(num_classes, len(data))`` where entry
        ``[label, i]`` is 1 for sample ``i`` and every other entry is 0.

    Raises:
        ValueError: if a label is negative or not smaller than ``num_classes``.
    """
    labels = np.asarray(data, dtype=int).ravel()
    if num_classes is None:
        num_classes = int(labels.max()) + 1 if labels.size else 0
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            'labels must lie in [0, {}), got range [{}, {}]'.format(
                num_classes, labels.min(), labels.max()))
    encoded = np.zeros((num_classes, labels.size))
    # One vectorised assignment replaces the per-sample Python loop.
    encoded[labels, np.arange(labels.size)] = 1
    return encoded

# Replace the integer label vectors by their one-hot matrices.
labels_train, labels_test = [one_hot_encoding(raw) for raw in (labels_train, labels_test)]

class NeuralNetwork_3():
    """Three-layer fully connected classifier: two ReLU hidden layers, softmax output.

    Samples are stored column-wise: ``features`` has shape ``(n0, m)`` and
    ``labels`` is a one-hot matrix of shape ``(n3, m)``. Training uses
    mini-batches taken as consecutive column slices.
    """

    def __init__(self, n0, n1, n2, n3):
        """Create random parameters for layer sizes n0 (input), n1, n2 (hidden), n3 (output)."""
        # Weights are scaled by 0.01: unscaled values in [0, 1) make the
        # pre-activations grow from layer to layer until exp() overflows and
        # the cost becomes nan.
        self.weight1 = np.random.rand(n1, n0) * 0.01
        self.weight2 = np.random.rand(n2, n1) * 0.01
        self.weight3 = np.random.rand(n3, n2) * 0.01
        self.bias1 = np.random.rand(n1)
        self.bias2 = np.random.rand(n2)
        self.bias3 = np.random.rand(n3)

    def linear_hypo(self, w, x, b):
        """Return ``w @ x`` with the bias vector ``b`` added to every column."""
        # Transposing puts the unit axis last so the 1-D bias broadcasts.
        return (np.matmul(w, x).T + b).T

    def sigmoid(self, z):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-z))

    def relu(self, z):
        """Element-wise ``max(z, 0)``."""
        return np.maximum(z, 0)

    def softmax(self, z):
        """Column-wise softmax.

        The column maximum is subtracted first; the result is mathematically
        unchanged, but ``exp`` can no longer overflow into ``inf / inf = nan``.
        """
        shifted = z - np.max(z, axis=0, keepdims=True)
        exponentials = np.exp(shifted)
        return exponentials / exponentials.sum(axis=0, keepdims=True)

    def hypothesis(self, func, lh):
        """Apply the activation called ``func`` ('sigmoid', 'relu', 'softmax') to ``lh``.

        An unknown name terminates the program through ``sys.exit``.
        """
        if func == 'sigmoid':
            return self.sigmoid(lh)
        elif func == 'relu':
            return self.relu(lh)
        elif func == 'softmax':
            return self.softmax(lh)
        else:
            sys.exit('Error in hypothesis: There is no {} function'.format(func))

    def cost(self, y, hypo):
        """Mean cross-entropy between one-hot ``y`` and predictions ``hypo``."""
        # Clipping keeps log() finite; otherwise 0 * log(0) turns the cost into nan.
        log_probs = np.log(np.clip(hypo, 1e-12, 1.0))
        per_sample = np.multiply(y, log_probs).sum(axis=0)
        return -np.average(per_sample)

    def _forward(self, features):
        """Forward pass; returns ``(z1, layer1, z2, layer2, layer3)``, softmax outputs last."""
        z1 = self.linear_hypo(self.weight1, features, self.bias1)
        layer1 = self.hypothesis('relu', z1)
        z2 = self.linear_hypo(self.weight2, layer1, self.bias2)
        layer2 = self.hypothesis('relu', z2)
        z3 = self.linear_hypo(self.weight3, layer2, self.bias3)
        layer3 = self.hypothesis('softmax', z3)
        return z1, layer1, z2, layer2, layer3

    def train(self, features, labels, learning_rate, EPOCHS, batch_size):
        """Train with mini-batch gradient descent and print the cost about ten times.

        Args:
            features: array of shape ``(n0, m)``.
            labels: one-hot array of shape ``(n3, m)``.
            learning_rate: step size applied to every gradient.
            EPOCHS: index of the last pass; ``EPOCHS + 1`` passes over the
                data are performed so that pass ``EPOCHS`` itself is reported.
            batch_size: number of columns per update. A trailing batch that is
                smaller than ``batch_size`` is used as well instead of being
                dropped.
        """
        n_samples = features.shape[1]
        # Integer interval of at least 1: avoids the float modulo and the
        # division by zero that EPOCHS / 10 caused for EPOCHS == 0.
        report_every = max(1, EPOCHS // 10)
        for epoch in range(EPOCHS + 1):
            error = None
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                features_batch = features[:, start:stop]
                labels_batch = labels[:, start:stop]
                # layer1: relu, layer2: relu, layer3: softmax, cost: cross-entropy
                z1, layer1, z2, layer2, layer3 = self._forward(features_batch)

                # gradient
                # NOTE(review): the dw terms are summed over the batch while
                # the db terms are averaged, so the weight step scales with
                # the batch size. Left as is because the learning rates passed
                # by callers depend on this scaling.
                dz3 = layer3 - labels_batch
                dw3 = np.matmul(dz3, layer2.T)
                db3 = np.average(dz3, axis=1)

                # relu gradient: 1 where the pre-activation is > 0, else 0
                dz2 = np.multiply(np.matmul(self.weight3.T, dz3), np.where(z2 > 0, 1, 0))
                dw2 = np.matmul(dz2, layer1.T)
                db2 = np.average(dz2, axis=1)

                dz1 = np.multiply(np.matmul(self.weight2.T, dz2), np.where(z1 > 0, 1, 0))
                dw1 = np.matmul(dz1, features_batch.T)
                db1 = np.average(dz1, axis=1)

                self.weight3 = self.weight3 - dw3 * learning_rate
                self.weight2 = self.weight2 - dw2 * learning_rate
                self.weight1 = self.weight1 - dw1 * learning_rate
                self.bias3 = self.bias3 - db3 * learning_rate
                self.bias2 = self.bias2 - db2 * learning_rate
                self.bias1 = self.bias1 - db1 * learning_rate

                # Cost of the predictions made before this batch's update.
                error = self.cost(labels_batch, layer3)
            if error is not None and epoch % report_every == 0:
                print("iter: {:4} error: {:10.4f}".format(epoch, error))

    def test_accuracy(self, features, labels):
        """Print the percentage of columns whose predicted class equals the labelled class."""
        probabilities = self._forward(features)[-1]
        predicted = np.argmax(probabilities, axis=0)
        expected = np.argmax(labels, axis=0)
        prob = np.average(predicted == expected) * 100
        print("Test Accuracy: {:10.4f}".format(prob))
# Input size is the number of feature rows; two hidden layers of 5 units, 10 output classes.
model = NeuralNetwork_3(n0=len(images_train), n1=5, n2=5, n3=10)
model.train(images_train, labels_train, learning_rate=0.0001, EPOCHS=10, batch_size=10)
model.test_accuracy(features=images_test, labels=labels_test)



iter:    0 error:        nan
iter:    1 error:        nan
iter:    2 error:        nan
iter:    3 error:        nan
iter:    4 error:        nan
iter:    5 error:        nan
iter:    6 error:        nan
iter:    7 error:        nan
iter:    8 error:        nan
iter:    9 error:        nan
iter:   10 error:        nan
Test Accuracy:     9.8000
