# Neural Networks

Table of contents

✔ Chapter 1.  Fully connected layer

To install additional Python libraries, run

`!pip install --target=$my_path [LIBRARY_NAME]`

# Chapter 1-1. Implement from scratch and train/test against fake data


In [None]:
# Base class
class Layer:
    """Common interface shared by every layer of the network.

    Attributes:
        input: Slot for the layer's input; ``None`` until a subclass sets it.
        output: Slot for the layer's output; ``None`` until a subclass sets it.
    """

    def __init__(self):
        self.input = self.output = None

    def forward_pass(self, input):
        """Compute the output Y of the layer for a given input X.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError

    def backward_propagation(self, output_error, learning_rate):
        """Compute dJ/dW and dJ/dB and update the layer's parameters.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError

In [None]:
import numpy as np

# inherit from base class Layer
class FCLayer(Layer):
    """Fully connected layer computing ``Z = X . W + B``.

    Args:
        input_size: Number of input neurons.
        output_size: Number of output neurons.

    Attributes:
        weights: Array of shape ``(input_size, output_size)``.
        bias: Array of shape ``(1, output_size)``.
    """

    def __init__(self, input_size, output_size):
        # Set up the input/output slots declared by the base class.
        super().__init__()
        # np.random.rand draws from [0, 1); shifting by 0.5 centres the
        # initial parameters around zero.
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_pass(self, input_data):
        """Return ``input_data . weights + bias`` and cache input and output.

        The input is stored because ``backward_propagation`` needs it to
        compute dJ/dW.
        """
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, loss_d, learning_rate):
        """Update the parameters from ``loss_d = dJ/dZ`` and return dJ/dX.

        Args:
            loss_d: Gradient of the loss with respect to this layer's output,
                as a 2-D array with one row per sample.
            learning_rate: Step size of the gradient-descent update.

        Returns:
            ``loss_d . weights.T``, the gradient with respect to this layer's
            input, to be passed to the previous layer.
        """
        # Must be computed before the weights are modified below.
        next_input = np.dot(loss_d, self.weights.T)

        # dJ/dW
        weights_error = np.dot(self.input.T, loss_d)
        # dJ/dB: sum over the sample axis so the result keeps the bias shape
        # (1, output_size). For a single-row loss_d this equals loss_d itself.
        bias_error = np.sum(loss_d, axis=0, keepdims=True)

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return next_input

In [None]:
# inherit from base class Layer
class ActivationLayer(Layer):
    """Layer that applies an activation function to its input.

    The layer has no parameters of its own, so nothing is updated during
    back-propagation.

    Args:
        activation: Callable applied to the input in ``forward_pass``.
        activation_prime: Callable giving the derivative of ``activation``;
            it is evaluated at the cached input in ``backward_propagation``.
    """

    def __init__(self, activation, activation_prime):
        # Set up the input/output slots declared by the base class so they
        # exist even before the first forward pass.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_pass(self, input_data):
        """Return ``activation(input_data)`` and cache input and output."""
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, loss_d, learning_rate):
        """Return ``activation_prime(input) * loss_d`` for the previous layer.

        Args:
            loss_d: Gradient of the loss with respect to this layer's output
                (dJ/da).
            learning_rate: Unused; accepted only to match the ``Layer``
                interface, since there are no parameters to update.
        """
        return self.activation_prime(self.input) * loss_d

In [None]:
import numpy as np

# activation function and its derivative
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so large
    negative inputs no longer overflow inside ``np.exp``.

    Args:
        x: Scalar or array-like of numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))
    # For x >= 0, z == exp(-x) and the usual formula applies directly.
    # For x < 0, z == exp(x), and exp(x) / (1 + exp(x)) is the same function.
    # The trailing [()] turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged.
    return np.where(x >= 0, 1 / (1 + z), z / (1 + z))[()]

def sigmoid_prime(x):
    """Return the derivative of the sigmoid at ``x``: ``s(x) * (1 - s(x))``."""
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(x)
    return s * (1 - s)

def tanh(x):
    """Return the hyperbolic tangent of ``x`` as computed by ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Return the derivative of tanh at ``x``, i.e. ``1 - tanh(x)**2``."""
    squared = np.tanh(x) ** 2
    return 1 - squared

In [None]:
import numpy as np

# loss function and its derivative
def mse(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``.

    The squared differences are averaged over every element.
    """
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Return the derivative of the mean squared error w.r.t. ``y_pred``.

    The result is ``2 * (y_pred - y_true) / N``, where ``N`` is the number of
    elements in ``y_true``.
    """
    residual = y_pred - y_true
    return 2 * residual / y_true.size

In [None]:
class Network:
    """Sequential stack of layers trained one sample at a time.

    Layers are applied in the order they were added. During training, every
    layer's ``backward_propagation`` is called once per sample, so any
    parameter updates happen after each individual sample.

    Attributes:
        layers: Layers in forward order.
        loss: Callable ``loss(y_true, y_pred)``; ``None`` until configured.
        loss_prime: Callable giving the derivative of ``loss`` with respect
            to the prediction; ``None`` until configured.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def compute_loss(self, loss, loss_prime):
        """Register the loss function and its derivative used by ``fit``."""
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Run one sample through every layer and return the final output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_pass(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for each sample.

        Args:
            input_data: Sequence of samples, sample dimension first.
        """
        return [self._forward(sample) for sample in input_data]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train the network and print the average loss after every epoch.

        Args:
            x_train: Sequence of training samples, sample dimension first.
            y_train: Sequence of targets aligned with ``x_train``.
            epochs: Number of passes over the training set.
            learning_rate: Step size handed to every layer's update.

        Raises:
            RuntimeError: If ``compute_loss`` has not been called yet.
            ValueError: If ``x_train`` is empty.
        """
        if self.loss is None or self.loss_prime is None:
            raise RuntimeError("compute_loss() must be called before fit()")
        samples = len(x_train)
        if samples == 0:
            # Without this guard the per-epoch average divides by zero.
            raise ValueError("x_train must contain at least one sample")

        # training loop
        for i in range(epochs):
            err = 0
            # Per-sample gradient descent: the layers are updated after
            # every single sample.
            for j in range(samples):
                # forward pass; the last layer is an activation layer, so the
                # network output is denoted a here
                a = self._forward(x_train[j])

                # accumulate the loss (for display purposes only)
                err += self.loss(y_train[j], a)

                # backward propagation: start from dJ/da and let each layer
                # turn it into the gradient for the layer before it
                loss_d = self.loss_prime(y_train[j], a)
                for layer in reversed(self.layers):
                    loss_d = layer.backward_propagation(loss_d, learning_rate)

            # average error over all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

In [None]:
# XOR truth table used as training data. Every sample is stored as a
# (1, n) row vector, hence the 3-D shapes (4, 1, 2) and (4, 1, 1).
x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 1, 2)
y_train = np.array([0, 1, 1, 0]).reshape(4, 1, 1)
print(x_train.shape, y_train.shape, sep="\n")


(4, 1, 2)
(4, 1, 1)


In [None]:
# Build the XOR network: two fully connected layers, each followed by an
# element-wise sigmoid activation.
net = Network()
for layer in (
    FCLayer(2, 3),                              # input (1,2), output (1,3)
    ActivationLayer(sigmoid, sigmoid_prime),
    FCLayer(3, 1),                              # input (1,3), output (1,1)
    ActivationLayer(sigmoid, sigmoid_prime),
):
    net.add(layer)

# Train with the mean squared error loss.
net.compute_loss(mse, mse_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

# Evaluate on the training inputs themselves.
out = net.predict(x_train)
print(out)

epoch 1/1000   error=0.256084
epoch 2/1000   error=0.256035
epoch 3/1000   error=0.255994
epoch 4/1000   error=0.255960
epoch 5/1000   error=0.255931
epoch 6/1000   error=0.255907
epoch 7/1000   error=0.255887
epoch 8/1000   error=0.255870
epoch 9/1000   error=0.255856
epoch 10/1000   error=0.255844
epoch 11/1000   error=0.255834
epoch 12/1000   error=0.255825
epoch 13/1000   error=0.255818
epoch 14/1000   error=0.255812
epoch 15/1000   error=0.255806
epoch 16/1000   error=0.255802
epoch 17/1000   error=0.255798
epoch 18/1000   error=0.255794
epoch 19/1000   error=0.255791
epoch 20/1000   error=0.255788
epoch 21/1000   error=0.255785
epoch 22/1000   error=0.255783
epoch 23/1000   error=0.255781
epoch 24/1000   error=0.255779
epoch 25/1000   error=0.255777
epoch 26/1000   error=0.255775
epoch 27/1000   error=0.255773
epoch 28/1000   error=0.255772
epoch 29/1000   error=0.255770
epoch 30/1000   error=0.255769
epoch 31/1000   error=0.255767
epoch 32/1000   error=0.255766
epoch 33/1000   e

With sigmoid activations the error barely decreases (it stays near 0.256). Switch the activation function to tanh.

In [None]:
# Same XOR architecture as before, but with tanh as the activation function
# after each fully connected layer.
net = Network()
for layer in (
    FCLayer(2, 3),                          # input (1,2), output (1,3)
    ActivationLayer(tanh, tanh_prime),
    FCLayer(3, 1),                          # input (1,3), output (1,1)
    ActivationLayer(tanh, tanh_prime),
):
    net.add(layer)

# Train with the mean squared error loss.
net.compute_loss(mse, mse_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

# Evaluate on the training inputs themselves.
out = net.predict(x_train)
print(out)

epoch 1/1000   error=0.661901
epoch 2/1000   error=0.343018
epoch 3/1000   error=0.313261
epoch 4/1000   error=0.307972
epoch 5/1000   error=0.305882
epoch 6/1000   error=0.304523
epoch 7/1000   error=0.303397
epoch 8/1000   error=0.302361
epoch 9/1000   error=0.301362
epoch 10/1000   error=0.300376
epoch 11/1000   error=0.299391
epoch 12/1000   error=0.298400
epoch 13/1000   error=0.297398
epoch 14/1000   error=0.296381
epoch 15/1000   error=0.295347
epoch 16/1000   error=0.294293
epoch 17/1000   error=0.293217
epoch 18/1000   error=0.292119
epoch 19/1000   error=0.290995
epoch 20/1000   error=0.289846
epoch 21/1000   error=0.288670
epoch 22/1000   error=0.287467
epoch 23/1000   error=0.286235
epoch 24/1000   error=0.284975
epoch 25/1000   error=0.283686
epoch 26/1000   error=0.282368
epoch 27/1000   error=0.281021
epoch 28/1000   error=0.279646
epoch 29/1000   error=0.278244
epoch 30/1000   error=0.276815
epoch 31/1000   error=0.275361
epoch 32/1000   error=0.273883
epoch 33/1000   e

Now the error decreases steadily from epoch to epoch.

# Chapter 1-2. train/test against MNIST data

Use MNIST dataset


*   Source: keras



In [None]:
from keras.datasets import mnist
from keras.utils import np_utils

# Load MNIST through Keras. The call returns a (train, test) pair, each of
# which is an (images, labels) tuple.
# NOTE(review): the later cells assume 28x28 images and integer class
# labels in [0, 9] — confirm against the Keras dataset documentation.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
# Training data: 60000 samples.
# Flatten every image into a (1, 784) row vector and convert it to float32,
# then divide by 255 to normalise the pixel values.
x_train = x_train.reshape(x_train.shape[0], 1, 28 * 28).astype('float32')
print(x_train[0])   # first sample before normalisation
x_train /= 255
print(x_train[0])   # first sample after normalisation

# One-hot encoding: turn each label in [0, 9] into a vector of size 10
# (e.g. 3 -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]).
print(y_train[0])   # first label before encoding
y_train = np_utils.to_categorical(y_train)
print(y_train[0])   # first label after encoding

[[  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   3.  18.
   18.  18. 126. 136. 175.  26. 166. 255. 247. 127.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.  30.  36.  94. 154. 170. 253.
  253. 253. 253. 253. 225. 172. 253. 242. 195.  64.   0.   0.   

In [None]:
# Apply the same preprocessing to the test split: flatten each image into a
# (1, 784) float32 row vector, divide by 255, and one-hot encode the labels.
x_test = x_test.reshape(x_test.shape[0], 1, 28 * 28).astype('float32')
x_test /= 255
y_test = np_utils.to_categorical(y_test)

In [None]:
class Network:
    """Sequential stack of layers trained on a random subset per epoch.

    Layers are applied in the order they were added. In every epoch ``fit``
    shuffles the sample indices and trains on the first ``batch_size`` of
    them, calling every layer's ``backward_propagation`` once per sample.

    Attributes:
        layers: Layers in forward order.
        loss: Callable ``loss(y_true, y_pred)``; ``None`` until configured.
        loss_prime: Callable giving the derivative of ``loss`` with respect
            to the prediction; ``None`` until configured.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def compute_loss(self, loss, loss_prime):
        """Register the loss function and its derivative used by ``fit``."""
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Run one sample through every layer and return the final output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_pass(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for each sample.

        Args:
            input_data: Sequence of samples, sample dimension first.
        """
        return [self._forward(sample) for sample in input_data]

    def fit(self, x_train, y_train, epochs, learning_rate, batch_size):
        """Train the network and print the average loss after every epoch.

        Args:
            x_train: Sequence of training samples, sample dimension first.
            y_train: Sequence of targets aligned with ``x_train``.
            epochs: Number of training rounds; each round uses one random
                subset of the data.
            learning_rate: Step size handed to every layer's update.
            batch_size: Maximum number of randomly chosen samples used per
                epoch. If it exceeds the data size, every sample is used.

        Raises:
            RuntimeError: If ``compute_loss`` has not been called yet.
            ValueError: If ``batch_size`` is not positive or ``x_train`` is
                empty.
        """
        if self.loss is None or self.loss_prime is None:
            raise RuntimeError("compute_loss() must be called before fit()")
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        if len(x_train) == 0:
            raise ValueError("x_train must contain at least one sample")

        indices = np.arange(len(x_train))

        # training loop
        for i in range(epochs):
            err = 0
            # Draw this epoch's samples: shuffle in place, keep the first
            # batch_size indices.
            np.random.shuffle(indices)
            batch = indices[:batch_size]
            for j in batch:
                # forward pass; the last layer is an activation layer, so the
                # network output is denoted a here
                a = self._forward(x_train[j])

                # accumulate the loss (for display purposes only)
                err += self.loss(y_train[j], a)

                # backward propagation: start from dJ/da and let each layer
                # turn it into the gradient for the layer before it
                loss_d = self.loss_prime(y_train[j], a)
                for layer in reversed(self.layers):
                    loss_d = layer.backward_propagation(loss_d, learning_rate)

            # Average over the samples actually used; the slice is shorter
            # than batch_size when the data set is smaller than the batch.
            err /= len(batch)
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

In [None]:
# MNIST classifier: 784 -> 100 -> 50 -> 10, with a tanh activation after
# every fully connected layer.
net = Network()
for layer in (
    FCLayer(28 * 28, 100),                  # input (1, 28*28), output (1, 100)
    ActivationLayer(tanh, tanh_prime),
    FCLayer(100, 50),                       # input (1, 100), output (1, 50)
    ActivationLayer(tanh, tanh_prime),
    FCLayer(50, 10),                        # input (1, 50), output (1, 10)
    ActivationLayer(tanh, tanh_prime),
):
    net.add(layer)

# Train with the mean squared error loss.
net.compute_loss(mse, mse_prime)
net.fit(x_train, y_train, epochs=500, learning_rate=0.1, batch_size=256)


epoch 1/500   error=0.493313
epoch 2/500   error=0.204345
epoch 3/500   error=0.139964
epoch 4/500   error=0.139940
epoch 5/500   error=0.125037
epoch 6/500   error=0.128585
epoch 7/500   error=0.108100
epoch 8/500   error=0.110474
epoch 9/500   error=0.103557
epoch 10/500   error=0.101204
epoch 11/500   error=0.100366
epoch 12/500   error=0.091957
epoch 13/500   error=0.090558
epoch 14/500   error=0.097299
epoch 15/500   error=0.089570
epoch 16/500   error=0.081336
epoch 17/500   error=0.088287
epoch 18/500   error=0.082720
epoch 19/500   error=0.083860
epoch 20/500   error=0.074619
epoch 21/500   error=0.072706
epoch 22/500   error=0.077619
epoch 23/500   error=0.070493
epoch 24/500   error=0.077584
epoch 25/500   error=0.071651
epoch 26/500   error=0.068569
epoch 27/500   error=0.066328
epoch 28/500   error=0.074819
epoch 29/500   error=0.074409
epoch 30/500   error=0.071909
epoch 31/500   error=0.066579
epoch 32/500   error=0.067264
epoch 33/500   error=0.073249
epoch 34/500   erro

In [None]:
# Classify the test set and print the accuracy. A prediction counts as
# correct when its largest output sits at the same index as the largest
# entry of the one-hot label.
out = net.predict(x_test)
tp = sum(
    1
    for pred, true in zip(out, y_test)
    if np.argmax(pred) == np.argmax(true)
)
print(tp / len(x_test))

0.9282


# [Extra Credit] Could you improve the performance? 
Any strategy is acceptable, such as a different activation function, learning rate, or loss function; adding more layers and neurons; or changing the number of epochs and the batch size.
**(Deadline:12/16 FRI 23:59)**



### Submitters are as follows.
 *Note that hands-on assignments can be done collaboratively (**up to 2 students**)*

    Name: Sohyun Doh
    Student ID: 2076120
    

TO-DO: Using the Network class provided above, improve on the baseline performance. Note that the baseline model obtained 92.1% accuracy on average over five experiments (91.2%, 90.3%, 93.47%, 92.65%, 92.82%).
- Feel free to add to or modify any of the code provided above
- Run the five experiments and report the average accuracy
- Discuss what changes/additions you made to improve the performance of the neural network model   