## Neural Network

Step 1: Import packages.

In [1]:
import numpy as np

Step 2: Define activation functions and their derivation functions.

In [2]:
def tanh(x):
    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))

def tanh_deriv(x):
    return 1. - tanh(x) ** 2

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_deriv(x):
    return sigmoid(x) * (1 - sigmoid(x))

def direct(x):
    return x

def direct_deriv(x):
    return 1

Step 3: Construct a neural network.

In [3]:
class NN:
    def __init__(self, layers, activation_fns): # [1, 10, 1]
        self.activations = []
        self.activations_deriv = []

        for activation_fn in activation_fns:

            if activation_fn == "tanh":
                self.activations.append(tanh)
                self.activations_deriv.append(tanh_deriv)

            elif activation_fn == "sigmoid":
                self.activations.append(sigmoid)
                self.activations_deriv.append(sigmoid_deriv)

            elif activation_fn is None:
                self.activations.append(direct)
                #self.activations_deriv.append(direct)
                self.activations_deriv.append(direct_deriv)
        
        self.layers = layers

        self.weights = []
        self.biases = []
        
        for l in range(len(layers) - 1):
            self.weights.append(np.random.random([layers[l], layers[l + 1]])) # weights: [1, 10], [10, 1]
        for l in layers:
            self.biases.append(np.zeros(l))

    def loss(self, y_pred, y):
        return np.mean((y_pred - y) ** 2)

    def forward(self, x):
        outputs = [x + self.biases[0]]
        for i, weight in enumerate(self.weights):
            if self.activations[i + 1] is not None: # self.activations[0] is about input layer
                outputs.append(self.activations[i + 1](np.matmul(outputs[i], weight) + self.biases[i + 1])) # outputs[i]: [batch_size, xx]
            else:
                outputs.append(np.matmul(outputs[i], weight) + self.biases[i + 1]) # outputs[i]: [batch_size, xx]

        #len(outputs): 3, outputs[0...2].shape: (64, 1), (64, 30), (64, 1)
        return outputs

    def backward(self, outputs, y): # y is the label
        batch_size = outputs[0].shape[0]
        
        delta_biases = [np.zeros_like(bias) for bias in self.biases]
        delta_weights = [np.zeros_like(weight) for weight in self.weights]

        for ins in range(batch_size):
            # Calculates the error at the output layer
            errors = []
            errors.append(self.activations_deriv[-1](outputs[-1][ins]) * (outputs[-1][ins] - y[ins]))
            
            for l, weight in enumerate(reversed(self.weights)):
                errors.append(self.activations_deriv[-(l + 2)](outputs[-(l + 2)][ins]) * np.matmul(weight, errors[l])) 
            
            errors = list(reversed(errors))
            
            
            # calc the delta
            for i in range(len(self.layers)): # [1, 30, 1]
                delta_biases[i] += self.learning_rate * errors[i]
            
            for i in range(len(self.layers) - 1):
                output = np.array(outputs[i][ins])
                output = output.reshape(output.shape[0], 1)
                error = np.array(errors[i + 1]) # the first error is useless because it is the input node which has no weight
                error = error.reshape(1, error.shape[0])
                
                #delta_weights[i] += self.learning_rate * np.matmul(outputs[i][ins], errors[i]) # [1, 10], O_i:[1], Err_j=Err_{j+1}:[10]
                delta_weights[i] += self.learning_rate * np.matmul(output, error) # [1, 10], O_i:[1], Err_j=Err_{j+1}:[10]
        
        for i in range(1, len(self.biases)):
            self.biases[i] -= delta_biases[i] / batch_size

        for i in range(len(self.weights)):
            self.weights[i] -= delta_weights[i] / batch_size



    def fit(self, x, y, num_epochs, learning_rate=0.02):
        self.learning_rate = learning_rate

        for epoch in range(num_epochs):
            outputs = self.forward(x) # len(outputs): 1
            self.backward(outputs, y)

            if epoch % 1000 == 0:
                print("epoch {0}: loss={1}".format(epoch, self.loss(outputs[-1], y)))

Step 4: Train model.

In [5]:
def eval(model):
    batch_size = 64
    x = np.vstack(np.linspace(-1, 1, batch_size))
    y = 10 * x ** 2 + 1
    outputs = model.forward(x)
    from pprint import pprint
    pprint(outputs[-1].squeeze())
    pprint(y.squeeze())

# dataset
batch_size = 64
x = np.vstack(np.linspace(-1, 1, batch_size))
y = 10 * x ** 2 + 1 + np.random.random([batch_size, 1])

# define model
#model = NN([1, 30, 1], [None, "tanh", None])
model = NN([1, 10, 1], [None, "tanh", None])

# train model
model.fit(x, y, num_epochs=10000)

# evaluate model
eval(model)

epoch 0: loss=35.674792572855296
epoch 1000: loss=9.413976089129596
epoch 2000: loss=4.107455220412255
epoch 3000: loss=0.29388367940048243
epoch 4000: loss=0.17021808430170482
epoch 5000: loss=0.13331286758264427
epoch 6000: loss=0.11798108740107237
epoch 7000: loss=0.11063401333019798
epoch 8000: loss=0.1067887576064884
epoch 9000: loss=0.10464195755352378
array([10.94175536, 10.53181954, 10.09975448,  9.64801663,  9.17967702,
        8.6983646 ,  8.2081739 ,  7.71354072,  7.2190933 ,  6.72948958,
        6.24925269,  5.78261725,  5.33339767,  4.90488665,  4.49978908,
        4.12019223,  3.76757   ,  3.44281634,  3.1463012 ,  2.87794187,
        2.63728267,  2.423577  ,  2.23586685,  2.07305632,  1.93397694,
        1.81744364,  1.72230135,  1.64746249,  1.59193624,  1.55485066,
        1.53546868,  1.5331991 ,  1.54760351,  1.57839977,  1.62546281,
        1.688823  ,  1.76866219,  1.86530746,  1.97922226,  2.11099445,
        2.26132059,  2.4309858 ,  2.62083819,  2.8317571 ,  3.0