# Assignment 8: Neural Networks

Only use the already imported library `numpy` and the Python standard library. For the evaluation you may also use scikit-learn (`sklearn`) and `matplotlib`. Make sure that the dataset `airfoil_self_noise.csv` is in the same directory as the notebook.

In [None]:
import numpy as np

def load_dataset(path):
    """Read the data file at *path* and split it into train and test parts.

    The file is parsed with ``np.genfromtxt`` using its default settings.
    The first five columns are used as features and the sixth column as
    the regression target. 30% of the rows are held out for testing, with
    a fixed ``random_state`` so the split is reproducible.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    # Imported locally so scikit-learn is only needed for the data split.
    from sklearn.model_selection import train_test_split

    table = np.genfromtxt(path)
    features = table[:, :5]
    targets = table[:, 5]

    # train_test_split yields [X_train, X_test, y_train, y_test] in order.
    return tuple(
        train_test_split(features, targets, test_size=0.3, random_state=2020)
    )
    

# Load and split the data once; the relative path is resolved against the
# current working directory.
X_train, X_test, y_train, y_test = load_dataset('airfoil_self_noise.csv')

## Task 3: Feedforward Neural Network: Programming

In this task, you will implement a feedforward neural network for regression. The hyperparameters of the model are:
- `input_dim`: The dimension of the input vector.
- `output_dim`: The dimension of the output vector.
- `width`: The dimension of each hidden layer.
- `depth`: The number of hidden layers. For B.Sc. Data Science students, this parameter is constant with a value of 1.
- `learning_rate`: The learning rate for gradient descent.
- `epochs`: The number of epochs/iterations performed during training.

B.Sc. Data Science students only have to implement the network for a single hidden layer, i.e. `depth = 1`. All other students have to implement the network for any `depth >= 1`.

The activation function for each hidden layer is ReLU (g(x) = max(0, x)). The output layer uses the identity as activation, since our objective is regression.

You have to implement the `FeedforwardNeuralNetworkRegressor`.

The `__init__` method initializes the network.
Initialize each weight and bias randomly with a standard Gaussian distribution using the numpy function `numpy.random.normal` with default parameters.

The `fit` method trains the network.
Use backpropagation with gradient descent similar to Task 2.
Use the whole training data set for each training epoch.
Use the mean squared error as loss function.

The `predict` method computes the forward-pass of the network.

Evaluate your regressor on the test data with the mean squared error and compare your results to your linear regression model from assignment 3. Try out different hyper-parameters and compare the results. You may want to normalize your input and output data for better performance.

In [None]:
class FeedforwardNeuralNetworkClassifier(object):
    """Fully connected feedforward network trained for regression.

    Despite the class name, the model is a regressor: every hidden layer
    uses ReLU, the output layer uses the identity, and training minimises
    the mean squared error with full-batch gradient descent.

    Parameters:
        input_dim: Number of input features.
        output_dim: Number of output values per sample.
        width: Size of the hidden layers. Either a single int (used for
            every hidden layer) or a sequence with at least ``depth``
            entries, one per hidden layer.
        depth: Number of hidden layers (must be >= 1).
        learning_rate: Step size for gradient descent.
        epochs: Number of full-batch training iterations.

    Parameters are stored in ``self.params`` under the keys ``w{i}`` and
    ``b{i}`` for ``i = 0 .. depth`` (index ``depth`` is the output layer).
    """

    def __init__(self, input_dim, output_dim, width, depth, learning_rate, epochs):
        self.in_dim = input_dim
        self.out_dim = output_dim
        self.width = width
        self.depth = depth
        self.lr = learning_rate
        self.epochs = epochs
        self.params = self._init_params(
            in_dim=self.in_dim,
            out_dim=self.out_dim,
            depth=self.depth,
            width=self.width,
        )

    def fit(self, X, y):
        """Train on the whole data set for ``self.epochs`` iterations.

        Prints the loss before each parameter update. ``y`` may be given
        with shape ``(n,)`` or ``(n, output_dim)``.
        """
        X = np.asarray(X, dtype=float)
        # Give the targets the same 2-D layout as the network output so
        # that no accidental broadcasting happens in loss or gradient.
        y = np.asarray(y, dtype=float).reshape(X.shape[0], self.out_dim)

        params = self.params
        for epoch in range(self.epochs):
            y_hat, cache = self._forward_pass(X, params)
            loss = self._compute_loss(y_hat, y)
            print(f'epoch = {epoch} loss = {loss}')

            grads_values = self._back_pass(y_hat, y, cache, params)
            params = self._update_params(params, grads_values, self.lr)

        self.params = params

    def predict(self, X):
        """Return the network output for ``X`` with shape ``(n, output_dim)``."""
        output, _ = self._forward_pass(X, self.params)
        return output

    def _init_params(self, in_dim, out_dim, depth, width):
        """Draw all weights and biases from a standard normal distribution.

        Raises:
            ValueError: If ``depth < 1`` or ``width`` is a sequence with
                fewer than ``depth`` entries.
        """
        if depth < 1:
            raise ValueError('depth must be at least 1')

        if np.isscalar(width):
            hidden = [int(width)] * depth
        else:
            hidden = list(width)
            if len(hidden) < depth:
                raise ValueError(
                    'width must provide one size per hidden layer '
                    f'(got {len(hidden)} sizes for depth {depth})'
                )
            # Extra entries are ignored so consecutive layers always match.
            hidden = hidden[:depth]

        sizes = [in_dim] + hidden + [out_dim]
        params = {}
        for i, (fan_in, fan_out) in enumerate(zip(sizes[:-1], sizes[1:])):
            params[f'w{i}'] = np.random.normal(size=(fan_in, fan_out))
            params[f'b{i}'] = np.random.normal(size=fan_out)
        return params

    def _ReLU(self, x):
        """Element-wise ``max(0, x)``."""
        return np.maximum(x, 0)

    def _relu_backward(self, dA, Z):
        """Pass ``dA`` through where the pre-activation ``Z`` was positive."""
        return np.where(Z > 0, dA, 0.0)

    def _single_layer_forward_pass(self, A_prev, W_curr, b_curr, out_layer):
        """Return ``(activation, pre_activation)`` of one layer.

        The output layer (``out_layer=True``) uses the identity activation,
        every other layer uses ReLU.
        """
        Z_curr = np.dot(A_prev, W_curr) + b_curr
        if out_layer:
            return Z_curr, Z_curr
        return self._ReLU(Z_curr), Z_curr

    def _forward_pass(self, X, params_values):
        """Run the network on ``X``.

        Returns:
            ``(output, memory)`` where ``memory`` holds the layer inputs
            ``A{i}`` (``A0`` is ``X``) and pre-activations ``Z{i}`` that
            the backward pass needs.
        """
        A_curr = X
        memory = {'A0': A_curr}

        for i in range(self.depth + 1):
            A_curr, Z_curr = self._single_layer_forward_pass(
                A_curr,
                params_values[f'w{i}'],
                params_values[f'b{i}'],
                i == self.depth,
            )
            memory[f'A{i+1}'] = A_curr
            memory[f'Z{i}'] = Z_curr

        return A_curr, memory

    def _single_layer_back_pass(self, dA_curr, W_curr, Z_curr, A_prev, out_layer=False):
        """Backpropagate through one layer.

        Args:
            dA_curr: Gradient of the loss w.r.t. this layer's activation.
            W_curr: Weight matrix of this layer.
            Z_curr: Pre-activation of this layer from the forward pass.
            A_prev: Input of this layer from the forward pass.
            out_layer: True for the identity output layer, whose
                activation derivative is 1 (no ReLU mask).

        Returns:
            ``(dA_prev, dW_curr, db_curr)``; the gradients have the same
            shapes as ``A_prev``, ``W_curr`` and the layer's bias.
        """
        if out_layer:
            dZ_curr = dA_curr
        else:
            dZ_curr = self._relu_backward(dA_curr, Z_curr)

        # The 1/n factor of the mean is already contained in dA_curr, so
        # the per-sample contributions are summed here, not averaged.
        dW_curr = np.dot(A_prev.T, dZ_curr)
        db_curr = np.sum(dZ_curr, axis=0)
        dA_prev = np.dot(dZ_curr, W_curr.T)

        return dA_prev, dW_curr, db_curr

    def _back_pass(self, y_hat, y, memory, params_values):
        """Compute the MSE gradients for every weight and bias.

        Returns:
            Dict with keys ``dw{i}`` and ``db{i}`` for ``i = 0 .. depth``.
        """
        grads = {}
        y = np.asarray(y).reshape(y_hat.shape)
        # d/dy_hat of mean((y - y_hat)**2) taken over all elements.
        dA_prev = 2.0 * (y_hat - y) / y_hat.size

        for i in range(self.depth, -1, -1):
            dA_prev, dW_curr, db_curr = self._single_layer_back_pass(
                dA_prev,
                params_values[f'w{i}'],
                memory[f'Z{i}'],
                memory[f'A{i}'],
                out_layer=(i == self.depth),
            )
            grads[f'dw{i}'] = dW_curr
            grads[f'db{i}'] = db_curr

        return grads

    def _update_params(self, params_values, grads_values, learning_rate):
        """Apply one gradient-descent step in place and return the dict."""
        for i in range(self.depth + 1):
            params_values[f'w{i}'] -= learning_rate * grads_values[f'dw{i}']
            params_values[f'b{i}'] -= learning_rate * grads_values[f'db{i}']

        return params_values

    def _compute_loss(self, y_hat, y_true):
        """Mean squared error between predictions and targets."""
        y_hat = np.asarray(y_hat)
        y_true = np.asarray(y_true)
        # Align e.g. (n,) targets with (n, 1) predictions; subtracting them
        # directly would broadcast to an (n, n) matrix.
        if y_true.shape != y_hat.shape and y_true.size == y_hat.size:
            y_true = y_true.reshape(y_hat.shape)
        return np.mean((y_true - y_hat) ** 2)

def MSE_loss(y_hat, y_true):
    """Return the mean squared error between predictions and targets.

    Inputs with the same number of elements but different layouts, such
    as ``(n, 1)`` predictions and ``(n,)`` targets, are aligned by
    dropping length-1 axes first. Subtracting them directly would
    broadcast to an ``(n, n)`` matrix and give a meaningless value.
    Inputs of different sizes still follow numpy broadcasting.
    """
    y_hat = np.asarray(y_hat)
    y_true = np.asarray(y_true)
    if y_hat.shape != y_true.shape and y_hat.size == y_true.size:
        y_hat = np.squeeze(y_hat)
        y_true = np.squeeze(y_true)
    return np.mean((y_true - y_hat) ** 2)

In [None]:
# Hyper-parameters of the experiment.
input_dim, output_dim = 5, 1
width, depth = (5, 5), 2
learning_rate, epochs = 0.01, 50

# Build the network and train it on the training split.
model = FeedforwardNeuralNetworkClassifier(
    input_dim=input_dim,
    output_dim=output_dim,
    width=width,
    depth=depth,
    learning_rate=learning_rate,
    epochs=epochs,
)
model.fit(X_train, y_train)

# Report the mean squared error on the held-out test split.
y_pred = model.predict(X_test)
loss = MSE_loss(y_pred, y_test)
print(loss)

epoch = 0 loss = 977261.3048379932
epoch = 1 loss = 14146.373341363871
epoch = 2 loss = 14429.74232685125
epoch = 3 loss = 14718.807028946932
epoch = 4 loss = 15013.681931554738
epoch = 5 loss = 15314.483819704963
epoch = 6 loss = 15621.331825806994
epoch = 7 loss = 15621.331825806994
epoch = 8 loss = 15621.331825806994
epoch = 9 loss = 15621.331825806994
epoch = 10 loss = 15621.331825806994
epoch = 11 loss = 15621.331825806994
epoch = 12 loss = 15621.331825806994
epoch = 13 loss = 15621.331825806994
epoch = 14 loss = 15621.331825806994
epoch = 15 loss = 15621.331825806994
epoch = 16 loss = 15621.331825806994
epoch = 17 loss = 15621.331825806994
epoch = 18 loss = 15621.331825806994
epoch = 19 loss = 15621.331825806994
epoch = 20 loss = 15621.331825806994
epoch = 21 loss = 15621.331825806994
epoch = 22 loss = 15621.331825806994
epoch = 23 loss = 15621.331825806994
epoch = 24 loss = 15621.331825806994
epoch = 25 loss = 15621.331825806994
epoch = 26 loss = 15621.331825806994
epoch = 27 lo