In [125]:
import numpy as np
import pandas as pd

# Lab 9 - Multi-layer Perceptron Forward Pass & Backpropagation

## Part I
For this exercise you will implement a simple 2-layer perceptron with the forward pass and the backpropagation to learn the weights

For the first part you'll build and train a 2-layer neural network that predicts the prices of houses, using the usual Boston housing dataset.

In [126]:
boston = pd.read_csv('./BostonHousing.txt')

As usual, consider the MEDV as your target variable. 
* Split the data into training, validation and testing (70,15,15)%
* Experiment with different number of neurons per layer for your network, using the validation set

In [127]:
# your code goes here
from sklearn.model_selection import train_test_split
# The target variable is the last column of our dataset
X = boston.values[:,:-1]
y = boston.values[:,1].reshape(-1, 1)
# Now lets split the 3 results
X_train, X_tv, y_train, y_tv = train_test_split(X, y, test_size=0.3, random_state=0)
X_test, X_val, y_test, y_val = train_test_split(X_tv, y_tv, test_size=0.5, random_state=0)

In [128]:
def sigmoid_activation(z:np.ndarray) -> np.ndarray:
    # your code goes here
    return 1 / (1 + np.exp(-z))

def rmse(y_pred:np.ndarray, y_real:np.ndarray) -> float:
    return np.sqrt(np.mean(np.power(y_pred - y_real, 2)))

def sigmoid_derivative(z:np.ndarray) -> np.ndarray:
    # your code goes here
    return sigmoid_activation(z) * (1 - sigmoid_activation(z))


In [129]:
# your code goes here

def backpropagation_2layer(X:np.ndarray, y_real:np.ndarray, dim_input:int,
                            dim_hidden:int, dim_output:int, repeats:int=100) -> tuple[np.ndarray]:
    
    # Initializing weights and biases with random values
    # First we initialize the input to hidden layers of weights and biases
    W_input = np.random.normal(size=(dim_input, dim_hidden))
    b_input = np.random.normal(size=(dim_hidden, 1))
    # Then the hidden to output layers of weights and biases
    W_output = np.random.normal(size=(dim_hidden, dim_output))
    b_output = np.random.normal(size=(dim_output, 1))

    for repeat in range(repeats):
        hidden_layer_i = X @ W_input + b_input.T
        hidden_layer_o = sigmoid_activation(hidden_layer_i)

        output_layer = hidden_layer_o @ W_output + b_output.T

        derivative_wo = hidden_layer_o.T @ (output_layer - y_real)
        derivative_bo = np.sum(output_layer - y_real)

        error_h = (output_layer - y_real) @ W_output.T @ sigmoid_derivative(hidden_layer_i)
        derivative_wi = X.T @ error_h
        derivative_bi = np.sum(error_h, axis=0)

        W_input -= derivative_wo
        b_input -= derivative_bo

        W_output -= derivative_wi
        b_output -= derivative_bi


    return W_input, b_input, W_output, b_output



# for i in range(1, 14):
#     hidden_layer_dim = i
#     y_val_pred = two_layer_perceptron(X_val, sigmoid_activation, X_val.shape[1], hidden_layer_dim, 1)
#     val_rmse =rmse(y_val_pred, y_val)
#     print(f"RMSE para {i} neurônio(s): {val_rmse}")


RMSE para 1 neurônio(s): 27.548609537790604
RMSE para 2 neurônio(s): 27.168490218752858
RMSE para 3 neurônio(s): 26.533824135874994
RMSE para 4 neurônio(s): 28.24071786214621
RMSE para 5 neurônio(s): 26.79438856956638
RMSE para 6 neurônio(s): 25.580149431258594
RMSE para 7 neurônio(s): 29.742960217344137
RMSE para 8 neurônio(s): 27.85825903091911
RMSE para 9 neurônio(s): 24.35804546307932
RMSE para 10 neurônio(s): 29.72297820690501
RMSE para 11 neurônio(s): 25.930005139711206
RMSE para 12 neurônio(s): 31.375757059417488
RMSE para 13 neurônio(s): 27.391372750500555


  return 1 / (1 + np.exp(-z))


## Part II 

For this exercise you will build and train a 2-layer neural network that predicts the exact digit from a hand-written image, using the MNIST dataset. 
For this exercise, add weight decay to your network.

In [130]:
from sklearn.datasets import load_digits

In [131]:
digits = load_digits()

In [132]:
X = digits.data
y = digits.target

In [133]:
X.shape

(1797, 64)

Again, you will split the data into training, validation and testing.

In [134]:
# your code goes here:
# Now lets split the 3 results
X_train, X_tv, y_train, y_tv = train_test_split(X, y, test_size=0.3, random_state=0)
X_test, X_val, y_test, y_val = train_test_split(X_tv, y_tv, test_size=0.5, random_state=0)

In [135]:
# your code goes here:
