# 3. MLP

Data de entrega: 21/09/2025

O enunciado da atividade está disponível neste [link](https://insper.github.io/ann-dl/versions/2025.2/exercises/mlp/main).

In [1]:
from utils import data
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

---

## Exercício 1

In [2]:
x = np.array([0.5, -0.2])
y = 1

W_hidden = np.array([[0.3, -0.1], [0.2, 0.4]])
b_hidden = np.array([0.1, -0.2])

W_output = np.array([0.5, -0.3])
b_output = 0.2

eta = 0.3

# Activation function: tanh(x)
activation_function = lambda x: (np.exp(2 * x) - 1) / (np.exp(2 * x) + 1)
activation_function_derivative = lambda x: 1 - (activation_function(x)**2)

# Loss function: MSE
loss_function = lambda y, y_pred: 0.5 * (y - y_pred)**2
loss_function_derivative = lambda y, y_pred: y - y_pred

### Forward pass

In [4]:
# Hidden layer
z1_pre = W_hidden @ x.T + b_hidden
z1_activation = activation_function(z1_pre)

# Output layer
z2_pre = W_output @ z1_activation + b_output
z2_activation = activation_function(z2_pre)

y_pred = z2_activation

### Loss calculation

In [5]:
L = loss_function(y, y_pred)

### Backward pass (backpropagation)

#### Erro na camada de saída

In [6]:
output_error = loss_function_derivative(y, y_pred) * activation_function_derivative(z2_pre)

#### Gradientes para os pesos e *bias* da saída

In [7]:
W_output_gradient = output_error * z1_activation
b_output_gradient = output_error

#### Erro para a camada oculta

In [8]:
hidden_error = output_error * W_hidden * activation_function_derivative(z1_pre)

#### Gradientes para os pesos e *bias* das ocultas

In [9]:
W_hidden_gradient = hidden_error * x
b_hidden_gradient = hidden_error

#### Atualização dos pesos e *biases*

In [10]:
W_output = W_output - eta * W_output_gradient
b_output = b_output - eta * b_output_gradient

W_hidden = W_hidden - eta * W_hidden_gradient
b_hidden = b_hidden - eta * b_hidden_gradient

### Algo

In [3]:
mlp_data = data.MLP(input=x,
                        output=y,
                        W_hidden=W_hidden,
                        b_hidden=b_hidden,
                        W_output=W_output,
                        b_output=b_output,
                        eta=eta,
                        activation_function=activation_function,
                        activation_function_d=activation_function_derivative,
                        loss_function=loss_function,
                        loss_function_d=loss_function_derivative)

In [4]:
z1_pre, z1_activation, z2_pre, z2_activation = mlp_data.forward()

loss = mlp_data.loss_calculation(y, z2_activation)

W_hidden_gradient, b_hidden_gradient, W_output_gradient, b_output_gradient = mlp_data.backpropagation(z1_pre, z1_activation, z2_pre, z2_activation)

W_hidden, b_hidden, W_output, b_output = mlp_data.update_weights(W_hidden_gradient, b_hidden_gradient, W_output_gradient, b_output_gradient)

In [10]:
print(f"New weights hidden layer:")
print(f"{W_hidden}")
print(f"New bias hidden layer:")
print(f"{b_hidden}")
print(f"New weights output layer:")
print(f"{W_output}")
print(f"New biases output layer:")
print(f"{b_output}")

New weights hidden layer:
[[ 0.22359454 -0.06819676]
 [ 0.12359454  0.43180324]]
New bias hidden layer:
[-0.05281093 -0.35901618]
New weights output layer:
[ 0.45670643 -0.27075481]
New biases output layer:
0.03577581279296005
