In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import optuna
import numpy as np
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv("../dados/dados.csv")

In [None]:
df = pd.read_csv('https://raw.githubusercontent.com/edunb01/dotfiles/master/olist_customers_dataset.csv')


In [3]:
X = df[['x1.obs', 'x2.obs']].to_numpy().T   # shape: (2, 100000)

Y = df[['y']].to_numpy().T                  # shape: (1, 100000)

x_treino = X[:, 0:8000]
x_val = X[:, 8000:9000]
x_teste = X[:, 9000:10000]

y_treino = Y[:, 0:8000]
y_val = Y[:, 8000:9000]
y_teste = Y[:, 9000:10000]

In [4]:
print("X:", X.shape)
print("Y:", Y.shape)
print("Treino:", x_treino.shape, y_treino.shape)
print("Validação:", x_val.shape, y_val.shape)
print("Teste:", x_teste.shape, y_teste.shape)

X: (2, 100000)
Y: (1, 100000)
Treino: (2, 8000) (1, 8000)
Validação: (2, 1000) (1, 1000)
Teste: (2, 1000) (1, 1000)


# Item a)

In [5]:
x_treino_t = torch.tensor(x_treino.T, dtype=torch.float32)
y_treino_t = torch.tensor(y_treino.T, dtype=torch.float32).view(-1,1)

x_val_t = torch.tensor(x_val.T, dtype=torch.float32)
y_val_t = torch.tensor(y_val.T, dtype=torch.float32).view(-1,1)

In [6]:
class SimpleNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(2, 2)     # 2 inputs -> 2 neurônios
        self.out    = nn.Linear(2, 1)     # 2 -> 1 saída
        self.sigmoid = nn.Sigmoid()
        
        # inicialização em zero
        nn.init.zeros_(self.hidden.weight)
        nn.init.zeros_(self.hidden.bias)
        nn.init.zeros_(self.out.weight)
        nn.init.zeros_(self.out.bias)

    def forward(self, x):
        h = self.sigmoid(self.hidden(x))
        y_hat = self.out(h)
        return y_hat

In [7]:
Phi = SimpleNN()

criterion = nn.MSELoss()
optimizer = optim.SGD(Phi.parameters(), lr=0.1)
epochs = 100

losses_treino = []
losses_val = []
best_val_loss = np.inf
best_state = None
best_epoch = 0

In [8]:
for epoch in range(epochs):

    optimizer.zero_grad()
    y_pred = Phi(x_treino_t)
    train_loss = criterion(y_pred, y_treino_t)
    train_loss.backward()
    optimizer.step()

    with torch.no_grad():
        val_pred = Phi(x_val_t)
        val_loss = criterion(val_pred, y_val_t)

    losses_treino.append(train_loss.item())
    losses_val.append(val_loss.item())

    if val_loss.item() < best_val_loss:
        best_val_loss = val_loss.item()
        best_state = {k: v.clone() for k, v in Phi.state_dict().items()}
        best_epoch = epoch + 1

In [9]:
print(f"Melhor época (validação): {best_epoch}")
print(f"Melhor MSE de validação: {best_val_loss:.6f}")

Melhor época (validação): 100
Melhor MSE de validação: 97.893570


# Item b)

Configurações iniciais

In [10]:
SEED = 22025
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cpu


Preparando os dados para o Torch

In [11]:
X_train_t = torch.tensor(x_treino.T, dtype=torch.float32).to(device)
y_train_t = torch.tensor(y_treino.T, dtype=torch.float32).view(-1, 1).to(device)

X_val_t = torch.tensor(x_val.T, dtype=torch.float32).to(device)
y_val_t = torch.tensor(y_val.T, dtype=torch.float32).view(-1, 1).to(device)

X_test_t = torch.tensor(x_teste.T, dtype=torch.float32).to(device)
y_test_t = torch.tensor(y_teste.T, dtype=torch.float32).view(-1, 1).to(device)

train_loader = DataLoader(TensorDataset(X_train_t, y_train_t), batch_size=32, shuffle=True)
val_loader   = DataLoader(TensorDataset(X_val_t, y_val_t), batch_size=64, shuffle=False)
test_loader  = DataLoader(TensorDataset(X_test_t, y_test_t), batch_size=64)

Construindo a rede neural

In [12]:
def build_model(input_dim, hidden_layers, activation_name, dropout):
    layers = []
    act = {
        "relu": nn.ReLU()
    }[activation_name]

    prev_dim = input_dim
    for h in hidden_layers:
        layers.append(nn.Linear(prev_dim, h))
        layers.append(act)
        if dropout > 0:
            layers.append(nn.Dropout(dropout))
        prev_dim = h

    # camada final
    layers.append(nn.Linear(prev_dim, 1))

    return nn.Sequential(*layers).to(device)

Definindo função objetivo do optuna para otimização de hiperparâmetros da rede

In [14]:
def objective(trial):

    # número de camadas
    n_layers = trial.suggest_int("n_layers", 1, 4)

    # tamanho de cada camada
    hidden = [
        trial.suggest_int(f"n_units_layer_{i}", 8, 128, log=True)
        for i in range(n_layers)
    ]

    # ativação
    activation = "relu"


    # dropout
    dropout = trial.suggest_float("dropout", 0.0, 0.5)

    # weight decay
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)

    model = build_model(
        input_dim=2,
        hidden_layers=hidden,
        activation_name=activation,
        dropout=dropout
    ).to(device)

    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=weight_decay)
    criterion = nn.MSELoss()

    best_val = float("inf")
    patience = 20
    patience_count = 0
    epoch = 300

    for epoch in range(epoch):

        # --- Treino ---
        model.train()
        for xb, yb in train_loader:
            optimizer.zero_grad()
            pred = model(xb)
            loss = criterion(pred, yb)
            loss.backward()
            optimizer.step()

        # --- Validação ---
        model.eval()
        val_losses = []
        with torch.no_grad():
            for xb, yb in val_loader:
                pred = model(xb)
                vloss = criterion(pred, yb)
                val_losses.append(vloss.item())

        val_mse = np.mean(val_losses)

        # Reporta para Optuna (permite pruning)
        trial.report(val_mse, epoch)

        # Verifica se trial deve ser podado
        if trial.should_prune():
            raise optuna.TrialPruned()

        # Early stopping manual
        if val_mse < best_val:
            best_val = val_mse
            patience_count = 0
        else:
            patience_count += 1

        if patience_count >= patience:
            break

    return best_val


Otimizando

In [15]:
pruner = optuna.pruners.HyperbandPruner()
study = optuna.create_study(direction="minimize", pruner=pruner)
study.optimize(objective, n_trials=15, show_progress_bar=True)

print("Melhores hiperparâmetros:")
print(study.best_trial.params)

[I 2025-11-22 17:02:28,161] A new study created in memory with name: no-name-b7ea6080-3fe1-481f-9174-3ecc13ba4046
Best trial: 0. Best value: 10.8898:   7%|▋         | 1/15 [00:53<12:24, 53.18s/it]

[I 2025-11-22 17:03:21,354] Trial 0 finished with value: 10.889815151691437 and parameters: {'n_layers': 3, 'n_units_layer_0': 13, 'n_units_layer_1': 86, 'n_units_layer_2': 49, 'dropout': 0.10777119227503335, 'weight_decay': 1.1970647935793278e-05}. Best is trial 0 with value: 10.889815151691437.


Best trial: 0. Best value: 10.8898:  13%|█▎        | 2/15 [02:03<13:43, 63.37s/it]

[I 2025-11-22 17:04:31,851] Trial 1 finished with value: 47.870028495788574 and parameters: {'n_layers': 1, 'n_units_layer_0': 27, 'dropout': 0.16977348472589482, 'weight_decay': 0.0002767067091212526}. Best is trial 0 with value: 10.889815151691437.


Best trial: 2. Best value: 7.30921:  20%|██        | 3/15 [03:04<12:26, 62.23s/it]

[I 2025-11-22 17:05:32,711] Trial 2 finished with value: 7.309206575155258 and parameters: {'n_layers': 3, 'n_units_layer_0': 35, 'n_units_layer_1': 10, 'n_units_layer_2': 38, 'dropout': 0.1289091020621997, 'weight_decay': 4.3782754273019764e-05}. Best is trial 2 with value: 7.309206575155258.


Best trial: 2. Best value: 7.30921:  27%|██▋       | 4/15 [03:34<09:02, 49.34s/it]

[I 2025-11-22 17:06:02,305] Trial 3 finished with value: 116.01578140258789 and parameters: {'n_layers': 4, 'n_units_layer_0': 13, 'n_units_layer_1': 19, 'n_units_layer_2': 8, 'n_units_layer_3': 11, 'dropout': 0.48844269928938094, 'weight_decay': 0.003290430090070934}. Best is trial 2 with value: 7.309206575155258.


Best trial: 2. Best value: 7.30921:  33%|███▎      | 5/15 [03:36<05:23, 32.33s/it]

[I 2025-11-22 17:06:04,459] Trial 4 pruned. 


Best trial: 2. Best value: 7.30921:  40%|████      | 6/15 [03:59<04:24, 29.37s/it]

[I 2025-11-22 17:06:28,086] Trial 5 pruned. 


Best trial: 6. Best value: 1.62546:  47%|████▋     | 7/15 [05:10<05:43, 42.99s/it]

[I 2025-11-22 17:07:39,108] Trial 6 finished with value: 1.6254582107067108 and parameters: {'n_layers': 4, 'n_units_layer_0': 68, 'n_units_layer_1': 60, 'n_units_layer_2': 44, 'n_units_layer_3': 123, 'dropout': 0.06092875111793633, 'weight_decay': 7.332381666879278e-06}. Best is trial 6 with value: 1.6254582107067108.


Best trial: 6. Best value: 1.62546:  53%|█████▎    | 8/15 [05:12<03:28, 29.79s/it]

[I 2025-11-22 17:07:40,645] Trial 7 pruned. 


Best trial: 6. Best value: 1.62546:  60%|██████    | 9/15 [05:13<02:04, 20.75s/it]

[I 2025-11-22 17:07:41,513] Trial 8 pruned. 


Best trial: 6. Best value: 1.62546:  67%|██████▋   | 10/15 [05:43<01:58, 23.60s/it]

[I 2025-11-22 17:08:11,501] Trial 9 finished with value: 101.37927961349487 and parameters: {'n_layers': 1, 'n_units_layer_0': 10, 'dropout': 0.43779449044022073, 'weight_decay': 0.001370229104519219}. Best is trial 6 with value: 1.6254582107067108.


Best trial: 10. Best value: 1.30346:  73%|███████▎  | 11/15 [08:41<04:43, 70.77s/it]

[I 2025-11-22 17:11:09,211] Trial 10 finished with value: 1.303463138639927 and parameters: {'n_layers': 2, 'n_units_layer_0': 102, 'n_units_layer_1': 42, 'dropout': 0.012061856039458252, 'weight_decay': 1.2698758565474389e-06}. Best is trial 10 with value: 1.303463138639927.


Best trial: 10. Best value: 1.30346:  80%|████████  | 12/15 [10:54<04:29, 89.99s/it]

[I 2025-11-22 17:13:23,166] Trial 11 finished with value: 1.4458410888910294 and parameters: {'n_layers': 2, 'n_units_layer_0': 102, 'n_units_layer_1': 43, 'dropout': 0.004671123030754048, 'weight_decay': 1.512546196814767e-06}. Best is trial 10 with value: 1.303463138639927.


Best trial: 10. Best value: 1.30346:  87%|████████▋ | 13/15 [10:57<02:07, 63.61s/it]

[I 2025-11-22 17:13:26,060] Trial 12 pruned. 


Best trial: 10. Best value: 1.30346:  93%|█████████▎| 14/15 [10:59<00:44, 44.94s/it]

[I 2025-11-22 17:13:27,874] Trial 13 pruned. 


Best trial: 10. Best value: 1.30346: 100%|██████████| 15/15 [11:01<00:00, 44.08s/it]

[I 2025-11-22 17:13:29,352] Trial 14 pruned. 
Melhores hiperparâmetros:
{'n_layers': 2, 'n_units_layer_0': 102, 'n_units_layer_1': 42, 'dropout': 0.012061856039458252, 'weight_decay': 1.2698758565474389e-06}





Loop para treinar a rede com o melhor conjunto de hiperparâmetros

In [None]:
def train_and_eval(model, optimizer, criterion, trial=None, n_epochs=20):
    for epoch in range(n_epochs):
        model.train()
        for xb, yb in train_loader:
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

        # validação
        model.eval()
        val_losses = []
        with torch.no_grad():
            for xb, yb in val_loader:
                preds = model(xb)
                val_losses.append( criterion(preds, yb).item() )

        val_mse = sum(val_losses) / len(val_losses)

        if trial is not None:
            trial.report(val_mse, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()

    return val_mse


Re-treinando o modelo final com os melhores hiperparâmetros

In [16]:
best_params = study.best_trial.params

hidden = [best_params[f"n_units_layer_{i}"] for i in range(best_params["n_layers"])]

best_model = build_model(
    input_dim=2,
    hidden_layers=hidden,
    activation_name="relu",
    dropout=best_params["dropout"]
)

optimizer = optim.Adam(
    best_model.parameters(),
    lr=1e-3,
    weight_decay=best_params["weight_decay"]
)

criterion = nn.MSELoss()

train_and_eval(best_model, optimizer, criterion)

4.7594615668058395

Avaliação final no conjunto de teste

In [17]:
best_model.eval()
test_losses = []
with torch.no_grad():
    for xb, yb in test_loader:
        pred = best_model(xb)
        test_losses.append(((pred - yb)**2).mean().item())

final_test_mse = np.mean(test_losses)
print(f"MSE no conjunto de teste: {final_test_mse:.6f}")

MSE no conjunto de teste: 4.842668
