# In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

# Load the merged dataset from CSV.
data = pd.read_csv('merged_df.csv')

# Show the first rows to sanity-check the structure.
print(data.head())

# Clean the numeric columns.
numeric_cols = data.select_dtypes(include=['float64', 'int64']).columns
# Turn +/-inf into NaN *before* any gap filling: an infinity left in place
# would poison both the linear interpolation of its neighbours and the column
# means used as the fallback fill value.
data.replace([np.inf, -np.inf], np.nan, inplace=True)
# Fill gaps by linear interpolation along the rows (forward direction only,
# so NaNs that precede the first valid value of a column are left untouched).
data[numeric_cols] = data[numeric_cols].interpolate(method='linear', limit_direction='forward', axis=0)
# Whatever is still missing falls back to the column mean.
data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].mean())

# Helper that collapses the data into fixed-size row blocks (60 rows by default)
def map_data_in_blocks(data, block_size=60):
    """Average the numeric columns of ``data`` over consecutive row blocks.

    Rows are consumed in order, ``block_size`` at a time. A trailing block
    holding fewer than ``block_size`` rows is discarded.

    Args:
        data: DataFrame to aggregate; non-numeric columns are ignored.
        block_size: Number of consecutive rows averaged into one output row.

    Returns:
        DataFrame with one row per complete block, holding the per-column
        means of that block.
    """
    numeric_only = data.select_dtypes(include=[np.number])
    candidate_blocks = (
        numeric_only.iloc[start:start + block_size]
        for start in range(0, len(data), block_size)
    )
    # Only complete blocks contribute; the mean can be swapped for another
    # aggregate here if needed.
    block_means = [chunk.mean() for chunk in candidate_blocks if len(chunk) == block_size]
    return pd.DataFrame(block_means)

# Collapse the raw rows into per-block averages.
data_mapped = map_data_in_blocks(data)

# Show the structure after the block mapping.
print(data_mapped.head())

# Select the model inputs and the regression target.
features = data_mapped[["temperature", "speed"]]
target = data_mapped['dst']

# Standardise the features.
# The scaler statistics are estimated on the leading 80% of rows only, i.e.
# rows that end up inside the chronological training split made further down
# in this script, and are then applied to every row. Fitting on the full
# series would leak the mean/variance of the held-out tail into training.
# Keep this fraction in sync with the train/test split fraction.
scaler = StandardScaler()
n_fit_rows = int(0.8 * len(features))
# Fall back to all rows when the series is too short to carve out a prefix.
scaler.fit(features.iloc[:n_fit_rows] if n_fit_rows > 0 else features)
X_scaled = scaler.transform(features)

# Helper that turns a series into sliding input windows with next-step targets
def create_sequences(X, y, time_steps=60):
    """Build sliding windows over ``X`` paired with the value of ``y`` that follows.

    Window ``k`` is ``X[k:k + time_steps]`` and its label is
    ``y[k + time_steps]``, so ``len(X) - time_steps`` pairs are produced
    (none when that difference is not positive).

    Args:
        X: Sliceable sequence of feature rows.
        y: Sequence of targets indexed by position, aligned with ``X``.
        time_steps: Number of consecutive rows in each window.

    Returns:
        Tuple ``(windows, labels)`` of NumPy arrays.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    labels = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(labels)

# Number of past (block-averaged) rows fed to the model for each prediction.
time_steps = 60

# Build the sliding input windows and their next-step targets.
X_seq, y_seq = create_sequences(X_scaled, target.values, time_steps)

# Chronological split: the first 80% of the windows train the model and the
# remaining tail is held out for evaluation (no shuffling).
train_size = int(0.8 * len(X_seq))
X_train, y_train = X_seq[:train_size], y_seq[:train_size]
X_test, y_test = X_seq[train_size:], y_seq[train_size:]

# Transformer model definition
class TransformerModel(nn.Module):
    """Sequence-to-one regressor built on ``nn.Transformer``.

    Each time step is linearly projected to ``d_model`` features, a fixed
    sinusoidal positional encoding is added, and the resulting sequence is
    used both as encoder input and as decoder input. The decoder output at
    the last time step is mapped to a single scalar.

    The positional encoding is computed on the fly and has no parameters or
    buffers, so the module's ``state_dict`` layout is the same as without it.

    NOTE(review): only ``num_encoder_layers`` is forwarded to
    ``nn.Transformer``; the decoder depth is left at the library default.

    Args:
        input_dim: Number of features per time step in the raw input.
        d_model: Width of the transformer embeddings.
        nhead: Number of attention heads.
        num_encoder_layers: Number of encoder layers.
        dim_feedforward: Hidden width of the feed-forward sub-layers.
        dropout: Dropout probability used inside the transformer.
    """

    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout):
        super().__init__()
        self.input_projection = nn.Linear(input_dim, d_model)
        self.transformer = nn.Transformer(d_model=d_model, nhead=nhead, num_encoder_layers=num_encoder_layers,
                                          dim_feedforward=dim_feedforward, dropout=dropout, batch_first=True)
        self.fc_out = nn.Linear(d_model, 1)

    @staticmethod
    def _positional_encoding(seq_len, d_model, device, dtype):
        """Return the ``(seq_len, d_model)`` sinusoidal positional encoding."""
        positions = torch.arange(seq_len, device=device, dtype=torch.float32).unsqueeze(1)
        dims = torch.arange(d_model, device=device)
        # Dimensions 2k and 2k+1 share the frequency 1 / 10000^(2k / d_model).
        exponent = (dims - dims % 2).to(torch.float32) / d_model
        angles = positions / torch.pow(10000.0, exponent)
        # Even dimensions carry the sine, odd dimensions the cosine.
        encoding = torch.where(dims % 2 == 0, torch.sin(angles), torch.cos(angles))
        return encoding.to(dtype)

    def forward(self, src):
        """Predict one value per sequence.

        Args:
            src: Tensor of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        src = self.input_projection(src)
        # Attention by itself ignores token order; without this term the
        # prediction would not change when earlier time steps are shuffled.
        src = src + self._positional_encoding(src.size(1), src.size(2), src.device, src.dtype)
        tgt = src
        output = self.transformer(src, tgt)
        # Regress from the representation of the final time step only.
        output = self.fc_out(output[:, -1, :])
        return output

# Train the Transformer with regularisation (dropout + L2 weight decay)
def train_transformer(X_train, y_train, X_test, y_test, *, epochs=10, batch_size=32,
                      lr=0.001, weight_decay=1e-5, early_stopping_patience=10):
    """Train a ``TransformerModel`` regressor and return it with its best weights.

    The model is optimised with Adam on an MSE loss. After every epoch it is
    evaluated on ``(X_test, y_test)``; the weights of the epoch with the
    lowest evaluation loss are kept and restored before returning. Training
    stops early once ``early_stopping_patience`` consecutive epochs bring no
    improvement.

    NOTE(review): the same held-out set drives early stopping and is reported
    as "Test", so the printed test metrics are optimistically biased. With the
    default ``epochs`` equal to the default patience, early stopping cannot
    normally trigger; raise ``epochs`` or lower the patience to make use of it.

    Args:
        X_train: NumPy array of training windows, indexed as
            ``(n_samples, time_steps, n_features)``.
        y_train: NumPy array with one target per training window.
        X_test: NumPy array of evaluation windows, same layout as ``X_train``.
        y_test: NumPy array with one target per evaluation window.
        epochs: Maximum number of passes over the training data.
        batch_size: Mini-batch size for both loaders.
        lr: Adam learning rate.
        weight_decay: L2 penalty passed to Adam.
        early_stopping_patience: Non-improving epochs tolerated before stopping.

    Returns:
        The trained ``TransformerModel`` (on the selected device).

    Raises:
        ValueError: If either split contains no sequences.
    """
    if len(X_train) == 0 or len(X_test) == 0:
        raise ValueError("X_train and X_test must each contain at least one sequence")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_dim = X_train.shape[2]
    d_model = 16  # Similar to the LSTM baseline
    nhead = 4
    num_encoder_layers = 2
    dim_feedforward = 64
    dropout = 0.3  # Same value as used for the LSTM
    model = TransformerModel(input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout).to(device)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    train_dataset = TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float())
    test_dataset = TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float())
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    best_val_loss = float("inf")
    best_state = None
    epochs_no_improve = 0

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(X_batch)
            # Drop only the trailing size-1 axis: a bare squeeze() would also
            # remove the batch axis when the last batch holds a single sample.
            loss = criterion(output.squeeze(-1), y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        model.eval()
        val_loss = 0.0
        y_val_pred = []
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                val_output = model(X_batch)
                val_loss += criterion(val_output.squeeze(-1), y_batch).item()
                y_val_pred.append(val_output.cpu().numpy())

        # Flatten the (n, 1) predictions so they line up with the targets.
        y_val_pred = np.concatenate(y_val_pred, axis=0).reshape(-1)
        rmse_val = np.sqrt(mean_squared_error(y_test, y_val_pred))
        print(f"Epoch {epoch+1}, Train Loss: {train_loss/len(train_loader):.4f}, Test Loss: {val_loss/len(test_loader):.4f}, Test RMSE: {rmse_val:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # Snapshot the weights; clone so later optimiser steps cannot
            # overwrite the saved tensors in place.
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= early_stopping_patience:
                print("Early stopping")
                break

    # Hand back the best-scoring weights rather than those of the last epoch.
    if best_state is not None:
        model.load_state_dict(best_state)

    return model

# Train the Transformer on the chronological split; the held-out tail is used
# for the per-epoch evaluation.
model = train_transformer(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
