In [None]:
# Print the NVIDIA driver / GPU status of this runtime (IPython `!` shell escape).
!nvidia-smi

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import math

# Dataset: multivariate input, univariate target (next Global_active_power)
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over the rows of a DataFrame.

    Sample ``i`` is the pair ``(x, y)`` where ``x`` holds rows
    ``i .. i + seq_length - 1`` of every column and ``y`` is the value of
    ``target_col`` in row ``i + seq_length`` (the row right after the window).

    Args:
        df: DataFrame whose columns are the features; all values are cast
            to float32.
        seq_length: Number of consecutive rows in each input window (>= 1).
        target_col: Name of the column to predict.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
        KeyError: If ``target_col`` is not a column of ``df``.
    """

    def __init__(self, df: pd.DataFrame, seq_length: int, target_col: str):
        if seq_length < 1:
            raise ValueError(f"seq_length must be >= 1, got {seq_length}")
        # astype() copies, so the stored array is independent of `df`.
        self.features = df.values.astype('float32')
        self.seq_length = seq_length
        self.target_idx = df.columns.get_loc(target_col)

    def __len__(self):
        # A frame shorter than seq_length + 1 rows has no complete
        # (window, target) pair; clamp so len() never goes negative.
        return max(len(self.features) - self.seq_length, 0)

    def __getitem__(self, idx):
        """Return ``(x, y)``: x is (seq_length, num_features), y is a 0-d tensor."""
        n_samples = len(self)
        if idx < 0:
            # Support Python-style negative indexing instead of silently
            # slicing a wrong (often empty) window.
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f"index out of range for dataset of length {n_samples}")
        window_end = idx + self.seq_length
        x = self.features[idx:window_end]                  # (seq_length, num_features)
        y = self.features[window_end, self.target_idx]     # scalar
        return torch.from_numpy(x), torch.tensor(y, dtype=torch.float32)

# Positional encoding for Transformer
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a sequence-first tensor.

    Even feature indices receive ``sin(pos * w_k)`` and odd indices receive
    ``cos(pos * w_k)`` with ``w_k = 10000 ** (-2k / d_model)``. The table is
    stored as a non-trainable buffer ``pe`` of shape (max_len, 1, d_model).

    Args:
        d_model: Feature dimension of the tensors passed to ``forward``
            (even or odd).
        max_len: Longest sequence length that can be encoded.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even columns,
        # so drop the surplus frequency instead of failing on a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(1))  # (max_len, 1, d_model)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(0)`` positions.

        Args:
            x: Tensor laid out as (seq_len, batch, d_model).

        Raises:
            ValueError: If ``seq_len`` exceeds the ``max_len`` of the table.
        """
        seq_len = x.size(0)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {self.pe.size(0)}"
            )
        return x + self.pe[:seq_len]

# Transformer-based model for time series forecasting
class TransformerModel(nn.Module):
    """Encoder-only Transformer that maps a feature window to one scalar.

    Pipeline: linear projection of the input features to ``d_model``,
    positional encoding, a stack of ``num_layers`` encoder layers, then a
    linear head applied to the representation of the last time step.

    Args:
        input_size: Number of features per time step.
        d_model: Width of the encoder.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Hidden width of each layer's feed-forward block.
        dropout: Dropout rate used inside the encoder layers.
    """

    def __init__(
        self,
        input_size: int,
        d_model: int = 64,
        nhead: int = 4,
        num_layers: int = 2,
        dim_feedforward: int = 128,
        dropout: float = 0.1,
    ):
        super().__init__()
        # Feature projection: input_size -> d_model
        self.input_fc = nn.Linear(in_features=input_size, out_features=d_model)
        # Positional information added after the projection
        self.pos_encoder = PositionalEncoding(d_model)
        # Encoder stack; layers consume sequence-first tensors
        layer_config = dict(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=False,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(**layer_config),
            num_layers,
        )
        # Regression head: d_model -> 1
        self.decoder = nn.Linear(in_features=d_model, out_features=1)

    def forward(self, x):
        """Predict one value per sample.

        Args:
            x: Tensor laid out as (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch,).
        """
        embedded = self.input_fc(x)                     # (batch, seq_len, d_model)
        seq_first = embedded.permute(1, 0, 2)           # (seq_len, batch, d_model)
        encoded = self.transformer_encoder(self.pos_encoder(seq_first))
        final_step = encoded[-1]                        # (batch, d_model)
        prediction = self.decoder(final_step)           # (batch, 1)
        return prediction.squeeze(-1)                   # (batch,)

# Training function
def train(model, dataloader, criterion, optimizer, epochs, device):
    """Train ``model`` for ``epochs`` passes over ``dataloader``.

    Each epoch prints the mean of the per-batch losses. Batches are counted
    while iterating, so loaders without ``__len__`` are supported and an
    empty loader reports ``nan`` instead of raising ``ZeroDivisionError``.

    Args:
        model: Module called as ``model(x_batch)``; moved to ``device``.
        dataloader: Re-iterable yielding ``(x_batch, y_batch)`` tensor pairs.
        criterion: Loss callable invoked as ``criterion(preds, y_batch)``.
        optimizer: Optimizer over the parameters of ``model``.
        epochs: Number of passes over ``dataloader``.
        device: Device on which the model and the batches are placed.

    Returns:
        List with the mean batch loss of every epoch, in order.
    """
    model.to(device)
    history = []
    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0.0
        num_batches = 0
        for x_batch, y_batch in dataloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            preds = model(x_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        # No batches means there is no loss to average; report nan.
        avg_loss = total_loss / num_batches if num_batches else float('nan')
        history.append(avg_loss)
        print(f"Epoch {epoch}/{epochs}, Loss: {avg_loss:.6f}")
    return history



In [None]:
import matplotlib.pyplot as plt

# Load the cleaned series; the 'dt' column becomes the datetime index
df_new = pd.read_csv("/kaggle/input/clean-ts/clean.csv")
df_new['dt'] = pd.to_datetime(df_new['dt'])
df_new = df_new.set_index('dt')

# Hyperparameters
seed = 42
seq_length = 7
batch_size = 50000
d_model = 96
nhead = 4
num_layers = 2
dim_feedforward = 128
learning_rate = 1e-3
epochs = 20
holdout_size = 200  # trailing windows kept out of training for evaluation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed the RNGs so weight initialisation and batch shuffling are repeatable
torch.manual_seed(seed)
np.random.seed(seed)

# Dataset & DataLoader
dataset = TimeSeriesDataset(df_new, seq_length, target_col='Global_active_power')
# Train only on the leading windows: the last `holdout_size` windows stay
# unseen so that scoring them later measures generalisation, not memorisation.
train_size = max(len(dataset) - holdout_size, 0)
assert train_size > 0, "not enough windows left for training after the hold-out"
train_dataset = torch.utils.data.Subset(dataset, range(train_size))
dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Model, loss, optimizer
model = TransformerModel(
    input_size=df_new.shape[1],
    d_model=d_model,
    nhead=nhead,
    num_layers=num_layers,
    dim_feedforward=dim_feedforward,
    dropout=0.1,
)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train
train(model, dataloader, criterion, optimizer, epochs, device)

# Save weights
torch.save(model.state_dict(), 'transformer_model.pth')
print("Transformer model saved to transformer_model.pth")

In [None]:
    # Rebuild the architecture and restore the saved weights
    loaded = TransformerModel(
        input_size=df_new.shape[1], d_model=d_model,
        nhead=nhead, num_layers=num_layers,
        dim_feedforward=dim_feedforward
    )
    loaded.load_state_dict(torch.load('transformer_model.pth', map_location=device))
    loaded.to(device).eval()

    n_samples = 200
    # Clamp the start so a dataset with fewer than n_samples windows is scored
    # in full instead of producing negative (wrong) indices.
    eval_start = max(len(dataset) - n_samples, 0)
    n_eval = len(dataset) - eval_start
    eval_loader = DataLoader(
        torch.utils.data.Subset(dataset, range(eval_start, len(dataset))),
        batch_size=256,
        shuffle=False,  # keep chronological order for the plot
    )

    # Batched inference instead of one forward pass per sample
    true_chunks, pred_chunks = [], []
    with torch.no_grad():
        for x_batch, y_batch in eval_loader:
            batch_pred = loaded(x_batch.to(device))
            true_chunks.append(y_batch.numpy())
            pred_chunks.append(batch_pred.cpu().numpy())

    # float64 so the metrics are accumulated in double precision
    y_true = np.concatenate(true_chunks).astype(np.float64)
    y_pred = np.concatenate(pred_chunks).astype(np.float64)
    mse = np.mean((y_true - y_pred) ** 2)
    mae = np.mean(np.abs(y_true - y_pred))
    print(f"MSE over {n_eval} samples: {mse:.6f}")
    print(f"MAE over {n_eval} samples: {mae:.6f}")

    # Plot actual vs predicted values
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(y_true, label='Actual')
    ax.plot(y_pred, label='Predicted')
    ax.set_title('Actual vs Predicted Global_active_power (Transformer)')
    ax.set_xlabel('Sample Index')
    ax.set_ylabel('Global_active_power')
    ax.legend()
    plt.show()
