In [1]:
# ==== Config ====
# All tunable settings for the notebook live in this cell; later cells read these
# module-level names directly.
DATA_SOURCE = "toy"  # "toy" or "csv"
CSV_PATH = "...pm25_data.csv"  # only used when DATA_SOURCE="csv"

# When using a real CSV, set these to the actual column names
# (the first column in FEATURES is treated as PM2.5).
FEATURES = ["PM2.5", "NO2", "O3", "TEMP", "RH"]
TARGET_COL = "PM2.5"

# Window & horizon
L = 48     # history length
H = 6      # number of forecast steps (1 = one-step; >1 = multi-step)

# Fraction of the series (taken from the end) held out for validation.
VAL_RATIO = 0.2

# Model & train
ENC_HIDDEN = 64
DEC_HIDDEN = 64
NUM_LAYERS = 1
DROPOUT = 0.0
LR = 1e-3
WEIGHT_DECAY = 1e-5
EPOCHS = 10
BATCH_SIZE = 64
TEACHER_FORCING_RATIO = 0.5
SEED = 42


In [3]:
!pip install torch
import math, random, numpy as np, pandas as pd
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Seed every RNG the notebook draws from: Python's `random` (teacher-forcing coin
# flips), NumPy's global generator (toy data noise) and PyTorch.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


Collecting torch
  Downloading torch-2.9.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx>=2.5.1 (from torch)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec>=0.8.5 (from torch)
  Downloading fsspec-2025.9.0-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.8.93 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-runtime-cu12==12.8.90 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-cupti-cu12==12.8.90 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cudnn-cu12==9.10.2.21 (from torch)
  Downloading n

<torch._C.Generator at 0x7f154317d590>

In [None]:
def make_toy_dataset(T=3000):
    """Build a synthetic multivariate series with daily and weekly cycles.

    Every covariate is a 24-step sinusoid plus Gaussian noise. PM2.5 is a linear
    mix of a daily cycle, a weekly cycle, and (partly shifted) covariates, plus
    noise, floored at 5. Shifts use ``np.roll``, so shifted values wrap around
    from the end of the series to the start.

    Noise is drawn from NumPy's global RNG, so the result depends on the
    current ``np.random`` state.

    Args:
        T: Number of time steps to generate.

    Returns:
        A ``pandas.DataFrame`` with ``T`` rows and the columns
        ``["PM2.5", "NO2", "O3", "TEMP", "RH"]``, in that order.
    """
    steps = np.arange(T)

    def day_cycle(lag=0):
        # Unit-amplitude sinusoid with a 24-step period, delayed by `lag` steps.
        return np.sin(2*np.pi*(steps - lag)/24.0)

    daily_term = 10*day_cycle()
    weekly_term = 5*np.sin(2*np.pi*steps/(24.0*7))

    # The order of the random draws below is fixed (PM2.5 noise first, then
    # NO2, O3, TEMP, RH) so a given seed always yields the same dataset.
    pm_noise = np.random.normal(0, 2, size=T)
    no2 = 30 + 5*day_cycle(3) + np.random.normal(0, 1, size=T)
    o3 = 50 + 7*day_cycle(12) + np.random.normal(0, 1, size=T)
    temp = 27 + 4*day_cycle() + np.random.normal(0, 0.3, size=T)
    rh = 70 + 10*day_cycle(6) + np.random.normal(0, 1, size=T)

    pm25 = (
        40
        + 0.6*np.roll(daily_term, 1)
        + 0.4*np.roll(weekly_term, 24)
        + 0.15*no2
        - 0.1*np.roll(o3, 6)
        - 0.2*(temp - 27)
        + 0.05*(rh - 70)
        + pm_noise
    )
    pm25 = np.maximum(pm25, 5)  # floor at 5, no upper bound

    series = {"PM2.5": pm25, "NO2": no2, "O3": o3, "TEMP": temp, "RH": rh}
    return pd.DataFrame(series, columns=["PM2.5","NO2","O3","TEMP","RH"])

def load_data():
    """Return the modelling DataFrame selected by ``DATA_SOURCE``.

    ``"toy"`` produces a 3000-step synthetic series from ``make_toy_dataset``.
    Any other value reads ``CSV_PATH``, keeps only the ``FEATURES`` columns
    (in that order), drops rows containing NaN and renumbers the index from 0.
    """
    if DATA_SOURCE != "toy":
        raw = pd.read_csv(CSV_PATH)
        selected = raw[FEATURES].copy()
        return selected.dropna().reset_index(drop=True)
    return make_toy_dataset(3000)

# Load the dataset once into the module-level `df` and preview it:
# first rows as a rich table, then the (rows, columns) shape.
df = load_data()
display(df.head())
print("Shape:", df.shape)


In [None]:
class WindowDS(Dataset):
    """Sliding-window dataset for sequence-to-sequence forecasting.

    Sample ``i`` pairs the ``L`` feature rows ``X[i : i+L]`` with the ``H``
    target values that follow them, ``y[i+L : i+L+H]``.

    Args:
        X: Array of shape (T, D) with one feature row per time step.
        y: Array of shape (T,) with the target value per time step.
        L: Number of history steps in each input window.
        H: Number of future steps in each target window.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or ``L``/``H`` is < 1.
    """

    def __init__(self, X, y, L, H):
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        if L < 1 or H < 1:
            raise ValueError(f"L and H must be >= 1, got L={L}, H={H}")
        self.X = X.astype(np.float32)
        self.y = y.astype(np.float32)
        self.L = L; self.H = H

    def __len__(self):
        # A series shorter than L+H holds no complete window; clamp to 0 because
        # len() raises on a negative result.
        return max(0, len(self.X) - self.L - self.H + 1)

    def __getitem__(self, i):
        """Return ``(x, target)`` tensors of shape (L, D) and (H, 1).

        Negative indices count from the end. Out-of-range indices raise
        ``IndexError`` rather than returning a silently truncated window.
        """
        n = len(self)
        if i < 0:
            i += n
        if not 0 <= i < n:
            raise IndexError(f"index out of range for WindowDS of length {n}")
        x = self.X[i:i+self.L]                           # (L, D)
        target = self.y[i+self.L:i+self.L+self.H]        # (H,)
        return torch.tensor(x), torch.tensor(target).unsqueeze(-1)  # (H,1)

# Chronological split: the last VAL_RATIO of the series is held out for validation.
T_total = len(df)
T_val = int(T_total*VAL_RATIO)
T_train = T_total - T_val

# Each split must hold at least one full (history + horizon) window, otherwise the
# corresponding dataset would be empty and training/evaluation would fail later
# with a far less obvious error.
if min(T_train, T_val) < L + H:
    raise ValueError(
        f"Each split needs at least L+H={L + H} rows, "
        f"got train={T_train}, val={T_val}"
    )

# Fit the scaler on the training rows only so that validation statistics do not
# leak into training, then apply the same transform to the whole series.
scaler = StandardScaler()
scaler.fit(df.values[:T_train])
X_all = scaler.transform(df.values)        # (T, D)

# Take the target from the *scaled* matrix. The decoder is seeded with the scaled
# last observation (x[:, -1, 0:1]) and then fed its own predictions / the teacher
# targets, so targets must live in the same standardised space as that input.
# Losses are therefore in standardised units; multiply predictions by
# scaler.scale_[target_idx] and add scaler.mean_[target_idx] to get raw units.
# NOTE: the model reads column 0 as the target, so TARGET_COL should be the first column.
target_idx = df.columns.get_loc(TARGET_COL)
y_all = X_all[:, target_idx]               # (T,)

X_train, X_val = X_all[:T_train], X_all[T_train:]
y_train, y_val = y_all[:T_train], y_all[T_train:]

train_ds = WindowDS(X_train, y_train, L, H)
val_ds   = WindowDS(X_val,   y_val,   L, H)

# Shuffle windows for training only; keep validation in chronological order.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False)

D = X_all.shape[1]
print(f"D={D}, |train|={len(train_ds)}, |val|={len(val_ds)}")


In [None]:
class LuongAttention(nn.Module):
    """Multiplicative attention with a learned bilinear score.

    The score of encoder step ``l`` against the decoder state ``s`` is
    ``s . (Wa h_l)``, where ``Wa`` is a bias-free linear map from the encoder
    hidden size to the decoder hidden size.
    """

    def __init__(self, enc_hidden, dec_hidden):
        super().__init__()
        self.Wa = nn.Linear(enc_hidden, dec_hidden, bias=False)

    def forward(self, encoder_h, s_prev):
        """Attend over the encoder outputs.

        Args:
            encoder_h: Encoder outputs, shape (B, L, H_enc).
            s_prev: Decoder hidden state used as the query, shape (B, H_dec).

        Returns:
            ``(context, attn)``: the attention-weighted sum of encoder outputs,
            shape (B, H_enc), and the softmax weights over the L encoder
            steps, shape (B, L).
        """
        keys = self.Wa(encoder_h)                                   # (B, L, H_dec)
        query = s_prev.unsqueeze(-1)                                # (B, H_dec, 1)
        scores = torch.matmul(keys, query).squeeze(-1)              # (B, L)
        attn = scores.softmax(dim=1)                                # (B, L)
        context = torch.matmul(attn.unsqueeze(1), encoder_h).squeeze(1)  # (B, H_enc)
        return context, attn


In [None]:
class Seq2SeqAttention(nn.Module):
    """LSTM encoder with an attention-equipped LSTMCell decoder.

    The encoder reads the full input window. Its final hidden and cell states
    (top layer) are projected by two linear "bridge" layers to initialise the
    decoder. At each forecast step the decoder consumes one scalar, attends over
    all encoder outputs, and a linear head maps ``[decoder state, context]`` to
    one predicted value.
    """

    def __init__(self, input_dim, enc_hidden, dec_hidden, num_layers=1, dropout=0.0):
        super().__init__()
        # Dropout is only forwarded to the LSTM when it has more than one layer.
        encoder_dropout = dropout if num_layers > 1 else 0.0
        self.encoder = nn.LSTM(
            input_dim,
            enc_hidden,
            num_layers=num_layers,
            batch_first=True,
            dropout=encoder_dropout,
        )
        self.decoder = nn.LSTMCell(1, dec_hidden)
        self.attn = LuongAttention(enc_hidden, dec_hidden)
        self.bridge_h = nn.Linear(enc_hidden, dec_hidden)
        self.bridge_c = nn.Linear(enc_hidden, dec_hidden)
        self.out = nn.Linear(dec_hidden + enc_hidden, 1)

    def forward(self, x, y=None, teacher_forcing_ratio=0.0, horizon=1, return_attn=False):
        """Forecast ``horizon`` steps from the input window ``x``.

        Args:
            x: Input window, shape (B, L, D). Column 0 of the last time step
                is used as the first decoder input, i.e. column 0 is assumed to
                be the target series.
            y: Optional ground-truth targets, shape (B, H, 1), used for
                teacher forcing.
            teacher_forcing_ratio: Per-step probability of feeding the ground
                truth instead of the model's own prediction. Only takes effect
                in training mode and when ``y`` is given.
            horizon: Number of steps to predict.
            return_attn: If true, also return the attention weights.

        Returns:
            Predictions of shape (B, horizon, 1); with ``return_attn`` a tuple
            ``(preds, attn)`` where ``attn`` has shape (B, horizon, L).
        """
        enc_out, (h_final, c_final) = self.encoder(x)    # enc_out: (B,L,H_enc)
        hidden = self.bridge_h(h_final[-1])              # (B,H_dec)
        cell = self.bridge_c(c_final[-1])                # (B,H_dec)
        step_input = x[:, -1, 0:1]                       # (B,1)

        may_teacher_force = self.training and y is not None

        step_preds, step_weights = [], []
        for step in range(horizon):
            hidden, cell = self.decoder(step_input, (hidden, cell))
            context, weights = self.attn(enc_out, hidden)   # (B,H_enc), (B,L)
            if return_attn:
                step_weights.append(weights)

            y_hat = self.out(torch.cat((hidden, context), dim=-1))   # (B,1)
            step_preds.append(y_hat.unsqueeze(1))                    # (B,1,1)

            # The coin is only flipped when teacher forcing is possible at all.
            use_truth = may_teacher_force and random.random() < teacher_forcing_ratio
            step_input = y[:, step, :] if use_truth else y_hat

        preds = torch.cat(step_preds, dim=1)                         # (B,H,1)
        if not return_attn:
            return preds
        return preds, torch.stack(step_weights, dim=1)               # (B,H,L)


In [None]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# D is the number of input features (columns of the scaled feature matrix).
model  = Seq2SeqAttention(D, ENC_HIDDEN, DEC_HIDDEN, NUM_LAYERS, DROPOUT).to(device)
# Adam optimiser over all model parameters, with weight decay from the config cell.
opt    = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
# Mean squared error, averaged over every predicted step in the batch.
loss_fn = nn.MSELoss()

def run_epoch(loader, train=True, tf_ratio=0.5):
    """Run one pass over ``loader`` and return the mean loss per sample.

    Uses the module-level ``model``, ``opt``, ``loss_fn``, ``device`` and ``H``.

    Args:
        loader: Iterable yielding ``(xb, yb)`` batches.
        train: If true, put the model in training mode and update its weights
            (gradient norm clipped to 1.0). Otherwise evaluate with gradients
            disabled and without teacher forcing.
        tf_ratio: Teacher-forcing probability; only used when ``train`` is true.

    Returns:
        The batch losses averaged with each batch weighted by its size.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    model.train() if train else model.eval()

    loss_sum, sample_count = 0.0, 0
    with torch.set_grad_enabled(train):
        for xb, yb in loader:
            xb = xb.to(device)
            yb = yb.to(device)

            if train:
                opt.zero_grad()

            # Targets and teacher forcing reach the model only while training.
            pred = model(
                xb,
                y=yb if train else None,
                teacher_forcing_ratio=tf_ratio if train else 0.0,
                horizon=H,
            )
            loss = loss_fn(pred, yb)

            if train:
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                opt.step()

            batch_size = xb.size(0)
            loss_sum += loss.item()*batch_size
            sample_count += batch_size
    return loss_sum/sample_count

# One training pass (with teacher forcing) followed by one validation pass
# (no weight updates, no teacher forcing) per epoch; both values are the
# per-sample mean loss returned by run_epoch.
for epoch in range(1, EPOCHS+1):
    tr = run_epoch(train_loader, True, TEACHER_FORCING_RATIO)
    va = run_epoch(val_loader,   False, 0.0)
    print(f"Epoch {epoch:02d} | Train {tr:.4f} | Val {va:.4f}")


In [None]:
# Forecast the first validation batch in eval mode and collect attention weights.
model.eval()
xb, yb = next(iter(val_loader))
xb = xb.to(device)
with torch.no_grad():
    preds, attn = model(xb, horizon=H, return_attn=True)

# Attention of the first sample in the batch: (H, L), one row per forecast step.
A = attn.detach().cpu().numpy()[0]

fig, ax = plt.subplots(figsize=(8,4))
heatmap = ax.imshow(A, aspect='auto')  # colormap left unset to use the default
ax.set_title("Attention Heatmap (rows=forecast steps, cols=encoder time)")
ax.set_xlabel("Encoder time (1..L)")
ax.set_ylabel("Forecast step (1..H)")
fig.colorbar(heatmap, ax=ax)
fig.tight_layout()
