In [1]:
# models/informer.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class ProbSparseAttention(nn.Module):
    """Multi-head attention that only evaluates a subset of "active" queries.

    For every (batch, head) pair the queries are scored against a random
    sample of keys. The ``n_top`` queries with the largest ``max - mean``
    sampled score receive ordinary softmax attention over all keys; every
    other query position is left as zeros in the output.

    Args:
        mask_flag: Stored on the module. Masking is not implemented here and
            ``attn_mask`` passed to ``forward`` is ignored.
        factor: Multiplier applied to ``ceil(log(L))`` to obtain the number of
            sampled keys and the number of active queries.
        scale: Optional multiplier for the attention scores. When ``None``
            the usual ``1 / sqrt(D_head)`` is used.
        attention_dropout: Dropout probability applied to attention weights.
        n_heads: Number of heads the model dimension is split into.
    """

    def __init__(self, mask_flag=False, factor=5, scale=None, attention_dropout=0.1, n_heads=8):
        super(ProbSparseAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.n_heads = n_heads
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):
        """Return indices of the ``n_top`` most "active" queries.

        Args:
            Q: Queries of shape ``[B, H, L_Q, D]``.
            K: Keys of shape ``[B, H, L_K, D]``.
            sample_k: Number of keys sampled (with replacement) per query.
            n_top: Number of query indices to return per (batch, head).

        Returns:
            Long tensor of shape ``[B, H, n_top]`` (unsorted).
        """
        L_Q = Q.shape[2]
        L_K = K.shape[2]

        # One independent random key sample per query position; indexing K
        # directly yields [B, H, L_Q, sample_k, D] without first expanding K.
        index_sample = torch.randint(L_K, (L_Q, sample_k), device=K.device)
        K_sample = K[:, :, index_sample, :]
        Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)

        # Sparsity measure: how far the best sampled score is above the average.
        M = Q_K_sample.max(-1)[0] - Q_K_sample.mean(-1)
        return M.topk(n_top, sorted=False)[1]

    def forward(self, queries, keys, values, attn_mask):
        """Apply sparse attention.

        Args:
            queries: Tensor of shape ``[B, L_Q, D]``.
            keys: Tensor of shape ``[B, L_K, D]``.
            values: Tensor of shape ``[B, L_K, D]``.
            attn_mask: Ignored.

        Returns:
            Tuple ``(out, None)`` where ``out`` has shape ``[B, L_Q, D]``.

        Raises:
            ValueError: If ``D`` is not divisible by ``n_heads``.
        """
        B, L_Q, D = queries.shape
        L_K = keys.shape[1]
        H = self.n_heads
        if D % H != 0:
            raise ValueError(f"model dimension {D} is not divisible by n_heads={H}")
        D_head = D // H

        queries = queries.view(B, L_Q, H, D_head).transpose(1, 2)
        keys = keys.view(B, L_K, H, D_head).transpose(1, 2)
        values = values.view(B, L_K, H, D_head).transpose(1, 2)

        # Budgets are clamped to [1, L]: topk fails when asked for more
        # entries than exist, and log(1) == 0 would otherwise select nothing.
        sample_k = min(L_K, max(1, self.factor * math.ceil(math.log(L_K))))
        n_top = min(L_Q, max(1, self.factor * math.ceil(math.log(L_Q))))

        top_idx = self._prob_QK(queries, keys, sample_k=sample_k, n_top=n_top)

        # top_idx is [B, H, n_top]; gather selects each (batch, head)'s own
        # queries. Plain advanced indexing with this tensor would instead
        # broadcast it against the batch/head axes and produce a
        # [B, H, B, H, n_top, D_head] result.
        gather_idx = top_idx.unsqueeze(-1).expand(-1, -1, -1, D_head)
        Q_reduce = queries.gather(2, gather_idx)

        scale = self.scale if self.scale is not None else 1.0 / math.sqrt(D_head)
        scores = torch.matmul(Q_reduce, keys.transpose(-2, -1)) * scale
        A = self.dropout(torch.softmax(scores, dim=-1))
        V = torch.matmul(A, values)

        # Non-selected query positions stay zero. The buffer is created from V
        # so that device and dtype always match.
        out = V.new_zeros(B, H, L_Q, D_head).scatter(2, gather_idx, V)

        out = out.transpose(1, 2).contiguous().view(B, L_Q, D)
        return out, None

class AttentionLayer(nn.Module):
    """Self-attention sub-layer: Q/K/V projections, an inner attention
    callable, an output projection, then residual add + LayerNorm.

    Args:
        attention: Callable invoked as
            ``attention(queries, keys, values, attn_mask=None)`` that returns
            a tuple whose first element is the attended tensor.
        d_model: Feature size of the input and of every projection.
        n_heads: Accepted for signature compatibility; not used by this class.
    """

    def __init__(self, attention, d_model, n_heads):
        super().__init__()

        def square_linear():
            # All four projections map d_model -> d_model.
            return nn.Linear(d_model, d_model)

        self.inner_attention = attention
        self.query_projection = square_linear()
        self.key_projection = square_linear()
        self.value_projection = square_linear()
        self.out_projection = square_linear()
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x):
        """Return ``LayerNorm(out_projection(attention(x)) + x)``."""
        attended = self.inner_attention(
            self.query_projection(x),
            self.key_projection(x),
            self.value_projection(x),
            attn_mask=None,
        )[0]
        projected = self.out_projection(attended)
        return self.norm(projected + x)

class EncoderLayer(nn.Module):
    """One encoder block: attention sub-layer followed by a position-wise
    feed-forward network with residual connection and LayerNorm.

    Args:
        attention: Inner attention module handed to ``AttentionLayer``.
        d_model: Feature size of the block's input and output.
        d_ff: Hidden size of the feed-forward network.
        dropout: Dropout probability used inside and after the feed-forward
            network.
    """

    def __init__(self, attention, d_model, d_ff, dropout=0.1):
        super().__init__()
        self.attention = AttentionLayer(attention, d_model, n_heads=8)
        # d_model -> d_ff -> d_model with GELU and dropout in between.
        feed_forward = [
            nn.Linear(d_model, d_ff),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_ff, d_model),
        ]
        self.ffn = nn.Sequential(*feed_forward)
        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``LayerNorm(a + dropout(ffn(a)))`` where ``a = attention(x)``."""
        attended = self.attention(x)
        transformed = self.dropout(self.ffn(attended))
        return self.norm(attended + transformed)

class Informer(nn.Module):
    """Encoder-only forecaster: Conv1d embedding, a stack of encoder layers
    using ``ProbSparseAttention``, and a linear head on the last time step.

    Args:
        input_len: Accepted but not referenced anywhere in this class.
        d_model: Embedding / hidden feature size.
        e_layers: Number of stacked encoder layers.
        d_ff: Hidden size of each encoder layer's feed-forward network.
    """

    def __init__(self, input_len, d_model=512, e_layers=3, d_ff=2048):
        super().__init__()
        self.embedding = nn.Conv1d(1, d_model, kernel_size=3, padding=1)

        blocks = []
        for _ in range(e_layers):
            blocks.append(
                EncoderLayer(ProbSparseAttention(), d_model=d_model, d_ff=d_ff)
            )
        self.encoder = nn.ModuleList(blocks)

        self.projection = nn.Linear(d_model, 1)

    def forward(self, x):
        """Map ``x`` of shape ``[B, L, 1]`` to a ``[B, 1]`` prediction."""
        # Conv1d wants channels first: [B, L, 1] -> [B, 1, L] -> [B, d_model, L],
        # then back to [B, L, d_model] for the encoder stack.
        channels_first = x.permute(0, 2, 1)
        hidden = self.embedding(channels_first).permute(0, 2, 1)

        for block in self.encoder:
            hidden = block(hidden)

        # Forecast from the representation of the final time step.
        last_step = hidden[:, -1, :]
        return self.projection(last_step)


In [None]:
# utils/data_loader.py
import os
import glob
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset

class TimeSeriesDataset(Dataset):
    """Dataset of (window, target) pairs stored as float32 arrays.

    Args:
        X: Array-like with an ``astype`` method holding one input window per
            row (first axis indexes samples).
        y: Array-like with an ``astype`` method holding one target per sample.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of
            samples.
    """

    def __init__(self, X, y):
        # Fail early: a mismatch would otherwise only surface as an IndexError
        # in the middle of an epoch, or as silently unused targets.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X.astype(np.float32)
        self.y = y.astype(np.float32)

    def __len__(self):
        """Number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(window, target)`` pair at position ``idx``."""
        return self.X[idx], self.y[idx]

def process_file(file_path, window_size=60):
    """Turn the ``close`` column of one parquet file into sliding windows.

    Missing values are dropped first. Sample ``i`` consists of the
    ``window_size`` consecutive values starting at position ``i`` and, as its
    target, the value immediately following that window.

    Args:
        file_path: Path of the parquet file to read.
        window_size: Number of consecutive values per input window.

    Returns:
        Tuple ``(X, y)`` of numpy arrays with shapes ``(n, window_size)`` and
        ``(n,)``. When the file holds no more than ``window_size`` values, or
        cannot be processed, both arrays are empty (``n == 0``).
    """
    try:
        df = pd.read_parquet(file_path, columns=['close'])
        data = df['close'].dropna().to_numpy()

        n_windows = len(data) - window_size
        if n_windows <= 0:
            # Same empty-array shape as the error path, so callers always
            # receive ndarrays rather than a mix of lists and arrays.
            return np.empty((0, window_size)), np.empty((0,))

        # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1];
        # fancy indexing builds every window in one vectorised step.
        window_index = np.arange(n_windows)[:, None] + np.arange(window_size)
        X = data[window_index]
        y = data[window_size:].copy()
        return X, y
    except Exception as e:
        # Best effort: report the problematic file and let the caller skip it.
        print(f"⚠️ Lỗi khi xử lý file {file_path}: {e}")
        return np.empty((0, window_size)), np.empty((0,))

def load_batch_files(file_list, window_size=60):
    """Load several files, pool their windows and z-score normalise them.

    Args:
        file_list: Iterable of parquet file paths passed to ``process_file``.
        window_size: Number of values per input window.

    Returns:
        ``None`` when no file yields any sample. Otherwise a tuple
        ``(dataset, mean, std)`` where ``dataset`` is a ``TimeSeriesDataset``
        whose inputs have shape ``[N, window_size, 1]`` and ``mean`` / ``std``
        are the scalar statistics of the pooled input windows. Inputs and
        targets are both normalised with these statistics, so predictions can
        be mapped back with ``pred * std + mean``.
    """
    X_parts, y_parts = [], []
    for f in file_list:
        X, y = process_file(f, window_size)
        if len(X):
            X_parts.append(X)
            y_parts.append(y)

    if not X_parts:
        return None

    X_all = np.concatenate(X_parts, axis=0)
    y_all = np.concatenate(y_parts, axis=0)

    mean = X_all.mean()
    std = X_all.std()
    if std == 0:
        # Constant data: avoid dividing by zero (which would fill the dataset
        # with NaN) and keep the inverse transform well defined.
        std = 1.0

    X_all = (X_all - mean) / std
    # Targets must live on the same scale as the inputs; leaving them raw
    # makes the model regress un-normalised values from normalised windows.
    y_all = (y_all - mean) / std

    X_all = np.expand_dims(X_all, axis=-1)  # [B, L, 1]

    return TimeSeriesDataset(X_all, y_all), mean, std


In [None]:
# train.py
import os
import glob
import torch
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
from tqdm import tqdm
import matplotlib.pyplot as plt

from models.informer import Informer
from utils.data_loader import load_batch_files

# ==== Configuration ====
DATA_DIR = "/content/drive/MyDrive/Draft/TimeSeries/Data_50"
BATCH_SIZE = 5            # number of parquet files loaded per outer iteration
WINDOW_SIZE = 60          # length of each input window
EPOCHS_PER_BATCH = 1      # passes over each group of files
TRAIN_BATCH_SIZE = 128    # mini-batch size for both loaders
VAL_SPLIT = 0.1           # fraction of each group held out for validation
SEED = 42

# Seed torch so the train/validation split, weight initialisation and the
# random key sampling inside the model are repeatable across runs.
torch.manual_seed(SEED)

# glob returns files in arbitrary order; sort for a reproducible grouping.
files = sorted(glob.glob(os.path.join(DATA_DIR, "*.parquet")))
total_batches = (len(files) + BATCH_SIZE - 1) // BATCH_SIZE  # ceil division

model = None
criterion = nn.MSELoss()
history_all = {'train_loss': [], 'val_loss': []}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _run_epoch(model, loader, optimizer=None):
    """Run one pass over ``loader`` and return the sample-weighted mean loss.

    The model is trained when ``optimizer`` is given and only evaluated
    (no gradients) otherwise. Returns ``nan`` when the loader is empty.
    """
    training = optimizer is not None
    model.train(training)
    total_loss, n_seen = 0.0, 0
    with torch.set_grad_enabled(training):
        for x_batch, y_batch in loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            # Drop only the trailing feature axis: a bare squeeze() would also
            # remove the batch axis when the last mini-batch has one sample.
            output = model(x_batch).squeeze(-1)
            loss = criterion(output, y_batch)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * len(x_batch)
            n_seen += len(x_batch)
    return total_loss / n_seen if n_seen else float("nan")


for batch_num in range(total_batches):
    print(f"\n🔄 Batch {batch_num+1}/{total_batches}")
    batch_files = files[batch_num * BATCH_SIZE:(batch_num + 1) * BATCH_SIZE]

    # load_batch_files returns None for an empty group, so check before
    # unpacking instead of unpacking first.
    loaded = load_batch_files(batch_files, window_size=WINDOW_SIZE)
    if loaded is None or loaded[0] is None:
        print("⚠️ Batch trống, bỏ qua.")
        continue
    dataset, mean, std = loaded

    val_size = int(len(dataset) * VAL_SPLIT)
    train_size = len(dataset) - val_size
    train_set, val_set = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_set, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=TRAIN_BATCH_SIZE)

    # The model and optimizer are created once and reused for every group.
    if model is None:
        model = Informer(input_len=WINDOW_SIZE).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    for _ in range(EPOCHS_PER_BATCH):
        # ==== Training ====
        train_loss = _run_epoch(model, tqdm(train_loader), optimizer)

        # ==== Validation ====
        # Small groups can leave the validation split empty; the helper then
        # reports nan instead of dividing by zero.
        val_loss = _run_epoch(model, val_loader)

        print(f"✅ Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")
        history_all['train_loss'].append(train_loss)
        history_all['val_loss'].append(val_loss)

# ==== Plot the loss curves ====
plt.figure(figsize=(10, 5))
plt.plot(history_all['train_loss'], label='Train Loss')
plt.plot(history_all['val_loss'], label='Validation Loss')
plt.xlabel('Batch Epoch')
plt.ylabel('Loss (MSE)')
plt.title('Loss qua các batch')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("training_loss.png")
print("📉 Đã lưu biểu đồ loss vào training_loss.png")
