In [3]:
!pip install torch_geometric



In [4]:
import pandas as pd
import numpy as np
import json

from sklearn.preprocessing import MinMaxScaler

def data_loader(data_path, city, year, level='district', length=12, n_steps=12, is_scale=False, temporal_copy=False, is_realtime=False, train_ratio=0.8):
    """Load every feature table for one city/year and cut it into sliding windows.

    A window is anchored at hour index ``i``: its inputs are the ``length`` rows
    that precede ``i`` and its target is the ``n_steps`` risk rows starting at
    ``i``. Anchors cover one 30-day month of hourly rows (720 rows). Anchors up
    to and including hour ``int(30 * train_ratio) * 24`` form the training
    split; later anchors form the test split.

    Args:
        data_path: Root directory containing the ``risk_scores``, ``weather``,
            ``dangerous_cases``, ``roads``, ``demographic``, ``poi``,
            ``traffic_volume``, ``traffic_speed`` and ``calendar`` sub-folders.
        city: City name used in the CSV file names.
        year: Year used in the CSV file names.
        level: Spatial granularity used in the file names. With ``'district'``
            the part of a location before ``'|'`` is used as the key and the
            weather rows are averaged per district.
        length: Number of past hourly rows in every input window.
        n_steps: Number of future hourly rows in every target window.
        is_scale: When true, each feature group is min-max scaled with a scaler
            fitted on its training split only.
        temporal_copy: When true, the static tables (road, demographic, POI)
            are repeated ``length`` times so they gain a time axis.
        is_realtime: When true, calendar windows cover the ``n_steps`` target
            hours instead of the ``length`` history hours.
        train_ratio: Fraction of the 30 days assigned to the training split.

    Returns:
        dict whose feature entries are ``[train, test]`` pairs of arrays:
        ``risk``, ``road``, ``poi``, ``demo``, ``weather``, ``calendar``,
        ``volume``, ``speed``, ``dtg``, ``y`` (raw targets) and ``y_scaled``.
        It also holds ``selected_areas`` (the area columns of the risk table),
        ``scaler`` (the scaler fitted on the risk *input* windows, or ``None``)
        and ``y_scaler`` (the scaler fitted on the *target* windows, or
        ``None``), which is the one needed to invert ``y_scaled``.
    """
    # The windowing is written for a single 30-day month of hourly rows.
    days_in_month = 30
    hours_in_month = days_in_month * 24

    def normalize(train, test):
        """Min-max scale over the last axis, fitting on ``train`` only."""
        if not is_scale:
            return train, test, None
        scaler = MinMaxScaler()
        train_shape, test_shape = train.shape, test.shape
        train = scaler.fit_transform(train.reshape(-1, train_shape[-1]))
        test = scaler.transform(test.reshape(-1, test_shape[-1]))
        return train.reshape(train_shape), test.reshape(test_shape), scaler

    train_length = int(days_in_month * train_ratio)
    last_train_anchor = train_length * 24
    anchors = range(length, hours_in_month + 1 - n_steps)

    def split_windows(window_at):
        """Collect ``window_at(i)`` for every anchor into (train, test) lists."""
        train, test = [], []
        for i in anchors:
            (train if i <= last_train_anchor else test).append(window_at(i))
        return train, test

    def history_windows(values):
        """Windows made of the ``length`` rows that precede each anchor."""
        return split_windows(lambda i: values[i - length:i])

    def per_area(train, test, n_time):
        """Stack window lists and unfold the column axis into (area, feature)."""
        return (np.array(train).reshape(len(train), n_time, n_districts, -1),
                np.array(test).reshape(len(test), n_time, n_districts, -1))

    def static_feature(table):
        """Broadcast a time-independent per-area table to every window."""
        stacked = np.array([table[location].to_numpy() for location in selected_areas])
        # The window content never changes, so it is built once and reused.
        window = np.array([stacked] * length) if temporal_copy else stacked
        train, test = split_windows(lambda i: window)
        return normalize(np.array(train), np.array(test))

    # Risk scores: model input (history) and prediction target (future).
    risk_data = pd.read_csv(f'{data_path}/risk_scores/{city}-{year}-{level}-hour-risk.csv')
    # Dropping the timestamp columns is loop-invariant, so it is done once here.
    risk_values = risk_data.drop(columns=['date', 'time'])
    selected_areas = risk_values.columns
    n_districts = len(selected_areas)  # number of districts
    risk_array = risk_values.to_numpy()

    risk_train, risk_test = history_windows(risk_array)
    y_train, y_test = split_windows(lambda i: risk_array[i:i + n_steps])

    risk_train, risk_test, risk_scaler = normalize(np.array(risk_train), np.array(risk_test))
    y_train, y_test = np.array(y_train), np.array(y_test)
    y_train_scaled, y_test_scaled, y_scaler = normalize(y_train, y_test)

    # Weather & air quality (long format: one row per date/time/location).
    weather_data = pd.read_csv(f'{data_path}/weather/{city}-{year}-count.csv').fillna(0)
    if level == 'district':
        weather_data['location'] = weather_data['location'].apply(lambda x: x.split('|')[0])
        weather_data = weather_data.groupby(by=['date', 'time', 'location'], as_index=False).mean()
    location_weather = np.concatenate(
        [weather_data[weather_data['location'] == location].iloc[:, 3:].to_numpy()
         for location in selected_areas],
        axis=1,
    )
    weather_train, weather_test = history_windows(location_weather)
    weather_train, weather_test, _ = normalize(*per_area(weather_train, weather_test, length))

    # Dangerous driving behaviour (long format keyed by district / subdistrict).
    dtg_data = pd.read_csv(f'{data_path}/dangerous_cases/{city}-{year}-date-hour-{level}-new.csv')
    location_dtg = []
    for location in selected_areas:
        if level == 'district':
            district = location.split('|')[0]
            rows = dtg_data['district'] == district
        else:
            district, subdistrict = location.split('|')[0], location.split('|')[1]
            rows = (dtg_data['district'] == district) & (dtg_data['subdistrict'] == subdistrict)
        location_dtg.append(dtg_data[rows].iloc[:, 3:].to_numpy())
    location_dtg = np.concatenate(location_dtg, axis=1)
    dtg_train, dtg_test = history_windows(location_dtg)
    dtg_train, dtg_test, _ = normalize(*per_area(dtg_train, dtg_test, length))

    # Road data (static per area).
    road_data = pd.read_csv(f'{data_path}/roads/{city}-{year}-{level}-road-count.csv').drop(columns=['attribute'])
    road_train, road_test, _ = static_feature(road_data)

    # Demographics data (static per area).
    demo_data = pd.read_csv(f'{data_path}/demographic/{city}-{year}-{level}.csv').drop(columns=['index'])
    demo_train, demo_test, _ = static_feature(demo_data)

    # POI data (static per area).
    poi_data = pd.read_csv(f'{data_path}/poi/{city}-{year}-{level}.csv').drop(columns=['location'])
    poi_train, poi_test, _ = static_feature(poi_data)

    # Traffic volumes (wide format, first n_districts columns).
    volume_data = pd.read_csv(f'{data_path}/traffic_volume/{city}-{year}.csv').drop(columns=['date', 'hour'])
    volume_train, volume_test = history_windows(volume_data.iloc[:, :n_districts].to_numpy())
    volume_train, volume_test, _ = normalize(np.array(volume_train), np.array(volume_test))

    # Traffic speed (wide format, first n_districts columns).
    speed_data = pd.read_csv(f'{data_path}/traffic_speed/{city}-{year}.csv').drop(columns=['date', 'hour'])
    speed_train, speed_test = history_windows(speed_data.iloc[:, :n_districts].to_numpy())
    speed_train, speed_test, _ = normalize(np.array(speed_train), np.array(speed_test))

    # Calendar (long format keyed by location).
    calendar_data = pd.read_csv(f'{data_path}/calendar/calendar-{city}-{year}-{level}.csv')
    location_calendar = np.concatenate(
        [calendar_data[calendar_data['location'] == location].iloc[:, 1:].to_numpy()
         for location in selected_areas],
        axis=1,
    )
    if is_realtime:
        # Calendar values of the hours being predicted.
        calendar_train, calendar_test = split_windows(lambda i: location_calendar[i:i + n_steps])
    else:
        calendar_train, calendar_test = history_windows(location_calendar)
    calendar_train, calendar_test = np.array(calendar_train), np.array(calendar_test)
    calendar_train, calendar_test, _ = normalize(
        calendar_train.reshape(calendar_train.shape[0], calendar_train.shape[1], n_districts, -1),
        calendar_test.reshape(calendar_test.shape[0], calendar_test.shape[1], n_districts, -1),
    )

    # Match shape: give the single-valued groups a trailing feature axis.
    risk_train = risk_train[:, :, :, None]
    risk_test = risk_test[:, :, :, None]
    volume_train = volume_train[:, :, :, None]
    volume_test = volume_test[:, :, :, None]
    speed_train = speed_train[:, :, :, None]
    speed_test = speed_test[:, :, :, None]

    return {
        'risk': [risk_train, risk_test],
        'road': [road_train, road_test],
        'poi': [poi_train, poi_test],
        'demo': [demo_train, demo_test],
        'weather': [weather_train, weather_test],
        'calendar': [calendar_train, calendar_test],
        'volume': [volume_train, volume_test],
        'speed': [speed_train, speed_test],
        'dtg': [dtg_train, dtg_test],
        'y': [y_train, y_test],
        'y_scaled': [y_train_scaled, y_test_scaled],
        'selected_areas': selected_areas,
        'scaler': risk_scaler,
        'y_scaler': y_scaler
    }

In [14]:
import os
import warnings
import logging
import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch_geometric.data import Data, Batch
from torch_geometric.nn import MessagePassing
from torch.nn import Parameter
from typing import Union, Tuple, Callable
from torch import Tensor
from torch_geometric.typing import OptPairTensor, Adj, OptTensor, Size
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import pearsonr

# --- ENVIRONMENT SETUP ---
logging.disable(logging.WARNING)
warnings.filterwarnings('ignore')
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # GPU to use
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Seed the RNGs so weight initialisation and loader shuffling are repeatable
# across "Restart & Run All" executions.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# ==========================================
# 1. LOAD DATA
# ==========================================
# `data_loader` is defined in an earlier cell of this notebook.

city = 'Seoul'
year = '2016'
n_steps, length = 6, 12
metric = 'jaccard'
DATA_PATH = '/kaggle/input/mg-tar'  # root folder of the datasets

print("Loading Data...")
# DATA_PATH must point at the folder that contains the dataset sub-directories.
datasets = data_loader(DATA_PATH, city, year, length=length, n_steps=n_steps, is_scale=True, temporal_copy=True)
n_districts = len(datasets['selected_areas'])

# Extract features: every entry is a [train, test] pair.
risk_train, risk_test = datasets['risk']
demo_train, demo_test = datasets['demo']
poi_train, poi_test = datasets['poi']
road_train, road_test = datasets['road']
volume_train, volume_test = datasets['volume']
speed_train, speed_test = datasets['speed']
weather_train, weather_test = datasets['weather']
calendar_train, calendar_test = datasets['calendar']
c_train, c_test = datasets['dtg']
y_train, y_test = datasets['y']

# Train/validation split: the last 10% of the training windows are held out.
val_idx = round(risk_train.shape[0] * 0.10)
# Use an explicit boundary index: `[:-val_idx]` would silently produce an
# empty training set if val_idx ever rounded down to 0.
train_end = risk_train.shape[0] - val_idx
risk_train, risk_val = risk_train[:train_end], risk_train[train_end:]
demo_train, demo_val = demo_train[:train_end], demo_train[train_end:]
poi_train, poi_val = poi_train[:train_end], poi_train[train_end:]
road_train, road_val = road_train[:train_end], road_train[train_end:]
volume_train, volume_val = volume_train[:train_end], volume_train[train_end:]
speed_train, speed_val = speed_train[:train_end], speed_train[train_end:]
weather_train, weather_val = weather_train[:train_end], weather_train[train_end:]
calendar_train, calendar_val = calendar_train[:train_end], calendar_train[train_end:]
c_train, c_val = c_train[:train_end], c_train[train_end:]
y_train, y_val = y_train[:train_end], y_train[train_end:]

# Load the adjacency matrix
A = pd.read_csv(f'{DATA_PATH}/graph_data/{city}-normalized-district.csv', engine='c', index_col=0).to_numpy()

# Concatenate all feature groups along the last (feature) axis
node_features_train = np.concatenate([risk_train, demo_train, poi_train, road_train, volume_train, speed_train, weather_train, calendar_train, c_train], axis=-1)
node_features_val = np.concatenate([risk_val, demo_val, poi_val, road_val, volume_val, speed_val, weather_val, calendar_val, c_val], axis=-1)
node_features_test = np.concatenate([risk_test, demo_test, poi_test, road_test, volume_test, speed_test, weather_test, calendar_test, c_test], axis=-1)

print(f"Train Shape: {node_features_train.shape}")
print(f"Val Shape:   {node_features_val.shape}")
print(f"Test Shape:  {node_features_test.shape}")

# ==========================================
# 2. GRAPH & DATASET PROCESSING
# ==========================================

def get_static_graph_structure(adj_matrix, num_nodes):
    """Turn an adjacency matrix into an edge list plus placeholder edge attributes.

    Args:
        adj_matrix: 2-D array; every strictly positive entry ``[r, c]`` yields
            one edge ``r -> c``.
        num_nodes: Accepted for interface compatibility; not used here.

    Returns:
        ``(edge_index, component_dir, component_ang)`` where ``edge_index`` is
        a ``(2, num_edges)`` long tensor and both components are all-zero
        float tensors of shape ``(num_edges, 2)``.
    """
    # 1. Edge index: positions of the positive adjacency entries.
    sources, targets = np.nonzero(adj_matrix > 0)
    edge_index = torch.tensor(np.stack((sources, targets)), dtype=torch.long)

    # 2. Edge attributes are zeros rather than random values, so the model is
    #    not disturbed by a random signal.
    attr_shape = (edge_index.size(1), 2)
    component_dir = torch.zeros(attr_shape, dtype=torch.float)
    component_ang = torch.zeros(attr_shape, dtype=torch.float)

    return edge_index, component_dir, component_ang

# Build the static graph structure once and move its tensors to the compute device
static_edge_index, static_dir, static_ang = (
    tensor.to(device) for tensor in get_static_graph_structure(A, n_districts)
)

class TrafficDataset(Dataset):
    """Dataset that serves one graph sample (node features + targets) per window."""

    def __init__(self, x_data, y_data):
        self.x, self.y = x_data, y_data

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a ``Data`` object with float tensors ``x`` and ``y``."""
        # Features: (Time, Nodes, Feats) -> (Nodes, Time, Feats), then the time
        # axis is folded into the feature axis -> (Nodes, Input_Dim).
        node_major = np.transpose(self.x[idx], (1, 0, 2))
        node_inputs = node_major.reshape(node_major.shape[0], -1)

        # Labels: (Horizon, Nodes) -> (Nodes, Horizon).
        node_targets = self.y[idx].T

        return Data(
            x=torch.tensor(node_inputs, dtype=torch.float),
            y=torch.tensor(node_targets, dtype=torch.float),
        )

# Build the datasets and their loaders
batch_size = 32
train_dataset = TrafficDataset(node_features_train, y_train)
val_dataset = TrafficDataset(node_features_val, y_val)
test_dataset = TrafficDataset(node_features_test, y_test)

# Every loader merges its samples into one PyG batch; only the training loader shuffles.
loader_options = dict(batch_size=batch_size, collate_fn=Batch.from_data_list)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Per-node input size: window length times features per time step
window_steps = node_features_train.shape[1]
features_per_step = node_features_train.shape[3]
input_dim = window_steps * features_per_step
output_dim = n_steps  # one prediction per future step

print(f"Input Feature Dim (per Node): {input_dim}")
print(f"Output Prediction Dim (per Node): {output_dim}")

# ==========================================
# 3. MODEL DEFINITION
# ==========================================

class TRAVELConv(MessagePassing):
    """Message-passing layer whose messages are produced by a user-supplied network.

    Each message is ``nn([x_j, edge_attr])`` (or ``nn(x_j)`` when no edge
    attributes are given). Aggregated messages are optionally combined with a
    linear transform of the target node features (``root``) and a bias.
    """

    def __init__(self, in_channels: Union[int, Tuple[int, int]],
                 out_channels: int, nn: Callable, aggr: str = 'add',
                 root_weight: bool = True, bias: bool = True, **kwargs):
        super().__init__(aggr=aggr, **kwargs)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.nn = nn
        self.aggr = aggr

        # A single int means source and target nodes share the same width.
        if isinstance(in_channels, int):
            source_width, target_width = in_channels, in_channels
        else:
            source_width, target_width = in_channels[0], in_channels[1]
        self.in_channels_l = source_width

        if not root_weight:
            self.register_parameter('root', None)
        else:
            self.root = Parameter(torch.Tensor(target_width, out_channels))

        if not bias:
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.Tensor(out_channels))

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the message network, the root weight and the bias."""
        resettable = (layer for layer in self.nn if hasattr(layer, 'reset_parameters'))
        for layer in resettable:
            layer.reset_parameters()
        if self.root is not None:
            nn.init.xavier_uniform_(self.root)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, x: Union[Tensor, OptPairTensor], edge_index: Adj,
                edge_attr: OptTensor = None, size: Size = None) -> Tensor:
        """Aggregate edge messages, then add the root term and bias when present."""
        if isinstance(x, Tensor):
            x: OptPairTensor = (x, x)
        out = self.propagate(edge_index, x=x, edge_attr=edge_attr, size=size)

        target_features = x[1]
        if self.root is not None and target_features is not None:
            out += torch.matmul(target_features, self.root)
        if self.bias is not None:
            out += self.bias
        return out

    def message(self, x_i: Tensor, x_j: Tensor, edge_attr: Tensor) -> Tensor:
        """Run the message network on source features joined with edge attributes."""
        if edge_attr is None:
            return self.nn(x_j)
        return self.nn(torch.cat([x_j, edge_attr], dim=1))

class TRAVELNetRegressor(torch.nn.Module):
    """Two-layer, two-branch TRAVELConv network producing ``output_dim`` values per node.

    One branch consumes the encoded ``component_dir`` edge attributes and the
    other the encoded ``component_ang`` attributes; branch outputs are
    concatenated after each layer and finally projected by a linear head.
    ``num_edges`` is accepted but not used by the model.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_edges, dropout_p=0.5):
        super().__init__()
        self.dropout_p = dropout_p
        convdim = 32  # width of each first-layer branch

        def edge_encoder():
            # Projects the 2-d (all-zero) edge input to a learnable hidden vector.
            return nn.Sequential(nn.Linear(2, hidden_dim), nn.LeakyReLU())

        def message_mlp(in_features, out_features):
            # Network applied to [source node features, edge attributes].
            return nn.Sequential(
                nn.Linear(in_features, hidden_dim),
                nn.LeakyReLU(),
                nn.Linear(hidden_dim, out_features),
            )

        # Node encoder with batch normalisation
        self.node_encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, hidden_dim),
        )

        # Learnable edge encoders, one per edge-attribute component
        self.edge_encoder_dir = edge_encoder()
        self.edge_encoder_ang = edge_encoder()

        # Conv block 1: messages see hidden node features + hidden edge features
        first_in = hidden_dim + hidden_dim
        self.conv1 = TRAVELConv(hidden_dim, convdim, message_mlp(first_in, convdim))
        self.conv1_2 = TRAVELConv(hidden_dim, convdim, message_mlp(first_in, convdim))

        self.bn1 = nn.BatchNorm1d(convdim * 2)

        # Conv block 2: messages see both concatenated branches + hidden edge features
        second_in = 2 * convdim + hidden_dim
        self.conv2 = TRAVELConv(2 * convdim, output_dim, message_mlp(second_in, output_dim))
        self.conv2_2 = TRAVELConv(2 * convdim, output_dim, message_mlp(second_in, output_dim))

        # Final output head
        self.fc = nn.Linear(output_dim * 2, output_dim)

    def forward(self, x, edge_index, component_dir, component_ang):
        """Return the per-node predictions for the given (batched) graph."""
        # 1. Encode node features
        hidden = self.node_encoder(x)

        # 2. Encode edge features
        dir_attr = self.edge_encoder_dir(component_dir)
        ang_attr = self.edge_encoder_ang(component_ang)

        # 3. Layer 1: both branches, then concatenate
        branch_dir = F.relu(self.conv1(hidden, edge_index, dir_attr))
        branch_ang = F.relu(self.conv1_2(hidden, edge_index, ang_attr))
        merged = torch.cat((branch_dir, branch_ang), dim=1)

        # Batch norm is skipped when only a single row is present
        if merged.shape[0] > 1:
            merged = self.bn1(merged)
        merged = F.dropout(merged, p=self.dropout_p, training=self.training)

        # 4. Layer 2
        out_dir = self.conv2(merged, edge_index, dir_attr)
        out_ang = self.conv2_2(merged, edge_index, ang_attr)

        # 5. Final projection
        return self.fc(torch.cat((out_dir, out_ang), dim=1))

# ==========================================
# 4. TRAINING LOOP
# ==========================================

# Instantiate the model
num_edges = static_edge_index.size(1)
model = TRAVELNetRegressor(input_dim, 64, output_dim, num_edges).to(device)

# Loss, optimiser, and a scheduler that halves the learning rate when the
# monitored value stops decreasing for more than 3 epochs
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-3, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
    verbose=True,
)

def train_epoch(loader):
    """Run one optimisation pass over ``loader`` and return the mean batch loss."""
    model.train()
    edges_per_graph = static_edge_index.size(1)
    loss_sum = 0
    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()

        # Rebuild the batched graph structure: graph g uses the static edges
        # shifted by g * n_districts, laid out graph after graph.
        graph_count = batch.num_graphs
        node_offsets = torch.arange(graph_count, device=static_edge_index.device) * n_districts
        batch_edge_index = (
            static_edge_index.unsqueeze(1) + node_offsets.view(1, -1, 1)
        ).reshape(2, graph_count * edges_per_graph)
        batch_dir = static_dir.repeat(graph_count, 1)
        batch_ang = static_ang.repeat(graph_count, 1)

        prediction = model(batch.x, batch_edge_index, batch_dir, batch_ang)
        batch_loss = criterion(prediction, batch.y)
        batch_loss.backward()
        optimizer.step()
        loss_sum += batch_loss.item()
    return loss_sum / len(loader)

def evaluate_debug(loader, epoch_idx):
    """Evaluate the model on ``loader`` and return ``(mse, mae, rmse, pcc)``.

    On every fifth epoch a few true/predicted values and their means are
    printed as a sanity check.
    """
    model.eval()
    edges_per_graph = static_edge_index.size(1)
    pred_chunks, true_chunks = [], []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)

            # Batched graph structure: static edges shifted per graph in the batch
            graph_count = batch.num_graphs
            node_offsets = torch.arange(graph_count, device=static_edge_index.device) * n_districts
            batch_edge_index = (
                static_edge_index.unsqueeze(1) + node_offsets.view(1, -1, 1)
            ).reshape(2, graph_count * edges_per_graph)
            batch_dir = static_dir.repeat(graph_count, 1)
            batch_ang = static_ang.repeat(graph_count, 1)

            out = model(batch.x, batch_edge_index, batch_dir, batch_ang)
            pred_chunks.append(out.cpu().numpy())
            true_chunks.append(batch.y.cpu().numpy())

    preds = np.concatenate(pred_chunks, axis=0)
    trues = np.concatenate(true_chunks, axis=0)

    mse = mean_squared_error(trues, preds)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(trues, preds)
    pcc, _ = pearsonr(trues.ravel(), preds.ravel())

    # DEBUG: show the first 5 values to see what the model is predicting
    if epoch_idx % 5 == 0:
        print(f"\n[DEBUG Epoch {epoch_idx}]")
        print(f"True (first 5): {trues.flatten()[:5]}")
        print(f"Pred (first 5): {preds.flatten()[:5]}")
        print(f"Pred Mean: {preds.mean():.4f}, True Mean: {trues.mean():.4f}\n")

    return mse, mae, rmse, pcc

print("Start Training FIXED TRAVELNet...")
best_val_pcc = -1

for epoch in range(1, 51):
    train_loss = train_epoch(train_loader)
    val_mse, val_mae, val_rmse, val_pcc = evaluate_debug(val_loader, epoch)

    # The learning-rate schedule follows the validation MSE
    scheduler.step(val_mse)

    print(f'Epoch {epoch:02d} | Loss: {train_loss:.4f} | MSE: {val_mse:.4f} | MAE: {val_mae:.4f} | RMSE: {val_rmse:.4f} | PCC: {val_pcc:.4f}')

    # Checkpoint only when the validation PCC improves (a NaN PCC never does)
    if not (val_pcc > best_val_pcc):
        continue
    best_val_pcc = val_pcc
    torch.save(model.state_dict(), 'best_travel_model_fixed.pth')

# ==========================================
# 5. TEST & METRICS
# ==========================================
print("\nEvaluating on Test Set...")
try:
    model.load_state_dict(torch.load('best_travel_model_fixed.pth'))
except FileNotFoundError:
    print("Warning: Không tìm thấy file 'best_travel_model_fixed.pth'. Model có thể chưa được lưu nếu PCC không cải thiện.")

model.eval()

all_preds = []
all_trues = []

with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)

        # Batched graph structure: static edges shifted per graph in the batch
        curr_batch_size = batch.num_graphs
        node_offsets = torch.arange(curr_batch_size, device=static_edge_index.device) * n_districts
        batch_edge_index = (
            static_edge_index.unsqueeze(1) + node_offsets.view(1, -1, 1)
        ).reshape(2, curr_batch_size * static_edge_index.size(1))
        batch_dir = static_dir.repeat(curr_batch_size, 1)
        batch_ang = static_ang.repeat(curr_batch_size, 1)

        out = model(batch.x, batch_edge_index, batch_dir, batch_ang)
        all_preds.append(out.cpu().numpy())
        all_trues.append(batch.y.cpu().numpy())

y_pred_flat = np.concatenate(all_preds, axis=0)
y_true_flat = np.concatenate(all_trues, axis=0)

# Reshape back to (Samples, Horizon, Nodes)
per_sample_shape = (node_features_test.shape[0], n_districts, n_steps)
final_preds = y_pred_flat.reshape(per_sample_shape).transpose(0, 2, 1)
final_trues = y_true_flat.reshape(per_sample_shape).transpose(0, 2, 1)

# --- STEP-WISE ERROR ---
print(f"\nModel Performance Seoul ({year}) - TRAVELNet:")
print("-" * 60)
print(f"{'Horizon':<10} | {'MSE':<10} | {'MAE':<10} | {'RMSE':<10} | {'PCC':<10}")
print("-" * 60)

metric_lists = {'MSE': [], 'MAE': [], 'RMSE': [], 'PCC': []}

for i in range(n_steps):
    # All samples and nodes at forecast step i
    y_true_step = final_trues[:, i, :]
    y_pred_step = final_preds[:, i, :]

    step_mse = mean_squared_error(y_true_step, y_pred_step)
    step_rmse = np.sqrt(step_mse)
    step_mae = mean_absolute_error(y_true_step, y_pred_step)
    step_pcc, _ = pearsonr(y_true_step.flatten(), y_pred_step.flatten())

    for metric_name, metric_value in (('MSE', step_mse), ('MAE', step_mae), ('RMSE', step_rmse), ('PCC', step_pcc)):
        metric_lists[metric_name].append(metric_value)

    print(f"Step {i+1:<5} | {step_mse:.4f}     | {step_mae:.4f}     | {step_rmse:.4f}     | {step_pcc:.4f}")

print("-" * 60)
print(f"AVERAGE    | {np.mean(metric_lists['MSE']):.4f}     | {np.mean(metric_lists['MAE']):.4f}     | {np.mean(metric_lists['RMSE']):.4f}     | {np.mean(metric_lists['PCC']):.4f}")
print("-" * 60)

Using device: cuda
Loading Data...
Train Shape: (509, 12, 25, 133)
Val Shape:   (56, 12, 25, 133)
Test Shape:  (138, 12, 25, 133)
Input Feature Dim (per Node): 1596
Output Prediction Dim (per Node): 6
Start Training FIXED TRAVELNet...
Epoch 01 | Loss: 2.7266 | MSE: 2.8319 | MAE: 0.9237 | RMSE: 1.6828 | PCC: 0.0634
Epoch 02 | Loss: 2.4128 | MSE: 2.6667 | MAE: 0.8061 | RMSE: 1.6330 | PCC: 0.1177
Epoch 03 | Loss: 2.3776 | MSE: 2.6561 | MAE: 0.8072 | RMSE: 1.6297 | PCC: 0.1287
Epoch 04 | Loss: 2.3610 | MSE: 2.6667 | MAE: 0.7909 | RMSE: 1.6330 | PCC: 0.1304

[DEBUG Epoch 5]
True (first 5): [2. 0. 0. 0. 2.]
Pred (first 5): [1.1539905  0.88912976 1.0600554  0.97054684 1.1605597 ]
Pred Mean: 0.4110, True Mean: 0.5899

Epoch 05 | Loss: 2.3557 | MSE: 2.6267 | MAE: 0.8431 | RMSE: 1.6207 | PCC: 0.1340
Epoch 06 | Loss: 2.3487 | MSE: 2.6776 | MAE: 0.7781 | RMSE: 1.6364 | PCC: 0.1347
Epoch 07 | Loss: 2.3452 | MSE: 2.6286 | MAE: 0.8352 | RMSE: 1.6213 | PCC: 0.1367
Epoch 08 | Loss: 2.3427 | MSE: 2.5965