In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import tqdm
import os
import re

import torch
import torch.nn.functional as F
from torch.nn.parameter import Parameter
import torch_geometric
from torch_geometric_temporal.signal import temporal_signal_split
from dataLoader import MyMETRLADatasetLoader
import transformers
from transformers import AutoTokenizer, AutoModel
import ollama
from networkTrafficGNN import BaseSTGNN, TextTCN, TextSTGNN3, BaseA3TGCN2, TextA3TGCN, BaseAGCRN, TextAGCRN, BaseTCN
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data Preprocession

In [2]:
DAILY_TIME_STEPS = 288
IN_TIME_SETPS = 12
  
BATCH_SIZE = 64
SHUFFLE = False
RAW_DATA_DICT = 'metr-la'
SAVE_DICT = 'models'

In [3]:
la_loader = MyMETRLADatasetLoader(raw_data_dir=RAW_DATA_DICT, norm=False)
NODE_NUM, _, TOTAL_TIME_STEPS = la_loader.X.shape
# temporal encoding
la_loader.X = torch.cat([la_loader.X, torch.arange(TOTAL_TIME_STEPS).expand(NODE_NUM, 1, -1)], dim=1)
la_loader.X[:,1,:] = torch.cos(la_loader.X[:,1,:] * 2 * torch.pi)
# missing data imputation
la_data = pd.read_csv(f'{RAW_DATA_DICT}/metr-la.csv')
la_data.rename(columns={'Unnamed: 0': 'time'}, inplace=True)
la_data['time'] = pd.to_datetime(la_data['time'])
la_data.replace(0, np.nan, inplace=True)
dayofweek = la_data['time'].dt.weekday.values
la_data['workday'] = (dayofweek < 5).astype(int)
la_data['daytime'] = la_data['time'].dt.strftime('%H:%M')
typical_la_data = la_data.iloc[:18*DAILY_TIME_STEPS,1:].groupby(['daytime', 'workday']).mean()
la_data = la_data.merge(right=typical_la_data, left_on=['daytime', 'workday'], right_index=True, suffixes=['', '_t'])
del la_data['daytime'], la_data['workday']
for det_id in la_data.columns[1: NODE_NUM+1]:
    la_data.fillna({det_id: la_data[f'{det_id}_t']}, inplace=True)
la_data.sort_values(by='time', inplace=True)
la_loader.X[:,0,:] = torch.from_numpy(la_data[la_data.columns[1:208]].values.astype(np.float32).T)
del la_data
speed_mean, speed_std = la_loader.X[:,0,:].mean(), la_loader.X[:,0,:].std()
print(speed_std)
la_loader.X[:,0,:] = (la_loader.X[:,0,:] - speed_mean) / speed_std # normalization

tensor(12.8283)


In [4]:
## Qwen-3-14B + jina-embeddings
from transformers import AutoModelForCausalLM, AutoTokenizer

# load the tokenizer and the model
model_name = "Qwen/Qwen3-14B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

day_type = 'weekend' # ['workday', 'weekend']
background_prompt = f"You are a helpful traffic analyzer. The following traffic speed series was collected from a road loop detector every 5 minutes on a typical {day_type} in LA County. "
typical_speed_series = typical_la_data.xs(1 if day_type == 'workday' else 0, level='workday')
hourly_det_pattern_list = []
# hourly_det_pattern_embeddings = []
with torch.no_grad():
    for detector_id, speed_series in tqdm.notebook.tqdm(typical_speed_series.items()):
        for i in range(24):
            querry_prompt = f"Please summarize the speed trends of the corresponding road segment from {i:02d}:00 to {i+1:02d}:00 precisely in paragraphs. Speed Series: "
            speed_series_prompt = ""
            for timestamp, speed in speed_series.iloc[i*12:(i+1)*12].items():
                speed = int(round(speed, 0))
                speed_series_prompt += f'{timestamp}: {speed} km/h, '
            speed_series_prompt = speed_series_prompt[:-2]
            speed_series_prompt += '.'
            messages = [{"role": "user", "content": background_prompt + querry_prompt + speed_series_prompt}]
            text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True,
                enable_thinking=True # Switches between thinking and non-thinking modes. Default is True.
            )
            model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
            # conduct text completion
            outputs = model.generate(**model_inputs, temperature=0.6, top_p=0.95, top_k=20, min_p=0, max_new_tokens=32768, return_dict_in_generate=True)
            # hourly_det_pattern_embeddings.append(outputs.hidden_states[-1][-1].squeeze())
            output_ids = outputs['sequences'][0][len(model_inputs.input_ids[0]):].tolist() 
            # parsing thinking content
            try:
                # rindex finding 151668 (</think>)
                index = len(output_ids) - output_ids[::-1].index(151668)
            except ValueError:
                index = 0
            thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
            content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
            hourly_det_pattern_list.append([detector_id, i, i+1, thinking_content, content])
hourly_det_pattern = DataFrame(hourly_det_pattern_list, columns=['det_id', 'from_time', 'to_time', 'think', 'speed_trend'])
hourly_det_pattern.to_csv(f'{RAW_DATA_DICT}/llm-description/{day_type}-hourly-pattern-{model_name.split("/")[-1]}.csv.zip', compression='zip')
hourly_det_pattern = pd.read_csv(f'{RAW_DATA_DICT}/llm-description/{day_type}-hourly-pattern-{model_name.split("/")[-1]}.csv.zip', index_col=0)
hourly_det_pattern['inferrence'] = hourly_det_pattern['think'] + '\n\n' + hourly_det_pattern['speed_trend']
model = AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-en', trust_remote_code=True, torch_dtype="auto", device_map="auto")
hourly_det_pattern_embeddings = model.encode(hourly_det_pattern['inferrence'], max_length=1024)
np.savez_compressed(f'{RAW_DATA_DICT}/llm-description/{day_type}-hourly-embeddings-{model_name.split("/")[-1]}.npz', embeddings=hourly_det_pattern_embeddings)
workday_hourly_det_pattern_embeddings = np.load(f'{RAW_DATA_DICT}/llm-description/workday-hourly-embeddings-Qwen3-14B.npz')['embeddings']
workday_hourly_det_pattern_embeddings = torch.from_numpy(workday_hourly_det_pattern_embeddings).type(torch.FloatTensor).to(device).view(207, 24, -1)
weekend_hourly_det_pattern_embeddings = np.load(f'{RAW_DATA_DICT}/llm-description/weekend-hourly-embeddings-Qwen3-14B.npz')['embeddings']
weekend_hourly_det_pattern_embeddings = torch.from_numpy(weekend_hourly_det_pattern_embeddings).type(torch.FloatTensor).to(device).view(207, 24, -1)
hourly_det_pattern_embeddings_qwen14b = torch.cat([workday_hourly_det_pattern_embeddings, weekend_hourly_det_pattern_embeddings], dim=1)

In [5]:
## LLama-3.1-8B + jina-embeddings
day_type = 'weekend'
background_prompt = f"You are a helpful traffic analyzer. The following traffic speed series was collected from a road loop detector every 5 minutes on a typical {day_type} in LA County. "
typical_speed_series = typical_la_data.xs(1 if day_type == 'workday' else 0, level='workday')
hourly_det_pattern_list = []
for detector_id, speed_series in typical_speed_series.items():
    for i in range(24):
        querry_prompt = f"Please summarize the speed trends of the corresponding road segment from {i:02d}:00 to {i+1:02d}:00 precisely in paragraphs. Speed Series: "
        speed_series_prompt = ""
        for timestamp, speed in speed_series.iloc[i*12:(i+1)*12].items():
            speed = int(round(speed, 0))
            speed_series_prompt += f'{timestamp}: {speed} km/h, '
        speed_series_prompt = speed_series_prompt[:-2]
        speed_series_prompt += '.'
        response = ollama.chat(model="llama3:8b", messages=[{"role": 'user', "content": background_prompt + querry_prompt + speed_series_prompt},])
        hourly_det_pattern_list.append([detector_id, i, i+1, response["message"]["content"]])
hourly_det_pattern = DataFrame(hourly_det_pattern_list, columns=['det_id', 'from_time', 'to_time', 'speed_trend'])
hourly_det_pattern.to_csv(f'D:/Dataset/metr-la/llm-description/{day_type}-hourly-pattern.csv.zip', compression='zip')

model = AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-en', trust_remote_code=True, torch_dtype="auto", device_map="auto")
hourly_det_pattern_embeddings = model.encode(hourly_det_pattern['speed_trend'], max_length=1024)
np.savez_compressed(f'D:/Dataset/metr-la/llm-description/{day_type}-hourly-embeddings.npz', embeddings=hourly_det_pattern_embeddings)
workday_hourly_det_pattern_embeddings = np.load('{RAW_DATA_DICT}/llm-description/workday-hourly-embeddings.npz')['embeddings']
workday_hourly_det_pattern_embeddings = torch.from_numpy(workday_hourly_det_pattern_embeddings).type(torch.FloatTensor).to(device).view(207, 24, -1)
weekend_hourly_det_pattern_embeddings = np.load('{RAW_DATA_DICT}/llm-description/weekend-hourly-embeddings.npz')['embeddings']
weekend_hourly_det_pattern_embeddings = torch.from_numpy(weekend_hourly_det_pattern_embeddings).type(torch.FloatTensor).to(device).view(207, 24, -1)
hourly_det_pattern_embeddings = torch.cat([workday_hourly_det_pattern_embeddings, weekend_hourly_det_pattern_embeddings], dim=1)

In [6]:
def predict(model, raw_input, input_channels, hour_index_supplement, time_index_supplement, weekday_index_supplement, text_data=None, **kwags):
    with torch.no_grad():
        init_hour_index = ((raw_input[:,0,2,0] % DAILY_TIME_STEPS + 6) / 12).type(torch.IntTensor) % 24
        init_time_index = raw_input[:,0,2,0].type(torch.IntTensor)
        inputs = raw_input[:,:,:input_channels,:]
    
    match model:
        case TextTCN():
            weekday_indeces = ((init_time_index.unsqueeze(1) + weekday_index_supplement + 6) // DAILY_TIME_STEPS + dayofweek[0]).type(torch.IntTensor) % 7
            hour_indeces = init_hour_index.unsqueeze(1) + hour_index_supplement
            hour_indeces = torch.where(hour_indeces<24, hour_indeces, hour_indeces-24)
            hour_indeces = torch.where(weekday_indeces<5, hour_indeces, hour_indeces+24)
            sub_text_data = text_data[:,hour_indeces,:].permute(1,0,2,3)
            y_hat = model(inputs, sub_text_data, **kwags)
        case BaseTCN():
            y_hat = model(inputs, **kwags)
        case TextSTGNN3():
            weekday_indeces = ((init_time_index.unsqueeze(1) + weekday_index_supplement + 6) // DAILY_TIME_STEPS + dayofweek[0]).type(torch.IntTensor) % 7
            hour_indeces = init_hour_index.unsqueeze(1) + hour_index_supplement
            hour_indeces = torch.where(hour_indeces<24, hour_indeces, hour_indeces-24)
            hour_indeces = torch.where(weekday_indeces<5, hour_indeces, hour_indeces+24)
            sub_text_data = text_data[:,hour_indeces,:].permute(1,0,2,3)
            y_hat = model(inputs.permute(0, 3, 1, 2), sub_text_data, **kwags)
        case BaseSTGNN():
            y_hat = model(inputs.permute(0, 3, 1, 2), **kwags)
        case TextA3TGCN():
            weekday_indeces = ((init_time_index.unsqueeze(1) + weekday_index_supplement + 6) // DAILY_TIME_STEPS + dayofweek[0]).type(torch.IntTensor) % 7
            hour_indeces = init_hour_index.unsqueeze(1) + hour_index_supplement
            hour_indeces = torch.where(hour_indeces<24, hour_indeces, hour_indeces-24)
            hour_indeces = torch.where(weekday_indeces<5, hour_indeces, hour_indeces+24)
            sub_text_data = text_data[:,hour_indeces,:].permute(1,0,2,3)
            y_hat = model(inputs, sub_text_data, **kwags)
        case BaseA3TGCN2():
            y_hat = model(inputs, **kwags)
        case TextAGCRN():
            weekday_indeces = ((init_time_index.unsqueeze(1) + weekday_index_supplement + 6) // DAILY_TIME_STEPS + dayofweek[0]).type(torch.IntTensor) % 7
            hour_indeces = init_hour_index.unsqueeze(1) + hour_index_supplement
            hour_indeces = torch.where(hour_indeces<24, hour_indeces, hour_indeces-24)
            hour_indeces = torch.where(weekday_indeces<5, hour_indeces, hour_indeces+24)
            sub_text_data = text_data[:,hour_indeces,:].permute(1,0,2,3)
            y_hat = model(inputs, sub_text_data, **kwags)
        case BaseAGCRN():
            y_hat = model(inputs, **kwags)
        case _:
            raise ValueError('Unknown model type')

    return y_hat


def train(epoch_num, model, optimizer, train_loader, test_loader, input_channels:int=1, text_data=None, early_stop_patience=7, early_stop_delta=0, model_name_suffix='', **kwags):
    model_name = str(model.__class__.__name__)
    model.train()
    optimizer.zero_grad()
    train_loss_list, test_loss_list = [], []
    best_test_loss, early_stop_counter = np.inf, 0
    hour_index_supplement = torch.arange(1, 1 + TIME_WINDOW // 12).unsqueeze(0)
    time_index_supplement = torch.arange(12 + TIME_WINDOW).unsqueeze(0)
    weekday_index_supplement = torch.arange(12, 12 + TIME_WINDOW, 12).unsqueeze(0)
    for epoch in range(epoch_num):
        num, total_loss = 0, 0
        for encoder_inputs, labels in train_loader:
            y_hat = predict(model, encoder_inputs, input_channels, hour_index_supplement, time_index_supplement, weekday_index_supplement, text_data, **kwags)
            loss = F.l1_loss(labels, y_hat) # Mean squared error #loss = torch.mean((y_hat-labels)**2)  sqrt to change it to rmse
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            total_loss += loss.item() * labels.shape[0]
            num += labels.shape[0]
        train_loss = total_loss / num
        train_loss_list.append(train_loss)
        hourly_test_loss = test(model, test_loader, input_channels, text_data, **kwags)
        test_loss = hourly_test_loss.mean().item()
        test_loss_list.append(test_loss)
        print(f'Epoch {epoch}; Train MAE: {train_loss}; Test MAE: {test_loss}; Test Hourly MAE: {hourly_test_loss}')
        if test_loss < best_test_loss:
            best_test_loss = test_loss
            torch.save(model.state_dict(), f'{SAVE_DICT}/{model_name}.pkl')
        elif test_loss >= best_test_loss + early_stop_delta:
            early_stop_counter += 1
            if early_stop_counter >= early_stop_patience:
                break
    print(f'Best Test MAE: {best_test_loss}')
    new_name = f'{SAVE_DICT}/{model_name}-{best_test_loss:.4f}.pkl' if model_name_suffix == '' else f'{SAVE_DICT}/{model_name}-{model_name_suffix}-{best_test_loss:.4f}.pkl'
    os.rename(f'{SAVE_DICT}/{model_name}.pkl', new_name)
    return train_loss_list, test_loss_list


@torch.no_grad()
def test(model, test_loader, input_channels:int=1, text_data=None, **kwags):
    model.eval()
    num, total_loss = 0, torch.zeros(TIME_WINDOW // 12).to(device)
    hour_index_supplement = torch.arange(1, 1 + TIME_WINDOW // 12).unsqueeze(0)
    time_index_supplement = torch.arange(12 + TIME_WINDOW).unsqueeze(0)
    weekday_index_supplement = torch.arange(12, 12 + TIME_WINDOW, 12).unsqueeze(0)
    for encoder_inputs, labels in test_loader:
        y_hat = predict(model, encoder_inputs, input_channels, hour_index_supplement, time_index_supplement, weekday_index_supplement, text_data, **kwags)
        labels, y_hat = labels.view(BATCH_SIZE, NODE_NUM, -1, 12), y_hat.view(BATCH_SIZE, NODE_NUM, -1, 12)
        loss = torch.abs(labels - y_hat).mean(dim=(0,1,3))
        total_loss += loss * labels.shape[0]
        num += labels.shape[0]
    return total_loss / num

# One-Hour-Ahead Prediction

In [6]:
TIME_WINDOW = 12
# spatiotemporal graph datasets
dataset = la_loader.get_dataset(num_timesteps_out=TIME_WINDOW)
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.8)
# Creating Dataloaders
train_input = np.array(train_dataset.features) # (, 207, 2, 12)
train_target = np.array(train_dataset.targets) # (, 207, 12)
train_x_tensor = torch.from_numpy(train_input).type(torch.FloatTensor).to(device)  # (B, N, F, T)
train_target_tensor = torch.from_numpy(train_target).type(torch.FloatTensor).to(device)  # (B, N, T)
train_dataset_new = torch.utils.data.TensorDataset(train_x_tensor, train_target_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset_new, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
test_input = np.array(test_dataset.features) # (, 207, 2, 12)
test_target = np.array(test_dataset.targets) # (, 207, 12)
test_x_tensor = torch.from_numpy(test_input).type(torch.FloatTensor).to(device)  # (B, N, F, T)
test_target_tensor = torch.from_numpy(test_target).type(torch.FloatTensor).to(device)  # (B, N, T)
test_dataset_new = torch.utils.data.TensorDataset(test_x_tensor, test_target_tensor)
test_loader = torch.utils.data.DataLoader(test_dataset_new, batch_size=BATCH_SIZE, shuffle=SHUFFLE, drop_last=True)

## TCN

In [15]:
## TCN
in_channels = 1
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = BaseTCN(in_features=IN_TIME_SETPS, in_channels=in_channels, hidden_channels=[64, 16, 128], kernel_size=2, out_features=TIME_WINDOW).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, input_channels=in_channels, early_stop_delta=0.005, model_name_suffix='3h')

Epoch 0; Train MAE: 0.3672033087393948; Test MAE: 0.3162669837474823; Test Hourly MAE: tensor([0.3163], device='cuda:0')
Epoch 1; Train MAE: 0.27740274380161384; Test MAE: 0.2831180691719055; Test Hourly MAE: tensor([0.2831], device='cuda:0')
Epoch 2; Train MAE: 0.27526915191768486; Test MAE: 0.2841547727584839; Test Hourly MAE: tensor([0.2842], device='cuda:0')
Epoch 3; Train MAE: 0.274631322683575; Test MAE: 0.2819331884384155; Test Hourly MAE: tensor([0.2819], device='cuda:0')
Epoch 4; Train MAE: 0.27388819376720447; Test MAE: 0.2838546931743622; Test Hourly MAE: tensor([0.2839], device='cuda:0')
Epoch 5; Train MAE: 0.2735214833274623; Test MAE: 0.28247731924057007; Test Hourly MAE: tensor([0.2825], device='cuda:0')
Epoch 6; Train MAE: 0.27318681195099775; Test MAE: 0.2813299000263214; Test Hourly MAE: tensor([0.2813], device='cuda:0')
Epoch 7; Train MAE: 0.2727769915118953; Test MAE: 0.2821645736694336; Test Hourly MAE: tensor([0.2822], device='cuda:0')
Epoch 8; Train MAE: 0.272844

In [16]:
## TCN + Temporal Encoding
in_channels = 2
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
# model = BaseTCN(num_nodes=NODE_NUM, in_channels=in_channels, out_features=TIME_WINDOW).to(device)
model = BaseTCN(in_features=IN_TIME_SETPS, in_channels=in_channels, hidden_channels=[64, 16, 128], kernel_size=2, out_features=TIME_WINDOW).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, input_channels=in_channels, early_stop_delta=0.005, model_name_suffix='3h')

Epoch 0; Train MAE: 0.3442872484153676; Test MAE: 0.3162192106246948; Test Hourly MAE: tensor([0.3162], device='cuda:0')
Epoch 1; Train MAE: 0.2723691170669605; Test MAE: 0.27348798513412476; Test Hourly MAE: tensor([0.2735], device='cuda:0')
Epoch 2; Train MAE: 0.2657984554697977; Test MAE: 0.2706884741783142; Test Hourly MAE: tensor([0.2707], device='cuda:0')
Epoch 3; Train MAE: 0.26344605932168874; Test MAE: 0.2691631317138672; Test Hourly MAE: tensor([0.2692], device='cuda:0')
Epoch 4; Train MAE: 0.2624439041469699; Test MAE: 0.2686276435852051; Test Hourly MAE: tensor([0.2686], device='cuda:0')
Epoch 5; Train MAE: 0.261545221800002; Test MAE: 0.2676315903663635; Test Hourly MAE: tensor([0.2676], device='cuda:0')
Epoch 6; Train MAE: 0.26084511539089345; Test MAE: 0.2669541537761688; Test Hourly MAE: tensor([0.2670], device='cuda:0')
Epoch 7; Train MAE: 0.2603187155431119; Test MAE: 0.265972375869751; Test Hourly MAE: tensor([0.2660], device='cuda:0')
Epoch 8; Train MAE: 0.259903815

In [17]:
## TCN + Llama3.1-8b
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
# model = TextTCN(num_nodes=NODE_NUM, in_channels=1, out_features=TIME_WINDOW, in_text_dim=768, text_attn_layer_num=3, dropout_p=0.5).to(device)
model = TextTCN(num_nodes=NODE_NUM, in_features=IN_TIME_SETPS, in_channels=1, hidden_channels=[64, 16, 128], kernel_size=2, out_features=TIME_WINDOW, in_text_dim=768, text_attn_layer_num=3).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, text_data=hourly_det_pattern_embeddings,
                                        early_stop_delta=0.005, early_stop_patience=15, model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.36229125048234084; Test MAE: 0.2811448872089386; Test Hourly MAE: tensor([0.2811], device='cuda:0')
Epoch 1; Train MAE: 0.25172273894754527; Test MAE: 0.25212451815605164; Test Hourly MAE: tensor([0.2521], device='cuda:0')
Epoch 2; Train MAE: 0.2370941916080279; Test MAE: 0.2392190545797348; Test Hourly MAE: tensor([0.2392], device='cuda:0')
Epoch 3; Train MAE: 0.22982885848695986; Test MAE: 0.23755960166454315; Test Hourly MAE: tensor([0.2376], device='cuda:0')
Epoch 4; Train MAE: 0.2262543417583002; Test MAE: 0.23317234218120575; Test Hourly MAE: tensor([0.2332], device='cuda:0')
Epoch 5; Train MAE: 0.2240844224797231; Test MAE: 0.23242036998271942; Test Hourly MAE: tensor([0.2324], device='cuda:0')
Epoch 6; Train MAE: 0.22201750988018848; Test MAE: 0.23190838098526; Test Hourly MAE: tensor([0.2319], device='cuda:0')
Epoch 7; Train MAE: 0.2203650953265551; Test MAE: 0.22962988913059235; Test Hourly MAE: tensor([0.2296], device='cuda:0')
Epoch 8; Train MAE: 0.218

In [7]:
## TCN + Qwen3-14B
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
# model = TextTCN(num_nodes=NODE_NUM, in_channels=1, out_features=TIME_WINDOW, in_text_dim=768, text_attn_layer_num=3, dropout_p=0.5).to(device)
model = TextTCN(num_nodes=NODE_NUM, in_features=IN_TIME_SETPS, in_channels=1, hidden_channels=[64, 16, 128], kernel_size=2, out_features=TIME_WINDOW, in_text_dim=768, text_attn_layer_num=3).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, text_data=hourly_det_pattern_embeddings_qwen14b,
                                        early_stop_delta=0.005, early_stop_patience=15, model_name_suffix=f'{TIME_WINDOW//12}h')

  h = F.dropout(F.gelu(text_layer_norm(text_v_ln(h + F.scaled_dot_product_attention(query=text, key=text_k, value=text_v)))), p=self.dropout_p, training=self.training)


Epoch 0; Train MAE: 0.36181381330868906; Test MAE: 0.27887433767318726; Test Hourly MAE: tensor([0.2789], device='cuda:0')
Epoch 1; Train MAE: 0.25099917553434864; Test MAE: 0.2518692910671234; Test Hourly MAE: tensor([0.2519], device='cuda:0')
Epoch 2; Train MAE: 0.23631784647146117; Test MAE: 0.23882460594177246; Test Hourly MAE: tensor([0.2388], device='cuda:0')
Epoch 3; Train MAE: 0.23030559391340363; Test MAE: 0.2358599454164505; Test Hourly MAE: tensor([0.2359], device='cuda:0')
Epoch 4; Train MAE: 0.2267134633855285; Test MAE: 0.23568490147590637; Test Hourly MAE: tensor([0.2357], device='cuda:0')
Epoch 5; Train MAE: 0.22484258680700142; Test MAE: 0.23442326486110687; Test Hourly MAE: tensor([0.2344], device='cuda:0')
Epoch 6; Train MAE: 0.22258344853175022; Test MAE: 0.2324499785900116; Test Hourly MAE: tensor([0.2324], device='cuda:0')
Epoch 7; Train MAE: 0.2208394411011277; Test MAE: 0.22968417406082153; Test Hourly MAE: tensor([0.2297], device='cuda:0')
Epoch 8; Train MAE: 0

## STGCN

In [33]:
## STGCN
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = BaseSTGNN(num_nodes=NODE_NUM, in_channels=1, out_features=TIME_WINDOW).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for snapshot in train_dataset:
    static_edge_index = snapshot.edge_index.to(device)
    static_edge_attr = snapshot.edge_attr.to(device)
    break
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, edge_index=static_edge_index,
                                        edge_weight=static_edge_attr, early_stop_delta=0.005, model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.3204336465797692; Test MAE: 0.28998368978500366; Test Hourly MAE: tensor([0.2900], device='cuda:0')
Epoch 1; Train MAE: 0.26924158966986933; Test MAE: 0.2754098176956177; Test Hourly MAE: tensor([0.2754], device='cuda:0')
Epoch 2; Train MAE: 0.2647579137549222; Test MAE: 0.26905933022499084; Test Hourly MAE: tensor([0.2691], device='cuda:0')
Epoch 3; Train MAE: 0.26124944527433297; Test MAE: 0.26755884289741516; Test Hourly MAE: tensor([0.2676], device='cuda:0')
Epoch 4; Train MAE: 0.25820753441374994; Test MAE: 0.26236072182655334; Test Hourly MAE: tensor([0.2624], device='cuda:0')
Epoch 5; Train MAE: 0.2556853287061241; Test MAE: 0.26321136951446533; Test Hourly MAE: tensor([0.2632], device='cuda:0')
Epoch 6; Train MAE: 0.2532026188744006; Test MAE: 0.2608388066291809; Test Hourly MAE: tensor([0.2608], device='cuda:0')
Epoch 7; Train MAE: 0.2514393853598109; Test MAE: 0.2565513849258423; Test Hourly MAE: tensor([0.2566], device='cuda:0')
Epoch 8; Train MAE: 0.24

In [34]:
## STGCN + Temporal Encoding
in_channels = 2
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = BaseSTGNN(num_nodes=NODE_NUM, in_channels=in_channels, out_features=TIME_WINDOW).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for snapshot in train_dataset:
    static_edge_index = snapshot.edge_index.to(device)
    static_edge_attr = snapshot.edge_attr.to(device)
    break
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, edge_index=static_edge_index,
                                        edge_weight=static_edge_attr, input_channels=in_channels, early_stop_delta=0.005, model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.31909311670705537; Test MAE: 0.2789507210254669; Test Hourly MAE: tensor([0.2790], device='cuda:0')
Epoch 1; Train MAE: 0.25964356976274017; Test MAE: 0.25960758328437805; Test Hourly MAE: tensor([0.2596], device='cuda:0')
Epoch 2; Train MAE: 0.25136033671899377; Test MAE: 0.25534355640411377; Test Hourly MAE: tensor([0.2553], device='cuda:0')
Epoch 3; Train MAE: 0.24760310807935546; Test MAE: 0.2548583447933197; Test Hourly MAE: tensor([0.2549], device='cuda:0')
Epoch 4; Train MAE: 0.24454394626979517; Test MAE: 0.2482478767633438; Test Hourly MAE: tensor([0.2482], device='cuda:0')
Epoch 5; Train MAE: 0.24185566283832086; Test MAE: 0.251069039106369; Test Hourly MAE: tensor([0.2511], device='cuda:0')
Epoch 6; Train MAE: 0.2398965491382318; Test MAE: 0.24493853747844696; Test Hourly MAE: tensor([0.2449], device='cuda:0')
Epoch 7; Train MAE: 0.23786722342962416; Test MAE: 0.24467208981513977; Test Hourly MAE: tensor([0.2447], device='cuda:0')
Epoch 8; Train MAE: 0.

In [35]:
##STGCN + Llama3.1-8b
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = TextSTGNN3(num_nodes=NODE_NUM, in_channels=1, out_features=TIME_WINDOW, in_text_dim=768, text_attn_layer_num=3, dropout_p=0.5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for snapshot in train_dataset:
    static_edge_index = snapshot.edge_index.to(device)
    static_edge_attr = snapshot.edge_attr.to(device)
    break
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, edge_index=static_edge_index,
                                        edge_weight=static_edge_attr, text_data=hourly_det_pattern_embeddings, early_stop_delta=0.015, early_stop_patience=15,
                                        model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.3297572640754352; Test MAE: 0.27440565824508667; Test Hourly MAE: tensor([0.2744], device='cuda:0')
Epoch 1; Train MAE: 0.25205661032662213; Test MAE: 0.2533606290817261; Test Hourly MAE: tensor([0.2534], device='cuda:0')
Epoch 2; Train MAE: 0.23758215533795757; Test MAE: 0.2382042109966278; Test Hourly MAE: tensor([0.2382], device='cuda:0')
Epoch 3; Train MAE: 0.22987709274498103; Test MAE: 0.23543107509613037; Test Hourly MAE: tensor([0.2354], device='cuda:0')
Epoch 4; Train MAE: 0.22594222551751358; Test MAE: 0.23428495228290558; Test Hourly MAE: tensor([0.2343], device='cuda:0')
Epoch 5; Train MAE: 0.22317488300048302; Test MAE: 0.2310825139284134; Test Hourly MAE: tensor([0.2311], device='cuda:0')
Epoch 6; Train MAE: 0.22089955886112195; Test MAE: 0.2304762303829193; Test Hourly MAE: tensor([0.2305], device='cuda:0')
Epoch 7; Train MAE: 0.21938603287823846; Test MAE: 0.2307700216770172; Test Hourly MAE: tensor([0.2308], device='cuda:0')
Epoch 8; Train MAE: 0.

In [11]:
##STGCN + Qwen3-14b
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = TextSTGNN3(num_nodes=NODE_NUM, in_channels=1, out_features=TIME_WINDOW, in_text_dim=768, text_attn_layer_num=3, dropout_p=0.5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for snapshot in train_dataset:
    static_edge_index = snapshot.edge_index.to(device)
    static_edge_attr = snapshot.edge_attr.to(device)
    break
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, edge_index=static_edge_index,
                                        edge_weight=static_edge_attr, text_data=hourly_det_pattern_embeddings_qwen14b, early_stop_delta=0.015, early_stop_patience=15,
                                        model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.32836384385406414; Test MAE: 0.2774799168109894; Test Hourly MAE: tensor([0.2775], device='cuda:0')
Epoch 1; Train MAE: 0.2509007612330334; Test MAE: 0.24993418157100677; Test Hourly MAE: tensor([0.2499], device='cuda:0')
Epoch 2; Train MAE: 0.237353600700882; Test MAE: 0.23919785022735596; Test Hourly MAE: tensor([0.2392], device='cuda:0')
Epoch 3; Train MAE: 0.23049690710189186; Test MAE: 0.2346455156803131; Test Hourly MAE: tensor([0.2346], device='cuda:0')
Epoch 4; Train MAE: 0.22657557023323585; Test MAE: 0.23529992997646332; Test Hourly MAE: tensor([0.2353], device='cuda:0')
Epoch 5; Train MAE: 0.22396415618256987; Test MAE: 0.23134008049964905; Test Hourly MAE: tensor([0.2313], device='cuda:0')
Epoch 6; Train MAE: 0.22150324142285596; Test MAE: 0.2319323569536209; Test Hourly MAE: tensor([0.2319], device='cuda:0')
Epoch 7; Train MAE: 0.21974585762369298; Test MAE: 0.22886386513710022; Test Hourly MAE: tensor([0.2289], device='cuda:0')
Epoch 8; Train MAE: 0.

## A3TGCN2

In [36]:
## A3TGCN
in_channels = 1
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = BaseA3TGCN2(num_nodes=NODE_NUM, in_channels=in_channels, in_periods=12, out_periods=TIME_WINDOW, batch_size=BATCH_SIZE, add_self_loops=False).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for snapshot in train_dataset:
    static_edge_index = snapshot.edge_index.to(device)
    static_edge_attr = snapshot.edge_attr.to(device)
    break
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, edge_index=static_edge_index,
                                        edge_weight=static_edge_attr, input_channels=in_channels, early_stop_delta=0.005, model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.38899722946024384; Test MAE: 0.3447922170162201; Test Hourly MAE: tensor([0.3448], device='cuda:0')
Epoch 1; Train MAE: 0.3182858026055532; Test MAE: 0.3216594457626343; Test Hourly MAE: tensor([0.3217], device='cuda:0')
Epoch 2; Train MAE: 0.3046204937952701; Test MAE: 0.3094165623188019; Test Hourly MAE: tensor([0.3094], device='cuda:0')
Epoch 3; Train MAE: 0.29601944015126364; Test MAE: 0.3020124137401581; Test Hourly MAE: tensor([0.3020], device='cuda:0')
Epoch 4; Train MAE: 0.29069339964434365; Test MAE: 0.29946833848953247; Test Hourly MAE: tensor([0.2995], device='cuda:0')
Epoch 5; Train MAE: 0.28788620240498924; Test MAE: 0.297267347574234; Test Hourly MAE: tensor([0.2973], device='cuda:0')
Epoch 6; Train MAE: 0.2858855857331062; Test MAE: 0.29370036721229553; Test Hourly MAE: tensor([0.2937], device='cuda:0')
Epoch 7; Train MAE: 0.28448757048800727; Test MAE: 0.2939881384372711; Test Hourly MAE: tensor([0.2940], device='cuda:0')
Epoch 8; Train MAE: 0.2834

In [37]:
## A3TGCN + Temporal Encoding
in_channels = 2
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = BaseA3TGCN2(num_nodes=NODE_NUM, in_channels=in_channels, in_periods=12, out_periods=TIME_WINDOW, batch_size=BATCH_SIZE, add_self_loops=False).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for snapshot in train_dataset:
    static_edge_index = snapshot.edge_index.to(device)
    static_edge_attr = snapshot.edge_attr.to(device)
    break
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, edge_index=static_edge_index,
                                        edge_weight=static_edge_attr, input_channels=in_channels, early_stop_delta=0.005, model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.38331102579832077; Test MAE: 0.3367142379283905; Test Hourly MAE: tensor([0.3367], device='cuda:0')
Epoch 1; Train MAE: 0.29978198352678914; Test MAE: 0.29605141282081604; Test Hourly MAE: tensor([0.2961], device='cuda:0')
Epoch 2; Train MAE: 0.28191927709033554; Test MAE: 0.28618910908699036; Test Hourly MAE: tensor([0.2862], device='cuda:0')
Epoch 3; Train MAE: 0.2740589993370471; Test MAE: 0.2813586890697479; Test Hourly MAE: tensor([0.2814], device='cuda:0')
Epoch 4; Train MAE: 0.26958376534770584; Test MAE: 0.2751155197620392; Test Hourly MAE: tensor([0.2751], device='cuda:0')
Epoch 5; Train MAE: 0.26663459217715485; Test MAE: 0.27235695719718933; Test Hourly MAE: tensor([0.2724], device='cuda:0')
Epoch 6; Train MAE: 0.26452913281516494; Test MAE: 0.2749212682247162; Test Hourly MAE: tensor([0.2749], device='cuda:0')
Epoch 7; Train MAE: 0.2630919978788523; Test MAE: 0.2699432075023651; Test Hourly MAE: tensor([0.2699], device='cuda:0')
Epoch 8; Train MAE: 0.2

In [38]:
##A3TGCN + Llama3.1-8b
in_channels = 1
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = TextA3TGCN(num_nodes=NODE_NUM, in_channels=in_channels, in_periods=12, out_periods=TIME_WINDOW, batch_size=BATCH_SIZE, in_text_dim=768, text_attn_layer_num=3, add_self_loops=False, dropout_p=0.5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for snapshot in train_dataset:
    static_edge_index = snapshot.edge_index.to(device)
    static_edge_attr = snapshot.edge_attr.to(device)
    break
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, edge_index=static_edge_index,
                                        edge_weight=static_edge_attr, input_channels=in_channels, text_data=hourly_det_pattern_embeddings, early_stop_delta=0.015, early_stop_patience=15,
                                        model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.37365959034623386; Test MAE: 0.30441704392433167; Test Hourly MAE: tensor([0.3044], device='cuda:0')
Epoch 1; Train MAE: 0.2757689175761749; Test MAE: 0.2719798982143402; Test Hourly MAE: tensor([0.2720], device='cuda:0')
Epoch 2; Train MAE: 0.2574581531372583; Test MAE: 0.25840920209884644; Test Hourly MAE: tensor([0.2584], device='cuda:0')
Epoch 3; Train MAE: 0.2459072112758583; Test MAE: 0.2524731755256653; Test Hourly MAE: tensor([0.2525], device='cuda:0')
Epoch 4; Train MAE: 0.23997215167662808; Test MAE: 0.24778057634830475; Test Hourly MAE: tensor([0.2478], device='cuda:0')
Epoch 5; Train MAE: 0.23661437080563785; Test MAE: 0.24614129960536957; Test Hourly MAE: tensor([0.2461], device='cuda:0')
Epoch 6; Train MAE: 0.23355927036326624; Test MAE: 0.24264223873615265; Test Hourly MAE: tensor([0.2426], device='cuda:0')
Epoch 7; Train MAE: 0.23138324502054777; Test MAE: 0.24252669513225555; Test Hourly MAE: tensor([0.2425], device='cuda:0')
Epoch 8; Train MAE: 0

In [8]:
##A3TGCN Qwen3-14b
in_channels = 1
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = TextA3TGCN(num_nodes=NODE_NUM, in_channels=in_channels, in_periods=12, out_periods=TIME_WINDOW, batch_size=BATCH_SIZE, in_text_dim=768, text_attn_layer_num=3, add_self_loops=False, dropout_p=0.5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for snapshot in train_dataset:
    static_edge_index = snapshot.edge_index.to(device)
    static_edge_attr = snapshot.edge_attr.to(device)
    break
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, edge_index=static_edge_index,
                                        edge_weight=static_edge_attr, input_channels=in_channels, text_data=hourly_det_pattern_embeddings_qwen14b, early_stop_delta=0.015, early_stop_patience=15,
                                        model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.369297040748262; Test MAE: 0.30774250626564026; Test Hourly MAE: tensor([0.3077], device='cuda:0')
Epoch 1; Train MAE: 0.2767221498015885; Test MAE: 0.2744843363761902; Test Hourly MAE: tensor([0.2745], device='cuda:0')
Epoch 2; Train MAE: 0.25831462568211777; Test MAE: 0.26018795371055603; Test Hourly MAE: tensor([0.2602], device='cuda:0')
Epoch 3; Train MAE: 0.24686760479741007; Test MAE: 0.25234922766685486; Test Hourly MAE: tensor([0.2523], device='cuda:0')
Epoch 4; Train MAE: 0.24076643328521855; Test MAE: 0.249410018324852; Test Hourly MAE: tensor([0.2494], device='cuda:0')
Epoch 5; Train MAE: 0.23714042541997454; Test MAE: 0.2468658983707428; Test Hourly MAE: tensor([0.2469], device='cuda:0')
Epoch 6; Train MAE: 0.23414305775104283; Test MAE: 0.24321535229682922; Test Hourly MAE: tensor([0.2432], device='cuda:0')
Epoch 7; Train MAE: 0.23166673045570604; Test MAE: 0.24333198368549347; Test Hourly MAE: tensor([0.2433], device='cuda:0')
Epoch 8; Train MAE: 0.2

## AGCRN

In [30]:
## AGCRN
in_channels = 1
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = BaseAGCRN(num_nodes=NODE_NUM, in_channels=in_channels, in_periods=IN_TIME_SETPS, gru_channels=64, out_periods=TIME_WINDOW, K=2, embedding_dimensions=4).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, input_channels=in_channels,
                                        early_stop_delta=0.005, model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.3603156594054721; Test MAE: 0.289206862449646; Test Hourly MAE: tensor([0.2892], device='cuda:0')
Epoch 1; Train MAE: 0.2675988594087485; Test MAE: 0.2698434293270111; Test Hourly MAE: tensor([0.2698], device='cuda:0')
Epoch 2; Train MAE: 0.2596960766153915; Test MAE: 0.26233863830566406; Test Hourly MAE: tensor([0.2623], device='cuda:0')
Epoch 3; Train MAE: 0.2534167457058608; Test MAE: 0.25690439343452454; Test Hourly MAE: tensor([0.2569], device='cuda:0')
Epoch 4; Train MAE: 0.24934013936424923; Test MAE: 0.25369706749916077; Test Hourly MAE: tensor([0.2537], device='cuda:0')
Epoch 5; Train MAE: 0.24516003606753928; Test MAE: 0.25003471970558167; Test Hourly MAE: tensor([0.2500], device='cuda:0')
Epoch 6; Train MAE: 0.2418355211337036; Test MAE: 0.24710476398468018; Test Hourly MAE: tensor([0.2471], device='cuda:0')
Epoch 7; Train MAE: 0.23918421973831186; Test MAE: 0.24535807967185974; Test Hourly MAE: tensor([0.2454], device='cuda:0')
Epoch 8; Train MAE: 0.23

In [32]:
## AGCRN + Temporal Encoding
in_channels = 2
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = BaseAGCRN(num_nodes=NODE_NUM, in_channels=in_channels, in_periods=IN_TIME_SETPS, gru_channels=64, out_periods=TIME_WINDOW, K=2, embedding_dimensions=4).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, input_channels=in_channels,
                                        early_stop_delta=0.005, model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.363097981105898; Test MAE: 0.2792561948299408; Test Hourly MAE: tensor([0.2793], device='cuda:0')
Epoch 1; Train MAE: 0.2619288226363258; Test MAE: 0.26114898920059204; Test Hourly MAE: tensor([0.2611], device='cuda:0')
Epoch 2; Train MAE: 0.24993965464913956; Test MAE: 0.2531661093235016; Test Hourly MAE: tensor([0.2532], device='cuda:0')
Epoch 3; Train MAE: 0.242361087595748; Test MAE: 0.2448059320449829; Test Hourly MAE: tensor([0.2448], device='cuda:0')
Epoch 4; Train MAE: 0.23738411183808453; Test MAE: 0.24161207675933838; Test Hourly MAE: tensor([0.2416], device='cuda:0')
Epoch 5; Train MAE: 0.23432317926226376; Test MAE: 0.23949743807315826; Test Hourly MAE: tensor([0.2395], device='cuda:0')
Epoch 6; Train MAE: 0.23218201302757888; Test MAE: 0.23795007169246674; Test Hourly MAE: tensor([0.2380], device='cuda:0')
Epoch 7; Train MAE: 0.23047323692067762; Test MAE: 0.2362445741891861; Test Hourly MAE: tensor([0.2362], device='cuda:0')
Epoch 8; Train MAE: 0.229

In [31]:
## AGCRN + Llama3.1-8b
in_channels = 1
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = TextAGCRN(num_nodes=NODE_NUM, in_channels=in_channels, in_periods=IN_TIME_SETPS, gru_channels=64, out_periods=TIME_WINDOW, K=2, embedding_dimensions=4, in_text_dim=768, text_attn_layer_num=3, dropout_p=0.5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, input_channels=in_channels, text_data=hourly_det_pattern_embeddings,
                                        early_stop_delta=0.005, early_stop_patience=15, model_name_suffix=f'{TIME_WINDOW//12}h')

Epoch 0; Train MAE: 0.36879789543764613; Test MAE: 0.28390568494796753; Test Hourly MAE: tensor([0.2839], device='cuda:0')
Epoch 1; Train MAE: 0.2578240613862176; Test MAE: 0.25930070877075195; Test Hourly MAE: tensor([0.2593], device='cuda:0')
Epoch 2; Train MAE: 0.2479727317657426; Test MAE: 0.2498275637626648; Test Hourly MAE: tensor([0.2498], device='cuda:0')
Epoch 3; Train MAE: 0.2378199085175434; Test MAE: 0.2411484569311142; Test Hourly MAE: tensor([0.2411], device='cuda:0')
Epoch 4; Train MAE: 0.23293248222810087; Test MAE: 0.23762845993041992; Test Hourly MAE: tensor([0.2376], device='cuda:0')
Epoch 5; Train MAE: 0.2296681621712502; Test MAE: 0.2359681874513626; Test Hourly MAE: tensor([0.2360], device='cuda:0')
Epoch 6; Train MAE: 0.22729896092526267; Test MAE: 0.23587192595005035; Test Hourly MAE: tensor([0.2359], device='cuda:0')
Epoch 7; Train MAE: 0.22542954532203274; Test MAE: 0.2338632345199585; Test Hourly MAE: tensor([0.2339], device='cuda:0')
Epoch 8; Train MAE: 0.22

In [7]:
## AGCRN + Qwen3-14b
in_channels = 1
torch.manual_seed(1)
torch.cuda.manual_seed_all(3)
model = TextAGCRN(num_nodes=NODE_NUM, in_channels=in_channels, in_periods=IN_TIME_SETPS, gru_channels=64, out_periods=TIME_WINDOW, K=2, embedding_dimensions=4, in_text_dim=768, text_attn_layer_num=3, dropout_p=0.5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Training the model 
train_loss_list, test_loss_list = train(epoch_num=100, model=model, optimizer=optimizer, train_loader=train_loader, test_loader=test_loader, input_channels=in_channels, text_data=hourly_det_pattern_embeddings_qwen14b,
                                        early_stop_delta=0.005, early_stop_patience=15, model_name_suffix=f'{TIME_WINDOW//12}h')

  h = F.dropout(F.gelu(text_layer_norm(text_v_ln(h + F.scaled_dot_product_attention(query=text, key=text_k, value=text_v)))), p=self.dropout_p, training=self.training)


Epoch 0; Train MAE: 0.3688000644618106; Test MAE: 0.28243425488471985; Test Hourly MAE: tensor([0.2824], device='cuda:0')
Epoch 1; Train MAE: 0.25600622744064466; Test MAE: 0.25788235664367676; Test Hourly MAE: tensor([0.2579], device='cuda:0')
Epoch 2; Train MAE: 0.24552878995086544; Test MAE: 0.2465442270040512; Test Hourly MAE: tensor([0.2465], device='cuda:0')
Epoch 3; Train MAE: 0.23624373241284183; Test MAE: 0.24129018187522888; Test Hourly MAE: tensor([0.2413], device='cuda:0')
Epoch 4; Train MAE: 0.23268575994210822; Test MAE: 0.2378234714269638; Test Hourly MAE: tensor([0.2378], device='cuda:0')
Epoch 5; Train MAE: 0.22974435734414608; Test MAE: 0.23568899929523468; Test Hourly MAE: tensor([0.2357], device='cuda:0')
Epoch 6; Train MAE: 0.22759987427809528; Test MAE: 0.2351541817188263; Test Hourly MAE: tensor([0.2352], device='cuda:0')
Epoch 7; Train MAE: 0.22601963140975648; Test MAE: 0.2352062165737152; Test Hourly MAE: tensor([0.2352], device='cuda:0')
Epoch 8; Train MAE: 0