In [1]:
import pandas as pd
import numpy as np
import json

from sklearn.preprocessing import MinMaxScaler

def data_loader(data_path, city, year, level='district', length=12, n_steps=12, is_scale=False, temporal_copy=False, is_realtime=False, train_ratio=0.8):
    
    def normalize(train, test):
        if is_scale:
            scaler = MinMaxScaler()
            train_shape, test_shape = train.shape, test.shape
            train = scaler.fit_transform(train.reshape(-1, train_shape[-1]))
            test = scaler.transform(test.reshape(-1, test_shape[-1]))
            return train.reshape(train_shape), test.reshape(test_shape), scaler
        else:
            return train, test, None

    risk_data = pd.read_csv(f'{data_path}/risk_scores/{city}-{year}-{level}-hour-risk.csv')
    selected_areas = risk_data.drop(columns=['date', 'time']).columns
    n_districts = len(selected_areas) # number of districts
    n_outputs = len(selected_areas)
    train_length = int(30 * train_ratio)

    risk_train, y_train = [], []
    risk_test, y_test = [], []
    for i in range(length, 721-n_steps):
        if i <= (train_length * 24): # before date 25th
            y_train.append(risk_data.drop(columns=['date', 'time']).iloc[i:i+n_steps, :n_outputs].to_numpy())
            risk_train.append(risk_data.drop(columns=['date', 'time']).iloc[i-length:i, :n_districts].to_numpy())
        else:
            y_test.append(risk_data.drop(columns=['date', 'time']).iloc[i:i+n_steps, :n_outputs].to_numpy())
            risk_test.append(risk_data.drop(columns=['date', 'time']).iloc[i-length:i, :n_districts].to_numpy())
        
    risk_train, risk_test, risk_scaler = normalize(np.array(risk_train), np.array(risk_test))
    y_train, y_test = np.array(y_train), np.array(y_test)
    y_train_scaled, y_test_scaled, y_scaler = normalize(y_train, y_test)

    # Weather & Air Quality  
    weather_data = pd.read_csv(f'{data_path}/weather/{city}-{year}-count.csv').fillna(0)
    if level == 'district':
        weather_data['location'] = weather_data['location'].apply(lambda x: x.split('|')[0])
        weather_data = weather_data.groupby(by=['date','time','location'], as_index=False).mean()                
    weather_train, weather_test = [], []

    location_weather = []
    for location in selected_areas:
        location_weather.append(weather_data[weather_data['location'] == location].iloc[:, 3:].to_numpy())

    location_weather = np.concatenate(location_weather, axis=1)

    for i in range(length, 721-n_steps):
        if i <= (train_length * 24): # before date 25th
            weather_train.append(location_weather[i-length:i])
        else:
            weather_test.append(location_weather[i-length:i])
    
    weather_train, weather_test, _ = normalize(np.array(weather_train).reshape(len(weather_train), length, n_districts, -1), np.array(weather_test).reshape(len(weather_test), length, n_districts, -1))


    # Dangerous Driving Behavior
    dtg_data = pd.read_csv(f'{data_path}/dangerous_cases/{city}-{year}-date-hour-{level}-new.csv')
    dtg_train, dtg_test = [], []

    location_dtg = []
    for location in selected_areas:
        if level == 'district':
            district = location.split('|')[0]
            location_dtg.append(dtg_data[dtg_data['district'] == district].iloc[:, 3:].to_numpy())
        else:
            district, subdistrict = location.split('|')[0], location.split('|')[1]
            location_dtg.append(dtg_data[(dtg_data['district'] == district) & (dtg_data['subdistrict'] == subdistrict)].iloc[:, 3:].to_numpy())

    location_dtg = np.concatenate(location_dtg, axis=1)

    for i in range(length, 721-n_steps):
        if i <= (train_length * 24): # before date 25th
            dtg_train.append(location_dtg[i-length:i])
        else:
            dtg_test.append(location_dtg[i-length:i])

    dtg_train, dtg_test, _ = normalize(np.array(dtg_train).reshape(len(dtg_train), length, n_districts, -1), np.array(dtg_test).reshape(len(dtg_test), length, n_districts, -1))


    # Road data
    road_data = pd.read_csv(f'{data_path}/roads/{city}-{year}-{level}-road-count.csv').drop(columns=['attribute'])
    road_train, road_test = [], []

    location_road = []
    for location in selected_areas:
        location_road.append(road_data[location].to_numpy())

    for i in range(length, 721-n_steps):
        if i <= (train_length * 24): # before date 25th
            road_train.append(np.array([location_road]*length)) if temporal_copy else road_train.append(np.array(location_road))
        else:
            road_test.append(np.array([location_road]*length)) if temporal_copy else road_test.append(np.array(location_road))
            
    road_train, road_test, _ = normalize(np.array(road_train), np.array(road_test))


    # demographics data
    demo_data = pd.read_csv(f'{data_path}/demographic/{city}-{year}-{level}.csv').drop(columns=['index'])
    demo_train, demo_test = [], []

    location_demo = []
    for location in selected_areas:
        location_demo.append(demo_data[location].to_numpy())

    for i in range(length, 721-n_steps):
        if i <= (train_length * 24): # before date 25th
            demo_train.append(np.array([location_demo]*length)) if temporal_copy else demo_train.append(np.array(location_demo))
        else:
            demo_test.append(np.array([location_demo]*length)) if temporal_copy else demo_test.append(np.array(location_demo))
            
    demo_train, demo_test, _ = normalize(np.array(demo_train), np.array(demo_test))


    # POI data
    poi_data = pd.read_csv(f'{data_path}/poi/{city}-{year}-{level}.csv').drop(columns=['location'])
    poi_train, poi_test = [], []

    location_poi = []
    for location in selected_areas:
        location_poi.append(poi_data[location].to_numpy())

    for i in range(length, 721-n_steps):
        if i <= (train_length * 24): # before date 25th
            poi_train.append(np.array([location_poi]*length)) if temporal_copy else poi_train.append(np.array(location_poi))
        else:
            poi_test.append(np.array([location_poi]*length)) if temporal_copy else poi_test.append(np.array(location_poi))
            
    poi_train, poi_test, _ = normalize(np.array(poi_train), np.array(poi_test))


    # traffic volumes
    volume_data = pd.read_csv(f'{data_path}/traffic_volume/{city}-{year}.csv').drop(columns=['date', 'hour'])
    volume_train, volume_test = [], []

    for i in range(length, 721-n_steps):
        if i <= (train_length * 24): # before date 25th
            volume_train.append(volume_data.iloc[i-length:i, :n_districts].to_numpy())
        else:
            volume_test.append(volume_data.iloc[i-length:i, :n_districts].to_numpy())

    volume_train, volume_test, _ = normalize(np.array(volume_train), np.array(volume_test))
    

    # traffic speed
    speed_data = pd.read_csv(f'{data_path}/traffic_speed/{city}-{year}.csv').drop(columns=['date', 'hour'])
    speed_train, speed_test = [], []

    for i in range(length, 721-n_steps):
        if i <= (train_length * 24): # before date 25th
            speed_train.append(speed_data.iloc[i-length:i, :n_districts].to_numpy())
        else:
            speed_test.append(speed_data.iloc[i-length:i, :n_districts].to_numpy())

    speed_train, speed_test, _ = normalize(np.array(speed_train), np.array(speed_test))
    

    # calendar
    calendar_data = pd.read_csv(f'{data_path}/calendar/calendar-{city}-{year}-{level}.csv')
    calendar_train, calendar_test = [], []
    
    location_calendar = []
    for location in selected_areas:
        location_calendar.append(calendar_data[calendar_data['location'] == location].iloc[:, 1:].to_numpy())

    location_calendar = np.concatenate(location_calendar, axis=1)

    for i in range(length, 721-n_steps):
        if i <= (train_length * 24): # before date 25th
            calendar_train.append(location_calendar[i:i+n_steps]) if is_realtime else calendar_train.append(location_calendar[i-length:i])
        else:
            calendar_test.append(location_calendar[i:i+n_steps]) if is_realtime else calendar_test.append(location_calendar[i-length:i])
    calendar_train, calendar_test = np.array(calendar_train), np.array(calendar_test)        
    calendar_train, calendar_test, _ = normalize(calendar_train.reshape(calendar_train.shape[0], calendar_train.shape[1], n_districts, -1), calendar_test.reshape(calendar_test.shape[0], calendar_test.shape[1], n_districts, -1))
    
    # Match Shape
    risk_train = risk_train[:,:,:,None]
    risk_test = risk_test[:,:,:,None]
    volume_train = volume_train[:,:,:,None]
    volume_test = volume_test[:,:,:,None]
    speed_train = speed_train[:,:,:,None]
    speed_test = speed_test[:,:,:,None]

    return {
        'risk': [risk_train, risk_test],
        'road': [road_train, road_test],
        'poi': [poi_train, poi_test],
        'demo': [demo_train, demo_test],
        'weather': [weather_train, weather_test],
        'calendar': [calendar_train, calendar_test],
        'volume': [volume_train, volume_test],
        'speed': [speed_train, speed_test],
        'dtg': [dtg_train, dtg_test],
        'y': [y_train, y_test],
        'y_scaled': [y_train_scaled, y_test_scaled],
        'selected_areas': selected_areas,
        'scaler': risk_scaler
    }

In [2]:
import numpy as np
import tensorflow as tf
import os


from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import top_k_accuracy_score as top_accuracy

def compute_error(y_true, y_pred):
    top_y_true, top_y_pred = y_true[np.nonzero(y_true > 1)], y_pred[np.nonzero(y_true > 1)]
    unique_idx = tuple(np.unique((np.nonzero(y_true > 0)[0], np.nonzero(y_true > 0)[1]), axis=1))  
    
    n_districts = y_true.shape[-1]

    mse_score = mse(np.ravel(y_true), np.ravel(y_pred))
    mae_score = mae(np.ravel(y_true), np.ravel(y_pred))
    mape_score = mape(top_y_true, top_y_pred)
    topk_acc = top_accuracy(np.argmax(y_true[unique_idx]/n_districts, axis=1), y_pred[unique_idx]/n_districts, k=round(n_districts*0.20), labels=[l for l in range(n_districts)])
    return { 'MAE': mae_score, 'MSE': mse_score, 'MAPE': mape_score, 'ACC': topk_acc }


def stepwise_error(y_true, y_pred, n_steps):    
    mae_scores, mse_scores, mape_scores, topk_accs = [], [], [], np.zeros(y_true.shape[:-1], dtype=int)
    topk_recalls = np.where(topk_accs != 0, topk_accs, np.nan)
        
    n_districts = y_true.shape[-1]
    
    for t in range(n_steps):
        y_true_t, y_pred_t = y_true[:,t,:], y_pred[:,t,:]
        top_y_true, top_y_pred = y_true_t[np.nonzero(y_true_t > 1)], y_pred_t[np.nonzero(y_true_t > 1)]
    
        mse_scores.append(mse(np.ravel(y_true_t), np.ravel(y_pred_t)))
        mae_scores.append(mae(np.ravel(y_true_t), np.ravel(y_pred_t)))
        mape_scores.append(mape(top_y_true, top_y_pred))
    
    for i in range(y_true.shape[0]):
        for t in range(n_steps):

            with tf.device('/cpu:0'):
                t_true = tf.math.top_k(y_true[i,t,:], k=round(n_districts * 0.20))
                t_true = set(t_true.indices.numpy()[[di for di, val in enumerate(t_true.values.numpy()) if val > 0]])
                t_pred = set(tf.math.top_k(y_pred[i,t,:], k=round(n_districts * 0.20)).indices.numpy())
            
                if len(t_true) > 0:
                    topk_recalls[i, t] = len(t_true.intersection(t_pred)) / len(t_true)
    
    return { 'MAE': mae_scores, 'MSE': mse_scores, 'MAPE': mape_scores, 'ACC': list(np.nanmean(topk_recalls, axis=0)), 'TOP_ACC': topk_recalls }

2025-12-19 05:00:13.754679: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766120413.952334      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766120414.007038      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766120414.487883      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766120414.487918      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766120414.487921      55 computation_placer.cc:177] computation placer alr

In [3]:
!pip install spektral==1.0.6

Collecting spektral==1.0.6
  Downloading spektral-1.0.6-py3-none-any.whl.metadata (5.7 kB)
Downloading spektral-1.0.6-py3-none-any.whl (114 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.4/114.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: spektral
Successfully installed spektral-1.0.6


In [4]:
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow import keras

from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, Activation, MultiHeadAttention, Dropout, RepeatVector, TimeDistributed
from tensorflow.keras.layers import Concatenate, Lambda, Reshape, GRU, BatchNormalization, Dot, Add, Bidirectional

from spektral.layers import GCNConv, GlobalAvgPool

es = tf.keras.callbacks.EarlyStopping(patience=10, monitor='val_loss', restore_best_weights=True)
lr = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001, decay_steps=10000, decay_rate=0.9)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)


class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)
    
def InterviewAttention(V, H):
    V_attn = Dense(V.shape[1], activation='relu')(V)
    V_attn = Dense(V_attn.shape[1], activation='sigmoid')(V_attn)
    return Dot(axes=-1)([H, V_attn])

def TemporalAttention(h_units, H, length):
    H = TransformerBlock(H.shape[-1], length, 2048)(H, training=True)
    H = Reshape(target_shape=[length, -1])(H)
    return GRU(h_units)(H)

def MG_TAR(x_train, y_train, x_val, y_val, configs, length=12, n_steps=6):
    tf.keras.backend.clear_session()
    
    _, _, _, _, _, node_features = x_train
    _, _, n_districts, n_features = node_features.shape
    gru_h, gcn_f, fc_h, n_layers, bn, d = configs

    A_S = Input(shape=[n_districts, n_districts]) # spatial closeness
    A_P = Input(shape=[n_districts, n_districts]) # functional similarity (POI)
    A_R = Input(shape=[n_districts, n_districts]) # road similarity
    A_D = Input(shape=[n_districts, n_districts]) # demographic similarity
    A_T = Input(shape=[length, n_districts, n_districts]) # traffic patterns

    F = Input(shape=[length, n_districts, n_features]) # node features
    
    H = [] # H_1 to H_T

    for t in range(length):
        # slice for each time step t
        Ft = Lambda(lambda f: f[:,t,:,:])(F)
        A_Tt = Lambda(lambda a: a[:,t,:,:])(A_T)

        X_S, X_P, X_R, X_D, X_Tt = Ft, Ft, Ft, Ft, Ft # input H_t0 time t layer 0
        H_S, H_P, H_R, H_D, H_Tt = X_S, X_P, X_R, X_D, X_Tt

        for i in range(n_layers): # using aggregation for each layer as in ST-MGCN ?
            H_S = GCNConv(gcn_f)([H_S, A_S]) # GCN for Adjacency Matrix
            if (i + 1) % 2 == 0:
                H_S = BatchNormalization()(H_S) if bn else H_S
            H_S = Activation('relu')(H_S)

            H_P = GCNConv(gcn_f)([H_P, A_P]) # GCN for POI Graph
            if (i + 1) % 2 == 0:
                H_P = BatchNormalization()(H_P) if bn else H_P
            H_P = Activation('relu')(H_P)

            H_D = GCNConv(gcn_f)([H_D, A_D])  # GCN for Demographic Graph
            if (i + 1) % 2 == 0:
                H_D = BatchNormalization()(H_D) if bn else H_D
            H_D = Activation('relu')(H_D)

            H_R = GCNConv(gcn_f)([H_R, A_R]) # GCN for Road Graph
            if (i + 1) % 2 == 0:
                H_R = BatchNormalization()(H_R) if bn else H_R
            H_R = Activation('relu')(H_R)   

            H_Tt = GCNConv(gcn_f)([H_Tt, A_Tt]) # GCN for Traffic Patterns
            if (i + 1) % 2 == 0:
                H_Tt = BatchNormalization()(H_Tt) if bn else H_Tt
            H_Tt = Activation('relu')(H_Tt) 

            
        H_S = GCNConv(1, activation='relu')([H_S, A_S]) 
        H_P = GCNConv(1, activation='relu')([H_P, A_P])
        H_D = GCNConv(1, activation='relu')([H_D, A_D])
        H_R = GCNConv(1, activation='relu')([H_R, A_R])
        H_Tt = GCNConv(1, activation='relu')([H_Tt, A_Tt])
        
        # summarize each channel (i.e., view) into a scalar 
        z = Concatenate()([GlobalAvgPool()(H_S), GlobalAvgPool()(H_P), GlobalAvgPool()(H_D), GlobalAvgPool()(H_R), GlobalAvgPool()(H_Tt)]) # concatenate it into vector z
        Ht = Concatenate()([H_S, H_P, H_D, H_R, H_Tt]) # concatenate each view i to Ht
        H.append(InterviewAttention(z, Ht)) # get scaled Ht
        
    H = Concatenate()(H)
    H = Reshape(target_shape=[length, n_districts, 1])(H)
    H = Concatenate()([H, F])
    H = TemporalAttention(gru_h, H, length)
    
    H = Dense(fc_h, activation='relu')(H)
    H = Dropout(0.1)(H)
    H = Dense(fc_h, activation='relu')(H)
    H = Dropout(0.1)(H)
        
    y = Dense(n_steps * n_districts)(H)
    y = Reshape([n_steps, n_districts])(y)

    # A_train, A_poi_train, A_demo_train, A_road_train, A_traffic_train, node_features_train
    model = Model(inputs=[A_S, A_P, A_D, A_R, A_T, F], outputs=y)
    model.compile(optimizer=optimizer, loss=tf.keras.losses.Huber(delta=d))
    model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_val, y_val), callbacks=[es], verbose=0)
    return model

def MG_TAR_V(x_train, y_train, x_val, y_val, configs, length=12, n_steps=6, view='All'):
    tf.keras.backend.clear_session()
    
    _, _, _, _, _, node_features = x_train
    _, _, n_districts, n_features = node_features.shape
    gru_h, gcn_f, fc_h, n_layers, bn, d = configs

    A_S = Input(shape=[n_districts, n_districts]) # spatial closeness
    A_P = Input(shape=[n_districts, n_districts]) # functional similarity (POI)
    A_R = Input(shape=[n_districts, n_districts]) # road similarity
    A_D = Input(shape=[n_districts, n_districts]) # demographic similarity
    A_T = Input(shape=[length, n_districts, n_districts]) # traffic patterns

    F = Input(shape=[length, n_districts, n_features]) # node features
    
    H = [] # H_1 to H_T

    for t in range(length):
        # slice for each time step t
        Ft = Lambda(lambda f: f[:,t,:,:])(F)
        A_Tt = Lambda(lambda a: a[:,t,:,:])(A_T)

        X_S, X_P, X_R, X_D, X_Tt = Ft, Ft, Ft, Ft, Ft # input H_t0 time t layer 0
        H_S, H_P, H_R, H_D, H_Tt = X_S, X_P, X_R, X_D, X_Tt

        for i in range(n_layers): # using aggregation for each layer as in ST-MGCN ?
            if view == 'All' or view == 'OS':
                H_S = GCNConv(gcn_f)([H_S, A_S]) # GCN for Adjacency Matrix
                if (i + 1) % 2 == 0:
                    H_S = BatchNormalization()(H_S) if bn else H_S
                H_S = Activation('relu')(H_S)
                
            if view == 'All' or (view != 'P' and view != 'OS'):
                H_P = GCNConv(gcn_f)([H_P, A_P]) # GCN for POI Graph
                if (i + 1) % 2 == 0:
                    H_P = BatchNormalization()(H_P) if bn else H_P
                H_P = Activation('relu')(H_P)
                
            if view == 'All' or (view != 'D' and view != 'OS'):
                H_D = GCNConv(gcn_f)([H_D, A_D])  # GCN for Demographic Graph
                if (i + 1) % 2 == 0:
                    H_D = BatchNormalization()(H_D) if bn else H_D
                H_D = Activation('relu')(H_D)
            
            if view == 'All' or (view != 'R' and view != 'OS'):
                H_R = GCNConv(gcn_f)([H_R, A_R]) # GCN for Road Graph
                if (i + 1) % 2 == 0:
                    H_R = BatchNormalization()(H_R) if bn else H_R
                H_R = Activation('relu')(H_R)   
                
            if view == 'All' or (view != 'T' and view != 'OS'):
                H_Tt = GCNConv(gcn_f)([H_Tt, A_Tt]) # GCN for Traffic Patterns
                if (i + 1) % 2 == 0:
                    H_Tt = BatchNormalization()(H_Tt) if bn else H_Tt
                H_Tt = Activation('relu')(H_Tt) 

        
        if view == 'All':
            H_S = GCNConv(1, activation='relu')([H_S, A_S]) 
            H_P = GCNConv(1, activation='relu')([H_P, A_P])
            H_D = GCNConv(1, activation='relu')([H_D, A_D])
            H_R = GCNConv(1, activation='relu')([H_R, A_R])
            H_Tt = GCNConv(1, activation='relu')([H_Tt, A_Tt])

            # summarize each channel (i.e., view) into a scalar 
            z = Concatenate()([GlobalAvgPool()(H_S), GlobalAvgPool()(H_P), GlobalAvgPool()(H_D), GlobalAvgPool()(H_R), GlobalAvgPool()(H_Tt)]) # concatenate it into vector z
            Ht = Concatenate()([H_S, H_P, H_D, H_R, H_Tt]) # concatenate each view i to Ht
            H.append(InterviewAttention(z, Ht)) # get scaled Ht

        elif view == 'OS':
            H_S = GCNConv(1, activation='relu')([H_S, A_S]) 

            # summarize each channel (i.e., view) into a scalar 
            z = GlobalAvgPool()(H_S) # concatenate it into vector z
            Ht = H_S # concatenate each view i to Ht
            H.append(InterviewAttention(z, Ht)) # get scaled Ht

        elif view == 'S':
            H_P = GCNConv(1, activation='relu')([H_P, A_P])
            H_D = GCNConv(1, activation='relu')([H_D, A_D])
            H_R = GCNConv(1, activation='relu')([H_R, A_R])
            H_Tt = GCNConv(1, activation='relu')([H_Tt, A_Tt])

            # summarize each channel (i.e., view) into a scalar 
            z = Concatenate()([GlobalAvgPool()(H_P), GlobalAvgPool()(H_D), GlobalAvgPool()(H_R), GlobalAvgPool()(H_Tt)]) # concatenate it into vector z
            Ht = Concatenate()([H_P, H_D, H_R, H_Tt]) # concatenate each view i to Ht
            H.append(InterviewAttention(z, Ht)) # get scaled Ht

        elif view == 'P':
            H_S = GCNConv(1, activation='relu')([H_S, A_S]) 
            H_D = GCNConv(1, activation='relu')([H_D, A_D])
            H_R = GCNConv(1, activation='relu')([H_R, A_R])
            H_Tt = GCNConv(1, activation='relu')([H_Tt, A_Tt])

            # summarize each channel (i.e., view) into a scalar 
            z = Concatenate()([GlobalAvgPool()(H_S), GlobalAvgPool()(H_D), GlobalAvgPool()(H_R), GlobalAvgPool()(H_Tt)]) # concatenate it into vector z
            Ht = Concatenate()([H_S, H_D, H_R, H_Tt]) # concatenate each view i to Ht
            H.append(InterviewAttention(z, Ht)) # get scaled Ht

        elif view == 'R':
            H_S = GCNConv(1, activation='relu')([H_S, A_S]) 
            H_P = GCNConv(1, activation='relu')([H_P, A_P])
            H_D = GCNConv(1, activation='relu')([H_D, A_D])
            H_Tt = GCNConv(1, activation='relu')([H_Tt, A_Tt])

            # summarize each channel (i.e., view) into a scalar 
            z = Concatenate()([GlobalAvgPool()(H_S), GlobalAvgPool()(H_P), GlobalAvgPool()(H_D), GlobalAvgPool()(H_Tt)]) # concatenate it into vector z
            Ht = Concatenate()([H_S, H_P, H_D, H_Tt]) # concatenate each view i to Ht
            H.append(InterviewAttention(z, Ht)) # get scaled Ht

        elif view == 'D':
            H_S = GCNConv(1, activation='relu')([H_S, A_S]) 
            H_P = GCNConv(1, activation='relu')([H_P, A_P])
            H_R = GCNConv(1, activation='relu')([H_R, A_R])
            H_Tt = GCNConv(1, activation='relu')([H_Tt, A_Tt])

            # summarize each channel (i.e., view) into a scalar 
            z = Concatenate()([GlobalAvgPool()(H_S), GlobalAvgPool()(H_P), GlobalAvgPool()(H_R), GlobalAvgPool()(H_Tt)]) # concatenate it into vector z
            Ht = Concatenate()([H_S, H_P, H_R, H_Tt]) # concatenate each view i to Ht
            H.append(InterviewAttention(z, Ht)) # get scaled Ht

        elif view == 'T':
            H_S = GCNConv(1, activation='relu')([H_S, A_S]) 
            H_P = GCNConv(1, activation='relu')([H_P, A_P])
            H_D = GCNConv(1, activation='relu')([H_D, A_D])
            H_R = GCNConv(1, activation='relu')([H_R, A_R])

            # summarize each channel (i.e., view) into a scalar 
            z = Concatenate()([GlobalAvgPool()(H_S), GlobalAvgPool()(H_P), GlobalAvgPool()(H_D), GlobalAvgPool()(H_R)]) # concatenate it into vector z
            Ht = Concatenate()([H_S, H_P, H_D, H_R]) # concatenate each view i to Ht
            H.append(InterviewAttention(z, Ht)) # get scaled Ht

        
    H = Concatenate()(H)
    H = Reshape(target_shape=[length, n_districts, 1])(H)
    H = Concatenate()([H, F])
    H = TemporalAttention(gru_h, H, length)
    
    
    H = Dense(fc_h, activation='relu')(H)
    H = Dropout(0.1)(H)
    H = Dense(fc_h, activation='relu')(H)
    H = Dropout(0.1)(H)
        
    y = Dense(n_steps * n_districts)(H)
    y = Reshape([n_steps, n_districts])(y)

    # A_train, A_poi_train, A_demo_train, A_road_train, A_traffic_train, node_features_train
    model = Model(inputs=[A_S, A_P, A_D, A_R, A_T, F], outputs=y)
    model.compile(optimizer=optimizer, loss=tf.keras.losses.Huber(delta=d))
    model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_val, y_val), callbacks=[es], verbose=0)
    return model

def MG_TAR_A(x_train, y_train, x_val, y_val, configs, length=12, n_steps=6, attn='All'):
    tf.keras.backend.clear_session()
    
    _, _, _, _, _, node_features = x_train
    _, _, n_districts, n_features = node_features.shape
    gru_h, gcn_f, fc_h, n_layers, bn, d = configs

    A_S = Input(shape=[n_districts, n_districts]) # spatial closeness
    A_P = Input(shape=[n_districts, n_districts]) # functional similarity (POI)
    A_R = Input(shape=[n_districts, n_districts]) # road similarity
    A_D = Input(shape=[n_districts, n_districts]) # demographic similarity
    A_T = Input(shape=[length, n_districts, n_districts]) # traffic patterns

    F = Input(shape=[length, n_districts, n_features]) # node features
    
    H = [] # H_1 to H_T

    for t in range(length):
        # slice for each time step t
        Ft = Lambda(lambda f: f[:,t,:,:])(F)
        A_Tt = Lambda(lambda a: a[:,t,:,:])(A_T)

        X_S, X_P, X_R, X_D, X_Tt = Ft, Ft, Ft, Ft, Ft # input H_t0 time t layer 0
        H_S, H_P, H_R, H_D, H_Tt = X_S, X_P, X_R, X_D, X_Tt

        for i in range(n_layers): # using aggregation for each layer as in ST-MGCN ?
            H_S = GCNConv(gcn_f)([H_S, A_S]) # GCN for Adjacency Matrix
            if (i + 1) % 2 == 0:
                H_S = BatchNormalization()(H_S) if bn else H_S
            H_S = Activation('relu')(H_S)

            H_P = GCNConv(gcn_f)([H_P, A_P]) # GCN for POI Graph
            if (i + 1) % 2 == 0:
                H_P = BatchNormalization()(H_P) if bn else H_P
            H_P = Activation('relu')(H_P)

            H_D = GCNConv(gcn_f)([H_D, A_D])  # GCN for Demographic Graph
            if (i + 1) % 2 == 0:
                H_D = BatchNormalization()(H_D) if bn else H_D
            H_D = Activation('relu')(H_D)

            H_R = GCNConv(gcn_f)([H_R, A_R]) # GCN for Road Graph
            if (i + 1) % 2 == 0:
                H_R = BatchNormalization()(H_R) if bn else H_R
            H_R = Activation('relu')(H_R)   

            H_Tt = GCNConv(gcn_f)([H_Tt, A_Tt]) # GCN for Traffic Patterns
            if (i + 1) % 2 == 0:
                H_Tt = BatchNormalization()(H_Tt) if bn else H_Tt
            H_Tt = Activation('relu')(H_Tt) 

            
        H_S = GCNConv(1, activation='relu')([H_S, A_S]) 
        H_P = GCNConv(1, activation='relu')([H_P, A_P])
        H_D = GCNConv(1, activation='relu')([H_D, A_D])
        H_R = GCNConv(1, activation='relu')([H_R, A_R])
        H_Tt = GCNConv(1, activation='relu')([H_Tt, A_Tt])
        
        if attn == 'View':
            z = Concatenate()([GlobalAvgPool()(H_S), GlobalAvgPool()(H_P), GlobalAvgPool()(H_D), GlobalAvgPool()(H_R), GlobalAvgPool()(H_Tt)]) # concatenate it into vector z
            H.append(z)
        else:
            # summarize each channel (i.e., view) into a scalar 
            z = Concatenate()([GlobalAvgPool()(H_S), GlobalAvgPool()(H_P), GlobalAvgPool()(H_D), GlobalAvgPool()(H_R), GlobalAvgPool()(H_Tt)]) # concatenate it into vector z
            Ht = Concatenate()([H_S, H_P, H_D, H_R, H_Tt]) # concatenate each view i to Ht
            H.append(InterviewAttention(z, Ht)) # get scaled Ht

    if attn == 'Temp':
        H = Concatenate()(H)
    elif attn == 'View':
        H = Concatenate()(H)
        H = Reshape(target_shape=[length, 5, 1])(H)
        H = TemporalAttention(gru_h, H, length)
    else:
        H = Concatenate()(H)
        H = Reshape(target_shape=[length, n_districts, 1])(H)
        H = Concatenate()([H, F])
        H = TemporalAttention(gru_h, H, length)
    
    
    H = Dense(fc_h, activation='relu')(H)
    H = Dropout(0.1)(H)
    H = Dense(fc_h, activation='relu')(H)
    H = Dropout(0.1)(H)
        
    y = Dense(n_steps * n_districts)(H)
    y = Reshape([n_steps, n_districts])(y)

    # A_train, A_poi_train, A_demo_train, A_road_train, A_traffic_train, node_features_train
    model = Model(inputs=[A_S, A_P, A_D, A_R, A_T, F], outputs=y)
    model.compile(optimizer=optimizer, loss=tf.keras.losses.Huber(delta=d))
    model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_val, y_val), callbacks=[es], verbose=0)
    return model

I0000 00:00:1766120431.460211      55 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1766120431.464098      55 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


In [5]:
import os
import warnings
import logging
import itertools
import json
import tensorflow as tf
import pandas as pd
import numpy as np
from scipy.stats import pearsonr # Cần thêm thư viện này để tính PCC

# Giả định các module này nằm cùng thư mục
from tqdm.notebook import tqdm

logging.disable(logging.WARNING)
warnings.filterwarnings('ignore')

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

# --- CONFIGURATION ---
city = 'Seoul'
year = '2016' # '2016' or '2018'
n_steps, length = 6, 12
metric = 'jaccard'
model_configs = {
    "2018": [
        [256, 256, 512, 4, False, 11], 
        [256, 128, 1024, 4, True, 13], 
        [256, 128, 512, 6, True, 7], 
        [512, 128, 512, 8, True, 13], 
        [256, 128, 512, 8, True, 13]
    ], 
    "2016": [
        [256, 128, 512, 4, True, 9], 
        [256, 128, 1024, 6, True, 9], 
        [512, 128, 512, 6, True, 7], 
        [256, 128, 512, 8, True, 9], 
        [512, 128, 1024, 8, True, 9]
    ]
}
# --- DATA LOADING ---
datasets = data_loader('/kaggle/input/mg-tar', city, year, length=length, n_steps=n_steps, is_scale=True, temporal_copy=True)
n_districts = len(datasets['selected_areas'])

# Extract Features to their corresponding variables
risk_train, risk_test = datasets['risk'][0], datasets['risk'][1]
demo_train, demo_test = datasets['demo'][0], datasets['demo'][1]
poi_train, poi_test = datasets['poi'][0], datasets['poi'][1]
road_train, road_test = datasets['road'][0], datasets['road'][1]
volume_train, volume_test = datasets['volume'][0], datasets['volume'][1]
speed_train, speed_test = datasets['speed'][0], datasets['speed'][1]
weather_train, weather_test = datasets['weather'][0], datasets['weather'][1]
calendar_train, calendar_test = datasets['calendar'][0], datasets['calendar'][1]
c_train, c_test = datasets['dtg'][0], datasets['dtg'][1]
y_train, y_test = datasets['y'][0], datasets['y'][1]

# Train - Validation Split
val_idx = round(risk_train.shape[0] * 0.10) # 10% of Train Set
risk_train, risk_val = risk_train[:-val_idx], risk_train[-val_idx:]
demo_train, demo_val = demo_train[:-val_idx], demo_train[-val_idx:]
poi_train, poi_val = poi_train[:-val_idx], poi_train[-val_idx:]
road_train, road_val = road_train[:-val_idx], road_train[-val_idx:]
volume_train, volume_val = volume_train[:-val_idx], volume_train[-val_idx:]
speed_train, speed_val = speed_train[:-val_idx], speed_train[-val_idx:]
weather_train, weather_val = weather_train[:-val_idx], weather_train[-val_idx:]
calendar_train, calendar_val = calendar_train[:-val_idx], calendar_train[-val_idx:]
c_train, c_val = c_train[:-val_idx], c_train[-val_idx:]
y_train, y_val = y_train[:-val_idx], y_train[-val_idx:]

# Contextual & Adjacency Matrices
base_path = '/kaggle/input/mg-tar/graph_data'
A = pd.read_csv(f'{base_path}/{city}-normalized-district.csv', engine='c', index_col=0).to_numpy()
A_poi = pd.read_csv(f'{base_path}/{city}-{year}-poi-normalized-district-{metric}.csv', engine='c', index_col=0).to_numpy()
A_demo = pd.read_csv(f'{base_path}/{city}-{year}-demo-normalized-district-{metric}.csv', engine='c', index_col=0).to_numpy()
A_road = pd.read_csv(f'{base_path}/{city}-{year}-road-normalized-district-{metric}.csv', engine='c', index_col=0).to_numpy()

# Tiling matrices for batch processing
A_train, A_val = np.tile(A, (risk_train.shape[0], 1, 1)), np.tile(A, (risk_val.shape[0], 1, 1))
A_poi_train, A_poi_val = np.tile(A_poi, (risk_train.shape[0], 1, 1)), np.tile(A_poi, (risk_val.shape[0], 1, 1))
A_demo_train, A_demo_val = np.tile(A_demo, (risk_train.shape[0], 1, 1)), np.tile(A_demo, (risk_val.shape[0], 1, 1))
A_road_train, A_road_val = np.tile(A_road, (risk_train.shape[0], 1, 1)), np.tile(A_road, (risk_val.shape[0], 1, 1))

A_test = np.tile(A, (risk_test.shape[0], 1, 1))
A_poi_test = np.tile(A_poi, (risk_test.shape[0], 1, 1))
A_demo_test = np.tile(A_demo, (risk_test.shape[0], 1, 1))
A_road_test = np.tile(A_road, (risk_test.shape[0], 1, 1))

with open(f'{base_path}/{city}-{year}-traffic-district-normalized-train-{metric}.npy', 'rb') as f:
    A_traffic_train = np.load(f)
    A_traffic_train, A_traffic_val = A_traffic_train[:-val_idx], A_traffic_train[-val_idx:]

with open(f'{base_path}/{city}-{year}-traffic-district-normalized-test-{metric}.npy', 'rb') as f:
    A_traffic_test = np.load(f)

# Features Aggregation
node_features_train = np.concatenate([risk_train, demo_train, poi_train, road_train, volume_train, speed_train, weather_train, calendar_train, c_train], axis=-1)
node_features_val = np.concatenate([risk_val, demo_val, poi_val, road_val, volume_val, speed_val, weather_val, calendar_val, c_val], axis=-1)
node_features_test = np.concatenate([risk_test, demo_test, poi_test, road_test, volume_test, speed_test, weather_test, calendar_test, c_test], axis=-1)

x_train = [A_train, A_poi_train, A_demo_train, A_road_train, A_traffic_train, node_features_train]
x_val = [A_val, A_poi_val, A_demo_val, A_road_val, A_traffic_val, node_features_val]
x_test = [A_test, A_poi_test, A_demo_test, A_road_test, A_traffic_test, node_features_test]
print(1)
# --- MODEL LOADING & PREDICTION ---
best_config = model_configs[year][0]
best_model = MG_TAR(x_train, y_train, x_val, y_val, best_config)
y_pred = best_model.predict(x_test)

# --- EVALUATION METRICS (MAE, RMSE, PCC) ---

# Đảm bảo shape phù hợp để tính toán
y_true_flat = y_test.flatten()
y_pred_flat = y_pred.flatten()

# 1. MAE (Mean Absolute Error) -> Đổi tên biến thành global_mae
global_mae = np.mean(np.abs(y_true_flat - y_pred_flat))

# 2. RMSE (Root Mean Squared Error) -> Đổi tên biến thành global_rmse
global_rmse = np.sqrt(np.mean((y_true_flat - y_pred_flat) ** 2))

# 3. PCC (Pearson Correlation Coefficient)
pcc_val, _ = pearsonr(y_true_flat, y_pred_flat)

print(f'\n=== Model Performance Seoul ({year}) ===')
print(f"MAE : {global_mae:.5f}")
print(f"RMSE: {global_rmse:.5f}")
print(f"PCC : {pcc_val:.5f}")

# Bây giờ chạy hàm stepwise_error sẽ không bị lỗi nữa
sw_errors = stepwise_error(y_test, y_pred, n_steps)
print(f"Original MSE: {np.average(sw_errors['MSE'])} - Original Acc@K: {np.average(sw_errors['ACC'])}")

1


E0000 00:00:1766120566.697039      55 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_1_1/transformer_block_1/dropout_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
I0000 00:00:1766120585.582495     130 cuda_dnn.cc:529] Loaded cuDNN version 91002


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2s/step

=== Model Performance Seoul (2016) ===
MAE : 0.87456
RMSE: 1.61599
PCC : 0.16956
Original MSE: 2.611439267794291 - Original Acc@K: 0.32299498746867156
