In [22]:
import numpy as np
import pandas as pd
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from torch.utils.data import DataLoader

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime
import torch.utils
import torch.utils.data

import os
import sys
cur_dir = os.path.dirname(os.path.abspath("__file__"))  # Gets the current notebook directory
src_dir = os.path.join(cur_dir, '../')  # Constructs the path to the 'src' directory
# Add the 'src' directory to sys.path
if src_dir not in sys.path:
    sys.path.append(src_dir)

from src.constant import *
from tqdm.notebook import tqdm

from src.MyDataset import MyDataset
import random

In [26]:

lookback = 20
dir = '../data/PandasData/Sampled/'
ds = MyDataset(lookback=lookback)
train_batch_size = 4
test_batch_size = 16

def process_data(df_dir : str, target_freq : int = 10):
    df: pd.DataFrame = pd.read_pickle(df_dir)
    df.dropna(inplace=True, how='any')
    f_per_sec = df.groupby('TimestampID').count().mean().mean()
    if f_per_sec < target_freq:
        raise ValueError('The frequency of the data is lower than the target frequency')
    elif int(f_per_sec) == target_freq:
        pass
    else:
        resample_ratio = int(f_per_sec/target_freq)
        df = df.iloc[::resample_ratio, :]
    # # for origin
    for drop_column in ['Confidence', 
                          'Timestamp', 'TimestampID', 
                          'DatapointID', 'PID', 'SCN', 'U_X', 'U_Y', 'U_Z', 
                          'AGV_Z', 'User_Z', 'GazeOrigin_Z', 'User_Pitch', 'User_Yaw', 'User_Roll', 
                          'EyeTarget']:
        df = df.drop(columns=[drop_column], errors='ignore')

    target_columns = ['User_X', 'User_Y']
    # Reorder columns
    new_columns = target_columns + [col for col in df.columns if col not in target_columns]
    df = df[new_columns]

    return df

for file in os.listdir(dir):
    if file.endswith('.pkl'):
        df = process_data(dir+file)
        ds.read_data(df)
        
dataframes = ds.dataset

### format the data

In [27]:
for i, df in enumerate(dataframes):
    df['time_idx'] = range(len(df))
    df['series_id'] = i 

random.shuffle(dataframes)
print(f"Trajectories: {len(dataframes)}")
all_trajectories = len(dataframes)
# Concatenate all dataframes into one
full_df = pd.concat(dataframes).reset_index(drop=True)

Trajectories: 368


In [28]:
full_df.columns

Index(['User_X', 'User_Y', 'AGV_distance_X', 'AGV_distance_Y', 'AGV_speed_X',
       'AGV_speed_Y', 'AGV_speed', 'User_speed_X', 'User_speed_Y',
       'User_speed', 'User_velocity_X', 'User_velocity_Y', 'Wait_time',
       'Gazing_station', 'closest_station', 'distance_to_closest_station',
       'distance_to_closest_station_X', 'distance_to_closest_station_Y',
       'start_station_X', 'start_station_Y', 'end_station_X', 'end_station_Y',
       'distance_from_start_station_X', 'distance_from_start_station_Y',
       'distance_from_end_station_X', 'distance_from_end_station_Y',
       'GazeDirection_X', 'GazeDirection_Y', 'GazeDirection_Z', 'AGV_X',
       'AGV_Y', 'rolling_avg', 'time_idx', 'series_id'],
      dtype='object')

In [45]:
from pytorch_forecasting import TimeSeriesDataSet

max_encoder_length = 30
max_prediction_length = 20
frac = 0.8
target = ['User_X', 'User_Y']


time_varying_known_reals=['AGV_speed_X', 'AGV_speed_Y', 'AGV_speed', 
   'User_speed', 'User_velocity_X', 'User_velocity_Y', 'Wait_time',
   'start_station_X', 'start_station_Y', 'end_station_X', 'end_station_Y',
   'AGV_X', 'AGV_Y', 'rolling_avg'
   ]

time_varying_unknown_reals=['User_X', 'User_Y', 'AGV_distance_X', 'AGV_distance_Y', 
    'User_speed_X', 'User_speed_Y', 'distance_to_closest_station',
    'distance_to_closest_station_X', 'distance_to_closest_station_Y',
    'distance_from_start_station_X', 'distance_from_start_station_Y',
    'distance_from_end_station_X', 'distance_from_end_station_Y', 
    'GazeDirection_X', 'GazeDirection_Y', 'GazeDirection_Z', 
    'Gazing_station', 'closest_station'
    ]

training = TimeSeriesDataSet(
    full_df[lambda x: x.series_id <= all_trajectories * frac],  # Adjust based on your timeline
    time_idx="time_idx",
    target=target,
    group_ids=["series_id"],
    min_encoder_length=max_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=max_prediction_length,
    max_prediction_length=max_prediction_length,
    time_varying_known_reals= time_varying_known_reals,
    time_varying_unknown_reals= time_varying_unknown_reals,
)

validation = TimeSeriesDataSet(
    full_df[lambda x: x.series_id > all_trajectories * frac],  # Adjust based on your timeline
    time_idx="time_idx",
    target=target,
    group_ids=["series_id"],
    min_encoder_length=max_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=max_prediction_length,
    max_prediction_length=max_prediction_length,
    time_varying_known_reals= time_varying_known_reals,
    time_varying_unknown_reals= time_varying_unknown_reals,
)

In [46]:
from torch.utils.data import DataLoader

batch_size = 16  # Adjust based on your GPU capacity
train_dataloader = DataLoader(training, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(validation, batch_size=batch_size, shuffle=False)


In [47]:
from pytorch_forecasting import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning import Trainer

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=32,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=2,  # Default quantiles
    loss=nn.MSELoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

trainer = Trainer(max_epochs=20, gpus=1)
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader
)


/home/shaoze/anaconda3/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/shaoze/anaconda3/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.


TypeError: 'int' object is not iterable