In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim


import os
import sys
# The kernel's working directory (the notebook is expected to be launched from its own folder).
cur_dir = os.getcwd()
# Parent of the working directory; it has to be importable so that `src.*` packages resolve.
src_dir = os.path.join(cur_dir, '../')
# Register the parent directory once, even if this cell is re-run.
if src_dir not in sys.path:
    sys.path.append(src_dir)

from src.constant import *
from tqdm.notebook import tqdm

from src.MyDataset import MyDataset
from src.TraPredModel import TraPredModel

In [12]:
import torch.utils
import torch.utils.data


# Window length passed to MyDataset here and to the model in a later cell
# (presumably the number of past time steps per sample — TODO confirm in MyDataset).
lookback = 20
# Folder scanned for '.pkl' recordings further down in this cell.
# NOTE(review): the name `dir` shadows the Python builtin `dir()` for the rest of the session.
dir = '../data/PandasData/Sampled/'
# Project dataset object that receives each processed frame via `read_data` below.
ds = MyDataset(lookback=lookback)

def process_data(df_dir : str, target_freq : int = 10) -> pd.DataFrame:
    """Load one pickled recording, down-sample it and keep only the model columns.

    Steps:
      1. Read the pickle at ``df_dir`` and drop every row that contains a NaN.
      2. Estimate the sampling rate as the mean number of rows per ``TimestampID``
         (presumably one ``TimestampID`` per second — TODO confirm).
      3. Keep every ``int(rate / target_freq)``-th row when that ratio exceeds 1.
      4. Drop bookkeeping / unused columns (missing ones are ignored).
      5. Move ``User_X`` and ``User_Y`` to the front; the remaining columns keep
         their relative order.

    Args:
        df_dir: Path of the pickle FILE to load (despite the name, not a directory).
        target_freq: Desired number of rows per ``TimestampID`` after down-sampling.

    Returns:
        The processed DataFrame with ``User_X``, ``User_Y`` as the first two columns.

    Raises:
        ValueError: ``target_freq`` is not positive, no complete rows remain after
            dropping NaNs, or the measured rate is below ``target_freq``.
        KeyError: ``TimestampID``, ``User_X`` or ``User_Y`` is missing from the data.
    """
    if target_freq <= 0:
        raise ValueError('target_freq must be a positive integer')

    # NOTE(security): unpickling can execute arbitrary code; only load trusted files.
    df: pd.DataFrame = pd.read_pickle(df_dir)
    df = df.dropna(how='any')
    if df.empty:
        # Without this guard the rate below is NaN and int(NaN) fails with an opaque message.
        raise ValueError(f'No complete rows left in {df_dir!r} after dropping NaN values')

    # Mean row count per TimestampID, averaged over all remaining columns.
    f_per_sec = df.groupby('TimestampID').count().mean().mean()
    if f_per_sec < target_freq:
        raise ValueError('The frequency of the data is lower than the target frequency')

    # A ratio of 1 means the data is already (approximately) at the target rate.
    resample_ratio = int(f_per_sec / target_freq)
    if resample_ratio > 1:
        df = df.iloc[::resample_ratio, :]

    # Columns that are not fed to the model; absent ones are skipped silently.
    unused_columns = ['Confidence', 'Timestamp', 'TimestampID',
                      'DatapointID', 'PID', 'SCN', 'U_X', 'U_Y', 'U_Z',
                      'AGV_Z', 'User_Z', 'GazeOrigin_Z', 'User_Pitch', 'User_Yaw', 'User_Roll',
                      'EyeTarget']
    df = df.drop(columns=unused_columns, errors='ignore')

    # Prediction targets go first so downstream code can slice them with [..., :2].
    target_columns = ['User_X', 'User_Y']
    new_columns = target_columns + [col for col in df.columns if col not in target_columns]
    df = df[new_columns]

    return df

# Collect the pickled recordings in a deterministic order: os.listdir() returns
# entries in arbitrary order, which would make the load order differ between runs.
pickle_files = sorted(name for name in os.listdir(dir) if name.endswith('.pkl'))
if not pickle_files:
    # Fail early: otherwise `df` below is undefined (or silently stale from an earlier run).
    raise FileNotFoundError(f"No .pkl files found in {dir!r}")

for file in pickle_files:
    df = process_data(os.path.join(dir, file))
    ds.read_data(df)

# Bare annotations only document the expected types of the two loaders.
train: torch.utils.data.DataLoader
test: torch.utils.data.DataLoader
train, test = ds.split_data(frac=0.9, shuffle=True, batch_size=16)


feature_dim = ds.feature_dim
# `df` is the frame of the LAST file processed; its columns are shown as a sample.
print(f"columns : {df.columns} \nfeature_dim : {feature_dim}")

columns : Index(['User_X', 'User_Y', 'AGV_distance_X', 'AGV_distance_Y', 'AGV_speed_X',
       'AGV_speed_Y', 'AGV_speed', 'User_speed_X', 'User_speed_Y',
       'User_speed', 'User_velocity_X', 'User_velocity_Y', 'Wait_time',
       'intent_to_cross', 'Gazing_station', 'possible_interaction',
       'facing_along_sidewalk', 'facing_to_road', 'On_sidewalks', 'On_road',
       'closest_station', 'distance_to_closest_station',
       'distance_to_closest_station_X', 'distance_to_closest_station_Y',
       'looking_at_AGV', 'start_station_X', 'start_station_Y', 'end_station_X',
       'end_station_Y', 'distance_from_start_station_X',
       'distance_from_start_station_Y', 'distance_from_end_station_X',
       'distance_from_end_station_Y', 'facing_start_station',
       'facing_end_station', 'GazeDirection_X', 'GazeDirection_Y',
       'GazeDirection_Z', 'AGV_X', 'AGV_Y', 'AGV_name',
       'looking_at_closest_station'],
      dtype='object') 
feature_dim : 31


In [13]:
# Peek at the first training batch only, to check the tensor shapes.
for i, batch in enumerate(train):
    X, y = batch
    print(X.shape, y.shape)
    break

# Number of batches in each loader.
n_train_batches, n_test_batches = len(train), len(test)
print(n_train_batches, n_test_batches)

torch.Size([16, 20, 31]) torch.Size([16, 20, 31])
8751 973


In [14]:
# Project model, sized from the dataset's feature count and the shared window length.
model = TraPredModel(
    input_size=feature_dim,
    lookback=lookback,
)
# Mean-squared-error criterion used for both training and validation.
loss_fn = nn.MSELoss()
# AdamW over all model parameters with a fixed learning rate.
optimizer = optim.AdamW(model.parameters(), lr=1e-3)


In [16]:
n_epochs = 1
eval_step = 100  # run a validation pass every `eval_step` training steps

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device}")
model.to(device)

train_all = len(train)

# Per-step training loss (MSE), consumed by the following cell.
loss_all = []


def _mean_batch_rmse(net, loader, criterion, run_device):
    """Return the mean of the per-batch RMSE over ``loader``.

    Only the first two target features (``[:, :, :2]``) are scored, matching the
    training loss. Batches whose RMSE is NaN are skipped, and the mean is taken
    over the batches that were actually counted (not over ``len(loader)``), so
    skipped batches no longer drag the reported value down.

    Returns ``float('nan')`` when no batch produced a valid value.
    The caller is responsible for switching ``net`` to eval mode.
    """
    batch_rmse = []
    with torch.no_grad():
        for X_test_batch, y_test_batch in tqdm(loader):
            X_test_batch = X_test_batch.float().to(run_device)
            y_test_batch = y_test_batch.float().to(run_device)
            y_test_pred = net(X_test_batch)
            test_mse = torch.mean(criterion(y_test_pred, y_test_batch[:, :, :2]))
            test_rmse = torch.sqrt(test_mse)
            if not torch.isnan(test_rmse):
                batch_rmse.append(test_rmse.item())
    if not batch_rmse:
        return float('nan')
    return sum(batch_rmse) / len(batch_rmse)


for epoch in range(n_epochs):
    model.train()
    for step, (X_batch, y_batch) in tqdm(enumerate(train), total = train_all):
        X_batch = X_batch.float().to(device)
        y_batch = y_batch.float().to(device)

        y_pred = model(X_batch)
        # Only the first two features of the target are predicted.
        # torch.mean is a no-op for a scalar loss but keeps the value scalar
        # should the criterion be configured with reduction='none'.
        loss = torch.mean(loss_fn(y_pred, y_batch[:, :, :2]))
        if torch.isnan(loss):
            # Skip the update (and this step's validation check) for a NaN batch.
            print("Loss is NaN")
            continue
        loss_all.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Validation, triggered on the global step count across epochs.
        if (epoch * train_all + step + 1) % eval_step == 0:
            print("Start testing")
            model.eval()
            mean_test_rmse = _mean_batch_rmse(model, test, loss_fn, device)
            print("Epoch %d: test RMSE %.4f" % (epoch, mean_test_rmse))
            # Back to training mode before the next optimisation step.
            model.train()



Using cuda


  0%|          | 0/8751 [00:00<?, ?it/s]

Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

Epoch 0: test RMSE 2647.3929
Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

Epoch 0: test RMSE 2646.4415
Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

Epoch 0: test RMSE 2633.3919
Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

Epoch 0: test RMSE 2693.1469
Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

Epoch 0: test RMSE 2633.1595
Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

Epoch 0: test RMSE 2635.2949
Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

Epoch 0: test RMSE 2644.8287
Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

Epoch 0: test RMSE 2651.1829
Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

Epoch 0: test RMSE 2662.6153
Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

Epoch 0: test RMSE 2642.0212
Start testing


  0%|          | 0/973 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Convert the per-step training MSE values into RMSE.
loss_np = np.sqrt(np.array(loss_all))

# np.save does not create missing parent folders, so make sure the target exists.
loss_path = '../model/loss_baseline.npy'
os.makedirs(os.path.dirname(loss_path), exist_ok=True)
np.save(loss_path, loss_np)

loss_np

array([6342.35098367, 7642.91750577, 6565.10746294, ..., 3722.12949802,
       1052.65705479,  946.87693894])