In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim


import os
import sys
# A notebook has no `__file__`, so anchor on the kernel's working directory
# (normally the folder that contains this notebook).
cur_dir = os.getcwd()
# Parent of the working directory; the `src` package imported below is expected
# to live there. The path is normalised so the membership test also matches an
# already-registered absolute path instead of appending a second '<cwd>/../' spelling.
src_dir = os.path.abspath(os.path.join(cur_dir, os.pardir))
# Make `import src...` resolvable from the notebook.
if src_dir not in sys.path:
    sys.path.append(src_dir)

from src.constant import *
from tqdm.notebook import tqdm

from src.MyDataset import MyDataset
from src.TraPredModel import TraPredModel

In [2]:
# Folder holding the original pickled recordings. The trailing slash is kept so
# that plain string concatenation with a file name also yields a valid path.
# NOTE(review): `dir` shadows the Python builtin of the same name; it is left
# unchanged here because other cells read this variable.
dir = '../data/PandasData/Original/'

# Window length handed to the dataset (and reused when the model is built).
lookback = 20

# Empty dataset container; recordings are fed into it by a later cell.
ds = MyDataset(lookback=lookback)

In [3]:
def process_data(df_dir : str, target_freq : int = 10) -> pd.DataFrame:
    """Load one pickled recording, downsample its rows and drop unused columns.

    Args:
        df_dir: Path of the pickle file to load (despite the name, this is the
            file path handed to ``pd.read_pickle``).
        target_freq: Desired number of rows per ``TimestampID`` group after
            downsampling. Must be positive.

    Returns:
        The frame reduced to every ``k``-th row, where ``k`` is the mean
        non-null count per ``TimestampID`` group (averaged over columns)
        divided by ``target_freq``, truncated to an int and clamped to at
        least 1. The bookkeeping columns listed below are removed when present.

    Raises:
        ValueError: If ``target_freq`` is not positive.
    """
    if target_freq <= 0:
        raise ValueError(f"target_freq must be positive, got {target_freq}")

    # NOTE(security): unpickling can execute arbitrary code; only load trusted files.
    df = pd.read_pickle(df_dir)

    # Presumably each TimestampID marks one second, making this a frames-per-second
    # estimate -- TODO confirm against the recording format.
    f_per_sec = df.groupby('TimestampID').count().mean().mean()

    # A recording that is already sparser than target_freq would give a stride
    # of 0 (iloc rejects a zero step); an empty/degenerate frame gives NaN.
    # In both cases keep every row.
    ratio = f_per_sec / target_freq
    resample_ratio = max(1, int(ratio)) if np.isfinite(ratio) else 1
    df = df.iloc[::resample_ratio, :]

    # for origin
    df = df.drop(columns=['Confidence', 'Timestamp', 'TimestampID', 
                          'DatapointID', 'PID', 'SCN', 'U_X', 'U_Y', 'U_Z', 
                          'AGV_Z', 'User_Z', 'GazeOrigin_Z', 'User_Pitch', 'User_Yaw', 'User_Roll', 
                          'EyeTarget'], errors='ignore')
    return df

# Feed every pickled recording found in `dir` into the dataset.
# os.listdir returns entries in arbitrary order, so sort them to make the
# ingestion order identical from run to run.
df = None
for file in sorted(os.listdir(dir)):
    if file.endswith('.pkl'):
        df = process_data(os.path.join(dir, file))
        ds.read_data(df)

# Fail with a clear message instead of a NameError (or stale kernel state)
# further down when the data folder holds no recordings.
if df is None:
    raise FileNotFoundError(f"No .pkl files found in {dir!r}")

# 80/20 split into shuffled batches of 16.
# NOTE(review): no random seed is set in this notebook, so the split is
# presumably different on every run -- confirm how MyDataset shuffles.
train, test = ds.split_data(frac=0.8, shuffle=True, batch_size=16)
feature_dim = ds.feature_dim

# Columns of the last recording loaded, as a quick schema check.
print(df.columns)

  return torch.tensor(X), torch.tensor(y)


Index(['AGV_name', 'User_X', 'User_Y', 'AGV_X', 'AGV_Y', 'AGV_Pitch',
       'AGV_Yaw', 'AGV_Roll', 'AGV_speed', 'GazeOrigin_X', 'GazeOrigin_Y',
       'GazeDirection_X', 'GazeDirection_Y', 'GazeDirection_Z'],
      dtype='object')


In [4]:
# Peek at a single batch to confirm the tensor shapes that reach the model.
# next(iter(...), None) replaces the enumerate/break loop whose index was unused,
# and still prints nothing when `train` is empty.
first_batch = next(iter(train), None)
if first_batch is not None:
    X, y = first_batch
    print(X.shape, y.shape)

# Sizes of the two splits (presumably batch counts, since both are iterated batch-wise).
print(len(train), len(test))

torch.Size([16, 20, 13]) torch.Size([16, 20, 13])
7783 1946


In [5]:
# Mean-squared-error criterion used for both training and evaluation.
loss_fn = nn.MSELoss()

# Trajectory predictor sized to the dataset's feature count and window length.
model = TraPredModel(
    input_size=feature_dim,
    lookback=lookback,
)

# AdamW over all model parameters with a fixed learning rate.
optimizer = optim.AdamW(model.parameters(), lr=1e-3)


In [6]:
n_epochs = 1
# Validation runs every `eval_step` optimisation steps, counted across epochs.
# NOTE(review): if n_epochs * len(train) is smaller than this value the
# validation branch below is never reached -- lower it when test metrics are wanted.
eval_step = 100000

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device}")
model.to(device)

train_all = len(train)

# Per-step training loss (raw criterion value, i.e. MSE with the current loss_fn).
loss_all = []


def _evaluate_rmse(model, loader, loss_fn, device):
    """Return the square root of the mean per-batch loss of `model` over `loader`.

    With an MSE criterion this is an RMSE. Batches whose loss is NaN are
    skipped and excluded from the average; NaN is returned when no batch
    produced a usable loss. The model is left in eval mode; the caller is
    responsible for switching back to train mode.
    """
    batch_losses = []
    model.eval()
    with torch.no_grad():
        for X_test_batch, y_test_batch in tqdm(loader):
            X_test_batch = X_test_batch.float().to(device)
            y_test_batch = y_test_batch.float().to(device)
            batch_pred = model(X_test_batch)
            batch_loss = torch.mean(loss_fn(batch_pred, y_test_batch[:, :, :2]))
            if not torch.isnan(batch_loss):
                batch_losses.append(batch_loss.item())

    if not batch_losses:
        return float('nan')
    # Average over the batches actually accumulated, then take the root so the
    # reported number really is an RMSE rather than an MSE.
    return (sum(batch_losses) / len(batch_losses)) ** 0.5


for epoch in range(n_epochs):
    model.train()
    for step, (X_batch, y_batch) in tqdm(enumerate(train), total = train_all):
        X_batch = X_batch.float().to(device)
        y_batch = y_batch.float().to(device)

        y_pred = model(X_batch)
        # Only the first two feature channels of the target are supervised
        # (presumably User_X / User_Y -- TODO confirm against MyDataset).
        # torch.mean is a no-op for a scalar loss but keeps the value scalar
        # should the criterion be configured with reduction='none'.
        loss = torch.mean(loss_fn(y_pred, y_batch[:, :, :2]))
        loss_all.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Validation
        if (epoch * train_all + step + 1) % eval_step == 0:
            print("Start testing")
            test_rmse = _evaluate_rmse(model, test, loss_fn, device)
            print("Epoch %d: test RMSE %.4f" % (epoch, test_rmse))

            # Evaluation switched the model to eval mode; resume training mode.
            model.train()



Using cuda


  0%|          | 0/7783 [00:00<?, ?it/s]

In [8]:
# `loss_all` was filled with the training criterion's per-step values (MSE);
# the square root turns them into per-step RMSE.
loss_np = np.sqrt(np.array(loss_all))

# Create the output folder first so the result of a long training run is not
# lost to a missing directory.
loss_path = '../model/loss_baseline.npy'
os.makedirs(os.path.dirname(loss_path), exist_ok=True)
np.save(loss_path, loss_np)

loss_np

array([7058.70158032, 7491.86865875, 6997.30462392, ..., 2526.97368407,
       2375.570984  , 3065.86056434])