In [94]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset

from sklearn.preprocessing import MinMaxScaler, StandardScaler

import os
import sys
# Notebooks do not define __file__, so the *string* "__file__" is used as a dummy
# file name: abspath() anchors it to the current working directory and dirname()
# strips the dummy name again, leaving the directory the kernel runs in.
_dummy_path = os.path.abspath("__file__")
cur_dir = os.path.dirname(_dummy_path)

# Parent of the notebook directory; `from src.constant import *` below needs it
# to be importable.
src_dir = os.path.join(cur_dir, '../')

# Register the parent directory once, even if this cell is re-run.
if sys.path.count(src_dir) == 0:
    sys.path.append(src_dir)

from src.constant import *
from tqdm.notebook import tqdm


In [83]:
# Columns removed before modelling (identifiers, timestamps, Z-axis and head-rotation
# channels, ...). NOTE(review): the original comment only said "for origin" --
# confirm this is the intended feature set.
columns_to_drop = [
    'Confidence', 'Timestamp', 'TimestampID', 'DatapointID', 'PID', 'SCN',
    'U_X', 'U_Y', 'U_Z', 'AGV_Z', 'User_Z', 'GazeOrigin_Z',
    'User_Pitch', 'User_Yaw', 'User_Roll', 'EyeTarget',
]

# Load the pickled DataFrame and drop the unused columns in one expression.
# read_pickle executes arbitrary pickle payloads -- only use it on trusted files.
df = pd.read_pickle('../data/PandasData/Original/PID001_NSL.pkl').drop(columns=columns_to_drop)


# numeric_df = df.select_dtypes(include=[np.number])
# numeric_df.drop(columns=['Timestamp', 'start_station_X', 'start_station_Y', 'end_station_X', 'end_station_Y',
#        'distance_from_start_station_X', 'distance_from_start_station_Y',
#        'distance_from_end_station_X', 'distance_from_end_station_Y','AGV distance X', 'AGV distance Y', ], inplace=True)

In [84]:
class MyDataset():
    """Sliding-window next-step dataset built from a table of per-AGV time series.

    Typical use: ``read_data(df)`` to turn every AGV's rows into
    ``(window, window shifted by one step)`` pairs, then ``split_data()`` to get
    train / test ``DataLoader`` objects.
    """

    def __init__(self) -> None:
        # All windows collected so far (TensorDataset or ConcatDataset);
        # stays None until read_data() has produced at least one window.
        self.data = None
        # DataLoaders created by the most recent split_data() call.
        self.train = None
        self.test = None
        # Number of numeric columns per time step; fixed by the first AGV read.
        self.feature_dim = None

    def read_data(self, df: pd.DataFrame, agv_col_name = 'AGV_name', lookback = 10):
        """Append sliding windows for every AGV found in ``df`` to ``self.data``.

        Rows are grouped by ``df[agv_col_name]`` so that no window spans two
        different AGVs. Only numeric columns are kept as features.

        Args:
            df: Table holding one row per time step, with a column identifying the AGV.
            agv_col_name: Name of the column whose distinct values define the groups.
            lookback: Window length (number of time steps) passed to create_dataset().

        Raises:
            ValueError: If the number of numeric columns differs from the one
                recorded in ``self.feature_dim`` by an earlier group or call.
        """
        # Collect all parts first and concatenate once: wrapping a new
        # ConcatDataset around the previous one for every AGV builds a chain
        # that each __getitem__ call has to walk through.
        parts = [] if self.data is None else [self.data]

        for agv in df[agv_col_name].unique():
            cur_data = df[df[agv_col_name] == agv].select_dtypes(include=[np.number])
            n_features = cur_data.shape[1]
            if self.feature_dim is None:
                self.feature_dim = n_features
            elif self.feature_dim != n_features:
                # Explicit raise instead of `assert`, which is removed under `python -O`.
                raise ValueError("Feature dimension should be the same")

            X, y = self.create_dataset(cur_data.values, lookback=lookback)
            if len(X) == 0:
                # Group has no more than `lookback` rows: it yields no window.
                continue
            parts.append(TensorDataset(X, y))

        if len(parts) == 1:
            self.data = parts[0]
        elif parts:
            self.data = torch.utils.data.ConcatDataset(parts)

    @staticmethod
    def create_dataset(dataset, lookback):
        """Transform a time series into a prediction dataset

        Args:
            dataset: A numpy array of time series, first dimension is the time steps.
                A DataFrame is also accepted; its numeric columns are used.
            lookback: Size of window for prediction

        Returns:
            A pair ``(X, y)`` of tensors, each of shape
            ``(len(dataset) - lookback, lookback, *dataset.shape[1:])``.
            ``X[i]`` holds time steps ``i .. i+lookback-1`` and ``y[i]`` the same
            window shifted one step ahead. When the series has no more than
            ``lookback`` steps, both tensors have a leading dimension of 0.

        Raises:
            ValueError: If ``lookback`` is smaller than 1.
        """
        if lookback < 1:
            raise ValueError("lookback must be a positive integer")

        if isinstance(dataset, pd.DataFrame):
            # Slicing a DataFrame yields DataFrames, which cannot be stacked into
            # a tensor; work on the underlying numeric array instead.
            dataset = dataset.select_dtypes(include=[np.number]).to_numpy()
        series = np.asarray(dataset)

        n_windows = max(len(series) - lookback, 0)
        # window_idx[i, j] = i + j: row i selects the time steps of window i.
        window_idx = np.arange(n_windows)[:, None] + np.arange(lookback)[None, :]

        # One vectorised gather per tensor instead of torch.tensor() over a
        # Python list of arrays (slow, and it emits a UserWarning).
        X = series[window_idx]
        y = series[window_idx + 1]
        return torch.from_numpy(X), torch.from_numpy(y)

    def split_data(self, frac: float = 0.8, shuffle: bool = True, batch_size: int = 4):
        """Split the collected windows by position and wrap both parts in DataLoaders.

        The first ``int(len(data) * frac)`` windows form the training set and
        the remainder the test set; the split itself is never randomised.

        Args:
            frac: Fraction of windows assigned to the training set, in [0, 1].
            shuffle: Whether the *training* loader reshuffles every epoch. The
                test loader always iterates in order so evaluation is reproducible.
            batch_size: Batch size used by both loaders.

        Returns:
            ``(train_loader, test_loader)``; the same objects are stored in
            ``self.train`` and ``self.test``.

        Raises:
            RuntimeError: If no data has been loaded with read_data() yet.
            ValueError: If ``frac`` lies outside [0, 1].
        """
        if self.data is None:
            raise RuntimeError("No data loaded; call read_data() before split_data()")
        if not 0.0 <= frac <= 1.0:
            raise ValueError("frac must be between 0 and 1")

        n = len(self.data)
        train_size = int(n * frac)

        train_subset = torch.utils.data.Subset(self.data, range(0, train_size))
        test_subset = torch.utils.data.Subset(self.data, range(train_size, n))

        # Shuffling is pointless for an empty subset, so it is switched off in that case.
        train = DataLoader(train_subset, batch_size=batch_size, shuffle=shuffle and train_size > 0)
        test = DataLoader(test_subset, batch_size=batch_size, shuffle=False)

        self.train = train
        self.test = test

        return train, test

In [90]:
# Window length in time steps; also passed to the model constructor further down.
lookback = 10

# Build the per-AGV sliding windows, then put the first 80% of them into the
# training loader and the rest into the test loader (batches of 4).
ds = MyDataset()
ds.read_data(df, lookback=lookback)
ds.split_data(frac=0.8, shuffle=True, batch_size=4)
train, test = ds.train, ds.test  # split_data() stores the loaders it returns

# Number of numeric feature columns per time step, needed as the model input size.
feature_dim = ds.feature_dim

In [89]:
# Peek at the first training batch only, to confirm the tensor layout of inputs
# and targets before training.
for i, batch in enumerate(train):
    X, y = batch
    print(X.shape, y.shape)
    break

# Number of batches in the training and test loaders.
print(len(train), len(test))

torch.Size([4, 10, 13]) torch.Size([4, 10, 13])
12545 3137


In [54]:
class TraPredModel(nn.Module):
    """Stacked LSTM followed by a per-time-step linear read-out.

    Args:
        input_size: Number of features per time step (required).
        lookback: Window length used when building the dataset. For backward
            compatibility it is used as the number of stacked LSTM layers when
            ``num_layers`` is not given.
            NOTE(review): ``nn.LSTM(num_layers=...)`` is the depth of the layer
            stack, not the sequence length -- an LSTM consumes windows of any
            length. Tying depth to ``lookback`` looks unintended; consider
            passing ``num_layers`` explicitly.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers; overrides the ``lookback`` fallback.
        output_size: Number of values predicted per time step.

    Raises:
        ValueError: If ``input_size`` is missing, or if neither ``num_layers``
            nor ``lookback`` is provided.
    """

    def __init__(self, input_size = None, lookback = None, hidden_size = 50, num_layers = None, output_size = 2):
        super().__init__()
        # Fail early with a readable message instead of an error from inside nn.LSTM.
        if input_size is None:
            raise ValueError("input_size is required")
        if num_layers is None:
            num_layers = lookback  # historical behaviour, see class docstring
        if num_layers is None:
            raise ValueError("either num_layers or lookback must be given")

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map ``(batch, seq, input_size)`` to ``(batch, seq, output_size)``."""
        # With batch_first=True the LSTM returns the top layer's hidden state for
        # every time step; the final (h_n, c_n) state tuple is not needed here.
        hidden_seq, _ = self.lstm(x)
        return self.linear(hidden_seq)

In [87]:
# Network, regression loss and optimiser used by the training cell.
model = TraPredModel(lookback=lookback, input_size=feature_dim)
loss_fn = nn.MSELoss()
optimizer = optim.AdamW(params=model.parameters(), lr=1e-3)

# train_ds = DataLoader(TensorDataset(X_train, y_train), shuffle=True, batch_size=8)
# test_ds = DataLoader(TensorDataset(X_test, y_test), shuffle=False, batch_size=8)


In [95]:
n_epochs = 100
eval_every = 1  # run validation after every `eval_every`-th epoch
n_targets = 2   # the model predicts the last `n_targets` feature columns of each step

for epoch in range(n_epochs):
    # ---- Training pass ----
    model.train()
    for X_batch, y_batch in tqdm(train):
        # Cast explicitly to float32, the dtype of the model weights.
        X_batch = X_batch.float()
        y_batch = y_batch.float()

        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch[:, :, -n_targets:])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- Validation pass ----
    if epoch % eval_every == 0:
        model.eval()
        squared_error_sum = 0.0
        n_elements = 0
        with torch.no_grad():
            for X_test_batch, y_test_batch in test:
                target = y_test_batch.float()[:, :, -n_targets:]
                batch_mse = loss_fn(model(X_test_batch.float()), target)
                # Skip batches whose loss is NaN so they do not poison the metric.
                if torch.isnan(batch_mse):
                    continue
                # loss_fn is a mean-reduced MSE, so mean * element count recovers
                # the batch's summed squared error. Accumulating sums and counts
                # weights a smaller final batch correctly and keeps the
                # denominator consistent with the batches actually used.
                squared_error_sum += batch_mse.item() * target.numel()
                n_elements += target.numel()

        # RMSE over all evaluated elements; NaN when nothing could be evaluated
        # (empty test loader or only NaN batches) instead of a ZeroDivisionError.
        test_rmse = (squared_error_sum / n_elements) ** 0.5 if n_elements else float("nan")
        print("Epoch %d: test RMSE %.4f" % (epoch, test_rmse))
 

  0%|          | 0/12545 [00:00<?, ?it/s]

tensor(0.1838, grad_fn=<MseLossBackward0>)
tensor(0.2737, grad_fn=<MseLossBackward0>)
tensor(0.4092, grad_fn=<MseLossBackward0>)
tensor(0.3214, grad_fn=<MseLossBackward0>)
tensor(0.3827, grad_fn=<MseLossBackward0>)
tensor(0.3965, grad_fn=<MseLossBackward0>)
tensor(0.2678, grad_fn=<MseLossBackward0>)
tensor(0.1686, grad_fn=<MseLossBackward0>)
tensor(0.4037, grad_fn=<MseLossBackward0>)
tensor(0.1617, grad_fn=<MseLossBackward0>)
tensor(0.2607, grad_fn=<MseLossBackward0>)
tensor(0.1098, grad_fn=<MseLossBackward0>)
tensor(0.3192, grad_fn=<MseLossBackward0>)
tensor(0.3409, grad_fn=<MseLossBackward0>)
tensor(0.2395, grad_fn=<MseLossBackward0>)
tensor(0.3447, grad_fn=<MseLossBackward0>)
tensor(0.2970, grad_fn=<MseLossBackward0>)
tensor(0.1922, grad_fn=<MseLossBackward0>)
tensor(0.2864, grad_fn=<MseLossBackward0>)
tensor(0.3180, grad_fn=<MseLossBackward0>)
tensor(0.1906, grad_fn=<MseLossBackward0>)
tensor(0.1802, grad_fn=<MseLossBackward0>)
tensor(0.2771, grad_fn=<MseLossBackward0>)
tensor(0.27

RuntimeError: expected m1 and m2 to have the same dtype, but got: double != float