In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import sklearn
from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
from torch import optim

In [2]:
import pandas as pd
import glob
import os

# Set the directory path
dir_path = r'Dataset'

# Get a list of all JSON files in the specified directory and its subdirectories
json_files = glob.glob(os.path.join(dir_path, '**', '*.json'), recursive=True)

# Create an empty dataframe to store the merged data
merged_df = pd.DataFrame()

# Loop through each file and read it into a pandas dataframe
for f in json_files:
    # Read the json file into a dataframe
    df = pd.read_json(f, orient='records', lines=True)[["pos", "spd"]]

    # Extract the label from the file prefix
    filename = os.path.basename(f)

    if filename.startswith('traceJSON'):
        label = filename.split('-')[3][1:]
#     elif filename.startswith('traceGroundTruthJSON'):
#         label = 0
#     else:
#         raise ValueError("File format not supported")
    
        # Add the label column to the dataframe
        if int(label) == 0:
            df['label'] = 0
        elif int(label) >=1 and int(label) <= 9:
            df['label'] = 1
        else:
            df['label'] = 2

        # Append the dataframe to the merged dataframe
        merged_df = pd.concat([merged_df, df], ignore_index=True)

# Now merged_df contains all the data from the JSON files, with label column added

In [3]:
merged_df.head()

Unnamed: 0,pos,spd,label
0,"[135.87344027765874, 1121.710704454854, 0.0]","[0.105041502379317, -1.120827962433954, 0.0]",0
1,"[145.01476212424743, 1011.8710051136839, 0.0]","[0.08023592074115701, -1.0820438844348481, 0.0]",0
2,"[152.5341133861298, 908.2379520504257, 0.0]","[9.913844887820957, 3.900485098416998, 0.0]",0
3,"[179.69089537234865, 894.6932049329928, 0.0]","[11.19012769033562, 1.490894004843721, 0.0]",0
4,"[136.60543156206504, 1112.5643178728526, 0.0]","[0.093148449699676, -0.9939284577211601, 0.0]",0


In [4]:
split = pd.DataFrame(merged_df['pos'].to_list(), columns = ['posX', 'posY','posZ'])
split = split.drop('posZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('pos',axis=1)

In [5]:
split = pd.DataFrame(merged_df['spd'].to_list(), columns = ['spdX', 'spdY','spdZ'])
split = split.drop('spdZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('spd',axis=1)

In [6]:
from sklearn.preprocessing import StandardScaler

columns_to_scale = merged_df.columns.drop('label')

merged_df[columns_to_scale] = StandardScaler().fit_transform(merged_df[columns_to_scale])

In [7]:
merged_df

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,-0.994793,1.533930,-0.078272,-0.278675
1,0,-0.971192,1.181178,-0.080826,-0.274784
2,0,-0.951778,0.848359,0.931694,0.225080
3,0,-0.881665,0.804860,1.063107,-0.016658
4,0,-0.992903,1.504556,-0.079496,-0.265944
...,...,...,...,...,...
352103,0,0.973421,-0.201441,-0.834365,-1.262799
352104,0,0.894472,-0.388351,0.561997,-0.759938
352105,0,0.954767,-0.236419,-0.833878,-1.261135
352106,0,0.907831,-0.398881,0.274590,-0.386849


In [8]:
df_new = pd.DataFrame()
df_new = pd.concat([df_new, merged_df.loc[merged_df['label']==0].sample(frac=0.3)], ignore_index=True)
df_new = pd.concat([df_new, merged_df.loc[merged_df['label']==1]], ignore_index=True)
df_new = pd.concat([df_new, merged_df.loc[merged_df['label']==2]], ignore_index=True)
df_new.loc[df_new['label']==2]

Unnamed: 0,label,posX,posY,spdX,spdY
122091,2,0.836759,-0.716916,-0.293391,-0.234486
122092,2,0.343139,-0.944234,-1.473338,-0.626720
122093,2,1.135562,-0.609056,-1.801718,-0.290726
122094,2,1.135562,-0.609056,-1.801718,-0.290726
122095,2,0.958451,-0.668895,-0.264179,-0.216153
...,...,...,...,...,...
177923,2,-0.713489,-1.341754,-0.784589,0.736851
177924,2,-0.720056,-1.303335,-0.287956,1.127343
177925,2,-0.725248,-1.261643,-0.268590,1.001783
177926,2,-0.728489,-1.232114,-0.199792,0.555923


In [9]:
#set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [10]:
#hyperparameters
input_size = 4
num_classes = 3
learning_rate = 0.001
batch_size = 64
num_epochs = 1

In [11]:
#load data
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe[['posX', 'posY', 'spdX', 'spdY']].values
        self.targets = dataframe['label'].values
        self.data = self.data[:10 * (len(self.data)//10)]
        self.targets = self.targets[:10 * (len(self.targets)//10)]
        # print(len(self.data))
        # print(len(self.targets))

    def __len__(self):
        return (len(self.data) // 10) - 1

    def __getitem__(self, index):
        index *= 10
        data = torch.tensor(self.data[index:index+20], dtype=torch.float)
        target = torch.tensor(self.targets[index:index+20], dtype=torch.long)
        return data, target

In [12]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df_new['label'] = le.fit_transform(df_new['label'])

In [13]:
# split dataframe into train and test sets
train_df, test_df = train_test_split(df_new, test_size=0.2, shuffle=False)

In [14]:
# create dataset and dataloader
train_dataset = CustomDataset(train_df)
train_loader = DataLoader(train_dataset, batch_size=batch_size, drop_last=True, shuffle=False)

test_dataset = CustomDataset(test_df)
test_loader = DataLoader(test_dataset, batch_size=batch_size, drop_last=True, shuffle=False)

In [15]:
print(len(test_dataset))
print(test_dataset[215])

3557
(tensor([[-0.6746, -1.7860, -0.2967,  1.0842],
        [-0.6899, -1.5261, -0.2811,  0.9404],
        [-0.7778, -0.8231, -0.0891, -0.1662],
        [-0.6217, -1.8713,  0.0119, -0.8692],
        [-0.6675, -1.5279,  0.1496, -1.5819],
        [-0.7773, -0.8231, -0.0891, -0.1662],
        [-0.7701, -0.8617, -0.0890, -0.1661],
        [-0.9150,  0.4213,  0.1247, -1.6189],
        [-0.7705, -0.8618, -0.0900, -0.1599],
        [-0.6805, -1.7429, -0.3400,  1.2659],
        [-0.6945, -1.4908, -0.2759,  0.9412],
        [-0.6617, -1.9003, -0.0940, -0.1198],
        [-0.7705, -0.8616, -0.0891, -0.1652],
        [-0.6201, -1.8862, -0.0561, -0.4226],
        [-0.7707, -0.8615, -0.0890, -0.1661],
        [-0.7772, -0.8230, -0.0891, -0.1662],
        [-0.7700, -0.8617, -0.0890, -0.1661],
        [-0.6954, -1.3005,  0.1296, -1.5829],
        [-0.6792, -1.4370,  0.1225, -1.5891],
        [-0.6617, -1.9003, -0.0940, -0.1198]]), tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]))


In [16]:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
#         self.softmax = nn.Softmax(dim=1)
        self.h_n = None
        self.c_n = None


    def forward(self, x):
        if self.h_n is None:
            out, (self.h_n, self.c_n) = self.lstm(x)    
        else:
            out, (self.h_n, self.c_n) = self.lstm(x, (self.h_n, self.c_n))
        self.h_n = self.h_n.detach()
        self.c_n = self.c_n.detach()
        # out = self.softmax(self.fc(out))
        out = self.fc(out)

        return out

In [21]:
model = LSTM(input_size=4,hidden_size=256,num_layers=3,num_classes=3).to(device)

In [22]:
# Define the loss function (criterion) and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [23]:
print(len(train_dataset))

14233


In [27]:
# Training loop
num_epochs = 100
break_ = False

for epoch in range(num_epochs):
        if break_:
            break
        for i, (batch_x, batch_y) in enumerate(train_loader):
            if batch_x is None:
                print(i, batch_x, batch_y)
                break
            try:

              model.zero_grad()

              # Move inputs and labels to the device (e.g. GPU) if available
              batch_x = batch_x.to(device).float()
              batch_y = batch_y.to(device).float()
              batch_y = batch_y.unsqueeze(2)
              y_onehot = F.one_hot(batch_y.to(torch.int64), num_classes=3)
              y_onehot = torch.squeeze(y_onehot).float()
              
              # Forward pass
              outputs = model(batch_x)
#               print(outputs.shape)
#               print(batch_y.shape)
              loss = criterion(outputs, y_onehot)
              # total_loss = loss.item() * batch_size

              # Backward and optimize
              # optimizer.zero_grad()
              loss.backward()
              optimizer.step()
              

              # Print the loss every 100 steps
              if (i + 1) % 100 == 0:
                  print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')
            except Exception as e:
                print(e)
                print(batch_x.shape, batch_y.shape, outputs.shape)
                break_ = True
                break


Epoch [1/100], Step [100/222], Loss: 0.2222
Epoch [1/100], Step [200/222], Loss: 0.2223
Epoch [2/100], Step [100/222], Loss: 0.2222
Epoch [2/100], Step [200/222], Loss: 0.2222
Epoch [3/100], Step [100/222], Loss: 0.2222
Epoch [3/100], Step [200/222], Loss: 0.2222
Epoch [4/100], Step [100/222], Loss: 0.2222
Epoch [4/100], Step [200/222], Loss: 0.2222
Epoch [5/100], Step [100/222], Loss: 0.2222
Epoch [5/100], Step [200/222], Loss: 0.2222
Epoch [6/100], Step [100/222], Loss: 0.2222
Epoch [6/100], Step [200/222], Loss: 0.2222
Epoch [7/100], Step [100/222], Loss: 0.2222
Epoch [7/100], Step [200/222], Loss: 0.2222
Epoch [8/100], Step [100/222], Loss: 0.2222
Epoch [8/100], Step [200/222], Loss: 0.2222
Epoch [9/100], Step [100/222], Loss: 0.2222
Epoch [9/100], Step [200/222], Loss: 0.2222
Epoch [10/100], Step [100/222], Loss: 0.2222
Epoch [10/100], Step [200/222], Loss: 0.2222
Epoch [11/100], Step [100/222], Loss: 0.2222
Epoch [11/100], Step [200/222], Loss: 0.2222
Epoch [12/100], Step [100/22

### Testing

In [28]:
# Training loop
num_epochs = 10
break_ = False
total_loss = 0.

model.eval()

with torch.no_grad():
    for epoch in range(num_epochs):
        for i, (batch_x, batch_y) in enumerate(test_loader):
            model.zero_grad()
            # Move inputs and labels to the device (e.g. GPU) if available
            batch_x = batch_x.to(device).float()
            batch_y = batch_y.to(device).float()
            batch_y = batch_y.unsqueeze(2)
            y_onehot = F.one_hot(batch_y.to(torch.int64), num_classes=3)
            y_onehot = torch.squeeze(y_onehot).float()
            print(y_onehot)
            print(batch_y)
            # Forward pass
            outputs = model(batch_x)
            print(outputs.shape)
            print(outputs)
            loss = criterion(outputs, y_onehot)
            total_loss = loss.item() * batch_size

avg_loss = total_loss / len(test_dataset)

tensor([[[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         ...,
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],

        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         ...,
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],

        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         ...,
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],

        ...,

        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         ...,
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],

        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         ...,
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],

        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         ...,
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]]], device='cuda:0')
tensor([[[2.],
         [2.],
         [2.],


In [None]:
print(avg_loss)