In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import sklearn
from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
from torch import optim

In [2]:
# !unzip /content/VeReMi_0_3600_2022-9-11_12.51.1.zip

In [3]:
import pandas as pd
import glob
import os

# Set the directory path (built with os.path.join so it resolves on every OS,
# not only where '\' is the path separator).
dir_path = os.path.join('..', 'Dataset_final', 'VeReMi_0_3600_2022-9-11_12.51.1')


def label_from_filename(filename):
    """Return the class label encoded in a trace file name.

    Only files whose name starts with ``traceJSON`` carry a label; the fourth
    ``-``-separated field is a one-letter prefix followed by an integer code
    (e.g. ``A16``).

    Returns:
        0 when the code is 0, 1 when the code is in 1..8, 2 for any other
        code, and ``None`` when the file is not a ``traceJSON`` file.
    """
    if not filename.startswith('traceJSON'):
        return None
    code = int(filename.split('-')[3][1:])
    if code == 0:
        return 0
    if 1 <= code <= 8:
        return 1
    return 2


def load_labelled_traces(json_paths):
    """Read the labelled trace files and stack them into one dataframe.

    Args:
        json_paths: iterable of paths to JSON-lines files.

    Returns:
        A dataframe with the columns ``pos``, ``spd`` and ``label``; an empty
        dataframe when no path points at a ``traceJSON`` file.
    """
    frames = []
    for path in json_paths:
        label = label_from_filename(os.path.basename(path))
        if label is None:
            # Not a labelled trace: skip it before paying for the parse.
            continue
        frame = pd.read_json(path, orient='records', lines=True)[["pos", "spd"]]
        frame['label'] = label
        frames.append(frame)

    if not frames:
        return pd.DataFrame()
    # One concat at the end instead of re-copying the growing frame per file.
    return pd.concat(frames, ignore_index=True)


# Get a list of all JSON files in the specified directory and its subdirectories
json_files = glob.glob(os.path.join(dir_path, '**', '*.json'), recursive=True)

# merged_df contains all the data from the labelled JSON files, with a label column added
merged_df = load_labelled_traces(json_files)

In [4]:
# Peek at the first rows of the merged frame to sanity-check the columns and labels.
merged_df.head()

Unnamed: 0,pos,spd,label
0,"[873.6579894237055, 515.0419704516012, 0.0]","[-0.5615534138496401, 0.34935102148086505, 0.0]",0
1,"[872.1848544954322, 515.9474424294032, 0.0]","[-2.125938318465096, 1.322478270765143, 0.0]",0
2,"[869.1128639211374, 517.7971405872365, 0.0]","[-3.5899683398356963, 2.233296724389207, 0.0]",0
3,"[864.6579538884271, 520.5743407739925, 0.0]","[-5.312629533686103, 3.304274438501985, 0.0]",0
4,"[860.0207470696706, 525.7162975093687, 0.0]","[-5.34186098834915, 5.817769455952666, 0.0]",0


In [5]:
# Expand the list-valued `pos` column into scalar columns; the third (Z)
# component is discarded and the original list column is removed.
pos_names = ['posX', 'posY', 'posZ']
split = pd.DataFrame(merged_df['pos'].to_list(), columns=pos_names).drop(columns='posZ')
merged_df = pd.concat([merged_df.drop(columns='pos'), split], axis=1)

In [6]:
# Expand the list-valued `spd` column into scalar columns; the third (Z)
# component is discarded and the original list column is removed.
spd_names = ['spdX', 'spdY', 'spdZ']
split = pd.DataFrame(merged_df['spd'].to_list(), columns=spd_names).drop(columns='spdZ')
merged_df = pd.concat([merged_df.drop(columns='spd'), split], axis=1)

In [7]:
from sklearn.preprocessing import StandardScaler

# Standardise every column except the class label to zero mean / unit variance.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before any
# train/test split, so test-row statistics influence the scaling — confirm
# this is acceptable or fit on the training rows only.
columns_to_scale = merged_df.columns.drop('label')
scaler = StandardScaler()
merged_df[columns_to_scale] = scaler.fit_transform(merged_df[columns_to_scale])

In [None]:
# Display the frame after the pos/spd columns were split and the features standardised.
merged_df

In [None]:
# Rebalance the classes: keep a 30 % sample of class 0 and every row of
# classes 1 and 2. The sample is seeded so that a kernel restart reproduces
# exactly the same df_new (and therefore the same train/test rows).
SAMPLE_SEED = 42

# NOTE(review): DataFrame.sample returns the rows in random order, so the
# class-0 rows are no longer in trace order after this step — confirm that is
# acceptable for the windowed dataset built from this frame.
df_new = pd.concat(
    [
        merged_df.loc[merged_df['label'] == 0].sample(frac=0.3, random_state=SAMPLE_SEED),
        merged_df.loc[merged_df['label'] == 1],
        merged_df.loc[merged_df['label'] == 2],
    ],
    ignore_index=True,
)

# Show the class-2 rows as a quick check that they made it into df_new.
df_new.loc[df_new['label'] == 2]

In [None]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
#hyperparameters
# Equals the number of feature columns the dataset class selects
# (posX, posY, spdX, spdY).
# NOTE(review): the model cell passes a literal 4 rather than this name.
input_size = 4
# The label column takes the values 0, 1 and 2.
num_classes = 3
# NOTE(review): the optimizer cell passes its own literal learning rate;
# confirm which of the two values is intended.
learning_rate = 0.001
# Number of windows per DataLoader batch.
batch_size = 64
# NOTE(review): the training cell assigns num_epochs again before looping.
num_epochs = 1

In [None]:
#load data
class CustomDataset(Dataset):
    """Overlapping fixed-length windows over the rows of a feature dataframe.

    Item ``i`` is the ``window`` consecutive rows starting at row
    ``i * stride``. With the defaults (20 rows, step 10) neighbouring items
    overlap by 10 rows.

    Args:
        dataframe: frame with the columns ``posX``, ``posY``, ``spdX``,
            ``spdY`` and ``label``.
        window: number of rows per item.
        stride: row offset between the starts of consecutive items.

    Raises:
        ValueError: if ``window`` or ``stride`` is not positive.
    """

    FEATURE_COLUMNS = ['posX', 'posY', 'spdX', 'spdY']

    def __init__(self, dataframe, window=20, stride=10):
        if window <= 0 or stride <= 0:
            raise ValueError("window and stride must be positive")
        self.window = window
        self.stride = stride

        # Keep only a whole number of strides; trailing rows are dropped.
        usable_rows = stride * (len(dataframe) // stride)
        self.data = dataframe[self.FEATURE_COLUMNS].values[:usable_rows]
        self.targets = dataframe['label'].values[:usable_rows]

    def __len__(self):
        # Number of full windows that fit. Clamped at 0 because len() raises
        # on a negative value, which happened for frames shorter than one stride.
        return max(0, (len(self.data) - self.window) // self.stride + 1)

    def __getitem__(self, index):
        """Return ``(features, labels)`` for window ``index``.

        ``features`` is a float tensor of shape ``(window, 4)`` and ``labels``
        a long tensor of shape ``(window,)``. Negative indices count from the
        end.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``;
                previously such an index silently produced a short or empty
                window.
        """
        size = len(self)
        if index < 0:
            index += size
        if not 0 <= index < size:
            raise IndexError(f"window index out of range for dataset of length {size}")

        start = index * self.stride
        stop = start + self.window
        data = torch.tensor(self.data[start:stop], dtype=torch.float)
        target = torch.tensor(self.targets[start:stop], dtype=torch.long)
        return data, target

In [None]:
from sklearn.preprocessing import LabelEncoder

# Re-encode the label column as consecutive integer ids.
# NOTE(review): the loading cell already assigns the labels 0/1/2, so this is
# an identity mapping unless one of the classes is absent from df_new, in
# which case the remaining ids are renumbered — confirm that is intended.
le = LabelEncoder().fit(df_new['label'])
df_new['label'] = le.transform(df_new['label'])

In [None]:
# split dataframe into train and test sets
# df_new is laid out class by class (0, then 1, then 2), so one unshuffled
# 80/20 cut would hold out rows from the tail classes only. Cut every class
# 80/20 on its own instead — still without shuffling, so the row order inside
# each class is preserved — and stitch the pieces back together.
class_splits = [
    train_test_split(class_rows, test_size=0.2, shuffle=False)
    for _, class_rows in df_new.groupby('label', sort=True)
]
train_df = pd.concat([train_part for train_part, _ in class_splits])
test_df = pd.concat([test_part for _, test_part in class_splits])

In [None]:
# Wrap both splits in windowed datasets and batch them. Both loaders use the
# same settings: fixed batch size, incomplete final batch dropped, no shuffling.
loader_options = dict(batch_size=batch_size, drop_last=True, shuffle=False)

train_dataset = CustomDataset(train_df)
test_dataset = CustomDataset(test_df)

train_loader = DataLoader(train_dataset, **loader_options)
test_loader = DataLoader(test_dataset, **loader_options)

In [None]:
# Number of windows in the test dataset.
print(len(test_dataset))
# Inspect one (features, labels) window.
# NOTE(review): 215 is a hard-coded index — assumes the test dataset holds
# more than 215 windows; TODO confirm or derive it from len(test_dataset).
print(test_dataset[215])

In [None]:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    """Stateful multi-layer LSTM with a per-time-step linear classification head.

    The final hidden and cell states of each forward pass are cached
    (detached from the graph) and used as the initial state of the next
    pass. Call :meth:`reset_state` to drop that cache, e.g. before switching
    from training to evaluation.

    Args:
        input_size: number of features per time step.
        hidden_size: width of every LSTM layer.
        num_layers: number of stacked LSTM layers.
        num_classes: number of outputs produced for every time step.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # One score per class; the head used to be hard-wired to a single
        # output, which ignored `num_classes` and only matched 3-wide one-hot
        # targets through broadcasting.
        self.fc = nn.Linear(hidden_size, num_classes)
        self.h_n = None
        self.c_n = None

    def reset_state(self):
        """Forget the cached hidden/cell state so the next pass starts from zeros."""
        self.h_n = None
        self.c_n = None

    def _cached_state_fits(self, x):
        """Tell whether the cached state can seed a forward pass on ``x``."""
        if self.h_n is None or self.c_n is None:
            return False
        if x.dim() == 3:
            expected_shape = (self.num_layers, x.size(0), self.hidden_size)
        else:
            expected_shape = (self.num_layers, self.hidden_size)
        return (
            tuple(self.h_n.shape) == expected_shape
            and self.h_n.device == x.device
            and self.h_n.dtype == x.dtype
        )

    def forward(self, x):
        """Run the LSTM over ``x`` and classify every time step.

        Args:
            x: tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, seq_len, num_classes)``.
        """
        if self._cached_state_fits(x):
            out, (h_n, c_n) = self.lstm(x, (self.h_n, self.c_n))
        else:
            # No usable cache (first call, different batch size, or the data
            # moved to another device/dtype): start from the default zero state
            # instead of crashing on a shape or device mismatch.
            out, (h_n, c_n) = self.lstm(x)

        # Detach so gradients never flow back into earlier batches.
        self.h_n = h_n.detach()
        self.c_n = c_n.detach()

        return self.fc(out)

In [None]:
# Random stand-in input of shape (64, 20, 4) for a forward-pass smoke test.
inp = torch.randn((64, 20, 4))

In [None]:
# Build the network: 4 input features, three stacked 256-unit LSTM layers, 3 classes.
# NOTE(review): the model is not moved to `device` in this cell.
model_config = dict(input_size=4, hidden_size=256, num_layers=3, num_classes=3)
model = LSTM(**model_config)

In [None]:
# Smoke-test forward pass on the random input; the model also caches its
# final hidden/cell state as a side effect of this call.
output = model(inp)

In [None]:
# Tensor type string of the smoke-test output, then the outputs at index 1.
print(output.type())
print(output[1])
# Mean-squared-error loss used by the single-batch check in the next cell.
loss_fn = nn.MSELoss()

In [None]:
# testing: push one real training batch through the model and the loss
batch_x, batch_y = next(iter(train_loader))
print(torch.max(batch_y))

# The batch is moved to `device` below, so the model has to live there too.
# Also drop the recurrent state cached by earlier forward passes on other
# inputs so it cannot seed this one.
model.to(device)
model.h_n = None
model.c_n = None

batch_x = batch_x.to(device)
batch_y = batch_y.to(device)

# (batch, seq) integer labels -> (batch, seq, 3) one-hot float targets, the
# same target form the training loop feeds to the loss. one_hot adds the
# class axis itself, so no unsqueeze/squeeze round trip (a bare squeeze would
# also drop a size-1 batch axis).
y_onehot = F.one_hot(batch_y.to(torch.int64), num_classes=3).float()
print(y_onehot.shape)

pred = model(batch_x)
print(pred.shape)

loss = loss_fn(pred, y_onehot)
print(loss)
print(loss.shape)

In [None]:
# Loss and optimiser shared by the training and evaluation cells:
# mean-squared error, and Adam over all model parameters.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=3e-4)

In [None]:
# Number of windows in the training dataset.
print(len(train_dataset))

In [None]:
# Training loop
num_epochs = 100
log_every = 100  # print the current loss every `log_every` optimisation steps

# The batches are moved to `device` below, so the model has to live there too.
model.to(device)
model.train()

for epoch in range(num_epochs):
    # The loader is not shuffled, so every epoch restarts at the first window:
    # drop the recurrent state cached at the end of the previous pass.
    model.h_n = None
    model.c_n = None

    epoch_loss = 0.
    for i, (batch_x, batch_y) in enumerate(train_loader):
        # Move inputs and labels to the device (e.g. GPU) if available
        batch_x = batch_x.to(device).float()
        # (batch, seq) integer labels -> (batch, seq, 3) one-hot float targets
        y_onehot = F.one_hot(batch_y.to(device).long(), num_classes=3).float()

        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_x)
        loss = criterion(outputs, y_onehot)

        # Backward and optimize. Errors are deliberately not caught here: a
        # failing step should stop the run with its full traceback instead of
        # being printed and silently ending training.
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        # Print the loss every `log_every` steps
        if (i + 1) % log_every == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')

    if len(train_loader) > 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}] mean loss: {epoch_loss / len(train_loader):.4f}')


### Testing

In [None]:
# Evaluation loop: a single pass over the held-out windows without gradients.
model.to(device)
model.eval()

# Start from a clean recurrent state so nothing cached during training is
# used to seed the first test batch.
model.h_n = None
model.c_n = None

total_loss = 0.
num_samples = 0

with torch.no_grad():
    for batch_x, batch_y in test_loader:
        # Move inputs and labels to the device (e.g. GPU) if available
        batch_x = batch_x.to(device).float()
        # (batch, seq) integer labels -> (batch, seq, 3) one-hot float targets
        y_onehot = F.one_hot(batch_y.to(device).long(), num_classes=3).float()

        # Forward pass
        outputs = model(batch_x)
        loss = criterion(outputs, y_onehot)

        # Accumulate (the loss used to be overwritten on every batch, so only
        # the last batch counted). The batch loss is a mean, so weight it by
        # the number of windows in the batch.
        total_loss += loss.item() * batch_x.size(0)
        num_samples += batch_x.size(0)

# Average over the windows actually evaluated: the loader drops the final
# incomplete batch, so this can be fewer than len(test_dataset).
avg_loss = total_loss / num_samples if num_samples else float('nan')

In [None]:
# Average test loss computed by the evaluation cell.
print(avg_loss)

In [None]:
# example = torch.zeros(64,20,3)
# example[:,:,0] = 1
# example

In [None]:
# Import the required library
import torch

# Small demonstration of torch.argmax on a random 4x4 tensor.
# The tensor is named `values` so that the built-in input() is not shadowed
# for the rest of the notebook.
values = torch.randn(4,4)

# print above defined tensor
print("Input Tensor:", values)

# Index of the maximum value in the flattened tensor
indices = torch.argmax(values)
print("Indices:", indices)

# Row index of the maximum in every column (reduces dim 0)
indices = torch.argmax(values, dim=0)
print("Indices in dim 0:", indices)

# Column index of the maximum in every row (reduces dim 1)
indices = torch.argmax(values, dim=1)
print("Indices in dim 1:", indices)