In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import sklearn
from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
from torch import optim

In [1]:
import pandas as pd
import glob
import os

# Set the directory path
dir_path = r'D:\Study Material\4-2\Dataset_final\VeReMi_0_3600_2022-9-11_12.51.1'

# Get a list of all JSON files in the specified directory and its subdirectories
json_files = glob.glob(os.path.join(dir_path, '**', '*.json'), recursive=True)

# Create an empty dataframe to store the merged data
merged_df = pd.DataFrame()

# Loop through each file and read it into a pandas dataframe
for f in json_files:
    # Read the json file into a dataframe
    df = pd.read_json(f, orient='records', lines=True)[["pos", "spd"]]

    # Extract the label from the file prefix
    filename = os.path.basename(f)

    if filename.startswith('traceJSON'):
        label = filename.split('-')[3][1:]
    elif filename.startswith('traceGroundTruthJSON'):
        label = 0
    else:
        raise ValueError("File format not supported")
    
    # Add the label column to the dataframe
    if int(label) == 0:
        df['label'] = 0
    elif int(label) >=1 and int(label) <= 8:
        df['label'] = 1
    else:
        df['label'] = 2

    # Append the dataframe to the merged dataframe
    merged_df = pd.concat([merged_df, df], ignore_index=True)

# Now merged_df contains all the data from the JSON files, with label column added

In [3]:
merged_df

Unnamed: 0,pos,spd,label
0,"[266.1342377193001, 41.31755030776682, 0.0]","[-0.08087719292206601, 0.7826628559043071, 0.0]",0
1,"[266.06130476311273, 43.37327132906824, 0.0]","[-0.307226018249808, 2.973074697703729, 0.0]",0
2,"[265.50620000532376, 47.479615917740944, 0.0]","[-0.522133162849577, 5.052698244401898, 0.0]",0
3,"[264.8798230282204, 53.81269421777557, 0.0]","[-0.7501579494382471, 7.25934146781901, 0.0]",0
4,"[263.74132084628656, 62.04745136461824, 0.0]","[-1.196011717133152, 9.262387108898814, 0.0]",0
...,...,...,...
10737,"[325.90687464156844, 730.1637061673084, 0.0]","[8.1339354760141, 7.116579088873951, 0.0]",0
10738,"[337.2546973533207, 729.634766863171, 0.0]","[12.196912885390818, -4.544761986234479, 0.0]",0
10739,"[345.51983998911123, 719.5846003226957, 0.0]","[6.73889447260984, -9.893360827534599, 0.0]",0
10740,"[350.82649247285724, 711.7160476513725, 0.0]","[4.224393707179231, -6.201659069760074, 0.0]",0


In [4]:
split = pd.DataFrame(merged_df['pos'].to_list(), columns = ['posX', 'posY','posZ'])
split = split.drop('posZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('pos',axis=1)

In [5]:
split = pd.DataFrame(merged_df['spd'].to_list(), columns = ['spdX', 'spdY','spdZ'])
split = split.drop('spdZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('spd',axis=1)

In [6]:
from sklearn.preprocessing import StandardScaler

columns_to_scale = merged_df.columns.drop('label')

merged_df[columns_to_scale] = StandardScaler().fit_transform(merged_df[columns_to_scale])

In [7]:
merged_df

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,-0.733647,-2.038607,0.064290,0.123897
1,0,-0.733858,-2.031121,0.037468,0.374456
2,0,-0.735468,-2.016168,0.012002,0.612341
3,0,-0.737286,-1.993106,-0.015018,0.864757
4,0,-0.740588,-1.963119,-0.067851,1.093883
...,...,...,...,...,...
10737,0,-0.560248,0.469826,1.037734,0.848426
10738,0,-0.527329,0.467900,1.519191,-0.485500
10739,0,-0.503352,0.431303,0.872424,-1.097320
10740,0,-0.487958,0.402649,0.574459,-0.675031


In [8]:
#set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [9]:
#hyperparameters
input_size = 4
num_classes = 3
learning_rate = 0.001
batch_size = 64
num_epochs = 1

In [10]:
#load data
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe[['posX', 'posY', 'spdX', 'spdY']].values
        self.targets = dataframe['label'].values

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, index):
        data = torch.tensor(self.data[index], dtype=torch.float)
        target = torch.tensor(self.targets[index], dtype=torch.long)
        return data, target

In [11]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
merged_df['label'] = le.fit_transform(merged_df['label'])

In [12]:
# split dataframe into train and test sets
train_df, test_df = train_test_split(merged_df, test_size=0.2, random_state=42)

In [13]:
# create dataset and dataloader
train_dataset = CustomDataset(train_df)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = CustomDataset(test_df)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [22]:
# import torch
# import torch.nn as nn

# class LSTM(nn.Module):
#     def __init__(self, input_size, hidden_size, num_layers, num_classes):
#         super(LSTM, self).__init__()
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
#         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
#         self.fc = nn.Linear(hidden_size, num_classes)
#         self.softmax = nn.Softmax(dim=1)
# #         self.h_n = None
# #         self.c_n = None


#     def forward(self, x):
# #         if self.h_n is None:
# #             out, (self.h_n, self.c_n) = self.lstm(x)    
# #         else:
# #             out, (self.h_n, self.c_n) = self.lstm(x, (self.h_n, self.c_n))
    
#         h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_()

#         c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_()

#         out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))

#         print
#         out = self.softmax(self.fc(out))

#         return out

In [26]:
class LSTMModel(nn.Module):
    def __init__(self, input_d, hidden_d, layer_d, output_d):
        super(LSTMModel, self).__init__()
        
        self.hidden_dim = hidden_d
        self.layer_dim = layer_d

        # LSTM model 
        self.lstm = nn.LSTM(input_d, hidden_d, layer_d, batch_first=True) 

        self.fc = nn.Linear(hidden_d, output_d)

    def forward(self, x):
    
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()

        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()

        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))

        out = self.fc(out[:, -1, :]) 
        return out
    
input_dim = 4
hidden_dim = 256
output_dim = 3
layer_dim = 3

model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)

#step 4: calculating cross entropy loss
error = nn.CrossEntropyLoss()

#step 5: optimizer 
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [27]:
inp = torch.randn(64,4)

In [24]:
# model = LSTM(input_size=4,hidden_size=256,num_layers=3,num_classes=3)

In [28]:
output = model(inp)

RuntimeError: For unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors

In [18]:
print(output.type())
print(output[1])
loss_fn = nn.MSELoss()

torch.FloatTensor
tensor([0.3345, 0.3321, 0.3334], grad_fn=<SelectBackward0>)


In [19]:
# testing
batch_x, batch_y = next(iter(train_loader))
# print(batch_x.shape)
batch_y = batch_y.unsqueeze(1)
# print(batch_y.shape)
# print(batch_y)


y_onehot = torch.FloatTensor(batch_size, 3)
# print(y_onehot.shape)
# In your for loop
y_onehot.zero_()
y_onehot.scatter_(1, batch_y, 1)
# print(y_onehot.shape)

# print(y_onehot[:10], batch_y[:10])
batch_x = batch_x.to(device)
batch_y = batch_y.to(device)
pred = model(batch_x)
print(pred.shape)
print(y_onehot.shape)
loss = loss_fn(pred, y_onehot)
print(loss)
print(loss.shape)

torch.Size([64, 3])
torch.Size([64, 3])
tensor(0.2215, grad_fn=<MseLossBackward0>)
torch.Size([])


In [20]:
# Define the loss function (criterion) and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [21]:
# Training loop
for epoch in range(num_epochs):
    for i, (batch_x, batch_y) in enumerate(train_loader):
        # Move inputs and labels to the device (e.g. GPU) if available
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        batch_y = batch_y.unsqueeze(1)
        y_onehot = torch.FloatTensor(batch_size, 3)
        y_onehot.zero_()
        y_onehot.scatter_(1, batch_y, 1)

        # Forward pass
        outputs = model(batch_x)
        loss = criterion(outputs, y_onehot)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print the loss every 100 steps
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.