In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import sklearn
from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
from torch import optim
from torch.autograd import Variable 

In [2]:
import pandas as pd
import glob
import os

# Set the directory path
dir_path = r'D:\Study Material\4-2\Dataset_final\VeReMi_0_3600_2022-9-11_12.51.1'

# Get a list of all JSON files in the specified directory and its subdirectories
json_files = glob.glob(os.path.join(dir_path, '**', '*.json'), recursive=True)

# Create an empty dataframe to store the merged data
merged_df = pd.DataFrame()

# Loop through each file and read it into a pandas dataframe
for f in json_files:
    # Read the json file into a dataframe
    df = pd.read_json(f, orient='records', lines=True)[["pos", "spd"]]

    # Extract the label from the file prefix
    filename = os.path.basename(f)

    if filename.startswith('traceJSON'):
        label = filename.split('-')[3][1:]
#     elif filename.startswith('traceGroundTruthJSON'):
#         label = 0
#     else:
#         raise ValueError("File format not supported")
    
        # Add the label column to the dataframe
        if int(label) == 0:
            df['label'] = 0
        elif int(label) >=1 and int(label) <= 8:
            df['label'] = 1
        else:
            df['label'] = 2

        # Append the dataframe to the merged dataframe
        merged_df = pd.concat([merged_df, df], ignore_index=True)

# Now merged_df contains all the data from the JSON files, with label column added

In [3]:
merged_df

Unnamed: 0,pos,spd,label
0,"[873.6579894237055, 515.0419704516012, 0.0]","[-0.5615534138496401, 0.34935102148086505, 0.0]",0
1,"[872.1848544954322, 515.9474424294032, 0.0]","[-2.125938318465096, 1.322478270765143, 0.0]",0
2,"[869.1128639211374, 517.7971405872365, 0.0]","[-3.5899683398356963, 2.233296724389207, 0.0]",0
3,"[864.6579538884271, 520.5743407739925, 0.0]","[-5.312629533686103, 3.304274438501985, 0.0]",0
4,"[860.0207470696706, 525.7162975093687, 0.0]","[-5.34186098834915, 5.817769455952666, 0.0]",0
...,...,...,...
5711,"[325.90687464156844, 730.1637061673084, 0.0]","[8.1339354760141, 7.116579088873951, 0.0]",0
5712,"[337.2546973533207, 729.634766863171, 0.0]","[12.196912885390818, -4.544761986234479, 0.0]",0
5713,"[345.51983998911123, 719.5846003226957, 0.0]","[6.73889447260984, -9.893360827534599, 0.0]",0
5714,"[350.82649247285724, 711.7160476513725, 0.0]","[4.224393707179231, -6.201659069760074, 0.0]",0


In [4]:
split = pd.DataFrame(merged_df['pos'].to_list(), columns = ['posX', 'posY','posZ'])
split = split.drop('posZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('pos',axis=1)

In [5]:
split = pd.DataFrame(merged_df['spd'].to_list(), columns = ['spdX', 'spdY','spdZ'])
split = split.drop('spdZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('spd',axis=1)

In [59]:
merged_df.loc[merged_df['label']==0].sample()

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,1.002547,-0.295349,-0.022623,0.035314
1,0,0.998287,-0.292121,-0.195437,0.141427
2,0,0.989403,-0.285526,-0.357166,0.240747
3,0,0.976520,-0.275624,-0.547465,0.357530
4,0,0.963110,-0.257291,-0.550694,0.631611
...,...,...,...,...,...
5711,0,-0.581491,0.471636,0.937950,0.773238
5712,0,-0.548675,0.469750,1.386778,-0.498359
5713,0,-0.524773,0.433918,0.783842,-1.081591
5714,0,-0.509426,0.405863,0.506071,-0.679033


In [10]:
from sklearn.preprocessing import StandardScaler

columns_to_scale = merged_df.columns.drop('label')

merged_df[columns_to_scale] = StandardScaler().fit_transform(merged_df[columns_to_scale])

In [11]:
merged_df

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,1.002547,-0.295349,-0.022623,0.035314
1,0,0.998287,-0.292121,-0.195437,0.141427
2,0,0.989403,-0.285526,-0.357166,0.240747
3,0,0.976520,-0.275624,-0.547465,0.357530
4,0,0.963110,-0.257291,-0.550694,0.631611
...,...,...,...,...,...
5711,0,-0.581491,0.471636,0.937950,0.773238
5712,0,-0.548675,0.469750,1.386778,-0.498359
5713,0,-0.524773,0.433918,0.783842,-1.081591
5714,0,-0.509426,0.405863,0.506071,-0.679033


In [12]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
merged_df['label'] = le.fit_transform(merged_df['label'])

In [13]:
merged_df.head()

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,1.002547,-0.295349,-0.022623,0.035314
1,0,0.998287,-0.292121,-0.195437,0.141427
2,0,0.989403,-0.285526,-0.357166,0.240747
3,0,0.97652,-0.275624,-0.547465,0.35753
4,0,0.96311,-0.257291,-0.550694,0.631611


In [14]:
X = merged_df.iloc[:,1:]
y = merged_df.iloc[:,0:1]

In [15]:
X

Unnamed: 0,posX,posY,spdX,spdY
0,1.002547,-0.295349,-0.022623,0.035314
1,0.998287,-0.292121,-0.195437,0.141427
2,0.989403,-0.285526,-0.357166,0.240747
3,0.976520,-0.275624,-0.547465,0.357530
4,0.963110,-0.257291,-0.550694,0.631611
...,...,...,...,...
5711,-0.581491,0.471636,0.937950,0.773238
5712,-0.548675,0.469750,1.386778,-0.498359
5713,-0.524773,0.433918,0.783842,-1.081591
5714,-0.509426,0.405863,0.506071,-0.679033


In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [17]:
print("Training Shape", X_train.shape, y_train.shape)
print("Testing Shape", X_test.shape, y_test.shape) 

Training Shape (3829, 4) (3829, 1)
Testing Shape (1887, 4) (1887, 1)


In [18]:
# Convert the DataFrame to a numpy array
X_train_np = np.array(X_train)
X_test_np = np.array(X_test)
y_train_np = np.array(y_train)
y_test_np = np.array(y_test)

# Convert the numpy array to a Tensor
X_train_tensors = torch.Tensor(X_train_np)
X_test_tensors = torch.Tensor(X_test_np)
y_train_tensors = torch.Tensor(y_train_np) 
y_test_tensors = torch.Tensor(y_test_np) 

In [19]:
X_train_tensors.type()

'torch.FloatTensor'

In [20]:
#reshaping to rows, timestamps, features

X_train_tensors_final = torch.reshape(X_train_tensors,   (X_train_tensors.shape[0], 1, X_train_tensors.shape[1]))


X_test_tensors_final = torch.reshape(X_test_tensors,  (X_test_tensors.shape[0], 1, X_test_tensors.shape[1])) 

In [21]:
print("Training Shape", X_train_tensors_final.shape, y_train_tensors.shape)
print("Testing Shape", X_test_tensors_final.shape, y_test_tensors.shape) 

Training Shape torch.Size([3829, 1, 4]) torch.Size([3829, 1])
Testing Shape torch.Size([1887, 1, 4]) torch.Size([1887, 1])


In [22]:
class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes #number of classes
        self.num_layers = num_layers #number of layers
        self.input_size = input_size #input size
        self.hidden_size = hidden_size #hidden state
        self.seq_length = seq_length #sequence length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True) #lstm
        self.fc_1 =  nn.Linear(hidden_size, 128) #fully connected 1
        self.fc = nn.Linear(128, num_classes) #fully connected last layer

        self.relu = nn.ReLU()
    
    def forward(self,x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #hidden state
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #internal state
        # Propagate input through LSTM
        output, (hn, cn) = self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
        hn = hn.view(-1, self.hidden_size) #reshaping the data for Dense layer next
        out = self.relu(hn)
        out = self.fc_1(out) #first Dense
        out = self.relu(out) #relu
        out = self.fc(out) #Final Output
        return out

In [23]:
num_epochs = 1000 #1000 epochs
learning_rate = 0.001 #0.001 lr

input_size = 4 #number of features
hidden_size = 2 #number of features in hidden state
num_layers = 1 #number of stacked lstm layers

num_classes = 3 #number of output classes 

In [24]:
lstm1 = LSTM1(num_classes, input_size, hidden_size, num_layers, X_train_tensors_final.shape[1]) #our lstm class

In [25]:
lstm1

LSTM1(
  (lstm): LSTM(4, 2, batch_first=True)
  (fc_1): Linear(in_features=2, out_features=128, bias=True)
  (fc): Linear(in_features=128, out_features=3, bias=True)
  (relu): ReLU()
)

In [26]:
criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(lstm1.parameters(), lr=learning_rate) 

In [27]:
for epoch in range(num_epochs):
    outputs = lstm1.forward(X_train_tensors_final) #forward pass
    optimizer.zero_grad() #caluclate the gradient, manually setting to 0
 
    # obtain the loss function
    loss = criterion(outputs, y_train_tensors)
 
    loss.backward() #calculates the loss of the loss function
 
    optimizer.step() #improve from loss, i.e backprop
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item())) 

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 0, loss: 0.79173
Epoch: 100, loss: 0.53866
Epoch: 200, loss: 0.50556
Epoch: 300, loss: 0.48957
Epoch: 400, loss: 0.47718
Epoch: 500, loss: 0.47024
Epoch: 600, loss: 0.46900
Epoch: 700, loss: 0.46813
Epoch: 800, loss: 0.46744
Epoch: 900, loss: 0.46659


In [28]:
preds = lstm1(X_train_tensors_final)

In [29]:
preds.shape

torch.Size([3829, 3])

In [51]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import accuracy_score

In [55]:
    knn=KNeighborsClassifier(n_neighbors=3, metric='manhattan')
    knn.fit(X_train, y_train)
    predictions = knn.predict(X_test)
    print(classification_report(y_test,predictions))
    print("Confusion Matrix")
    print(confusion_matrix(y_test,predictions))
    print("\n Accuracy")
    print(accuracy_score(y_test,predictions))

              precision    recall  f1-score   support

           0       0.89      0.93      0.91      1406
           1       0.72      0.55      0.62       187
           2       0.70      0.67      0.68       294

    accuracy                           0.85      1887
   macro avg       0.77      0.71      0.74      1887
weighted avg       0.84      0.85      0.84      1887

Confusion Matrix
[[1302   29   75]
 [  74  103   10]
 [  87   11  196]]

 Accuracy
0.8484366719660837


  return self._fit(X, y)


In [49]:
predictions = knn.predict(X_test)

In [50]:
print(classification_report(y_test,predictions))
print("Confusion Matrix")
print(confusion_matrix(y_test,predictions))
print("\n Accuracy")
print(accuracy_score(y_test,predictions))

              precision    recall  f1-score   support

           0       0.89      0.93      0.91      1406
           1       0.72      0.55      0.62       187
           2       0.70      0.67      0.68       294

    accuracy                           0.85      1887
   macro avg       0.77      0.71      0.74      1887
weighted avg       0.84      0.85      0.84      1887

Confusion Matrix
[[1302   29   75]
 [  74  103   10]
 [  87   11  196]]

 Accuracy
0.8484366719660837
