In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import sklearn
from torch.utils.data import Dataset,DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
from torch import optim
from torch.autograd import Variable 

In [2]:
import pandas as pd
import glob
import os

# Set the directory path
dir_path = r'D:\Study Material\4-2\Dataset_final\VeReMi_0_3600_2022-9-11_12.51.1'

# Get a list of all JSON files in the specified directory and its subdirectories
json_files = glob.glob(os.path.join(dir_path, '**', '*.json'), recursive=True)

# Create an empty dataframe to store the merged data
merged_df = pd.DataFrame()

# Loop through each file and read it into a pandas dataframe
for f in json_files:
    # Read the json file into a dataframe
    df = pd.read_json(f, orient='records', lines=True)[["pos", "spd"]]

    # Extract the label from the file prefix
    filename = os.path.basename(f)

    if filename.startswith('traceJSON'):
        label = filename.split('-')[3][1:]
#     elif filename.startswith('traceGroundTruthJSON'):
#         label = 0
#     else:
#         raise ValueError("File format not supported")
    
        # Add the label column to the dataframe
        if int(label) == 0:
            df['label'] = 0
        elif int(label) >=1 and int(label) <= 8:
            df['label'] = 1
        else:
            df['label'] = 2

        # Append the dataframe to the merged dataframe
        merged_df = pd.concat([merged_df, df], ignore_index=True)

# Now merged_df contains all the data from the JSON files, with label column added

In [3]:
merged_df

Unnamed: 0,pos,spd,label
0,"[873.6579894237055, 515.0419704516012, 0.0]","[-0.5615534138496401, 0.34935102148086505, 0.0]",0
1,"[872.1848544954322, 515.9474424294032, 0.0]","[-2.125938318465096, 1.322478270765143, 0.0]",0
2,"[869.1128639211374, 517.7971405872365, 0.0]","[-3.5899683398356963, 2.233296724389207, 0.0]",0
3,"[864.6579538884271, 520.5743407739925, 0.0]","[-5.312629533686103, 3.304274438501985, 0.0]",0
4,"[860.0207470696706, 525.7162975093687, 0.0]","[-5.34186098834915, 5.817769455952666, 0.0]",0
...,...,...,...
5711,"[325.90687464156844, 730.1637061673084, 0.0]","[8.1339354760141, 7.116579088873951, 0.0]",0
5712,"[337.2546973533207, 729.634766863171, 0.0]","[12.196912885390818, -4.544761986234479, 0.0]",0
5713,"[345.51983998911123, 719.5846003226957, 0.0]","[6.73889447260984, -9.893360827534599, 0.0]",0
5714,"[350.82649247285724, 711.7160476513725, 0.0]","[4.224393707179231, -6.201659069760074, 0.0]",0


In [4]:
split = pd.DataFrame(merged_df['pos'].to_list(), columns = ['posX', 'posY','posZ'])
split = split.drop('posZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('pos',axis=1)

In [5]:
split = pd.DataFrame(merged_df['spd'].to_list(), columns = ['spdX', 'spdY','spdZ'])
split = split.drop('spdZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('spd',axis=1)

In [6]:
merged_df.loc[merged_df['label']==0]

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,873.657989,515.041970,-0.561553,0.349351
1,0,872.184854,515.947442,-2.125938,1.322478
2,0,869.112864,517.797141,-3.589968,2.233297
3,0,864.657954,520.574341,-5.312630,3.304274
4,0,860.020747,525.716298,-5.341861,5.817769
...,...,...,...,...,...
5711,0,325.906875,730.163706,8.133935,7.116579
5712,0,337.254697,729.634767,12.196913,-4.544762
5713,0,345.519840,719.584600,6.738894,-9.893361
5714,0,350.826492,711.716048,4.224394,-6.201659


In [7]:
from sklearn.preprocessing import StandardScaler

columns_to_scale = merged_df.columns.drop('label')

merged_df[columns_to_scale] = StandardScaler().fit_transform(merged_df[columns_to_scale])

In [8]:
merged_df

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,1.002547,-0.295349,-0.022623,0.035314
1,0,0.998287,-0.292121,-0.195437,0.141427
2,0,0.989403,-0.285526,-0.357166,0.240747
3,0,0.976520,-0.275624,-0.547465,0.357530
4,0,0.963110,-0.257291,-0.550694,0.631611
...,...,...,...,...,...
5711,0,-0.581491,0.471636,0.937950,0.773238
5712,0,-0.548675,0.469750,1.386778,-0.498359
5713,0,-0.524773,0.433918,0.783842,-1.081591
5714,0,-0.509426,0.405863,0.506071,-0.679033


In [9]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
merged_df['label'] = le.fit_transform(merged_df['label'])

In [10]:
merged_df.head()

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,1.002547,-0.295349,-0.022623,0.035314
1,0,0.998287,-0.292121,-0.195437,0.141427
2,0,0.989403,-0.285526,-0.357166,0.240747
3,0,0.97652,-0.275624,-0.547465,0.35753
4,0,0.96311,-0.257291,-0.550694,0.631611


In [11]:
df_new = pd.DataFrame()
df_new = pd.concat([df_new, merged_df.loc[merged_df['label']==0].sample(frac=0.3)], ignore_index=True)
df_new = pd.concat([df_new, merged_df.loc[merged_df['label']==1]], ignore_index=True)
df_new = pd.concat([df_new, merged_df.loc[merged_df['label']==2]], ignore_index=True)
df_new.loc[df_new['label']==2]

Unnamed: 0,label,posX,posY,spdX,spdY
1819,2,0.771836,0.555587,-0.185285,-0.058431
1820,2,0.764458,0.554298,-0.384405,-0.076939
1821,2,0.751292,0.557101,-0.580195,0.103151
1822,2,0.733394,0.567206,-0.724216,0.413355
1823,2,0.716500,0.591541,-0.596702,0.874907
...,...,...,...,...,...
2689,2,-1.138951,1.351121,-0.042162,1.084084
2690,2,-1.141381,1.386592,-0.042296,1.085854
2691,2,-1.143821,1.422045,-0.041797,1.079225
2692,2,-1.145615,1.456448,-0.029910,0.920895


In [12]:
X = df_new.iloc[:,1:]
y = df_new.iloc[:,0:1]

In [13]:
X

Unnamed: 0,posX,posY,spdX,spdY
0,1.062577,0.334599,0.327531,-0.281088
1,1.454399,1.180631,-1.678577,-0.435036
2,0.413585,0.404679,-1.338186,-0.615233
3,-0.978641,-0.206754,0.039464,-0.002728
4,-0.837332,-0.707706,0.039591,-0.002602
...,...,...,...,...
2689,-1.138951,1.351121,-0.042162,1.084084
2690,-1.141381,1.386592,-0.042296,1.085854
2691,-1.143821,1.422045,-0.041797,1.079225
2692,-1.145615,1.456448,-0.029910,0.920895


In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [15]:
print("Training Shape", X_train.shape, y_train.shape)
print("Testing Shape", X_test.shape, y_test.shape) 

Training Shape (1804, 4) (1804, 1)
Testing Shape (890, 4) (890, 1)


In [16]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import accuracy_score

In [17]:
knn=KNeighborsClassifier(n_neighbors=5, metric='manhattan')
knn.fit(X_train, y_train)
predictions = knn.predict(X_test)
print(classification_report(y_test,predictions))
print("Confusion Matrix")
print(confusion_matrix(y_test,predictions))
print("\n Accuracy")
print(accuracy_score(y_test,predictions))

              precision    recall  f1-score   support

           0       0.79      0.75      0.77       432
           1       0.73      0.74      0.73       185
           2       0.73      0.78      0.76       273

    accuracy                           0.76       890
   macro avg       0.75      0.76      0.75       890
weighted avg       0.76      0.76      0.76       890

Confusion Matrix
[[326  43  63]
 [ 33 137  15]
 [ 52   8 213]]

 Accuracy
0.7595505617977528


  return self._fit(X, y)


In [18]:
X = df_new.iloc[:,1:].values
y = df_new.iloc[:,0:1].values

In [19]:
X

array([[ 1.06257653,  0.33459927,  0.32753118, -0.28108845],
       [ 1.45439882,  1.18063081, -1.67857671, -0.4350363 ],
       [ 0.41358505,  0.40467912, -1.3381858 , -0.61523273],
       ...,
       [-1.1438215 ,  1.42204498, -0.04179722,  1.07922491],
       [-1.14561503,  1.45644792, -0.02990959,  0.92089467],
       [-1.14732369,  1.47803924,  0.00624491,  0.43927773]])

In [20]:
# Make datasets and dataloaders for train, validation and test
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.2, stratify=y_trainval)

In [21]:
# Convert data to PyTorch tensors and create dataloaders
train_loader = DataLoader(TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float()), batch_size=32, shuffle=True)
val_loader = DataLoader(TensorDataset(torch.from_numpy(X_val).float(), torch.from_numpy(y_val).float()), batch_size=32, shuffle=False)
test_loader = DataLoader(TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float()), batch_size=32, shuffle=False)

In [22]:
# Define the model architecture
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(4, 16)
        self.fc2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x


In [23]:
model = MyModel()

In [24]:
# Define the loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

In [25]:
for epoch in range(10):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Evaluate on validation set
    model.eval()
    val_loss = 0.0
    val_acc = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            val_acc += ((outputs > 0.5) == labels).sum().item()
    # Print progress and results
    print(f"Epoch {epoch+1} - Training Loss: {running_loss/len(train_loader)}, Validation Loss: {val_loss/len(val_loader)}, Validation Accuracy: {val_acc/len(X_val)*100}")

Epoch 1 - Training Loss: 0.673137797249688, Validation Loss: 0.5854244019304004, Validation Accuracy: 19.489559164733176
Epoch 2 - Training Loss: 0.5175230148834763, Validation Loss: 0.4484876436846597, Validation Accuracy: 19.489559164733176
Epoch 3 - Training Loss: 0.3959312329965609, Validation Loss: 0.3506278068359409, Validation Accuracy: 23.201856148491878
Epoch 4 - Training Loss: 0.2971000131909494, Validation Loss: 0.27631487218397005, Validation Accuracy: 28.074245939675173
Epoch 5 - Training Loss: 0.2032954610746216, Validation Loss: 0.19256792962551117, Validation Accuracy: 28.538283062645007
Epoch 6 - Training Loss: 0.09660735257245877, Validation Loss: 0.10980280701603208, Validation Accuracy: 29.00232018561485
Epoch 7 - Training Loss: -0.021346861082646582, Validation Loss: 0.011413753564868654, Validation Accuracy: 29.930394431554525
Epoch 8 - Training Loss: -0.14489633024290757, Validation Loss: -0.10981588561220892, Validation Accuracy: 29.930394431554525
Epoch 9 - Tra

In [26]:
# Evaluate the model on the test set
model.eval()
test_loss = 0.0
test_acc = 0.0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        test_loss += criterion(outputs, labels).item()
        test_acc += ((outputs > 0.5) == labels).sum().item()
print(f"Test Loss: {test_loss/len(test_loader)}, Test Accuracy: {test_acc/len(X_test)*100}")

Test Loss: -0.4397023179951836, Test Accuracy: 32.28200371057514


In [27]:
import torch
import torch.nn as nn
import torch.optim as optim

class LSTM(nn.Module):
    def __init__(self, input_size=4, hidden_size=32, num_layers=1, output_size=1):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(1), self.hidden_size).to(device=x.device)
        c0 = torch.zeros(self.num_layers, x.size(1), self.hidden_size).to(device=x.device)
        out, _ = self.lstm(x, (h0,c0))
        out = self.fc(out[-1])
        return out

def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X,y) in enumerate(dataloader):
        X,y = X.to(device), y.to(device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred,y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def test_loop(dataloader, model):
    size = len(dataloader.dataset)
    test_loss = 0
    with torch.no_grad():
        for X,y in dataloader:
            X,y = X.to(device), y.to(device)

            # Compute prediction and loss
            pred = model(X)
            test_loss += loss_fn(pred,y).item()

    test_loss /= size
    print(f"Test Error: Avg Loss: {test_loss:>8f} \n")

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

model = LSTM().to(device)
print(model)

loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

epochs = 100
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_loader,model,loss_fn,optimizer)
    test_loop(test_loader,model)

print("Done!")

Using cpu device
LSTM(
  (lstm): LSTM(4, 32)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)
Epoch 1
-------------------------------


RuntimeError: For unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors