In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import sklearn
from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
from torch import optim

In [2]:
import pandas as pd
import glob
import os

# Set the directory path
dir_path = r'D:\Study Material\4-2\Dataset_final\VeReMi_0_3600_2022-9-11_12.51.1'

# Get a list of all JSON files in the specified directory and its subdirectories
json_files = glob.glob(os.path.join(dir_path, '**', '*.json'), recursive=True)

# Create an empty dataframe to store the merged data
merged_df = pd.DataFrame()

# Loop through each file and read it into a pandas dataframe
for f in json_files:
    # Read the json file into a dataframe
    df = pd.read_json(f, orient='records', lines=True)[["pos", "spd"]]

    # Extract the label from the file prefix
    filename = os.path.basename(f)

    if filename.startswith('traceJSON'):
        label = filename.split('-')[3][1:]
    elif filename.startswith('traceGroundTruthJSON'):
        label = 0
    else:
        raise ValueError("File format not supported")
    
    # Add the label column to the dataframe
    if int(label) == 0:
        df['label'] = 0
    elif int(label) >=1 and int(label) <= 8:
        df['label'] = 1
    else:
        df['label'] = 2

    # Append the dataframe to the merged dataframe
    merged_df = pd.concat([merged_df, df], ignore_index=True)

# Now merged_df contains all the data from the JSON files, with label column added

In [3]:
merged_df

Unnamed: 0,pos,spd,label
0,"[266.1342377193001, 41.31755030776682, 0.0]","[-0.08087719292206601, 0.7826628559043071, 0.0]",0
1,"[266.06130476311273, 43.37327132906824, 0.0]","[-0.307226018249808, 2.973074697703729, 0.0]",0
2,"[265.50620000532376, 47.479615917740944, 0.0]","[-0.522133162849577, 5.052698244401898, 0.0]",0
3,"[264.8798230282204, 53.81269421777557, 0.0]","[-0.7501579494382471, 7.25934146781901, 0.0]",0
4,"[263.74132084628656, 62.04745136461824, 0.0]","[-1.196011717133152, 9.262387108898814, 0.0]",0
...,...,...,...
10737,"[325.90687464156844, 730.1637061673084, 0.0]","[8.1339354760141, 7.116579088873951, 0.0]",0
10738,"[337.2546973533207, 729.634766863171, 0.0]","[12.196912885390818, -4.544761986234479, 0.0]",0
10739,"[345.51983998911123, 719.5846003226957, 0.0]","[6.73889447260984, -9.893360827534599, 0.0]",0
10740,"[350.82649247285724, 711.7160476513725, 0.0]","[4.224393707179231, -6.201659069760074, 0.0]",0


In [4]:
split = pd.DataFrame(merged_df['pos'].to_list(), columns = ['posX', 'posY','posZ'])
split = split.drop('posZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('pos',axis=1)

In [5]:
split = pd.DataFrame(merged_df['spd'].to_list(), columns = ['spdX', 'spdY','spdZ'])
split = split.drop('spdZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('spd',axis=1)

In [6]:
merged_df.loc[merged_df['label']==0]

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,266.134238,41.317550,-0.080877,0.782663
1,0,266.061305,43.373271,-0.307226,2.973075
2,0,265.506200,47.479616,-0.522133,5.052698
3,0,264.879823,53.812694,-0.750158,7.259341
4,0,263.741321,62.047451,-1.196012,9.262387
...,...,...,...,...,...
10737,0,325.906875,730.163706,8.133935,7.116579
10738,0,337.254697,729.634767,12.196913,-4.544762
10739,0,345.519840,719.584600,6.738894,-9.893361
10740,0,350.826492,711.716048,4.224394,-6.201659


In [7]:
from sklearn.preprocessing import StandardScaler

columns_to_scale = merged_df.columns.drop('label')

merged_df[columns_to_scale] = StandardScaler().fit_transform(merged_df[columns_to_scale])

In [8]:
merged_df

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,-0.733647,-2.038607,0.064290,0.123897
1,0,-0.733858,-2.031121,0.037468,0.374456
2,0,-0.735468,-2.016168,0.012002,0.612341
3,0,-0.737286,-1.993106,-0.015018,0.864757
4,0,-0.740588,-1.963119,-0.067851,1.093883
...,...,...,...,...,...
10737,0,-0.560248,0.469826,1.037734,0.848426
10738,0,-0.527329,0.467900,1.519191,-0.485500
10739,0,-0.503352,0.431303,0.872424,-1.097320
10740,0,-0.487958,0.402649,0.574459,-0.675031


In [9]:
# class NN(nn.Module):
#     def __init__(self,input_size,num_classes):
#         super(NN,self).__init__()
#         self.fc1 = nn.Linear(input_size,50)
#         self.fc2 = nn.Linear(50,num_classes)
    
#     def forward(self,x):
#         x = F.relu(self.fc1(x))
#         x = self.fc2(x)
#         return x

In [10]:
#set device
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [17]:
#hyperparameters
input_size = 4
num_classes = 3
learning_rate = 0.001
batch_size = 64
num_epochs = 1

In [18]:
#load data
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe[['posX', 'posY', 'spdX', 'spdY']].values
        self.targets = dataframe['label'].values

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, index):
        data = torch.tensor(self.data[index], dtype=torch.float)
        target = torch.tensor(self.targets[index], dtype=torch.long)
        return data, target

In [19]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
merged_df['label'] = le.fit_transform(merged_df['label'])

In [20]:
# split dataframe into train and test sets
train_df, test_df = train_test_split(merged_df, test_size=0.2, random_state=42)

In [21]:
train_df

Unnamed: 0,label,posX,posY,spdX,spdY
5044,0,1.156619,0.228011,-0.900274,0.989788
2732,0,-0.886338,-0.780452,0.073933,0.034426
9254,0,0.318916,0.330289,-0.383191,-0.172765
7883,0,1.099461,0.317848,0.532514,-0.402720
1018,0,-0.817031,-0.734676,0.074068,0.034556
...,...,...,...,...,...
5734,2,0.979400,0.461513,1.372445,-0.972808
5191,0,-0.436014,-0.326188,1.158598,0.972286
5390,0,-0.527411,-0.788647,-0.904539,-0.670471
860,0,0.540051,0.440598,-1.365799,-0.463219


In [22]:
# create dataset and dataloader
train_dataset = CustomDataset(train_df)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = CustomDataset(test_df)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [23]:
#initialse network
model = NN(input_size=input_size,num_classes=num_classes).to(device)

NameError: name 'NN' is not defined

In [24]:
#loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr = learning_rate)

NameError: name 'model' is not defined

In [72]:
#train network
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device=device)
        target = target.to(device=device)
        #get to correct shape
        data = data.reshape(data.shape[0],-1)
        #forward
        scores = model(data)
        loss = criterion(scores,target)
        #backward
        optimizer.zero_grad()
        loss.backward()
        #gradient descent
        optimizer.step()

In [75]:
#check accuracy
def check_accuracy(loader,model):
    num_correct = 0
    num_samples = 0
    model.eval()
    
    with torch.no_grad():
        for x,y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            x = x.reshape(x.shape[0],-1)
            
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
        
        print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}')
    
    model.train()

In [76]:
check_accuracy(train_loader,model)

Got 7475 / 8593 with accuracy 86.99


In [77]:
check_accuracy(test_loader,model)

Got 1868 / 2149 with accuracy 86.92


In [26]:
import torch
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=4, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.fc1 = nn.Linear(in_features= 32*32*32, out_features=500)
        self.relu3 = nn.ReLU()

        self.fc2 = nn.Linear(in_features=500, out_features=num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)

        x = x.view(-1, 32*32*32)
        x = self.fc1(x)
        x = self.relu3(x)

        x = self.fc2(x)
        x = self.softmax(x)
        return x

# Define the model, loss function, and optimizer
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(num_epochs):
    for i, (features, labels) in enumerate(train_loader):
        features = features.unsqueeze(1) # Add a channel dimension
        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Predict on test set
with torch.no_grad():
    correct = 0
    total = 0
    for features, labels in test_loader:
        features = features.unsqueeze(1) # Add a channel dimension
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test features: {} %'.format(100 * correct / total))

RuntimeError: Given groups=1, weight of size [16, 4, 3, 3], expected input[1, 64, 1, 4] to have 4 channels, but got 64 channels instead