In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import sklearn
from torch.utils.data import Dataset,DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
from torch import optim
from torch.autograd import Variable 

In [2]:
import pandas as pd
import glob
import os

# Set the directory path
dir_path = r'Dataset'

# Get a list of all JSON files in the specified directory and its subdirectories
json_files = glob.glob(os.path.join(dir_path, '**', '*.json'), recursive=True)

# Create an empty dataframe to store the merged data
merged_df = pd.DataFrame()

# Loop through each file and read it into a pandas dataframe
for f in json_files:
    # Read the json file into a dataframe
    df = pd.read_json(f, orient='records', lines=True)[["pos", "spd"]]

    # Extract the label from the file prefix
    filename = os.path.basename(f)

    if filename.startswith('traceJSON'):
        label = filename.split('-')[3][1:]
#     elif filename.startswith('traceGroundTruthJSON'):
#         label = 0
#     else:
#         raise ValueError("File format not supported")
    
        # Add the label column to the dataframe
        if int(label) == 0:
            df['label'] = 0
        elif int(label) >=1 and int(label) <= 9:
            df['label'] = 1
        else:
            df['label'] = 2

        # Append the dataframe to the merged dataframe
        merged_df = pd.concat([merged_df, df], ignore_index=True)

# Now merged_df contains all the data from the JSON files, with label column added

In [3]:
merged_df

Unnamed: 0,pos,spd,label
0,"[873.6579894237055, 515.0419704516012, 0.0]","[-0.5615534138496401, 0.34935102148086505, 0.0]",0
1,"[872.1848544954322, 515.9474424294032, 0.0]","[-2.125938318465096, 1.322478270765143, 0.0]",0
2,"[869.1128639211374, 517.7971405872365, 0.0]","[-3.5899683398356963, 2.233296724389207, 0.0]",0
3,"[864.6579538884271, 520.5743407739925, 0.0]","[-5.312629533686103, 3.304274438501985, 0.0]",0
4,"[860.0207470696706, 525.7162975093687, 0.0]","[-5.34186098834915, 5.817769455952666, 0.0]",0
...,...,...,...
5711,"[325.90687464156844, 730.1637061673084, 0.0]","[8.1339354760141, 7.116579088873951, 0.0]",0
5712,"[337.2546973533207, 729.634766863171, 0.0]","[12.196912885390818, -4.544761986234479, 0.0]",0
5713,"[345.51983998911123, 719.5846003226957, 0.0]","[6.73889447260984, -9.893360827534599, 0.0]",0
5714,"[350.82649247285724, 711.7160476513725, 0.0]","[4.224393707179231, -6.201659069760074, 0.0]",0


In [4]:
split = pd.DataFrame(merged_df['pos'].to_list(), columns = ['posX', 'posY','posZ'])
split = split.drop('posZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('pos',axis=1)

In [5]:
split = pd.DataFrame(merged_df['spd'].to_list(), columns = ['spdX', 'spdY','spdZ'])
split = split.drop('spdZ',axis=1)
merged_df = pd.concat([merged_df, split], axis=1) 
merged_df = merged_df.drop('spd',axis=1)

In [6]:
merged_df.loc[merged_df['label']==0]

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,873.657989,515.041970,-0.561553,0.349351
1,0,872.184854,515.947442,-2.125938,1.322478
2,0,869.112864,517.797141,-3.589968,2.233297
3,0,864.657954,520.574341,-5.312630,3.304274
4,0,860.020747,525.716298,-5.341861,5.817769
...,...,...,...,...,...
5711,0,325.906875,730.163706,8.133935,7.116579
5712,0,337.254697,729.634767,12.196913,-4.544762
5713,0,345.519840,719.584600,6.738894,-9.893361
5714,0,350.826492,711.716048,4.224394,-6.201659


In [7]:
from sklearn.preprocessing import StandardScaler

columns_to_scale = merged_df.columns.drop('label')

merged_df[columns_to_scale] = StandardScaler().fit_transform(merged_df[columns_to_scale])

In [8]:
merged_df

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,1.002547,-0.295349,-0.022623,0.035314
1,0,0.998287,-0.292121,-0.195437,0.141427
2,0,0.989403,-0.285526,-0.357166,0.240747
3,0,0.976520,-0.275624,-0.547465,0.357530
4,0,0.963110,-0.257291,-0.550694,0.631611
...,...,...,...,...,...
5711,0,-0.581491,0.471636,0.937950,0.773238
5712,0,-0.548675,0.469750,1.386778,-0.498359
5713,0,-0.524773,0.433918,0.783842,-1.081591
5714,0,-0.509426,0.405863,0.506071,-0.679033


In [9]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
merged_df['label'] = le.fit_transform(merged_df['label'])

In [10]:
merged_df.head()

Unnamed: 0,label,posX,posY,spdX,spdY
0,0,1.002547,-0.295349,-0.022623,0.035314
1,0,0.998287,-0.292121,-0.195437,0.141427
2,0,0.989403,-0.285526,-0.357166,0.240747
3,0,0.97652,-0.275624,-0.547465,0.35753
4,0,0.96311,-0.257291,-0.550694,0.631611


In [11]:
df_new = pd.DataFrame()
df_new = pd.concat([df_new, merged_df.loc[merged_df['label']==0].sample(frac=0.3)], ignore_index=True)
df_new = pd.concat([df_new, merged_df.loc[merged_df['label']==1]], ignore_index=True)
df_new = pd.concat([df_new, merged_df.loc[merged_df['label']==2]], ignore_index=True)
df_new.loc[df_new['label']==2]

Unnamed: 0,label,posX,posY,spdX,spdY
1956,2,0.771836,0.555587,-0.185285,-0.058431
1957,2,0.764458,0.554298,-0.384405,-0.076939
1958,2,0.751292,0.557101,-0.580195,0.103151
1959,2,0.733394,0.567206,-0.724216,0.413355
1960,2,0.716500,0.591541,-0.596702,0.874907
...,...,...,...,...,...
2689,2,-1.138951,1.351121,-0.042162,1.084084
2690,2,-1.141381,1.386592,-0.042296,1.085854
2691,2,-1.143821,1.422045,-0.041797,1.079225
2692,2,-1.145615,1.456448,-0.029910,0.920895


In [12]:
X = df_new.iloc[:,1:]
y = df_new.iloc[:,0:1]

In [13]:
X

Unnamed: 0,posX,posY,spdX,spdY
0,-1.113625,0.979726,-0.047331,0.694455
1,1.148791,0.109514,-0.620133,-1.364455
2,-0.092475,0.869176,0.986813,-1.249080
3,-1.078037,1.063195,0.108711,-0.468173
4,-0.832955,0.861405,0.240799,-1.556814
...,...,...,...,...
2689,-1.138951,1.351121,-0.042162,1.084084
2690,-1.141381,1.386592,-0.042296,1.085854
2691,-1.143821,1.422045,-0.041797,1.079225
2692,-1.145615,1.456448,-0.029910,0.920895


In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [15]:
print("Training Shape", X_train.shape, y_train.shape)
print("Testing Shape", X_test.shape, y_test.shape) 

Training Shape (1804, 4) (1804, 1)
Testing Shape (890, 4) (890, 1)


In [16]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import accuracy_score

In [17]:
knn=KNeighborsClassifier(n_neighbors=5, metric='manhattan')
knn.fit(X_train, y_train)
predictions = knn.predict(X_test)
print(classification_report(y_test,predictions))
print("Confusion Matrix")
print(confusion_matrix(y_test,predictions))
print("\n Accuracy")
print(accuracy_score(y_test,predictions))

              precision    recall  f1-score   support

           0       0.78      0.73      0.76       432
           1       0.70      0.73      0.71       230
           2       0.72      0.77      0.74       228

    accuracy                           0.74       890
   macro avg       0.73      0.74      0.74       890
weighted avg       0.74      0.74      0.74       890

Confusion Matrix
[[316  61  55]
 [ 49 167  14]
 [ 40  12 176]]

 Accuracy
0.7404494382022472


  return self._fit(X, y)
