In [2]:
import pandas as pd
import json
from glob import glob
from tqdm import tqdm
import numpy as np

In [3]:
def read_trace_file(files, type, tqdm_desc):
    """Collect every record of one message type from JSON trace files.

    Each line of a file may hold one JSON object or several objects written
    back to back (with or without whitespace between them); all of them are
    decoded. Records whose ``'type'`` field equals ``type`` become one row.

    Args:
        files: iterable of trace file paths (strings).
        type: value a record's ``'type'`` field must equal to be kept. The
            name shadows the builtin but is kept so existing callers work.
        tqdm_desc: description shown on the progress bar.

    Returns:
        A DataFrame with one row per matching record and a 0..n-1 index.
        Columns are the first two components of ``spd``, ``acl``, ``pos``,
        ``hed`` and of their ``*_noise`` counterparts, then ``label`` (0 when
        the file path contains ``'A0'``, otherwise 1) and ``messageID``
        (-1 when the record has none). With no matching records the frame is
        empty but still carries all columns.

    Raises:
        json.JSONDecodeError: if a line contains malformed JSON.
        KeyError: if a record lacks ``'type'`` or a matching record lacks one
            of the vector fields.
    """
    columns = ['spdX', 'spdY', 'accX', 'accY', 'posX', 'posY', 'hedX', 'hedY', 'spdXNoise', 'spdYNoise', 'accXNoise', 'accYNoise', 'posXNoise', 'posYNoise', 'hedXNoise', 'hedYNoise', 'label', 'messageID']
    # JSON keys in the same order as the X/Y column pairs above.
    vector_keys = ('spd', 'acl', 'pos', 'hed',
                   'spd_noise', 'acl_noise', 'pos_noise', 'hed_noise')
    decoder = json.JSONDecoder()
    rows = []
    for file in tqdm(files, desc=tqdm_desc):
        label = 0 if 'A0' in file else 1
        with open(file) as f:
            for line in f:
                line = line.strip()
                pos = 0
                while pos < len(line):
                    # raw_decode returns the absolute end offset when given a
                    # start index, so no substring copies are needed.
                    json_line, pos = decoder.raw_decode(line, pos)
                    # raw_decode rejects leading whitespace; skip any gap
                    # before the next concatenated object.
                    while pos < len(line) and line[pos].isspace():
                        pos += 1

                    if json_line['type'] != type:
                        continue

                    features = [json_line[key][axis]
                                for key in vector_keys
                                for axis in (0, 1)]
                    rows.append(features + [label, json_line.get('messageID', -1)])
    # Build the frame once; one DataFrame per row plus a giant concat is
    # dramatically slower and leaves every row with index 0.
    return pd.DataFrame(rows, columns=columns)

In [4]:
# glob yields paths in arbitrary, OS-dependent order. Sorting keeps the row
# order (and therefore the unshuffled training batches) reproducible.
train_path = './data/train/*.*'
train_files = sorted(glob(train_path))
# Training rows come from records with type == 2.
train_df = read_trace_file(train_files, 2, 'Aggregating Training Data')

test_path = './data/test/*.*'
test_files = sorted(glob(test_path))
# Test rows come from records with type == 3.
test_df = read_trace_file(test_files, 3, 'Aggregating Testing Data')

Aggregating Training Data: 100%|██████████| 6298/6298 [04:02<00:00, 25.98it/s]
Aggregating Testing Data:   3%|▎         | 124/4369 [00:23<08:16,  8.55it/s]

In [None]:
# Features are every column up to and including 'hedYNoise'; 'label' is the
# target and 'messageID' identifies each test row in the results file.
feature_cols = slice(None, 'hedYNoise')

X = train_df.loc[:, feature_cols].to_numpy()
Y = train_df['label'].to_numpy()

X_test = test_df.loc[:, feature_cols].to_numpy()
Y_test = test_df['label'].to_numpy()
message_ids = test_df['messageID'].to_numpy()

# Neural network (NN) classifier

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader

from torch import nn
from torch.nn import functional as F


In [None]:
class Net(nn.Module):
    """Fully connected binary classifier: input -> 32 -> 64 -> 1.

    The two hidden layers use ReLU; the single output unit is passed through
    a sigmoid, so the forward pass returns values in (0, 1).
    """

    def __init__(self, input_shape):
        """Create the three linear layers; ``input_shape`` is the feature count."""
        super().__init__()
        self.fc1 = nn.Linear(input_shape, 32)
        self.fc2 = nn.Linear(32, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        """Return the sigmoid output of the network for input batch ``x``."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden).sigmoid()

In [None]:
class dataset(Dataset):
    """In-memory Dataset that stores features and targets as float32 tensors."""

    def __init__(self, x, y):
        """Copy ``x`` and ``y`` into float32 tensors and record the sample count."""
        float32 = torch.float32
        self.x = torch.tensor(x, dtype=float32)
        self.y = torch.tensor(y, dtype=float32)
        # Number of samples is the size of the first dimension of the features.
        self.length = self.x.size(0)

    def __len__(self):
        """Return the number of samples."""
        return self.length

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.x[idx], self.y[idx]

In [None]:
def train(epochs, model, loss_fn, trainloader, optimizer, x, y):
    """Train ``model`` and log loss/accuracy every 50 epochs.

    Args:
        epochs: number of passes over ``trainloader``.
        model: callable module mapping a feature batch to predictions.
        loss_fn: loss called as ``loss_fn(output, targets)`` with the targets
            reshaped to a column.
        trainloader: iterable of ``(features, targets)`` batches.
        optimizer: optimizer stepping the model's parameters.
        x: full feature set used for the accuracy estimate.
        y: labels matching ``x``, compared element-wise with the rounded
            predictions.

    Returns:
        ``(losses, accur)``: two lists of floats with one entry per logged
        epoch (epochs 0, 50, 100, ...). The loss is that of the epoch's last
        batch (NaN when the loader produced no batch); the accuracy is
        measured on ``x``/``y`` after the epoch's final optimizer step.
    """
    losses = []
    accur = []
    # Convert the evaluation features once rather than on every batch.
    x_eval = torch.tensor(x, dtype=torch.float32)
    for i in range(epochs):
        loss = None
        for x_train, y_train in trainloader:
            # forward pass and loss for this batch
            output = model(x_train)
            loss = loss_fn(output, y_train.reshape(-1, 1))

            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if i % 50 == 0:
            # Store plain floats: keeping the loss tensor would retain its
            # autograd graph and cannot be plotted directly.
            loss_value = float('nan') if loss is None else loss.item()
            # The full-dataset forward pass is only needed when logging and
            # must not build an autograd graph.
            with torch.no_grad():
                predicted = model(x_eval)
            acc = float((predicted.reshape(-1).cpu().numpy().round() == y).mean())
            losses.append(loss_value)
            accur.append(acc)
            print("epoch {}\tloss : {}\t accuracy : {}".format(i, loss_value, acc))
    return losses, accur

In [None]:
def test(model, testloader, x_test):
    #forward loop
    preds = []
    for j,(x_test, y_test) in enumerate(testloader):
        preds = model(torch.tensor(x_test,dtype=torch.float32))
        preds.reshape(-1).detach().numpy().round()

    return preds

In [None]:
# Standardize the features: fit the scaler on the training data only, then
# apply that same transform to the test data so both share one feature scale.

sc = StandardScaler()
X = sc.fit_transform(X)
X_test = sc.transform(X_test)

In [None]:
train_set = dataset(X, Y)
test_set = dataset(X_test, Y_test)

trainloader = DataLoader(train_set,batch_size=64,shuffle=False)
# Serve the held-out test set (not the training set) as one unshuffled batch
# so the predictions stay aligned with message_ids.
testloader = DataLoader(test_set,batch_size=test_set.length,shuffle=False)


TypeError: Instance and class checks can only be used with @runtime_checkable protocols

In [None]:
#hyper parameters
learning_rate = 0.01
epochs = 700
# Model , Optimizer, Loss
# The input width is the number of feature columns in X (the name is
# upper-case; a lower-case `x` is never defined in this notebook).
model = Net(input_shape=X.shape[1])
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)
loss_fn = nn.BCELoss()

losses, accur = train(epochs, model, loss_fn, trainloader, optimizer, X, Y)

In [None]:
# train() records one value every 50 epochs, so rebuild the epoch axis.
logged_epochs = [50 * k for k in range(len(losses))]
# float() also unwraps 0-d tensors, which matplotlib cannot plot while they
# still require grad.
loss_values = [float(v) for v in losses]
accuracy_values = [float(v) for v in accur]

#plotting the loss
plt.subplot(1,2,1)
plt.plot(logged_epochs, loss_values)
plt.title('Loss vs Epochs')
plt.xlabel('Epochs')
plt.ylabel('loss')

#plotting the accuracy
plt.subplot(1,2,2)
plt.plot(logged_epochs, accuracy_values)
plt.title('Accuracy vs Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')

In [None]:
preds = test(model, testloader, X_test)
# Flatten to hard 0/1 integer labels whether test() returned a tensor or a
# numpy array; rounding an already-rounded value is a no-op.
pred_labels = torch.as_tensor(preds).detach().cpu().reshape(-1).numpy().round().astype(int)
# Each prediction is written next to the message it belongs to.
assert len(pred_labels) == len(message_ids), "need exactly one prediction per test message"
# Fraction of test messages predicted as class 1 (a bare expression in the
# middle of a cell is not displayed).
print("fraction predicted as class 1:", pred_labels.mean())
results = pd.DataFrame({'data_id': list(message_ids), 'prediction': list(pred_labels)})
results.to_csv('results13.csv', index=False)