***TO DO***

*    Add balance dataset as done by NU Huskies
* Studty and apply loss og Imperial Model
* Study and apply KD loss
* Study the sparsity of lidar data

In [1]:
# from google.colab import drive
# drive.mount('/gdrive')

## Imports

In [2]:
!pip install pytorch-lightning
!pip install tables==3.6.1
!pip install fs.sshfs



In [3]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from enum import Enum
import torch.nn.functional as F

import numpy as np
import csv
import sys
import os
import random
import matplotlib.pyplot as plt
import matplotlib
import time
from math import floor
import tqdm
import random
import pandas as pd
import time

import pytorch_lightning as pl

seed = 0

random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

## Downloading Data 

Only once every runtime

In [4]:
import fs
import fs.opener
import fs.opener.sshfs

fs.opener.registry.install(fs.opener.sshfs.SSHOpener)
conn = fs.open_fs("ssh://ubuntu:Random.12345@23.96.3.111:22/home/ubuntu/Data")

In [5]:
conn.listdir('.')

['coords_labels.h5', 'S008_npz.zip', 'S009_npz.zip']

In [6]:
# f = open('S009_npz.zip','wb')
# conn.download('./S009_npz.zip',f)
# !unzip -q S009_npz.zip
# f = open('S008_npz.zip','wb')
# conn.download('./S008_npz.zip',f)
# !unzip -q S008_npz.zip
# f = open('coords_labels.h5','wb')
# conn.download('coords_labels.h5',f)

In [7]:
# lidar_data = '/gdrive/MyDrive/BTP/Data/BeamSoup/lidar_data'
# lidar_008 = os.path.join(lidar_data,'lidar_009.npz')
# lidar_data_008 = np.load(lidar_008)['input']
# lidar_save = './S009_npz'
# os.makedirs(lidar_save,exist_ok=True)
# for i in range(0,lidar_data_008.shape[0]):
#     if(i%1000==0):
#         print(f'{i} files have been saved')
#     try:
#         np.savez_compressed(lidar_save+'/'+str(i)+'.npz',input = lidar_data_008[i])
#     except:
#         print(f'Error in id {i}')

# !zip -r -q S009_npz.zip S009_npz

## Hyper Params

In [8]:
# lidar_dir = '/gdrive/MyDrive/BTP/Data/BeamSoup/lidar_data/lidar_sparse'
lidar_dir = '.'
coord_file = 'coords_labels.h5'

BATCH_SIZE = 64
n_worker = 2

num_classes = 256

thresholdBelowMax = 6

## Utilities

In [9]:
def top_k_acc(y_true:torch.Tensor,y_pred:torch.Tensor,k=1):
    y_pred_tpk = torch.topk(y_pred,k,dim=1)[1]
    if(len(y_true.shape)>=2):
        y_true = np.argmax(y_true,axis=1)
    ovr = 0
    pos = 0
    for i in range(0,len(y_pred_tpk)):
        if(y_true[i] in y_pred_tpk[i]):
            pos+=1
        ovr+=1
    acc = pos/ovr
    return acc

## Ranking beam output

Reading beam_output and converting them to logarithm scale; Power (dB) (neg) and setting a threshold value below which rounding all to zero

Overall classes: 8*32 = 256

In [10]:
def beamsLogScale(y:np,thresholdBelowMax=thresholdBelowMax):
    y_shape = y.shape
    
    for i in range(0,y_shape[0]):            
        thisOutputs = y[i,:]
        logOut = 20*np.log10(thisOutputs + 1e-30)
        minValue = np.amax(logOut) - thresholdBelowMax
        zeroedValueIndices = logOut < minValue
        thisOutputs[zeroedValueIndices]=0
        thisOutputs = thisOutputs / sum(thisOutputs)
        y[i,:] = thisOutputs
    
    return y

## Loading Data

Coord data is loaded fully into RAM while lidar data is read sequentially

* Number of training samples in training is 11194
* Number of validation samples is 9638

In [11]:
coord_train = pd.read_hdf(coord_file,key='train')
coord_val = pd.read_hdf(coord_file,key='val')

In [12]:
y_train = coord_train[['Labels']].to_numpy()
y_train = np.array(y_train[:,0].tolist())
y_train = y_train/(np.sum(y_train,axis=1).reshape((y_train.shape[0],1)))

# y_train = np.argmax(beamsLogScale(y_train),axis=1)

y_val = coord_val[['Labels']].to_numpy()
y_val = np.array(y_val[:,0].tolist())
y_val = y_val/(np.sum(y_val,axis=1).reshape((y_val.shape[0],1)))

# y_val = np.argmax(beamsLogScale(y_val),axis=1)

## Creating Dataset and dataloader

Shape of lidar data of BeamSoup is (180,330,10) and **it is sparse**

In [26]:
class custom_dataset(Dataset):
    def __init__(self,lidar_dir:str,label:np,coord:pd.DataFrame):
        super().__init__()
        self.label = label
        self.lidar_dir = lidar_dir
        self.coord = coord
    
    def __getitem__(self,idx):
        sample = dict()

        sample['label'] = torch.tensor(self.label[idx],dtype = torch.double)
        sample['coord'] = torch.from_numpy(pd.to_numeric(self.coord.loc[idx,['X','Y','Z']]).to_numpy())

        lidar_data = np.load(os.path.join(self.lidar_dir,str(idx)+'.npz'))['input']
        # sample['lidar'] = lidar_data.reshape((10,180,330))
        sample['lidar'] = torch.from_numpy(self.lidar2D(lidar_data))

        return sample

    def __len__(self):
        return len(self.label)

    # Deleting the 'Z' dimension of the data, done by Imperial_IPC1
    def lidar2D(self,lidar_data:np):
        lidar_data1 = np.zeros_like(lidar_data)[:, :, 1]
        lidar_data1[np.max(lidar_data == 1, axis=-1)] = 1
        lidar_data1[np.max(lidar_data == -2, axis=-1)] = -2
        lidar_data1[np.max(lidar_data == -1, axis=-1)] = -1
        return lidar_data1
        

In [27]:
train_dataset = custom_dataset(os.path.join(lidar_dir,'S008_npz'),y_train,coord_train)
val_dataset = custom_dataset(os.path.join(lidar_dir,'S009_npz'),y_val,coord_val)

In [28]:
train_loader = DataLoader(train_dataset,
                          batch_size = BATCH_SIZE,
                          shuffle = True,
                          num_workers = n_worker,
                          pin_memory = True,
                          drop_last = True)

In [29]:
val_loader = DataLoader(val_dataset,
                          batch_size = BATCH_SIZE,
                          shuffle = False,
                          num_workers = n_worker,
                          pin_memory = True,
                          drop_last = True)

## Customized Loss Function

In [30]:
 class BeamSoup():
    def __init__(self,beta):
        self.KLdivloss = nn.KLDivLoss()
        self.CEloss = nn.CrossEntropyLoss()
        self.beta = beta

    def cal(self,pred,y_true):
        #Remember y_true shoukd be a one hot vector
        y_true = torch.double(y_true)
        print(y_true.dtype)
        y_true_label = torch.argmax(y_true,dim=1).long()
        loss = (1-self.beta)*(self.KLdivloss(pred,y_true)) + self.beta*self.CEloss(pred,y_true_label)
        return loss


## Imperial Model

Checking if, increasing the lidar data shape, effect the performance of imperial model at all

Using Cross entropy loss. Study Imperial IPC loss

Use KDloss from BeamSoup

In [31]:
class imperial(pl.LightningModule):
    def __init__(self,lr=1e-4):
        super().__init__()
        self.lr = lr
        self.drop_prob = 0.15
        self.channel = 32

        self.KDloss = BeamSoup(beta=0.8)

        self.drop = nn.Dropout(self.drop_prob)
        # self.bn = nn.BatchNorm2d(self.channels)
        
        self.conv0 = nn.Sequential(nn.Conv2d(1,32,(3,3)),nn.ReLU())
        self.conv1 = nn.Sequential(nn.Conv2d(32,32,(3,3)),nn.ReLU())
        self.conv2 = nn.Sequential(nn.Conv2d(32,32,(3,3)),nn.ReLU())
        self.conv3 = nn.Sequential(nn.Conv2d(32,32,(3,3)),nn.ReLU())
        self.conv4 = nn.Sequential(nn.Conv2d(32,32,(3,3)),nn.ReLU())
        self.conv5 = nn.Sequential(nn.Conv2d(32,32,(3,3)),nn.ReLU())
        self.conv6 = nn.Sequential(nn.Conv2d(32,32,(3,3)),nn.ReLU())
        self.conv7 = nn.Sequential(nn.Conv2d(32,32,(3,3)),nn.ReLU())
        self.conv8 = nn.Sequential(nn.Conv2d(32,32,(3,3)),nn.ReLU())

        self.m_pool1 = nn.MaxPool2d((16,16),(4,4))
        self.m_pool2 = nn.MaxPool2d((2,2))
        self.flatten = nn.Flatten(start_dim=1)


        self.coord_lin = nn.Linear(3,128)
        self.linear7a = nn.Linear(896,512)
        self.linear7b = nn.Linear(512,512)
        self.relu7 = nn.ReLU()
        self.linear8 = nn.Linear(512, 256)

    def forward(self,X,coord):
        X = X.reshape((BATCH_SIZE,1,180,330))
        a = X = self.conv0(X)
        X = F.pad(X,(1,1,1,1))
        a = F.pad(a,(1,1,1,1))
        X = self.conv1(X)
        X = F.pad(X,(1,1,1,1))
        X = self.conv2(X)
        X = F.pad(X,(1,1,1,1))
        X = X+a
        X = self.m_pool1(X)


        b = X= self.drop(X)

        X = self.conv3(X)
        X = F.pad(X,(1,1,1,1))
        X = self.conv4(X)
        X = F.pad(X,(1,1,1,1))
        X = X+b
        X = self.m_pool1(X)

        c = X = self.drop(X)

        X = self.conv5(X)
        X = F.pad(X,(1,1,1,1))
        X = self.conv6(X)
        X = F.pad(X,(1,1,1,1))
        X = X + c
        X = self.m_pool2(X)
        
        d = X = self.drop(X)

        X = self.conv7(X)
        X = F.pad(X,(1,1,1,1))
        X = self.conv8(X)
        X = F.pad(X,(1,1,1,1))
        X = X + d

        X = self.flatten(X)

        coord = self.coord_lin(coord)
        out = torch.cat((X,coord),dim=1)

        out = self.linear7a(out)
        out = self.relu7(out)
        out = self.drop(out)
        out = self.linear7b(out)
        out = self.relu7(out)
        out = self.drop(out)
        out = self.linear8(out)

        return out

    def training_step(self,batch,batch_idx):
        label = batch['label'].double()
        lidar = batch['lidar'].float()
        coord = batch['coord'].float()

        yhat = self(lidar,coord)

        loss = self.KDloss.cal(yhat,label)
        
        self.log('my_loss',loss)

        return {'loss':loss,'pred':yhat,'label':label}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr= self.lr)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 10, 0.5)
        return [optimizer],[scheduler]

    def training_epoch_end(self,train_out):
        len_out = len(train_out)
        y_pred = torch.Tensor(len_out*BATCH_SIZE,num_classes)
        y_true = torch.Tensor(len_out*BATCH_SIZE)

        for i in range(0,len_out):
            y_pred[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:] = train_out[i]['pred'] 
            y_true[i*BATCH_SIZE:(i+1)*BATCH_SIZE] = train_out[i]['label']

        top1 = top_k_acc(y_true,y_pred,k=1)
        top5 = top_k_acc(y_true,y_pred,k=5)
        top10 = top_k_acc(y_true,y_pred,k=10)
        top50 = top_k_acc(y_true,y_pred,k=50)

        print('Train Topk accuracies are :  Top 1: {:.4f}, Top 5: {:.4f}, Top 10: {:.4f},Top 50: {:.4f}'.format(top1,top5,top10,top50))

    def validation_step(self,batch,batch_idx):
        label = batch['label'].long()
        lidar = batch['lidar'].float()
        coord = batch['coord'].float()

        yhat = self(lidar,coord)

        return [yhat.cpu().detach(),label.cpu().detach()]
    
    def validation_epoch_end(self,val_out):
        len_out = len(val_out)
        y_pred = torch.Tensor(len_out*BATCH_SIZE,num_classes)
        y_true = torch.Tensor(len_out*BATCH_SIZE,num_classes)

        for i in range(0,len_out):
            y_pred[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:] = val_out[i][0] 
            y_true[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:] = val_out[i][1] 

        top1 = top_k_acc(y_true,y_pred,k=1)
        top5 = top_k_acc(y_true,y_pred,k=5)
        top10 = top_k_acc(y_true,y_pred,k=10)
        top50 = top_k_acc(y_true,y_pred,k=50)

        print('Dev Topk accuracies are :  Top 1: {:.4f}, Top 5: {:.4f}, Top 10: {:.4f},Top 50: {:.4f}'.format(top1,top5,top10,top50))

    def test_step(self,batch,batch_idx):
        label = batch['label'].long()
        lidar = batch['lidar'].float()
        coord = batch['coord'].float()

        yhat = self(lidar,coord)

        return [yhat.cpu().detach(),label.cpu().detach()]
    
    def test_epoch_end(self,test_out):
        len_out = len(test_out)
        y_pred = torch.Tensor(len_out*BATCH_SIZE,num_classes)
        y_true = torch.Tensor(len_out*BATCH_SIZE,num_classes)

        for i in range(0,len_out):
            y_pred[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:] = test_out[i][0] 
            y_true[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:] = test_out[i][1] 

        top1 = top_k_acc(y_true,y_pred,k=1)
        top5 = top_k_acc(y_true,y_pred,k=5)
        top10 = top_k_acc(y_true,y_pred,k=10)
        top50 = top_k_acc(y_true,y_pred,k=50)

        print('Test Topk accuracies are :  Top 1: {:.4f}, Top 5: {:.4f}, Top 10: {:.4f},Top 50: {:.4f}'.format(top1,top5,top10,top50)) 

    



## Training the model

In [32]:
model_imp = imperial()

In [33]:
trainer_lidar = pl.Trainer(reload_dataloaders_every_epoch = True,
                     gpus=1,
                     max_epochs = 50,
                     num_sanity_val_steps=1,
                     auto_lr_find = False
                     )

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [34]:
# trainer_lidar.tune(model_imp,train_loader,val_loader)
trainer_lidar.fit(model_imp,train_loader,val_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name      | Type       | Params
------------------------------------------
0  | drop      | Dropout    | 0     
1  | conv0     | Sequential | 320   
2  | conv1     | Sequential | 9.2 K 
3  | conv2     | Sequential | 9.2 K 
4  | conv3     | Sequential | 9.2 K 
5  | conv4     | Sequential | 9.2 K 
6  | conv5     | Sequential | 9.2 K 
7  | conv6     | Sequential | 9.2 K 
8  | conv7     | Sequential | 9.2 K 
9  | conv8     | Sequential | 9.2 K 
10 | m_pool1   | MaxPool2d  | 0     
11 | m_pool2   | MaxPool2d  | 0     
12 | flatten   | Flatten    | 0     
13 | coord_lin | Linear     | 512   
14 | linear7a  | Linear     | 459 K 
15 | linear7b  | Linear     | 262 K 
16 | relu7     | ReLU       | 0     
17 | linear8   | Linear     | 131 K 
------------------------------------------
928 K     Trainable params
0         Non-trainable params
928 K     Total params
3.712     Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

Dev Topk accuracies are :  Top 1: 0.0000, Top 5: 0.0000, Top 10: 0.0000,Top 50: 1.0000


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

TypeError: ignored