In [1]:
import h5py # .h5 파일을 읽기 위한 패키지
import random
import pandas as pd
import numpy as np
import os
import glob
import math

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

from tqdm.auto import tqdm
from cfg.voxelnet_cfg import config as cfg 

from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
import torch.nn as nn
import torch.nn.functional as F
import torch


# conv2d + bn + relu
class Conv2d(nn.Module):
    """2-D convolution with optional batch normalisation and optional ReLU.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        k: kernel size passed to ``nn.Conv2d``.
        s: stride passed to ``nn.Conv2d``.
        p: padding passed to ``nn.Conv2d``.
        activation: when True, an in-place ReLU is applied as the last step.
        batch_norm: when True, a ``BatchNorm2d`` follows the convolution.
    """

    def __init__(self, in_channels, out_channels, k, s, p, activation=True, batch_norm=True):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
        # `bn` stays None (and contributes no parameters) when batch norm is disabled.
        self.bn = nn.BatchNorm2d(out_channels) if batch_norm else None
        self.activation = activation

    def forward(self, x):
        """Apply conv -> (batch norm) -> (ReLU) and return the result."""
        out = self.conv(x)
        if self.bn is not None:
            out = self.bn(out)
        if not self.activation:
            return out
        return F.relu(out, inplace=True)

# conv3d + bn + relu
class Conv3d(nn.Module):
    """3-D convolution with optional batch normalisation, always followed by ReLU.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        k: kernel size passed to ``nn.Conv3d``.
        s: stride passed to ``nn.Conv3d``.
        p: padding passed to ``nn.Conv3d``.
        batch_norm: when True, a ``BatchNorm3d`` follows the convolution.
    """

    def __init__(self, in_channels, out_channels, k, s, p, batch_norm=True):
        super().__init__()
        self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
        self.bn = nn.BatchNorm3d(out_channels) if batch_norm else None

    def forward(self, x):
        """Apply conv -> (batch norm) -> in-place ReLU."""
        out = self.conv(x)
        if self.bn is not None:
            out = self.bn(out)
        return F.relu(out, inplace=True)

# Fully Connected Network
class FCN(nn.Module):
    """Linear -> BatchNorm1d -> ReLU applied independently to every row of a 3-D tensor.

    Args:
        cin: size of the last input dimension.
        cout: size of the last output dimension.
    """

    def __init__(self, cin, cout):
        super().__init__()
        self.cout = cout
        self.linear = nn.Linear(cin, cout)
        self.bn = nn.BatchNorm1d(cout)

    def forward(self, x):
        """Map a ``(kk, t, cin)`` tensor to ``(kk, t, cout)``.

        The first two dimensions are merged so the linear layer and the batch
        norm see one row per (kk, t) entry, then split again on the way out.
        """
        n_groups, n_points, _ = x.shape
        flat = x.view(n_groups * n_points, -1)
        activated = F.relu(self.bn(self.linear(flat)))
        return activated.view(n_groups, n_points, -1)

# Voxel Feature Encoding layer
class VFE(nn.Module):
    """Voxel Feature Encoding layer.

    Each point feature is passed through an ``FCN`` producing ``cout // 2``
    channels, concatenated with the per-voxel element-wise maximum of those
    channels, and finally zeroed for points flagged as padding by ``mask``.

    Args:
        cin: number of input channels per point.
        cout: number of output channels per point; must be even.
    """

    def __init__(self, cin, cout):
        super(VFE, self).__init__()
        assert cout % 2 == 0
        self.units = cout // 2
        self.fcn = FCN(cin, self.units)

    def forward(self, x, mask):
        """Encode the points of every voxel.

        Args:
            x: tensor of shape ``(kk, t, cin)``.
            mask: boolean tensor of shape ``(kk, t)``; False entries are zeroed
                in the output.

        Returns:
            Tensor of shape ``(kk, t, 2 * units)``.
        """
        # Point-wise feature.
        pwf = self.fcn(x)

        # Locally aggregated feature: per-voxel max, broadcast back over the
        # point axis. The point count comes from the tensor itself instead of
        # the global cfg.T, so the layer works for any t and has no hidden
        # dependency on the configuration module.
        n_points = pwf.shape[1]
        laf = torch.max(pwf, 1, keepdim=True)[0].expand(-1, n_points, -1)

        # Point-wise concatenated feature.
        pwcf = torch.cat((pwf, laf), dim=2)

        # Zero out padded points; the mask broadcasts over the channel axis.
        return pwcf * mask.unsqueeze(2).float()

# Stacked Voxel Feature Encoding
class SVFE(nn.Module):
    """Stacked Voxel Feature Encoding: two VFE layers, an FCN and a max-pool.

    The input is expected to carry 7 features per point (the first VFE layer
    is built with ``cin=7``); the output has 128 channels per voxel.
    """

    def __init__(self):
        super(SVFE, self).__init__()
        self.vfe_1 = VFE(7, 32)
        self.vfe_2 = VFE(32, 128)
        self.fcn = FCN(128, 128)

    def forward(self, x):
        """Encode ``(kk, t, 7)`` point features into ``(kk, 128)`` voxel features."""
        # A point is padding only when *all* of its features are zero.
        # Testing `max(features) != 0` instead wrongly drops real points whose
        # features are all <= 0 while one feature is exactly 0 (the max is then
        # 0) -- e.g. a point with negative coordinates lying below its voxel
        # centroid on every axis when the reflectance channel is zero.
        mask = torch.ne(x, 0).any(dim=2)
        x = self.vfe_1(x, mask)
        x = self.vfe_2(x, mask)
        x = self.fcn(x)
        # Element-wise max pooling over the points of each voxel.
        x = torch.max(x, 1)[0]
        return x

# Convolutional Middle Layer
class CML(nn.Module):
    """Convolutional middle layers: three stacked ``Conv3d`` blocks (128 -> 64 -> 64 -> 64 channels)."""

    def __init__(self):
        super().__init__()
        # Strides and paddings are given per axis of the 3-D input.
        self.conv3d_1 = Conv3d(128, 64, 3, s=(2, 1, 1), p=(1, 1, 1))
        self.conv3d_2 = Conv3d(64, 64, 3, s=(1, 1, 1), p=(0, 1, 1))
        self.conv3d_3 = Conv3d(64, 64, 3, s=(2, 1, 1), p=(1, 1, 1))

    def forward(self, x):
        """Run the three convolution blocks in sequence."""
        return self.conv3d_3(self.conv3d_2(self.conv3d_1(x)))

class RPN(nn.Module):
    """Region-proposal-style backbone whose output is fed to a classifier head.

    Three strided convolution blocks produce feature maps at decreasing
    resolution; each is brought back to a common resolution by a transposed
    convolution, the three maps are concatenated (768 channels), reduced to
    10 channels by ``score_head`` and flattened into ``Classifier(cin=640)``.
    """

    def __init__(self):
        super().__init__()
        self.block_1 = nn.Sequential(
            Conv2d(192, 192, 3, 2, 1),
            *[Conv2d(192, 192, 3, 1, 1) for _ in range(3)],
        )
        self.block_2 = nn.Sequential(
            Conv2d(192, 192, 3, 2, 1),
            *[Conv2d(192, 192, 3, 1, 1) for _ in range(5)],
        )
        # NOTE(review): unlike block_1 / block_2, the five trailing layers here
        # are bare nn.Conv2d (no batch norm, no ReLU). This looks unintended,
        # but switching to the Conv2d wrapper would rename the state_dict keys
        # and make previously saved checkpoints unloadable -- confirm first.
        self.block_3 = nn.Sequential(
            Conv2d(192, 256, 3, 2, 1),
            *[nn.Conv2d(256, 256, 3, 1, 1) for _ in range(5)],
        )

        self.deconv_1 = nn.Sequential(nn.ConvTranspose2d(256, 256, 4, 4, 0), nn.BatchNorm2d(256))
        self.deconv_2 = nn.Sequential(nn.ConvTranspose2d(192, 256, 2, 2, 0), nn.BatchNorm2d(256))
        self.deconv_3 = nn.Sequential(nn.ConvTranspose2d(192, 256, 1, 1, 0), nn.BatchNorm2d(256))

        self.score_head = Conv2d(768, 10, 1, 1, 0, activation=False, batch_norm=False)
        # cin=640 must equal 10 * H' * W' of the score map (i.e. H' * W' == 64).
        self.cls = Classifier(cin=640)

    def forward(self, x, batch_size):
        """Return the classifier output for a batch of 192-channel feature maps.

        Args:
            x: 4-D feature map with 192 channels.
            batch_size: number of samples; used to flatten the score map.
        """
        skip_1 = self.block_1(x)
        skip_2 = self.block_2(skip_1)
        deep = self.block_3(skip_2)

        # Upsample every scale and stack along the channel axis.
        up_deep = self.deconv_1(deep)
        up_mid = self.deconv_2(skip_2)
        up_shallow = self.deconv_3(skip_1)
        fused = torch.cat((up_deep, up_mid, up_shallow), 1)

        scores = self.score_head(fused)
        return self.cls(scores.view(batch_size, -1))
    
    
# classifier
class Classifier(nn.Module):
    """Three-layer MLP head: cin -> cin//2 -> cin//4 -> num_classes.

    The two hidden layers use GELU followed by dropout; the last layer is a
    plain linear projection.

    Args:
        cin: number of input features after flattening.
        num_classes: size of the output vector.
        dropout_rate: dropout probability of both hidden layers.
    """

    def __init__(self, cin, num_classes=10, dropout_rate=0.5):
        super().__init__()
        half, quarter = cin // 2, cin // 4

        self.linear_1 = nn.Sequential(
            nn.Linear(cin, half),
            nn.GELU(),
            nn.Dropout(p=dropout_rate, inplace=False),
        )
        self.linear_2 = nn.Sequential(
            nn.Linear(half, quarter),
            nn.GELU(),
            nn.Dropout(p=dropout_rate, inplace=False),
        )
        self.linear_3 = nn.Linear(quarter, num_classes)

    def forward(self, x):
        """Flatten everything after the batch axis and run the MLP.

        The input must have at least two dimensions.
        """
        n_samples, _n_features, *_rest = x.shape
        hidden = self.linear_1(x.view(n_samples, -1))
        hidden = self.linear_2(hidden)
        return self.linear_3(hidden)
    

class VoxelNet(nn.Module):
    """VoxelNet-style classifier: SVFE -> dense voxel grid -> CML -> RPN head.

    The grid size is read from the global configuration (``cfg.D``, ``cfg.H``,
    ``cfg.W``).
    """

    def __init__(self):
        super(VoxelNet, self).__init__()
        self.svfe = SVFE()
        self.cml = CML()
        self.rpn = RPN()
        # Not used by forward(). Kept because removing it would change the
        # state_dict keys and break loading of checkpoints saved with it.
        self.cls = Classifier(cin=64)

    def voxel_indexing(self, sparse_features, coords, batch_size):
        """Scatter per-voxel features into a dense 5-D grid.

        Args:
            sparse_features: ``(K, C)`` tensor, one row per non-empty voxel.
            coords: ``(K, 4)`` tensor of (batch index, d, h, w) per voxel.
            batch_size: number of samples in the batch.

        Returns:
            Tensor of shape ``(batch_size, C, cfg.D, cfg.H, cfg.W)``; cells
            without a voxel stay zero.
        """
        dim = sparse_features.shape[-1]
        coords = coords.type(torch.long)
        # Allocate next to the features rather than on cfg.device: the caller
        # decides where the model and data live, and a mismatch between that
        # choice and cfg.device would make the assignment below fail.
        dense_feature = torch.zeros(
            dim, batch_size, cfg.D, cfg.H, cfg.W,
            dtype=sparse_features.dtype, device=sparse_features.device,
        )
        dense_feature[:, coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]] = \
            sparse_features.transpose(0, 1)
        return dense_feature.transpose(0, 1)

    def forward(self, voxel_features, voxel_coords, batch_size):
        """Return the class scores produced by the RPN's classifier head.

        Args:
            voxel_features: ``(K, T, 7)`` per-voxel point features.
            voxel_coords: ``(K, 4)`` voxel coordinates with the batch index first.
            batch_size: number of samples in the batch.
        """
        # Feature learning network.
        vwfs = self.svfe(voxel_features)
        vwfs = self.voxel_indexing(vwfs, voxel_coords, batch_size)

        # Convolutional middle network; depth and channel axes are merged so
        # the result can be consumed by 2-D convolutions.
        cml_out = self.cml(vwfs)
        cml_out = cml_out.view(batch_size, -1, cfg.H, cfg.W)

        # Region proposal network with the classifier head.
        score = self.rpn(cml_out, batch_size)
        return score

In [3]:
def detection_collate(batch):
    """Collate variable-length voxel samples into flat NumPy arrays.

    Every sample is ``(voxel_features, voxel_coords)`` or
    ``(voxel_features, voxel_coords, label)``; the layout of the first sample
    decides which of the two is assumed for the whole batch.

    Args:
        batch: non-empty list of samples as produced by the dataset.

    Returns:
        ``(features, coords, labels)`` for labelled samples, where ``labels``
        is an int64 array, or ``(features, coords, len(batch))`` otherwise.
        ``features`` is the concatenation of all per-sample feature arrays and
        ``coords`` the concatenation of all coordinate arrays with the
        sample's position in the batch prepended as an extra first column.
    """
    has_labels = len(batch[0]) == 3

    voxel_features = [sample[0] for sample in batch]
    # Prepend the in-batch sample index so voxels can be routed back later.
    voxel_coords = [
        np.pad(sample[1], ((0, 0), (1, 0)), mode='constant', constant_values=i)
        for i, sample in enumerate(batch)
    ]
    features = np.concatenate(voxel_features)
    coords = np.concatenate(voxel_coords)

    if has_labels:
        # Build the label array directly: `np.array(batch)` fails on ragged
        # samples with current NumPy, and the `np.long` alias no longer exists.
        labels = np.array([sample[2] for sample in batch], dtype=np.int64)
        return features, coords, labels
    return features, coords, len(batch)

In [4]:
# def detection_collate(batch):
#     voxel_features = []
#     voxel_coords = []
#     label = []
#     shapes = [s[0].shape[0] for s in batch]
#     ind_max = np.argmax(shapes)
#     ind_min = np.argmin(shapes)
#     diff_shapes = shapes[ind_max] - shapes[ind_min]
    
#     for i, sample in enumerate(batch):

#         if ind_max == i :
#             voxel_features.append(sample[0])
#             voxel_coords.append(sample[1])
#         else :
#             voxel_features.append(F.pad(sample[0], (0,0,0,0,diff_shapes,0), "constant", 0))

# #         voxel_coords.append(
# #             F.pad(sample[1], (0, 0, 1, 0), mode='constant', value=i))
        
#             voxel_coords.append(F.pad(sample[1], (0,0,diff_shapes,0), "constant", 0))
        
#         label.append(sample[2])

#     return torch.stack(voxel_features, dim=0), torch.stack(voxel_coords, dim=0), torch.Tensor(label)

In [5]:
class VoxelDataset(Dataset):
    """Dataset that turns stored point clouds into voxel features on the fly.

    Args:
        id_list: sequence of sample ids; ``str(id)`` is used as the key into
            ``point_list``.
        label_list: sequence of labels aligned with ``id_list``, or None for
            unlabelled data.
        point_list: mapping from id string to an object that yields the point
            array when sliced with ``[:]`` (e.g. an open h5py file).
            Points are presumably (N, 3) xyz rows -- TODO confirm.
    """

    def __init__(self, id_list, label_list, point_list):
        self.id_list = id_list
        self.label_list = label_list
        self.point_list = point_list

    def __getitem__(self, index):
        """Return ``(features, coords, label)`` or, without labels, ``(features, coords)``."""
        image_id = self.id_list[index]
        points = self.point_list[str(image_id)][:]

        if self.label_list is None:
            voxel_features, voxel_coords = self.voxelization(points)
            return voxel_features, voxel_coords

        # NOTE(review): rescaling and random augmentation run for every
        # labelled dataset, so a validation split built with labels is
        # augmented as well and its metrics are not deterministic -- confirm
        # that this is intended.
        points = self.trans_axis_range(points, axis=[0, 1, 2])

        x_degree = self.rand_degree(-np.pi / 2, np.pi / 2)
        y_degree = self.rand_degree(-np.pi / 2, np.pi / 2)
        z_degree = self.rand_degree(-np.pi / 6, np.pi / 6)

        points = self.rotate(x_degree, y_degree, z_degree, points)
        points = self.jittering(points, (-0.02, 0.02))
        points = self.scaling(points, (0.98, 1.02))

        voxel_features, voxel_coords = self.voxelization(points)
        label = self.label_list[index]
        return voxel_features, voxel_coords, label

    def rotate(self, a, b, c, dots):
        """Rotate ``dots`` by angles ``a``, ``b``, ``c`` (radians) about the x, y and z axes."""
        mx = np.array([[1, 0, 0], [0, np.cos(a), -np.sin(a)], [0, np.sin(a), np.cos(a)]])
        my = np.array([[np.cos(b), 0, np.sin(b)], [0, 1, 0], [-np.sin(b), 0, np.cos(b)]])
        mz = np.array([[np.cos(c), -np.sin(c), 0], [np.sin(c), np.cos(c), 0], [0, 0, 1]])
        m = np.dot(np.dot(mx, my), mz)
        # Points are row vectors, hence the transpose.
        return np.dot(dots, m.T)

    def jittering(self, point, scale_range):
        """Add independent uniform noise drawn from ``scale_range`` to every value (in place)."""
        point += np.random.uniform(*scale_range, size=point.shape)
        return point

    def scaling(self, point, scale_range):
        """Multiply all values by one factor drawn uniformly from ``scale_range`` (in place)."""
        point *= np.random.uniform(*scale_range)
        return point

    def rand_degree(self, *rotation_range):
        """Draw one angle uniformly from ``rotation_range`` (low, high)."""
        return np.random.uniform(*rotation_range)

    def trans_axis_range(self, point, axis=[0]):
        """Rescale the selected columns from the train range towards the test range (in place).

        The columns are divided by ``max(|cfg.train_range[axis]|) + 0.1`` and
        multiplied by ``min(|cfg.test_range[axis]|) - 0.1``. The default list
        is never mutated.
        """
        point[:, axis] = point[:, axis] / (np.max(np.abs(cfg.train_range[axis])) + 0.1) * (np.min(np.abs(cfg.test_range[axis])) - 0.1)
        return point

    def voxelization(self, point):
        """Group points into voxels and build per-voxel feature tensors.

        Args:
            point: ``(N, 3)`` array of xyz coordinates.

        Returns:
            ``(voxel_features, voxel_coords)`` where ``voxel_features`` is a
            float32 array of shape ``(V, cfg.T, 7)`` (xyz, a zero reflectance
            channel and the xyz offset from the voxel centroid; unused rows
            stay zero) and ``voxel_coords`` holds the ``V`` unique voxel
            indices in (D, H, W) order.
        """
        # The data has no reflectance; append a zero channel so the feature
        # layout is (x, y, z, reflectance).
        point_reflectance = np.zeros((point.shape[0], 1))
        point = np.concatenate((point, point_reflectance), 1)

        origin = np.array([cfg.test_range[0][0], cfg.test_range[1][0], cfg.test_range[2][0]])
        voxel_coords = ((point[:, :3] - origin) / (cfg.vw, cfg.vh, cfg.vd)).astype(np.int32)

        # Convert to (D, H, W).
        voxel_coords = voxel_coords[:, [2, 1, 0]]
        voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords,
                                                        axis=0,
                                                        return_inverse=True,
                                                        return_counts=True)
        n_voxels = len(voxel_coords)
        voxel_features = np.zeros((n_voxels, cfg.T, 7), dtype=np.float32)
        if n_voxels == 0:
            return voxel_features, voxel_coords

        # Group point indices per voxel with one stable sort instead of one
        # boolean mask per voxel; stability keeps the original point order
        # inside every voxel.
        inv_ind = inv_ind.reshape(-1)
        order = np.argsort(inv_ind, kind='stable')
        groups = np.split(order, np.cumsum(voxel_counts)[:-1])

        for i, member_idx in enumerate(groups):
            pts = point[member_idx]

            # Random sampling without replacement: keep T distinct points
            # instead of possibly duplicating some and dropping others.
            if pts.shape[0] > cfg.T:
                keep = np.random.choice(pts.shape[0], size=cfg.T, replace=False)
                pts = pts[keep, :]

            voxel_features[i, :pts.shape[0], :] = np.concatenate(
                (pts, pts[:, :3] - np.mean(pts[:, :3], 0)), axis=1)

        return voxel_features, voxel_coords

    def __len__(self):
        """Number of samples."""
        return len(self.id_list)

# Train

In [6]:
def cal_acc(label, pred):
    """Accuracy of arg-max predictions.

    Args:
        label: 1-D tensor of true class indices.
        pred: 2-D tensor of per-class scores, one row per sample.

    Returns:
        The value returned by ``accuracy_score(true, predicted)``.
    """
    predicted = pred.detach().argmax(dim=1).cpu().tolist()
    expected = label.detach().cpu().tolist()
    return accuracy_score(expected, predicted)

In [7]:
def weight_load(model, optim, ckpt):
    """Restore model and optimizer state from a checkpoint file.

    Args:
        model: module whose ``state_dict`` matches ``checkpoint['model_state_dict']``.
        optim: optimizer to restore from ``checkpoint['optimizer_state_dict']``.
        ckpt: path of the checkpoint file.

    Returns:
        ``(model, optim, scheduler, epoch)`` where ``scheduler`` is a newly
        created ``CosineAnnealingLR`` configured from the global ``CFG`` and
        ``epoch`` is the epoch number stored in the checkpoint.
    """
    # Load onto the CPU first so a checkpoint saved on a GPU machine can be
    # restored on a host without CUDA; load_state_dict copies the values onto
    # whatever device the model / optimizer parameters already live on.
    checkpoint = torch.load(ckpt, map_location="cpu")

    model.load_state_dict(checkpoint['model_state_dict'])
    optim.load_state_dict(checkpoint["optimizer_state_dict"])
    # NOTE(review): the scheduler state is not part of the checkpoint, so the
    # cosine schedule is rebuilt from scratch here -- confirm that restarting
    # it on resume is acceptable.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim,
                              T_max=CFG['cosine_lr_T_max'],
                              eta_min=CFG['cosine_lr_eta_min'])

    return model, optim, scheduler, checkpoint['epoch']

In [8]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train ``model``, log to TensorBoard and checkpoint on validation improvement.

    Reads the global ``CFG`` for ``reuse``, ``checkpoint``, ``LOG``, ``EPOCHS``
    and ``output``.

    Args:
        model: network called as ``model(features, coords, batch_size=...)``.
        optimizer: optimizer over the model parameters.
        train_loader: yields ``(features, coords, labels)`` NumPy batches.
        val_loader: loader passed on to ``validation``.
        scheduler: learning-rate scheduler stepped once per epoch, or None.
        device: device the model and the batches are moved to.
    """
    if CFG['reuse']:
        model, optimizer, scheduler, last_epoch = weight_load(model, optimizer, CFG['checkpoint'])
        # The checkpoint is written after its epoch has finished, so resume
        # with the following epoch instead of repeating the stored one.
        start_epoch = last_epoch + 1
    else:
        start_epoch = 0

    # torch.save does not create missing directories.
    os.makedirs('./ckpt', exist_ok=True)

    # tensorboard
    log_writter = SummaryWriter(CFG['LOG'])

    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    best_score = 0
    try:
        # NOTE(review): the upper bound is inclusive, i.e. a fresh run trains
        # CFG['EPOCHS'] + 1 epochs (0 .. EPOCHS) -- confirm this is intended.
        for epoch in range(start_epoch, CFG['EPOCHS'] + 1):
            model.train()
            train_loss = []
            train_acc = []
            training_bar = tqdm(iter(train_loader))
            for batch, (vf, vc, label) in enumerate(training_bar, start=1):
                vf = torch.tensor(vf).to(device)
                vc = torch.tensor(vc).to(device)
                label = torch.tensor(label, dtype=torch.long).to(device)

                optimizer.zero_grad()

                output = model(vf, vc, batch_size=label.shape[0])
                loss = criterion(output, label)

                loss.backward()
                optimizer.step()

                acc = cal_acc(label, output)
                train_acc.append(acc)
                train_loss.append(loss.item())

                training_bar.set_postfix({
                    'Training Loss' : np.mean(train_loss),
                    'Training ACC' : np.mean(train_acc)})

                # One global step per batch across all epochs.
                global_step = epoch * len(train_loader) + batch
                log_writter.add_scalar('Training Loss', loss.item(), global_step)
                log_writter.add_scalar('Training Accuracy', acc, global_step)

            if scheduler is not None:
                scheduler.step()

            val_loss, val_acc = validation(model, criterion, val_loader, device, log_writter)

            log_writter.add_scalar('Validation Loss', val_loss, epoch)
            log_writter.add_scalar('Validation Accuracy', val_acc, epoch)

            print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss)}] Val Loss : [{val_loss}] Val ACC : [{val_acc}]')

            # Keep a checkpoint every time the validation accuracy improves.
            if best_score < val_acc:
                best_score = val_acc
                torch.save({
                        "epoch" : epoch,
                        "model_state_dict" : model.state_dict(),
                        "optimizer_state_dict" : optimizer.state_dict()
                    }, './ckpt/'+str(epoch)+'E-val'+str(best_score)+'-'+CFG['output'])
    finally:
        # Flush pending events and release the log file even when training is
        # interrupted or fails.
        log_writter.close()


In [9]:
def validation(model, criterion, val_loader, device, log_writter):
    """Evaluate ``model`` on ``val_loader`` without gradient tracking.

    Args:
        model: network called as ``model(features, coords, batch_size=...)``;
            it is switched to eval mode.
        criterion: loss called as ``criterion(output, labels)``.
        val_loader: yields ``(features, coords, labels)`` batches.
        device: device the batches are moved to.
        log_writter: accepted for call compatibility; not used here.

    Returns:
        ``(mean batch loss, accuracy over all samples)``.
    """
    model.eval()
    batch_losses, predicted, expected = [], [], []
    with torch.no_grad():
        for vf, vc, label in tqdm(iter(val_loader)):
            features = torch.tensor(vf).to(device)
            coords = torch.tensor(vc).to(device)
            targets = torch.tensor(label, dtype=torch.long).to(device)

            logits = model(features, coords, batch_size=targets.shape[0])
            batch_losses.append(criterion(logits, targets).item())

            predicted.extend(logits.argmax(1).detach().cpu().numpy().tolist())
            expected.extend(targets.detach().cpu().numpy().tolist())

    return np.mean(batch_losses), accuracy_score(expected, predicted)

In [10]:
# Run configuration: everything a reader might want to tune lives here.
CFG = {
    # Optimisation
    'EPOCHS':10,
    'LEARNING_RATE':1e-3,
    'BATCH_SIZE':64,

    # CosineAnnealingLR: lower bound of the learning rate and schedule length in epochs
    'cosine_lr_eta_min' : 1e-4,
    'cosine_lr_T_max' : 5,

    # Outputs: TensorBoard log directory and checkpoint file-name suffix
    'LOG' : "./tensorboard",
    'output' : 'jitter-scale-axis-voxelnet.pth',

    # Resume: checkpoint to continue from when 'reuse' is True
    'checkpoint' : './ckpt/15E-val0.9366-lr_1e21e3-xaxis_reg-voxelnet.pth',
    'reuse' : False,

    # Seed for Python, NumPy and PyTorch random number generators
    'SEED' : 42,
}
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Augmentation, voxel sampling, shuffling and weight initialisation are all
# random; seed every generator once so a fresh "Restart & Run All" is repeatable.
random.seed(CFG['SEED'])
np.random.seed(CFG['SEED'])
torch.manual_seed(CFG['SEED'])

In [11]:
# Labelled samples and their point clouds (the HDF5 file stays open because
# the datasets read from it lazily).
all_df = pd.read_csv('./data/train.csv')
all_points = h5py.File('./data/train.h5', 'r')

# Positional 80 / 20 split: first rows train, remaining rows validate.
n_train = int(len(all_df)*0.8)
train_df = all_df.iloc[:n_train]
val_df = all_df.iloc[n_train:]

train_dataset = VoxelDataset(train_df['ID'].values, train_df['label'].values, all_points)
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['BATCH_SIZE'],
    collate_fn=detection_collate,
    shuffle=True,
    num_workers=0,
)

val_dataset = VoxelDataset(val_df['ID'].values, val_df['label'].values, all_points)
val_loader = DataLoader(
    val_dataset,
    batch_size=CFG['BATCH_SIZE'],
    collate_fn=detection_collate,
    shuffle=False,
    num_workers=0,
)

In [None]:
# Build the model, optimizer and learning-rate schedule, then start training.
model = VoxelNet().to(device)
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# T_max is the schedule length in epochs and eta_min the lowest learning rate;
# each config value must go to the parameter of the same name. Swapping them
# makes the schedule anneal towards a learning rate of 5.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                      T_max=CFG['cosine_lr_T_max'],
                                                      eta_min=CFG['cosine_lr_eta_min'])

train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/625 [00:00<?, ?it/s]

# Submission

In [13]:
def predict(model, test_loader, device, dim_changer=None):
    """Predict a class index for every sample served by ``test_loader``.

    Args:
        model: network called as ``model(features, coords, batch_size=...)``;
            it is moved to ``device`` and switched to eval mode.
        test_loader: yields ``(features, coords, batch_size)`` batches.
        device: device used for the model and the batches.
        dim_changer: accepted for call compatibility; not used here.

    Returns:
        Flat list of predicted class indices in loader order.
    """
    model.to(device)
    model.eval()
    predicted = []
    with torch.no_grad():
        for vf, vc, batch_size in tqdm(iter(test_loader)):
            features = torch.tensor(vf).to(device)
            coords = torch.tensor(vc).to(device)
            logits = model(features, coords, batch_size=batch_size)
            predicted.extend(logits.argmax(1).detach().cpu().numpy().tolist())
    return predicted

In [14]:
# Test ids (taken from the sample submission) and their point clouds.
test_df = pd.read_csv('./data/sample_submission.csv')
test_points = h5py.File('./data/test.h5', 'r')

# No labels: the dataset then yields (features, coords) only.
test_dataset = VoxelDataset(test_df['ID'].values, None, test_points)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'],  collate_fn=detection_collate, shuffle=False, num_workers=0)

# map_location lets a checkpoint saved on a GPU be restored on whichever
# device is available on this machine.
checkpoint = torch.load('./ckpt/20E-val0.9419-lr_1e21e3-xaxis_reg-voxelnet.pth', map_location=device)
model = VoxelNet().to(device)
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [15]:
# Run inference over the whole test loader; `preds` is a flat list of
# predicted class indices in loader order.
preds = predict(model, test_loader, device)

  0%|          | 0/625 [00:00<?, ?it/s]

In [16]:
# Attach the predictions to the submission frame and write it to disk.
assert len(preds) == len(test_df), "number of predictions does not match number of test rows"
test_df['label'] = preds

submission_path = './submission/20E-val0.9419-lr_1e21e3-xaxis_reg-voxelnet.csv'
# to_csv does not create missing directories.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
test_df.to_csv(submission_path, index=False)