In [1]:
import h5py # .h5 파일을 읽기 위한 패키지
import random
import pandas as pd
import numpy as np
import os
import glob
import math

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm
from cfg.voxelnet_cfg import config as cfg 

from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings(action='ignore') 

In [160]:
import torch.nn as nn
import torch.nn.functional as F
import torch


# conv2d + bn + relu
class Conv2d(nn.Module):

    def __init__(self,in_channels,out_channels,k,s,p, activation=True, batch_norm=True):
        super(Conv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels,out_channels,kernel_size=k,stride=s,padding=p)
        if batch_norm:
            self.bn = nn.BatchNorm2d(out_channels)
        else:
            self.bn = None
        self.activation = activation
    def forward(self,x):
        x = self.conv(x)
        if self.bn is not None:
            x=self.bn(x)
        if self.activation:
            return F.relu(x,inplace=True)
        else:
            return x

# conv3d + bn + relu
class Conv3d(nn.Module):

    def __init__(self, in_channels, out_channels, k, s, p, batch_norm=True):
        super(Conv3d, self).__init__()
        self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
        if batch_norm:
            self.bn = nn.BatchNorm3d(out_channels)
        else:
            self.bn = None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)

        return F.relu(x, inplace=True)

# Fully Connected Network
class FCN(nn.Module):

    def __init__(self,cin,cout):
        super(FCN, self).__init__()
        self.cout = cout
        self.linear = nn.Linear(cin, cout)
        self.bn = nn.BatchNorm1d(cout)

    def forward(self,x):
        # KK is the stacked k across batch
        kk, t, _ = x.shape
        x = self.linear(x.view(kk*t,-1))
        x = F.relu(self.bn(x))
        return x.view(kk,t,-1)

# Voxel Feature Encoding layer
class VFE(nn.Module):

    def __init__(self,cin,cout):
        super(VFE, self).__init__()
        assert cout % 2 == 0
        self.units = cout // 2
        self.fcn = FCN(cin,self.units)

    def forward(self, x, mask):
        # point-wise feauture
        pwf = self.fcn(x)
        #locally aggregated feature
        laf = torch.max(pwf,1)[0].unsqueeze(1).repeat(1,cfg.T,1)
        # point-wise concat feature
        pwcf = torch.cat((pwf,laf),dim=2)
        # apply mask
        mask = mask.unsqueeze(2).repeat(1, 1, self.units * 2)
        pwcf = pwcf * mask.float()

        return pwcf

# Stacked Voxel Feature Encoding
class SVFE(nn.Module):

    def __init__(self):
        super(SVFE, self).__init__()
        self.vfe_1 = VFE(7,32)
        self.vfe_2 = VFE(32,128)
        self.fcn = FCN(128,128)
        
    def forward(self, x):
        mask = torch.ne(torch.max(x,2)[0], 0)
        x = self.vfe_1(x, mask)
        x = self.vfe_2(x, mask)
        x = self.fcn(x)
        # element-wise max pooling
        x = torch.max(x,1)[0]
        return x

# Convolutional Middle Layer
class CML(nn.Module):
    def __init__(self):
        super(CML, self).__init__()
        self.conv3d_1 = Conv3d(128, 64, 3, s=(2, 1, 1), p=(1, 1, 1))
        self.conv3d_2 = Conv3d(64, 64, 3, s=(1, 1, 1), p=(0, 1, 1))
        self.conv3d_3 = Conv3d(64, 64, 3, s=(2, 1, 1), p=(1, 1, 1))

    def forward(self, x):
        x = self.conv3d_1(x)
        x = self.conv3d_2(x)
        x = self.conv3d_3(x)
        return x


# classifier
class Classifier(nn.Module):

    def __init__(self, cin, num_classes=10, dropout_rate=0.5):
        super(Classifier, self).__init__()  
        
        self.linear_1 = nn.Sequential(
            nn.Linear(cin, cin//2),
            nn.GELU(),
            nn.Dropout(p=dropout_rate, inplace=False)
        )
        
        self.linear_2 = nn.Sequential(
            nn.Linear(cin//2, cin//4),
            nn.GELU(),
            nn.Dropout(p=dropout_rate, inplace=False)
        )
        
        self.linear_3 = nn.Linear(cin//4, num_classes)


    def forward(self,x):
        # KK is the stacked k across batch
        b, f, _ = x.shape
        x = self.linear_1(x.view(b * f, -1))
        x = self.linear_2(x)
        x = self.linear_3(x)
        return x
    

class VoxelNet(nn.Module):

    def __init__(self):
        super(VoxelNet, self).__init__()
        self.svfe = SVFE()
        self.cml = CML()
#         self.rpn = RPN()
        self.cls = Classifier(cin=64)
        
    def voxel_indexing(self, sparse_features, coords):
        dim = sparse_features.shape[-1]
        print(sparse_features.shape)
        dense_feature = torch.zeros(dim, cfg.N, cfg.D, cfg.H, cfg.W).to(cfg.device)

        dense_feature[:, coords[:,0], coords[:,1], coords[:,2], coords[:,3]]= sparse_features

        return dense_feature.transpose(0, 1)

    def forward(self, voxel_features, voxel_coords):

        # feature learning network
        vwfs = self.svfe(voxel_features)
        vwfs = self.voxel_indexing(vwfs,voxel_coords)

        # convolutional middle network
        cml_out = self.cml(vwfs)

        # region proposal network

        # merge the depth and feature dim into one, output probability score map and regression map
        # psm,rm = self.rpn(cml_out.view(cfg.N,-1,cfg.H, cfg.W))
        
        # classifier
        score = self.cls(cml_out)

        return score #psm, rm

In [188]:
max([1,2,8,4])

8

In [239]:
def detection_collate(batch):
    voxel_features = []
    voxel_coords = []
    label = []
    shapes = [s[0].shape[0] for s in batch]
    ind_max = np.argmax(shapes)
    ind_min = np.argmin(shapes)
    diff_shapes = shapes[ind_max] - shapes[ind_min]
    
    for i, sample in enumerate(batch):
        print("diff_shapes : ", diff_shapes)
        print("sample[0] : " ,sample[0].shape)
        print("sample[1] : " ,sample[1].shape)
        if ind_max == i :
            voxel_features.append(sample[0])
            voxel_coords.append(sample[1])
        else :
            voxel_features.append(F.pad(sample[0], (0,0,0,0,diff_shapes,0), "constant", 0))
            print("voxel_Feauture : ", F.pad(sample[0], (0,0,0,0,diff_shapes,0)).shape)
#         voxel_coords.append(
#             F.pad(sample[1], (0, 0, 1, 0), mode='constant', value=i))
        
            voxel_coords.append(F.pad(sample[1], (0,0,diff_shapes,0), "constant", 0))
            print("voxel_coord : ", F.pad(sample[1], (0,0,diff_shapes,0)).shape)
        
        label.append(sample[2])

    return torch.stack(voxel_features, dim=0), torch.stack(voxel_coords, dim=0), torch.Tensor(label)

In [240]:
class VoxelDataset(Dataset) :
    def __init__(self, id_list, label_list, point_list) :
        self.id_list = id_list
        self.label_list = label_list
        self.point_list = point_list
    
    def __getitem__(self, index) :
        image_id = self.id_list[index]
        
        points= self.point_list[str(image_id)][:]
        voxel_features, voxel_coords= self.voxelization(points)

        if self.label_list is not None:
            label = self.label_list[index]
            return torch.Tensor(voxel_features), torch.Tensor(voxel_coords), label
        else:
            return torch.Tensor(voxel_features), torch.Tensor(voxel_coords)
    
    def voxelization(self, point) :
        voxel_coords = ((point - np.array([cfg.xrange[0], cfg.yrange[0], cfg.zrange[0]])) / 
                       (cfg.vw, cfg.vh, cfg.vd)).astype(np.int32)
        
        # convert to (D,H,W)
        voxel_coords = voxel_coords[:, [2,1,0]]
        voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords, 
                                                        axis=0, 
                                                        return_inverse=True, 
                                                        return_counts=True)
        
        
        voxel_features = []
        for i in range(len(voxel_coords)) :
            voxel = np.zeros((cfg.T, 7), dtype=np.float32)
            pts = point[inv_ind == i]
            if voxel_counts[i] > cfg.T :
                pts = pts[:cfg.T, : ]
                voxel_counts[i] = cfg.T
            
            voxel[:pts.shape[0], :6] = np.concatenate((pts[:, :3], pts[:, :3] - np.mean(pts[:, :3], 0)), axis=1)
            voxel_features.append(voxel)

        return np.array(voxel_features), voxel_coords
    
    def __len__(self) :
        return len(self.id_list)

In [241]:
all_df = pd.read_csv('./data/train.csv')
all_points = h5py.File('./data/train.h5', 'r')

train_df = all_df.iloc[:int(len(all_df)*0.8)]

train_dataset = VoxelDataset(train_df['ID'].values, train_df['label'].values, all_points)
train_loader = DataLoader(train_dataset, batch_size = cfg.N,  collate_fn=detection_collate, shuffle=True, num_workers=0)

model = VoxelNet()

In [243]:
for f, c, l in train_loader :
    print("train f : ", f.shape)
    print("train c : ", c.shape)
    a = model(f,c)
    print(a)
    break

diff_shapes :  16
sample[0] :  torch.Size([18, 35, 7])
sample[1] :  torch.Size([18, 3])
voxel_Feauture :  torch.Size([34, 35, 7])
voxel_coord :  torch.Size([34, 3])
diff_shapes :  16
sample[0] :  torch.Size([34, 35, 7])
sample[1] :  torch.Size([34, 3])
train f :  torch.Size([2, 34, 35, 7])
train c :  torch.Size([2, 34, 3])


ValueError: too many values to unpack (expected 3)