In [1]:
import open3d as o3d

In [2]:
import numpy as np
import math
import random
import os
import torch
import scipy.spatial.distance
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from path import Path

import plotly.graph_objects as go
import plotly.express as px

## TODO

- NumpyのデータファイルからPytorchテンソルへの変換

- Modelに投入できるようにする

- 正規化の前処理

In [78]:
# dummy Data

data = np.random.randn(4, 10, 3)
print(f"data.shape: {data.shape}")

data.shape: (4, 10, 3)


In [79]:
# output_sizeになるように1データの点群数を削除
class PointSampler(object):
    def __init__(self, output_size):
        assert isinstance(output_size, int)
        self.output_size = output_size
    
    def __call__(self, data):
        return data[:self.output_size]

In [80]:
pointcloud = PointSampler(5)(data[0])
print(pointcloud)

[[ 2.00929395  0.21707681 -1.56866688]
 [ 0.40063073  0.54677322 -0.18662653]
 [ 0.11402492 -1.18012687 -0.8323378 ]
 [-0.29722923  0.72178364  0.06638323]
 [ 1.54528062 -0.58398358  0.23409262]]


In [81]:
class Normalize(object):
    def __call__(self, pointcloud):
        assert len(pointcloud.shape) == 2

        # x, y, z軸で平均を引く→各ベクトルの大きさの最大値で各要素を割る
        norm_pointcloud = pointcloud - np.mean(pointcloud, axis=0)
        norm_pointcloud /= np.max(np.linalg.norm(norm_pointcloud, axis=1))

        return norm_pointcloud

In [82]:
# Note

norm_pointcloud = Normalize()(data[0])
print(norm_pointcloud)

[[ 0.83434377  0.09765435 -0.54252567]
 [ 0.14276073  0.23939468  0.05162955]
 [ 0.01954556 -0.50301976 -0.22596923]
 [-0.15725739  0.31463369  0.16040139]
 [ 0.63485904 -0.24673083  0.23250161]
 [-0.47461863 -0.14810998 -0.04415313]
 [-0.22853379  0.1827154   0.21805426]
 [-0.10600291 -0.54272667  0.50066115]
 [-0.62332109  0.02171597 -0.12639524]
 [-0.04177528  0.58447315 -0.22420469]]


In [83]:
class ToTensor(object):
    def __call__(self, pointcloud):
        assert len(pointcloud.shape) == 2

        return torch.from_numpy(pointcloud)

In [84]:
ToTensor()(norm_pointcloud)

tensor([[ 0.8343,  0.0977, -0.5425],
        [ 0.1428,  0.2394,  0.0516],
        [ 0.0195, -0.5030, -0.2260],
        [-0.1573,  0.3146,  0.1604],
        [ 0.6349, -0.2467,  0.2325],
        [-0.4746, -0.1481, -0.0442],
        [-0.2285,  0.1827,  0.2181],
        [-0.1060, -0.5427,  0.5007],
        [-0.6233,  0.0217, -0.1264],
        [-0.0418,  0.5845, -0.2242]], dtype=torch.float64)

In [85]:
def default_transforms():
    return transforms.Compose([
        PointSampler(600),
        Normalize(),
        ToTensor()
    ])

In [86]:
def read_pcd(path):
    pcd = o3d.io.read_point_cloud(path)
    points = np.array(pcd.points)
    return points

In [87]:
# Custom Pytorch Datasetオブジェクトの作成

class PointCloudData(Dataset):
    def __init__(self, root_dir, valid=False, folder="train", transform=default_transforms()):
        self.root_dir = root_dir
        folders = [dir for dir in sorted(os.listdir(root_dir)) if os.path.isdir(root_dir/dir)]
        self.classes = {folder:i for i, folder in enumerate(folders)}
        self.transforms = transform if not valid else default_transforms()
        self.valid = valid
        self.files = []

        for category in self.classes.keys():
            new_dir = root_dir/Path(category)
            for file in os.listdir(new_dir):
                if file.endswith('.pcd'):
                    # PCDファイルからpcd.points読み込み
                    sample = {}
                    sample['pcd_path'] = new_dir/file
                    sample['category'] = category
                    self.files.append(sample)
    
    def __len__(self):
        return len(self.files)
    
    def __preproc__(self, path):
        points = read_pcd(path)
        if self.transforms:
            pointcloud = self.transforms(points)
        return pointcloud
    
    def __getitem__(self, idx):
        pcd_path = self.files[idx]['pcd_path']
        category = self.files[idx]['category']
        pointcloud = self.__preproc__(pcd_path)
        return {'pointcloud': pointcloud, 'category': self.classes[category]}
                    

In [88]:
path = Path("../Data/five_faces_class")

In [89]:
folders = [dir for dir in sorted(os.listdir(path)) if os.path.isdir(path/dir)]
classes = {folder: i for i, folder in enumerate(folders)}
classes

{'0': 0, 'l45': 1, 'l90': 2, 'r45': 3, 'r90': 4}

In [90]:
train_ds = PointCloudData(path)

In [91]:
inv_classes = {i:cat for cat, i in train_ds.classes.items()}
inv_classes

{0: '0', 1: 'l45', 2: 'l90', 3: 'r45', 4: 'r90'}

In [92]:
print('Train dataset size: ', len(train_ds))
print('Number of classes: ', len(train_ds.classes))
print('Sample pointcloud shape: ', train_ds[-1]['pointcloud'].size())
print('Class: ', inv_classes[train_ds[0]['category']])

Train dataset size:  50
Number of classes:  5
Sample pointcloud shape:  torch.Size([600, 3])
Class:  0


In [93]:
print(train_ds[-1]['pointcloud'].size())

torch.Size([600, 3])


In [94]:
train_loader = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)
# valid_loader = DataLoader(dataset=valid_ds, batch_size=64)

## Model



In [95]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

class Tnet(nn.Module):
    def __init__(self, k=3):
        super().__init__()
        self.k = k
        self.conv1 = nn.Conv1d(k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k*k)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, input):
        # input shape == (bs, n, 3)
        bs = input.size(0)
        xb = F.relu(self.bn1(self.conv1(input)))
        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = F.relu(self.bn3(self.conv3(xb)))
        # maxPool/Flattenオブジェクトの作成→実行
        pool = nn.MaxPool1d(xb.size(-1))(xb)
        flat = nn.Flatten(1)(pool)
        xb = F.relu(self.bn4(self.fc1(flat)))
        xb = F.relu(self.bn5(self.fc2(xb)))

        # 対角行列の生成→それを複製
        init = torch.eye(self.k, requires_grad=True).repeat(bs, 1, 1)
        if xb.is_cuda:
            init = init.cuda
        # tensorサイズを自動的に調整して(view)、initを足している
        matrix = self.fc3(xb).view(-1, self.k, self.k) + init
        return matrix

class Transform(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=64)
        self.conv1 = nn.Conv1d(3, 64, 1)

        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

    def forward(self, input):
        matrix3x3 = self.input_transform(input)
        # バッチごとに2次元×2次元の行列積を演算するので、3次元×3次元の計算をします
        xb = torch.bmm(torch.transpose(input, 1, 2), matrix3x3).transpose(1, 2)

        xb = F.relu(self.bn1(self.conv1(xb)))

        matrix64x64 = self.feature_transform(xb)
        xb = torch.bmm(torch.transpose(xb, 1, 2), matrix64x64).transpose(1, 2)

        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = self.bn3(self.conv3(xb))
        xb = nn.MaxPool1d(xb.size(-1))(xb)
        output = nn.Flatten(1)(xb)
        return output, matrix3x3, matrix64x64

class PointNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.transform = Transform()
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 3)

        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.identity = nn.Identity()

    def forward(self, input):
        xb, matrix3x3, matrix64x64 = self.transform(input)
        xb = F.relu(self.bn1(self.fc1(xb)))
        xb = F.relu(self.bn2(self.fc2(xb)))
        output = self.fc3(xb)
        return self.identity(output), matrix3x3, matrix64x64

In [96]:
## Notes

nm = np.array([[[1.0, 2.0, 3.0],
                [4.0, 5.0, 6.0],
                [7.0, 8.0, 9.0]],
                
                [[2.0, 3.0, 4.0],
                #  [1.0, 2.0, 3.0],
                 [2.0, 3.0, 5.0],
                 [2.0, 1.0, 6.0]]])
# nm = np.random.randn(2, 10, 3)
print(nm)
nm = nm.astype(np.float32)
x = torch.from_numpy(nm)
x = x.transpose(1, 2)
print(x)

[[[1. 2. 3.]
  [4. 5. 6.]
  [7. 8. 9.]]

 [[2. 3. 4.]
  [2. 3. 5.]
  [2. 1. 6.]]]
tensor([[[1., 4., 7.],
         [2., 5., 8.],
         [3., 6., 9.]],

        [[2., 2., 2.],
         [3., 3., 1.],
         [4., 5., 6.]]])


In [97]:
## Notes

tnet = Tnet()
y = tnet(x)
# print(y)
# print(y[0])

pn = PointNet()
y, m3, m64 = pn(x)
print(y)


tensor([[ 0.2978,  0.4885,  0.4669],
        [-0.7378, -0.1396,  0.2702]], grad_fn=<AddmmBackward>)


In [98]:
def pointnetloss(outputs, m3x3, m64x64, alpha=0.0001):
    criterion = torch.nn.MSELoss()
    bs = outputs.size(0)
    id3x3 = torch.eye(3, requires_grad=True).repeat(bs, 1, 1)
    id64x64 = torch.eye(64, requires_grad=True).repeat(bs, 1, 1)
    if outputs.is_cuda:
        id3x3 = id3x3.cuda()
        id64x64 = id64x64.cuda()
    diff3x3 = id3x3 - torch.bmm(m3x3, m3x3.transpose(1, 2))
    diff64x64 = id64x64 - torch.bmm(m64x64, m64x64.transpose(1, 2))
    return criterion(outputs, labels) + alpha * (torch.norm(diff3x3) + torch.norm(diff64x64)) / float(bs)

## TrainingLoop

In [99]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [100]:
pointnet = PointNet()
pointnet.to(device)

PointNet(
  (transform): Transform(
    (input_transform): Tnet(
      (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (fc1): Linear(in_features=1024, out_features=512, bias=True)
      (fc2): Linear(in_features=512, out_features=256, bias=True)
      (fc3): Linear(in_features=256, out_features=9, bias=True)
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (feature_transform): Tnet(
      (conv1): Conv1d(64, 64, kernel_size=(1,

In [101]:
optimizer = torch.optim.Adam(pointnet.parameters(), lr=0.001)

In [102]:
def train(model, train_loader, val_loader=None, epochs=15, save=True):
    for epoch in range(epochs):
        pointnet.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device).float()
            optimizer.zero_grad()
            outputs, m3x3, m64x64 = pointnet(inputs.transpose(1, 2))

            loss = pointnetloss(outputs, labels, m3x3, m64x64)
            loss.backward()
            optimizer.step()

            # print statics
            running_loss += loss.item()
            if i % 10 == 9: # print every 10 mini-batches
                print('[Epoch: %d, Batch: %4d / %4d], loss: %.3f' %
                    (epoch+1, i+1, len(train_loader), running_loss / 10))
                running_loss = 0.0

        pointnet.eval()
        correct = total = 0

        # validation
        if val_loader:
            with torch.no_grad():
                for data in val_loader:
                    inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device).float()
                    predicted, __, __ = pointnet(inputs.transpose(1, 2))
                    mse_loss = torch.nn.MSELoss()
                    print('Mean Squared Error: %.3f' % mse_loss(predicted, labels))
        
        if save:
            torch.save(pointnet.state_dict(), "save_" + str(epochs) + ".pth")

In [104]:
# train(pointnet, train_loader, valid_loader, save=True)
train(pointnet, train_loader, save=True)

TypeError: add(): argument 'other' (position 1) must be Tensor, not builtin_function_or_method