In [2]:
import numpy as np
import math
import random
import os
import torch
import scipy.spatial.distance
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

import plotly.graph_objects as go
import plotly.express as px

## TODO

- NumpyのデータファイルからPytorchテンソルへの変換

- Modelに投入できるようにする

- 正規化の前処理

In [3]:
# Random stand-in batch used to try out the transforms below
# (indexed later as data[0] -> one cloud of 10 points with 3 coordinates).

data = np.random.standard_normal(size=(4, 10, 3))
print(f"data.shape: {data.shape}")

data.shape: (4, 10, 3)


In [14]:
# Cut a single point cloud down so that it holds at most `output_size` points.
class PointSampler(object):
    """Callable transform that keeps only the leading rows of a point cloud.

    Args:
        output_size (int): maximum number of points to keep.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, int)
        self.output_size = output_size

    def __call__(self, data):
        """Return the first ``output_size`` entries of ``data`` (fewer if it is shorter)."""
        leading = slice(None, self.output_size)
        return data[leading]

In [17]:
# Keep the first five points of the first dummy cloud and show them.
sampler = PointSampler(5)
pointcloud = sampler(data[0])
print(pointcloud)

[[-0.97869085 -0.04427212  2.15237277]
 [ 0.49597524  1.17119548  1.5125282 ]
 [-1.62430108 -1.77269708 -0.53545669]
 [-0.75024138  0.26540019  1.42908106]
 [ 1.38188116  0.9384979   0.29304972]]


In [5]:
class Normalize(object):
    """Callable transform that centers a point cloud and scales it into the unit sphere."""

    def __call__(self, pointcloud):
        """Return a normalized copy of ``pointcloud``.

        Args:
            pointcloud: 2-D NumPy array with one point per row.

        Returns:
            A new array of the same shape: the per-axis mean is subtracted,
            then every coordinate is divided by the largest point norm.
            The input array is not modified.
        """
        assert len(pointcloud.shape) == 2

        # Subtract the mean along each axis so the cloud is centered at the origin.
        norm_pointcloud = pointcloud - np.mean(pointcloud, axis=0)

        # Scale by the norm of the farthest point.  When every point coincides
        # the radius is 0 and dividing would fill the result with NaN, so a
        # degenerate cloud is returned centered but unscaled.
        radius = np.max(np.linalg.norm(norm_pointcloud, axis=1))
        if radius > 0:
            norm_pointcloud /= radius

        return norm_pointcloud

In [8]:
# Note

# Normalize the first dummy cloud and show the result.
normalizer = Normalize()
norm_pointcloud = normalizer(data[0])
print(norm_pointcloud)

[[-0.31506618 -0.07202067  0.6449331 ]
 [ 0.22120289  0.36998968  0.41225069]
 [-0.54984529 -0.70057031 -0.33250838]
 [-0.23198948  0.04059308  0.38190476]
 [ 0.54336663  0.28536814 -0.03121824]
 [-0.05316551  0.56217761 -0.82530586]
 [ 0.14513996 -0.15547889  0.14297166]
 [-0.0348982  -0.14216677 -0.00644547]
 [-0.19131902 -0.16338044 -0.27635758]
 [ 0.46657419 -0.02451144 -0.11022467]]


In [9]:
class ToTensor(object):
    """Callable transform that wraps a 2-D NumPy point cloud as a torch tensor."""

    def __call__(self, pointcloud):
        """Return ``torch.from_numpy(pointcloud)``; the array's dtype is kept as is."""
        rank = len(pointcloud.shape)
        assert rank == 2

        as_tensor = torch.from_numpy(pointcloud)
        return as_tensor

In [10]:
# Convert the normalized cloud to a tensor; as the cell's last expression the result is displayed.
ToTensor()(norm_pointcloud)

tensor([[-0.3151, -0.0720,  0.6449],
        [ 0.2212,  0.3700,  0.4123],
        [-0.5498, -0.7006, -0.3325],
        [-0.2320,  0.0406,  0.3819],
        [ 0.5434,  0.2854, -0.0312],
        [-0.0532,  0.5622, -0.8253],
        [ 0.1451, -0.1555,  0.1430],
        [-0.0349, -0.1422, -0.0064],
        [-0.1913, -0.1634, -0.2764],
        [ 0.4666, -0.0245, -0.1102]], dtype=torch.float64)

In [12]:
def default_transforms(output_size=600):
    """Build the default preprocessing pipeline for one point cloud.

    The pipeline truncates the cloud with ``PointSampler``, normalizes it with
    ``Normalize`` and converts it with ``ToTensor``, in that order.

    Args:
        output_size (int): maximum number of points kept by ``PointSampler``.
            Defaults to 600, the value that used to be hard-coded.

    Returns:
        A ``transforms.Compose`` applying the three steps.
    """
    return transforms.Compose([
        PointSampler(output_size),
        Normalize(),
        ToTensor(),
    ])

In [None]:
# Custom Pytorch Datasetオブジェクトの作成

## Model



In [14]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

class Tnet(nn.Module):
    """Transformation sub-network that predicts one ``k x k`` matrix per sample.

    Point-wise 1x1 convolutions lift every point to 1024 features, a global
    max-pool collapses the point axis, and three fully connected layers
    regress the ``k * k`` matrix entries.  The identity matrix is added to the
    regressed values before they are returned.

    Args:
        k (int): number of input channels and side length of the output matrix.
    """

    def __init__(self, k=3):
        super().__init__()
        self.k = k
        self.conv1 = nn.Conv1d(k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k*k)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, input):
        """Predict the transformation matrices.

        Args:
            input: tensor of shape ``(bs, k, n)`` -- channels first, as
                ``nn.Conv1d`` requires.

        Returns:
            Tensor of shape ``(bs, k, k)``.
        """
        xb = F.relu(self.bn1(self.conv1(input)))
        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = F.relu(self.bn3(self.conv3(xb)))
        # Global max-pool over the point axis, then flatten to (bs, 1024).
        # The functional form avoids building new modules on every call.
        pool = F.max_pool1d(xb, xb.size(-1))
        flat = pool.flatten(1)
        xb = F.relu(self.bn4(self.fc1(flat)))
        xb = F.relu(self.bn5(self.fc2(xb)))

        # Reshape the regressed entries to (bs, k, k).
        delta = self.fc3(xb).view(-1, self.k, self.k)
        # Create the identity directly on the activations' device and dtype.
        # The previous `init = init.cuda` (no call parentheses) rebound `init`
        # to a method object and broke the addition on GPU.  The identity is
        # a constant, so it needs no gradient; broadcasting adds it to every
        # sample in the batch.
        identity = torch.eye(self.k, dtype=delta.dtype, device=delta.device)
        matrix = delta + identity
        return matrix

class Transform(nn.Module):
    """Feature extractor: aligns the input and its 64-d features with two Tnets.

    ``forward`` takes a ``(bs, 3, n)`` tensor and returns a tuple of the
    ``(bs, 1024)`` global feature, the predicted 3x3 matrices and the
    predicted 64x64 matrices.
    """

    def __init__(self):
        super().__init__()
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=64)
        self.conv1 = nn.Conv1d(3, 64, 1)

        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

    def forward(self, input):
        """Return ``(global_feature, matrix3x3, matrix64x64)`` for ``input``."""
        matrix3x3 = self.input_transform(input)
        # Batched matrix product: each sample's (n, 3) points times its own
        # (3, 3) matrix, then back to channels-first layout.
        points = input.transpose(1, 2)
        aligned = torch.bmm(points, matrix3x3).transpose(1, 2)

        features = F.relu(self.bn1(self.conv1(aligned)))

        # Same alignment step, this time in the 64-dimensional feature space.
        matrix64x64 = self.feature_transform(features)
        per_point = features.transpose(1, 2)
        features = torch.bmm(per_point, matrix64x64).transpose(1, 2)

        features = F.relu(self.bn2(self.conv2(features)))
        features = self.bn3(self.conv3(features))
        # Global max-pool over the point axis, flattened to (bs, 1024).
        pooled = F.max_pool1d(features, features.size(-1))
        output = torch.flatten(pooled, 1)
        return output, matrix3x3, matrix64x64

class PointNet(nn.Module):
    """Regression head on top of ``Transform``; emits three values per sample.

    ``forward`` returns the ``(bs, 3)`` prediction together with the 3x3 and
    64x64 matrices produced by ``Transform``.
    """

    def __init__(self):
        super().__init__()
        self.transform = Transform()
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 3)

        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.identity = nn.Identity()

    def forward(self, input):
        """Return ``(prediction, matrix3x3, matrix64x64)`` for ``input``."""
        global_feature, matrix3x3, matrix64x64 = self.transform(input)
        hidden = F.relu(self.bn1(self.fc1(global_feature)))
        hidden = F.relu(self.bn2(self.fc2(hidden)))
        # No activation on the last layer: the raw linear output is returned.
        prediction = self.identity(self.fc3(hidden))
        return prediction, matrix3x3, matrix64x64

In [59]:
## Notes

# Toy batch: two clouds of three points each.
# Alternative for a random batch: nm = np.random.randn(2, 10, 3)
nm = np.array([
    [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
    [[2.0, 3.0, 4.0], [2.0, 3.0, 5.0], [2.0, 1.0, 6.0]],
])
print(nm)
nm = nm.astype(np.float32)
# Swap axes 1 and 2 so the coordinate axis comes before the point axis.
x = torch.from_numpy(nm).transpose(1, 2)
print(x)

[[[1. 2. 3.]
  [4. 5. 6.]
  [7. 8. 9.]]

 [[2. 3. 4.]
  [2. 3. 5.]
  [2. 1. 6.]]]
tensor([[[1., 4., 7.],
         [2., 5., 8.],
         [3., 6., 9.]],

        [[2., 2., 2.],
         [3., 3., 1.],
         [4., 5., 6.]]])


In [64]:
## Notes

# Smoke-test Tnet on its own with the toy batch `x`.
tnet = Tnet()
y = tnet(x)
# print(y)
# print(y[0])

# Smoke-test the full PointNet; `y` is overwritten with its prediction.
pn = PointNet()
y, m3, m64 = pn(x)
print(y)


tensor([[-0.1998,  0.3639,  0.4269],
        [ 0.4509,  0.5154, -0.2120]], grad_fn=<AddmmBackward>)


In [4]:
def pointnetloss(outputs, labels, m3x3, m64x64, alpha=0.0001):
    """MSE loss plus an orthogonality penalty on the two transform matrices.

    Args:
        outputs: model predictions.
        labels: regression targets with the same shape as ``outputs``.  This
            used to be read from an undefined global although the training
            loop already passed it as the second positional argument.
        m3x3: tensor of shape ``(bs, 3, 3)``.
        m64x64: tensor of shape ``(bs, 64, 64)``.
        alpha (float): weight of the orthogonality penalty.

    Returns:
        Scalar tensor: ``MSE(outputs, labels) +
        alpha * (||I - A A^T|| + ||I - B B^T||) / bs``.
    """
    criterion = torch.nn.MSELoss()
    bs = outputs.size(0)
    # Build the identities on the same device/dtype as the matrices they are
    # compared with; they are constants, so no gradient is requested, and
    # broadcasting applies them to every sample in the batch.
    id3x3 = torch.eye(m3x3.size(-1), dtype=m3x3.dtype, device=m3x3.device)
    id64x64 = torch.eye(m64x64.size(-1), dtype=m64x64.dtype, device=m64x64.device)
    diff3x3 = id3x3 - torch.bmm(m3x3, m3x3.transpose(1, 2))
    diff64x64 = id64x64 - torch.bmm(m64x64, m64x64.transpose(1, 2))
    penalty = (torch.norm(diff3x3) + torch.norm(diff64x64)) / float(bs)
    return criterion(outputs, labels) + alpha * penalty

## TrainingLoop

In [5]:
# Use the first CUDA device when one is available, otherwise the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cuda:0


In [None]:
# Build the network and move it to the selected device; the trailing
# expression shows the module summary as the cell output.
pointnet = PointNet().to(device)
pointnet

In [None]:
# Adam over all parameters of `pointnet`, with a learning rate of 1e-3.
optimizer = torch.optim.Adam(pointnet.parameters(), lr=0.001)

In [None]:
def train(model, train_loader, val_loader=None, epochs=15, save=True,
          optimizer=None, device=None):
    """Train ``model`` and optionally validate and checkpoint it every epoch.

    Each batch must be a dict with the keys ``'pointcloud'`` and
    ``'category'``.  Axes 1 and 2 of the point cloud are swapped before the
    forward pass.

    Args:
        model: network returning ``(outputs, m3x3, m64x64)``.  This argument
            is now the network that is trained; the function used to ignore
            it and operate on the global ``pointnet``.
        train_loader: iterable of training batches.
        val_loader: optional iterable of validation batches.
        epochs (int): number of passes over ``train_loader``.
        save (bool): when true, write ``save_<epoch>.pth`` after every epoch
            (1-based, so the last file is still ``save_<epochs>.pth``).
        optimizer: optimizer stepping ``model``'s parameters.  When omitted,
            a fresh ``Adam(model.parameters(), lr=0.001)`` is created.
        device: device batches are moved to.  When omitted, the device of the
            model's first parameter is used (CPU if it has none).
    """
    if device is None:
        first_param = next(iter(model.parameters()), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    if optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Summed squared error lets validation be averaged over the whole set.
    sum_squared_error = torch.nn.MSELoss(reduction="sum")

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs = data['pointcloud'].to(device).float()
            labels = data['category'].to(device).float()
            optimizer.zero_grad()
            outputs, m3x3, m64x64 = model(inputs.transpose(1, 2))

            loss = pointnetloss(outputs, labels, m3x3, m64x64)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 10 == 9: # print every 10 mini-batches
                print('[Epoch: %d, Batch: %4d / %4d], loss: %.3f' %
                    (epoch+1, i+1, len(train_loader), running_loss / 10))
                running_loss = 0.0

        model.eval()

        # validation: one mean squared error over the whole validation set
        if val_loader is not None:
            squared_error = 0.0
            n_values = 0
            with torch.no_grad():
                for data in val_loader:
                    inputs = data['pointcloud'].to(device).float()
                    labels = data['category'].to(device).float()
                    predicted, _, _ = model(inputs.transpose(1, 2))
                    squared_error += sum_squared_error(predicted, labels).item()
                    n_values += labels.numel()
            if n_values:
                print('Mean Squared Error: %.3f' % (squared_error / n_values))

        if save:
            # Name the checkpoint after the finished epoch so earlier epochs
            # are not overwritten.
            torch.save(model.state_dict(), "save_" + str(epoch + 1) + ".pth")

In [None]:
# Train with the default number of epochs, validating and checkpointing each epoch.
# NOTE(review): train_loader and valid_loader are not defined in any visible cell -- confirm they are created before this runs.
train(pointnet, train_loader, valid_loader, save=True)