In [3]:
# import required packages
# utility
import os
import math
import random
import itertools
import numpy as np
from path import Path

# torch
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# plots
import scipy.spatial.distance
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt

# metrics
from sklearn.metrics import confusion_matrix

In [None]:
# configs
# Seed every RNG the notebook draws from so that runs are reproducible.
# NOTE: `random.seed` is a function and has to be *called*; assigning to it
# (`random.seed = 42`) replaces the function and seeds nothing.
SEED = 42
random.seed(SEED)        # python stdlib RNG
np.random.seed(SEED)     # numpy global RNG
torch.manual_seed(SEED)  # torch RNG

In [49]:
# location of the dataset on disk
data_path = Path('./data/model-net-40')

# keep only the sub-directories of the data folder, in sorted order
folders = [
    entry
    for entry in sorted(os.listdir(data_path))
    if os.path.isdir(data_path/entry)
]

# map every folder (class) name to an integer label
classes = dict(zip(folders, range(len(folders))))

classes

{'airplane': 0,
 'bathtub': 1,
 'bed': 2,
 'bench': 3,
 'bookshelf': 4,
 'bottle': 5,
 'bowl': 6,
 'car': 7,
 'chair': 8,
 'cone': 9,
 'cup': 10,
 'curtain': 11,
 'desk': 12,
 'door': 13,
 'dresser': 14,
 'flower_pot': 15,
 'glass_box': 16,
 'guitar': 17,
 'keyboard': 18,
 'lamp': 19,
 'laptop': 20,
 'mantel': 21,
 'monitor': 22,
 'night_stand': 23,
 'person': 24,
 'piano': 25,
 'plant': 26,
 'radio': 27,
 'range_hood': 28,
 'sink': 29,
 'sofa': 30,
 'stairs': 31,
 'stool': 32,
 'table': 33,
 'tent': 34,
 'toilet': 35,
 'tv_stand': 36,
 'vase': 37,
 'wardrobe': 38,
 'xbox': 39}

In [38]:
# utility functions

# reads an OFF file
# returns the list of vertices and faces
def read_off(file):
    """Parse a mesh in OFF format from an open text file.

    Both header layouts are supported: ``OFF`` on a line of its own followed
    by the counts line, and the merged form ``OFF<n_verts> <n_faces> <n_edges>``.
    Blank lines and ``#`` comments are skipped.

    Args:
        file: a text-mode file-like object providing ``readline()``.

    Returns:
        A ``(verts, faces)`` tuple. ``verts`` is a list of ``(x, y, z)`` float
        tuples; ``faces`` is a list of lists of integer vertex indices.

    Raises:
        ValueError: if the header does not start with ``OFF``, the counts are
            missing, or the data ends / is malformed before all declared
            vertices and faces were read.
    """
    def next_content_line():
        # returns the next line that still has content after stripping
        # whitespace and any '#' comment
        while True:
            raw = file.readline()
            if not raw:
                raise ValueError('unexpected end of OFF data')
            content = raw.split('#', 1)[0].strip()
            if content:
                return content

    header = next_content_line()
    if not header.startswith('OFF'):
        raise ValueError('not an OFF file: header is %r' % header)

    # the counts either follow 'OFF' on the same line or sit on the next line
    counts = header[3:].split()
    if not counts:
        counts = next_content_line().split()
    if len(counts) < 2:
        raise ValueError('OFF header must declare #vertices and #faces')
    n_verts, n_faces = int(counts[0]), int(counts[1])

    # reads and extracts the list of vertices
    # only the first three values are coordinates; extra columns are ignored
    verts = []
    for _ in range(n_verts):
        x, y, z = map(float, next_content_line().split()[:3])
        verts.append((x, y, z))

    # reads and extracts the list of faces
    # the first value of each face is its #vertices; only that many indices
    # are kept, so trailing per-face data is not mistaken for vertex indices
    faces = []
    for _ in range(n_faces):
        tokens = next_content_line().split()
        size = int(tokens[0])
        faces.append([int(tok) for tok in tokens[1:1 + size]])

    return verts, faces

In [68]:
# utility class to sample specific number of points within an object's area
class PointSampler(object):
    """Sample a fixed number of points from the surface of a mesh.

    Faces are drawn with probability proportional to their area and one
    point is then drawn inside each selected face. Only the first three
    vertex indices of a face are used, i.e. faces are treated as triangles.
    """

    # constructor
    # output_size: number of points to be sampled
    def __init__(self, output_size):
        assert isinstance(output_size, int)
        self.output_size = output_size


    # calculates area of a triangle
    # requires 3 points as the triangle vertices
    def triangle_area(self, pt1, pt2, pt3):
        """Return the area of the triangle (pt1, pt2, pt3) via Heron's formula."""
        a = np.linalg.norm(pt1 - pt2)
        b = np.linalg.norm(pt2 - pt3)
        c = np.linalg.norm(pt3 - pt1)
        s = (a + b + c) / 2

        # rounding can push the product slightly below zero for thin triangles
        return math.sqrt(max(s * (s-a) * (s-b) * (s-c), 0))


    # samples a point using barycentric coordinates
    # the points lie inside the triangle
    # requires 3 points as the triangle vertices
    def sample_point(self, pt1, pt2, pt3):
        """Return one random point (x, y, z) inside the triangle."""
        # barycentric coordinates on a triangle
        # https://mathworld.wolfram.com/BarycentricCoordinates.html
        # with s <= t in [0, 1] the weights s, t-s and 1-t are all
        # non-negative and sum to 1
        s, t = sorted([random.random(), random.random()])

        return tuple(
            s * pt1[i] + (t-s) * pt2[i] + (1-t) * pt3[i]
            for i in range(3)
        )


    # calls the object functions internally to sample points
    def __call__(self, mesh):
        """Sample ``output_size`` points from ``mesh``.

        Args:
            mesh: a ``(verts, faces)`` pair; ``verts`` is a sequence of
                3D coordinates and ``faces`` a sequence of vertex-index lists.

        Returns:
            A numpy array of shape ``(output_size, 3)``.
        """
        verts, faces = mesh
        verts = np.array(verts)

        # calculate the area of each face
        areas = np.zeros(len(faces))
        for i, face in enumerate(faces):
            areas[i] = self.triangle_area(
                verts[face[0]],
                verts[face[1]],
                verts[face[2]]
            )

        # samples the required number of faces, weighted by area
        # random.choices rejects weights whose total is not positive, so a
        # fully degenerate mesh (every face has zero area) falls back to
        # picking faces uniformly instead of raising
        weights = areas if areas.sum() > 0 else None
        sampled_faces = random.choices(faces, weights=weights, k=self.output_size)

        # one output row per sampled face
        sampled_points = np.zeros((self.output_size, 3))
        for i, face in enumerate(sampled_faces):
            sampled_points[i] = self.sample_point(
                verts[face[0]],
                verts[face[1]],
                verts[face[2]]
            )

        return sampled_points

In [73]:
# utility class to normalize a pointcloud data
# the normalized coordinates fall in the range [-1, 1]
class Normalize(object):
    """Center a pointcloud on its mean and scale it into the unit sphere."""

    def __call__(self, pointcloud):
        """Return a normalized copy of ``pointcloud``.

        Args:
            pointcloud: 2-D numpy array with one point per row.

        Returns:
            The pointcloud shifted so that its mean is the origin and divided
            by the largest point norm. If all points coincide the largest
            norm is 0; the centered (all-zero) cloud is returned as is
            instead of dividing by zero and producing NaNs.
        """
        assert len(pointcloud.shape) == 2

        norm_pointcloud = pointcloud - np.mean(pointcloud, axis=0)
        scale = np.max(np.linalg.norm(norm_pointcloud, axis=1))
        if scale > 0:
            norm_pointcloud = norm_pointcloud / scale

        return norm_pointcloud

In [96]:
# utility class to rotate pointcloud coordinates along z-axis by a random angle
class RandRotation_z(object):
    """Rotate every point about the z-axis by one random angle in [0, 2*pi)."""

    def __call__(self, pointcloud):
        """Return a rotated copy of ``pointcloud`` (one 3D point per row)."""
        assert len(pointcloud.shape) == 2

        # draw the rotation angle
        angle = random.random() * 2 * math.pi
        cos_a, sin_a = math.cos(angle), math.sin(angle)

        # rotation about z: x and y are mixed, z is left unchanged
        rotation = np.array([
            [cos_a, -sin_a, 0],
            [sin_a, cos_a, 0],
            [0, 0, 1]
        ])

        # points are rows, so rotate the transposed cloud and transpose back
        return (rotation @ pointcloud.T).T
    

# utility class to add random noise to pointcloud data
# noise: gaussian, mean 0 and standard deviation 0.02 (values are not bounded)
class RandomNoise(object):
    """Jitter every coordinate with independent gaussian noise."""

    def __call__(self, pointcloud):
        """Return ``pointcloud`` plus noise of the same shape."""
        assert len(pointcloud.shape) == 2

        # one noise value per coordinate, drawn from numpy's global RNG
        jitter = np.random.normal(0, 0.02, pointcloud.shape)

        return pointcloud + jitter

In [98]:
# utility class to convert pointcloud data from numpy array into a pytorch tensor
class ToTensor(object):
    """Wrap a 2-D numpy pointcloud in a torch tensor (dtype is preserved)."""

    def __call__(self, pointcloud):
        """Return ``pointcloud`` as a ``torch.Tensor`` of the same shape."""
        assert len(pointcloud.shape) == 2

        as_tensor = torch.from_numpy(pointcloud)
        return as_tensor
    

# utility function
# builds the augmentation-free preprocessing pipeline
def default_transforms():
    """Return a fresh pipeline: sample 1024 points, normalize, convert to tensor."""
    steps = [PointSampler(1024), Normalize(), ToTensor()]
    return transforms.Compose(steps)

In [99]:
class PointCloudData(Dataset):
    """Dataset of OFF meshes laid out as ``root_dir/<category>/<folder>/*.off``.

    Every sub-directory of ``root_dir`` is one category; labels are the
    positions of the category names in sorted order.

    Args:
        root_dir: dataset root; must support ``root_dir / name`` joins.
        valid: when True the ``transform`` argument is ignored and
            ``default_transforms()`` is used, so validation data is never
            augmented.
        folder: name of the split sub-directory inside each category.
        transform: callable applied to the ``(verts, faces)`` mesh. The
            default pipeline is built once, when the class is defined.
    """

    def __init__(self, root_dir, valid=False, folder='train', transform=default_transforms()):
        self.root_dir = root_dir
        folders = [name for name in sorted(os.listdir(root_dir)) if os.path.isdir(root_dir/name)]
        self.classes = {name: label for label, name in enumerate(folders)}
        self.transforms = transform if not valid else default_transforms()
        self.valid = valid
        self.files = []
        for category in self.classes.keys():
            new_dir = root_dir/Path(category)/folder
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # index -> file mapping stable across runs and machines
            for file in sorted(os.listdir(new_dir)):
                if file.endswith('.off'):
                    self.files.append({
                        'pcd_path': new_dir/file,
                        'category': category,
                    })

    def __len__(self):
        """Return the number of ``.off`` files found."""
        return len(self.files)

    def __preproc__(self, file):
        """Parse an open OFF file and apply the transforms.

        Returns the transformed pointcloud, or the raw ``(verts, faces)``
        pair when no transform is configured.
        """
        verts, faces = read_off(file)
        if not self.transforms:
            return verts, faces
        return self.transforms((verts, faces))

    def __getitem__(self, idx):
        """Return ``{'pointcloud': ..., 'category': <int label>}`` for item ``idx``."""
        pcd_path = self.files[idx]['pcd_path']
        category = self.files[idx]['category']
        with open(pcd_path, 'r') as f:
            pointcloud = self.__preproc__(f)
        return {'pointcloud': pointcloud, 'category': self.classes[category]}

In [100]:
# training pipeline: sample -> normalize -> augment (z-rotation, noise) -> tensor
train_transforms = transforms.Compose(
    [PointSampler(1024), Normalize(), RandRotation_z(), RandomNoise(), ToTensor()]
)

In [101]:
# training split, with augmentation
train_ds = PointCloudData(data_path, transform=train_transforms)
# with valid=True PointCloudData ignores `transform` and uses
# default_transforms(), so passing train_transforms here would have no effect
valid_ds = PointCloudData(data_path, valid=True, folder='test')

In [103]:
# reverse lookup: integer label -> category name
inv_classes = {label: name for name, label in train_ds.classes.items()}
inv_classes

{0: 'airplane',
 1: 'bathtub',
 2: 'bed',
 3: 'bench',
 4: 'bookshelf',
 5: 'bottle',
 6: 'bowl',
 7: 'car',
 8: 'chair',
 9: 'cone',
 10: 'cup',
 11: 'curtain',
 12: 'desk',
 13: 'door',
 14: 'dresser',
 15: 'flower_pot',
 16: 'glass_box',
 17: 'guitar',
 18: 'keyboard',
 19: 'lamp',
 20: 'laptop',
 21: 'mantel',
 22: 'monitor',
 23: 'night_stand',
 24: 'person',
 25: 'piano',
 26: 'plant',
 27: 'radio',
 28: 'range_hood',
 29: 'sink',
 30: 'sofa',
 31: 'stairs',
 32: 'stool',
 33: 'table',
 34: 'tent',
 35: 'toilet',
 36: 'tv_stand',
 37: 'vase',
 38: 'wardrobe',
 39: 'xbox'}

In [104]:
# every train_ds[i] access re-reads the file and re-runs the random
# transforms, so fetch the sample once and reuse it
first_sample = train_ds[0]

print('Train dataset size: ', len(train_ds))
print('Valid dataset size: ', len(valid_ds))
print('Number of classes: ', len(train_ds.classes))
print('Sample pointcloud shape: ', first_sample['pointcloud'].size())
print('Class: ', inv_classes[first_sample['category']])

Train dataset size:  9843
Valid dataset size:  2468
Number of classes:  40
Sample pointcloud shape:  torch.Size([1024, 3])
Class:  airplane


In [106]:
# training batches are reshuffled; validation batches keep the dataset order
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_ds, batch_size=64)

In [None]:
# https://www.kaggle.com/code/balraj98/pointnet-for-3d-object-classification-ii-pytorch/notebook

In [107]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

class Tnet(nn.Module):
    """Predict one ``k x k`` transformation matrix per sample.

    The network outputs an offset that is added to the identity matrix.

    Args:
        k: number of input channels and side length of the predicted matrix.
    """

    def __init__(self, k=3):
        super().__init__()
        self.k=k
        self.conv1 = nn.Conv1d(k,64,1)
        self.conv2 = nn.Conv1d(64,128,1)
        self.conv3 = nn.Conv1d(128,1024,1)
        self.fc1 = nn.Linear(1024,512)
        self.fc2 = nn.Linear(512,256)
        self.fc3 = nn.Linear(256,k*k)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)


    def forward(self, input):
        """Return a ``(bs, k, k)`` tensor for an input of shape ``(bs, k, n)``."""
        # input.shape == (bs,k,n): Conv1d expects channels before points
        xb = F.relu(self.bn1(self.conv1(input)))
        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = F.relu(self.bn3(self.conv3(xb)))

        # max over the whole point axis, then drop that axis -> (bs,1024)
        flat = F.max_pool1d(xb, xb.size(-1)).flatten(1)
        xb = F.relu(self.bn4(self.fc1(flat)))
        xb = F.relu(self.bn5(self.fc2(xb)))

        # initialize as identity
        # the identity is a constant: it needs no gradient, it is created on
        # the same device as the activations (not just "cuda"), and it
        # broadcasts over the batch dimension
        identity = torch.eye(self.k, device=xb.device)
        matrix = self.fc3(xb).view(-1,self.k,self.k) + identity
        return matrix


class Transform(nn.Module):
    """Feature extractor: align the input, align the 64-channel features, pool."""

    def __init__(self):
        super().__init__()
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=64)
        self.conv1 = nn.Conv1d(3,64,1)

        self.conv2 = nn.Conv1d(64,128,1)
        self.conv3 = nn.Conv1d(128,1024,1)


        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

    def forward(self, input):
        """Return ``(features, matrix3x3, matrix64x64)`` for a ``(bs, 3, n)`` input.

        ``features`` has shape ``(bs, 1024)``; the two matrices are the
        outputs of the input and feature T-Nets.
        """
        # apply the predicted 3x3 matrix to every point (batched matmul)
        matrix3x3 = self.input_transform(input)
        aligned = torch.bmm(input.transpose(1, 2), matrix3x3).transpose(1, 2)

        feats = F.relu(self.bn1(self.conv1(aligned)))

        # apply the predicted 64x64 matrix to every per-point feature vector
        matrix64x64 = self.feature_transform(feats)
        feats = torch.bmm(feats.transpose(1, 2), matrix64x64).transpose(1, 2)

        feats = F.relu(self.bn2(self.conv2(feats)))
        feats = self.bn3(self.conv3(feats))

        # max over the point axis, then flatten to (bs, 1024)
        pooled = F.max_pool1d(feats, feats.size(-1))
        return pooled.flatten(1), matrix3x3, matrix64x64

class PointNet(nn.Module):
    """Classifier head on top of ``Transform``.

    Args:
        classes: number of output classes.
    """

    def __init__(self, classes = 40):
        super().__init__()
        self.transform = Transform()
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, classes)


        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout = nn.Dropout(p=0.3)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, input):
        """Return ``(log_probs, matrix3x3, matrix64x64)``.

        ``log_probs`` are log-softmax scores over the classes; the two
        matrices are passed through unchanged from ``Transform``.
        """
        global_feat, matrix3x3, matrix64x64 = self.transform(input)

        hidden = F.relu(self.bn1(self.fc1(global_feat)))
        # dropout is applied to the fc2 output, before batch norm
        hidden = self.dropout(self.fc2(hidden))
        hidden = F.relu(self.bn2(hidden))

        log_probs = self.logsoftmax(self.fc3(hidden))
        return log_probs, matrix3x3, matrix64x64


In [108]:
def pointnetloss(outputs, labels, m3x3, m64x64, alpha = 0.0001):
    """Negative log-likelihood loss plus an orthogonality penalty on the transforms.

    Args:
        outputs: log-probabilities, shape ``(bs, n_classes)``.
        labels: integer class labels, shape ``(bs,)``.
        m3x3: batch of 3x3 matrices, shape ``(bs, 3, 3)``.
        m64x64: batch of 64x64 matrices, shape ``(bs, 64, 64)``.
        alpha: weight of the penalty term.

    Returns:
        Scalar tensor ``NLL + alpha * (||I - A A^T|| + ||I - B B^T||) / bs``,
        where each norm is taken over the whole batch tensor.
    """
    criterion = torch.nn.NLLLoss()
    bs=outputs.size(0)

    # the identities are constants: they need no gradient, live on the same
    # device as the matrices they are compared with, and broadcast over the batch
    id3x3 = torch.eye(3, device=m3x3.device)
    id64x64 = torch.eye(64, device=m64x64.device)

    # distance of every matrix from being orthogonal (A A^T == I)
    diff3x3 = id3x3-torch.bmm(m3x3,m3x3.transpose(1,2))
    diff64x64 = id64x64-torch.bmm(m64x64,m64x64.transpose(1,2))
    return criterion(outputs, labels) + alpha * (torch.norm(diff3x3)+torch.norm(diff64x64)) / float(bs)

In [110]:
# run on the GPU when CUDA is available, otherwise on the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cuda


In [111]:
pointnet = PointNet()
pointnet.to(device);

# Load a pre-trained model if it exists
PRETRAINED_PATH = '../model/pointnet-for-3d-object-classification-ii-pytorch/save.pth'
if os.path.exists(PRETRAINED_PATH):
    # map_location remaps the saved tensors onto the current device, so a
    # checkpoint written on a GPU also loads on a CPU-only machine
    # NOTE(review): torch.load unpickles the file - only load checkpoints
    # from a trusted source
    pointnet.load_state_dict(torch.load(PRETRAINED_PATH, map_location=device))
    print('Loaded Pre-trained PointNet Model!')

In [112]:
# Adam over all PointNet parameters
LEARNING_RATE = 0.0008
optimizer = torch.optim.Adam(pointnet.parameters(), lr=LEARNING_RATE)

In [113]:
def train(model, train_loader, val_loader=None,  epochs=1):
    """Train ``model``, optionally validate after each epoch, and save its weights.

    Relies on the module-level ``optimizer``, ``device`` and ``pointnetloss``.
    The optimizer must have been built from the parameters of ``model``,
    otherwise the optimizer steps update a different network.

    Args:
        model: network returning ``(log_probs, m3x3, m64x64)`` for a
            ``(bs, 3, n)`` input.
        train_loader: yields dicts with ``'pointcloud'`` (bs, n, 3) and
            ``'category'`` (bs,) tensors.
        val_loader: optional loader in the same format, used for accuracy.
        epochs: number of passes over ``train_loader``.

    Side effects:
        Prints the running loss and validation accuracy, and writes the
        model's state dict to ``save.pth`` after every epoch.
    """
    for epoch in range(epochs):
        # use the model that was passed in, not a global
        model.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device)
            optimizer.zero_grad()
            # the network expects channels first: (bs, n, 3) -> (bs, 3, n)
            outputs, m3x3, m64x64 = model(inputs.transpose(1,2))

            loss = pointnetloss(outputs, labels, m3x3, m64x64)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 5 == 4:    # print every 5 mini-batches
                print('[Epoch: %d, Batch: %4d / %4d], loss: %.3f' %
                    (epoch + 1, i + 1, len(train_loader), running_loss / 5))
                running_loss = 0.0

        model.eval()
        correct = total = 0

        # validation
        if val_loader:
            with torch.no_grad():
                for data in val_loader:
                    inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device)
                    outputs, __, __ = model(inputs.transpose(1,2))
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            # a loader that yields no samples has no accuracy to report
            if total:
                val_acc = 100. * correct / total
                print('Valid accuracy: %d %%' % val_acc)

        # save the model
        torch.save(model.state_dict(), "save.pth")

In [114]:
# one epoch (the default) of training, with validation after the epoch
train(model=pointnet, train_loader=train_loader, val_loader=valid_loader)

[Epoch: 1, Batch:    5 /  308], loss: 3.681
[Epoch: 1, Batch:   10 /  308], loss: 3.316
[Epoch: 1, Batch:   15 /  308], loss: 3.256
[Epoch: 1, Batch:   20 /  308], loss: 3.083
[Epoch: 1, Batch:   25 /  308], loss: 2.814
[Epoch: 1, Batch:   30 /  308], loss: 2.994
[Epoch: 1, Batch:   35 /  308], loss: 2.840
[Epoch: 1, Batch:   40 /  308], loss: 2.742
[Epoch: 1, Batch:   45 /  308], loss: 2.831
[Epoch: 1, Batch:   50 /  308], loss: 2.734
[Epoch: 1, Batch:   55 /  308], loss: 2.779
[Epoch: 1, Batch:   60 /  308], loss: 2.587
[Epoch: 1, Batch:   65 /  308], loss: 2.642
[Epoch: 1, Batch:   70 /  308], loss: 2.718
[Epoch: 1, Batch:   75 /  308], loss: 2.541
[Epoch: 1, Batch:   80 /  308], loss: 2.431
[Epoch: 1, Batch:   85 /  308], loss: 2.404
[Epoch: 1, Batch:   90 /  308], loss: 2.274
[Epoch: 1, Batch:   95 /  308], loss: 2.255
[Epoch: 1, Batch:  100 /  308], loss: 2.362
[Epoch: 1, Batch:  105 /  308], loss: 2.461
[Epoch: 1, Batch:  110 /  308], loss: 2.265
[Epoch: 1, Batch:  115 /  308], 