<a href="https://colab.research.google.com/github/lamalex/cs722-pointnet/blob/main/cs722-pointnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!curl -L http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip -o ModelNet10.zip
!unzip -q ModelNet10.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  451M  100  451M    0     0  15.8M      0  0:00:28  0:00:28 --:--:-- 30.0M


In [None]:
!python -V
!pip install pycodestyle pycodestyle_magic flake8 nptyping

%load_ext pycodestyle_magic

In [210]:
import random
from enum import Enum
from itertools import islice
from pathlib import Path
# from functools import reduce
from typing import Callable, List, Optional, TextIO, Tuple

import numpy as np
from nptyping import NDArray

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import plotly.graph_objects as go
from plotly.subplots import make_subplots

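**ModelNet10** contains CAD models from 10 categories. The models are described using .off files.
.off is a simple text format where:
- The first line is `OFF`, marking the file as .off
- The 2nd line holds three counts: # vertices, # faces, # edges
- A list of vertices follows, one per line (X, Y, Z coordinates)
- A list of faces follows, one per line: the number of vertices in the face, then the indices of those vertices
- Edges are only counted on the 2nd line; they are not listed separately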

In [None]:
# Dataset root, relative to the working directory; presumably the folder
# created by unzipping ModelNet10.zip in the first cell (verify after download).
path = Path("ModelNet10")

In [None]:
# Would have liked to use a dataclass here to define
# a simple Mesh type with vertices/faces accessors, but
# dataclasses need Python 3.7 and this runtime is Python 3.6.9.
# A Tuple of lists will work, but I don't love it.

def read_off(file: TextIO) -> Tuple[List[List[float]], List[List[int]]]:
    '''
    https://en.wikipedia.org/wiki/OFF_(file_format)
    Parse a simple geometry definition file containing lists
    of vertices and faces.

    Args:
        file: Open text stream positioned at the start of the OFF data
              (anything with a readline() method returning str)

    Returns:
        Tuple of (vertices, faces) where vertices is a list of coordinate
        lists (floats) and faces is a list of vertex-index lists (ints)

    Raises:
        ValueError: If the header is not 'OFF', or a count/coordinate/index
                    field cannot be parsed as a number
    '''
    header = file.readline().strip()
    if not header.startswith('OFF'):
        raise ValueError('Not a valid OFF header')

    # Normally the counts sit on the second line, but tolerate a header
    # with the counts fused onto it (e.g. 'OFF490 518 0').
    count_fields = header[len('OFF'):].split() or file.readline().split()
    # The third count (edges) is read but not used.
    n_verts, n_faces, _ = (int(field) for field in count_fields)

    # split() with no argument copes with tabs and repeated spaces.
    vertices = [
        [float(field) for field in file.readline().split()]
        for _ in range(n_verts)
    ]
    # Each face line starts with its vertex count; drop it and keep
    # only the vertex indices.
    faces = [
        [int(field) for field in file.readline().split()][1:]
        for _ in range(n_faces)
    ]

    return vertices, faces

## Generating a point cloud from a 3D CAD object
CAD objects are defined using vertices and faces but we can sample faces
to generate a point cloud.

Note: Data directly from a raw source like LiDAR or Kinect scanner would
be a point cloud (which is the motivation for this paper!), but CAD programs
from which our dataset is derived define their objects in a different format.

In [202]:
def triangle_area(a: NDArray[np.float64], b: NDArray[np.float64],
                  c: NDArray[np.float64]) -> np.float64:
    '''
    Area of the triangle spanned by three vertices, via Heron's formula
    https://en.wikipedia.org/wiki/Heron%27s_formula

    Args:
        a: First vertex defining triangle
        b: Second vertex defining triangle
        c: Third vertex defining triangle

    Returns:
        Area of triangle defined by a, b, c
    '''
    # Edge lengths in the order a-b, b-c, c-a
    edge_lengths = [
        np.linalg.norm(start - end)
        for start, end in ((a, b), (b, c), (c, a))
    ]

    half_perimeter = 0.5 * (edge_lengths[0] + edge_lengths[1]
                            + edge_lengths[2])

    # Heron: area^2 = s (s - ab) (s - bc) (s - ca)
    squared_area = half_perimeter
    for length in edge_lengths:
        squared_area = squared_area * (half_perimeter - length)

    # Clamp at zero so a slightly negative product does not reach
    # the square root.
    return max(squared_area, 0)**0.5


def sample_point(a: NDArray[np.float64],
                 b: NDArray[np.float64],
                 c: NDArray[np.float64]) -> Tuple[np.float64,
                                                  np.float64,
                                                  np.float64]:
    '''
    Draw one random point on the surface of a triangle

    Args:
        a: First vertex defining triangle
        b: Second vertex defining triangle
        c: Third vertex defining triangle

    Returns:
        (x, y, z) of a point on the triangle defined by (a, b, c),
        built as a weighted mix of the three vertices
    '''
    # barycentric coordinates on a triangle
    # https://mathworld.wolfram.com/BarycentricCoordinates.html
    # Two draws, ordered, cut [0, 1] into three non-negative weights
    # that sum to one.
    first_draw = random.random()
    second_draw = random.random()
    low, high = sorted([first_draw, second_draw])

    def blend(axis):
        # Weighted combination of the three vertices along one axis
        return low * a[axis] + (high-low) * b[axis] + (1-high) * c[axis]

    return tuple(blend(axis) for axis in range(3))


# NOTE: relies on `mesh` (a (vertices, faces) tuple) already being defined;
# in this notebook it is assigned in the visualization cell via read_off.
verts, faces = mesh
verts = np.array(verts)

# Lazily compute the area of every face from its first three vertex indices
areas = (
    triangle_area(*(verts[tri[corner]] for corner in range(3)))
    for tri in faces
)

# Pick 'k' faces, each with probability proportional to its area.
# random.choices treats the weights as relative, so they need not
# be normalised to sum to one.
k = 1024
sampled_faces = random.choices(faces, weights=areas, k=k)

# One random surface point per sampled face
pointcloud = np.array(
    [sample_point(*(verts[tri[corner]] for corner in range(3)))
     for tri in sampled_faces]
)

# Visualizing our 3D model
in 3 ways

In [203]:
# Load one example mesh (a chair from the training split)
with open(Path(path) / 'chair' / 'train' / 'chair_0001.off', 'r') as off_file:
    mesh = read_off(off_file)

# Pull each coordinate / index column out once and reuse it across traces
mesh_vertices, mesh_faces = mesh
mesh_x = [vertex[0] for vertex in mesh_vertices]
mesh_y = [vertex[1] for vertex in mesh_vertices]
mesh_z = [vertex[2] for vertex in mesh_vertices]
tri_i = [face[0] for face in mesh_faces]
tri_j = [face[1] for face in mesh_faces]
tri_k = [face[2] for face in mesh_faces]

# Same plot type 3 times
plots = [[{'type': 'scene'} for _ in range(3)]]
marker_style = {'size': 1.5}

fig = make_subplots(rows=1,
                    cols=3,
                    specs=plots,
                    subplot_titles=("3D Mesh", "Vertices Only", "Point Cloud"))

# Column 1: the triangulated surface (i, j, k index into the vertex lists)
fig.add_trace(
    go.Mesh3d(x=mesh_x, y=mesh_y, z=mesh_z, i=tri_i, j=tri_j, k=tri_k),
    row=1,
    col=1)

# Column 2: only the mesh vertices, drawn as markers
fig.add_trace(
    go.Scatter3d(x=mesh_x, y=mesh_y, z=mesh_z,
                 mode='markers',
                 marker=marker_style),
    row=1,
    col=2)

# Column 3: the point cloud sampled from the mesh faces
# (expects `pointcloud` to be an (n, 3) array built in the sampling cell)
fig.add_trace(
    go.Scatter3d(x=pointcloud[:, 0],
                 y=pointcloud[:, 1],
                 z=pointcloud[:, 2],
                 mode='markers',
                 marker=marker_style),
    row=1,
    col=3)

fig.update_layout(height=600, width=1024, showlegend=False)
fig.show()

# PointNet

In [211]:
class DataSplitType(Enum):
    '''
    Names the dataset split to load.

    ModelNet has training and test data split into different folders.
    Rather than using magic strings use an enum; each member's value
    is the split's folder name.
    '''
    TRAIN = 'train'
    TEST = 'test'


class PointCloudDataSet(Dataset):
    '''
    Point cloud data loading helper (skeleton, not implemented yet)
    https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
    '''
    def __init__(self,
                 root_dir: str,
                 split_type: 'DataSplitType',
                 transform: Optional[Callable] = None):
        '''
        Currently a no-op: none of the arguments are stored.

        Args:
            root_dir: Path to dataset
            split_type: Which split to load
            transform: Optional transformation to be applied to sample
        '''
        # TODO: index the files of the requested split

    def __len__(self):
        # TODO: return the number of samples; placeholder yields None
        return None

    def __getitem__(self, idx):
        # TODO: load and return the sample at idx; placeholder yields None
        return None


## T-Net

In [204]:
def window(seq, n=2):
    '''
    Slide a window of width n over an iterable, one element at a time
    s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ...

    Yields tuples; nothing is yielded when the iterable holds fewer
    than n elements.
    '''
    source = iter(seq)
    frame = tuple(islice(source, n))
    if len(frame) != n:
        # Input ran out before the first full window could be formed
        return

    yield frame
    for item in source:
        # Drop the oldest element, append the newest
        frame = (*frame[1:], item)
        yield frame


class TNet(nn.Module):
    '''
    Regression network for predicting a k x k transformation matrix.
    A sequence of batch normalized CNNs, MLPs, and a max pooling layer.
    '''
    def __init__(self, k: int):
        '''
        Args:
            k: Input layer dimensionality
        '''
        super(TNet, self).__init__()
        dims = [k, 64, 128, 1024, 512, 256, k**2]
        cnn_dims = dims[:4]
        fc_dims = dims[3:]

        self.k = k
        # nn.ModuleList (not a plain list) so the layers are registered:
        # otherwise parameters(), state_dict(), .to()/.cuda() and
        # train()/eval() would all silently skip them.
        self.cnn = nn.ModuleList(
            [nn.Conv1d(i, o, 1) for i, o in zip(cnn_dims, cnn_dims[1:])]
        )
        self.fc = nn.ModuleList(
            [nn.Linear(i, o) for i, o in zip(fc_dims, fc_dims[1:])]
        )
        # One batch norm per hidden width: the first len(self.cnn) follow
        # the convolutions, the rest follow the hidden linear layers.
        self.bn = nn.ModuleList([nn.BatchNorm1d(d) for d in dims[1:-1]])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        '''
        Predict (bs, k, k) affine transformation matrix
        for projecting model into normalized space.

        Args:
            x: A batch of examples with shape (batch size, k, n); the
               first Conv1d takes k input channels, so the k features
               must be on dim 1 and the n points on dim 2

        Returns:
            Tensor of (bs, k, k) affine transformation matrix
        '''
        norms = list(self.bn)
        n_cnn = len(self.cnn)

        # ReLU(BatchNorm(CNN)) for each CNN layer
        for conv, norm in zip(self.cnn, norms[:n_cnn]):
            x = F.relu(norm(conv(x)))

        # Max pool over the points dimension, then flatten to (bs, features)
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(x.size(0), -1)

        # ReLU(BatchNorm(FC)) for every linear layer except the last,
        # which is applied without normalisation or activation
        *hidden_fc, output_fc = list(self.fc)
        for fc, norm in zip(hidden_fc, norms[n_cnn:]):
            x = F.relu(norm(fc(x)))
        x = output_fc(x)

        # Add the flattened identity so that an all-zero prediction maps to
        # the identity transform. Built on x's own device/dtype so the
        # module works on CPU as well as GPU; broadcasts over the batch.
        ident = torch.eye(self.k, dtype=x.dtype, device=x.device)
        x = x + ident.view(1, self.k**2)

        return x.view(-1, self.k, self.k)


# Quick smoke check: build a T-Net for 3-dimensional input and wrap the
# sampled point cloud (a NumPy array) as a tensor. The network is only
# constructed here, not applied; the print shows the tensor and its shape.
tnet = TNet(k=3)
pointcloud_tensor = torch.from_numpy(pointcloud)
print(f'{pointcloud_tensor} {pointcloud_tensor.shape}')

SyntaxError: ignored