# Testing PointNet++
This Notebook is set up to test some functions and models to build a PointNet++.

The main functions are from <a href='https://github.com/yanx27/Pointnet_Pointnet2_pytorch/tree/master'>Xu Yan - GitHub</a>

<b>This Jupyter-Notebook is part of a master thesis with the topic<br>
<i>Analysis of deep learning methods for semantic segmentation of photogrammetric point clouds from aerial images</i><br>
&copy; Markus Hülsen, Matr.-Nr. 6026370<br>
Date: 15.08.2023</b>

## Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import laspy
import os
import pandas as pd
from tqdm.notebook import tqdm 

## Import Data
The data is stored as `LAS`-files. We will use `laspy` to import and convert the data.<br>
First we create a function to import a `LAS`-file and convert it into a Pandas `DataFrame`.

In [2]:
def import_las_to_Dataframe(path):
    """
    Read a point cloud file with laspy and return its points as a pandas DataFrame.

    Input:
        path: path of the file that is handed to `laspy.open`
    Return:
        df: DataFrame with the columns 'X', 'Y', 'Z' (taken from `las.x`, `las.y`, `las.z`),
            followed by one column per remaining dimension of the point format
    """
    with laspy.open(path) as reader:
        las = reader.read()

    # the coordinate columns come first; the index is a plain 0..n-1 range
    coords = {'X': np.array(las.x), 'Y': np.array(las.y), 'Z': np.array(las.z)}
    df = pd.DataFrame(coords, index=np.arange(len(coords['X'])))

    # append every dimension of the point format except the first three
    # (presumably the raw X, Y, Z values that are already covered above - confirm)
    for position, dimension in enumerate(las.point_format.dimensions):
        if position < 3:
            continue
        df[dimension.name] = np.array(las[dimension.name])

    return df

Define the path where the data that we want to process is stored. <br>All `laz`-files inside the defined folder will be stored in a list.

In [3]:
# root folder of the data set
data_path = '../../Daten/Datensatz_H3D/'
# sub-folder which includes different acquisition dates and types like 'DIM_2016', 'DIM_2019', 'DIM_2022', 'ALS2016', ...
data = 'DIM_2022/7 - DBScan/edited'

# collect the path of every laz-file inside the chosen folder, sorted by name
lst_files = sorted(
    data_path + data + '/' + file
    for file in os.listdir(data_path + data)
    if file.endswith('.laz')
)
print('Found', len(lst_files), 'laz-files:')
print(lst_files)

Found 12 laz-files:
['../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/554000_5798000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/554000_5799000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/554000_5800000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/554000_5801000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/555000_5798000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/555000_5799000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/555000_5800000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/555000_5801000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/556000_5798000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/556000_5799000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/556000_5800000.laz', '../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/556000_5801000.laz']


Use function above to read the point cloud and convert it to pandas DataFrame.

In [4]:
# define file we want to use: the first entry of the sorted file list
las_path = lst_files[0]
las_path

'../../Daten/Datensatz_H3D/DIM_2022/7 - DBScan/edited/554000_5798000.laz'

In [5]:
# import data: read the selected file into a DataFrame (one row per point)
df = import_las_to_Dataframe(las_path)

# display the DataFrame
df

Unnamed: 0,X,Y,Z,intensity,return_number,number_of_returns,scan_direction_flag,edge_of_flight_line,classification,synthetic,...,planarity,eigenentropy,curvature change,local_pointdensity,roughness,label,z_to_dem,inside_road,count_veg,count_ground
0,554866.15,5798487.56,70.56,8869,1,5,0,0,6,0,...,,,,,,,0.507541,0.0,,
1,554864.20,5798487.63,70.45,8873,1,2,0,0,6,0,...,,,,,,,0.343253,0.0,,
2,554877.16,5798492.81,70.61,9356,1,2,0,0,6,0,...,,,,,,,0.510372,0.0,,
3,554864.20,5798487.79,70.47,8873,1,2,0,0,6,0,...,,,,,,,0.361733,0.0,,
4,554869.83,5798491.05,74.13,17329,1,7,0,0,6,0,...,,,,,,,4.054098,0.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5631159,554330.37,5798588.29,69.24,7499,1,4,0,0,6,0,...,1.228618,-0.501297,0.431701,0.713115,0.674832,1.0,2.455299,0.0,33.0,19.0
5631160,554326.79,5798588.75,69.24,7499,1,4,0,0,6,0,...,0.274811,-3.273526,0.075397,1.068392,0.123233,1.0,2.453916,0.0,42.0,2.0
5631161,554330.68,5798588.91,69.21,7499,1,5,0,0,6,0,...,0.522878,-1.338616,0.041746,0.730131,0.963822,1.0,2.435219,0.0,31.0,23.0
5631162,554324.51,5798587.84,69.27,10151,1,3,0,0,6,0,...,-0.604568,-0.630222,0.383430,0.659132,1.499709,5.0,2.428519,0.0,26.0,42.0


## Data preparation
Some of the features are not necessary or redundant. We will remove them.

In [6]:
# keep only the rows with synthetic == 0 and, in the same step, only the columns that are used further on
df = df.loc[
    df['synthetic'] == 0,
    ['X', 'Y', 'Z', 'intensity', 'return_number', 'number_of_returns', 'classification']
]
df

Unnamed: 0,X,Y,Z,intensity,return_number,number_of_returns,classification
0,554866.15,5798487.56,70.56,8869,1,5,6
1,554864.20,5798487.63,70.45,8873,1,2,6
2,554877.16,5798492.81,70.61,9356,1,2,6
3,554864.20,5798487.79,70.47,8873,1,2,6
4,554869.83,5798491.05,74.13,17329,1,7,6
...,...,...,...,...,...,...,...
5631159,554330.37,5798588.29,69.24,7499,1,4,6
5631160,554326.79,5798588.75,69.24,7499,1,4,6
5631161,554330.68,5798588.91,69.21,7499,1,5,6
5631162,554324.51,5798587.84,69.27,10151,1,3,6


## Point Cloud Filtering

### Reduce area of the Pointcloud
To reduce the number of points within the point cloud, we will create a bounding box to select a smaller area of the point cloud.

In [7]:
# Bounding Box: a window of 100 x 100 coordinate units that starts at the minimum X/Y of the point cloud
bbox_x1, bbox_y1 = df.X.min(), df.Y.min()
bbox_x2 = bbox_x1 + 100
bbox_y2 = bbox_y1 + 100

# Select the points inside of the Bounding Box (both borders are included)
inside_x = df.X.between(bbox_x1, bbox_x2)
inside_y = df.Y.between(bbox_y1, bbox_y2)
df_sub = df.loc[inside_x & inside_y]

print('Check Results')
print('X min:', df_sub.X.min(), 'X max:', df_sub.X.max())
print('Y min:', df_sub.Y.min(), 'Y max:', df_sub.Y.max())

# drop the full DataFrame and the helper variables to save memory
del df, bbox_x1, bbox_y1, bbox_x2, bbox_y2, inside_x, inside_y

df_sub

Check Results
X min: 554000.0 X max: 554100.0
Y min: 5798000.0 Y max: 5798100.0


Unnamed: 0,X,Y,Z,intensity,return_number,number_of_returns,classification
1961,554055.63,5798086.94,66.41,9244,1,5,6
1962,554055.63,5798086.40,66.49,9245,1,4,6
1963,554055.86,5798083.58,66.56,8956,1,2,6
1964,554054.66,5798084.38,66.78,9002,1,2,6
1965,554056.10,5798084.01,66.63,8956,1,6,6
...,...,...,...,...,...,...,...
5621623,554090.44,5798068.78,65.94,8975,1,3,6
5621624,554092.72,5798072.02,66.61,8777,1,2,6
5622345,554016.48,5798054.33,64.78,9003,1,3,2
5622346,554027.62,5798029.32,65.00,11818,1,2,2


## Set up PyTorch

In [8]:
import torch
import torch.nn as nn

## Convert Dataframe to Numpy array
Before we will go on with the processing we will convert the dataframe into a numpy array.<br>
To speed up calculations and to save memory, we center the coordinates to the middle of the bounding box

In [9]:
# take the coordinate columns of the DataFrame as NumPy array
xyz_np = df_sub.loc[:, 'X':'Z'].to_numpy()

# the center point of the bounding box is the mean of the per-axis minimum and maximum
mini = xyz_np.min(axis=0)
maxi = xyz_np.max(axis=0)
center = 0.5 * (maxi + mini)

# shift all points so that the center point becomes the origin
xyz_np = xyz_np - center[np.newaxis, :]
del center
xyz_np

array([[  5.63 ,  36.94 ,  -5.065],
       [  5.63 ,  36.4  ,  -4.985],
       [  5.86 ,  33.58 ,  -4.915],
       ...,
       [-33.52 ,   4.33 ,  -6.695],
       [-22.38 , -20.68 ,  -6.475],
       [-33.29 ,   3.91 ,  -6.705]])

## Convert DataFrame into Tensors
First we will create a tensor which contains the coordinates of the points.

In [10]:
# wrap the centered coordinates into a tensor
xyz = torch.from_numpy(xyz_np)
print(f'Shape of XYZ_Tensor: {xyz.shape}')
print(f'Number of dimensions of XYZ_Tensor: {xyz.dim()}')

xyz

Shape of XYZ_Tensor: torch.Size([63350, 3])
Number of dimensions of XYZ_Tensor: 2


tensor([[  5.6300,  36.9400,  -5.0650],
        [  5.6300,  36.4000,  -4.9850],
        [  5.8600,  33.5800,  -4.9150],
        ...,
        [-33.5200,   4.3300,  -6.6950],
        [-22.3800, -20.6800,  -6.4750],
        [-33.2900,   3.9100,  -6.7050]], dtype=torch.float64)

Tensor must have three dimensions: <br>
`(Batchsize x number of points x 3)`

In [11]:
# add a leading batch dimension of size 1
# (note: running this cell again adds one more dimension each time)
xyz = xyz.unsqueeze(0)
xyz.shape

torch.Size([1, 63350, 3])

Next we create a tensor with the features

In [12]:
# every column that is neither a coordinate nor the label is used as a feature;
# the values are cast to int32 and a leading batch dimension is added
feat = torch.from_numpy(
    df_sub.drop(columns=['X', 'Y', 'Z', 'classification']).to_numpy().astype(np.int32)
).unsqueeze(0)
print(f'Shape of feature_Tensor: {feat.shape}')
print(f'Number of dimensions of feature_Tensor: {feat.dim()}')

feat

Shape of feature_Tensor: torch.Size([1, 63350, 3])
Number of dimensions of feature_Tensor: 3


tensor([[[ 9244,     1,     5],
         [ 9245,     1,     4],
         [ 8956,     1,     2],
         ...,
         [ 9003,     1,     3],
         [11818,     1,     2],
         [ 8543,     1,     2]]], dtype=torch.int32)

Create a tensor with the labels

In [13]:
# the classification column is used as label tensor (one value per point, no batch dimension is added here)
labels = torch.from_numpy(df_sub.classification.to_numpy())
labels

tensor([6, 6, 6,  ..., 2, 2, 2], dtype=torch.uint8)

Push our two tensors to the GPU to speed up the calculation

In [14]:
# use the GPU when one is available and fall back to the CPU otherwise, so that `device`
# is always defined for the later cells that call `.to(device)`
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
xyz = xyz.to(device)
feat = feat.to(device)

print(f'tensor is coordinates ist now stored on: {xyz.device}')

tensor is coordinates ist now stored on: cuda:0


In [15]:
# the DataFrame and the NumPy copy of the coordinates are not used by the following cells anymore
del df_sub, xyz_np

## Sample and Group

This part studies the sampling & grouping part of the Set Abstraction layer, which is then only followed by a multi-layer perceptron and max pooling.

### Farthest point sampling

First we need to create a function to execute the farthest point sampling.

In [16]:
def farthest_point_sample(xyz, npoint):
    """
    Iterative farthest point sampling (FPS).

    Starts with one random point per batch and then repeatedly selects the point
    whose squared distance to the already selected points is the largest.

    Input:
        xyz: pointcloud data, [B, N, C] (C = 3 for plain xyz-coordinates)
        npoint: number of samples
    Return:
        centroids: sampled pointcloud index, [B, npoint]
    """
    device = xyz.device
    # Batchsize (B), number of points (N), num of dims (C)
    B, N, C = xyz.shape
    # indices of the selected points, created directly on the device of the input
    centroids = torch.zeros(B, npoint, dtype=torch.long, device=device)
    # smallest squared distance of every point to the selected points, initialized with 1e10
    distance = torch.full((B, N), 1e10, dtype=torch.float32, device=device)
    # the first point of every batch is chosen randomly
    farthest = torch.randint(0, N, (B,), dtype=torch.long, device=device)
    # indices of the batches
    batch_indices = torch.arange(B, dtype=torch.long, device=device)

    # iterate through the number of points to sample
    for i in range(npoint):
        centroids[:, i] = farthest
        # coordinates of the point selected last; C instead of a fixed 3 keeps other dimensionalities working
        centroid = xyz[batch_indices, farthest, :].view(B, 1, C)
        dist = torch.sum((xyz - centroid) ** 2, -1, dtype=torch.float32)
        # keep the smaller distance wherever the new point is closer than all previous ones
        distance = torch.where(dist < distance, dist, distance)
        # the next point is the one that is farthest away from all selected points
        farthest = torch.max(distance, -1)[1]
    return centroids

Next we will check the results from our FPS-function.

In [17]:
# sample 1024 points of the cloud with FPS; the result holds indices into `xyz`, not coordinates
idx = farthest_point_sample(xyz, 1024)
print(f'Shape of the FPS-output: {idx.shape}')
print(idx)

Shape of the FPS-output: torch.Size([1, 1024])
tensor([[24668, 54023, 17967,  ..., 46454, 56218, 45006]], device='cuda:0')


As we can see, the function returns the indices of the sampled points.<br>
We will create a function to gather the coordinates of these points.

In [18]:
def index_points(points, idx):
    """
    Gather the entries of `points` that are addressed by `idx`, separately for every batch.

    Input:
        points: input points data, [B, N, C]
        idx: sample index data, [B, S] (more index dimensions such as [B, S, K] are handled as well)
    Return:
        new_points: indexed points data, [B, S, C] (or [B, S, K, C])
    """
    # one batch number per batch, shaped [B, 1, ..., 1] and then expanded to the shape of idx,
    # so that every index is paired with the batch it belongs to
    singleton_dims = [1] * (idx.dim() - 1)
    batch_numbers = torch.arange(points.shape[0], dtype=torch.long, device=points.device)
    batch_numbers = batch_numbers.view(idx.shape[0], *singleton_dims).expand_as(idx)
    return points[batch_numbers, idx, :]

Now we can use the function to get the coordinates of the sampled points.

In [19]:
# look up the coordinates that belong to the FPS indices
fps_points = index_points(xyz, idx)
print(f'Sampled point tensor with shape {fps_points.shape}\n{fps_points}')

Sampled point tensor with shape torch.Size([1, 1024, 3])
tensor([[[ 40.7800,  12.8000,  -6.0050],
         [-49.7500, -49.6200,  -5.3550],
         [-49.9700,  49.8000,  -6.8650],
         ...,
         [ 47.3200,  48.4600,  -4.6350],
         [ 18.0600,  -2.4800,  -3.9550],
         [ 13.4600, -17.5800,  -3.9050]]], device='cuda:0',
       dtype=torch.float64)


## Query fixed radius (ball) points

For every sampled point, retrieve all points from the input within a given fixed radius. This is sometimes also referred to as a ball query. 

First we will create a function to calculate the squared distance between the points

In [20]:
def square_distance(src, dst):
    """
    Calculate the squared Euclidean distance between each pair of points.

    src^T * dst = xn * xm + yn * ym + zn * zm;
    sum(src^2, dim=-1) = xn*xn + yn*yn + zn*zn;
    sum(dst^2, dim=-1) = xm*xm + ym*ym + zm*zm;
    dist = (xn - xm)^2 + (yn - ym)^2 + (zn - zm)^2
         = sum(src**2,dim=-1)+sum(dst**2,dim=-1)-2*src^T*dst

    Input:
        src: source points, [B, N, C]
        dst: target points, [B, M, C]
    Output:
        dist: per-point square distance, [B, N, M], never negative
    """
    B, N, _ = src.shape
    _, M, _ = dst.shape
    dist = -2 * torch.matmul(src, dst.permute(0, 2, 1))
    dist += torch.sum(src ** 2, -1).view(B, N, 1)
    dist += torch.sum(dst ** 2, -1).view(B, 1, M)
    # the expanded formula subtracts large, nearly equal numbers; round-off can push the result for
    # (almost) identical points slightly below zero, which is not a valid squared distance
    return torch.clamp(dist, min=0)

Next we create a function to get the indices of our points inside the ball.

In [21]:
def query_ball_point(radius, nsample, xyz, new_xyz):
    """
    Fixed radius ball query: collect up to `nsample` point indices around every query point.

    Input:
        radius: local region radius
        nsample: max sample number in local region
        xyz: all points, [B, N, 3]
        new_xyz: query points, [B, S, 3]
    Return:
        group_idx: grouped points index, [B, S, nsample]
    """
    B, N, _ = xyz.shape
    _, S, _ = new_xyz.shape
    # every query point starts with the complete index range 0..N-1
    all_indices = torch.arange(N, dtype=torch.long, device=xyz.device).view(1, 1, N).expand(B, S, N)
    # points outside of the ball get the out-of-range index N, which sorts behind all valid indices
    outside_ball = square_distance(new_xyz, xyz) > radius ** 2
    group_idx = all_indices.masked_fill(outside_ball, N)
    # the nsample smallest indices per query point
    group_idx = group_idx.sort(dim=-1)[0][:, :, :nsample]
    # balls with fewer than nsample points still contain N; these slots repeat the first point of the group
    first_of_group = group_idx[:, :, 0].view(B, S, 1).expand(B, S, nsample)
    return torch.where(group_idx == N, first_of_group, group_idx)

Check the results of our ball query.

In [22]:
# cause of memory limitation on the gpu we will do the calculation on the cpu
xyz = xyz.to(torch.device('cpu'))
fps_points = fps_points.to(torch.device('cpu'))

idx_ball = query_ball_point(10, 16, xyz, fps_points)
print(f'Result of our ball query has shape: {idx_ball.shape}\n{idx_ball}')

Result of our ball query has shape: torch.Size([1, 1024, 16])
tensor([[[  470,   471,   472,  ..., 23849, 23898, 23899],
         [  473,   496,   618,  ...,  1271,  1288,  1319],
         [  398, 14042, 14071,  ..., 14409, 14456, 14488],
         ...,
         [  469, 31845, 31908,  ..., 32214, 32268, 32269],
         [  483,   493,   499,  ...,   761,   788,   805],
         [  486,   541,   543,  ...,   760,   770,   771]]])


Now we need to gather the points again.

In [23]:
# put tensors back to gpu
xyz = xyz.to(device)
fps_points = fps_points.to(device)

# gather points
fps_group_points = index_points(xyz, idx_ball)
print(f'Shape of tensor after FPS and grouping: {fps_group_points.shape}')

Shape of tensor after FPS and grouping: torch.Size([1, 1024, 16, 3])


Now we have the neighboring points of every sampled point.

### Zero-center every group 
Before each group can be processed by a small PointNet module (with an MLP and max pooling), every group must be zero-centered according to the sampling point it originates from. If we do not center the groups, each group is in its own coordinate space and the PointNet module could not learn shared features. Rather, it would try to learn features for all the different coordinate spaces, but would not succeed in generalizing at all.

In [24]:
# subtract the sampled point from every point of its group; the added singleton dimension is
# broadcast over the group, so the group size does not have to be repeated here
fps_group_points_centered = fps_group_points - fps_points.unsqueeze(2)

Check the results for the group of sampled point 312.

In [25]:
# look at the first five points of group 312 before centering, the sampled point of this group
# and the first five points of the group after centering
print(f'the group:\n{fps_group_points[0, 312, 0:5, :]}\n\t...\n\nwill be substracted by the smapled point:\n{fps_points[0, 312, :]}\n\nwich results in\n{fps_group_points_centered[0, 312, 0:5, :]}\n\t...')

the group:
tensor([[48.8400, 45.8800, -5.9750],
        [35.1800, 48.1800, -5.9650],
        [38.5800, 44.1300, -5.9850],
        [35.6600, 46.5500, -6.2250],
        [35.7800, 47.0700, -6.4650]], device='cuda:0', dtype=torch.float64)
	...

will be substracted by the smapled point:
tensor([44.4900, 49.9300, -3.4650], device='cuda:0', dtype=torch.float64)

wich results in
tensor([[ 4.3500, -4.0500, -2.5100],
        [-9.3100, -1.7500, -2.5000],
        [-5.9100, -5.8000, -2.5200],
        [-8.8300, -3.3800, -2.7600],
        [-8.7100, -2.8600, -3.0000]], device='cuda:0', dtype=torch.float64)
	...


These zero-centered xyz-coordinates of the groups can now be fed into a PointNet module to extract geometric features for each group and by this for the sampling point. But PointNet++ also integrates further input features along with the xyz-coordinates before it applies a PointNet module.

## Group features

If there are further features per point as input besides the xyz-coordinates, then these features can be gathered with the `index_points()` function as well. <br>Feature could be input features like intensity, return number, etc., but are also features extracted from the previous set abstraction layer of the PointNet++ architecture.

To get the features we can use the index tensor from the ball query.

In [26]:
# gather the features of the grouped points with the same indices that were used for the coordinates
feat_group = index_points(feat, idx_ball)
feat_group.shape

torch.Size([1, 1024, 16, 3])

The feature groups do not need to be centered in any way. So, we are already finished.

Both the zero-centered xyz-coordinates as well as the feature groups have the same tensor shapes with the exception of the last dimension.

Therefore, we can concatenate them into a single tensor by their last (-1) dimension.

In [27]:
# concatenate the centered coordinates and the features along dimension 3 (the channel dimension)
output = torch.cat((fps_group_points_centered, feat_group), 3)
print(f'final output shape: {output.shape}')

final output shape: torch.Size([1, 1024, 16, 6])


This output tensor can now be used in a PointNet module for feature extraction. It does not matter that the geometric and other features are in the same tensor. Either the PointNet module learns something useful from this combination or, if not, it will learn to ignore certain (feature) channels by setting the weights for these channels to 0. As PointNet applies a large number of filters, each filter can learn a different combination of features. Such an approach is the most flexible and most general way of handling features.

### PointNet++ implementation of the Set Abstraction layer

The first set abstraction layer might receive no input features, simply because the input data does not have any information besides the xyz-coordinates. Then, only the grouped points are the input to the PointNet module of the set abstraction layer. At the second set abstraction layer, there are always the features from the previous layer as additional input besides the coordinates. These features are provided by the variable called points (which is a confusing name and a better name would maybe be 'features').

PointNet++ optionally does not include the grouped point xyz coordinates, but only continues with the grouped features. Then the concatenate part (as seen above) is not executed and only the grouped features are the output of the function to sample and group. This might be interesting for the second and higher set abstraction layer. But then, no further features are derived from the geometry in higher set abstraction layers. The default, however, is to use xyz-coordinates.

In the following cell, an implementation of sample and group is given. You should recognize most of the parts. Note that this version only supports the fixed radius ball query; it does not offer an option to use k nearest neighbors instead.

As already mentioned above, the naming of the variables is sometimes a little confusing:

- `npoint` is the number of points for farthest point sampling
- `radius` is the radius of the fixed radius ball queries in each layer
- `nsample` is the number of points in the fixed radius ball query
- `xyz` input points
- `points` feature tensor
- `returnfps` is a boolean. If True, the function additionally returns the `grouped_xyz` and the indices of the FPS points `fps_idx`

In [28]:
def sample_and_group(npoint, radius, nsample, xyz, points, returnfps=False):
    """
    Sample points with FPS and group the neighbors of every sampled point with a ball query.

    Input:
        npoint: number of points for farthest point sampling
        radius: radius of the fixed radius ball query
        nsample: number of points per ball
        xyz: input points position data, [B, N, C]
        points: input points data (features), [B, N, D], or None
        returnfps: if True, grouped_xyz and fps_idx are returned additionally
    Return:
        new_xyz: sampled points position data, [B, npoint, C]
        new_points: sampled points data, [B, npoint, nsample, C+D]
                    ([B, npoint, nsample, C] if points is None)
        grouped_xyz: only if returnfps, grouped coordinates before centering, [B, npoint, nsample, C]
        fps_idx: only if returnfps, indices of the sampled points, [B, npoint]
    """
    # Batchsize (B) and num of coordinate channels (C); the number of points is not needed here
    B, _, C = xyz.shape
    # get indices from farthest point sampling
    fps_idx = farthest_point_sample(xyz, npoint)  # [B, npoint]
    # gather the coordinates of the sampled points
    new_xyz = index_points(xyz, fps_idx)  # [B, npoint, C]
    # get indices from the fixed radius ball query
    idx = query_ball_point(radius, nsample, xyz, new_xyz)  # [B, npoint, nsample]
    # gather coords of the grouped points
    grouped_xyz = index_points(xyz, idx)  # [B, npoint, nsample, C]
    # center every group on its FPS-point
    grouped_xyz_norm = grouped_xyz - new_xyz.view(B, npoint, 1, C)

    # add features if available
    if points is not None:
        grouped_points = index_points(points, idx)
        new_points = torch.cat([grouped_xyz_norm, grouped_points], dim=-1)  # [B, npoint, nsample, C+D]
    else:
        new_points = grouped_xyz_norm

    if returnfps:
        return new_xyz, new_points, grouped_xyz, fps_idx
    return new_xyz, new_points

The function **sample_and_group()** implements all the above functionality in a single function and returns the sampled points and the grouped points with concatenated features. If `returnfps` is set, it additionally returns the grouped xyz-coordinates and the indices from the farthest point sampling. Not all of this information is then further used in PointNet++.

In the following, the function is called with the input features.

In [29]:
# the sampling and grouping is executed on the CPU here
xyz = xyz.to(torch.device('cpu'))
feat = feat.to(torch.device('cpu'))

# same settings as in the step-by-step cells above: 1024 FPS points, radius 10, 16 points per ball;
# the feature tensor is handed over as `points`
new_xyz, new_points = sample_and_group(1024, 10, 16, xyz, feat)

new_points.shape

torch.Size([1, 1024, 16, 6])

## Set Abstraction (SA) layer

In the following, a (simplified) implementation of the Set Abstraction layer is given as a PyTorch module. A custom module is a class that is derived from the PyTorch base class `nn.Module` and can be used in a custom neural network model.


- The variable called `points` contains the feature tensor.
- `npoint` is the number of points for farthest point sampling
- `radius` is the radius of the fixed radius ball queries in each layer
- `nsample` is the number of points in the fixed radius ball query
- `in_channel` is the number of input channels of the first convolution, i.e. the coordinate channels plus the feature channels (3 + 3 = 6 in the example below)
- `mlp` contains a list with the number of filters for each convolution

In [30]:
import torch.nn.functional as F

In [31]:
class PointNetSetAbstraction(nn.Module):
    """
    PointNet Set Abstraction (SA) module: sampling, grouping and a shared MLP followed by max pooling.

    Input of the constructor:
        npoint: number of points for farthest point sampling
        radius: radius of the fixed radius ball query
        nsample: number of points in the fixed radius ball query
        in_channel: number of input channels of the first convolution; this is the size of the last
                    dimension of the grouped tensor (coordinate channels plus feature channels)
        mlp: list with the number of filters of every convolution
    """

    def __init__(self, npoint, radius, nsample, in_channel, mlp):
        super(PointNetSetAbstraction, self).__init__()
        # number of points for FPS
        self.npoint = npoint
        # radius of the fixed radius ball query
        self.radius = radius
        # number of points in the fixed radius ball query
        self.nsample = nsample
        # 1x1 convolutions of the shared MLP and their batch normalisations
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        last_channel = in_channel
        # every entry of mlp adds one convolution + batch normalisation pair
        for out_channel in mlp:
            self.mlp_convs.append(nn.Conv2d(last_channel, out_channel, 1))
            self.mlp_bns.append(nn.BatchNorm2d(out_channel))
            last_channel = out_channel

    def forward(self, xyz, points):
        """
        Input:
            xyz: input points position data, [B, N, C]
            points: input features data, [B, N, D], or None
        Return:
            new_xyz: sampled points position data, [B, S, C]
            new_points: sample points feature data, [B, S, D']
        """
        # new_xyz: sampled points position data, [B, npoint, C]
        # new_points: sampled points data, [B, npoint, nsample, C+D]
        new_xyz, new_points = sample_and_group(self.npoint, self.radius, self.nsample, xyz, points)

        # channels first for the convolutions; cast to float32 to match the layer weights
        new_points = new_points.permute(0, 3, 2, 1).float()  # [B, C+D, nsample, npoint]
        for conv, bn in zip(self.mlp_convs, self.mlp_bns):
            new_points = F.relu(bn(conv(new_points)))

        # max pooling over the points of every group, then back to channels last
        new_points = torch.max(new_points, 2)[0].permute(0, 2, 1)  # [B, npoint, D']
        return new_xyz, new_points

As another option we could use **Multi-Scale Grouping** inside of the Set Abstraction. <br>
In this case we will use different radii for the ball query. <br>
This is computationally heavy - but sometimes it's worth it.

- The variable called `points` contains the feature tensor.
- `npoint` is the number of points for farthest point sampling
- `radius_list` is a list with the radius of the fixed radius ball querys in each layer
- `nsample_list` is a list with the numbers of points in the fixed radius ball query
- `in_channel` number of features
- `mlp_list` contains a list with lists with the number of filter for convolution

In [32]:
class PointNetSetAbstractionMsg(nn.Module):
    """
    PointNet Set Abstraction (SA) module with Multi-Scale Grouping (MSG).

    Input of the constructor:
        npoint: number of points for farthest point sampling
        radius_list: list with the radius of the ball query of every scale
        nsample_list: list with the number of points of the ball query of every scale
        in_channel: number of feature channels; 3 coordinate channels are added internally
        mlp_list: list with one list per scale that holds the number of filters of every convolution
    """

    def __init__(self, npoint, radius_list, nsample_list, in_channel, mlp_list):
        super(PointNetSetAbstractionMsg, self).__init__()
        # number of points
        self.npoint = npoint
        # list with radius for ball query
        self.radius_list = radius_list
        # list with number of points of the ball query
        self.nsample_list = nsample_list
        # one block of convolutions and one block of batch normalisations per scale
        self.conv_blocks = nn.ModuleList()
        self.bn_blocks = nn.ModuleList()

        # mlp_list contains the number of filters for the convolutions of every scale
        for scale_mlp in mlp_list:
            convs = nn.ModuleList()
            bns = nn.ModuleList()
            # the grouped coordinates are concatenated to the features, hence the 3 extra channels
            last_channel = in_channel + 3
            for out_channel in scale_mlp:
                convs.append(nn.Conv2d(last_channel, out_channel, 1))
                bns.append(nn.BatchNorm2d(out_channel))
                last_channel = out_channel
            self.conv_blocks.append(convs)
            self.bn_blocks.append(bns)

    def forward(self, xyz, points):
        """
        Input:
            xyz: input points position data, [B, N, C]
            points: input points data, [B, N, D], or None
        Return:
            new_xyz: sampled points position data, [B, C, S]
            new_points_concat: sample points feature data, [B, D', S]
        """
        # NOTE(review): the inputs are channels last, but both outputs are channels first, which
        # differs from PointNetSetAbstraction (channels last on both sides) - confirm that this
        # layout is intended before stacking several of these layers.
        B, _, C = xyz.shape
        S = self.npoint
        new_xyz = index_points(xyz, farthest_point_sample(xyz, S))
        new_points_list = []
        for i, radius in enumerate(self.radius_list):
            K = self.nsample_list[i]
            group_idx = query_ball_point(radius, K, xyz, new_xyz)
            grouped_xyz = index_points(xyz, group_idx)
            # center every group on its sampled point
            grouped_xyz -= new_xyz.view(B, S, 1, C)
            if points is not None:
                grouped_points = index_points(points, group_idx)
                grouped_points = torch.cat([grouped_points, grouped_xyz], dim=-1)
            else:
                grouped_points = grouped_xyz

            # channels first for the convolutions; cast to float32 to match the layer weights
            # (float64 coordinates would otherwise fail inside the convolution)
            grouped_points = grouped_points.permute(0, 3, 2, 1).float()  # [B, D, K, S]
            for conv, bn in zip(self.conv_blocks[i], self.bn_blocks[i]):
                grouped_points = F.relu(bn(conv(grouped_points)))
            # max pooling over the points of every group
            new_points = torch.max(grouped_points, 2)[0]  # [B, D', S]
            new_points_list.append(new_points)

        new_xyz = new_xyz.permute(0, 2, 1)
        # stack the features of all scales along the channel dimension
        new_points_concat = torch.cat(new_points_list, dim=1)
        return new_xyz, new_points_concat

Let's see what it looks like if we initialize our model and feed our data into the model.<br>
In this step we don't use Multi-Scale Grouping. The weights are initialized randomly.

In [33]:
# shape of the feature tensor that is fed into the model
feat.shape

torch.Size([1, 63350, 3])

In [34]:
# 1024 FPS points, radius 10, 16 points per ball, 6 input channels and an MLP with 32, 32 and 64 filters
model = PointNetSetAbstraction(1024, 10, 16, 6, [32, 32, 64])
# the int32 features are converted to float64 before they are handed to the model
# (presumably so that they can be concatenated with the float64 coordinates - confirm)
new_xyz, new_points = model(xyz, feat.double())
print(f'shape of new_xyz:\n{new_xyz.shape}\nnew_xyz:\n{new_xyz}\n\nshape of new_points:\n {new_points.shape}\nnew_points:\n {new_points}')

shape of new_xyz:
torch.Size([1, 1024, 3])
new_xyz:
tensor([[[ 17.4100, -17.9300,  -3.9150],
         [-49.9700,  49.8000,  -6.8650],
         [ 49.1400,  49.9000,  -6.5750],
         ...,
         [ 43.9600,  35.9100,  -4.9050],
         [-48.9100,  12.2200,  -6.4050],
         [ 22.8100, -25.5800,  -6.7750]]], dtype=torch.float64)

shape of new_points:
 torch.Size([1, 1024, 64])
new_points:
 tensor([[[1.4996, 1.0948, 1.5982,  ..., 0.8155, 1.0406, 1.5813],
         [1.4724, 1.1278, 1.2414,  ..., 0.9598, 1.4609, 1.2604],
         [0.0000, 0.0000, 0.0000,  ..., 0.9452, 1.4339, 0.0000],
         ...,
         [0.5805, 1.1876, 0.6565,  ..., 0.5039, 0.3675, 0.5328],
         [0.0000, 1.1028, 0.2567,  ..., 0.9314, 1.3629, 0.3549],
         [2.0140, 1.0989, 1.6969,  ..., 1.8418, 3.5543, 1.5028]]],
       grad_fn=<PermuteBackward0>)


## Feature Propagation
After we defined the set abstraction (SA) layer, we need to propagate back.<br>
To do so, we need to identify the nearest three points from the layer above and interpolate the feature with `inverse distance weights`.

### Distances to 3 nearest neighbors

First we will calculate the distances to the three nearest points.

In [35]:
# calculate squared distances between every input point and every sampled point
dists = square_distance(xyz, new_xyz)
# sort the distances in ascending order; `idx` is overwritten with the indices of the sorted sampled points
dists, idx = dists.sort(dim=-1)
# just consider the first three points with the lowest distance
dists, idx = dists[:, :, :3], idx[:, :, :3]  # [B, N, 3]
dists, idx

(tensor([[[ 5.7597,  7.3366,  7.3433],
          [ 4.8837,  5.2625,  9.5816],
          [ 1.1801, 13.4853, 15.4602],
          ...,
          [ 0.7362,  6.7118, 15.3636],
          [ 5.3058,  9.3259,  9.7457],
          [ 1.1630,  5.5428, 15.7274]]], dtype=torch.float64),
 tensor([[[734, 366, 114],
          [114, 734, 908],
          [114, 734, 619],
          ...,
          [459, 916, 972],
          [878, 327, 897],
          [459, 916,   4]]]))

Now we can calculate the weights based on the inversed distance

In [36]:
# reciprocal of the squared distances; the small constant avoids a division by zero
dist_recip = 1.0 / (dists + 1e-8)
# normalise so that the three weights of every point sum up to 1
norm = torch.sum(dist_recip, dim=2, keepdim=True)
weight = dist_recip / norm
print(f'shape of weight matrix:\n{weight.shape}\n\nweight matrix:\n{weight}')

shape of weight matrix:
torch.Size([1, 63350, 3])

weight matrix:
tensor([[[0.3892, 0.3055, 0.3053],
         [0.4102, 0.3807, 0.2091],
         [0.8592, 0.0752, 0.0656],
         ...,
         [0.8639, 0.0948, 0.0414],
         [0.4732, 0.2692, 0.2576],
         [0.7790, 0.1634, 0.0576]]], dtype=torch.float64)


Now that we have the weights, we can interpolate the features for the points.

In [37]:
# weighted sum of the features of the three nearest sampled points; the trailing singleton dimension
# lets the weights broadcast over the feature channels, independent of the number of points
interpolated_points = torch.sum(index_points(new_points, idx) * weight.unsqueeze(-1), dim=2)
print(f'shape of interpolated points:\n{interpolated_points.shape}\n\ninterpolated points\n{interpolated_points}')

shape of interpolated points:
torch.Size([1, 63350, 64])

interpolated points
tensor([[[0.0000, 0.7999, 0.0000,  ..., 1.1108, 1.8103, 0.0000],
         [0.0000, 0.8054, 0.0000,  ..., 1.1116, 1.8110, 0.0000],
         [0.0000, 0.8122, 0.0000,  ..., 1.1130, 1.8115, 0.0000],
         ...,
         [1.4756, 1.1738, 1.5270,  ..., 0.9430, 1.3998, 1.4174],
         [1.9385, 1.1456, 1.7143,  ..., 0.7773, 0.9430, 1.6848],
         [1.4744, 1.1840, 1.5400,  ..., 0.9318, 1.3679, 1.4300]]],
       dtype=torch.float64, grad_fn=<SumBackward1>)


As we can see we now have the features for every point of the input point size.

Now let's put it all together into a single layer which does all the steps above. <br>
In addition, the layer applies some MLPs.

In [38]:
class PointNetFeaturePropagation(nn.Module):
    """Feature-propagation (upsampling) layer.

    Features given at a sparse set of points are interpolated onto a denser
    set of points with inverse-distance weights of the (up to) three nearest
    sparse points, optionally concatenated with features already available at
    the dense points, and finally passed through a point-wise MLP
    (1x1 convolution -> batch norm -> ReLU per layer).
    """

    def __init__(self, in_channel, mlp):
        """
        Input:
            in_channel: number of feature channels entering the first MLP layer
                        (channels of points1, if used, plus channels of points2)
            mlp: list with the number of output channels of every MLP layer
        """
        super(PointNetFeaturePropagation, self).__init__()

        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        last_channel = in_channel
        for out_channel in mlp:
            self.mlp_convs.append(nn.Conv1d(last_channel, out_channel, 1))
            self.mlp_bns.append(nn.BatchNorm1d(out_channel))
            last_channel = out_channel

    def forward(self, xyz1, xyz2, points1, points2):
        """
        Input:
            xyz1: input points position data, [B, N, C]
            xyz2: sampled input points position data, [B, S, C]
            points1: input points data, [B, N, D1], or None if there is nothing to concatenate
            points2: sampled points data, [B, S, D2]
        Return:
            new_points: upsampled points data, [B, N, D']
        """

        B, N, _ = xyz1.shape
        _, S, _ = xyz2.shape

        if S == 1:
            # only one sampled point: every input point inherits its features
            interpolated_points = points2.repeat(1, N, 1)
        else:
            # use three neighbours, or all of them if fewer than three exist
            k = min(3, S)

            # square_distance / index_points are defined elsewhere in this notebook;
            # square_distance is expected to return pairwise distances of shape [B, N, S]
            dists = square_distance(xyz1, xyz2)
            dists, idx = dists.sort(dim=-1)
            dists, idx = dists[:, :, :k], idx[:, :, :k]  # [B, N, k]

            # inverse-distance weights, normalised to sum to one per point;
            # the epsilon avoids a division by zero for a distance of exactly 0
            dist_recip = 1.0 / (dists + 1e-8)
            norm = torch.sum(dist_recip, dim=2, keepdim=True)
            weight = dist_recip / norm
            interpolated_points = torch.sum(index_points(points2, idx) * weight.view(B, N, k, 1), dim=2)

        if points1 is not None:
            new_points = torch.cat([points1, interpolated_points], dim=-1)
        else:
            new_points = interpolated_points

        # Conv1d / BatchNorm1d expect channels first: [B, D, N]
        new_points = new_points.permute(0, 2, 1)
        for conv, bn in zip(self.mlp_convs, self.mlp_bns):
            new_points = F.relu(bn(conv(new_points.float())))
        # back to points first: [B, N, D']
        return new_points.permute(0, 2, 1)

Now we can check whether the feature propagation works.

In [39]:
# The MLP input is the concatenation of both feature tensors passed below,
# hence 64 + 3 input channels.
model_FP = PointNetFeaturePropagation(in_channel=64 + 3, mlp=[64, 64, 128])
points_FP = model_FP(xyz1=xyz, xyz2=new_xyz, points1=feat, points2=new_points)
print(points_FP.shape)

torch.Size([1, 63350, 128])


## Custom PointNet++ model


In [40]:
# distinct class ids present in the labels
torch.unique(labels)

tensor([ 0,  2,  6, 20], dtype=torch.uint8)

In [41]:
class PointNet2(nn.Module):
    """PointNet++ segmentation network.

    Three set-abstraction levels reduce the point cloud, three
    feature-propagation levels bring the features back to the input points,
    and a point-wise head turns them into per-point class log-probabilities.
    """

    def __init__(self, num_classes):
        """
        Input:
            num_classes: number of classes predicted for every point
        """
        super(PointNet2, self).__init__()

        # set abstraction layers
        self.sa1 = PointNetSetAbstraction(npoint=8192, radius=1.0, nsample=16, in_channel=6 + 3, mlp=[64, 64, 128])
        self.sa2 = PointNetSetAbstraction(npoint=4096, radius=5.0, nsample=64, in_channel=128 + 3, mlp=[128, 128, 256])
        self.sa3 = PointNetSetAbstraction(npoint=2048, radius=15.0, nsample=64, in_channel=256 + 3, mlp=[128, 128, 256])

        # feature propagation
        # in_channel = skip features + interpolated features
        # (assuming every set-abstraction layer outputs mlp[-1] channels -- TODO confirm)
        self.fp3 = PointNetFeaturePropagation(in_channel=512, mlp = [256, 256])
        self.fp2 = PointNetFeaturePropagation(in_channel=384, mlp = [256, 256])
        self.fp1 = PointNetFeaturePropagation(in_channel=256, mlp = [128, 64])

        # Point-wise classification head. It operates on the 64 feature channels
        # produced by fp1, so it is independent of the number of input points.
        self.conv1 = nn.Conv1d(64, 64, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.drop1 = nn.Dropout(0.5)
        self.conv2 = nn.Conv1d(64, num_classes, 1)

    def forward(self, xyz):
        """
        Input:
            xyz: input point data, [B, N, C]; the first three channels are used
                 as coordinates, the complete tensor as initial point features
        Return:
            x: per-point class log-probabilities, [B, N, num_classes]
            l3_points: features returned by the last set-abstraction level
        """
        l0_points = xyz
        l0_xyz = xyz[:,:, 0:3]

        # set abstraction
        l1_xyz, l1_points = self.sa1(l0_xyz, l0_points)
        l2_xyz, l2_points = self.sa2(l1_xyz, l1_points)
        l3_xyz, l3_points = self.sa3(l2_xyz, l2_points)

        # feature propagation
        l2_points = self.fp3(l2_xyz, l3_xyz, l2_points, l3_points)
        l1_points = self.fp2(l1_xyz, l2_xyz, l1_points, l2_points)
        l0_points = self.fp1(l0_xyz, l1_xyz, None, l1_points)

        # feature propagation returns [B, N, 64]; Conv1d needs channels first
        x = l0_points.permute(0, 2, 1)  # [B, 64, N]
        x = self.drop1(F.relu(self.bn1(self.conv1(x))))
        x = self.conv2(x)  # [B, num_classes, N]
        x = F.log_softmax(x, dim=1)  # normalise over the class dimension
        x = x.permute(0, 2, 1)  # [B, N, num_classes]
        return x, l3_points