# Point-based Learning Methods
For this exercise, you will implement the PointNet architecture.

To create a dataset of point clouds, you need Open3D. It can be installed via `conda install open3d`.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import openmesh as om
import numpy as np
import k3d
import matplotlib.pyplot as plt
from jupyterplot import ProgressPlot
from tqdm.notebook import tqdm
from sklearn.neighbors import KDTree

from dataset import ModelNet10

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


## PointNet
The PointNet architecture you will implement in this exercise is a slightly simplified version of the one presented in the lecture. We will omit the T-Net modules for point and feature alignment.
Your task is therefore to implement a network that transforms each point of a point cloud individually and then takes the maximum of each feature value over all points. The number of layers and the layer parameters should be the same as presented in the lecture.

We will test your PointNet implementation on the ModelNet10 dataset. We will first need to download the dataset and as it contains meshes, sample it as well. This will take a couple of minutes.

Make sure to upload your `best_val.ckpt` checkpoint file so that we do not have to retrain your model. If the file is not included we **cannot** give you any points for this task.

In [2]:
class PointNet(nn.Module):
    """Simplified PointNet classifier without the T-Net alignment modules.

    Each point is lifted independently by a shared MLP (implemented as
    kernel-size-1 convolutions), a maximum over the point axis collapses the
    cloud into a single 1024-dimensional descriptor, and a fully connected
    head maps that descriptor to class scores.
    """

    def __init__(self, n_classes=10):
        """Create all layers.

        Args:
            n_classes: Number of scores produced per point cloud.
        """
        super().__init__()

        ### BEGIN SOLUTION

        # Shared per-point MLP: 3 -> 64 -> 128 -> 1024 channels.
        # A Conv1d with kernel size 1 applies the same linear map to every point.
        self.conv1 = nn.Conv1d(in_channels=3, out_channels=64, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=1)
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=1024, kernel_size=1)

        # Classification head: 1024 -> 512 -> 256 -> n_classes.
        self.fc1 = nn.Linear(in_features=1024, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=n_classes)
        self.relu = nn.ReLU()

        # One batch norm per hidden layer (three shared-MLP layers, two FC layers).
        self.bn1 = nn.BatchNorm1d(num_features=64)
        self.bn2 = nn.BatchNorm1d(num_features=128)
        self.bn3 = nn.BatchNorm1d(num_features=1024)
        self.bn4 = nn.BatchNorm1d(num_features=512)
        self.bn5 = nn.BatchNorm1d(num_features=256)

        ### END SOLUTION

    def forward(self, x):
        """Compute class scores for a batch of point clouds.

        Args:
            x: Tensor laid out as (batch, 3, n_points), i.e. channels first,
                which is what ``conv1`` consumes.

        Returns:
            Tensor of shape (batch, n_classes) holding unnormalised scores.
        """
        ### BEGIN SOLUTION

        # Per-point features: (batch, 3, n_points) -> (batch, 1024, n_points).
        point_feats = self.relu(self.bn1(self.conv1(x)))
        point_feats = self.relu(self.bn2(self.conv2(point_feats)))
        point_feats = self.relu(self.bn3(self.conv3(point_feats)))

        # Symmetric pooling: for every feature channel keep the largest value
        # found over all points -> (batch, 1024).
        global_feat = torch.max(point_feats, dim=2).values

        # Fully connected head; the last layer has no activation because the
        # scores are returned raw.
        hidden = self.relu(self.bn4(self.fc1(global_feat)))
        hidden = self.relu(self.bn5(self.fc2(hidden)))
        logits = self.fc3(hidden)

        ### END SOLUTION
        return logits

In [None]:
# Mini-batch size shared by all three loaders.
batch_size = 64 # you can change the batch size depending on your memory requirements

# The three ModelNet10 splits, created in train / val / test order.
train_data, val_data, test_data = (
    ModelNet10('./ModelNet10', mode=split) for split in ("train", "val", "test")
)

# Only the training split is shuffled; validation and test keep a fixed order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Use the GPU when one is available and report which one was picked.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))

# Ten-class network and its Adam optimiser.
model = PointNet(n_classes=10).to(device)
optim = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Train for a fixed number of epochs, plotting loss/accuracy live and writing
# the weights with the highest validation accuracy to "best_val.ckpt".
n_epochs = 10

pp = ProgressPlot(plot_names=["loss", "accuracy"], line_names=["train", "val"],
                  x_lim=[0, n_epochs-1], y_lim=[[0,1], [0,1]])

# Highest validation accuracy seen so far; -1 guarantees the first epoch is saved.
best_val_acc = -1

pbar = tqdm(range(n_epochs))
for e in pbar:
    # ---- training pass ----
    train_loss = 0
    train_acc = 0
    model.train()
    for (x,y) in train_loader:
        x, y = x.to(device), y.to(device)
        pred = model(x)
        loss = F.cross_entropy(pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
        train_loss += loss.item()
        # Count correct predictions; normalised by the dataset size below.
        train_acc += (pred.max(-1).indices == y).float().sum().item()
    train_loss /= len(train_loader)  # mean of the per-batch losses
    train_acc /= len(train_data)     # fraction of correctly classified samples

    # ---- validation pass (no gradients, eval-mode batch norm) ----
    model.eval()
    val_loss = 0
    val_acc = 0
    with torch.no_grad():
        for (x,y) in val_loader:
            x, y = x.to(device), y.to(device)
            pred = model(x)
            loss = F.cross_entropy(pred, y)
            val_loss += loss.item()
            val_acc += (pred.max(-1).indices == y).float().sum().item()
        val_loss /= len(val_loader)
        val_acc /= len(val_data)
        if val_acc > best_val_acc:
            # Remember the new best score; without this every epoch would
            # overwrite the checkpoint and the file would hold the last
            # epoch rather than the best one.
            best_val_acc = val_acc
            torch.save({
            'epoch': e,
            'model_state_dict': model.state_dict(),
            'optim_state_dict': optim.state_dict(),
            'val_acc': val_acc,
            }, "best_val.ckpt")

    pp.update([[train_loss, val_loss], [train_acc, val_acc]])
    pbar.set_description(f"train loss: {train_loss:.4f}, train acc.: {train_acc:.4f}")
pp.finalize()

In [None]:
# Evaluate the saved checkpoint on the test split.
# map_location moves the stored tensors onto the current device, so a
# checkpoint written on a GPU can still be loaded on a CPU-only machine.
checkpoint = torch.load("best_val.ckpt", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
test_acc = 0
with torch.no_grad():
    for (x,y) in tqdm(test_loader):
        x, y = x.to(device), y.to(device)
        pred = model(x)
        # Count correct predictions; normalised by the dataset size below.
        test_acc += (pred.max(-1).indices == y).float().sum().item()
test_acc /= len(test_data)
print(f"test acc.: {test_acc}")
# Notebook-level pass criterion: at least 70% test accuracy.
assert test_acc >= 0.7