# Домашнее задание 3: Perception

В этом задании вам будет необходимо обучить PointNet для задачи фильтрации шума в лидарном облаке.

На 3-м семинаре мы с вами придумывали фичи руками и пытались обучить на этих данных catboost. Практика показывает, что нейросети — куда более способные генераторы фичей.

[Данные](https://yadi.sk/d/CBoVCVIxJ2q2cw)

Задание:

1. Необходимо реализовать PointNet, который будет работать на данных со снегом из 3-го семинара. PointNet должен работать на окрестностях точек: нет смысла запускать его на всём облаке. PointNet должен включать в себя шаг агрегации по множеству (например, с помощью функции максимума), шаг подклеивания агрегированного вектора к исходным точкам и шаг вычисления фичей по отдельным точкам. Вероятно, вы захотите повторить эту процедуру несколько раз для улучшения качества. Статья: https://arxiv.org/abs/1612.00593. Вы можете выбрать любой фреймворк для реализации.
2. Ваш PointNet должен ограничить сверху размер окрестности. В референсной реализации использовались 64 точки.
3. Разбиение на train/test. Для разбиения используйте следующий код.
```
scene_indices = np.arange(0, 291)
np.random.seed(100)
np.random.shuffle(scene_indices)
train_indices = scene_indices[:260]
test_indices = scene_indices[260:]
```
4. Данные лучше генерировать on-demand, таким образом вам не придется хранить в памяти большие массивы точек. В tensorflow это можно реализовать через tf.data.

5. PointNet — это функция, которая работает на неупорядоченном множестве точек. В нашем же кейсе мы хотим предсказать не свойство окрестности, а свойство точки. Подумайте о том, как можно модифицировать архитектуру, чтобы PointNet «не забывал» фичи точки, которая нам интересна. (Это поможет улучшить качество.)


## Формальные требования

1. В вашей архитектуре должны быть признаки PointNet: вычисление глобального вектора множества, подклеивание его обратно, вычисление фичей по точкам.

2. ROC-AUC на тестовом датасете должен превышать 0.99


In [1]:
!pip install --upgrade torch

Requirement already up-to-date: torch in /usr/local/lib/python3.7/dist-packages (1.8.1+cu101)


In [2]:
# Mount Google Drive inside the Colab runtime; the CSV and checkpoint paths
# used further down are relative paths under drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import numpy as np
import pandas as pd

import torch
from torch.utils.data import Dataset, DataLoader

import typing as tp

In [4]:
# Load the per-point table; the first CSV column is used as the row index.
# Alternative path for running outside Colab:
# features = pd.read_csv('data/snow_features.csv', index_col=0)
features = pd.read_csv('drive/MyDrive/sdc-colab-data/perception/snow_features.csv', index_col=0)

  mask |= (ar1 == a)


In [5]:
features.head(10)

Unnamed: 0,scene_id,x,y,z,intensity,ring,label,min_intensity_1.0,max_intensity_1.0,median_intensity_1.0,std_intensity_1.0,min_ring_1.0,max_ring_1.0,median_ring_1.0,std_ring_1.0,r_std_1.0,n_neighbours_1.0
0,0.0,-11.355618,-4.206962,0.344085,0.0,23.0,1.0,0.0,0.0,0.0,0.0,23.0,23.0,23.0,0.0,0.0,1.0
1,0.0,-5.916535,-1.972164,0.283262,0.0,25.0,1.0,0.0,0.0,0.0,0.0,25.0,25.0,25.0,0.0,0.0,1.0
2,0.0,-7.410451,-2.113039,2.137792,0.0,31.0,1.0,0.0,0.0,0.0,0.0,31.0,31.0,31.0,0.0,0.0,1.0
3,0.0,-13.84587,-1.406652,0.40631,0.0,23.0,1.0,0.0,0.0,0.0,0.0,23.0,23.0,23.0,0.0,0.0,1.0
4,0.0,-8.326218,-0.34606,0.226469,0.0,22.0,1.0,0.0,0.0,0.0,0.0,22.0,22.0,22.0,0.0,0.0,1.0
5,0.0,-29.016968,-2.179385,0.945424,7.0,24.0,1.0,7.0,7.0,7.0,0.0,24.0,24.0,24.0,0.0,0.0,1.0
6,0.0,-2.074985,0.003017,0.044024,2.0,16.0,1.0,2.0,21.0,3.0,8.730534,16.0,27.0,17.0,4.966555,0.192132,3.0
7,0.0,-2.041912,-0.009894,0.055311,3.0,17.0,1.0,2.0,21.0,3.0,8.730534,16.0,27.0,17.0,4.966555,0.189939,3.0
8,0.0,-6.275961,0.790447,0.086301,0.0,19.0,1.0,0.0,0.0,0.0,0.0,19.0,25.0,22.0,3.0,0.041361,2.0
9,0.0,-8.290426,1.923754,0.044705,0.0,18.0,1.0,0.0,0.0,0.0,0.0,18.0,23.0,20.5,2.5,0.028832,2.0


In [6]:
from sklearn.neighbors import KDTree

class PointCloudDataset(Dataset):
    """Scene-level view over a flat table of points.

    Item ``i`` is the sub-frame holding every row of the i-th distinct
    ``scene_id`` (distinct ids are kept in order of first appearance).
    The table itself stays available as ``self.df`` with a fresh 0..N-1 index.
    """

    def __init__(self, data_df: pd.DataFrame) -> None:
        # Drop the incoming index so rows are addressed positionally from here on.
        table = data_df.reset_index(drop=True)
        distinct_scenes = table.scene_id.unique().tolist()

        self.df: pd.DataFrame = table
        self.scene_ids = distinct_scenes
        self.n_scenes = len(distinct_scenes)

    def __getitem__(self, scene_idx: int) -> tp.Any:
        """Return the rows of the scene at position ``scene_idx``.

        An out-of-range position raises ``IndexError`` from the list lookup,
        which is also what terminates plain ``for`` iteration over the dataset.
        """
        wanted_scene = self.scene_ids[scene_idx]
        in_scene = self.df.scene_id == wanted_scene
        return self.df.loc[in_scene]

    def __len__(self) -> int:
        """Number of distinct scenes in the table."""
        return self.n_scenes
    
    
class SceneDataset(Dataset):
    """Per-point dataset over a single scene.

    Item ``idx`` is ``(neighbourhood, label)`` where ``neighbourhood`` is a
    ``(max_neighbors, n_features)`` array describing the points found within
    ``radius`` of point ``idx`` and ``label`` is the label of point ``idx``.

    Layout guarantees of the neighbourhood array:

    * row 0 is always the query point itself (the model reads the point of
      interest from row 0);
    * remaining rows are the other neighbours ordered by increasing distance;
    * if fewer than ``max_neighbors`` points are in range, the tail is padded
      with all-zero rows; if more are in range, the farthest ones are dropped.

    Args:
        cloud_df: rows of one scene; must contain ``label`` and ``scene_id``
            columns. Every other column becomes a feature, and the first three
            remaining columns are used as the coordinates for the spatial index.
        radius: search radius for the neighbourhood query.
        max_neighbors: fixed number of rows in every returned neighbourhood.

    Raises:
        ValueError: if ``max_neighbors`` is smaller than 1.
    """

    def __init__(self, cloud_df: pd.DataFrame, radius: float = 3,
                 max_neighbors: int = 32) -> None:
        if max_neighbors < 1:
            raise ValueError(f'max_neighbors must be >= 1, got {max_neighbors}')
        self.radius = radius
        self.max_neighbors = max_neighbors
        self.features = cloud_df.drop(columns=['label', 'scene_id']).to_numpy()
        self.tree = KDTree(self.features[:, :3])
        self.labels = cloud_df.label.to_numpy()

    def __getitem__(self, idx: int) -> tp.Any:
        n_total = self.features.shape[0]
        if not -n_total <= idx < n_total:
            raise IndexError(f'point index {idx} is out of range for a scene of {n_total} points')
        if idx < 0:
            # Normalise so the comparison against neighbour ids below works.
            idx += n_total

        point = self.features[idx, :3]
        # sort_results requires return_distance=True; the distances themselves are not used.
        neighbor_ids, _ = self.tree.query_radius(point[np.newaxis, ...], r=self.radius,
                                                 return_distance=True, sort_results=True)
        neighbor_ids = neighbor_ids[0]

        # Distance sorting alone does not guarantee that the query point comes
        # first when another point has identical coordinates, so pin it to row 0.
        others = neighbor_ids[neighbor_ids != idx]
        neighbor_ids = np.concatenate(([idx], others))[:self.max_neighbors]

        neighbor_features = self.features[neighbor_ids]
        n_missing = self.max_neighbors - neighbor_ids.size
        if n_missing > 0:
            neighbor_features = np.pad(neighbor_features, ((0, n_missing), (0, 0)))
        return neighbor_features, self.labels[idx]

    def __len__(self) -> int:
        """Number of points in the scene."""
        return self.features.shape[0]

In [7]:
# Fixed train/test split prescribed by the assignment: shuffle the 291 scene
# ids with seed 100, take the first 260 for training and the remaining 31 for testing.
scene_indices = np.arange(0, 291)
np.random.seed(100)
np.random.shuffle(scene_indices)
train_indices = scene_indices[:260]
test_indices = scene_indices[260:]

# Split by scene, so all points of one scene end up on the same side.
train_data = features[features.scene_id.isin(train_indices)]
test_data = features[features.scene_id.isin(test_indices)]

In [8]:
# Rebind the names: from here on train_data / test_data are scene-level
# datasets (the underlying DataFrame stays reachable as `.df`).
train_data = PointCloudDataset(train_data)
test_data = PointCloudDataset(test_data)

In [9]:
# Inverse per-label row count over the training points: count() gives the
# non-null count of every column per label, iloc[:, 0] keeps the first column's
# counts. The result is a Series indexed by label value.
class_balance = 1 / train_data.df.groupby('label').count().iloc[:, 0]

In [10]:
import typing as tp

import numpy as np
import torch

from sklearn import metrics as M

# Registry of supported metrics, keyed by the name used in the logger's
# `metrics` list. Values are (metric function, needs_proba): when needs_proba
# is True the function is fed probabilities instead of hard class predictions.
NAME_TO_METRIC = {'accuracy': (M.accuracy_score, False),
                  'recall': (M.recall_score, False),
                  'precision': (M.precision_score, False),
                  'f1': (M.f1_score, False),
                  'roc_auc': (M.roc_auc_score, True)}


class ClassificationMetricLogger:
    """Accumulates losses and predictions for the train and validation splits
    and turns them into metrics on demand.

    The logger has a mode switch (``train()`` / ``eval()``) that decides which
    split ``process_predictions`` appends to. Metric functions are looked up by
    name in the module-level ``NAME_TO_METRIC`` registry.

    Note: the stored probability is the softmax score of class index 1 only,
    so probability-based metrics are meaningful for binary problems.

    Args:
        n_classes: number of classes; used to build the ``labels`` argument of
            the averaged metrics.
        metrics: names of the metrics to compute (keys of ``NAME_TO_METRIC``).
            Defaults to ``['precision', 'recall', 'f1']``.
        averaging: value passed as ``average=`` to the averaged metrics.
    """

    # Metrics whose sklearn function accepts neither `labels` nor `average`;
    # passing those keywords to them raises TypeError.
    _NO_AVERAGING_METRICS = frozenset({'accuracy'})

    def __init__(self, n_classes: int, metrics: tp.Optional[tp.List[str]] = None,
                 averaging: str = 'weighted') -> None:
        if metrics is None:
            metrics = ['precision', 'recall', 'f1']
        # Copy so neither the caller's list nor a shared default is aliased.
        self.metrics = list(metrics)
        self.n_metrics = len(self.metrics)
        self._train = True
        self.n_classes = n_classes
        self.averaging = averaging
        self.reset()

    def train(self, train: bool = True) -> None:
        """Route subsequent predictions to the train split (or val if ``train`` is False)."""
        self._train = train

    def eval(self) -> None:
        """Route subsequent predictions to the validation split."""
        self._train = False

    def __logits_to_probs(self, logits: torch.Tensor) -> tp.List[float]:
        # Softmax over the class axis, keeping only the score of class index 1.
        probs = torch.softmax(logits.detach().cpu(), dim=1)[:, 1]
        return tp.cast(tp.List[float], probs.numpy().astype(float).tolist())

    def __logits_to_classes(self, logits: torch.Tensor) -> tp.List[int]:
        classes = torch.argmax(logits.detach().cpu(), dim=1)
        return tp.cast(tp.List[int], classes.numpy().astype(int).tolist())

    def process_predictions(self, preds: torch.Tensor, gt: torch.Tensor, loss: float) -> None:
        """Record one batch.

        Args:
            preds: raw logits, indexed as ``(sample, class)``.
            gt: ground-truth class indices, one per sample.
            loss: scalar loss value of the batch.
        """
        classes = self.__logits_to_classes(preds)
        probs = self.__logits_to_probs(preds)
        # detach/cpu make this safe for tensors that track gradients or live on an accelerator.
        gt_list = gt.detach().cpu().numpy().tolist()
        if self._train:
            self.train_losses.append(loss)
            self.train_probs.extend(probs)
            self.train_preds.extend(classes)
            self.train_gt.extend(gt_list)
        else:
            self.val_losses.append(loss)
            self.val_probs.extend(probs)
            self.val_preds.extend(classes)
            self.val_gt.extend(gt_list)

    def __metrics(self, train: bool = False) -> tp.Dict[str, float]:
        # Computes mean loss plus every configured metric over everything
        # accumulated for the chosen split since the last reset().
        if train:
            losses, probs = self.train_losses, self.train_probs
            preds, gt = self.train_preds, self.train_gt
        else:
            losses, probs = self.val_losses, self.val_probs
            preds, gt = self.val_preds, self.val_gt

        metric_dict = {'mean_loss': float(np.mean(losses))}
        for metric in self.metrics:
            metric_fn, needs_proba = NAME_TO_METRIC[metric]
            scores = probs if needs_proba else preds
            if metric in self._NO_AVERAGING_METRICS:
                kwargs: tp.Dict[str, tp.Any] = {}
            else:
                kwargs = {'labels': np.arange(self.n_classes), 'average': self.averaging}
            metric_dict[metric] = float(metric_fn(gt, scores, **kwargs))
        return metric_dict

    def __describe_split(self, train: bool = True) -> str:
        # One "name: value" line per metric, each terminated by a newline.
        return ''.join(f'{k}: {v}\n' for k, v in self.__metrics(train).items())

    def train_metrics(self) -> tp.Dict[str, float]:
        """Metrics over the accumulated train predictions."""
        return self.__metrics(train=True)

    def val_metrics(self) -> tp.Dict[str, float]:
        """Metrics over the accumulated validation predictions."""
        return self.__metrics(train=False)

    def get_summary(self) -> str:
        """Human-readable report of train metrics followed by validation metrics."""
        s = 'Train metrics:\n'
        s += self.__describe_split(train=True)
        s += 'Val metrics:\n'
        s += self.__describe_split(train=False)
        return s

    def print_summary(self) -> None:
        """Print ``get_summary()``."""
        print(self.get_summary())

    def reset(self) -> None:
        """Drop everything accumulated for both splits (the train/eval mode is kept)."""
        self.train_losses: tp.List[float] = []
        self.train_probs: tp.List[float] = []
        self.train_preds: tp.List[int] = []
        self.train_gt: tp.List[int] = []
        self.val_losses: tp.List[float] = []
        self.val_probs: tp.List[float] = []
        self.val_preds: tp.List[int] = []
        self.val_gt: tp.List[int] = []

In [11]:
from torch import nn

class PointNetModel(nn.Module):
    """PointNet-style classifier for the first point of a neighbourhood.

    Input is indexed as ``(batch, point, feature)``. A per-point MLP followed
    by a max over the point axis yields one global vector per neighbourhood.
    That vector is then refined three times: it is glued back onto every
    input point, pushed through a per-point layer and max-pooled again.
    The classifier head finally sees the raw features of point 0 concatenated
    with the last global vector, so the point of interest is expected at row 0.
    """

    def __init__(self, in_features: int = 15, n_out_classes: int = 2) -> None:
        super().__init__()
        hidden = 64
        # Width of "raw point features + global vector" after concatenation.
        glued = hidden + in_features

        self.embedding_mlp = nn.Sequential(
            nn.Linear(in_features, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
        )

        refinement_blocks = []
        for _ in range(3):
            refinement_blocks.append(nn.Sequential(nn.Linear(glued, hidden), nn.ReLU()))
        self.combining_mlps = torch.nn.ModuleList(refinement_blocks)

        self.clf_mlp = nn.Sequential(
            nn.Linear(glued, 64),
            nn.ReLU(),
            nn.Linear(64, 16),
            nn.ReLU(),
            nn.Linear(16, n_out_classes),
        )

    def forward(self, x):
        """Return class logits, one row per neighbourhood in the batch."""
        points = x
        n_batch, n_points = points.shape[0], points.shape[1]

        # Initial global descriptor: per-point embedding, then max over points.
        pooled = self.embedding_mlp(points).max(dim=1).values.view(n_batch, 1, -1)

        for block in self.combining_mlps:
            # Broadcast the global vector to every point and glue it to the raw features.
            broadcast = pooled.expand(-1, n_points, -1)
            per_point = block(torch.cat((points, broadcast), dim=2))
            pooled = per_point.max(dim=1).values.view(n_batch, 1, -1)

        centre = points[:, 0]
        head_input = torch.cat((centre, pooled.squeeze(dim=1)), dim=1)
        return self.clf_mlp(head_input)

In [12]:
from tqdm import tqdm

def train(model: torch.nn.Module,
          optimizer: torch.optim.Optimizer,
          train_data: PointCloudDataset,
          compute_loss: tp.Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
          metric_logger: ClassificationMetricLogger,
          max_scenes: tp.Optional[int] = 30,
          batch_size: int = 512,
          num_workers: int = 2) -> None:
    """Run one training pass over (a prefix of) the training scenes.

    Each scene is wrapped in its own ``SceneDataset`` and iterated in shuffled
    mini-batches; every batch does one optimizer step and is reported to
    ``metric_logger``. Tensors are moved to the module-level ``DEVICE``.

    Args:
        model: network to optimise; switched to training mode.
        optimizer: optimizer bound to ``model``'s parameters.
        train_data: scene-level dataset; each item is one scene's DataFrame.
        compute_loss: ``(logits, targets) -> scalar loss``.
        metric_logger: receives detached CPU predictions, targets and the loss.
        max_scenes: stop after this many scenes; ``None`` uses every scene.
            The default of 30 keeps the original cutoff, chosen because the
            model has already fitted by then.
        batch_size: mini-batch size of the per-scene loader.
        num_workers: worker processes of the per-scene loader.
    """
    model.train()
    metric_logger.train()
    metrics: tp.Dict[str, float] = {}
    with tqdm(train_data, desc='Train scenes', position=1) as pbar:
        for i, scene_df in enumerate(pbar):
            if max_scenes is not None and i >= max_scenes:
                break
            scene_data = SceneDataset(scene_df)
            scene_loader = DataLoader(scene_data, batch_size=batch_size,
                                      num_workers=num_workers, shuffle=True)
            for (features, gt) in scene_loader:
                features = features.float()
                gt = gt.long()
                optimizer.zero_grad()
                pred = model(features.to(DEVICE))
                loss = compute_loss(pred, gt.to(DEVICE))
                loss.backward()
                optimizer.step()
                metric_logger.process_predictions(pred.detach().cpu(), gt, loss.detach().cpu().item())
            # Metrics are recomputed over everything accumulated so far, which
            # gets slower as the epoch goes on, so refresh them only occasionally.
            if i % 15 == 0:
                metrics = metric_logger.train_metrics()
            pbar.set_postfix(metrics)
            
            
def evaluate(model: torch.nn.Module,
             test_data: PointCloudDataset,
             compute_loss: tp.Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
             metric_logger: ClassificationMetricLogger,
             batch_size: int = 512,
             num_workers: int = 2) -> None:
    """Run the model over every test scene without gradient tracking.

    Each scene is wrapped in its own ``SceneDataset``; every batch is reported
    to ``metric_logger`` in validation mode. Tensors are moved to the
    module-level ``DEVICE``.

    Args:
        model: network to evaluate; switched to eval mode.
        test_data: scene-level dataset; each item is one scene's DataFrame.
        compute_loss: ``(logits, targets) -> scalar loss``.
        metric_logger: receives CPU predictions, targets and the loss.
        batch_size: mini-batch size of the per-scene loader.
        num_workers: worker processes of the per-scene loader.
    """
    model.eval()
    metric_logger.eval()
    metrics: tp.Dict[str, float] = {}
    with torch.no_grad():
        with tqdm(test_data, desc='Test scenes', position=2) as pbar:
            for i, scene_df in enumerate(pbar):
                scene_data = SceneDataset(scene_df)
                # Batch order does not matter for evaluation, so do not shuffle.
                scene_loader = DataLoader(scene_data, batch_size=batch_size,
                                          num_workers=num_workers, shuffle=False)
                for (features, gt) in scene_loader:
                    features = features.float()
                    gt = gt.long()
                    pred = model(features.to(DEVICE))
                    loss = compute_loss(pred, gt.to(DEVICE))
                    metric_logger.process_predictions(pred.cpu(), gt, loss.cpu().item())
                # Metrics are recomputed over everything accumulated so far;
                # refresh them only every few scenes.
                if i % 5 == 0:
                    metrics = metric_logger.val_metrics()
                # Update the bar per scene, the same way the training loop does.
                pbar.set_postfix(metrics)
                

def single_epoch(model: torch.nn.Module,
                 optimizer: torch.optim.Adam,
                 train_data: PointCloudDataset,
                 test_data: PointCloudDataset,
                 compute_loss: tp.Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
                 metric_logger: ClassificationMetricLogger) -> None:
    """Run one epoch: a training pass followed by an evaluation pass.

    Both passes report into the same ``metric_logger``; nothing is returned.
    """
    train(model=model,
          optimizer=optimizer,
          train_data=train_data,
          compute_loss=compute_loss,
          metric_logger=metric_logger)
    evaluate(model=model,
             test_data=test_data,
             compute_loss=compute_loss,
             metric_logger=metric_logger)

In [13]:
# Device used by the training and evaluation loops: GPU when available, CPU otherwise.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Uncomment to force CPU:
# DEVICE = 'cpu'
# Upper bound (exclusive) of the epoch range in the training loop.
N_EPOCHS = 1

In [14]:
# Model with default sizes (15 input features, 2 output classes), moved to DEVICE.
model = PointNetModel()
model.to(DEVICE)
# Adam with a small weight decay; the learning rate is multiplied by 0.9 on
# every scheduler.step() call.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.9)
# roc_auc is requested explicitly; it is not part of the logger's default metric list.
metric_logger = ClassificationMetricLogger(n_classes=2,
                                           metrics=['precision', 'recall', 'f1', 'roc_auc'])

In [15]:
import os

# Directory where per-epoch checkpoints are written; created if missing.
# Alternative for running outside Colab:
# cache_path = './'
cache_path = 'drive/MyDrive/sdc-colab-data/perception/models/'
os.makedirs(cache_path, exist_ok=True)

In [16]:
# First epoch index of the training loop. To resume, set this to the number of
# epochs already completed and uncomment the lines below: they restore model,
# optimizer and scheduler state from the checkpoint of epoch `start_epoch - 1`.
start_epoch = 0

# state_dict = torch.load(os.path.join(cache_path, f'pointnet-{start_epoch - 1}.pth'), map_location='cpu')
# model.cpu()
# model.load_state_dict(state_dict['model'])
# model.to(DEVICE)
# optimizer.load_state_dict(state_dict['optimizer'])
# scheduler.load_state_dict(state_dict['scheduler'])

In [17]:
# Class-weighted cross-entropy: weights are the inverse label counts computed
# above, rescaled to unit norm.
# NOTE(review): assumes the order of class_balance's index matches the class
# indices 0..n-1 used by the model — confirm against the label values.
class_weights = torch.from_numpy(class_balance.to_numpy()).float()
class_weights /= class_weights.norm()
compute_loss = torch.nn.CrossEntropyLoss(weight=class_weights.to(DEVICE))

In [18]:
# Main loop: one train + evaluate pass per epoch, then report, decay the
# learning rate, checkpoint, and clear the logger for the next epoch.
for ep in tqdm(range(start_epoch, N_EPOCHS), desc='Epoch', position=0):
    single_epoch(model, optimizer, train_data, test_data, compute_loss, metric_logger)
    print(f'Epoch {ep}:')
    metric_logger.print_summary()
    scheduler.step()
    # The checkpoint is written after scheduler.step(), so the saved scheduler
    # state already reflects the learning rate of the next epoch.
    state_dict = {'model': model.state_dict(),
                  'optimizer': optimizer.state_dict(),
                  'scheduler': scheduler.state_dict(),
                  'epoch': ep}
    torch.save(state_dict, os.path.join(cache_path, f'pointnet-{ep}.pth'))
    # Metrics are per-epoch: drop the accumulated predictions.
    metric_logger.reset()

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]
Train scenes:   0%|          | 0/260 [00:00<?, ?it/s][A
Train scenes:   0%|          | 0/260 [00:05<?, ?it/s, mean_loss=0.201, precision=0.993, recall=0.945, f1=0.968, roc_auc=0.904][A
Train scenes:   0%|          | 1/260 [00:05<25:13,  5.84s/it, mean_loss=0.201, precision=0.993, recall=0.945, f1=0.968, roc_auc=0.904][A
Train scenes:   0%|          | 1/260 [00:11<25:13,  5.84s/it, mean_loss=0.201, precision=0.993, recall=0.945, f1=0.968, roc_auc=0.904][A
Train scenes:   1%|          | 2/260 [00:11<24:43,  5.75s/it, mean_loss=0.201, precision=0.993, recall=0.945, f1=0.968, roc_auc=0.904][A
Train scenes:   1%|          | 2/260 [00:15<24:43,  5.75s/it, mean_loss=0.201, precision=0.993, recall=0.945, f1=0.968, roc_auc=0.904][A
Train scenes:   1%|          | 3/260 [00:15<22:30,  5.25s/it, mean_loss=0.201, precision=0.993, recall=0.945, f1=0.968, roc_auc=0.904][A
Train scenes:   1%|          | 3/260 [00:21<22:30,  5.25s/it, mean_loss=0.201,

Epoch 0:


Epoch: 100%|██████████| 1/1 [05:41<00:00, 341.97s/it]

Train metrics:
mean_loss: 0.0445413033889357
precision: 0.9898578889618035
recall: 0.9890218484494693
f1: 0.9892503840959931
roc_auc: 0.9972642277622608
Val metrics:
mean_loss: 0.09404236614698751
precision: 0.9867603282300503
recall: 0.9863434504878009
f1: 0.9864964986867393
roc_auc: 0.9962878222091127




