In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import sys
import h5py
sys.path.insert(0, '..')
from models import PointNet, DGCNNSegBackbone
from datasets import PointCloudNormalize, ABCDataset
from torch.utils.data import Dataset, DataLoader
from utils.training_routines import RunningMetrics
from tqdm import tqdm


## Parameters

In [2]:
# Run on the first CUDA device when one is available; otherwise fall back to
# the CPU so the notebook still executes on machines without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Identifier of the pretraining run; get_model uses it to build the checkpoint path.
exp_id = 'z52pya7i'
n_epochs = 50        # default number of training epochs
lr = 5e-4            # learning rate handed to Adam
weight_decay = 1e-5  # weight decay handed to Adam

In [3]:
class NormalsDataset(Dataset):
    """Point clouds paired with unit-length per-point normals read from an HDF5 file.

    The file must contain the datasets ``points`` and ``point_normals``; both
    are read fully into memory. The normals are rescaled to unit length along
    axis 2 before any subsampling takes place.

    Args:
        file: path of the HDF5 file to read.
        transform: optional callable applied to each point cloud on access.
        sample_frac: when below 1, only this fraction of the items is kept,
            chosen through a seeded random permutation.
        seed: seed of the ``RandomState`` that drives the subsampling.
    """

    def __init__(self, file, transform=None, sample_frac=1, seed=42):
        super().__init__()
        self.transform = transform

        with h5py.File(file, 'r') as h5:
            points = h5['points'][:]
            normals = h5['point_normals'][:]

        # In-place normalisation; the epsilon guards against zero-length normals.
        normals /= ((normals**2).sum(axis=2, keepdims=True)**0.5 + 1e-8)

        if sample_frac < 1:
            total = normals.shape[0]
            rng = np.random.RandomState(seed)
            keep = rng.permutation(total)[:int(total * sample_frac)]
            points, normals = points[keep], normals[keep]

        self.points = points
        self.normals = normals

    def __getitem__(self, idx):
        """Return the transposed point cloud and the transposed normals of item ``idx``.

        The transform, when set, is applied to the points only.
        """
        cloud = self.points[idx]
        if self.transform is not None:
            cloud = self.transform(cloud)
        return cloud.T, self.normals[idx].T

    def __len__(self):
        """Number of point clouds held by the dataset."""
        return len(self.points)
    
class Regressor(nn.Module):
    """Three-channel per-position regression head stacked on a feature backbone.

    Args:
        backbone: module exposing ``n_output_point`` (used as the input channel
            count of the head's first ``Conv1d``), ``forward_features(x)`` and
            ``parameters()``.
        finetune_head: when true, every backbone parameter is frozen so only
            the head receives gradients.
    """

    def __init__(self, backbone, finetune_head=False):
        super().__init__()
        self.backbone = backbone

        if finetune_head:
            for param in self.backbone.parameters():
                param.requires_grad_(False)

        # Two Conv1d -> BatchNorm1d -> ReLU stages followed by a 3-channel projection.
        in_channels = self.backbone.n_output_point
        layers = []
        for width in (512, 256):
            layers += [
                nn.Conv1d(in_channels, width, 1),
                nn.BatchNorm1d(width),
                nn.ReLU(True),
            ]
            in_channels = width
        layers.append(nn.Conv1d(in_channels, 3, 1))
        self.head = nn.Sequential(*layers)

    def forward(self, x):
        """Run the backbone's ``forward_features`` and feed the result to the head."""
        return self.head(self.backbone.forward_features(x))
    
def compute_loss(gt, pred):
    """Mean of ``1 - cos^2`` between reference and predicted vectors.

    ``pred`` is L2-normalised along dim 1; ``gt`` is used as given. The dot
    product is taken over dim 1, and squaring it makes the loss indifferent
    to the sign of the prediction.
    """
    unit_pred = F.normalize(pred, dim=1)
    cosine = torch.einsum('nck, nck -> nk', gt, unit_pred)
    squared = cosine * cosine
    return torch.mean(1 - squared)

@torch.no_grad()
def validate(model, loader):
    """Evaluate ``model`` on ``loader`` and return the loss over all batches.

    The model is put in eval mode. Every batch's prediction is moved to the
    CPU, where a per-batch loss feeds the running metrics shown on the
    progress bar. The returned value is ``compute_loss`` evaluated once on the
    concatenation of all targets and predictions.
    """
    model.eval()
    progress = tqdm(loader, desc='val')
    running = RunningMetrics()
    all_preds, all_gts = [], []

    for batch, target in progress:
        output = model(batch.to(device)).cpu()
        all_preds.append(output)
        all_gts.append(target)
        running.step({'loss': compute_loss(target, output)})
        progress.set_postfix(running.report())

    total_loss = compute_loss(torch.cat(all_gts, dim=0),
                              torch.cat(all_preds, dim=0)).item()
    print(running.report())

    print('val loss', total_loss)
    return total_loss

def train(model, train_loader, test_loader, optimizer, scheduler, n_epochs, val_every=1):
    """Train ``model`` and evaluate it every ``val_every`` epochs.

    Args:
        model: network to optimise; ``model.backbone.reg`` is read after each
            forward pass and added to the loss with weight 0.001 (presumably a
            regularisation term produced by the backbone — confirm).
        train_loader: iterable of ``(x, gt)`` training batches.
        test_loader: loader handed to ``validate``.
        optimizer: optimiser stepped once per batch.
        scheduler: learning-rate scheduler, also stepped once per batch.
        n_epochs: number of passes over ``train_loader``.
        val_every: validate after every epoch whose 1-based index is a
            multiple of this value.

    Returns:
        List of validation losses, one per evaluated epoch, in chronological
        order. The list is empty when no epoch triggered a validation.
    """
    val_loss_list = []
    for epoch in range(1, n_epochs + 1):
        bar = tqdm(train_loader)
        model.train()
        metrics = RunningMetrics()

        for x, gt in bar:
            optimizer.zero_grad()
            pred = model(x.to(device))
            loss = compute_loss(gt.to(device), pred) + 0.001 * model.backbone.reg
            loss.backward()
            optimizer.step()
            # Detach so the metrics tracker never holds on to the autograd graph.
            metrics.step({'loss': loss.detach()})
            report = metrics.report()
            report.update({'epoch': epoch})
            bar.set_postfix(report)

            # Per-batch step: the schedule is measured in batches, not epochs.
            scheduler.step()

        if epoch % val_every == 0:
            # Record every validation result, not only the one from the last epoch.
            val_loss_list.append(validate(model, test_loader))

    return val_loss_list
            
def get_model(exp_id, n_epochs, finetune_head, lr, weight_decay, steps_per_epoch=None):
    """Build the regressor together with its optimiser and LR scheduler.

    Args:
        exp_id: identifier of the pretraining run whose checkpoint is loaded
            into the backbone; ``None`` keeps the backbone's initial weights.
        n_epochs: number of epochs the schedule should span.
        finetune_head: forwarded to ``Regressor``; when true the backbone is
            frozen and only the head is optimised.
        lr: learning rate for Adam.
        weight_decay: weight decay for Adam.
        steps_per_epoch: number of scheduler steps per epoch. When ``None``
            the length of the notebook-level ``train_loader`` is used, which
            is what the function did before this parameter existed.

    Returns:
        Tuple ``(model, optimizer, scheduler)``; the model is already on ``device``.
    """
    if steps_per_epoch is None:
        # Backward-compatible fallback on the notebook-level loader.
        steps_per_epoch = len(train_loader)

    backbone = PointNet()
    # Alternative backbone: DGCNNSegBackbone()
    if exp_id is not None:
        checkpoint_path = f'../weights/simclr_run_{exp_id}_ckp_150.pt'
        # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
        state = torch.load(checkpoint_path, map_location='cpu')['model']
        backbone.load_state_dict(state)

    model = Regressor(backbone, finetune_head).to(device)
    # Frozen parameters are excluded so the optimiser only sees trainable ones.
    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable, lr=lr, weight_decay=weight_decay)
    # The scheduler is stepped once per batch, hence the horizon in batches.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=n_epochs * steps_per_epoch, eta_min=0)
    return model, optimizer, scheduler

In [4]:
# HDF5 file handed to ABCDataset for both the 'train' and the 'test' split.
dataset_path = '../../datasets/hdfs/train_0.hdf5'

## Simple training

### Only head

In [5]:
# 'train' and 'test' splits of the 'normals' task; each split gets its own
# PointCloudNormalize('box') transform instance.
train_ds = ABCDataset(dataset_path, 'train', 'normals', transform=PointCloudNormalize('box'))
test_ds = ABCDataset(dataset_path, 'test', 'normals', transform=PointCloudNormalize('box'))

# Only the training batches are shuffled.
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

In [None]:
# Head-only training on the pretrained backbone: finetune_head=True makes
# Regressor freeze the backbone, and exp_id selects the checkpoint that
# get_model loads into it.
finetune_head = True
exp_id = 'z52pya7i'
model, optimizer, scheduler = get_model(exp_id, n_epochs, finetune_head, lr, weight_decay)
val_loss1 = train(model, train_loader, test_loader, optimizer, scheduler, n_epochs)

100%|██████████| 180/180 [00:39<00:00,  4.54it/s, loss=0.0322, epoch=1]
val: 100%|██████████| 154/154 [00:12<00:00, 12.70it/s, loss=0.0116]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.011616361338834794}
val loss 0.028366807180088046


100%|██████████| 180/180 [00:38<00:00,  4.66it/s, loss=0.00982, epoch=2]
val: 100%|██████████| 154/154 [00:13<00:00, 11.22it/s, loss=0.00977]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.009765839555065802}
val loss 0.024207011688957673


100%|██████████| 180/180 [00:38<00:00,  4.66it/s, loss=0.00849, epoch=3]
val: 100%|██████████| 154/154 [00:11<00:00, 12.84it/s, loss=0.00884]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.008842138788896794}
val loss 0.021340657224789387


100%|██████████| 180/180 [00:38<00:00,  4.67it/s, loss=0.00813, epoch=4]
val: 100%|██████████| 154/154 [00:12<00:00, 12.42it/s, loss=0.00941]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.00940612205951698}
val loss 0.021402344356352607


100%|██████████| 180/180 [00:38<00:00,  4.65it/s, loss=0.00732, epoch=5]
val: 100%|██████████| 154/154 [00:11<00:00, 12.97it/s, loss=0.00719]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.00718526574809981}
val loss 0.017258499373506035


100%|██████████| 180/180 [00:38<00:00,  4.62it/s, loss=0.00662, epoch=6]
val: 100%|██████████| 154/154 [00:12<00:00, 12.59it/s, loss=0.00698]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.006980279989205926}
val loss 0.016801920932681742


100%|██████████| 180/180 [00:38<00:00,  4.64it/s, loss=0.0061, epoch=7] 
val: 100%|██████████| 154/154 [00:12<00:00, 11.85it/s, loss=0.00969]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.009693494002728172}
val loss 0.02292255282601441


100%|██████████| 180/180 [00:39<00:00,  4.61it/s, loss=0.00621, epoch=8]
val: 100%|██████████| 154/154 [00:11<00:00, 13.02it/s, loss=0.00889]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.00889082900909806}
val loss 0.024843736338284166


100%|██████████| 180/180 [00:38<00:00,  4.64it/s, loss=0.00577, epoch=9]
val: 100%|██████████| 154/154 [00:12<00:00, 12.56it/s, loss=0.0068] 
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.006798417233047038}
val loss 0.016563965764451852


100%|██████████| 180/180 [00:38<00:00,  4.64it/s, loss=0.00559, epoch=10]
val: 100%|██████████| 154/154 [00:11<00:00, 12.98it/s, loss=0.00629]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.006287164666074511}
val loss 0.015272702275965036


100%|██████████| 180/180 [00:38<00:00,  4.64it/s, loss=0.00535, epoch=11]
val: 100%|██████████| 154/154 [00:12<00:00, 12.59it/s, loss=0.00613]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.006127076070280225}
val loss 0.01464462086908843


100%|██████████| 180/180 [00:38<00:00,  4.62it/s, loss=0.00504, epoch=12]
val: 100%|██████████| 154/154 [00:11<00:00, 12.94it/s, loss=0.00673]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.006731700745858091}
val loss 0.016643311328017194


100%|██████████| 180/180 [00:38<00:00,  4.63it/s, loss=0.00506, epoch=13]
val: 100%|██████████| 154/154 [00:11<00:00, 12.97it/s, loss=0.00652]
  0%|          | 0/180 [00:00<?, ?it/s]

{'loss': 0.0065170104715226804}
val loss 0.016235378344882583


  4%|▍         | 8/180 [00:01<00:37,  4.54it/s, loss=0.00424, epoch=14]

In [None]:
# Show what train() returned for the head-only run on the pretrained backbone.
val_loss1

In [None]:
# Head-only baseline: exp_id=None makes get_model skip checkpoint loading, so
# the frozen backbone keeps the weights PointNet() was constructed with.
finetune_head = True
exp_id = None
model, optimizer, scheduler = get_model(exp_id, n_epochs, finetune_head, lr, weight_decay)
val_loss2 = train(model, train_loader, test_loader, optimizer, scheduler, n_epochs)

In [None]:
# Show what train() returned for the head-only run without a checkpoint.
val_loss2

### Finetuning

In [None]:
# Full fine-tuning: the checkpoint is loaded and, with finetune_head=False,
# the backbone stays trainable alongside the head.
finetune_head = False
exp_id = 'z52pya7i'
model, optimizer, scheduler = get_model(exp_id, n_epochs, finetune_head, lr, weight_decay)
val_loss3 = train(model, train_loader, test_loader, optimizer, scheduler, n_epochs)

In [None]:
# Show what train() returned for the full fine-tuning run.
val_loss3

### From scratch

In [None]:
# Train the whole network without a checkpoint and without freezing the backbone.
finetune_head = False
exp_id = None
# This run lasts 55 epochs. The same count is handed to get_model so the
# cosine schedule's T_max covers the entire run; building the scheduler for
# the global n_epochs while training longer would push it past T_max and make
# the learning rate climb again during the extra epochs.
scratch_epochs = 55
model, optimizer, scheduler = get_model(exp_id, scratch_epochs, finetune_head, lr, weight_decay)
val_loss4 = train(model, train_loader, test_loader, optimizer, scheduler, scratch_epochs)

In [None]:
# Show what train() returned for the from-scratch run.
val_loss4

## Semisupervised

In [None]:
# Rebuild the 'test' split and its unshuffled loader for the experiments below.
test_ds = ABCDataset(dataset_path, 'test', 'normals', transform=PointCloudNormalize('box'))
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

In [None]:
# Full fine-tuning from the pretrained checkpoint on a subsampled training set
# (sample_frac=0.01; presumably 1% of the split — confirm in ABCDataset),
# repeated for three subsampling seeds.
finetune_head = False
exp_id = 'z52pya7i'
n_epochs = 200
run_results = []

for i, seed in enumerate([24234, 23214, 64645]):
    train_ds = ABCDataset(dataset_path,
                          'train',
                          'normals',
                          transform=PointCloudNormalize('box'), sample_frac=0.01, seed=seed)

    # get_model sizes the LR schedule from the notebook-level `train_loader`,
    # so the loader has to be rebuilt before get_model is called.
    train_loader = DataLoader(train_ds, shuffle=True, batch_size=64)
    model, optimizer, scheduler = get_model(exp_id, n_epochs, finetune_head, lr, weight_decay)
    # Dedicated name: `val_loss4` holds the "From scratch" result and must not be overwritten.
    run_val_losses = train(model, train_loader, test_loader, optimizer, scheduler, n_epochs, val_every=50)
    print(f'Run {i}: {run_val_losses[-1]}')
    run_results.append(run_val_losses[-1])

# Mean and sample standard deviation (ddof=1) of the last validation loss per run.
np.mean(run_results), np.std(run_results, ddof=1)

In [None]:
# Training without a checkpoint on a subsampled training set
# (sample_frac=0.01; presumably 1% of the split — confirm in ABCDataset),
# repeated for three subsampling seeds.
finetune_head = False
exp_id = None
n_epochs = 200
run_results = []

for i, seed in enumerate([24234, 23214, 64645]):
    train_ds = ABCDataset(dataset_path,
                          'train',
                          'normals',
                          transform=PointCloudNormalize('box'), sample_frac=0.01, seed=seed)

    # get_model sizes the LR schedule from the notebook-level `train_loader`,
    # so the loader has to be rebuilt before get_model is called.
    train_loader = DataLoader(train_ds, shuffle=True, batch_size=64)

    model, optimizer, scheduler = get_model(exp_id, n_epochs, finetune_head, lr, weight_decay)
    # Dedicated name: `val_loss4` holds the "From scratch" result and must not be overwritten.
    run_val_losses = train(model, train_loader, test_loader, optimizer, scheduler, n_epochs, val_every=50)
    print(f'Run {i}: {run_val_losses[-1]}')
    run_results.append(run_val_losses[-1])

# Mean and sample standard deviation (ddof=1) of the last validation loss per run.
np.mean(run_results), np.std(run_results, ddof=1)

In [None]:
# Full fine-tuning from the pretrained checkpoint on a subsampled training set
# (sample_frac=0.05; presumably 5% of the split — confirm in ABCDataset),
# repeated for three subsampling seeds.
finetune_head = False
exp_id = 'z52pya7i'
n_epochs = 200
run_results = []

for i, seed in enumerate([24234, 23214, 64645]):
    train_ds = ABCDataset(dataset_path,
                          'train',
                          'normals',
                          transform=PointCloudNormalize('box'), sample_frac=0.05, seed=seed)

    # get_model sizes the LR schedule from the notebook-level `train_loader`,
    # so the loader has to be rebuilt before get_model is called.
    train_loader = DataLoader(train_ds, shuffle=True, batch_size=64)

    model, optimizer, scheduler = get_model(exp_id, n_epochs, finetune_head, lr, weight_decay)
    # Dedicated name: `val_loss4` holds the "From scratch" result and must not be overwritten.
    run_val_losses = train(model, train_loader, test_loader, optimizer, scheduler, n_epochs, val_every=50)
    print(f'Run {i}: {run_val_losses[-1]}')
    run_results.append(run_val_losses[-1])

# Mean and sample standard deviation (ddof=1) of the last validation loss per run.
np.mean(run_results), np.std(run_results, ddof=1)

In [None]:
# Training without a checkpoint on a subsampled training set
# (sample_frac=0.05; presumably 5% of the split — confirm in ABCDataset),
# repeated for three subsampling seeds.
finetune_head = False
exp_id = None
n_epochs = 200
run_results = []

for i, seed in enumerate([24234, 23214, 64645]):
    train_ds = ABCDataset(dataset_path,
                          'train',
                          'normals',
                          transform=PointCloudNormalize('box'), sample_frac=0.05, seed=seed)

    # get_model sizes the LR schedule from the notebook-level `train_loader`,
    # so the loader has to be rebuilt before get_model is called.
    train_loader = DataLoader(train_ds, shuffle=True, batch_size=64)

    model, optimizer, scheduler = get_model(exp_id, n_epochs, finetune_head, lr, weight_decay)
    # Dedicated name: `val_loss4` holds the "From scratch" result and must not be overwritten.
    run_val_losses = train(model, train_loader, test_loader, optimizer, scheduler, n_epochs, val_every=50)
    print(f'Run {i}: {run_val_losses[-1]}')
    run_results.append(run_val_losses[-1])

# Mean and sample standard deviation (ddof=1) of the last validation loss per run.
np.mean(run_results), np.std(run_results, ddof=1)