In [10]:
import argparse
import numpy as np
import open3d
import os
import easydict
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from data import CustomData
from models import IterativeBenchmark
from loss import EMDLosspy
from metrics import compute_metrics, summary_metrics, print_train_info
from utils import time_calc

In [11]:
def setup_seed(seed):
    """Seed every random number generator this notebook can reach.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    all CUDA devices), and asks cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Local import: ``random`` is not part of the notebook's import cell.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune (and therefore vary) its kernel
    # choice between runs, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

In [12]:
def config_params():
    """Build the fixed experiment configuration.

    Returns:
        easydict.EasyDict: settings readable as attributes (``args.lr``) or
        as dictionary items (``args['lr']``).
    """
    settings = {}

    # Data and reproducibility.
    settings['root'] = '/home/trojan/skia_projects/pcd_registration/pointnet_reg/PCReg.PyTorch/dataset'
    settings['train_npts'] = 2048
    settings['seed'] = 17

    # Model; ``gn`` is forwarded verbatim to IterativeBenchmark
    # (presumably toggles group normalisation -- TODO confirm in models.py).
    settings['gn'] = True

    # Training loop and data loading.
    settings['epoches'] = 50
    settings['batchsize'] = 4
    settings['num_workers'] = 1
    settings['in_dim'] = 3
    settings['niters'] = 8

    # Optimiser and MultiStepLR schedule.
    settings['lr'] = 0.0001
    settings['milestones'] = [30]
    settings['gamma'] = 0.1

    # Output location; ``saved_frequency`` is the epoch interval at which
    # main() writes TensorBoard scalars.
    settings['saved_path'] = './models'
    settings['saved_frequency'] = 10

    return easydict.EasyDict(settings)

In [13]:
def compute_loss(ref_cloud, pred_ref_clouds, loss_fn, discount_factor=0.5):
    """Discounted sum of per-iteration losses against the reference cloud.

    The prediction at index ``i`` (0-based) out of ``n`` is weighted by
    ``discount_factor ** (n - i)``, so later predictions contribute more.
    The number of predictions is taken from ``pred_ref_clouds`` itself
    rather than being fixed at 8, so the function follows whatever
    iteration count the model was built with.

    Args:
        ref_cloud: Reference cloud tensor; only the first three entries of
            the last axis are compared.
        pred_ref_clouds: Indexable sequence of predicted clouds, one per
            model iteration, sliced the same way as ``ref_cloud``.
        loss_fn: Callable ``loss_fn(target, prediction)`` returning a scalar
            tensor.
        discount_factor: Base of the per-iteration weight. Defaults to 0.5.

    Returns:
        Scalar tensor holding the weighted sum of the individual losses.

    Raises:
        ValueError: If ``pred_ref_clouds`` is empty.
    """
    num_iters = len(pred_ref_clouds)
    if num_iters == 0:
        raise ValueError('pred_ref_clouds must contain at least one prediction')

    # The target slice does not depend on the iteration, so build it once.
    target = ref_cloud[..., :3].contiguous()
    losses = [
        discount_factor ** (num_iters - i)
        * loss_fn(target, pred_ref_clouds[i][..., :3].contiguous())
        for i in range(num_iters)
    ]
    return torch.sum(torch.stack(losses))

In [14]:
@time_calc
def train_one_epoch(train_loader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Every batch is moved to the GPU, pushed through ``model``, scored with
    ``compute_loss`` and used for a single optimiser step. Registration
    metrics are collected per batch and reduced by ``summary_metrics``.

    Args:
        train_loader: Iterable yielding ``(ref_cloud, src_cloud, gtR, gtt)``.
        model: Callable returning ``(R, t, pred_ref_clouds)`` for a
            ``(source, reference)`` pair.
        loss_fn: Loss callable forwarded to ``compute_loss``.
        optimizer: Optimiser updating the parameters of ``model``.

    Returns:
        dict with keys ``loss`` (mean batch loss), ``r_mse``, ``r_mae``,
        ``t_mse``, ``t_mae``, ``r_isotropic`` and ``t_isotropic``.
    """
    batch_losses = []
    rot_mse, rot_mae, trans_mse, trans_mae = [], [], [], []
    rot_iso, trans_iso = [], []

    for batch in tqdm(train_loader):
        ref_cloud, src_cloud, gtR, gtt = batch
        ref_cloud = ref_cloud.cuda()
        src_cloud = src_cloud.cuda()
        gtR = gtR.cuda()
        gtt = gtt.cuda()

        optimizer.zero_grad()
        # The model receives the clouds with their last two axes swapped
        # (assumes loader yields (batch, points, channels) -- TODO confirm).
        src_input = src_cloud.permute(0, 2, 1).contiguous()
        ref_input = ref_cloud.permute(0, 2, 1).contiguous()
        R, t, pred_ref_clouds = model(src_input, ref_input)

        loss = compute_loss(ref_cloud, pred_ref_clouds, loss_fn)
        loss.backward()
        optimizer.step()

        batch_metrics = compute_metrics(R, t, gtR, gtt)
        b_r_mse, b_r_mae, b_t_mse, b_t_mae, b_r_iso, b_t_iso = batch_metrics

        batch_losses.append(loss.item())
        rot_mse.append(b_r_mse)
        rot_mae.append(b_r_mae)
        trans_mse.append(b_t_mse)
        trans_mae.append(b_t_mae)
        # The isotropic errors are tensors; store them as NumPy arrays.
        rot_iso.append(b_r_iso.cpu().detach().numpy())
        trans_iso.append(b_t_iso.cpu().detach().numpy())

    summary = summary_metrics(rot_mse, rot_mae, trans_mse, trans_mae,
                              rot_iso, trans_iso)
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = summary

    return {
        'loss': np.mean(batch_losses),
        'r_mse': r_mse,
        'r_mae': r_mae,
        't_mse': t_mse,
        't_mae': t_mae,
        'r_isotropic': r_isotropic,
        't_isotropic': t_isotropic,
    }

In [15]:
@time_calc
def test_one_epoch(test_loader, model, loss_fn):
    """Evaluate ``model`` on ``test_loader`` without computing gradients.

    The model is put in eval mode for the duration of the pass. Its previous
    train/eval mode is restored afterwards, including when the loop is
    aborted by an exception or a keyboard interrupt, so an interrupted
    evaluation does not leave the model stuck in eval mode.

    Args:
        test_loader: Iterable yielding ``(ref_cloud, src_cloud, gtR, gtt)``.
        model: Module returning ``(R, t, pred_ref_clouds)`` for a
            ``(source, reference)`` pair.
        loss_fn: Loss callable forwarded to ``compute_loss``.

    Returns:
        dict with keys ``loss`` (mean batch loss), ``r_mse``, ``r_mae``,
        ``t_mse``, ``t_mae``, ``r_isotropic`` and ``t_isotropic``.
    """
    # Remember the caller's mode so it can be restored rather than
    # unconditionally forcing train mode.
    was_training = model.training
    model.eval()
    losses = []
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = [], [], [], [], [], []
    try:
        with torch.no_grad():
            for ref_cloud, src_cloud, gtR, gtt in tqdm(test_loader):
                ref_cloud, src_cloud, gtR, gtt = ref_cloud.cuda(), src_cloud.cuda(), \
                                                 gtR.cuda(), gtt.cuda()
                # The model receives the clouds with their last two axes swapped.
                R, t, pred_ref_clouds = model(src_cloud.permute(0, 2, 1).contiguous(),
                                             ref_cloud.permute(0, 2, 1).contiguous())
                loss = compute_loss(ref_cloud, pred_ref_clouds, loss_fn)
                cur_r_mse, cur_r_mae, cur_t_mse, cur_t_mae, cur_r_isotropic, \
                cur_t_isotropic = compute_metrics(R, t, gtR, gtt)

                losses.append(loss.item())
                r_mse.append(cur_r_mse)
                r_mae.append(cur_r_mae)
                t_mse.append(cur_t_mse)
                t_mae.append(cur_t_mae)
                # The isotropic errors are tensors; store them as NumPy arrays.
                r_isotropic.append(cur_r_isotropic.cpu().detach().numpy())
                t_isotropic.append(cur_t_isotropic.cpu().detach().numpy())
    finally:
        model.train(was_training)
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = \
        summary_metrics(r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic)
    results = {
        'loss': np.mean(losses),
        'r_mse': r_mse,
        'r_mae': r_mae,
        't_mse': t_mse,
        't_mae': t_mae,
        'r_isotropic': r_isotropic,
        't_isotropic': t_isotropic
    }
    return results

In [16]:
def main():
    """Train IterativeBenchmark end to end and keep the best checkpoints.

    Steps:
      1. Load the configuration, seed the RNGs and create the output
         directories ``<saved_path>/summary`` and ``<saved_path>/checkpoints``.
      2. Build the train/test loaders, the model, the EMD loss, an Adam
         optimiser and a MultiStepLR scheduler.
      3. For every epoch: train, evaluate, print both result sets, write
         TensorBoard scalars on epochs where ``epoch % saved_frequency == 0``
         (0-based epoch, logged at step ``epoch + 1``), and overwrite a
         checkpoint whenever the test loss, test ``r_mse`` or test
         ``r_isotropic`` reaches a new minimum.

    The TensorBoard writer is closed on exit, including on error or
    interrupt, so buffered events are flushed to disk.
    """
    args = config_params()
    print(args)

    setup_seed(args.seed)
    summary_path = os.path.join(args.saved_path, 'summary')
    checkpoints_path = os.path.join(args.saved_path, 'checkpoints')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    for directory in (args.saved_path, summary_path, checkpoints_path):
        os.makedirs(directory, exist_ok=True)

    train_set = CustomData(args.root, args.train_npts)
    test_set = CustomData(args.root, args.train_npts, False)
    train_loader = DataLoader(train_set, batch_size=args.batchsize,
                              shuffle=True, num_workers=args.num_workers)
    test_loader = DataLoader(test_set, batch_size=args.batchsize, shuffle=False,
                             num_workers=args.num_workers)
    model = IterativeBenchmark(in_dim=args.in_dim, niters=args.niters, gn=args.gn)
    model = model.cuda()
    loss_fn = EMDLosspy()
    loss_fn = loss_fn.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=args.milestones,
                                                     gamma=args.gamma,
                                                     last_epoch=-1)

    # Test metric key -> checkpoint file that stores the model achieving the
    # lowest value of that metric so far.
    checkpoint_files = {
        'loss': "test_min_loss.pth",
        'r_mse': "test_min_rmse_error.pth",
        'r_isotropic': "test_min_rot_error.pth",
    }
    best_so_far = {key: float('inf') for key in checkpoint_files}

    writer = SummaryWriter(summary_path)
    try:
        for epoch in range(args.epoches):
            print('=' * 20, epoch + 1, '=' * 20)
            train_results = train_one_epoch(train_loader, model, loss_fn, optimizer)
            print_train_info(train_results)
            test_results = test_one_epoch(test_loader, model, loss_fn)
            print_train_info(test_results)

            if epoch % args.saved_frequency == 0:
                writer.add_scalar('Loss/train', train_results['loss'], epoch + 1)
                writer.add_scalar('Loss/test', test_results['loss'], epoch + 1)
                writer.add_scalar('RError/train', train_results['r_mse'], epoch + 1)
                writer.add_scalar('RError/test', test_results['r_mse'], epoch + 1)
                writer.add_scalar('rotError/train', train_results['r_isotropic'],
                                  epoch + 1)
                writer.add_scalar('rotError/test', test_results['r_isotropic'],
                                  epoch + 1)
                writer.add_scalar('Lr', optimizer.param_groups[0]['lr'], epoch + 1)

            # Each tracked metric keeps its own best checkpoint.
            for key, filename in checkpoint_files.items():
                current = test_results[key]
                if current < best_so_far[key]:
                    saved_path = os.path.join(checkpoints_path, filename)
                    torch.save(model.state_dict(), saved_path)
                    best_so_far[key] = current
            scheduler.step()
    finally:
        # Flush pending events and release the event file handle.
        writer.close()

In [17]:
# Entry point: start the full training run when this code is executed
# directly (the guard is skipped when the code is imported as a module).
if __name__ == '__main__':
    main()

{'root': '/home/trojan/skia_projects/pcd_registration/pointnet_reg/PCReg.PyTorch/dataset', 'train_npts': 2048, 'seed': 17, 'gn': True, 'epoches': 50, 'batchsize': 4, 'num_workers': 1, 'in_dim': 3, 'niters': 8, 'lr': 0.0001, 'milestones': [30], 'gamma': 0.1, 'saved_path': './models', 'saved_frequency': 10}


 10%|████▍                                       | 4/40 [00:02<00:22,  1.59it/s]
Traceback (most recent call last):
  File "/home/trojan/anaconda3/envs/pointnet_reg/lib/python3.8/multiprocessing/queues.py", line 245, in _feed
    send_bytes(obj)
  File "/home/trojan/anaconda3/envs/pointnet_reg/lib/python3.8/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/trojan/anaconda3/envs/pointnet_reg/lib/python3.8/multiprocessing/connection.py", line 411, in _send_bytes
    self._send(header + buf)
  File "/home/trojan/anaconda3/envs/pointnet_reg/lib/python3.8/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


KeyboardInterrupt: 