## Jul 21, 2022

# Simple modular debugger
- Load config: `configs/papers/panodepth/train_ddad.yaml`
- Create a module instance to debug
- Run / visualize

In [None]:
%load_ext autoreload
%autoreload 2

import copy
import os
import torch
import numpy as np
import pythreejs as pjs
import warnings
warnings.filterwarnings('ignore')

from matplotlib.cm import get_cmap
from tqdm.notebook import tqdm
from PIL import Image
from IPython.core.display import display

os.chdir('..')
np.set_printoptions(precision=4)
!pwd

In [None]:
import numpy as np
from collections import defaultdict

import torch
import torch.nn.functional as F

from vidar.arch.losses.MultiCamPhotometricLoss import MultiCamPhotometricLoss
from vidar.arch.losses.MultiViewPhotometricLoss import calc_smoothness
from vidar.arch.networks.layers.panodepth.flow_reversal import FlowReversal
from vidar.datasets.PanoCamOuroborosDataset import PANO_CAMERA_NAME
from vidar.geometry.camera import Camera
from vidar.geometry.camera_pano import PanoCamera
from vidar.utils.config import cfg_has
from vidar.utils.depth import inv2depth, depth2inv
from vidar.utils.tensor import match_scales, make_same_resolution
from vidar.utils.viz import viz_photo
from vidar.utils.write import viz_depth

In [None]:
from common import to_numpy, to_uint8, visualize_3d, visualize_3d_list

In [None]:
from vidar.utils.config import read_config

# Read the PanoDepth DDAD training configuration (path is relative to the
# current working directory).
config = read_config('configs/papers/panodepth/train_ddad.yaml')

# Override the depth decoder output shape for this debugging session.
config.arch.networks.depth.decoder.out_shape = [128, 1024]
# config.arch.networks.depth.decoder.out_shape = [64, 512]
    
# Resize depth for easy debugging
# config.datasets.train.augmentation.resize_supervision = True
# config.datasets.train.dataloader.num_workers = 0
# NOTE(review): `+=` appends on every execution, so re-running this cell adds
# 'lidar' to the validation labels again — confirm duplicates are harmless.
config.datasets.validation.labels += ['lidar']
# Validation dataloader overrides: batch size 1, zero workers.
config.datasets.validation.dataloader.batch_size = 1
config.datasets.validation.dataloader.num_workers = 0

In [None]:
# config.arch.losses.reprojection.reprojection_pairs = [
#     ['camera_09', 0, 'camera_09', -1],
#     ['camera_09', 0, 'camera_09', 1],
# ]

In [None]:
from vidar.utils.setup import setup_dataset, setup_dataloader

# dataset = setup_dataset(config.datasets.train, verbose=True)
# dataloader = setup_dataloader(dataset, config.datasets.train.dataloader, 'train')

# Build the dataset from the validation section of the config; the train
# variant above is kept commented out for quick switching.
dataset = setup_dataset(config.datasets.validation, verbose=True)
# dataloader = setup_dataloader(dataset, config.datasets.validation.dataloader, 'val')

In [None]:
from torch.utils.data.dataloader import default_collate

# Collate a single sample into a batch by hand instead of going through a
# dataloader.
# NOTE(review): the double index suggests setup_dataset returns a sequence of
# datasets and this takes the first sample of the first one — confirm.
batch_from_loader = default_collate([dataset[0][0]])
# Show the top-level keys of the collated batch.
batch_from_loader.keys()

## DEBUG FeatTransform

In [None]:
import copy
import cv2

from vidar.arch.networks.layers.panodepth.depth_sweeping import FeatTransform

pad = 10
padding = 255 * np.ones((pad, 2048, 3), dtype=np.uint8)

boxes = {}
# NOTE: a leftover preview of out['log_images']['panodepth'] and two
# images.append(...) calls used to sit here. `out` is only created by the
# loss-module cell further down and `images` is not defined before this point,
# so a fresh top-to-bottom run stopped with a NameError before meta_info was
# built. The panodepth log image is displayed after the loss cell has run.

# Collect, for every entry of the batch whose key starts with 'camera', the
# entries that are handed to FeatTransform as meta_info.
decoder_required_keys = ('intrinsics', 'pose_to_pano')
meta_info = {}
t = 0       # Transforming features should be done in the same time frame.
for cam, sample in batch_from_loader.items():
    if not cam.startswith('camera'):
        continue
    # Keep only the required keys that this sample actually provides.
    meta_info[cam] = {k: sample[k][t] for k in decoder_required_keys if k in sample}

In [None]:
# meta_info['camera_pano']
# config.arch.networks.depth.decoder.out_shape = [16, 128]
# config.arch.networks.depth.decoder.out_shape = [32, 256]
config.arch.networks.depth.decoder.out_shape = [128, 1024]
# Integer ratio between the first entries of the decoder's ref_shape and
# out_shape; it is passed to FeatTransform (as 1.0*oscale) in the sweep cell.
oscale = config.arch.networks.depth.decoder.ref_shape[0] // config.arch.networks.depth.decoder.out_shape[0]
# Echo the computed ratio.
oscale

In [None]:
# Echo the decoder output shape currently stored in the config.
config.arch.networks.depth.decoder.out_shape

In [None]:
# distances = [3, 5, 10, 30]
out_shape = config.arch.networks.depth.decoder.out_shape

# For each fixed depth `d`, run FeatTransform on every camera's RGB input,
# sum the per-camera results and display the sum as an image.
distances = [10]
for d in distances:
    transformed = []
    for camera in ('camera_01', 'camera_05', 'camera_06', 'camera_07', 'camera_08', 'camera_09'):
        module = FeatTransform(
            camera, 1.0, (3, 384, 640), 1.0*oscale, [3] + out_shape, given_depth=d,
        )
        transformed.append(module(batch_from_loader[camera]['rgb'][0], meta_info))

    # Number of views whose sum over dim 1 is non-zero, clamped to at least 1.
    # Only the (currently disabled) averaging variant below consumes it.
    # NOTE(review): assumes dim 1 is the channel axis — confirm.
    num_views = torch.cat(
        [view.sum(dim=1, keepdim=True) != 0.0 for view in transformed], dim=1,
    ).sum(dim=1, keepdim=True).clamp(min=1.0)
    # transformed = torch.stack(transformed, dim=1).sum(dim=1) / num_views
    transformed = torch.stack(transformed, dim=1).sum(dim=1)
    transformed = to_uint8(to_numpy(transformed[0].detach()))

    # PIL's resize takes (width, height).
    display(Image.fromarray(transformed).resize((1024, 128)))

## DEBUG DepthNet

In [None]:
# from vidar.utils.config import read_config

# config = read_config('configs/papers/panodepth/train_ddad.yaml')

# # config.arch.networks.depth.decoder.out_shape = [256, 2048]
# config.arch.networks.depth.decoder.out_shape = [128, 1024]
# # config.arch.networks.depth.decoder.out_shape = [64, 512]

In [None]:
from vidar.utils.config import load_class

# Look up the MultiCamDepthNet class under 'vidar/arch/networks/depth' and
# instantiate it with the depth-network section of the config.
depth_net = load_class('MultiCamDepthNet', 'vidar/arch/networks/depth')(config.arch.networks.depth)
# Call eval() on the network (presumably a torch nn.Module switching to
# inference mode); as the last expression, its return value is echoed.
depth_net.eval()

In [None]:
from vidar.utils.types import is_dict

# Keys forwarded to the depth network, and the index picked from each of them.
_input_keys = ('rgb', 'intrinsics', 'pose_to_pano')
return_logs = True
ctx = 0

# Keep only dict-valued batch entries, reduced to the input keys they provide.
filtered_batch = {}
for cam, sample in batch_from_loader.items():
    if not is_dict(sample):
        continue
    filtered_batch[cam] = {
        key: sample[key][ctx]
        for key in _input_keys
        if key in sample
    }

# Forward pass; the second positional argument is the return_logs flag.
net_output = depth_net(filtered_batch, return_logs)

In [None]:
# List the entries returned by the depth network.
net_output.keys()

In [None]:
# Stack the camera RGB inputs horizontally, in the order given by camera_order.
# NOTE(review): [0][0] presumably selects the first context entry and the
# first batch item — confirm.
camera_order = ['camera_07', 'camera_05', 'camera_01', 'camera_06', 'camera_08', 'camera_09']
images = np.hstack([
    to_uint8(to_numpy(batch_from_loader[name]['rgb'][0][0]))
    for name in camera_order
])
# batch_from_loader['camera_01']['rgb'][0].shape

In [None]:
# NOTE: this is not the last expression of the cell, so the keys() result is
# not shown; it only fails early if 'log_images' is missing.
net_output['log_images'].keys()
# Show the input camera strip, then the 'input_agg_feats' log image resized to
# 1024 x 640 (PIL's resize takes (width, height)).
display(Image.fromarray(images))
display(Image.fromarray(net_output['log_images']['input_agg_feats']).resize((1024, 128*5)))

In [None]:
# NOTE: this is not the last expression of the cell, so the keys() result is
# not shown.
net_output['log_images'].keys()
# Show the input camera strip, then the 'input_agg_feats' log image at its
# native size.
display(Image.fromarray(images))
display(Image.fromarray(net_output['log_images']['input_agg_feats']))

## DEBUG Loss module

In [None]:
from vidar.utils.config import load_class

# Look up PanoDepthPhotometricLoss under 'vidar/arch/losses' and instantiate
# it with the reprojection-loss section of the config.
# NOTE(review): binding the instance to the name `self` is unusual at top
# level — presumably so method bodies can be pasted and run here; confirm.
self = load_class('PanoDepthPhotometricLoss', 'vidar/arch/losses')(config.arch.losses.reprojection)
# Call eval() on the loss module; as the last expression, its return value is
# echoed by the notebook.
self.eval()


In [None]:
from vidar.datasets.augmentations.resize import resize_torch_preserve

return_logs = True

# Build stand-in network outputs from the panoramic depth stored in the batch:
# resize it to (128, 1024) and convert depth to inverse depth.
# `[...] * 4` repeats the SAME tensor object four times (no copies are made).
# NOTE(review): the four entries presumably stand for four output scales —
# confirm against the loss implementation.
pano_invdepths = [depth2inv(
    resize_torch_preserve(batch_from_loader['camera_pano']['depth'][0], (128, 1024)))] * 4

# Run the loss module on the full batch with those inverse depths.
output = {'inv_depths': pano_invdepths}
out = self(batch_from_loader, output, return_logs=return_logs)

In [None]:
# from vidar.arch.networks.layers.panodepth.depth_sweeping import FeatTransform

# decoder_required_keys = ('intrinsics', 'pose_to_pano')
# meta_info = {}
# t = 0       # Transforming features should be done in the same time frame.
# for cam, sample in batch_from_loader.items():
#     if not cam.startswith('camera'):
#         continue
#     meta_info[cam] = {k: sample[k][t] for k in decoder_required_keys if k in sample}

In [None]:
# distances = [3, 5, 10, 30]
# for d in distances:
#     transformed = []
#     for camera in ['camera_01', 'camera_05', 'camera_06', 'camera_07', 'camera_08', 'camera_09']:
#         module = FeatTransform(camera, 1.0, (3, 384, 640), (3, 256, 2048), given_depth=d)
#         transformed.append(module(batch_from_loader[camera]['rgb'][0], meta_info))
    
#     num_views = torch.concat([t.sum(axis=1, keepdim=True) != 0.0 for t in transformed], axis=1)
#     num_views = num_views.sum(axis=1, keepdim=True).clamp(min=1.0)
#     transformed = torch.stack(transformed, axis=1).sum(axis=1) / num_views
#     transformed = to_uint8(to_numpy(transformed[0].detach()))
        
#     display(Image.fromarray(transformed))

In [None]:
# Show the 'panodepth' log image produced by the loss module call.
Image.fromarray(out['log_images']['panodepth'])

In [None]:
### Flow reversal by 4-points, (1m, 200m) inv_depth 0.5
# Stack the per-camera 'warped_<camera>' log images horizontally, keeping every
# second row and column of each.
camera_order = ['camera_07', 'camera_05', 'camera_01', 'camera_06', 'camera_08', 'camera_09']
images = np.hstack([
    out['log_images'][f'warped_{name}'][::2, ::2]
    for name in camera_order
])
Image.fromarray(images)
# Image.fromarray(out['log_images']['warped_camera_01'][::2, ::2])