## Jun 23, 2022

# Generate dense PanoDepth GT
- Load config: `configs/papers/panodepth/train_ddad.yaml`
- Use PanoDepth GT
- Visualize flows (from Pano to Camera, from Pano to Camera by flow reversal)
- Visualize synthesized depth on camera from pano and compare it with depth on camera
    - LiDAR -> Proj on PanoSpace -> Compute flows to Camera -> Flow reversal -> Grid sampling of panodepth using reversed flow
    - LiDAR -> Proj on Camera
- (Optional) Synthesize RGB on Camera using depth by reversed flow

In [None]:
%load_ext autoreload
%autoreload 2

import copy
import os
import torch
import numpy as np
import pythreejs as pjs
import warnings
warnings.filterwarnings('ignore')

from matplotlib.cm import get_cmap
from tqdm.notebook import tqdm
from PIL import Image
from IPython.core.display import display

# Move to the parent directory so that the relative paths used in later cells
# (e.g. 'configs/papers/panodepth/train_ddad.yaml') are resolved from there.
os.chdir('..')
# Print NumPy floats with 4 digits of precision.
np.set_printoptions(precision=4)
!pwd

In [None]:
import numpy as np
from collections import defaultdict

import torch
import torch.nn.functional as F

from vidar.arch.losses.MultiCamPhotometricLoss import MultiCamPhotometricLoss
from vidar.arch.losses.MultiViewPhotometricLoss import calc_smoothness
from vidar.arch.networks.layers.panodepth.flow_reversal import FlowReversal
from vidar.datasets.PanoCamOuroborosDataset import PANO_CAMERA_NAME
from vidar.geometry.camera import Camera
from vidar.geometry.camera_pano import PanoCamera
from vidar.utils.config import cfg_has
from vidar.utils.depth import inv2depth, depth2inv
from vidar.utils.tensor import match_scales, make_same_resolution
from vidar.utils.viz import viz_photo
from vidar.utils.write import viz_depth

In [None]:
from vidar.utils.config import read_config

# Load the PanoDepth DDAD training configuration.
config = read_config('configs/papers/panodepth/train_ddad.yaml')

# Resize depth for easy debugging
# config.datasets.train.augmentation.resize_supervision = True
# config.datasets.train.dataloader.num_workers = 0
# Additionally request the 'lidar' label on the validation split.
# NOTE(review): presumably this is what provides the 'lidar_pointcloud' /
# 'lidar_pose' entries read in later cells — confirm.
config.datasets.validation.labels += ['lidar']
# Validation dataloader overrides: one sample per batch, zero loader workers.
config.datasets.validation.dataloader.batch_size = 1
config.datasets.validation.dataloader.num_workers = 0

In [None]:
from vidar.utils.setup import setup_dataset, setup_dataloader

# dataset = setup_dataset(config.datasets.train, verbose=True)
# dataloader = setup_dataloader(dataset, config.datasets.train.dataloader, 'train')

# Build the validation dataset(s) and wrap them using the *validation*
# dataloader settings, so that the batch_size / num_workers overrides made on
# config.datasets.validation.dataloader are the ones actually applied.
dataset = setup_dataset(config.datasets.validation, verbose=True)
dataloader = setup_dataloader(dataset, config.datasets.validation.dataloader, 'val')

In [None]:
# Pull a single batch from the first dataloader and list its top-level keys.
batch_from_loader = next(iter(dataloader[0]))
batch_from_loader.keys()

## Utilities

In [None]:
def to_numpy(tensor):
    """Convert a 3-D tensor to a NumPy array with its first axis moved last.

    Typical use is channel-first image (C, H, W) -> channel-last (H, W, C).
    The tensor is detached and moved to the CPU first, so tensors that
    require grad or live on another device are accepted as well.

    Args:
        tensor: 3-D ``torch.Tensor``.

    Returns:
        ``numpy.ndarray`` whose axes are the input axes in order (1, 2, 0).
    """
    return tensor.detach().cpu().permute(1, 2, 0).numpy()

def to_uint8(array):
    """Scale a float array from the [0, 1] range to ``uint8`` in [0, 255].

    Scaled values are clipped to [0, 255] before the cast, so inputs slightly
    outside [0, 1] saturate instead of wrapping around. In-range values are
    truncated toward zero by the cast, as before.

    Args:
        array: ``numpy.ndarray`` of floats.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with the same shape.
    """
    return np.clip(array * 255.0, 0.0, 255.0).astype(np.uint8)

In [None]:
import pythreejs as pjs

def visualize_3d(xyz, rgb=None, size=0.03, height=480, width=480):
    """Build a pythreejs renderer that shows a point cloud with orbit controls.

    Args:
        xyz: Point positions, handed unchanged to ``pjs.BufferAttribute``
            (presumably an (N, 3) float array — TODO confirm).
        rgb: Optional per-point colors, handed unchanged to
            ``pjs.BufferAttribute``; no 'color' attribute is set when None.
        size: Point size forwarded to ``pjs.PointsMaterial``.
        height: Renderer height.
        width: Renderer width.

    Returns:
        The ``pjs.Renderer`` for the scene (points, lights and an axes helper).
    """
    attributes = {'position': pjs.BufferAttribute(array=xyz)}
    if rgb is not None:
        attributes['color'] = pjs.BufferAttribute(array=rgb)

    cloud = pjs.Points(
        geometry=pjs.BufferGeometry(attributes=attributes),
        material=pjs.PointsMaterial(vertexColors='VertexColors', size=size),
    )

    # Viewing camera carrying its own directional light.
    headlight = pjs.DirectionalLight(color='white', intensity=0.5)
    viewer_camera = pjs.PerspectiveCamera(up=[1, 0, 1], children=[headlight])
    viewer_camera.rotateX(np.pi/4)
    viewer_camera.position = (-15., 0., 30.)

    scene = pjs.Scene(children=[
        cloud,
        viewer_camera,
        pjs.AmbientLight(color='#777777'),
    ])
    # Axes helper added after scene construction, as a separate child.
    scene.add(pjs.AxesHelper(size=3))

    return pjs.Renderer(
        camera=viewer_camera,
        scene=scene,
        width=width,
        height=height,
        preserveDrawingBuffer=True,
        controls=[pjs.OrbitControls(controlling=viewer_camera)],
    )

## DEBUG

In [None]:
# from vidar.utils.config import load_class

# depth_net = load_class('MultiCamDepthNet', 'vidar/arch/networks/depth')(config.arch.networks.depth)

In [None]:
# filtered_batch['camera_pano']['intrinsics'].shape

In [None]:
# from vidar.utils.types import is_dict

# _input_keys = ('rgb', 'intrinsics', 'pose_to_pano')
# filtered_batch = {}
# t = 0
# for cam, sample in batch_from_loader.items():
#     if is_dict(sample):
#         filtered_batch[cam] = {k: sample[k][t] if 'pano' not in cam else sample[k]
#                                             for k in _input_keys if k in sample}

# out = depth_net(filtered_batch)
# out.keys()

In [None]:
from vidar.utils.config import load_class

# Instantiate the PanoDepth photometric loss from the reprojection-loss config
# and switch it to eval mode.
# NOTE(review): binding the instance to the global name `self` presumably lets
# method bodies be pasted into cells for debugging — confirm.
self = load_class('PanoDepthPhotometricLoss', 'vidar/arch/losses')(config.arch.losses.reprojection)
self.eval()

In [None]:
# import torch.nn.functional as F
# from vidar.arch.blocks.depth.SigmoidToInvDepth import SigmoidToInvDepth

# # min_depth, max_depth = config.arch.networks.depth.min_depth, config.arch.networks.depth.max_depth
# min_depth, max_depth = (1.0, 200.0)
# print(min_depth, max_depth)

# scale_inv_depth = SigmoidToInvDepth(min_depth=min_depth, max_depth=max_depth)
# init_out = torch.rand(batch_from_loader['camera_pano']['depth'].shape) * 0.0 + 0.5
# inv_depth = scale_inv_depth(init_out)

# print(inv2depth(inv_depth).min(), inv2depth(inv_depth).max())

In [None]:
from vidar.datasets.augmentations.resize import resize_torch_preserve

# Ask the loss to also return its logged images.
return_logs = True

# Take the pano depth from the batch, resize it with target size (128, 1024)
# and convert it to inverse depth. The list holds the SAME tensor object four
# times — presumably one entry per scale expected by the loss (TODO confirm).
pano_invdepths = [depth2inv(
    resize_torch_preserve(batch_from_loader['camera_pano']['depth'], (128, 1024)))] * 4

# Feed the depth-derived inverse depths to the loss in place of a network output.
output = {'inv_depths': pano_invdepths}
out = self(batch_from_loader, output, return_logs=return_logs)

In [None]:
# List the names of the images logged by the loss.
out['log_images'].keys()

In [None]:
# Show the logged 'panodepth' image.
Image.fromarray(out['log_images']['panodepth'])

In [None]:
### Flow reversal test
# Cameras in the left-to-right order in which their views are concatenated.
camera_order = ['camera_07', 'camera_05', 'camera_01', 'camera_06', 'camera_08', 'camera_09']
# Take every second row and column of each logged 'warped_<camera>' image and
# stack the views horizontally into one strip.
images = np.hstack([out['log_images']['warped_{}'.format(c)][::2, ::2] for c in camera_order])
Image.fromarray(images)

# Effect of varying depth hypothesis
## Prepare dense pano depth

In [None]:
from vidar.arch.networks.layers.panodepth.depth_sweeping import FeatTransform

# Per-camera entries that are collected into meta_info (later passed to FeatTransform).
decoder_required_keys = ('intrinsics', 'pose_to_pano')
meta_info = {}
t = 0       # Transforming features should be done in the same time frame.
for cam, sample in batch_from_loader.items():
    # Skip batch entries whose key does not start with 'camera'.
    if not cam.startswith('camera'):
        continue
    # Keep only the required keys that this sample has, indexed by t.
    # NOTE(review): 'camera_pano' also starts with 'camera' and is indexed by t
    # here, whereas the commented-out DEBUG cell leaves pano entries
    # un-indexed — confirm this is intended.
    meta_info[cam] = {k: sample[k][t] for k in decoder_required_keys if k in sample}

In [None]:
from vidar.geometry.pose import Pose

# Accumulate the LiDAR sweeps of every sample in world coordinates and colour
# each point from the camera images it projects into.
img_height, img_width = (384, 640)
xyz_all, rgb_all = [], []
for ii in tqdm(range(len(dataset[0]))):
    batch = dataset[0][ii]
    xyz_lidar = batch['lidar_pointcloud'].astype(np.float32)

    # Apply the LiDAR pose to the points (rotation + translation) -> world frame.
    pose = batch['lidar_pose'].astype(np.float32)
    xyz_world = pose[:3, :3] @ xyz_lidar.T + pose[:3, 3:]

    # Per-point colours, (N, 3); points seen by no camera keep the value 0.
    rgb_world = np.zeros_like(xyz_world).T

    for c in dataset[0].cameras:
        camera = f'camera_0{c}'
        K = batch[camera]['intrinsics'][0].numpy()
        # NOTE(review): the 'extrinsics' entry is applied directly to the
        # LiDAR-frame points, i.e. it is used as a LiDAR -> camera transform
        # (no inversion) — confirm against the dataset convention.
        Twc = batch[camera]['extrinsics'][0].numpy()
        xyz_camera = Twc[:3, :3] @ xyz_lidar.T + Twc[:3, 3:]
        ix, iy, iz = K @ xyz_camera

        # Keep pixel coordinates in floating point and floor them. The earlier
        # cast to np.int16 overflowed for points close to the image plane
        # (tiny iz -> huge ix/iz), wrapping them back into the valid pixel
        # range, and truncation toward zero mapped (-1, 0) onto pixel 0.
        # Non-finite results (iz == 0) fail every comparison below.
        with np.errstate(divide='ignore', invalid='ignore'):
            px = np.floor(ix / iz)
            py = np.floor(iy / iz)

        proj_on_image = np.logical_and.reduce([
            xyz_camera[2] > 0,
            px >= 0, px < img_width,
            py >= 0, py < img_height,
        ])

        image = to_numpy(batch[camera]['rgb'][0])
        # Only the in-image coordinates are converted to integers, so the
        # cast is always in range.
        rows = py[proj_on_image].astype(np.int64)
        cols = px[proj_on_image].astype(np.int64)
        # Later cameras overwrite earlier ones for points visible in several views.
        rgb_world[proj_on_image] = image[rows, cols, :]

    xyz_all.append(xyz_world)
    rgb_all.append(rgb_world)

# (3, N_i) blocks are joined along the point axis and transposed to (N, 3);
# colours are already (N_i, 3) and are stacked row-wise.
xyz_all_world = np.hstack(xyz_all).T
rgb_all = np.vstack(rgb_all)

In [None]:
# Use the first sample as the reference frame.
batch = dataset[0][0]

# Invert the first sample's LiDAR pose and apply it to the accumulated
# world-frame points, expressing them relative to that pose.
pose = batch['lidar_pose'].astype(np.float32)
pose_inv = np.linalg.inv(pose)
xyz_all = pose_inv[:3, :3] @ xyz_all_world.T + pose_inv[:3, 3:]
# Back to one point per row: (3, N) -> (N, 3).
xyz_all = xyz_all.T

In [None]:
# visualize_3d(xyz_all, rgb_all, size=0.1)

## Synthesize PanoRGB image

In [None]:
# Order the points by decreasing Euclidean norm (farthest first, nearest last)
# and apply the same permutation to the colours.
# NOTE(review): presumably so that nearer points are written last when the
# points are splatted onto the panorama — confirm.
order = np.argsort(np.linalg.norm(xyz_all, 2, axis=1))[::-1]
xyz_all = xyz_all[order]
rgb_all = rgb_all[order]

In [None]:
# Pano camera parameters are read from the first dataset sample.
batch = dataset[0][0]

# [None] prepends a dimension of size 1 to each tensor.
K = torch.FloatTensor(batch['camera_pano']['intrinsics'])[None]
Twc = torch.FloatTensor(batch['camera_pano']['Twc'])[None]
hw = batch['camera_pano']['hw']
# Points are transposed from (N, 3) to (3, N) before the leading dimension is added.
xyz_all_tensor = torch.FloatTensor(xyz_all).T[None]

In [None]:
# Project the accumulated points into the panoramic camera.
coords = PanoCamera(K, hw, Twc=Twc).project_points(xyz_all_tensor, from_world=True)

pano_h, pano_w = int(hw[0]), int(hw[1])

# Rescale the projected coordinates so that -1 maps to 0 and +1 maps to the
# image size (x by the width, y by the height).
coords[..., 0] = (coords[..., 0] + 1) / 2 * pano_w
coords[..., 1] = (coords[..., 1] + 1) / 2 * pano_h
# Floor instead of truncating toward zero: truncation moved coordinates in
# (-1, 0) onto row/column 0 and let off-image points through the mask.
# The bounds test is done on floats so NaN/inf projections are rejected too.
coords = torch.floor(coords)
mask = (coords[..., 0] >= 0) & \
       (coords[..., 0] < pano_w) & \
       (coords[..., 1] >= 0) & \
       (coords[..., 1] < pano_h)

ix, iy = coords[mask].long().T
colors = torch.FloatTensor(rgb_all[mask.view(-1).numpy()])

# Several points usually fall on the same pixel, and indexed assignment with
# duplicate indices has no defined write order. Keep exactly one point per
# pixel: the LAST one in the input order. With the far-to-near ordering
# applied to xyz_all / rgb_all in the earlier cell, that is the nearest point.
flat = (iy * pano_w + ix).numpy()
# np.unique on the reversed array yields the first hit in reversed order,
# i.e. the last hit in the original order.
_, first_in_reversed = np.unique(flat[::-1], return_index=True)
last_hit = torch.from_numpy(flat.size - 1 - first_in_reversed)

# Canvas pre-filled with mid-grey (0.5) where no point lands.
# NOTE: despite its name, pano_dense_depth stores RGB values.
pano_dense_depth = 0.5 * torch.ones([pano_h, pano_w, 3], dtype=torch.float32)
pano_dense_depth[iy[last_hit], ix[last_hit], :] = colors[last_hit]

In [None]:
# Show the splatted panorama as an 8-bit image.
# NOTE: despite its name, pano_dense_depth is filled from rgb_all (colours).
Image.fromarray(to_uint8(pano_dense_depth.numpy()))

## Draw depth-sweeping example

In [None]:
import copy
import cv2

# Rows of the final figure are collected in `images`, separated by white strips.
pad = 10
padding = 255 * np.ones((pad, 2048, 3), dtype=np.uint8)

# Highlight boxes keyed by the swept distance they belong to.
# The insertion order (5, 30, 10) is also the drawing order.
boxes = {
    5: {
        'start_point': (505, 120),
        'end_point': (535, 150),
        'color': [255, 0, 0],
        'thickness': 3,
    },
    30: {
        'start_point': (760, 150),
        'end_point': (810, 200),
        'color': [217, 0, 255],
        'thickness': 3,
    },
    10: {
        'start_point': (1190, 150),
        'end_point': (1250, 210),
        'color': [255, 192, 0],
        'thickness': 3,
    },
}

# Row 1: the first 128 rows of the logged 'panodepth' image.
panodepth_rows = out['log_images']['panodepth'][:128]
display(Image.fromarray(panodepth_rows))
images = [panodepth_rows, padding]

# Row 2: the splatted panorama as 8-bit RGB.
pano_dense_depth_rgb = to_uint8(pano_dense_depth.clone().numpy())
display(Image.fromarray(pano_dense_depth_rgb))
images += [pano_dense_depth_rgb, padding]

# Row 3: a copy of the same panorama with every highlight box drawn on it.
pano_dense_depth_box = copy.deepcopy(pano_dense_depth_rgb)
for box in boxes.values():
    pano_dense_depth_box = cv2.rectangle(
        pano_dense_depth_box,
        box['start_point'], box['end_point'], box['color'], box['thickness'])
display(Image.fromarray(pano_dense_depth_box))
images += [pano_dense_depth_box, padding]

# Remaining rows: camera images transformed with FeatTransform, one per distance.
# (A longer sweep, [3, 5, 10, 30, 50, 90], was used previously.)
distances = [3, 5, 10, 30]
for d in distances:
    per_camera = [
        FeatTransform(cam, 1.0, (3, 384, 640), (3, 256, 2048), given_depth=d)(
            batch_from_loader[cam]['rgb'][0], meta_info)
        for cam in ['camera_01', 'camera_05', 'camera_06', 'camera_07', 'camera_08', 'camera_09']
    ]

    # Average over the cameras whose channel sum is non-zero at each location;
    # the count is clamped to at least 1 to avoid dividing by zero.
    covered = torch.concat([view.sum(axis=1, keepdim=True) != 0.0 for view in per_camera], axis=1)
    num_views = covered.sum(axis=1, keepdim=True).clamp(min=1.0)
    blended = torch.stack(per_camera, axis=1).sum(axis=1) / num_views
    transformed = to_uint8(to_numpy(blended[0].detach()))

    # Frame the whole row in the colour of the box tied to this distance, if any.
    if d in boxes:
        frame_color = boxes[d]['color']
        border = 10
        transformed[:border, :] = frame_color
        transformed[-border:, :] = frame_color
        transformed[:, :border] = frame_color
        transformed[:, -border:] = frame_color

    # Draw every box on every row; the array is made contiguous before drawing.
    transformed = np.ascontiguousarray(transformed, dtype=np.uint8)
    for box in boxes.values():
        transformed = cv2.rectangle(
            transformed,
            box['start_point'], box['end_point'], box['color'], box['thickness'])

    display(Image.fromarray(transformed))
    images += [transformed, padding]

# Drop the trailing separator strip.
images = images[:-1]

print(distances)

In [None]:
# Draw with lines
height, width = images[-1].shape[:2]

# Rescale every row by one factor (same for x and y) so that its width
# matches the last row, then stack all rows vertically.
resized_rows = []
for img in images:
    scale = width / img.shape[1]
    resized_rows.append(cv2.resize(img, None, fx=scale, fy=scale))
images_resized = np.vstack(resized_rows)

# Vertical offsets of the two rows the lines connect.
# NOTE(review): (pad + height) * 2 assumes the two rows above the boxed
# panorama each take `height` pixels after resizing — confirm.
upper_row_top = (pad + height) * 2
last_row_top = images_resized.shape[0] - height

# One vertical line per box, at the box's horizontal centre, from the top edge
# of the box in the upper row to the bottom edge of the box in the last row.
for d, box in boxes.items():
    x_mid = int((box['start_point'][0] + box['end_point'][0])/2)
    start = [x_mid, int(upper_row_top + box['start_point'][1])]
    end = [x_mid, int(last_row_top + box['end_point'][1])]
    images_resized = cv2.line(images_resized, start, end, box['color'], 2)

# Save the figure to the current working directory and show it.
figure = Image.fromarray(images_resized)
figure.save('motivation_multi_depth_sweeping_v2.png')
figure