In [1]:
%load_ext autoreload
%autoreload
import torch
import torch.nn as nn
from collections import OrderedDict

from datasets.kitti_raw_monosf import KITTI_Raw_EigenSplit_Train, KITTI_Raw_EigenSplit_Valid
from datasets.kitti_2015_train import KITTI_2015_MonoSceneFlow_Full

from models.Model import Model
from models.JointModel import JointModel
from losses import Loss

from augmentations import Augmentation_Resize_Only
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
from pprint import pprint

class Args:
    """Static configuration namespace for this notebook.

    An instance is handed to the model, loss, augmentation and dataset
    constructors below. The grouping here follows attribute-name prefixes
    only; what each value actually controls is decided by those consumers.
    """

    # --- general switches and model selection ---
    cuda = True
    evaluation = True
    batch_size = 2
    encoder_name = "resnet"
    model_name = "joint"
    use_bn = False
    pt_encoder = True
    num_scales = 4
    do_pose_c2f = False
    use_disp_min = False

    # --- momentum / beta / weight decay ---
    momentum = 0.9
    beta = 0.999
    weight_decay = 0.0

    # --- `use_*mask` toggles ---
    use_mask = False
    use_flow_mask = False
    use_static_mask = False
    use_census_mask = False

    # --- names prefixed `flow_` ---
    flow_min_w = 0.5
    flow_reduce_mode = "sum"
    flow_diff_thresh = 1e-3
    flow_pts_w = 0.2
    flow_sm_w = 200

    # --- stand-alone weights ---
    ssim_w = 0.85
    fb_w = 0.0
    static_cons_w = 1.0

    # --- names ending in `_lr_w` ---
    sf_lr_w = 0.0
    pose_lr_w = 0.0
    mask_lr_w = 1.0
    disp_lr_w = 1.0

    # --- names prefixed `disp_` ---
    disp_pts_w = 0.0
    disp_sm_w = 0.2
    disp_smooth_w = 0.1

    # --- names prefixed `sf_` ---
    sf_pts_w = 0.2
    sf_sm_w = 200

    # --- names prefixed `pose_` ---
    pose_sm_w = 200
    pose_pts_w = 0.2

    # --- names prefixed `mask_` ---
    mask_reg_w = 0.2
    mask_cons_w = 0.2
    mask_sm_w = 0.1

# Build the configuration object and the network; the network is moved to the GPU.
args = Args()

model = JointModel(args).cuda()

# Load the checkpoint onto the CPU: load_state_dict() copies the values into the
# model's existing (CUDA) parameters, so the file does not need to be mapped to
# whatever device it was saved from.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
state_dict = torch.load('pretrained/49.ckpt', map_location='cpu')['model']

# Strip a leading 'module.' from parameter names (presumably left by a
# DataParallel-style wrapper at training time - confirm). Keys without the prefix
# are kept as they are instead of losing their first seven characters.
prefix = 'module.'
new_state_dict = OrderedDict(
    (k[len(prefix):] if k.startswith(prefix) else k, v)
    for k, v in state_dict.items()
)
model.load_state_dict(new_state_dict)
model = model.eval()

# The weights now live inside the model; release the temporary copies.
del state_dict
del new_state_dict

augmentation = Augmentation_Resize_Only(args).cuda()
loss = Loss(args).cuda()

# val_dataset = KITTI_2015_MonoSceneFlow_Full(args, root='/external/datasets/kitti2015/')
val_dataset = KITTI_Raw_EigenSplit_Train(args, root='/external/datasets/kitti_data_jpg/', flip_augmentations=False, num_examples=150)
# Batch size comes from the shared configuration rather than a second literal.
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=args.batch_size, pin_memory=True)

In [2]:
%autoreload 

# Augmented inputs and the matching model outputs, one entry per batch.
ins, outs = [], []

for batch in val_loader:
    with torch.no_grad():
        # Move every entry whose key mentions "input" or "target" to the GPU,
        # overwriting it in place; all other entries are left untouched.
        batch.update({
            key: value.cuda(non_blocking=True)
            for key, value in batch.items()
            if "input" in key or "target" in key
        })

        augmented = augmentation(batch)
        prediction = model(augmented)

        ins.append(augmented)
        outs.append(prediction)

In [49]:
import numpy as np
import torch.nn.functional as tf
from utils.flow import flow_to_png_middlebury
from utils.loss_utils import _disp2depth_kitti_K
from utils.helpers import BackprojectDepth, Project3D
from utils.inverse_warp import pose2sceneflow

# Per-sample visualisations, appended in batch order (sample 0, 1, ... of each batch).
depth_imgs = []
pose_imgs = []
sf_imgs = []
img_l2s = []

# Loop-invariant: look the colormap up once instead of once per batch.
cmap = plt.get_cmap('plasma')

for (aug_data, out) in zip(ins, outs):
    # Bring everything needed for plotting back to the CPU.
    img_l2 = aug_data['input_l2'].cpu().detach()
    disp_l2 = out['disps_l2_pp'][0].cpu().detach()
    pose_b = out['pose_b'][0].cpu().detach()
    flow_b = out['flows_b_pp'][0].cpu().detach()
    K = aug_data['input_k_l1_aug'].cpu().detach()
    inv_K = torch.inverse(K)

    b, _, h, w = flow_b.shape
    half_res = [h // 2, w // 2]  # every panel is rendered at half the flow resolution

    # Disparity is scaled by the image width before the depth conversion.
    # K[:, 0, 0] is the top-left entry of each intrinsics matrix (presumably fx).
    disp_l2 = disp_l2 * w
    depth_l2 = _disp2depth_kitti_K(disp_l2, K[:, 0, 0])
    depth_l2 = tf.interpolate(depth_l2, half_res, align_corners=True, mode='bilinear')

    # pose
    # NOTE(review): depth_l2 is already at half resolution here while inv_K is
    # not rescaled in this cell - confirm that pose2sceneflow expects this.
    pose_flow = pose2sceneflow(depth_l2.squeeze(dim=1), None, inv_K, pose_mat=pose_b)
    pose_flow = tf.interpolate(pose_flow, half_res, align_corners=True, mode='bilinear')

    # sf
    flow_b = tf.interpolate(flow_b, half_res, align_corners=True, mode='bilinear')

    # Input image, resized and converted to channels-last for display.
    img = tf.interpolate(img_l2, half_res, align_corners=True, mode='bilinear').permute(0, 2, 3, 1)

    # Iterate over the actual batch dimension instead of hard-coding samples 0 and 1,
    # so any batch size (including a smaller final batch) is handled.
    for idx in range(b):
        # Channel 0 of the depth map, cast to uint8 and passed to the colormap as
        # integers; the alpha channel of the RGBA result is dropped.
        # NOTE(review): the uint8 cast drops the fractional part and cannot
        # represent values outside 0-255 - confirm this is the intended scaling.
        depth_index = depth_l2[idx, 0].numpy().astype(np.uint8)
        depth_imgs.append(cmap(depth_index, bytes=True)[:, :, :-1])

        pose_imgs.append(flow_to_png_middlebury(pose_flow[idx].numpy()))
        sf_imgs.append(flow_to_png_middlebury(flow_b[idx].numpy()))

        img_l2s.append((img[idx].numpy() * 255).astype(np.uint8))

In [50]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import imageio

def _tile_2x2(top_left, top_right, bottom_left, bottom_right):
    """Stack four equally sized RGB tiles into one 2x2 uint8 mosaic.

    The tile height and width are read from ``top_left``; the other three tiles
    must be assignable into a region of that size with 3 channels, otherwise
    numpy raises on assignment.

    Returns:
        A ``(2 * tile_h, 2 * tile_w, 3)`` uint8 array.
    """
    tile_h, tile_w = top_left.shape[:2]
    canvas = np.zeros((2 * tile_h, 2 * tile_w, 3), dtype=np.uint8)
    canvas[:tile_h, :tile_w, :] = top_left
    canvas[:tile_h, tile_w:, :] = top_right
    canvas[tile_h:, :tile_w, :] = bottom_left
    canvas[tile_h:, tile_w:, :] = bottom_right
    return canvas


fps = 30

# Layout per frame: input image (top-left), pose flow (top-right),
# depth (bottom-left), scene flow (bottom-right). The canvas size follows the
# tile size instead of being fixed to 256x832.
frames = [
    _tile_2x2(rgb, pose, depth, sf)
    for (rgb, depth, pose, sf) in zip(img_l2s, depth_imgs, pose_imgs, sf_imgs)
]

imageio.mimwrite('frames_ds.gif', frames, format='gif', fps=fps)