# Mulitview YCB Dataset

In [1]:
%matplotlib inline
import cv2
import numpy as np
import torch
from torch.autograd import Variable
import quat_math
import pickle

from PIL import Image
import scipy.io as scio
from functools import partial
from object_pose_utils.utils import to_np

import matplotlib.pyplot as plt
import pylab
pylab.rcParams['figure.figsize'] = 20, 12
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import

## Helpful Image Viewing Functions

In [2]:
def imshowCV(img, axis = False, show = True):
    if not axis:
        plt.axis('off')
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    if(show):
        plt.show()
    
def imshow(img, axis = False, colorbar = False, show = True):
    if not axis:
        plt.axis('off')
    plt.imshow(img)
    if(colorbar):
        plt.colorbar()
    if(show):
        plt.show()
    
def torch2Img(img, show = True):
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    disp_img = to_np(img)
    if len(disp_img.shape) == 4:
        disp_img = disp_img[0]
    disp_img = disp_img.transpose((1,2,0))
    disp_img = disp_img * std + mean
    return disp_img
    
def imshowTorch(img, axis = False, show = True):
    if not axis:
        plt.axis('off')
    disp_img = torch2Img(img)
    plt.imshow(disp_img.astype(np.uint8))
    if(show):
        plt.show()

def plotImageScatter(img, choose, show = True):
    coords = np.unravel_index(choose, img.shape[:2])    
    plt.axis('off')
    plt.imshow(img.astype(np.uint8))    
    plt.scatter(coords[1], coords[0], 50)
    #plt.colorbar()
    if(show):
        plt.show()

## Location of YCB Dataset

In [3]:
#dataset_root = '/ssd0/datasets/ycb/YCB_Video_Dataset'
dataset_root = '/home/bokorn/data/ycb/debug/'

## Select Object Indices of Interest

| Object Indices |[]()|[]()|
|---|---|---|
| __1.__ 002_master_chef_can | __8.__ 009_gelatin_box      | __15.__ 035_power_drill       |
| __2.__ 003_cracker_box     | __9.__ 010_potted_meat_can  | __16.__ 036_wood_block        |
| __3.__ 004_sugar_box       | __10.__ 011_banana          | __17.__ 037_scissors          |
| __4.__ 005_tomato_soup_can | __11.__ 019_pitcher_base    | __18.__ 040_large_marker      |
| __5.__ 006_mustard_bottle  | __12.__ 021_bleach_cleanser | __19.__ 051_large_clamp       |
| __6.__ 007_tuna_fish_can   | __13.__ 024_bowl            | __20.__ 052_extra_large_clamp |
| __7.__ 008_pudding_box     | __14.__ 025_mug             | __21.__ 061_foam_brick        |

In [4]:
object_list = [14]

## Select Dataset
Modes include train, syn, grid, valid, test and can be concatinated with "\_", e.g. "train\_syn"

In [5]:
mode = "train"

## Select Output Data Types

In [6]:
from object_pose_utils.datasets.pose_dataset import OutputTypes as otypes

output_format = [otypes.OBJECT_LABEL,
                 otypes.QUATERNION, 
                 otypes.IMAGE_CROPPED, 
                 otypes.DEPTH_POINTS_MASKED_AND_INDEXES]

## Initialize YCB Dataset

In [7]:
from object_pose_utils.datasets.ycb_dataset import YcbDataset as YCBDataset
from object_pose_utils.datasets.image_processing import ImageNormalizer

ycb_dataset = YCBDataset(dataset_root, mode=mode, 
                         object_list = object_list, 
                         output_data = output_format, 
                         postprocessors = [ImageNormalizer()],
                         image_size = [640, 480], num_points=1000)

## Select Image Set Parameters
Inteval between images and max number of images

In [8]:
interval = 4
video_len = 3

## Initialize YCB Dataset

In [9]:
from object_pose_utils.datasets.ycb_video_dataset import YcbVideoDataset as YCBVideoDataset

dataset = YCBVideoDataset(ycb_dataset, 
                          interval = interval, 
                          video_len = video_len)

## Select Object If Mutiple Objects Are Avalible

In [10]:
print('Avalible objects: {}'.format(*dataset.getObjectIds()))
dataset.setObjectId(object_list[0])

Avalible objects: 14


## Select Video Id

In [11]:
print('Avalible Videos: {}'.format(*dataset.getVideoIds()))
video_id = '0000'
dataset.setVideoId(video_id)
print('{} image sets avaible in video {}'.format(len(dataset), video_id))

Avalible Videos: 0000
9 image sets avaible in video 0000


## Sample Dataset

In [12]:
# Iterate over the dataset

for data, trans in dataset:
    break

# Or if you want to grab a specific index
#idx = 0
#data, trans = dataset.__getitem__(idx)
#data = dataset.getData(idx)
#trans = dataset.getCameraTransforms(idx)

## Apply Transforms

In [13]:
from object_pose_utils.utils.multi_view_utils import applyTransform, computeCameraTransform

quats = []
for mat, d in zip(trans, data):
    quats.append(d[1])

quats_trans = applyTransform(quats, trans)

## Resulting Error

In [14]:
from object_pose_utils.utils.pose_processing import quatAngularDiff
q0 = to_np(data[0][1])
for q, q_t in zip(quats, quats_trans):
    dq = quatAngularDiff(q0, q)*180/np.pi
    dq_t = quatAngularDiff(q0, q_t)*180/np.pi
    print("{} -> {}".format(dq, dq_t))


0.020641618571863796 -> 0.0
0.2910394930802486 -> 0.0
0.48652849477105786 -> 0.06277192093494893


## Using with DenseFusion Estimator

In [15]:
from dense_fusion.network import PoseNetGlobal, PoseNet
model_checkpoint = '/home/bokorn/src/DenseFusion/trained_checkpoints/ycb/pose_model_train_split_34_0.025648579025031315.pth'

estimator = PoseNet(num_points = 1000, 
                    num_obj = 21)
estimator.load_state_dict(torch.load(model_checkpoint, map_location=lambda storage, loc: storage))
#estimator.cuda();

estimator()

TypeError: forward() missing 4 required positional arguments: 'img', 'x', 'choose', and 'obj'

## Estimate rotation for each image

In [None]:
est_quats = []
for mat, d in zip(trans, data):
    idx, quat, img, points, choose, = d
    idx = idx - 1
    points, choose, img, idx = Variable(points.unsqueeze(0)), \
                               Variable(choose.unsqueeze(0)), \
                               Variable(img.unsqueeze(0)), \
                               Variable(idx.unsqueeze(0))
    
    pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
    pred_q = pred_r[0,torch.argmax(pred_c)][[1,2,3,0]]
    pred_q /= pred_q.norm()
    est_quats.append(pred_q)

est_quats_trans = applyTransform(est_quats, trans)

## Resulting Error

In [None]:
q0 = to_np(est_quats[0])
for q, q_t in zip(est_quats, est_quats_trans):
    dq = quatAngularDiff(q0, to_np(q))*180/np.pi
    dq_t = quatAngularDiff(q0, q_t)*180/np.pi
    print("{} -> {}".format(dq, dq_t))
