In [1]:
import numpy as np
import skimage.io as sio
import cv2

from matplotlib import pyplot as plt
%matplotlib inline

from PIL import Image
import copy
from pyquaternion import Quaternion

from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import LidarPointCloud

import os
import os.path as osp
import json
import tqdm

In [2]:
nusc = NuScenes(version='v1.0-mini', dataroot='../../shubham/', verbose=True) 

Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.656 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.


In [3]:
def translation2transform(vec):
    i = np.eye(4)
    i[:3, -1] = vec
    return i

In [4]:
scene_path = "../../sus_data/even_smaller_scene/"
img_path = os.path.join(scene_path, 'images')
pcd_path = os.path.join(scene_path, 'pcd')

os.makedirs(img_path, exist_ok=True)
os.makedirs(pcd_path, exist_ok=True)

In [5]:
# First define list of images
all_cams = ['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_FRONT_LEFT']#, 'CAM_BACK_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT']

# intrinsic = {}
# extrinsic = {}

# for ix in all_cams:
#     intrinsic[ix] = []
#     extrinsic[ix] = []

full_dict = []

downscale = 4


for n_idx in tqdm.tqdm_notebook(range(22)):
    # Get a particular sample
    my_sample = nusc.sample[n_idx]
    my_sample_token = my_sample['token']
    sample_record = nusc.get('sample', my_sample_token)

    # Get point sensor token
    pointsensor_token = sample_record['data']['LIDAR_TOP']

    # Get point cloud
    pointsensor = nusc.get('sample_data', pointsensor_token)
    pcl_path = os.path.join(nusc.dataroot, pointsensor['filename'])
    # pc = LidarPointCloud.from_file(pcl_path)

    # Iterate through the list
    for cam_ix in all_cams:
        img_folder = os.path.join(img_path, cam_ix)
        # os.makedirs(img_folder, exist_ok=True)
        img_save_path = os.path.join("{}_{}".format(img_folder, str(n_idx).zfill(3)) + '.jpg')

        # Get the camera token
        camera_token = sample_record['data'][cam_ix]
        cam = nusc.get('sample_data', camera_token)
        im = Image.open(osp.join(nusc.dataroot, cam['filename']))
        w, h = im.size
        
        res_w, res_h = w//downscale, h//downscale
        res_mat = np.eye(3)
        res_mat[0, 0] = res_mat[1, 1] = 1/downscale
        
        # Save the image to the location
        im = im.resize((res_w, res_h), Image.LANCZOS)
        im.save(img_save_path)
        # print(img_save_path)
        
        
        # Commpute the calibration matrices (Don't save right now)
        cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
        R1 = Quaternion(cs_record['rotation']).transformation_matrix
        T1 = translation2transform(np.array(cs_record['translation']))

        # Second step: transform from ego to the global frame.
        poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
        R2 = Quaternion(poserecord['rotation']).transformation_matrix
        T2 = translation2transform(np.array(poserecord['translation']))

        # Third step: transform from global into the ego vehicle frame for the timestamp of the image.
        poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
        T3 = translation2transform(-np.array(poserecord['translation']))
        R3 = Quaternion(poserecord['rotation']).transformation_matrix.T

        # Fourth step: transform from ego into the camera.
        cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
        T4 = translation2transform(-np.array(cs_record['translation']))
        R4 = Quaternion(cs_record['rotation']).transformation_matrix.T
        
        # intrinsic[cam_ix].append(np.array(cs_record['camera_intrinsic']).flatten())
        # extrinsic[cam_ix].append((np.linalg.inv(R4 @ T4 @ R3 @ T3)).flatten())
        intrinsic = (res_mat @ np.array(cs_record['camera_intrinsic'])).flatten()
        extrinsic = (np.linalg.inv(R4 @ T4 @ R3 @ T3)).flatten()
        
        # np.set_printoptions(suppress=True)
        
        data_dict = {
            'intrinsic': list(intrinsic),
            'extrinsic': list(extrinsic),
            'file_name': "{}/{}_{}".format("images", cam_ix, str(n_idx).zfill(3))
        }
        
        full_dict.append(data_dict)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for n_idx in tqdm.tqdm_notebook(range(22)):


  0%|          | 0/22 [00:00<?, ?it/s]

In [6]:
train_dict = []
val_dict = []
test_dict = []

for ix in full_dict:
    r = np.random.random()
    if r < 0.12:
        val_dict.append(ix)
    elif r >= 0.12 and r < 0.18:
        test_dict.append(ix)
    else:
        train_dict.append(ix)

print(len(train_dict), len(val_dict), len(test_dict))

52 9 5


In [7]:
final = {
    "frames": train_dict
}
f = open(os.path.join(scene_path, "transforms_train.json"), 'w')
json.dump(final, f)
f.close()

final = {
    "frames": test_dict
}
f = open(os.path.join(scene_path, "transforms_test.json"), 'w')
json.dump(final, f)
f.close()

final = {
    "frames": val_dict
}
f = open(os.path.join(scene_path, "transforms_val.json"), 'w')
json.dump(final, f)
f.close()

In [8]:
import torch
from torch.utils.data import Dataset
import json
import numpy as np
import os
from PIL import Image, ImageDraw
from torchvision import transforms as T

In [9]:
from ray_utils import *


In [10]:
class NusDataset(Dataset):
    def __init__(self, root_dir, split="train", img_wh=(1600, 900), downscale=1, perturbation=[]):
        self.root_dir = root_dir
        self.split = split
        self.downscale = downscale
        
        self.img_wh = img_wh
        self.define_transforms()
        self.perturbation = perturbation
        
        w, h = self.img_wh
        self.res_wh = (w // self.downscale, h // self.downscale)
        self.res_mat = np.eye(3)
        self.res_mat[0, 0] = self.res_mat[1, 1] = 1/self.downscale
        
        self.read_meta()
        self.white_back = True
    
    def read_meta(self):
        with open(os.path.join(self.root_dir,
                               f"transforms_{self.split.split('_')[-1]}.json"), 'r') as f:
            self.meta = json.load(f)
        
        self.near = 2.0
        self.far = 10.0
        self.bounds = np.array([self.near, self.far])
        w, h = self.res_wh
        
        if self.split == 'train':
            self.all_rays = []
            self.all_rgbs = []
            for t, frame in enumerate(self.meta['frames']):
                # if t >= 40:
                #     break
                K = self.res_mat @ np.array(frame['intrinsic']).reshape((3, 3))
                pose = np.array(frame['extrinsic']).reshape((4, 4))[:3, :4]
                c2w = torch.FloatTensor(pose)
                
                image_path = os.path.join(self.root_dir, "{}.jpg".format(frame['file_name']))
                img = Image.open(image_path)
                img = img.resize(self.res_wh, Image.LANCZOS)
                img = self.transform(img).view(3, -1).permute(1, 0)
                self.all_rgbs += [img]
                
                # Now for the rays
                directions = get_ray_directions(h, w, K)
                rays_o, rays_d = get_rays(directions, c2w)
                rays_t = t * torch.ones(len(rays_o), 1)
                
                # Add to list of all rays
                self.all_rays += [torch.cat([rays_o, rays_d,
                                             self.near*torch.ones_like(rays_o[:, :1]),
                                             self.far*torch.ones_like(rays_o[:, :1]),
                                             rays_t],
                                             1)]
            self.all_rays = torch.cat(self.all_rays, 0)
            self.all_rgbs = torch.cat(self.all_rgbs, 0)
    
    def define_transforms(self):
        self.transform = T.ToTensor()
    
    def __len__(self):
        if self.split == 'train':
            return len(self.all_rays)
        if self.split == 'val':
            return 8
        return len(self.meta['frames'])
    
    def __getitem__(self, idx):
        if self.split == 'train':
            sample = {'rays': self.all_rays[idx, :8],
                      'ts': self.all_rays[idx, 8].long(),
                      'rgbs': self.all_rgbs[idx]}
        else:
            # create data for each image separately
            frame = self.meta['frames'][idx]
            t = 0
            
            K = self.res_mat @ np.array(frame['intrinsic']).reshape((3, 3))
            pose = np.array(frame['extrinsic']).reshape((4, 4))[:3, :4]
            c2w = torch.FloatTensor(pose)
            
            image_path = os.path.join(self.root_dir, "{}.jpg".format(frame['file_name']))
            img = Image.open(image_path)
            img = img.resize(self.res_wh, Image.LANCZOS)
            img = self.transform(img).view(3, -1).permute(1, 0)
            valid_mask = (img[0]>0).flatten()
            
            w, h = self.res_wh
            
            # Now for the rays
            directions = get_ray_directions(h, w, K)
            rays_o, rays_d = get_rays(directions, c2w)
            rays_t = t * torch.ones(len(rays_o), 1)
            
            rays = torch.cat([rays_o, rays_d,
                              self.near*torch.ones_like(rays_o[:, :1]),
                              self.far*torch.ones_like(rays_o[:, :1])],
                             1)
            
            sample = {
                'rays': rays,
                'ts': t * torch.ones(len(rays), dtype=torch.long),
                'rgbs': img,
                'c2w': c2w,
                'valid_mask': valid_mask
            }
            
            if self.split == 'train_test':
                sample['original_rgbs'] = img
                sample['original_valid_mask'] = valid_mask
        return sample

In [16]:
df = NusDataset(scene_path, downscale=2)

In [18]:
df.all_rgbs.shape[0] // (1024*3)

9000000

In [105]:
fr =  df.meta['frames'][0]
print(np.array(fr['intrinsic']).reshape((3, 3)))
print(np.array(fr['extrinsic']).reshape((4, 4)))
fr

[[1266.41720305    0.          816.26701974]
 [   0.         1266.41720305  491.50706579]
 [   0.            0.            1.        ]]
[[  -0.94016521   -0.01558255   -0.34036239  410.87244105]
 [   0.33999683    0.02209463   -0.940167   1179.57081167]
 [   0.02217038   -0.99963444   -0.01547459    1.49367752]
 [   0.            0.            0.            1.        ]]


{'intrinsic': [1266.417203046554,
  0.0,
  816.2670197447984,
  0.0,
  1266.417203046554,
  491.50706579294757,
  0.0,
  0.0,
  1.0],
 'extrinsic': [-0.9401652123589346,
  -0.015582548073058936,
  -0.34036239167339144,
  410.8724410536508,
  0.33999683492988736,
  0.022094631607521308,
  -0.9401669955341908,
  1179.5708116715798,
  0.02217037906169335,
  -0.9996344389073835,
  -0.015474586992294873,
  1.4936775157046203,
  0.0,
  0.0,
  0.0,
  1.0],
 'file_name': 'images/CAM_FRONT_000'}