In [1]:
import extract_colmap as c
import numpy as np
import torch 
import os
import cv2
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torchvision.transforms as T
from PIL import Image
#ghost_city_bin_path='raw/test/bin_files/ghost_city_bins/images.bin'
#medium_structure_bin_path='raw/test/bin_files/medium_structure_bins/images.bin'

ghost_city_bin_path='/home/server/Ines/models/raw/test/bin_files/ghost_city_bins/images.bin'
medium_structure_bin_path='/home/server/Ines/models/raw/test/bin_files/medium_structure_bins/images.bin'
thermitiere_bin_path='/home/server/Ines/models/raw/train/bin_files/thermitiere_bins/images.bin'
old_cliff_bin_path='/home/server/Ines/models/raw/train/bin_files/old_rainbow_cliff_bins/images.bin'

test_bin_paths=[ghost_city_bin_path, medium_structure_bin_path]
train_bin_paths=[old_cliff_bin_path, thermitiere_bin_path]

## Creating the stereo datasets, dataloaders, containing Colmap information

In [2]:
ms_objects=c.read_images_binary(medium_structure_bin_path)
ind_map_ms=list(ms_objects.keys())
gc_objects=c.read_images_binary(ghost_city_bin_path)
ind_map_gc=list(gc_objects.keys())


In [3]:
total_ind_map=ind_map_gc+ind_map_ms
#print(total_ind_map)
#total_ind_map.sort()
print(total_ind_map)

[337, 336, 335, 334, 333, 332, 331, 330, 329, 328, 327, 326, 325, 324, 323, 322, 321, 320, 319, 318, 317, 316, 315, 314, 313, 312, 311, 310, 309, 308, 307, 306, 305, 304, 303, 302, 301, 300, 299, 298, 297, 296, 295, 294, 293, 292, 291, 290, 289, 288, 287, 286, 285, 284, 283, 282, 281, 280, 279, 278, 277, 276, 275, 274, 273, 272, 271, 270, 269, 268, 267, 266, 265, 264, 263, 262, 261, 260, 259, 258, 257, 256, 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, 239, 238, 237, 236, 235, 234, 233, 232, 231, 230, 229, 228, 227, 226, 225, 224, 223, 222, 221, 220, 219, 218, 217, 216, 215, 214, 213, 212, 211, 210, 209, 208, 207, 206, 205, 204, 203, 202, 201, 200, 199, 198, 197, 196, 195, 194, 193, 192, 191, 190, 189, 188, 187, 186, 185, 184, 183, 182, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 16, 15, 351, 14, 181, 350, 13, 180, 349, 12, 179, 3

In [4]:
# Creating dictionnaries for the train set 

th_objects=c.read_images_binary(thermitiere_bin_path)
or_objects=c.read_images_binary(old_cliff_bin_path)
th_ind_map=list(th_objects.keys())
or_ind_map=list(or_objects.keys())
train_ind_map=th_ind_map+or_ind_map

In [5]:
def dict_and_ind_maps(path1, path2):
    objects_1=c.read_images_binary(path1)
    objects_2=c.read_images_binary(path2)
    ind_map_1=list(objects_1.keys())
    ind_map_2=list(objects_2.keys())
    total_ind_map=ind_map_1+ind_map_2
    return objects_1, objects_2, total_ind_map

In [6]:
# entire process

# extraction before the dataloader!!
gc_objects, ms_objects, test_ind_map=dict_and_ind_maps(ghost_city_bin_path, medium_structure_bin_path)
or_objects, th_objects, train_ind_map=dict_and_ind_maps(old_cliff_bin_path, thermitiere_bin_path)

In [27]:
class Image_and_Pose_Pairs(torch.utils.data.Dataset):

    def create_dict(self, path_bin):
        """Input: List of strings
           Output: List of dictionnaries containing Colmap information """

        total_dict=[]
        for path in path_bin:
            dict=c.read_images_binary(path)
            total_dict.append(dict)
        return total_dict

    def get_seq_ids(self, list_dict):
        """ Input : List of dictionnaries 
            Output: List, with len=total number of images
            Maps ind_dataset <--> image sequence to which the image belongs"""
        seq_ids=[]
        for i in range(len(list_dict)):
            seq_ids+=list(i*np.ones(len(list_dict[i])))
        return seq_ids

    def get_ind_map(self, list_dict):
        """ Maps ind_dataset <--> index within the image sequence"""
        ind_map=[]
        for dict in list_dict:
            ind_map+=list(dict.keys())
        return ind_map

    def __init__(self, images_paths, bin_paths, network_name, transform_images=None, new_shape=(256,128)):
        
        self.images_path=images_paths[0]
        self.bin_paths=bin_paths
        self.transform_images= transform_images
        self.network_name= network_name
        self.new_shape=new_shape
        

        ## Processing the folder containing the jpg images 
        self.list_dir_images = os.listdir(self.images_path)
        self.list_dir_images=sorted(self.list_dir_images, key=lambda x: x.lower()) 

        ## Creating the dataset dictionnary
        self.total_dict=self.create_dict(self.bin_paths)

        # Knowing from which sequence of images each sample comes from:
        self.seq_ids=self.get_seq_ids(self.total_dict)

        # Getting the ind_map : ind_in_dataset <--> ind_in_image_sequence
        self.ind_map=self.get_ind_map(self.total_dict)
   
    
    def get_T_matrix(self, seq_id, img_id, dict):
        # Getting the information from Colmap pose calculation
        tvec=dict[seq_id][img_id].tvec
        qvec=dict[seq_id][img_id].qvec

        #Size check
        if len(qvec)!=4:
            return 'SizeError: A quaternion contains 4 values'

        if len(tvec)!=3:
            return 'SizeError: A translation contains 3 values'
        
        # Extract the values from Q
        q0 = qvec[0]
        q1 = qvec[1]
        q2 = qvec[2]
        q3 = qvec[3]
        
        # First row of the rotation matrix
        r00 = 2 * (q0 * q0 + q1 * q1) - 1
        r01 = 2 * (q1 * q2 - q0 * q3)
        r02 = 2 * (q1 * q3 + q0 * q2)
        
        # Second row of the rotation matrix
        r10 = 2 * (q1 * q2 + q0 * q3)
        r11 = 2 * (q0 * q0 + q2 * q2) - 1
        r12 = 2 * (q2 * q3 - q0 * q1)
        
        # Third row of the rotation matrix
        r20 = 2 * (q1 * q3 - q0 * q2)
        r21 = 2 * (q2 * q3 + q0 * q1)
        r22 = 2 * (q0 * q0 + q3 * q3) - 1
        
        # 3x3 rotation matrix
        rot_matrix = np.array([[r00, r01, r02],
                            [r10, r11, r12],
                            [r20, r21, r22]])

        # Full T_world-->cam matrix
        T=np.array([[r00, r01, r02, tvec[0]],
                    [r10, r11, r12, tvec[1]],
                    [r20, r21, r22, tvec[2]],
                    ])
        
        # Converting the array to a tensor (batch processing later)
        return torch.from_numpy(T)
        
    
    def __getitem__(self, index):
        """ 
        Input: index: an integer giving the position in the dataset

        Output: Dictionnary

        Keys in the dictionnary are tuples:
            ("img_array", <frame_id>)               for resized colour images,
            ("T_matrix",  <frame_id>)               for pose camera matrices

        
        <frame_id> is:
            an integer (0 or 1) representing the temporal step relative to 'index',
            0 --> t
            1 --> t+1
        """
        item={}

        # Image ids
        seq_id=int(self.seq_ids[index])
        img_id=self.ind_map[index]

        # If index= the last of the sequence, we need to work with the previous image
        #max_img_id=int(max(list(self.total_dict[seq_id].keys())))
        #if img_id==max_img_id:
        #    step=-1
        #else:
        #    step=+1

        step=1
        
        index_next=self.ind_map.index(img_id+step)

        # Loading images + necessary transformation
        img=np.array(Image.open(os.path.join(self.images_path, self.list_dir_images[index])))
        img=cv2.resize(img, self.new_shape, interpolation=cv2.INTER_NEAREST)

        img_next=np.array(Image.open(os.path.join(self.images_path, self.list_dir_images[index_next])))
        img_next=img=cv2.resize(img_next, self.new_shape, interpolation=cv2.INTER_NEAREST)

        if self.transform_images is not None:
            img=self.transform_images(img)
            img_next=self.transform_images(img_next)

        # Later= data augmentation
        # ...
        #
        
        # Pose matrices
        T=self.get_T_matrix(seq_id, img_id, self.total_dict)
        T_next=self.get_T_matrix(seq_id, img_id+step, self.total_dict)

        # Filling the dictionnary

        item[("img_array", 0)]=img
        item[("img_array", 1)]=img_next
        item[("T_matrix",  0)]=T
        item[("T_matrix",  1)]=T_next
        
        # Getting + resizing the image from the dataset folder
        img = np.array(Image.open(os.path.join(self.images_path, self.list_dir_images[index])))
        img=cv2.resize(img, self.new_shape, interpolation=cv2.INTER_NEAREST)

        return item
    

    def __len__(self):
        return len(self.list_dir_images)

In [8]:
# Trying the dataset

batch_size=8
train_paths=['/home/server/Ines/models/raw/train/images', '/home/server/Ines/models/raw/train/depth_maps']
test_paths=['/home/server/Ines/models/raw/test/images', '/home/server/Ines/models/raw/test/depth_maps']
network_name='self_supervised'
new_shape=(256,128)

train_dataset=Image_and_Pose_Pairs(train_paths, train_bin_paths, 'self_supervised')


In [9]:
train_loader=DataLoader(train_dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True)

In [10]:
print(train_dataset[243][("T_matrix", 0)])

[[-0.06313325 -0.9976945  -0.02489732 -3.24288019]
 [-0.20279135  0.03725111 -0.97851317 -1.46815524]
 [ 0.97718466 -0.05672776 -0.2046756  -1.6055572 ]]


In [11]:
def create_list_tuples(dict, datasetname):
    """Input: dictionnary created thanks to colmap script + datasetname to correct the image name
       Output

    """
ms=[]
for i in range(1, len(ms_objects)+1):
    qvec, tvec, name= ms_objects[i].qvec, ms_objects[i].tvec, ms_objects[i].name
    ms.append((qvec, tvec, name))
print(len(ms))



106


In [16]:
def generate_self_supervised(batch_size, train_im_paths, test_im_paths, train_bin_paths, test_bin_paths, new_shape, network_name):
    """
    Input: Network parameters chosen by the user 
    Output : Pytorch train / test datasets and dataloaders 
    """

    ## Image transformation and useful parameters 

    transform_image=T.ToTensor()
    test_batch_size=1

    train_dataset=Image_and_Pose_Pairs(train_im_paths, train_bin_paths, network_name, transform_images=None, new_shape=(256,128))
    test_dataset=Image_and_Pose_Pairs(test_im_paths, test_bin_paths, network_name, transform_images=None, new_shape=(256,128))

    train_loader=DataLoader(train_dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True)
    test_loader=DataLoader(test_dataset, test_batch_size, shuffle=True, num_workers=4, pin_memory=True)

    return train_dataset, test_dataset, train_loader, test_loader 

In [17]:
## Parameters 

#test_bin_paths=[ghost_city_bin_path, medium_structure_bin_path]
#train_bin_paths=[old_cliff_bin_path, thermitiere_bin_path]

batch_size=8
train_im_paths=['/home/server/Ines/models/raw/train/images', '/home/server/Ines/models/raw/train/depth_maps']
test_im_paths=['/home/server/Ines/models/raw/test/images', '/home/server/Ines/models/raw/test/depth_maps']
network_name='self_supervised'
new_shape=(256,128)

## Test generation method

train_dataset, test_dataset, train_loader, test_loader = generate_self_supervised(batch_size, train_im_paths, test_im_paths, train_bin_paths, test_bin_paths, new_shape, network_name)


In [26]:
ind=0
for test_item in test_loader:
    if ind<4:
        print('INDEX', ind)
        print('Pose matrix')
        print(test_dataset[ind][("T_matrix", 1)])
        ind+=1
    else:
        break

INDEX 0
Pose matrix
[[ 9.62124827e-01  2.71629800e-01 -2.30882819e-02 -2.58623899e+01]
 [ 1.84646765e-01 -7.11642995e-01 -6.77842031e-01  1.24432007e+01]
 [-2.00552710e-01  6.47905470e-01 -7.34844958e-01  4.53820553e+00]]
INDEX 1
Pose matrix
[[ 9.61212758e-01  2.75100265e-01 -1.97453066e-02 -2.58964661e+01]
 [ 1.90533011e-01 -7.14081132e-01 -6.73635887e-01  1.26656769e+01]
 [-1.99417162e-01  6.43745276e-01 -7.38799577e-01  4.91454295e+00]]
INDEX 2
Pose matrix
[[ 9.57116956e-01  2.88913716e-01 -2.13541009e-02 -2.59418909e+01]
 [ 1.99529952e-01 -7.10854545e-01 -6.74443188e-01  1.25056394e+01]
 [-2.10035547e-01  6.41260228e-01 -7.38017879e-01  5.35667896e+00]]
INDEX 3
Pose matrix
[[  0.94843812   0.3088604    0.0712067  -26.59606946]
 [  0.29316248  -0.76939071  -0.56753299   9.91974163]
 [ -0.12050269   0.55914505  -0.82026576   3.17994192]]


In [None]:
# Test computation reprojection loss:

# Intrinsics matrix given in the trainer parameters 

K=None
for train_item in train_loader:
    ## Init
    im=train_item("img_array", 0)
    next_im=train_item("img_array", 1)

    T=train_item("T_matrix", 0)
    T_next=train_item("T_matrix", 1)

    T_inv=np.pinv(T)
    T_next_inv=np.pinv(T_next)

    ## Proper device 

    ## Reprojection loss 

    # R t--> t+1

    # R t+1 --> t


## Working on the reprojection process 

Goal= applying the reprojection process + loss computation on mini-batches

In [28]:
import torch.nn as nn

In [160]:
class BackprojectDepth(nn.Module):
    """Layer to transform a depth image into a point cloud
    """
    def __init__(self, batch_size, height, width):
        super(BackprojectDepth, self).__init__()

        self.batch_size = batch_size
        self.height = height
        self.width = width

        # Creating all the p=(u,v) pixels 
        meshgrid = np.meshgrid(range(self.width), range(self.height), indexing='xy')
        self.id_coords = np.stack(meshgrid, axis=0).astype(np.float32)
        self.id_coords = nn.Parameter(torch.from_numpy(self.id_coords),
                                      requires_grad=False)

        self.ones = nn.Parameter(torch.ones(self.batch_size, 1, self.height * self.width),
                                 requires_grad=False)

        self.pix_coords = torch.unsqueeze(torch.stack(
            [self.id_coords[0].view(-1), self.id_coords[1].view(-1)], 0), 0)
        self.pix_coords = self.pix_coords.repeat(batch_size, 1, 1)
        self.pix_coords =torch.cat([self.pix_coords, self.ones], 1)
        #self.pix_coords =torch.transpose(self.pix_coords, 0 ,2)
        #print(self.pix_coords.size())
        #self.pix_coords =torch.transpose(self.pix_coords, 0 ,1)
        self.pix_coords=nn.Parameter(self.pix_coords)

                                       
        # UNDERSTANDING PIX COORDS
        
        print('pix size', self.pix_coords.size())
        

    def forward(self, depth, inv_K):
        print(self.pix_coords.size())
        
        cam_points = torch.matmul(inv_K[:, :3, :3], self.pix_coords)
        cam_points = depth.view(self.batch_size, 1, -1) * cam_points
        cam_points = torch.cat([cam_points, self.ones], 1)
        print('cam points size', cam_points.size())

        return cam_points

In [159]:
batch_size=8
height=5
width=2
backproject_depth=BackprojectDepth(batch_size, height, width)


pix size torch.Size([8, 3, 10])


In [195]:
K=torch.from_numpy(np.ones((8,3,4)))
K=K.type(torch.FloatTensor)
inv_K=np.linalg.pinv(K)
inv_K=torch.from_numpy(inv_K)
depth=torch.from_numpy(np.ones((8,height,width)))
points_cam_ref=backproject_depth(depth, inv_K)
# OK!!

torch.Size([8, 3, 10])
cam points size torch.Size([8, 4, 10])


In [196]:
## Understanding cam ref projection --> world ref projection.

print(points_cam_ref.size())
T=torch.from_numpy(np.ones((batch_size, 4,4)))
points_world_ref=torch.matmul(T, points_cam_ref)
print(points_world_ref.size())

torch.Size([8, 4, 10])
torch.Size([8, 4, 10])


In [213]:
## From world ref to the pixel plane of the second camera

class Project3D(nn.Module):
    """Layer which projects 3D points into a camera with intrinsics K and with inverse pose matrix T_inv
    """
    def __init__(self, batch_size, height, width, eps=1e-7):
        super(Project3D, self).__init__()

        self.batch_size = batch_size
        self.height = height
        self.width = width
        self.eps = eps

    def forward(self, points_world_ref, K, T_inv):
        K=K.type(torch.FloatTensor)
        T_inv=T_inv.type(torch.FloatTensor)
        points_world_ref=points_world_ref.type(torch.FloatTensor)
        P = torch.matmul(K, T_inv)[:, :3, :]
        print(P.size())
        points_new_cam_ref=torch.matmul(P, points_world_ref)
        pix_coords = points_new_cam_ref[:, :2, :] / (points_new_cam_ref[:, 2, :].unsqueeze(1) + self.eps)

      
        
        
        print( 'Pix size', pix_coords.size())
      
        
        
        pix_coords = pix_coords.view(self.batch_size, 2, self.height, self.width)
        print( 'Pix size', pix_coords.size())
        pix_coords = pix_coords.permute(0, 2, 3, 1)
        print( 'Pix size', pix_coords.size())
        pix_coords[..., 0] /= self.width - 1
        print( 'Pix size', pix_coords.size())
        pix_coords[..., 1] /= self.height - 1
        print( 'Pix size', pix_coords.size())
        #pix_coords = (pix_coords - 0.5) * 2
        return None

In [214]:
project3d=Project3D(batch_size, height, width)
T_inv=torch.from_numpy(np.ones((batch_size, 4,4)))
pix_coords=project3d(points_world_ref, K, T_inv)


torch.Size([8, 3, 4])
Pix size torch.Size([8, 2, 10])
Pix size torch.Size([8, 2, 5, 2])
Pix size torch.Size([8, 5, 2, 2])
Pix size torch.Size([8, 5, 2, 2])
Pix size torch.Size([8, 5, 2, 2])
