In [1]:
from torch import nn
import torch
import pandas as pd
import numpy as np
import os
from torchvision.io import read_image
from torchvision import datasets
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
import transforms3d as tf
from vit_pytorch import ViT

In [32]:
def createPickle():
    stereo_left_df = pd.read_csv("indoor_forward_9_snapdragon_with_gt/left_images.txt", delimiter=' ', skiprows=1, names=["id", "timestamp", "image_name"], index_col=False) 
    labels_df = pd.read_csv("indoor_forward_9_snapdragon_with_gt/groundtruth.txt", delimiter=' ', skiprows=1, names=["timestamp", "tx", "ty", "tz", "qx", "qy", "qz", "qw", "img_start_idx"])
    for i in range(len(labels_df)):
        minid = abs(stereo_left_df["timestamp"]-labels_df.iloc[i]["timestamp"]).idxmin()
        labels_df["img_start_idx"][i] = minid
    labels_df.to_pickle("./stereo_indexed_labels.pickle")
createPickle()

In [29]:
class StereoDataset(Dataset):
    def __init__(self, data_dir, label_delta_len):
        self.stereo_data = self.preprocess_images()

        self.labels_df = pd.read_pickle("./stereo_indexed_labels.pickle")
        labels_df_nostamp = self.labels_df.loc[:, self.labels_df.columns != 'timestamp']
        labels_df_nostamp = labels_df_nostamp.loc[:, labels_df_nostamp.columns != 'events_start_idx']
        self.labels = torch.tensor(labels_df_nostamp.to_numpy(), dtype=torch.float32)

        self.delta = label_delta_len

    def __len__(self):
        return self.labels.shape[0] - self.delta

    def set_label_delta(self, delta):
        self.delta = delta

    def __getitem__(self, idx):
        dpose = torch.zeros(7)
        l1 = self.labels[idx]
        l2 = self.labels[idx+self.delta]
        dl = l2-l1 # position delta
        dpose[:3] = dl[:3]
        q1 = l1[3:][[3,0,1,2]] # rearange quaternion, w goes first
        q2 = l2[3:][[3,0,1,2]] # rearange quaternion, w goes first
        dq = tf.quaternions.qmult(q2, tf.quaternions.qinverse(q1)) # orientation delta
        dpose[3:] = torch.tensor(dq, dtype=torch.float32) # pose delta is the label

        start = self.labels_df.iloc[idx]["img_start_idx"]
        finish = self.labels_df.iloc[idx+self.delta]["img_start_idx"]

        # could highly optimize this : )
        # torch version https://stackoverflow.com/questions/65584330/add-a-index-selected-tensor-to-another-tensor-with-overlapping-indices-in-pytorc/65584479#65584479
        events = np.array(self.stereo_data[int(start):int(finish)], dtype=np.uint16)
        print(events)
        mc = 400 # 345 is max dim
        img = np.zeros((mc,mc), dtype=np.float32)

        np.add.at(img,tuple([events[:,1], events[:,0]]), events[:,2] - .5)
        events = torch.tensor(img).unsqueeze(0)

        return events, dpose


    def preprocess_images(self):
        data_dir = "indoor_forward_9_snapdragon_with_gt/"
        img_annot_cols=["stamp","name","none"]
        left_images = pd.read_csv(data_dir + "left_images.txt", delimiter=' ', skiprows=1, names=img_annot_cols)
        right_images = pd.read_csv(data_dir + "right_images.txt", delimiter=' ', skiprows=1, names=img_annot_cols)
        images = []

        print("Preprocessing images")
        for idx, i in enumerate(zip(left_images['name'], right_images['name'])):
            if idx % 100 == 0:
                print(idx)
            left_image = Image.open(data_dir + i[0])
            right_image = Image.open(data_dir +  i[1])
            left_image = left_image.resize((160,120))
            right_image = right_image.resize((160,120))
            concat = Image.new('L', (left_image.width + right_image.width, left_image.height))
            concat.paste(left_image, (0,0))
            concat.paste(right_image, (left_image.width,0))
            images.append(concat)

        tensors = []
        convert_tensor = transforms.ToTensor()
        print("Creaing tensors")
        for i, v in enumerate(images):
            if i % 100 == 0:
                print(i)
            tensors.append(convert_tensor(v))

        print("Stacking tensors")
        output = []
        for i in range(2, len(tensors)):
            if i % 100 == 0:
                print(i)            
            output.append(torch.stack(tensors[i-2:i+1]))

        return output

data_dir = "indoor_forward_9_davis_with_gt"
vio_dataset = StereoDataset(data_dir, 1)

Preprocessing images
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
Creaing tensors
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
Stacking tensors
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000


In [34]:
vio_dataset.set_label_delta(100)
events = vio_dataset.__getitem__(1000)[0].squeeze(0)
plt.imshow(events)

  events = np.array(self.stereo_data[int(start):int(finish)], dtype=np.uint16)


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (61,) + inhomogeneous part.