In [1]:
from torch import nn
import torch
import pandas as pd
import numpy as np
import os
from torchvision.io import read_image
from torchvision import datasets
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
import transforms3d as tf
from vit_pytorch import ViT

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# for i in range(len(labels_df)):
#     minid = abs(events_df["timestamp"]-labels_df.iloc[i]["timestamp"]).idxmin()
#     labels_df["events_start_idx"][i] = minid
# labels_df.to_pickle("event_indexed_labels.pickle")
# WARNING: the loop above takes ~40 minutes to run; its result is saved to event_indexed_labels.pickle.

In [3]:
class EventDataset(Dataset):
    """Event-camera dataset yielding ``(event image, pose delta)`` pairs.

    Sample ``idx`` pairs the events recorded between label ``idx`` and label
    ``idx + delta`` (accumulated into a single-channel image) with the pose
    change between those two labels.

    Args:
        data_dir: Directory containing ``events.txt`` (space-delimited, one
            header row, columns ``timestamp x y polarity``).
        label_file: Currently unused; the labels are read from
            ``indexed_labels_file`` instead. Kept so existing callers keep
            working.
        label_delta_len: Number of label rows between the start and the end
            of one sample.
        indexed_labels_file: Pickled label DataFrame that carries an
            ``events_start_idx`` column (row index into the events table for
            each label). Defaults to the path the notebook has always used.
        img_size: Side length of the square accumulation image.
            NOTE(review): the original comment says the largest sensor
            dimension is 345, so the default of 400 leaves a margin — confirm.
    """

    def __init__(self, data_dir, label_file, label_delta_len,
                 indexed_labels_file="event_indexed_labels.pickle", img_size=400):
        events_cols = ["timestamp", "x", "y", "polarity"]
        self.events_df = pd.read_csv(os.path.join(data_dir, "events.txt"), delimiter=' ', skiprows=1, names=events_cols)
        # Keep only (x, y, polarity); the timestamp is not needed once the
        # labels carry row indices into this table.
        events_df_nostamp = self.events_df.loc[:, self.events_df.columns != 'timestamp']
        self.events_data = torch.from_numpy(events_df_nostamp.to_numpy(dtype=np.float32))

        # NOTE(review): unpickling can execute arbitrary code — only load
        # label files that were produced locally.
        self.labels_df = pd.read_pickle(indexed_labels_file)
        # Everything except the bookkeeping columns is treated as the pose.
        # Assumes 7 remaining columns: 3 position values followed by a
        # quaternion stored as (x, y, z, w) — TODO confirm against the data.
        bookkeeping = ['timestamp', 'events_start_idx']
        labels_df_nostamp = self.labels_df.loc[:, ~self.labels_df.columns.isin(bookkeeping)]
        self.labels = torch.tensor(labels_df_nostamp.to_numpy(), dtype=torch.float32)

        self.delta = label_delta_len
        self.img_size = img_size

    def __len__(self):
        """Number of samples: every label that still has a partner ``delta`` rows later."""
        return self.labels.shape[0] - self.delta

    @staticmethod
    def _accumulate_events(events, img_size):
        """Sum signed event polarities into an ``(img_size, img_size)`` float32 image.

        Args:
            events: Integer array of shape ``(N, 3)`` with columns
                ``(x, y, polarity)``.
            img_size: Side length of the square output image.

        Returns:
            ``np.ndarray`` of dtype float32 where pixel ``[y, x]`` holds the sum
            of ``polarity - 0.5`` over all events at that location.
        """
        img = np.zeros((img_size, img_size), dtype=np.float32)
        signed_polarity = events[:, 2].astype(np.float32) - np.float32(0.5)
        # The index MUST be a tuple: NumPy >= 1.23 interprets a list of arrays
        # as one 2-D index on the first axis rather than a (row, col) pair.
        # np.add.at (unlike `img[ys, xs] += v`) accumulates repeated pixels.
        np.add.at(img, (events[:, 1], events[:, 0]), signed_polarity)
        return img

    def __getitem__(self, idx):
        """Return ``(events, dpose)`` for sample ``idx``.

        Returns:
            events: float32 tensor of shape ``(1, img_size, img_size)``.
            dpose: float32 tensor of 7 values — position difference in
                ``[:3]`` and relative quaternion (w first) in ``[3:]``.
        """
        l1 = self.labels[idx]
        l2 = self.labels[idx + self.delta]

        dpose = torch.zeros(7)
        dpose[:3] = l2[:3] - l1[:3]  # position delta
        q1 = l1[3:][[3, 0, 1, 2]]  # rearrange quaternion, w goes first
        q2 = l2[3:][[3, 0, 1, 2]]  # rearrange quaternion, w goes first
        dq = tf.quaternions.qmult(q2, tf.quaternions.qinverse(q1))  # orientation delta
        dpose[3:] = torch.tensor(dq, dtype=torch.float32)  # pose delta is the label

        # Index the column directly instead of materialising a whole row
        # Series for every lookup.
        start_idx = self.labels_df["events_start_idx"]
        start = int(start_idx.iloc[idx])
        finish = int(start_idx.iloc[idx + self.delta])

        # Could be optimized further with a pure-torch scatter-add, see
        # https://stackoverflow.com/questions/65584330/add-a-index-selected-tensor-to-another-tensor-with-overlapping-indices-in-pytorc/65584479#65584479
        window = self.events_data[start:finish].numpy().astype(np.uint16)
        img = self._accumulate_events(window, self.img_size)
        events = torch.from_numpy(img).unsqueeze(0)  # add channel dimension

        return events, dpose

    def set_label_delta(self, delta):
        """Change the label spacing used by ``__len__`` and ``__getitem__``."""
        self.delta = delta

# Recording to train on, and the number of label rows spanned by one sample.
data_dir = "indoor_forward_9_davis_with_gt"
ENC_SEQ_LEN = 1
vio_dataset = EventDataset(
    data_dir=data_dir,
    label_file="groundtruth.txt",
    label_delta_len=ENC_SEQ_LEN,
)

In [4]:
# import time
# s = time.time()
# for i in range(len(vio_dataset)):
#     size = vio_dataset.__getitem__(i)[0].shape[0]
# (time.time() - s) / 1000 # per one sample, very quick

In [5]:
# Shape of the event image of one arbitrary sample.
vio_dataset[1002][0].shape



torch.Size([1, 400, 400])

In [6]:
# Vision Transformer used as a regressor: one single-channel 400x400 event
# image in, 7 values out (same length as the dataset's pose-delta label).
vit_config = dict(
    image_size=400,
    patch_size=20,
    num_classes=7,
    dim=1024,
    depth=6,
    heads=16,
    mlp_dim=2048,
    dropout=0.1,
    emb_dropout=0.,
    channels=1,
)
model = ViT(**vit_config)

# Smoke test: push a single sample through the untrained model.
img = vio_dataset[1002][0].unsqueeze(0)  # add batch dimension -> (1, 1, 400, 400)

preds = model(img)  # (1, 7)
preds



tensor([[-0.6508, -0.8224,  0.2350,  0.7810, -0.0408, -0.0881,  0.1149]],
       grad_fn=<AddmmBackward0>)

In [7]:
# Move the model to the selected device and build the loss, optimizer and loader.
model = model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
dataloader = torch.utils.data.DataLoader(
    dataset=vio_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=4,
)
# Pull one batch to confirm the collated input shape.
first_batch = next(iter(dataloader))
first_batch[0].shape



torch.Size([8, 1, 400, 400])

In [8]:
# Training loop adapted from
# https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

torch.cuda.empty_cache()

num_epochs = 20
for epoch in range(num_epochs):
    print(f'Epoch {epoch}/{num_epochs - 1}')
    print('-' * 10)

    # Only a 'train' phase is run for now; the phase loop is kept so that
    # further phases can be added to the list.
    for phase in ['train']:
        is_training = phase == 'train'
        # train(True) puts the model in training mode, train(False) in eval mode.
        model.train(is_training)

        running_loss = 0.0

        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Clear gradients left over from the previous batch.
            optimizer.zero_grad()

            # Gradients are tracked only while training.
            with torch.set_grad_enabled(is_training):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if is_training:
                    loss.backward()
                    optimizer.step()

            # Weight the batch-mean loss by the batch size so the epoch
            # figure below is a per-sample average.
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(vio_dataset)

        print(f'{phase} Loss: {epoch_loss:.4f}')

Epoch 0/19
----------




In [None]:
# vio_dataset.set_label_delta(10)
# events = np.array(vio_dataset.__getitem__(1002)[0], dtype=np.uint16)
# mc = int(events.max())
# img = np.zeros((mc+1,mc+1))
# np.add.at(img,[events[:,1], events[:,0]], events[:,2] - .5)
# # img[events[:,1],events[:,0]] += (events[:,2] - .5)
# plt.imshow(img), img.max()
# test = np.zeros((3,3))
# index = [[1,1],[0,0]]
# values = [1,1]
# np.add.at(test,index,values)
# test

In [None]:
# class IMUTransformer(nn.Module):
#     def __init__(self):
#         super().__init__()
#         encoder_layer = nn.TransformerEncoderLayer(d_model=3, nhead=3, dim_feedforward=8, batch_first=True)
#         transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=1)
#         self.encoder = transformer_encoder
#         self.fc1 = nn.Linear(in_features=3, out_features=8)
#         self.fc2 = nn.Linear(in_features=8, out_features=7)

#     def forward(self, x):
#         x = torch.relu(self.encoder(x))
#         x = torch.relu(self.fc1(x[:,-1,:]))
#         x = self.fc2(x)
#         return x

# model = IMUTransformer()
# model

In [None]:
# model = model.to(device)
# criterion = torch.nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# dataloader = torch.utils.data.DataLoader(vio_dataset, batch_size=4, shuffle=True, num_workers=4) # 4 might work :?)
# next(iter(dataloader))[0].shape