# Copy

In [1]:
import torch
import json

from torch import nn
from torchvision import transforms
from models import MultiviewViltForQuestionAnswering
from PIL import Image
from isvqa_data_setup import ISVQA
from collections import Counter
from torch.utils.data import DataLoader
from nuscenesqa_data_setup import NuScenesQA
from torch.utils.data import random_split
from nuscenes.nuscenes import NuScenes


In [2]:
# Fixed seed shared by every stochastic step in this notebook.
seed = 42
# Seed PyTorch's global RNG as well: `DataLoader(shuffle=True)` and weight
# initialisation draw from it, so defining `seed` alone does not make runs repeatable.
torch.manual_seed(seed)
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
nusc = NuScenes(version='v1.0-trainval', dataroot='/home/nikostheodoridis/nuscenes', verbose=False)

In [4]:
# Locations passed to the dataset wrapper below.
# NOTE(review): both are absolute, machine-specific paths; presumably the first holds the
# NuScenes-QA annotation files and the second the raw nuScenes data — confirm.
qa_folder = "/home/nikostheodoridis/nuscenes-qa"
nuscenes_folder = "/home/nikostheodoridis/nuscenes"
# Training dataset: built from the "train" QA set and the `nusc` handle created in the previous cell.
train_nuscenesqa = NuScenesQA(qa_folder_path=qa_folder,
                              qa_set="train",
                              nusc=nusc,
                              nuscenes_folder_path=nuscenes_folder)



In [5]:
# Held-out dataset built from the "val" QA set with the same folders and `nusc` handle as
# the training set; a later cell splits it into validation and test subsets.
valtest_nuscenesqa = NuScenesQA(qa_folder_path=qa_folder,
                                qa_set="val",
                                nusc=nusc,
                                nuscenes_folder_path=nuscenes_folder)

In [6]:
seed = 42

In [7]:
# Split the held-out set into two halves (validation gets the floor half, test the rest).
n_valtest = len(valtest_nuscenesqa)
val_size = n_valtest // 2
test_size = n_valtest - val_size

# A dedicated, explicitly seeded generator keeps the split identical across runs.
generator = torch.Generator()
generator.manual_seed(seed)

val_nuscenesqa, test_nuscenesqa = random_split(
    valtest_nuscenesqa,
    [val_size, test_size],
    generator=generator,
)


In [8]:
# All three loaders share one batch size; only the training loader shuffles.
batch_size = 6

train_loader = DataLoader(train_nuscenesqa, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_nuscenesqa, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_nuscenesqa, batch_size=batch_size, shuffle=False)

In [9]:
batch = next(iter(train_loader))

In [10]:
batch[0]["pixel_values"].shape

torch.Size([6, 6, 3, 352, 608])

In [11]:
batch[1].shape

torch.Size([6, 30])

In [12]:
type(batch[0])

collections.OrderedDict

In [13]:
# Instantiate the multi-view ViLT QA model and place it on the device selected in the
# setup cell (which falls back to CPU when CUDA is unavailable) instead of a hard-coded "cuda".
# NOTE(review): the meaning of the positional arguments (6, 210, 768, True, True) is not
# visible in this notebook — confirm against `models.MultiviewViltForQuestionAnswering`.
model = MultiviewViltForQuestionAnswering(6, 210, 768, True, True).to(device)

# NOTE(review): the recorded run of this cell raised
# "ValueError: Target size (torch.Size([6, 30])) must be the same as input size (torch.Size([6, 3129]))",
# i.e. the labels have 30 columns while the model produced 3129 outputs per sample.
# The model's output size presumably has to be configured to match the labels — TODO confirm.
output = model(**batch[0], labels=batch[1])

ValueError: Target size (torch.Size([6, 30])) must be the same as input size (torch.Size([6, 3129]))

In [None]:
# Toy check of the exact-match idea: x and y differ in row 0 (1 vs 0) and agree fully in row 1.
x = torch.tensor([[1, 2], [3, 4]])
y = torch.tensor([[0, 2], [3, 4]])

# Count equal elements per row, keep the rows where both (2) elements match, then count those rows.
(torch.eq(x, y).sum(dim=1) == 2).sum()

In [None]:
def accuracy(predictions, targets):
    """Return the fraction of rows whose prediction matches the target exactly.

    A row counts as correct only when every element of that row in
    `predictions` equals the corresponding element in `targets`. The row
    width is taken from the tensors themselves rather than being fixed at 30,
    so the metric works for any number of label columns.

    Args:
        predictions: 2-D tensor with one row per sample.
        targets: Tensor comparable element-wise with `predictions`.

    Returns:
        A 0-d floating-point tensor: exactly matching rows divided by
        `len(predictions)`.
    """
    # `all(dim=1)` is True only for rows in which every column matches.
    exact_rows = torch.eq(predictions, targets).all(dim=1)
    return exact_rows.sum() / len(predictions)

In [None]:
def train_one_epoch(model, loader, optimizer, accuracy):
    """Train `model` for one pass over `loader` and return the epoch averages.

    Args:
        model: Module called as ``model(**X, labels=y)``. Its output must expose
            ``.loss``; it is also assumed to expose ``.logits``
            (NOTE(review): confirm for MultiviewViltForQuestionAnswering).
        loader: Iterable yielding ``(X, y)`` batches, where ``X`` is a mapping of
            keyword inputs and ``y`` the label tensor.
        optimizer: Optimizer updating the model's parameters.
        accuracy: Callable ``accuracy(predictions, targets)`` returning a scalar.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)`` of Python floats averaged over the
        batches; both are NaN when the loader yields no batches.
    """
    model.train()
    losses = []
    accuracies = []

    # Iterate the loader itself; a DataLoader is iterable, not callable.
    for X, y in loader:
        outputs = model(**X, labels=y)
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            # NOTE(review): assumes multi-hot labels trained with a sigmoid/BCE-style loss,
            # so a logit above 0 (probability above 0.5) is a positive prediction — confirm.
            predictions = (outputs.logits > 0).to(y.dtype)
            # Record the metric's value for this batch, not the metric function itself.
            accuracies.append(float(accuracy(predictions, y)))
        losses.append(loss.item())

    if not losses:
        return float("nan"), float("nan")
    return sum(losses) / len(losses), sum(accuracies) / len(accuracies)

In [None]:
# Create the ISVQA dataset holding the question-answer pairs.

# NOTE(review): both paths are absolute and machine-specific; the second one points at the
# `samples` sub-folder of the nuScenes directory.
qa_path = "/home/nikostheodoridis/isvqa"
nuscenes_path = "/home/nikostheodoridis/nuscenes/samples"

# device="cpu" is passed explicitly; the following cells print the tensors' devices to check it.
isvqa = ISVQA(qa_path, nuscenes_path, device="cpu")

In [None]:
len(isvqa)

In [None]:
print(isvqa[0][0]["pixel_values"].device)
print(isvqa[0][1].device)

In [None]:
dataloader = DataLoader(isvqa, batch_size=8)

In [None]:
batch = next(iter(dataloader))

In [None]:
print(batch[0]["pixel_values"].device)
print(batch[1].device)

In [None]:
len(isvqa)

In [None]:
model = MultiviewViltForQuestionAnswering(6, 210, 768, True, True)

In [None]:
out = model(**batch[0], labels=batch[1])

In [None]:
batch[0]["input_ids"]

In [None]:
len(trainval_qa)

In [None]:
len(test_qa)

In [None]:
test_qa[0]

In [None]:
# Count how many images referenced by the ISVQA entries are missing on disk.
# NOTE(review): this cell indexes each entry by key (`data["image_names"]`), whereas earlier
# cells index `isvqa[0]` positionally — confirm which dataset layout it was written for.
cnt = 0
for data in isvqa:
    for image in data["image_names"]:
        try:
            # Open inside a context manager so the file handle is released right away;
            # only the existence of the file matters here, not its contents.
            with Image.open(f"/home/nikostheodoridis/nuscenes/samples/{image}.jpg"):
                pass
        except FileNotFoundError:
            cnt += 1

cnt

In [None]:
# Distinct majority answers: for each entry, take the answer given most often
# (first-seen answer wins a tie) and collect the unique ones.
answers = {
    max(tally, key=tally.get)
    for tally in (Counter(entry["answers"]) for entry in isvqa)
}

len(answers)

In [None]:
# Toy example of finding the most frequent element with `Counter.most_common`.
# `Counter` is already imported in the notebook's import cell, so it is not re-imported here.
my_list = [1, 2, 2, 3, 3, 3, 4, 5]

counter = Counter(my_list)
# most_common(1) returns a one-element list of (element, count) pairs.
most_common_element = counter.most_common(1)[0][0]
print(f"The element that appears most frequently is {most_common_element}")

In [None]:
counter

In [None]:
trainval_qa[0]

In [None]:
def get_questions_from_sample(qa_set, sample):
    """Collect the questions in `qa_set` that belong to `sample`.

    Args:
        qa_set: Mapping whose "questions" entry is an iterable of mappings with
            "sample_token" and "question" keys.
        sample: Mapping with a "token" key identifying the sample.

    Returns:
        List of the "question" values, in their original order, of every entry
        whose "sample_token" equals ``sample["token"]``.
    """
    return [
        entry["question"]
        for entry in qa_set["questions"]
        if entry["sample_token"] == sample["token"]
    ]

In [None]:
len(trainval_qa)

In [None]:
len(test_qa)

In [None]:
trainval_qa = clean_data(trainval_qa)

In [None]:
test_qa = clean_data(test_qa)

In [None]:
len(trainval_qa)

In [None]:
len(test_qa)

In [None]:
# Drop every QA entry whose most frequent answer is the "<unk>" placeholder and
# report how many entries were removed.
kept_entries = []
for data in trainval_qa["data"]:
    # Counter keeps first-seen order, so ties between equally frequent answers are
    # broken deterministically (iterating a set of strings is not stable across runs).
    counts = Counter(data["answers"])
    # Entries without any answers are kept rather than raising on an empty tally.
    if counts and counts.most_common(1)[0][0] == "<unk>":
        continue
    kept_entries.append(data)

cnt = len(trainval_qa["data"]) - len(kept_entries)
# Slice-assign so the existing list object is updated in place, as before,
# without a deepcopy of the data or a linear-time `list.remove` per dropped entry.
trainval_qa["data"][:] = kept_entries

cnt

In [None]:
len(trainval_qa["data"])

In [None]:
trainval_qa["data"][0]