In [24]:

from tqdm import tqdm
import torch
from torch import nn
from torchvision.ops import box_convert
import yaml
from transformers import OwlViTProcessor, OwlViTForObjectDetection

In [7]:
from src.dataset import get_dataloaders
from src.losses import ContrastiveDetectionLoss

In [8]:
import importlib
import src.dataset

In [9]:
import src.dataset
importlib.reload(src.dataset)

<module 'src.dataset' from '/scratch/sd5251/cap/OWL4PACO/OWL-ViT-Object-Detection/src/dataset.py'>

In [10]:
import src.utils
importlib.reload(src.utils)

<module 'src.utils' from '/scratch/sd5251/cap/OWL4PACO/OWL-ViT-Object-Detection/src/utils.py'>

In [11]:
# Reload the loss module so edits to src/losses.py are picked up without
# restarting the kernel, then re-bind the class from the freshly reloaded module.
# The module that is reloaded must be the one imported here: importing
# src.utils (as this cell used to) only worked because an earlier cell had
# already imported src.losses as a side effect.
import src.losses
importlib.reload(src.losses)
from src.losses import ContrastiveDetectionLoss

In [12]:
# Use the first CUDA device when one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
device

'cuda:0'

In [15]:
# The processor (image processor + text tokenizer) and the detection model are
# both loaded from the same pretrained checkpoint; the model is then moved to `device`.
OWLVIT_CHECKPOINT = "google/owlvit-base-patch32"
processor = OwlViTProcessor.from_pretrained(OWLVIT_CHECKPOINT)
model = OwlViTForObjectDetection.from_pretrained(OWLVIT_CHECKPOINT).to(device)

In [16]:
# Build the train and test dataloaders via the project's src.dataset helper,
# handing it the OWL-ViT processor created above.
# NOTE(review): the literal 4 is presumably the batch size (the scratch tensors
# further down have a leading dimension of 4) — confirm against
# src.dataset.get_dataloaders and consider moving it into config.yaml.
train_dataloader, test_dataloader = get_dataloaders(4, processor)

In [17]:
def get_training_config(config_path="config.yaml"):
    """Return the ``training`` section of a YAML configuration file.

    Args:
        config_path: Path of the YAML file to read. Defaults to
            ``"config.yaml"`` in the current working directory, which is what
            the notebook has always used.

    Returns:
        Whatever is stored under the top-level ``training`` key.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.
        KeyError: If the file is empty, is not a mapping at the top level, or
            has no ``training`` key.
    """
    with open(config_path, "r", encoding="utf-8") as stream:
        # safe_load never constructs arbitrary Python objects from the file.
        data = yaml.safe_load(stream)

    # An empty file parses to None; report that the same way as a missing key
    # instead of surfacing an opaque "NoneType is not subscriptable" error.
    if not isinstance(data, dict) or "training" not in data:
        raise KeyError(f"'training' section not found in {config_path}")
    return data["training"]

In [18]:
training_cfg = get_training_config()

In [19]:
criterion = ContrastiveDetectionLoss()

In [20]:
# AdamW over every model parameter, with hyper-parameters taken from the
# training config. Both values go through float(): PyYAML's YAML 1.1 resolver
# loads exponent literals written without a decimal point (e.g. 1e-4) as
# strings, and float() is a no-op for values that are already numeric.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=float(training_cfg["learning_rate"]),
    weight_decay=float(training_cfg["weight_decay"]),
)

In [25]:
# Total number of optimisation steps = epochs x batches per epoch; the progress
# bar is sized to that total and advanced once per batch by the training loop.
num_epochs = training_cfg["n_epochs"]
batches_per_epoch = len(train_dataloader)
num_training_steps = num_epochs * batches_per_epoch
progress_bar = tqdm(range(num_training_steps))

  0%|          | 0/100 [00:00<?, ?it/s]

In [25]:
# Training loop: for every batch, run the model, compute the detection loss,
# back-propagate and take one optimizer step.
# NOTE(review): `progress_bar` is created in a separate cell; re-running only this
# cell keeps advancing the same bar, so re-create the bar before each full run.
model.train()
for epoch in range(training_cfg["n_epochs"]):
    for i, (inputs, target_labels, boxes, metadata) in enumerate(train_dataloader):
        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()
        
        # Reshape the text inputs to 2-D with 16 columns.
        # NOTE(review): 16 is presumably the tokenizer's fixed query length, and the
        # incoming tensors presumably carry an extra per-image query dimension that is
        # being flattened here — confirm against the dataset/collate code.
        inputs['input_ids'] = inputs['input_ids'].view(-1,16)
        inputs['attention_mask'] = inputs['attention_mask'].view(-1,16)
        
        # Move the whole batch of model inputs to the selected device.
        inputs = inputs.to(device)
        
        outputs = model(**inputs)
        
        
        # Only the classification logits and the predicted boxes are used from the output.
        logits = outputs["logits"]
        pred_boxes = outputs["pred_boxes"]
        
        # Not used inside this loop; the value from the last batch is left in the
        # kernel namespace, where a later scratch cell reads it.
        batch_size = boxes.shape[0]
        
        # Targets must live on the same device as the predictions.
        target_labels = target_labels.to(device)
        boxes = boxes.to(device)
        
        # Project-specific loss (src.losses.ContrastiveDetectionLoss); argument order is
        # predictions first (logits, boxes), then targets (boxes, labels), then metadata.
        loss = criterion(logits, pred_boxes, boxes, target_labels, metadata)
        loss.backward()
        optimizer.step()
        # One tick per batch; the bar's description shows the current batch loss.
        progress_bar.update(1)
        progress_bar.set_description(f"Loss: {loss.item():.3f}")

Loss: 473.836: 100%|██████████| 100/100 [00:45<00:00,  2.58it/s]

In [None]:
pred_boxes[:, :, 2:].shape

In [None]:
# NOTE(review): `num_queries` is not assigned in any cell visible in this notebook,
# and `batch_size` is whatever the last iteration of the training loop left behind —
# this cell cannot run on a fresh kernel as written.
# One-hot encode the single class index 0 -> shape (1, num_queries), on `device`.
target_labels = nn.functional.one_hot(torch.zeros(1).to(torch.int64), num_classes=num_queries).to(device)
# repeat() with three factors on a 2-D tensor prepends a dimension, giving
# shape (batch_size, 1, num_queries): the same one-hot row for every sample.
target_labels = target_labels.repeat(batch_size,1,1)

In [None]:
inputs["input_ids"].shape

In [2]:
from datetime import datetime

In [6]:
datetime.now().strftime("%Y%m%d_%H%M")

'20231116_0021'

In [1]:
! python3 main.py

Loss: 512.766:   0%|                      | 12/45840 [00:18<13:16:56,  1.04s/it]^C
Traceback (most recent call last):
  File "/scratch/sd5251/cap/OWL4PACO/OWL-ViT-Object-Detection/main.py", line 58, in <module>
    for i, (inputs, target_labels, boxes, metadata) in enumerate(train_dataloader):
  File "/ext3/miniconda3/envs/owl_boto/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 630, in __next__
    data = self._next_data()
  File "/ext3/miniconda3/envs/owl_boto/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1328, in _next_data
    idx, data = self._get_data()
  File "/ext3/miniconda3/envs/owl_boto/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1294, in _get_data
    success, data = self._try_get_data()
  File "/ext3/miniconda3/envs/owl_boto/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1132, in _try_get_data
    data = self._data_queue.get(timeout=timeout)
  File "/ext3/miniconda3/envs/owl_boto/lib/python3

In [50]:
target_boxes = torch.randn(4, 1, 4)

In [51]:
num_pos_queries = torch.tensor([9,10,10,10,9,10,10])

In [54]:
a = target_boxes.expand(4,10,4)

In [56]:
# NOTE(review): this assignment raises RuntimeError. `a[1]` has shape (10, 4) while
# the slice `a[1][:9]` has shape (9, 4), and a (9, 4) tensor cannot be broadcast into
# a (10, 4) destination — one row of a dense tensor cannot be made shorter in place.
# `a` is also an expand() view of `target_boxes`, so in-place writes to it are
# questionable even when shapes match. For a varying number of positive queries per
# sample, keep a list of per-sample tensors or pad and mask instead.
a[1] = a[1][:9]

RuntimeError: The expanded size of the tensor (10) must match the existing size (9) at non-singleton dimension 0.  Target sizes: [10, 4].  Tensor sizes: [9, 4]

In [66]:
a = [(torch.tensor([527]), torch.tensor([0])), (torch.tensor([575]), torch.tensor([0])), (torch.tensor([575]), torch.tensor([0])), (torch.tensor([571]), torch.tensor([0]))]

In [75]:
a

[(tensor([527]), tensor([0])),
 (tensor([575]), tensor([0])),
 (tensor([575]), tensor([0])),
 (tensor([571]), tensor([0]))]

In [91]:
torch.tensor(a)[:,0].shape

torch.Size([4])

In [70]:
b = [(torch.tensor([502, 503, 527, 550, 551, 571, 572, 573, 574, 575]), torch.tensor([9, 7, 4, 3, 2, 8, 6, 5, 1, 0])), (torch.tensor([262, 283, 284, 285, 306, 356, 357, 380, 405, 478]), torch.tensor([3, 2, 1, 0, 7, 4, 6, 9, 5, 8])), (torch.tensor([479, 503, 527, 550, 551, 571, 572, 573, 574, 575]), torch.tensor([8, 5, 2, 3, 1, 9, 7, 4, 6, 0])), (torch.tensor([455, 479, 503, 527, 550, 551, 572, 573, 574, 575]), torch.tensor([8, 7, 5, 4, 9, 2, 6, 3, 1, 0]))]

In [73]:
b

[(tensor([502, 503, 527, 550, 551, 571, 572, 573, 574, 575]),
  tensor([9, 7, 4, 3, 2, 8, 6, 5, 1, 0])),
 (tensor([262, 283, 284, 285, 306, 356, 357, 380, 405, 478]),
  tensor([3, 2, 1, 0, 7, 4, 6, 9, 5, 8])),
 (tensor([479, 503, 527, 550, 551, 571, 572, 573, 574, 575]),
  tensor([8, 5, 2, 3, 1, 9, 7, 4, 6, 0])),
 (tensor([455, 479, 503, 527, 550, 551, 572, 573, 574, 575]),
  tensor([8, 7, 5, 4, 9, 2, 6, 3, 1, 0]))]

In [88]:
# Each entry of `b` is a pair of equal-length 1-D index tensors. Stack each pair
# column-wise into an (N, 2) tensor, then stack those along a new leading
# dimension -> (len(b), N, 2).
c = torch.stack([torch.stack((left, right), dim=1) for left, right in b])

In [147]:
c

tensor([[[502,   9],
         [503,   7],
         [527,   4],
         [550,   3],
         [551,   2],
         [571,   8],
         [572,   6],
         [573,   5],
         [574,   1],
         [575,   0]],

        [[262,   3],
         [283,   2],
         [284,   1],
         [285,   0],
         [306,   7],
         [356,   4],
         [357,   6],
         [380,   9],
         [405,   5],
         [478,   8]],

        [[479,   8],
         [503,   5],
         [527,   2],
         [550,   3],
         [551,   1],
         [571,   9],
         [572,   7],
         [573,   4],
         [574,   6],
         [575,   0]],

        [[455,   8],
         [479,   7],
         [503,   5],
         [527,   4],
         [550,   9],
         [551,   2],
         [572,   6],
         [573,   3],
         [574,   1],
         [575,   0]]])

In [104]:
target_labels = torch.tensor([[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],

        [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],

        [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],

        [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]])

In [106]:
target_labels.shape

torch.Size([4, 1, 40])

In [142]:
# One-hot rows for class indices 0..9 over 40 classes -> shape (10, 40), int64.
class_indices = torch.arange(10, dtype=torch.int64)
target_labels = torch.nn.functional.one_hot(class_indices, num_classes=40)

In [143]:
target_labels

tensor([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 

In [146]:
target_labels.shape

torch.Size([10, 40])

In [144]:
target_labels.sum(dim=-1)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [125]:
torch.arange(4).view(-1, 1).repeat(1, 10).view(-1)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [136]:
torch.zeros(10).to(torch.int64)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [155]:
# NOTE(review): the file name ends in "_model.pt_model.pt"; the doubled suffix looks
# like a path-building slip, and this call raised FileNotFoundError when run — confirm
# the actual checkpoint name under ./checkpoints/ before relying on this cell.
# torch.load unpickles the file, so only load checkpoints from a trusted source.
checkpoint = torch.load("./checkpoints/20231116_0310_model.pt_model.pt")

FileNotFoundError: [Errno 2] No such file or directory: './checkpoints/20231116_0310_model.pt_model.pt'

In [156]:
checkpoint["epoch"]

1

In [3]:
import json

In [6]:
# Load the training annotations. The context manager closes the file handle as
# soon as parsing finishes; a bare open() inside json.load() leaves the handle
# open until the garbage collector reclaims it.
with open("data/owlvit_train.json", "r") as annotations_file:
    train_data = json.load(annotations_file)

In [9]:
train_data["annotations"][910]

{'image_file_name': '../paco_data/paco/paco_frames/2bd96ede-bc7b-45e9-bb34-e370c5dffc61_003995.jpeg',
 'bbox': [1370.07, -1.58, 271.8, 815.41],
 'pos_queries': ['A red towel',
  'A striped, opaque, fabric towel',
  'An opaque, fabric, red towel',
  'A fabric towel',
  'A striped, opaque, red towel',
  'An opaque, fabric towel',
  'A red, fabric towel',
  'A striped towel',
  'A fabric, red towel',
  'A red towel',
  'A striped towel',
  'A striped, opaque towel',
  'An opaque, red towel',
  'A fabric towel',
  'An opaque towel',
  'An opaque, striped towel',
  'A red, striped towel',
  'A fabric, opaque, red towel',
  'A red towel',
  'A striped, red, opaque towel',
  'A striped towel',
  'An opaque, striped towel',
  'A red, fabric towel',
  'A red towel',
  'A striped, red towel',
  'A striped towel',
  'An opaque, striped towel',
  'An opaque towel',
  'A fabric towel',
  'A striped towel',
  'An opaque, striped, fabric towel',
  'A striped towel',
  'A striped, opaque, red towel',


In [14]:
import os

In [15]:
p = "../paco_data/paco/paco_frames/2bd96ede-bc7b-45e9-bb34-e370c5dffc61_003995.jpeg"

In [17]:
os.path.join("/scratch/hk3820/capstone/data/paco_frames/v1/paco_frames", os.path.basename(p))

'/scratch/hk3820/capstone/data/paco_frames/v1/paco_frames/2bd96ede-bc7b-45e9-bb34-e370c5dffc61_003995.jpeg'