In [1]:
import json
import os
from collections import Counter
from tqdm import tqdm
from collections import defaultdict

import numpy as np
import torch
import yaml
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from transformers import OwlViTProcessor

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Shared root directory of the capstone data on the scratch filesystem.
_PACO_DATA_ROOT = "/scratch/hk3820/capstone/data"

# PACO Ego4D v1 annotation JSON files (train / val splits).
TRAIN_ANNOTATIONS_FILE = f"{_PACO_DATA_ROOT}/paco_annotations/paco_ego4d_v1_train.json"
VAL_ANNOTATIONS_FILE = f"{_PACO_DATA_ROOT}/paco_annotations/paco_ego4d_v1_val.json"

# Image root; OwlDataset joins image base names onto this directory.
IMAGES_PATH = f"{_PACO_DATA_ROOT}/paco_frames/v1/paco_frames"

In [3]:
# Annotation file opened by the next cell; currently points at the validation split.
annotations_file = VAL_ANNOTATIONS_FILE

In [4]:
# Parse the selected annotation JSON fully into memory.
with open(annotations_file) as f:
    data = json.load(f)

# NOTE(review): later cells index `data` by string key (e.g. data["annotations"]),
# so this is presumably the number of top-level keys, not an annotation count — confirm intent.
n_total = len(data)

In [50]:
# Build a small "dummy" training set from the first N annotations. Each example pairs an
# image file name and bounding box with one positive text query ("A <attribute> <category>")
# and a randomly drawn negative query.

# Number of leading annotations to convert.
N_DUMMY_ANNOTATIONS = 200


def _first_by_id(records):
    """Return ``{record["id"]: record}``, keeping the first record seen for each id."""
    index = {}
    for record in records:
        index.setdefault(record["id"], record)
    return index


# Index the lookup tables once instead of scanning every list for every annotation.
images_by_id = _first_by_id(data["images"])
attributes_by_id = _first_by_id(data["attributes"])
categories_by_id = _first_by_id(data["categories"])
object_categories = [cat for cat in data["categories"] if cat.get("supercategory") == "OBJECT"]

# NOTE(review): the original cell read `attr_type_to_attr_idxs` from kernel state that no
# visible cell defines, and its bare `except` hid the resulting NameError (an empty dataset
# on a fresh kernel). This assumes the annotation JSON carries the mapping at top level, as
# the PACO format describes — confirm against the file.
attr_type_to_attr_idxs = data["attr_type_to_attr_idxs"]
# Resolved up front so a color id without a matching attribute fails loudly here.
color_names = [attributes_by_id[attr_id]["name"] for attr_id in attr_type_to_attr_idxs["color"]]

train_data = {"annotations": []}
n_skipped = 0
for ann in data["annotations"][:N_DUMMY_ANNOTATIONS]:
    try:
        image = images_by_id[ann["image_id"]]
        bbox = ann["bbox"]
        # NOTE(review): the first attribute id is used as the color without checking its
        # attribute type — verify that attribute_ids[0] is always a color.
        ann_color = attributes_by_id[ann["attribute_ids"][0]]["name"]
        ann_category = categories_by_id[ann["category_id"]]["name"]
        neg_category_ids = set(image["neg_category_ids"])
    except (KeyError, IndexError):
        # Best-effort: skip annotations with missing fields, no attributes, or dangling ids.
        n_skipped += 1
        continue

    # Candidate negatives: OBJECT categories listed as negatives for this image.
    neg_category_names = [
        cat["name"] for cat in object_categories if cat["id"] in neg_category_ids
    ]
    if not neg_category_names:
        # np.random.choice raises on an empty sequence; nothing to sample from.
        n_skipped += 1
        continue

    random_chosen_color = np.random.choice(color_names)
    random_neg_category = np.random.choice(neg_category_names)

    pos_queries = [f"A {ann_color} {ann_category}"]
    # The single negative query is repeated twice, as in the original cell.
    # NOTE(review): confirm whether two distinct negatives were intended.
    neg_queries = [f"A {random_chosen_color} {random_neg_category}"] * 2

    train_data["annotations"].append(
        {
            "image_file_name": image["file_name"],
            "bbox": bbox,
            "pos_queries": pos_queries,
            "neg_queries": neg_queries,
        }
    )

print(f"Built {len(train_data['annotations'])} examples; skipped {n_skipped} annotations")

In [52]:
# ensure_ascii=False writes non-ASCII characters verbatim, so pin the file encoding
# rather than inheriting the platform default.
with open("ego4d_dummy_train.json", "w", encoding="utf-8") as out_file:
    json.dump(train_data, out_file, indent=4, ensure_ascii=False)

In [None]:
class OwlDataset(Dataset):
    """Map-style dataset yielding a processed image with its labels, boxes and metadata.

    ``self.data`` is a list of one-key mappings ``{image url or path: [annotation, ...]}``;
    every annotation must provide ``"label"`` and ``"bbox"``.
    """

    def __init__(self, owl_processor, annotations_file, images_dir=None):
        """Load the annotation JSON and remember the processor.

        Args:
            owl_processor: Callable invoked as ``owl_processor(images=..., return_tensors="pt")``
                whose result is indexed with ``"pixel_values"``.
            annotations_file: Path to the annotation JSON file.
            images_dir: Directory containing the images; defaults to ``IMAGES_PATH``.
        """
        self.images_dir = IMAGES_PATH if images_dir is None else images_dir
        self.owl_processor = owl_processor

        with open(annotations_file) as f:
            data = json.load(f)

        # NOTE(review): the original statement here was an unfinished `self.` (a syntax
        # error). The accessors below require each record to be a one-key mapping of
        # image reference -> annotation list, so a top-level JSON object is split into
        # one record per key and a top-level list is used as-is. Confirm the intended
        # file schema.
        if isinstance(data, dict):
            self.data = [{image_ref: anns} for image_ref, anns in data.items()]
        else:
            self.data = list(data)

    def _entry(self, idx: int):
        """Return ``(image reference, annotation list)`` for record ``idx``."""
        record = self.data[idx]
        # Each record is expected to describe exactly one image.
        assert len(record) == 1
        return next(iter(record.items()))

    def load_image(self, idx: int) -> "tuple[Image.Image, str]":
        """Open the image for record ``idx`` as RGB and return ``(image, path)``.

        Only the base name of the stored reference is used; it is joined onto
        ``self.images_dir``.
        """
        url, _ = self._entry(idx)
        path = os.path.join(self.images_dir, os.path.basename(url))
        # convert() returns an independent image, so the file handle can be closed here.
        with Image.open(path) as raw:
            image = raw.convert("RGB")
        return image, path

    def load_target(self, idx: int):
        """Return ``(labels, boxes)`` lists for record ``idx``.

        Raises:
            AssertionError: If the record does not hold exactly one image entry.
        """
        _, annotations = self._entry(idx)

        labels = []
        boxes = []
        for annotation in annotations:
            labels.append(annotation["label"])
            boxes.append(annotation["bbox"])

        return labels, boxes

    def __len__(self):
        """Number of records in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(pixel_values, labels tensor, boxes tensor, metadata)`` for ``idx``.

        ``metadata`` holds the original image ``width``/``height`` and the resolved
        ``impath``.
        """
        image, path = self.load_image(idx)
        labels, boxes = self.load_target(idx)
        w, h = image.size
        metadata = {
            "width": w,
            "height": h,
            "impath": path,
        }
        # The processor output carries a leading dimension of size one; drop it.
        image = self.owl_processor(images=image, return_tensors="pt")[
            "pixel_values"
        ].squeeze(0)

        return image, torch.tensor(labels), torch.tensor(boxes), metadata


def get_dataloaders(
    train_annotations_file=TRAIN_ANNOTATIONS_FILE,
    test_annotations_file=VAL_ANNOTATIONS_FILE,
    labelmap_file=None,
    num_workers=4,
):
    """Build train/test dataloaders plus per-label scales and the label map.

    Args:
        train_annotations_file: Annotation JSON for the training dataset.
        test_annotations_file: Annotation JSON for the test dataset. Defaults to the
            validation annotations (the notebook defines no separate test constant).
        labelmap_file: Path to the label-map JSON. When ``None`` the module-level
            ``LABELMAP_FILE`` is used.
        num_workers: Worker processes for each ``DataLoader``.

    Returns:
        ``(train_dataloader, test_dataloader, scales, labelmap)`` where ``scales`` is a
        list with one float per training label, ordered by sorted label.

    Raises:
        NameError: If ``labelmap_file`` is omitted and ``LABELMAP_FILE`` is not defined.
    """
    if labelmap_file is None:
        # NOTE(review): LABELMAP_FILE is not defined in any visible cell; resolving it
        # first makes that failure surface before the processor is loaded.
        labelmap_file = LABELMAP_FILE

    image_processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")

    train_dataset = OwlDataset(image_processor, train_annotations_file)
    test_dataset = OwlDataset(image_processor, test_annotations_file)

    with open(labelmap_file) as f:
        labelmap = json.load(f)

    # Count how often each label occurs across the training annotations.
    train_labelcounts = Counter()
    for i in range(len(train_dataset)):
        train_labelcounts.update(train_dataset.load_target(i)[0])

    if train_labelcounts:
        # Scales must follow sorted label order.
        counts = np.array([train_labelcounts[label] for label in sorted(train_labelcounts)])
        # log(max_count / count) + 3, rounded to one decimal: the most frequent label
        # gets 3.0 and rarer labels get larger values.
        scales = np.round(np.log(counts.max() / counts) + 3, 1).tolist()
    else:
        # No training labels: max() of an empty array would raise.
        scales = []

    train_dataloader = DataLoader(
        train_dataset, batch_size=1, shuffle=True, num_workers=num_workers
    )
    test_dataloader = DataLoader(
        test_dataset, batch_size=1, shuffle=False, num_workers=num_workers
    )

    return train_dataloader, test_dataloader, scales, labelmap


In [15]:
# Two-element boolean scratch array used by the cells below.
a = np.array([True, False], dtype=bool)

In [20]:
# Negate the boolean array, then append a trailing axis of length one.
np.expand_dims(np.logical_not(a), axis=-1)

array([[False],
       [ True]])

In [18]:
# Shape of the scratch array itself (no axis added).
a.shape

(2,)