In [2]:
import numpy as np
import scipy.io as scio

import torch
from torch.utils.data import Dataset
from torchvision.io import read_image, write_jpeg
import torchvision.transforms as transforms
from torchvision.transforms.functional import crop

In [15]:
class EmoticDataset(Dataset):
    """Map-style dataset returning (subject crop, full context image) pairs.

    Records come from the "train" entry of a MATLAB annotation file; records
    whose ``folder`` equals "framesdb/images" are dropped at construction time.
    """

    def __init__(self, anns_dir, img_dir):
        """Load the annotation file and keep the usable records.

        Args:
            anns_dir: Path handed directly to ``scipy.io.loadmat`` (i.e. the
                annotation ``.mat`` file itself).
            img_dir: Root directory of the images. Each record's ``folder``
                and ``filename`` are appended to it to locate the image.
        """
        records = scio.loadmat(anns_dir)["train"][0]
        # A boolean mask keeps the structured dtype intact without pushing
        # object-field records through np.fromiter.
        keep = np.fromiter(
            (rec["folder"].item() != "framesdb/images" for rec in records),
            dtype=bool,
            count=records.size,
        )
        self.anns = records[keep]
        self.img_dir = img_dir

    def __len__(self):
        """Return the number of annotation records kept after filtering."""
        return self.anns.size

    def __getitem__(self, idx):
        """Return ``(subject_img, context_img)`` for the record at ``idx``.

        ``context_img`` is the full image as returned by ``read_image``;
        ``subject_img`` is the region of that image delimited by the record's
        ``body_bbox``, which is treated as (left, top, right, bottom).
        """
        record = self.anns[idx]

        # Build "<img_dir>/<folder>/<filename>" without doubling a trailing
        # slash and without turning an empty root into an absolute path.
        root = str(self.img_dir)
        if root and not root.endswith("/"):
            root += "/"
        img_path = root + record["folder"].item() + "/" + record["filename"].item()
        context_img = read_image(img_path)

        bbox = record["person"]["body_bbox"].item()[0]
        # Convert to Python ints first: the stored values are small unsigned
        # integers, and subtracting those directly can wrap around.
        left, top, right, bottom = (int(v) for v in bbox[:4])
        subject_img = crop(context_img, top, left, bottom - top, right - left)

        return subject_img, context_img

In [4]:
# Load the "train" annotations and drop every record stored under 'framesdb/images'.
train_split = scio.loadmat('../data/Annotations/Annotations.mat')["train"]
anns = np.fromiter(
    (rec for rec in train_split[0] if rec["folder"].item() != "framesdb/images"),
    dtype=train_split.dtype,
)

In [136]:
# Distinct image folders remaining after the filter.
{rec["folder"].item() for rec in anns}

{'ade20k/images', 'emodb_small/images', 'mscoco/images'}

In [220]:
# Gather each record's (n_row, n_col) and print the column-wise max, min and average.
sizes = np.array([
    [rec["image_size"][field].item().item() for field in ("n_row", "n_col")]
    for rec in anns
])
for reducer in (np.max, np.min, np.average):
    print(reducer(sizes, axis=0))

[5616 7360]
[112 150]
[472.44436702 622.68873403]


In [221]:
# Pull every record's body bounding box, derive (x-extent, y-extent) per box,
# then print the column-wise max, min and average.
boxes = [rec["person"]["body_bbox"][0][0][0] for rec in anns]
box_sizes = np.array(
    [[box[2] - box[0], box[3] - box[1]] for box in boxes],
    dtype=np.uint16,
)
for reducer in (np.max, np.min, np.average):
    print(reducer(box_sizes, axis=0))

[3697 4138]
[27 44]
[212.63902439 307.7416957 ]


In [169]:
# Number of annotation records left after filtering.
anns.size

12915

In [5]:
# Pick one record to inspect; the cells below all read from `ann`.
idx = 1352
ann = anns[idx]

In [6]:
# Field names of a single annotation record.
ann.dtype

dtype([('filename', 'O'), ('folder', 'O'), ('image_size', 'O'), ('original_database', 'O'), ('person', 'O')])

In [7]:
# Image path relative to the dataset root: "<folder>/<filename>".
"/".join((ann["folder"].item(), ann["filename"].item()))

'mscoco/images/COCO_train2014_000000015394.jpg'

In [8]:
# Read the selected record's image from disk.
relative_path = "/".join((ann["folder"].item(), ann["filename"].item()))
image = read_image("../data/cvpr_emotic/" + relative_path)

In [9]:
# Shape of the tensor returned by read_image.
image.shape

torch.Size([3, 480, 640])

In [275]:
# Cut the annotated person out of `image` and save the crop as a JPEG.
# The bbox is used as (left, top, right, bottom).
bbox = ann["person"]["body_bbox"].item()[0]
cropped = crop(
    image,
    top=bbox[1],
    left=bbox[0],
    height=bbox[3] - bbox[1],
    width=bbox[2] - bbox[0],
)
write_jpeg(cropped, "../data/try_crop/cropped.jpg")

In [258]:
# Annotated [n_row, n_col] of the selected record's image.
[ann["image_size"][field].item().item() for field in ("n_row", "n_col")]

[427, 640]

In [226]:
# Name of the source database recorded for this record (two .item() calls unwrap the nesting).
ann["original_database"]["name"].item().item()

'mscoco'

In [227]:
# Raw, still-nested 'info' field of the record's original_database entry.
ann["original_database"]["info"]

array([[array([[(array([[481931]], dtype=int32), array([[473666]], dtype=int32))]],
              dtype=[('image_id', 'O'), ('annotations_id', 'O')])                  ]],
      dtype=object)

In [228]:
# Field names available on the record's 'person' entry.
ann["person"].dtype

dtype([('body_bbox', 'O'), ('annotations_categories', 'O'), ('annotations_continuous', 'O'), ('gender', 'O'), ('age', 'O')])

In [260]:
# Unwrapped body bounding box of the person as a flat array of four values.
ann["person"]["body_bbox"].item()[0]

array([238, 113, 374, 335], dtype=uint16)

In [230]:
# Raw, still-nested categorical annotation of the person.
ann["person"]["annotations_categories"]

array([[array([[(array([[array(['Engagement'], dtype='<U10')]], dtype=object),)]],
              dtype=[('categories', 'O')])                                        ]],
      dtype=object)

In [231]:
# Raw, still-nested continuous annotation of the person (fields: valence, arousal, dominance).
ann["person"]["annotations_continuous"]

array([[array([[(array([[7]], dtype=uint8), array([[4]], dtype=uint8), array([[6]], dtype=uint8))]],
              dtype=[('valence', 'O'), ('arousal', 'O'), ('dominance', 'O')])                       ]],
      dtype=object)

In [234]:
# Raw, still-nested gender label of the person.
ann["person"]["gender"]

array([[array(['Male'], dtype='<U4')]], dtype=object)

In [235]:
# Raw, still-nested age label of the person.
ann["person"]["age"]

array([[array(['Adult'], dtype='<U5')]], dtype=object)