In [2]:
import os

import numpy as np
import scipy.io as scio
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.io import ImageReadMode, read_image, write_jpeg, decode_image
from torchvision.transforms.functional import crop
from torchvision.utils import draw_bounding_boxes

In [3]:
# The 26 discrete emotion categories used as classification targets.
# A category's position in this list is its index in the multi-hot label vector.
emotions = [
    'Affection', 'Anger', 'Annoyance', 'Anticipation', 'Aversion', 'Confidence',
    'Disapproval', 'Disconnection', 'Disquietment', 'Doubt/Confusion',
    'Embarrassment', 'Engagement', 'Esteem', 'Excitement', 'Fatigue', 'Fear',
    'Happiness', 'Pain', 'Peace', 'Pleasure', 'Sadness', 'Sensitivity',
    'Suffering', 'Surprise', 'Sympathy', 'Yearning',
]

# Category name -> label index.
emotion_encode = dict(zip(emotions, range(len(emotions))))
# Label index -> category name (inverse of `emotion_encode`).
emotion_decode = dict(enumerate(emotions))

In [4]:
class EmoticDataset(Dataset):
    """Dataset of (subject crop, context image, multi-hot emotion label) samples.

    One sample is produced per annotation record of the ``"train"`` split,
    excluding records whose folder is ``"framesdb/images"``.

    NOTE(review): only the first annotated person of each record is used, although
    a record can list several persons — confirm whether the others should become
    samples as well.
    """

    def __init__(self, subject_size, context_size, anns_dir, img_dir):
        """Load the annotations and set up the resize transforms.

        Args:
            subject_size: size passed to ``transforms.Resize`` for the person crop.
            context_size: size passed to ``transforms.Resize`` for the full image.
            anns_dir: path of the ``.mat`` annotation file read with ``scio.loadmat``.
            img_dir: root directory that contains the ``<folder>/<filename>`` images
                referenced by the annotations.
        """
        anns = scio.loadmat(anns_dir)["train"]
        # Keep every record except those stored under "framesdb/images".
        self.anns = np.fromiter(
            filter(lambda x: x["folder"].item() != "framesdb/images", iter(anns[0])),
            dtype=anns.dtype,
        )
        self.img_dir = img_dir
        self.subject_transform = transforms.Resize(subject_size)
        self.context_transform = transforms.Resize(context_size)

    def __len__(self):
        """Return the number of annotation records kept after filtering."""
        return self.anns.size

    def _image_path(self, ann):
        """Build the image path of ``ann`` under ``self.img_dir``."""
        # Use the configured root rather than a hard-coded directory, so the
        # `img_dir` constructor argument is actually honoured.
        return os.path.join(str(self.img_dir), ann["folder"].item(), ann["filename"].item())

    def __getitem__(self, idx):
        """Return ``(subject_img, context_img, label)`` for record ``idx``.

        ``subject_img`` and ``context_img`` are resized float tensors; ``label`` is a
        NumPy vector of length ``len(emotions)`` with 1.0 at every annotated category.
        """
        ann = self.anns[idx]

        # Decode as RGB so grayscale / RGBA files also yield 3-channel tensors.
        context_img = read_image(self._image_path(ann), mode=ImageReadMode.RGB)

        # Box of the first person, indexed as (x1, y1, x2, y2);
        # crop() expects (top, left, height, width).
        bbox = ann["person"]["body_bbox"][0][0][0].astype(int)
        subject_img = crop(context_img, bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0])

        # Multi-hot encoding of the first person's category names.
        label = np.zeros(len(emotions))
        ems = [i.item() for i in ann["person"]["annotations_categories"][0][0].item()[0][0]]
        for e in ems:
            label[emotion_encode[e]] = 1.

        subject_img = self.subject_transform(subject_img.float())
        context_img = self.context_transform(context_img.float())

        return subject_img, context_img, label

In [None]:
# EmoticDataset requires (subject_size, context_size, anns_dir, img_dir); calling it
# with only the two paths raises TypeError.
# NOTE(review): 224x224 is a placeholder — set both to the model's input sizes.
SUBJECT_SIZE = (224, 224)
CONTEXT_SIZE = (224, 224)

train_data = EmoticDataset(
    SUBJECT_SIZE,
    CONTEXT_SIZE,
    "../data/Annotations/Annotations.mat",
    "../data/cvpr_emotic/",
)
train_dataloader = DataLoader(train_data)

In [5]:
# Load the annotation file and keep the flat array of "train" records.
# Unlike EmoticDataset, the "framesdb/images" records are NOT filtered out here.
anns = scio.loadmat('../data/pami/Annotations/Annotations.mat')["train"][0]

In [6]:
# Shape of the flat "train" annotation array (one element per record).
anns.shape

(17077,)

In [7]:
# Distinct image folders referenced by the annotation records.
{record["folder"].item() for record in anns}

{'ade20k/images', 'emodb_small/images', 'framesdb/images', 'mscoco/images'}

In [8]:
# Category names that actually occur in the loaded annotations (every person of every
# record). Stored under its own name: rebinding `emotions` here would leave
# `emotion_encode` / `emotion_decode` built from a different list than the one
# `len(emotions)` later refers to.
observed_emotions = sorted({
    category.item()
    for ann_record in anns
    for person_categories in ann_record["person"]["annotations_categories"][0]
    for category in person_categories.item()[0][0]
})

# Sanity check: an empty set means the hard-coded `emotions` list matches the data.
set(observed_emotions) ^ set(emotions)

In [9]:
# (n_row, n_col) of every image as recorded in the annotations,
# followed by the column-wise max, min and mean.
sizes = np.array([
    [record["image_size"]["n_row"].item().item(), record["image_size"]["n_col"].item().item()]
    for record in anns
])
print(sizes.max(axis=0))
print(sizes.min(axis=0))
print(sizes.mean(axis=0))

[ 6272 11617]
[112 150]
[558.01680623 738.75464074]


In [10]:
# Body box of the first person in each record, then the per-box
# (x2 - x1, y2 - y1) extents and their column-wise max, min and mean.
boxes = [ann["person"]["body_bbox"][0][0][0] for ann in anns]
box_sizes = np.array(
    [[x2 - x1, y2 - y1] for x1, y1, x2, y2 in (b[:4] for b in boxes)],
    dtype=np.uint16,
)
print(box_sizes.max(axis=0))
print(box_sizes.min(axis=0))
print(box_sizes.mean(axis=0))

[4337 4426]
[22 34]
[232.18199918 347.39585407]


In [11]:
# Total number of annotation records in `anns`.
anns.size

17077

In [12]:
# torchvision transform that converts a PIL image (or ndarray) into a tensor.
tf = transforms.ToTensor()

In [13]:
# Flat index of every annotated person: one [record index, person index] pair each.
data_idx = []
for i, ann in enumerate(anns):
    n_persons = len(ann["person"][0])
    data_idx.extend([i, s] for s in range(n_persons))

len(data_idx)

23706

In [65]:
# Pick one person-level sample and resolve it to its record (`idx`) and the
# person's position inside that record (`p_idx`).
sample_idx = 633
idx, p_idx = data_idx[sample_idx]
ann = anns[idx]

In [63]:
# Top-level fields of a single annotation record.
ann.dtype

dtype([('filename', 'O'), ('folder', 'O'), ('image_size', 'O'), ('original_database', 'O'), ('person', 'O')])

In [64]:
# Image path of this record relative to the dataset root: "<folder>/<filename>".
ann["folder"].item() + '/' + ann["filename"].item()

'mscoco/images/COCO_train2014_000000204837.jpg'

In [55]:
# Load the sample as a uint8 CxHxW tensor.
# Converting the decoded pixels directly avoids the ToTensor float round-trip
# (scale to [0, 1], multiply by 255, truncating cast back to uint8), and the context
# manager closes the file handle once the pixels have been copied.
image_path = "../data/pami/emotic/" + ann["folder"].item() + '/' + ann["filename"].item()
with Image.open(image_path) as pil_image:
    # convert("RGB") guarantees an HxWx3 array regardless of the file's colour mode.
    image = torch.from_numpy(np.array(pil_image.convert("RGB"))).permute(2, 0, 1).contiguous()

In [56]:
# Shape of the loaded image tensor.
image.shape

torch.Size([3, 480, 640])

In [57]:
# Cut the selected person's body box out of the image and save it for inspection.
bbox = ann["person"]["body_bbox"][0][p_idx][0].astype(int)
left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
# crop() takes (top, left, height, width).
cropped = crop(image, top, left, bottom - top, right - left)
write_jpeg(cropped, "../data/try_crop/cropped.jpg")

In [67]:
# Outline the same box in red on the full image and save it for inspection.
box_tensor = torch.tensor([bbox[:4].tolist()])
draw_bbox = draw_bounding_boxes(image, box_tensor, colors=["red"], width=4)
write_jpeg(draw_bbox, "../data/try_crop/draw_bbox.jpg")

In [65]:
# Image size stored in the annotation record, as [n_row, n_col].
[ann["image_size"]["n_row"].item().item(), ann["image_size"]["n_col"].item().item()]

[427, 640]

In [66]:
# Name of the database this image originally came from.
ann["original_database"]["name"].item().item()

'mscoco'

In [67]:
# Extra metadata about the image in its original database
# (for this record: `image_id` and `annotations_id`).
ann["original_database"]["info"]

array([[array([[(array([[94268]], dtype=int32), array([[530216]], dtype=int32))]],
              dtype=[('image_id', 'O'), ('annotations_id', 'O')])                 ]],
      dtype=object)

In [27]:
# Fields available for each annotated person of a record.
ann["person"].dtype

dtype([('body_bbox', 'O'), ('annotations_categories', 'O'), ('combined_categories', 'O'), ('annotations_continuous', 'O'), ('combined_continuous', 'O'), ('gender', 'O'), ('age', 'O')])

In [260]:
# Body bounding box of the record's person.
# NOTE: .item() only works when the record holds exactly one person.
ann["person"]["body_bbox"].item()[0]

array([238, 113, 374, 335], dtype=uint16)

In [75]:
# Category names of the first person: unwrap the nested array/struct layers,
# then turn each entry into a plain Python string.
[i.item() for i in ann["person"]["annotations_categories"][0][0][0][0][0][0]]

['Confidence', 'Engagement']

In [51]:
# Raw nested structure holding the first person's categories (field `categories`).
ann["person"]["annotations_categories"][0][0]

array([[(array([[array(['Happiness'], dtype='<U9')]], dtype=object),)]],
      dtype=[('categories', 'O')])

In [78]:
# Raw nested structure holding the first person's categories (field `categories`).
# NOTE(review): this cell duplicates an earlier one — consider removing it.
ann["person"]["annotations_categories"][0][0]

array([[(array([[array(['Happiness'], dtype='<U9')]], dtype=object),)]],
      dtype=[('categories', 'O')])

In [80]:
# Category names of the first person as plain Python strings.
# NOTE(review): this cell duplicates an earlier one — consider removing it.
[i.item() for i in ann["person"]["annotations_categories"][0][0][0][0][0][0]]

['Happiness']

In [231]:
# Continuous annotations of the record's persons (fields: valence, arousal, dominance).
ann["person"]["annotations_continuous"]

array([[array([[(array([[7]], dtype=uint8), array([[4]], dtype=uint8), array([[6]], dtype=uint8))]],
              dtype=[('valence', 'O'), ('arousal', 'O'), ('dominance', 'O')])                       ]],
      dtype=object)

In [234]:
# Annotated gender of the record's persons.
ann["person"]["gender"]

array([[array(['Male'], dtype='<U4')]], dtype=object)

In [235]:
# Annotated age group of the record's persons.
ann["person"]["age"]

array([[array(['Adult'], dtype='<U5')]], dtype=object)