In [1]:
from transformers import AutoImageProcessor, AutoModelForObjectDetection

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
checkpoint = "hustvl/yolos-tiny"
teacher_processor = AutoImageProcessor.from_pretrained(checkpoint)
teacher_model = AutoModelForObjectDetection.from_pretrained(
    checkpoint,
)

The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


In [3]:
from datasets import load_dataset, load_from_disk
import os


train_dir = "C:/source/repos/Models-Inferencing-On-Client-Sandbox/datasets/image-face-dataset/train"
test_dir = "C:/source/repos/Models-Inferencing-On-Client-Sandbox/datasets/image-face-dataset/test"
validation_dir = "C:/source/repos/Models-Inferencing-On-Client-Sandbox/datasets/image-face-dataset/validation"

is_data_local: bool = os.path.exists(train_dir) and os.path.exists(test_dir) and os.path.exists(validation_dir)
is_using_subset: bool = True

if is_data_local:
    train_dataset = load_from_disk(train_dir)
    test_dataset = load_from_disk(test_dir)
    validation_dataset = load_from_disk(validation_dir)
else:
    train_dataset = load_dataset("wider_face", split="train[:5%]")
    test_dataset = load_dataset("wider_face", split="test[:5%]")
    validation_dataset = load_dataset("wider_face", split="validation[:5%]")
    train_dataset.save_to_disk(train_dir)
    test_dataset.save_to_disk(test_dir)
    validation_dataset.save_to_disk(validation_dir)

In [4]:
from datasets import DatasetDict

dataset = DatasetDict({
    'train': train_dataset,
    'test': test_dataset,
    'validation': validation_dataset,
})

dataset

DatasetDict({
    train: Dataset({
        features: ['image', 'faces'],
        num_rows: 644
    })
    test: Dataset({
        features: ['image', 'faces'],
        num_rows: 805
    })
    validation: Dataset({
        features: ['image', 'faces'],
        num_rows: 161
    })
})

In [5]:

def add_coco_fields(example, idx):
    bboxes = example['faces']['bbox']
    areas = [bbox[2] * bbox[3] for bbox in bboxes]
    
    example['image_id'] = idx
    example['faces']['area'] = areas
    example['faces']['category'] = [0 for _ in example["faces"]["expression"]]
    
    return example

In [6]:
for split in dataset.keys():
    dataset[split] = dataset[split].map(add_coco_fields, with_indices=True)
    
categories = ["face"]

In [7]:
dataset["train"][0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x1385>,
 'faces': {'area': [18178.0],
  'bbox': [[449.0, 330.0, 122.0, 149.0]],
  'blur': [0],
  'category': [0],
  'expression': [0],
  'illumination': [0],
  'invalid': [False],
  'occlusion': [0],
  'pose': [0]},
 'image_id': 0}

In [8]:
import albumentations
import numpy as np
import torch

transform = albumentations.Compose(
    [
        albumentations.Resize(480, 480),
        albumentations.HorizontalFlip(p=1.0),
        albumentations.RandomBrightnessContrast(p=1.0),
    ],
    bbox_params=albumentations.BboxParams(format="coco", label_fields=["category"]),
)

In [9]:
def formatted_anns(image_id, category, area, bbox):
    annotations = []
    for i in range(0, len(category)):
        new_ann = {
            "image_id": image_id,
            "category_id": category[i],
            "isCrowd": 1 if len(bbox)>1 else 0,
            "area": area[i],
            "bbox": list(bbox[i]),
        }
        annotations.append(new_ann)

    return annotations

In [19]:
# transforming a batch
def transform_aug_ann(examples):
    image_ids = examples["image_id"]
    images, bboxes, area, categories = [], [], [], []
    for image, faces in zip(examples["image"], examples["faces"]):
        image = np.array(image.convert("RGB"))[:, :, ::-1]
        out = transform(image=image, bboxes=faces["bbox"], category=faces["category"])

        area.append(faces["area"])
        images.append(out["image"])
        bboxes.append(out["bboxes"])
        categories.append(out["category"])

    targets = [
        {"image_id": id_, "annotations": formatted_anns(id_, cat_, ar_, box_)}
        for id_, cat_, ar_, box_ in zip(image_ids, categories, area, bboxes)
    ]
    
    print(targets)

    return teacher_processor(images=images, annotations=targets, return_tensors="pt")

In [21]:
dataset["train"] = dataset["train"].with_transform(transform_aug_ann)

In [23]:
example = dataset["train"][0]

example

[{'image_id': 0, 'annotations': [{'image_id': 0, 'category_id': 0, 'isCrowd': 0, 'area': 18178.0, 'bbox': [212.34375, 114.36823104693141, 57.1875, 51.638989169675085]}]}]


{'pixel_values': tensor([[[ 1.9920, -0.0629, -0.6109,  ..., -0.5082, -0.5082, -0.5082],
          [ 2.1462, -0.1486, -0.6452,  ..., -0.5596, -0.5596, -0.5596],
          [ 2.1633, -0.1657, -0.6281,  ..., -0.3369, -0.3369, -0.3369],
          ...,
          [ 2.0263, -1.3302, -2.1179,  ..., -2.0323, -1.9980, -2.0494],
          [ 2.0434, -1.1760, -1.9638,  ..., -2.0665, -2.0837, -2.1179],
          [ 2.0092, -1.1589, -1.9124,  ..., -2.1179, -2.0665, -1.8953]],
 
         [[ 2.2710, -0.1099, -0.8803,  ..., -1.0203, -1.0203, -1.0203],
          [ 2.2710, -0.5126, -1.2479,  ..., -1.5980, -1.5980, -1.5980],
          [ 2.2360, -0.5476, -1.3179,  ..., -1.6155, -1.6155, -1.6155],
          ...,
          [ 2.3410, -0.9853, -1.7381,  ..., -1.1429, -1.1078, -1.1604],
          [ 2.3410, -0.8452, -1.4405,  ..., -1.2129, -1.2129, -1.3179],
          [ 2.3235, -0.8102, -1.4230,  ..., -1.4055, -1.2479, -1.0728]],
 
         [[ 2.5006,  0.0779, -0.7413,  ..., -1.0376, -1.0376, -1.0376],
          [ 