In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
from tqdm import tqdm
from PIL import Image
from hcmus.core import appconfig
from hcmus.lbs import LabelStudioConnector

In [None]:
def fetch_backgrounds():
    """Download the "template" Label Studio project and load its images.

    A connector is built from the ``appconfig`` Label Studio settings, all
    tasks of the project mapped to ``"template"`` are fetched, and the
    corresponding dataset is downloaded.

    Returns:
        list[dict]: one entry per dataset item with the keys
            ``"background"`` (the image decoded into a NumPy array) and
            ``"boxes"`` (the item's ``target["boxes"]`` value, passed
            through unchanged).
    """
    template_connector = LabelStudioConnector(
        url=appconfig.LABEL_STUDIO_URL,
        api_key=appconfig.LABEL_STUDIO_API_KEY,
        project_id=appconfig.LABEL_STUDIO_PROJECT_MAPPING["template"],
        temp_dir=appconfig.LABEL_STUDIO_TEMP_DIR
    )
    tasks = template_connector.get_tasks()
    dataset = template_connector.download_dataset(tasks)

    result = []
    for item in dataset:
        img = item.get("image")
        boxes = item.get("target").get("boxes")
        # Decode inside a context manager so the underlying file handle is
        # released deterministically rather than whenever the Image object
        # happens to be garbage collected.
        with Image.open(img) as image:
            background = np.array(image)
        result.append({
            "background": background,
            "boxes": boxes
        })
    return result


In [18]:
# Load every template background (image array + boxes) into memory.
backgrounds = fetch_backgrounds()

Loading tasks: 100%|██████████| 1/1 [00:00<00:00, 13.52it/s]
[32m2025-05-17 10:42:01.575[0m | [1mINFO    [0m | [36mhcmus.lbs._label_studio_connector[0m:[36mdownload_dataset[0m:[36m48[0m - [1mNo labels input, auto extract 1 labels.[0m
Downloading images: 100%|██████████| 3/3 [00:00<00:00, 5918.59it/s]


In [None]:
from hcmus.utils import viz_utils

def fetch_objects():
    """Download the "train" Label Studio project and crop out every object.

    A connector is built from the ``appconfig`` Label Studio settings, the
    project mapped to ``"train"`` is downloaded together with its label
    dictionary, and each image is cropped once per annotated box via
    ``viz_utils.crop_image``.

    Returns:
        list[dict]: one entry per annotated box with the keys
            ``"path"`` (the item's ``"image"`` value),
            ``"box"`` (the box the crop was taken from),
            ``"object"`` (the crop as a NumPy array),
            ``"label"`` (the ``label_dict`` key at position ``label_id``) and
            ``"label_id"`` (the label value stored in the target).
    """
    train_connector = LabelStudioConnector(
        url=appconfig.LABEL_STUDIO_URL,
        api_key=appconfig.LABEL_STUDIO_API_KEY,
        project_id=appconfig.LABEL_STUDIO_PROJECT_MAPPING["train"],
        temp_dir=appconfig.LABEL_STUDIO_TEMP_DIR
    )
    tasks = train_connector.get_tasks()
    label_dict = train_connector.extract_labels(tasks)
    dataset = train_connector.download_dataset(tasks, label_dict)

    # Label ids are used as positions in label_dict's key order; build that
    # lookup once instead of re-materialising the key list for every object.
    label_names = list(label_dict.keys())

    result = []
    for item in tqdm(dataset, "Extract objects"):
        img = item.get("image")
        target = item.get("target")
        boxes = target.get("boxes")
        labels = target.get("labels")
        # Convert the crops to arrays while the source image is still open,
        # so closing the file afterwards cannot invalidate them.
        with Image.open(img) as img_object:
            crops = [
                np.array(crop)
                for crop in viz_utils.crop_image(img_object, boxes)
            ]
        for i, box in enumerate(boxes):
            label = labels[i]
            result.append({
                "path": img,
                "box": box,
                "object": crops[i],
                "label": label_names[label],
                "label_id": label
            })
    return result

In [92]:
# Build the pool of object crops (one entry per annotated box) from the train project.
objects = fetch_objects()

Loading tasks: 100%|██████████| 18/18 [00:04<00:00,  3.63it/s]
Downloading images: 100%|██████████| 1750/1750 [00:00<00:00, 16327.51it/s]
Extract objects...: 100%|██████████| 1750/1750 [00:03<00:00, 564.08it/s] 


In [66]:
# Sanity check: total number of object crops returned by fetch_objects().
len(objects)

1787

In [63]:
from hcmus.data._augment_template import AugmentTemplate

# Smoke-test AugmentTemplate: augment the first background, then place the
# first ten object crops (with their labels) onto the augmented result.
background = backgrounds[0]
background_np = background.get("background")

selected_objects = [obj.get("object") for obj in objects[:10]]
selected_labels = [obj.get("label") for obj in objects[:10]]

augment_template = AugmentTemplate()
new_background, new_boxes = augment_template.augment(
    background_np,
    background.get("boxes"),
)
new_sample, fit_boxes, fit_labels = augment_template.place(
    new_background,
    new_boxes,
    selected_objects,
    selected_labels,
)


### Generate new samples

Augmenting...: 100%|██████████| 1000/1000 [01:41<00:00,  9.86it/s]


In [95]:
# Inspect the first ten generated samples.
# NOTE(review): the repr includes the full image arrays, which floods the cell
# output; consider summarising image shapes and box counts instead.
train_dataset[:10]

[{'image': array([[[0, 0, 0],
          [0, 0, 0],
          [0, 0, 0],
          ...,
          [0, 0, 0],
          [0, 0, 0],
          [0, 0, 0]],
  
         [[0, 0, 0],
          [0, 0, 0],
          [0, 0, 0],
          ...,
          [0, 0, 0],
          [0, 0, 0],
          [0, 0, 0]],
  
         [[0, 0, 0],
          [0, 0, 0],
          [0, 0, 0],
          ...,
          [0, 0, 0],
          [0, 0, 0],
          [0, 0, 0]],
  
         ...,
  
         [[0, 0, 0],
          [0, 0, 0],
          [0, 0, 0],
          ...,
          [0, 0, 0],
          [0, 0, 0],
          [0, 0, 0]],
  
         [[0, 0, 0],
          [0, 0, 0],
          [0, 0, 0],
          ...,
          [0, 0, 0],
          [0, 0, 0],
          [0, 0, 0]],
  
         [[0, 0, 0],
          [0, 0, 0],
          [0, 0, 0],
          ...,
          [0, 0, 0],
          [0, 0, 0],
          [0, 0, 0]]], dtype=uint8),
  'target': {'boxes': [(236, 191, 295, 272),
    (264, 138, 286, 220),
    (275, 90, 296, 15

### Check pipeline

In [96]:
from hcmus.pipelines import yolo_augmentation_pipeline

In [100]:
# Run the packaged pipeline end to end with a small n_augment as a quick check;
# the dataset is written under the relative path "local/dataset".
yolo_augmentation_pipeline.execute(
    output_dir="local/dataset",
    n_augment=10
)

Loading tasks: 100%|██████████| 1/1 [00:00<00:00, 12.70it/s]
[32m2025-05-17 11:51:06.904[0m | [1mINFO    [0m | [36mhcmus.lbs._label_studio_connector[0m:[36mdownload_dataset[0m:[36m48[0m - [1mNo labels input, auto extract 1 labels.[0m
Downloading images: 100%|██████████| 3/3 [00:00<00:00, 2943.37it/s]
Loading tasks: 100%|██████████| 18/18 [00:04<00:00,  3.70it/s]
Downloading images: 100%|██████████| 1750/1750 [00:00<00:00, 14343.56it/s]
Extract objects: 100%|██████████| 1750/1750 [00:03<00:00, 542.40it/s] 
Augmenting: 100%|██████████| 10/10 [00:01<00:00,  9.68it/s]
[32m2025-05-17 11:51:16.319[0m | [1mINFO    [0m | [36mhcmus.pipelines.yolo_augmentation_pipeline[0m:[36msave_yolo_v8_dataset_from_dicts[0m:[36m159[0m - [1mYOLOv8-compatible dataset saved to: local/dataset[0m
