In [1]:
import json
import os
import numpy as np
from sklearn.model_selection import StratifiedGroupKFold
from pycocotools.coco import COCO

# Read the COCO annotation file twice: through pycocotools (indexed access to
# images) and as a plain dict (direct access to the raw annotation list).
annotation = './dataset/train_ann_len_under_40.json'
coco = COCO(annotation)

with open(annotation) as f:
    data = json.load(f)

# One (image_id, category_id) pair per annotation, in file order.
var = [(ann['image_id'], ann['category_id']) for ann in data['annotations']]

# X is a constant placeholder with one row per annotation; the labels are the
# category ids and the groups are the image ids the annotations belong to.
X = np.ones((len(var), 1))
y = np.array([category_id for _, category_id in var])
groups = np.array([image_id for image_id, _ in var])

# 5 shuffled folds with a fixed seed, split on (y, groups).
cv = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=7)

import shutil

import yaml

# Root of the per-fold dataset tree. It is resolved from the working directory
# so that the image-list files and the YAML `path` always point at the same
# tree the files are copied into, instead of a machine-specific home directory.
DATASET_ROOT = os.path.abspath('./ultralytics_dataset')
SOURCE_ROOT = './dataset'      # holds the images (by file_name) and labels/
YAML_DIR = 'dataset_yaml'      # one dataset YAML per fold is written here

CLASS_NAMES = {
    0: 'General trash',
    1: 'Paper',
    2: 'Paper pack',
    3: 'Metal',
    4: 'Glass',
    5: 'Plastic',
    6: 'Styrofoam',
    7: 'Plastic bag',
    8: 'Battery',
    9: 'Clothing',
}


def _copy_if_exists(src, dst_dir):
    """Copy ``src`` into ``dst_dir``; warn and return False when ``src`` is missing.

    The previous ``os.system('cp ...')`` calls ignored failures silently; this
    keeps the best-effort behavior but makes every skipped file visible.
    """
    if not os.path.isfile(src):
        print(f'[warn] missing file, skipped: {src}')
        return False
    shutil.copy(src, dst_dir)
    return True


def _export_split(img_names, fold_dir, split, list_path):
    """Materialize one split ('train' or 'val') of a fold.

    Creates ``<fold_dir>/<split>/images`` and ``.../labels``, copies every
    image and its same-stem ``.txt`` label there, and writes ``list_path``
    with one absolute image path per line.
    """
    images_dir = os.path.join(fold_dir, split, 'images')
    labels_dir = os.path.join(fold_dir, split, 'labels')
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    with open(list_path, 'w') as list_file:
        for img_name in img_names:
            # Images land in images_dir under their base name, so the list
            # entry and the label stem are derived from that same base name.
            base_name = os.path.basename(img_name)
            list_file.write(os.path.join(images_dir, base_name) + '\n')

            _copy_if_exists(os.path.join(SOURCE_ROOT, img_name), images_dir)
            label_name = os.path.splitext(base_name)[0] + '.txt'
            _copy_if_exists(os.path.join(SOURCE_ROOT, 'labels', label_name), labels_dir)


os.makedirs(YAML_DIR, exist_ok=True)

# For each fold: look up the file names of its train/val images in the COCO
# index, export both splits, and write the dataset YAML.
for idx, (train_idx, val_idx) in enumerate(cv.split(X, y, groups)):
    # np.unique de-duplicates the image ids and sorts them, so the list files
    # come out in the same order on every run.
    train_img_ids = np.unique(groups[train_idx]).tolist()
    val_img_ids = np.unique(groups[val_idx]).tolist()
    train_img_names = [coco.imgs[img_id]['file_name'] for img_id in train_img_ids]
    val_img_names = [coco.imgs[img_id]['file_name'] for img_id in val_img_ids]

    fold_dir = os.path.join(DATASET_ROOT, f'fold_{idx}')
    os.makedirs(fold_dir, exist_ok=True)

    _export_split(train_img_names, fold_dir, 'train',
                  os.path.join(fold_dir, f'train_fold_{idx}.txt'))
    _export_split(val_img_names, fold_dir, 'val',
                  os.path.join(fold_dir, f'val_fold_{idx}.txt'))

    # Every fold shares the same test list.
    shutil.copyfile(os.path.join(DATASET_ROOT, 'test.txt'),
                    os.path.join(fold_dir, 'test.txt'))

    # Dataset root for this fold (trailing separator kept, as before).
    dataset_path = os.path.join(fold_dir, '')

    # Contents of the dataset YAML. Deliberately NOT called `data`: that name
    # holds the annotation dict loaded earlier and is still read afterwards.
    dataset_cfg = {
        'path': dataset_path,               # dataset root
        'train': f'train_fold_{idx}.txt',   # training image list
        'val': f'val_fold_{idx}.txt',       # validation image list
        'test': 'test.txt',                 # test image list
        'names': CLASS_NAMES,
    }

    # Write the YAML file.
    yaml_path = f'{YAML_DIR}/rtdetr_fold_{idx}.yaml'
    with open(yaml_path, 'w') as yaml_file:
        yaml.dump(dataset_cfg, yaml_file, default_flow_style=False, allow_unicode=True)

    # Only fold 0 is exported by this cell.
    break

loading annotations into memory...
Done (t=0.05s)
creating index...
index created!


In [8]:
# check distribution
import pandas as pd
from collections import Counter

def get_distribution(y, n_classes=None):
    """Return the share of each class id in ``y`` as ``'.2%'``-formatted strings.

    Args:
        y: 1-D sequence or array of non-negative integer class ids.
        n_classes: number of classes to report. When omitted it is
            ``max(y) + 1`` (the original behavior); pass it explicitly so that
            a subset which lacks the highest class id still yields a row of
            full length.

    Returns:
        A list of strings where element ``i`` is the fraction of labels equal
        to ``i``. Empty input yields ``'0.00%'`` for every requested class
        (an empty list when ``n_classes`` is omitted) instead of raising.
    """
    labels = np.asarray(y)
    total = labels.size

    if n_classes is None:
        n_classes = int(labels.max()) + 1 if total else 0
    if total == 0:
        return ['0.00%'] * n_classes

    counts = Counter(labels.tolist())
    return [f'{counts[i] / total:.2%}' for i in range(n_classes)]

# Number of table columns: one per class id from 0 up to the largest id in y.
n_classes = int(np.max(y)) + 1


def _pad_row(row):
    """Right-pad a distribution row with '0.00%' up to ``n_classes`` entries.

    get_distribution sizes its result by the largest label it is given, so a
    split that lacks the highest class id would otherwise yield a short row.
    """
    return row + ['0.00%'] * (n_classes - len(row))


# First row: class distribution over all annotations, before any split.
distrs = [_pad_row(get_distribution(y))]
index = ['training set']

for fold_ind, (train_idx, val_idx) in enumerate(cv.split(X, y, groups)):
    train_y, val_y = y[train_idx], y[val_idx]
    train_gr, val_gr = groups[train_idx], groups[val_idx]

    # An image must never contribute annotations to both sides of a fold.
    assert len(set(train_gr) & set(val_gr)) == 0

    distrs.append(_pad_row(get_distribution(train_y)))
    distrs.append(_pad_row(get_distribution(val_y)))
    index.append(f'train - fold{fold_ind}')
    index.append(f'val - fold{fold_ind}')

# Column names are looked up by category id in the COCO index. The module-level
# `data` name is not used here because the fold-export loop rebinds it to the
# YAML dict, which has no 'categories' key.
categories = [coco.cats[class_id]['name'] for class_id in range(n_classes)]
pd.DataFrame(distrs, index=index, columns=categories)

Unnamed: 0,General trash,Paper,Paper pack,Metal,Glass,Plastic,Styrofoam,Plastic bag,Battery,Clothing
training set,17.70%,26.60%,3.75%,3.91%,4.21%,12.69%,5.31%,22.93%,0.75%,2.15%
train - fold0,17.54%,27.15%,3.84%,3.86%,4.33%,12.33%,5.32%,22.85%,0.66%,2.13%
val - fold0,18.40%,24.27%,3.39%,4.16%,3.68%,14.19%,5.28%,23.25%,1.15%,2.24%
train - fold1,17.71%,27.09%,3.71%,3.83%,3.93%,12.50%,5.20%,23.02%,0.75%,2.25%
val - fold1,17.65%,24.64%,3.92%,4.25%,5.32%,13.44%,5.74%,22.56%,0.73%,1.75%
train - fold2,17.73%,26.22%,3.69%,3.92%,3.94%,12.69%,5.40%,23.44%,0.71%,2.26%
val - fold2,17.59%,28.07%,4.00%,3.89%,5.23%,12.68%,4.96%,20.96%,0.89%,1.73%
train - fold3,17.84%,25.65%,3.81%,3.98%,4.53%,13.04%,5.46%,22.74%,0.77%,2.18%
val - fold3,17.17%,30.37%,3.52%,3.64%,2.96%,11.29%,4.69%,23.68%,0.65%,2.03%
train - fold4,17.69%,26.90%,3.71%,3.98%,4.31%,12.88%,5.16%,22.59%,0.85%,1.93%


In [None]:
import json
import os
from datetime import datetime

import numpy as np
from sklearn.model_selection import StratifiedGroupKFold
from pycocotools.coco import COCO
from tqdm import tqdm
from ultralytics import RTDETR
import pandas as pd
import wandb

# SECURITY: the API key is read from the WANDB_API_KEY environment variable
# instead of being stored in the notebook. The key that used to be hard-coded
# here has been exposed and should be revoked in the W&B account settings.
# When the variable is unset, key=None is passed, which is wandb.login's
# default (presumably falling back to stored credentials / a prompt — verify).
wandb.login(key=os.environ.get("WANDB_API_KEY"))

from ultralytics.data.augment import Albumentations
from ultralytics.utils import LOGGER, colorstr

# Read the COCO annotation file twice: through pycocotools (indexed access to
# images) and as a plain dict (direct access to the raw annotation list).
annotation = "./dataset/train_ann_len_under_40.json"
coco = COCO(annotation)

with open(annotation) as f:
    data = json.load(f)

# One (image_id, category_id) pair per annotation, in file order.
var = [(ann["image_id"], ann["category_id"]) for ann in data["annotations"]]

# X is a constant placeholder with one row per annotation; the labels are the
# category ids and the groups are the image ids the annotations belong to.
X = np.ones((len(var), 1))
y = np.array([category_id for _, category_id in var])
groups = np.array([image_id for image_id, _ in var])

# 5 shuffled folds with a fixed seed, split on (y, groups).
cv = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=7)

import shutil

import yaml

# Root of the per-fold dataset tree. It is resolved from the working directory
# so that the image-list files and the YAML `path` always point at the same
# tree the files are copied into, instead of a machine-specific home directory.
DATASET_ROOT = os.path.abspath("./ultralytics_dataset")
SOURCE_ROOT = "./dataset"      # holds the images (by file_name) and labels/
YAML_DIR = "dataset_yaml"      # one dataset YAML per fold is written here

CLASS_NAMES = {
    0: "General trash",
    1: "Paper",
    2: "Paper pack",
    3: "Metal",
    4: "Glass",
    5: "Plastic",
    6: "Styrofoam",
    7: "Plastic bag",
    8: "Battery",
    9: "Clothing",
}


def _copy_if_exists(src, dst_dir):
    """Copy ``src`` into ``dst_dir``; warn and return False when ``src`` is missing.

    The previous ``os.system("cp ...")`` calls ignored failures silently; this
    keeps the best-effort behavior but makes every skipped file visible.
    """
    if not os.path.isfile(src):
        print(f"[warn] missing file, skipped: {src}")
        return False
    shutil.copy(src, dst_dir)
    return True


def _export_split(img_names, fold_dir, split, list_path):
    """Materialize one split ("train" or "val") of a fold.

    Creates ``<fold_dir>/<split>/images`` and ``.../labels``, copies every
    image and its same-stem ``.txt`` label there, and writes ``list_path``
    with one absolute image path per line.
    """
    images_dir = os.path.join(fold_dir, split, "images")
    labels_dir = os.path.join(fold_dir, split, "labels")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    with open(list_path, "w") as list_file:
        for img_name in img_names:
            # Images land in images_dir under their base name, so the list
            # entry and the label stem are derived from that same base name.
            base_name = os.path.basename(img_name)
            list_file.write(os.path.join(images_dir, base_name) + "\n")

            _copy_if_exists(os.path.join(SOURCE_ROOT, img_name), images_dir)
            label_name = os.path.splitext(base_name)[0] + ".txt"
            _copy_if_exists(os.path.join(SOURCE_ROOT, "labels", label_name), labels_dir)


def _custom_albumentations_init(self, p=1.0):
    """Replacement ``__init__`` patched onto ultralytics' ``Albumentations``.

    Stores ``p``, then builds an albumentations pipeline from the transform
    list ``T`` below. If any transform in ``T`` is a spatial one, the pipeline
    is composed with YOLO-format bbox params so boxes follow the image.
    ``self.transform`` stays ``None`` when the pipeline cannot be built.
    """
    self.p = p
    self.transform = None
    prefix = colorstr("albumentations: ")

    try:
        import albumentations as A

        # List of possible spatial transforms
        spatial_transforms = {
            "Affine",
            "BBoxSafeRandomCrop",
            "CenterCrop",
            "CoarseDropout",
            "Crop",
            "CropAndPad",
            "CropNonEmptyMaskIfExists",
            "D4",
            "ElasticTransform",
            "Flip",
            "GridDistortion",
            "GridDropout",
            "HorizontalFlip",
            "Lambda",
            "LongestMaxSize",
            "MaskDropout",
            "MixUp",
            "Morphological",
            "NoOp",
            "OpticalDistortion",
            "PadIfNeeded",
            "Perspective",
            "PiecewiseAffine",
            "PixelDropout",
            "RandomCrop",
            "RandomCropFromBorders",
            "RandomGridShuffle",
            "RandomResizedCrop",
            "RandomRotate90",
            "RandomScale",
            "RandomSizedBBoxSafeCrop",
            "RandomSizedCrop",
            "Resize",
            "Rotate",
            "SafeRotate",
            "ShiftScaleRotate",
            "SmallestMaxSize",
            "Transpose",
            "VerticalFlip",
            "XYMasking",
        }  # from https://albumentations.ai/docs/getting_started/transforms_and_targets/#spatial-level-transforms

        # Transforms
        T = [
            A.Blur(p=0.3),
            A.ToGray(p=0.3),
            A.CLAHE(p=0.3),
            A.RandomBrightnessContrast(p=0.3),
            A.HueSaturationValue(p=0.3),
            A.RandomSnow(p=0.3),
            A.HorizontalFlip(p=0.5),
        ]

        # Compose transforms; bbox params are only needed when at least one
        # transform moves pixels around.
        self.contains_spatial = any(
            transform.__class__.__name__ in spatial_transforms for transform in T
        )
        self.transform = (
            A.Compose(
                T,
                bbox_params=A.BboxParams(
                    format="yolo", label_fields=["class_labels"]
                ),
            )
            if self.contains_spatial
            else A.Compose(T)
        )
        LOGGER.info(
            prefix
            + ", ".join(
                f"{x}".replace("always_apply=False, ", "") for x in T if x.p
            )
        )
    except ImportError:
        # albumentations is not installed: leave self.transform as None.
        pass
    except Exception as e:
        LOGGER.info(f"{prefix}{e}")


# Patch once, before any model is built, rather than on every loop iteration.
Albumentations.__init__ = _custom_albumentations_init

os.makedirs(YAML_DIR, exist_ok=True)

# results_dict of every fold's validation run, keyed by fold index.
# `validations` itself still ends up bound to the last fold's result.
fold_results = {}

# For each fold: export the train/val files, write the dataset YAML, then
# train and validate an RT-DETR model on it.
for idx, (train_idx, val_idx) in enumerate(cv.split(X, y, groups)):
    # np.unique de-duplicates the image ids and sorts them, so the list files
    # come out in the same order on every run.
    train_img_ids = np.unique(groups[train_idx]).tolist()
    val_img_ids = np.unique(groups[val_idx]).tolist()
    train_img_names = [coco.imgs[img_id]["file_name"] for img_id in train_img_ids]
    val_img_names = [coco.imgs[img_id]["file_name"] for img_id in val_img_ids]

    fold_dir = os.path.join(DATASET_ROOT, f"fold_{idx}")
    os.makedirs(fold_dir, exist_ok=True)

    _export_split(
        train_img_names, fold_dir, "train",
        os.path.join(fold_dir, f"train_fold_{idx}.txt"),
    )
    _export_split(
        val_img_names, fold_dir, "val",
        os.path.join(fold_dir, f"val_fold_{idx}.txt"),
    )

    # Every fold shares the same test list.
    shutil.copyfile(
        os.path.join(DATASET_ROOT, "test.txt"), os.path.join(fold_dir, "test.txt")
    )

    # Dataset root for this fold (trailing separator kept, as before).
    dataset_path = os.path.join(fold_dir, "")

    # Contents of the dataset YAML. Deliberately NOT called `data`, so the
    # annotation dict loaded before the loop is not overwritten.
    dataset_cfg = {
        "path": dataset_path,  # dataset root
        "train": f"train_fold_{idx}.txt",  # training image list
        "val": f"val_fold_{idx}.txt",  # validation image list
        "test": "test.txt",  # test image list
        "names": CLASS_NAMES,
    }

    # Write the YAML file.
    yaml_path = f"{YAML_DIR}/rtdetr_fold_{idx}.yaml"
    with open(yaml_path, "w") as yaml_file:
        yaml.dump(dataset_cfg, yaml_file, default_flow_style=False, allow_unicode=True)

    # Start every fold from the same "rtdetr-l.pt" checkpoint.
    model = RTDETR("rtdetr-l.pt")
    model.train(
        data=yaml_path,
        epochs=30,
        imgsz=1024,
        batch=8,
        device=[1],
        cache="disk",
        project="Ultralytics",
        name="RT-DETR-l",
    )
    validations = model.val(
        data=yaml_path,
        batch=8,
        imgsz=1024,
        save_json=True,
        save_txt=True,
    )
    fold_results[idx] = validations.results_dict


In [5]:
# Show the metrics dict of `validations`, i.e. the value returned by the most
# recent `model.val(...)` call in the training cell (the last fold that ran).
validations.results_dict

{'metrics/precision(B)': 0.8510079093550562,
 'metrics/recall(B)': 0.6553374095716663,
 'metrics/mAP50(B)': 0.7301296009268983,
 'metrics/mAP50-95(B)': 0.6371883944488608,
 'fitness': 0.6464825150966645}