<a href="https://colab.research.google.com/github/recon48/20242R0136COSE47402/blob/main/preprocessing_caltech101.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Drive Mount

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells copy the dataset archive
# from it and write the cropped images back to it.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Preparation

In [None]:
!pip install ultralytics

from ultralytics import YOLO
from PIL import Image
import matplotlib.pyplot as plt

import os
from PIL import Image

# Mapping from dataset category folder name to the COCO class label that the
# detector must report for that category.
category_to_coco_label = dict(
    airplanes="airplane",
    car_side="car",
    cellphone="cell phone",
    chair="chair",
    cup="cup",
    elephant="elephant",
    pizza="pizza",
    scissors="scissors",
    stop_sign="stop sign",
    umbrella="umbrella",
    laptop="laptop",
)

# Categories to crop: exactly the categories that have a COCO mapping,
# in the mapping's insertion order.
target_categories = list(category_to_coco_label)




## Dataset

In [None]:
!git clone https://github.com/mlvlab/ProMetaR.git
%cd ProMetaR/

!git clone https://github.com/KaiyangZhou/Dassl.pytorch.git
%cd Dassl.pytorch/

# Install dependencies
!pip install -r requirements.txt
!cp -r dassl ../
# Install this library (no need to re-build if the source code is modified)
# !python setup.py develop
%cd ..

!pip install -r requirements.txt

%mkdir outputs
%mkdir data

%cd data
%mkdir caltech-101

!cp "/content/drive/My Drive/101_ObjectCategories.tar.gz" ./  import tarfile

# 압축 해제
with tarfile.open('101_ObjectCategories.tar.gz', 'r:gz') as tar:
    tar.extractall(path='./caltech-101')

%cd caltech-101

%cd ../../
!pwd
!cp "/content/drive/My Drive/split_zhou_Caltech101.json" ./data/caltech-101/

fatal: destination path 'ProMetaR' already exists and is not an empty directory.
/content/ProMetaR
fatal: destination path 'Dassl.pytorch' already exists and is not an empty directory.
/content/ProMetaR/Dassl.pytorch
/content/ProMetaR
mkdir: cannot create directory ‘outputs’: File exists
mkdir: cannot create directory ‘data’: File exists
/content/ProMetaR/data
mkdir: cannot create directory ‘caltech-101’: File exists
/content/ProMetaR/data/caltech-101
/content/ProMetaR
/content/ProMetaR


## Detect and Crop

In [None]:
# Loaded YOLO models keyed by weights path, so repeated calls reuse one model
# instead of re-loading the weights for every single image.
_YOLO_MODEL_CACHE = {}


def detect_and_crop_yolo(image_path, target_label, model_path='yolov8n.pt'):
    """Detect ``target_label`` in an image with YOLO and return the cropped box.

    Args:
        image_path (str): Path of the image to analyse.
        target_label (str): Class name to look for; it is compared against the
            entries of the model's ``names`` mapping (e.g. "cell phone").
        model_path (str): Weights identifier passed to ``YOLO(...)``.

    Returns:
        The RGB crop (PIL image) of the highest-confidence box whose class
        equals ``target_label``, or ``None`` when nothing was detected or no
        detected box has the target class.
    """
    model = _YOLO_MODEL_CACHE.get(model_path)
    if model is None:
        model = _YOLO_MODEL_CACHE[model_path] = YOLO(model_path)

    # The context manager closes the underlying file; convert() returns a
    # separate, fully loaded image that stays usable afterwards.
    with Image.open(image_path) as raw_image:
        img = raw_image.convert("RGB")

    results = model(img)

    detections = results[0].boxes
    boxes = detections.xyxy  # [x_min, y_min, x_max, y_max]
    confidences = detections.conf
    classes = detections.cls  # class indices

    if len(boxes) == 0:
        print(f"No objects detected in {image_path}.")
        return None

    model_classes = model.names

    # Pick the most confident box of the requested class. The strict ">" keeps
    # the earliest box on ties, so the result does not depend on box ordering
    # beyond that.
    best_idx = None
    for idx in range(len(boxes)):
        if model_classes[int(classes[idx])] != target_label:
            continue
        if best_idx is None or float(confidences[idx]) > float(confidences[best_idx]):
            best_idx = idx

    if best_idx is None:
        print(f"No target '{target_label}' detected in {image_path}.")
        return None

    x_min, y_min, x_max, y_max = map(int, boxes[best_idx].tolist())
    cropped_image = img.crop((x_min, y_min, x_max, y_max))
    print(f"Detected and cropped '{target_label}' in {image_path}.")
    return cropped_image



0: 608x640 1 airplane, 1 bird, 235.9ms
Speed: 3.8ms preprocess, 235.9ms inference, 5.3ms postprocess per image at shape (1, 3, 608, 640)
No target 'chair' detected in /content/ProMetaR/data/caltech-101/101_ObjectCategories/chair/image_0062.jpg.
none


## Crop and Save

In [None]:
def _open_as_rgb(image_path):
    """Return an in-memory RGB copy of the image and close the source file."""
    with Image.open(image_path) as raw_image:
        return raw_image.convert("RGB")


def crop_and_save_all_images(input_dir, output_dir, target_categories):
    """Crop images of the target categories and mirror everything to ``output_dir``.

    Every sub-directory of ``input_dir`` is treated as a category and gets a
    directory of the same name under ``output_dir``. Images of categories listed
    in ``target_categories`` are cropped with ``detect_and_crop_yolo``; when no
    crop is produced, or the category is not a target, the original image
    (converted to RGB) is saved instead. A failure on one image is reported and
    counted, and processing continues with the next image.

    Args:
        input_dir (str): Directory containing one sub-directory per category.
        output_dir (str): Directory that receives the processed images.
        target_categories (list): Names of the categories to crop.
    """
    no_detection_count = 0  # target images saved uncropped (no crop produced)
    total_images = 0        # directory entries visited inside category folders
    detection_count = 0     # target images that were cropped
    error_count = 0         # entries that raised while being processed

    # Sorted so the processing order (and the log) is deterministic.
    for category in sorted(os.listdir(input_dir)):
        category_path = os.path.join(input_dir, category)
        if not os.path.isdir(category_path):
            continue

        category_output_path = os.path.join(output_dir, category)
        os.makedirs(category_output_path, exist_ok=True)

        for image_file in sorted(os.listdir(category_path)):
            total_images += 1
            print(f"진행 완료 이미지 수 : {total_images}")
            print(f"no detection 이미지 수 : {no_detection_count}")
            print(f"detection 이미지 수 : {detection_count}")
            image_path = os.path.join(category_path, image_file)

            try:
                if category in target_categories:
                    target_label = category_to_coco_label.get(category, None)
                    if not target_label:
                        print(f"Category '{category}' does not map to a COCO label. Skipping...")
                        continue

                    cropped_image = detect_and_crop_yolo(image_path, target_label)
                    if cropped_image is None:
                        print(f"No detection for {image_path}. Saving original image.")
                        cropped_image = _open_as_rgb(image_path)
                        no_detection_count += 1
                    else:
                        detection_count += 1
                else:
                    print(f"Category '{category}' not in target_categories. Saving original image.")
                    cropped_image = _open_as_rgb(image_path)

                save_path = os.path.join(category_output_path, image_file)
                cropped_image.save(save_path)
                print(f"Saved image: {save_path}")
            except Exception as e:
                # Best effort: one unreadable entry must not abort the whole
                # run, but it is counted so the summary shows that output is
                # missing for some inputs.
                error_count += 1
                print(f"Error processing {image_path}: {e}")

    print(f"Total images processed: {total_images}")
    print(f"Number of images with no detection: {no_detection_count}")
    print(f"Number of images with detection: {detection_count}")
    print(f"Number of images that failed to process: {error_count}")


# Folder holding the original images.
input_dir = "/content/ProMetaR/data/caltech-101/101_ObjectCategories"
# Folder in which the cropped images are stored.
output_dir = "/content/drive/MyDrive/cropped_101_ObjectCategories"

crop_and_save_all_images(
    input_dir=input_dir,
    output_dir=output_dir,
    target_categories=target_categories,
)


[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
detection 이미지 수 : 1068

0: 448x640 5 cars, 1 truck, 177.8ms
Speed: 5.4ms preprocess, 177.8ms inference, 1.3ms postprocess per image at shape (1, 3, 448, 640)
Detected and cropped 'car' in /content/ProMetaR/data/caltech-101/101_ObjectCategories/car_side/image_0105.jpg.
Saved image: /content/drive/MyDrive/cropped_101_ObjectCategories/car_side/image_0105.jpg
진행 완료 이미지 수 : 8278
no detection 이미지 수 : 212
detection 이미지 수 : 1069

0: 448x640 1 bus, 156.5ms
Speed: 5.9ms preprocess, 156.5ms inference, 1.2ms postprocess per image at shape (1, 3, 448, 640)
No target 'car' detected in /content/ProMetaR/data/caltech-101/101_ObjectCategories/car_side/image_0071.jpg.
No detection for /content/ProMetaR/data/caltech-101/101_ObjectCategories/car_side/image_0071.jpg. Saving original image.
Saved image: /content/drive/MyDrive/cropped_101_ObjectCategories/car_side/image_0071.jpg
진행 완료 이미지 수 : 8279
no detection 이미지 수 : 213
detection 이미지 수 : 1069

0: 448x640 2 