In [20]:
import os
import yaml
from collections import Counter

def count_classes_from_yaml(yaml_path, images_dir, labels_dir):
    """
    Count labelled instances per class across a directory of label files.

    Every ``.txt`` file in ``labels_dir`` is read line by line; the first
    whitespace-separated token of each non-blank line is parsed as an integer
    class ID. IDs are then translated to names via the ``names`` entry of the
    YAML file.

    Args:
        yaml_path (str): Path to the data.yaml file.
        images_dir (str): Path to the directory containing image files.
            Currently unused; kept so existing callers keep working.
        labels_dir (str): Path to the directory containing label (.txt) files.

    Returns:
        dict: Class names mapped to instance counts, ordered by class ID.
        Class IDs that have no entry in ``names`` are reported under
        ``"unknown_<id>"`` instead of raising. An empty dict is returned
        (after printing an error) when ``labels_dir`` is not a directory.

    Raises:
        OSError: If ``yaml_path`` or a label file cannot be opened.
        ValueError: If the first token of a non-blank label line is not an
            integer.
    """
    # Load the data.yaml file; an empty document parses to None.
    with open(yaml_path, 'r') as file:
        data = yaml.safe_load(file) or {}

    # `names` may be a list (index == class ID) or a dict keyed by class ID.
    class_names = data.get('names') or []

    # Verify that the labels directory exists
    if not os.path.isdir(labels_dir):
        print(f"Error: Labels directory {labels_dir} does not exist.")
        return {}

    # Count instances in .txt files
    class_counts = Counter()
    for label_file in os.listdir(labels_dir):
        if not label_file.endswith('.txt'):
            continue
        with open(os.path.join(labels_dir, label_file), 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank / whitespace-only lines carry no annotation.
                    continue
                class_counts[int(fields[0])] += 1

    # Map class IDs to class names. Sorting makes the output independent of
    # the (arbitrary) order in which os.listdir returned the files.
    class_counts_named = {}
    for class_id in sorted(class_counts):
        fallback = f"unknown_{class_id}"
        if isinstance(class_names, dict):
            name = class_names.get(class_id, fallback)
        elif 0 <= class_id < len(class_names):
            name = class_names[class_id]
        else:
            # Also guards against negative IDs wrapping around the list.
            name = fallback
        # Accumulate so two IDs sharing one name do not overwrite each other.
        class_counts_named[name] = class_counts_named.get(name, 0) + class_counts[class_id]
    return class_counts_named

# Dataset locations (validation split) used by the class-count report below.
yaml_path = "/home/kjj73/dev_ws/yolo/yolov8/data/dataset_001/data.yaml"
images_dir = "/home/kjj73/dev_ws/yolo/yolov8/data/dataset_001/valid/images"
labels_dir = "/home/kjj73/dev_ws/yolo/yolov8/data/dataset_001/valid/labels"

# Tally the instances per class, then print one "name: count" line per class.
class_counts = count_classes_from_yaml(
    yaml_path=yaml_path,
    images_dir=images_dir,
    labels_dir=labels_dir,
)
print("Class counts:")
for class_name in class_counts:
    count = class_counts[class_name]
    print(f"{class_name}: {count}")

Class counts:
green: 155
green arrow: 155
red: 155
green and green arrow: 155


In [3]:
import os
from PIL import Image

# Scan every entry in the image folder and report the ones Pillow cannot
# open or that fail its integrity check.
data_path = r"C:\Users\KJJ\Desktop\code\data\dataset_edit\valid\images"
for img_file in os.listdir(data_path):
    try:
        # Image.open keeps the underlying file open; the context manager
        # closes it on every iteration so handles do not pile up.
        with Image.open(os.path.join(data_path, img_file)) as img:
            img.verify()  # check the file for corruption
    except Exception as e:
        # Deliberately broad: any failure just marks this file as bad and
        # the scan moves on to the next one.
        print(f"Error with image {img_file}: {e}")


: 

In [18]:
import os

def remove_classes_and_empty_labels(images_path, labels_path, remove_indices,
                                    image_extensions=('.jpg', '.jpeg', '.png', '.bmp')):
    """
    Strip annotations of the given classes from label files, in place.

    Each ``.txt`` file directly inside ``labels_path`` is filtered: lines whose
    first whitespace-separated token matches one of ``remove_indices`` are
    dropped, as are blank lines. A label file left with no annotations is
    deleted together with its image(s) of the same base name in
    ``images_path``.

    The remaining class indices are NOT renumbered.

    Args:
        images_path (str): Directory containing the image files.
        labels_path (str): Directory containing the label (.txt) files.
        remove_indices (iterable): Class indices to remove; compared as
            strings against the first token of each label line.
        image_extensions (iterable of str): Extensions (with leading dot)
            tried when looking for the image that belongs to a label file.

    Raises:
        OSError: If ``labels_path`` cannot be listed or a file cannot be
            read, written, or removed.
    """
    # Label files store the class index as text, so compare as strings.
    remove_indices = set(map(str, remove_indices))

    for label_file in os.listdir(labels_path):
        label_path = os.path.join(labels_path, label_file)
        stem, ext = os.path.splitext(label_file)

        # Only touch real label files; skip sub-directories and other files.
        if ext != '.txt' or not os.path.isfile(label_path):
            continue

        with open(label_path, 'r') as f:
            lines = f.readlines()

        # Keep annotations of the surviving classes; blank lines are dropped
        # (they carry no annotation and have no first token to inspect).
        filtered_lines = []
        for line in lines:
            fields = line.split()
            if fields and fields[0] not in remove_indices:
                filtered_lines.append(line)

        if filtered_lines:
            # Rewrite only when something was actually removed.
            if len(filtered_lines) != len(lines):
                with open(label_path, 'w') as f:
                    f.writelines(filtered_lines)
        else:
            # No annotations left: delete the image(s) along with the label.
            # splitext is used so only the real extension is swapped.
            for image_ext in image_extensions:
                image_path = os.path.join(images_path, stem + image_ext)
                if os.path.exists(image_path):
                    os.remove(image_path)
            os.remove(label_path)

# Dataset locations (validation split)
images_folder = "/home/kjj73/dev_ws/yolo/yolov8/data/dataset_001/valid/images"  # image folder path
labels_folder = "/home/kjj73/dev_ws/yolo/yolov8/data/dataset_001/valid/labels"  # label folder path

# Class indices to strip from the labels (e.g. [0, 2])
remove_classes = [4, 5]

# Run the clean-up. This edits/deletes files in place under the folders above.
remove_classes_and_empty_labels(
    images_path=images_folder,
    labels_path=labels_folder,
    remove_indices=remove_classes,
)
