In [1]:
import os
import shutil
from collections import Counter

# Source split folders; each is expected to contain "labels/" and "images/".
paths = ["./origindata/valid/", "./origindata/train/", "./origindata/test/"]

# Destination folders for the processed (relabelled, renamed) dataset.
output_labels_folder = "./data/labels/"
output_images_folder = "./data/images/"

# New class names; the position in the list is the new class id.
class_names = [
    "red", "yellow", "green", "stop_sign",
    "40_sign", "80_sign", "start_sign", "person", "others"
]

# Running number of output files per dominant class (drives the file names).
class_counts = {name: 0 for name in class_names}

# Original class ids whose annotation lines are dropped.
delete_classes = {"2", "3", "4", "5", "6", "7", "9", "10", "11", "13"}

# Original class id -> new class id for the annotation lines that are kept.
class_id_map = {"0": "2", "1": "0", "8": "4", "12": "5", "14": "3"}


def remap_label_lines(lines):
    """Translate annotation lines from the original class ids to the new ones.

    Args:
        lines: iterable of raw label lines; the first whitespace-separated
            token of each line is the original class id.

    Returns:
        A ``(kept_lines, unknown_ids)`` tuple. ``kept_lines`` holds the
        remapped lines (tokens re-joined with single spaces, no newline) in
        input order. ``unknown_ids`` lists every class id that is neither in
        ``delete_classes`` nor in ``class_id_map``; those lines are dropped so
        an un-remapped id can never leak into the new label space.
    """
    kept_lines = []
    unknown_ids = []
    for line in lines:
        parts = line.strip().split()
        if not parts:
            continue  # blank line
        class_id = parts[0]
        if class_id in delete_classes:
            continue  # class is intentionally removed from the dataset
        if class_id not in class_id_map:
            unknown_ids.append(class_id)
            continue
        parts[0] = class_id_map[class_id]
        kept_lines.append(" ".join(parts))
    return kept_lines, unknown_ids


def process_split(path, labels_out=None, images_out=None, counts=None):
    """Relabel one split folder and copy its label/image pairs to the output.

    Every ``*.txt`` file in ``<path>/labels`` is remapped with
    ``remap_label_lines``. A file that ends up with no annotation lines is
    deleted from the SOURCE folder together with its ``.jpg`` image (this is
    destructive). Otherwise the pair is written to the output folders as
    ``<dominant class>_<5-digit running number>``.

    Args:
        path: split folder containing ``labels/`` and ``images/``.
        labels_out: destination for label files (default: ``output_labels_folder``).
        images_out: destination for image files (default: ``output_images_folder``).
        counts: per-class running counters, updated in place
            (default: ``class_counts``).
    """
    labels_out = output_labels_folder if labels_out is None else labels_out
    images_out = output_images_folder if images_out is None else images_out
    counts = class_counts if counts is None else counts

    labels_folder = os.path.join(path, "labels")
    images_folder = os.path.join(path, "images")
    if not os.path.isdir(labels_folder):
        # A dataset without one of the splits should not abort the whole run.
        print(f"Skipped missing labels folder: {labels_folder}")
        return

    # Sorted so that the running numbers are reproducible across runs/platforms.
    for filename in sorted(os.listdir(labels_folder)):
        if not filename.endswith(".txt"):
            continue
        txt_path = os.path.join(labels_folder, filename)
        # Swap only the extension; str.replace would also rewrite a ".txt"
        # that happens to appear earlier in the file name.
        image_path = os.path.join(images_folder, os.path.splitext(filename)[0] + ".jpg")

        with open(txt_path, "r", encoding="utf-8") as file:
            modified_lines, unknown_ids = remap_label_lines(file)

        if unknown_ids:
            print(f"Ignored unknown class ids {sorted(set(unknown_ids))} in {txt_path}")

        if not modified_lines:
            if unknown_ids:
                # Empty only because of unmapped ids: keep the source files so
                # nothing is lost while the mapping is being fixed.
                continue
            # Nothing left to annotate: remove the pair from the source folder.
            if os.path.exists(image_path):
                os.remove(image_path)
            os.remove(txt_path)
            print(f"Deleted file (all lines matched delete_classes): {txt_path}")
            continue

        if not os.path.exists(image_path):
            # Writing the label alone would leave an orphan annotation and
            # consume a running number that no image uses.
            print(f"Skipped label without image: {txt_path}")
            continue

        # The most frequent class in the file names the output pair; ties go
        # to the class that appears first in the file.
        class_counter = Counter(line.split(" ", 1)[0] for line in modified_lines)
        most_common_class, _ = class_counter.most_common(1)[0]
        image_class = class_names[int(most_common_class)]

        counts[image_class] += 1
        new_image_name = f"{image_class}_{counts[image_class]:05d}"

        output_txt_path = os.path.join(labels_out, f"{new_image_name}.txt")
        with open(output_txt_path, "w", encoding="utf-8") as file:
            file.write("\n".join(modified_lines) + "\n")

        output_image_path = os.path.join(images_out, f"{new_image_name}.jpg")
        shutil.copy2(image_path, output_image_path)


# Create the destination folders.
os.makedirs(output_labels_folder, exist_ok=True)
os.makedirs(output_images_folder, exist_ok=True)

# Process every split.
for path in paths:
    process_split(path)

# Report how many files were produced per dominant class.
print("Class Counts by Index:", class_counts)


Deleted file (all lines matched delete_classes): ./origindata/valid/labels\000001_jpg.rf.5eb4f984823b1a44afc9bf98804daa50.txt
Deleted file (all lines matched delete_classes): ./origindata/valid/labels\000002_jpg.rf.d65ebeef4d1cb26e3fc1a826770b729f.txt
Deleted file (all lines matched delete_classes): ./origindata/valid/labels\000004_jpg.rf.00126fe7725236ec5dc849bac78543c2.txt
Deleted file (all lines matched delete_classes): ./origindata/valid/labels\00000_00000_00002_png.rf.109f031ac8e60eba952da43b054389c0.txt
Deleted file (all lines matched delete_classes): ./origindata/valid/labels\00000_00000_00003_png.rf.e1d0c6904cad2c27548ec3c332b2e088.txt
Deleted file (all lines matched delete_classes): ./origindata/valid/labels\00000_00000_00008_png.rf.823f2aa00055169915d8979d389796eb.txt
Deleted file (all lines matched delete_classes): ./origindata/valid/labels\00000_00000_00010_png.rf.ea7f6dcad45043cc1275a3722f259c45.txt
Deleted file (all lines matched delete_classes): ./origindata/valid/labels

In [2]:
import os
from collections import Counter

# Split folders to summarise; "labels" is appended directly to each entry,
# so every path must keep its trailing slash.
# NOTE(review): the relabelling cell writes to ./data/labels/ — confirm that a
# separate step populates ./data/<split>/labels before this cell runs.
paths = ["./data/valid/", "./data/train/", "./data/test/"]

# Category names; the list position is the class id found in the label files.
categories = ["red", "yellow", "green", "stop_sign", "40_sign", "80_sign", "start_sign", "person", "others"]

# Text file that receives the same report that is printed below.
output_file = "category_counts.txt"

with open(output_file, "w") as out_file:

    def emit(text):
        """Print one report line and append it to the report file."""
        print(text)
        out_file.write(text + "\n")

    total = 0  # annotation lines counted over all splits
    last_named_index = len(categories) - 1  # index of the catch-all "others"

    for path in paths:
        labels_folder = path + "labels"
        index_counter = Counter()  # fresh tally for every split

        for filename in os.listdir(labels_folder):
            if not filename.endswith(".txt"):
                continue

            with open(os.path.join(labels_folder, filename), "r") as file:
                for line in file.readlines():
                    parts = line.strip().split()
                    # Blank lines and non-numeric class tokens are not counted.
                    if not parts or not parts[0].isdigit():
                        continue

                    first_index = int(parts[0])
                    # Ids outside the named range (and the last id itself)
                    # are all tallied under "others".
                    if 0 <= first_index < last_named_index:
                        label = categories[first_index]
                    else:
                        label = "others"
                    index_counter[label] += 1
                    total += 1

        # Per-split report, followed by a blank separator line.
        emit(f"{path}'s Category Counts:")
        for category in categories:
            emit(f"{category}: {index_counter[category]}")
        emit("")

    # Grand total over every split.
    emit(f"Total Count: {total}")

print(f"Results have been saved to {output_file}")


./data/valid/'s Category Counts:
red: 108
yellow: 0
green: 122
stop_sign: 81
40_sign: 55
80_sign: 56
start_sign: 0
person: 0
others: 0

./data/train/'s Category Counts:
red: 585
yellow: 0
green: 542
stop_sign: 285
40_sign: 235
80_sign: 323
start_sign: 0
person: 0
others: 0

./data/test/'s Category Counts:
red: 94
yellow: 0
green: 110
stop_sign: 50
40_sign: 53
80_sign: 61
start_sign: 0
person: 0
others: 0

Total Count: 2760
Results have been saved to category_counts.txt
