# 이미지 기준: 클래스별 기준 크기보다 작은 이미지 삭제

In [2]:
import os
from PIL import Image

# Roots of the dataset splits to clean. Each root is searched for one
# sub-folder per key of `standard_sizes`.
base_directories = [
    '/workspace/mnt2/data/tomato_except3/crop/resize/1/TrainingDataSet',
    '/workspace/mnt2/data/tomato_except3/crop/resize/1/ValidationDataSet',
]

# Per-class minimum image size as (width, height). The key doubles as the
# name of the class sub-folder inside each dataset root.
standard_sizes = dict([
    ('0_healthy', (125, 111)),
    ('1_PowderyMildew', (200, 392)),
    ('2_GrayMold', (331, 421)),
    ('3_YellowLeafCurlVirus', (131, 152)),
    ('4_LeafMold', (313, 204)),
    ('5_LateBlight', (67, 92)),
])

# Function to delete images smaller than the standard size
def delete_smaller_images(sub_directory, standard_size):
    """Delete every image in ``sub_directory`` that is below a minimum size.

    An image is removed when its width is below ``standard_size[0]`` or its
    height is below ``standard_size[1]``. Only the direct entries of
    ``sub_directory`` are inspected; entries that are not regular files are
    skipped. Files that cannot be opened as images are reported and left in
    place.

    Args:
        sub_directory: Path of the directory to scan.
        standard_size: ``(min_width, min_height)`` threshold.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    min_width, min_height = standard_size[0], standard_size[1]

    for image_file in os.listdir(sub_directory):
        image_path = os.path.join(sub_directory, image_file)

        # Sub-folders and other non-file entries are not images; feeding them
        # to Image.open would only produce a misleading error message.
        if not os.path.isfile(image_path):
            continue

        try:
            with Image.open(image_path) as img:
                width, height = img.size
        except IOError:
            print(f"Error opening or reading image '{image_path}'.")
            continue

        if width >= min_width and height >= min_height:
            continue

        # Delete only after the image handle is closed: removing a file that
        # is still open fails on some platforms, and keeping the removal out
        # of the try-block above stops deletion failures from being reported
        # as read errors.
        try:
            os.remove(image_path)
        except OSError as exc:
            print(f"Could not delete '{image_path}': {exc}")
            continue
        print(f"Deleted '{image_path}' as it is smaller than the standard size.")

# Walk every dataset root and clean each class sub-folder using that class's
# own minimum size. Class folders that do not exist under a root are skipped.
for base_dir in base_directories:
    for class_dir, standard_size in standard_sizes.items():
        class_dir_path = os.path.join(base_dir, class_dir)
        # isdir rather than exists: a regular file that happens to carry a
        # class name would pass `exists` and then make os.listdir raise.
        if os.path.isdir(class_dir_path):
            delete_smaller_images(class_dir_path, standard_size)

Deleted '/workspace/mnt2/data/tomato_except3/crop/resize/1/TrainingDataSet/0_healthy/V006_79_0_00_07_03_12_0_c14_20201116_0014_S01_1_bbox_1.jpg' as it is smaller than the standard size.
Deleted '/workspace/mnt2/data/tomato_except3/crop/resize/1/TrainingDataSet/0_healthy/V006_77_0_00_11_03_12_0_c14_20201116_0016_S01_1_bbox_1.jpg' as it is smaller than the standard size.
Deleted '/workspace/mnt2/data/tomato_except3/crop/resize/1/TrainingDataSet/1_PowderyMildew/772082_20211108_2_1_a5_3_2_12_2_381_bbox_1.jpg' as it is smaller than the standard size.
Deleted '/workspace/mnt2/data/tomato_except3/crop/resize/1/TrainingDataSet/1_PowderyMildew/991105_20211129_2_1_a5_3_2_11_1_862_bbox_1.jpg' as it is smaller than the standard size.
Deleted '/workspace/mnt2/data/tomato_except3/crop/resize/1/TrainingDataSet/1_PowderyMildew/686918_20211101_2_1_a5_3_2_12_2_56_bbox_1.jpg' as it is smaller than the standard size.
Deleted '/workspace/mnt2/data/tomato_except3/crop/resize/1/TrainingDataSet/1_PowderyMilde

# YOLO 형식일 때 라벨 삭제 코드

In [37]:
import os
from PIL import Image

# Dataset roots whose class sub-folders hold image files with sibling YOLO
# label (.txt) files.
base_directories = [
    '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/104_data/Training',
    '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/104_data/Validation',
    '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/71_data/Training',
    '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/73_data/Training',
    '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/TestDataSet/104_data/Validation',
    '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/TestDataSet/71_data/Validation',
    '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/TestDataSet/73_data/Validation',
    # Currently disabled roots:
    # '/workspace/mnt/data/tomato_except4_revise2/ValidationDataSet',
    # '/workspace/mnt/data/tomato_except4_revise2/TestDataSet'
]

# Per-class minimum bounding-box size as (width, height) in pixels. The text
# before the first '_' of each key is matched against the class id found in
# the label files, and the full key is the class sub-folder name.
standard_sizes = dict([
    ('0_healthy', (126, 111)),
    ('1_PowderyMildew', (200, 392)),
    ('2_GrayMold', (253, 379)),
    ('3_YellowLeafCurlVirus', (131, 152)),
    ('4_LeafMold', (313, 203)),
])

def update_labels(sub_directory, standard_sizes):
    """Remove YOLO labels whose bounding box is below the class minimum size.

    For every ``.jpg``/``.png`` file in ``sub_directory`` that has a sibling
    ``.txt`` label file, each label line (``class x_center y_center width
    height`` with width/height relative to the image) is converted to pixel
    size and compared with the ``(min_width, min_height)`` configured for its
    class. Labels that are too small are removed from the label file. When no
    label remains, both the image and its label file are deleted.

    Labels whose class has no entry in ``standard_sizes`` are kept untouched,
    because there is no criterion to judge them by. Blank lines are ignored.

    Args:
        sub_directory: Directory holding the images and their label files.
        standard_sizes: Mapping whose keys start with the numeric class id
            (the text before the first ``_``, e.g. ``'1_PowderyMildew'``) and
            whose values are ``(min_width, min_height)`` in pixels.

    Raises:
        ValueError: If a non-blank label line does not consist of exactly
            five numeric fields.
    """
    # Index the thresholds by class-id prefix once instead of scanning the
    # keys for every label line. setdefault keeps the first key when several
    # share a prefix, matching a first-match scan over the mapping.
    size_by_class = {}
    for key, size in standard_sizes.items():
        size_by_class.setdefault(key.split('_')[0], size)

    for image_file in os.listdir(sub_directory):
        if not image_file.lower().endswith(('.jpg', '.png')):  # images only
            continue
        image_path = os.path.join(sub_directory, image_file)
        label_path = os.path.splitext(image_path)[0] + '.txt'
        if not os.path.exists(label_path):
            continue

        with open(label_path, 'r') as file:
            lines = file.readlines()

        image_size = None  # read lazily, at most once per image
        updated_lines = []
        labels_to_remove = []
        for line_number, line in enumerate(lines, start=1):
            fields = line.split()
            if not fields:
                # Blank (often trailing) lines carry no label.
                continue
            try:
                class_id, _, _, width, height = map(float, fields)
                class_key = str(int(class_id))
            except ValueError as exc:
                raise ValueError(
                    f"Malformed label in '{label_path}' line {line_number}: "
                    f"{line.strip()!r}"
                ) from exc

            standard_size = size_by_class.get(class_key)
            if standard_size is None:
                # No threshold configured for this class: keep the label
                # rather than silently discarding it.
                updated_lines.append(line)
                continue

            if image_size is None:
                with Image.open(image_path) as img:
                    image_size = img.size
            img_width, img_height = image_size

            # Convert the relative YOLO box size to absolute pixels.
            absolute_width = width * img_width
            absolute_height = height * img_height
            if absolute_width >= standard_size[0] and absolute_height >= standard_size[1]:
                updated_lines.append(line)
            else:
                # Too small for its class: schedule the label for removal.
                labels_to_remove.append(line.strip())

        if labels_to_remove:
            print(f"Labels to remove for '{image_path}': {labels_to_remove}")

        if updated_lines:
            # Rewrite only when something was actually removed.
            if labels_to_remove:
                with open(label_path, 'w') as file:
                    file.writelines(updated_lines)
        else:
            # No label left: drop both the image and its label file.
            os.remove(image_path)
            os.remove(label_path)
            print(f"Deleted '{image_path}' and '{label_path}' as no labels meet the size criteria.")

# Walk every dataset root and filter the labels in each class sub-folder.
# Class folders that do not exist under a root are skipped.
for base_dir in base_directories:
    for class_dir in standard_sizes:  # keys are the class sub-folder names
        class_dir_path = os.path.join(base_dir, class_dir)
        # isdir rather than exists: a regular file that happens to carry a
        # class name would pass `exists` and then make os.listdir raise.
        if os.path.isdir(class_dir_path):
            update_labels(class_dir_path, standard_sizes)

Labels to remove for '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/104_data/Training/0_healthy/815095_20211112_2_0_0_3_2_13_0_033.jpg': ['0 0.829458 0.00971483 0.297854 0.0191325']
Labels to remove for '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/104_data/Training/0_healthy/816184_20211112_2_0_0_3_2_13_0_0802.jpg': ['0 0.683874 0.00784337 0.308789 0.0153162']
Labels to remove for '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/104_data/Training/0_healthy/853791_20211115_2_0_0_3_2_13_0_0419.jpg': ['0 0.808655 0.0148534 0.300449 0.0294486']
Labels to remove for '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/104_data/Training/1_PowderyMildew/990102_20211129_2_1_a5_3_2_11_1_881.jpg': ['1 0.352513 0.0327411 0.4866 0.0652032']
Labels to remove for '/workspace/mnt/data/tomato_except4_revise2/tmp/origin/104_data/Training/1_PowderyMildew/687065_20211101_2_1_a5_3_2_12_2_103.jpg': ['1 0.157458 0.040948 0.306549 0.0816355']
Labels to remove for '/workspace/mnt/dat

In [34]:
# Scratch check of the prefix lookup: collect every key whose text before the
# first '_' equals the class id.
class_id = 3
matching_keys = list(
    filter(lambda name: name.partition('_')[0] == str(class_id), standard_sizes)
)
# Echo the mapping so the cell output shows which thresholds are in effect.
standard_sizes

{'0': (103, 111),
 '1_PowderyMildew': (200, 392),
 '2_GrayMold': (331, 421),
 '3_YellowLeafCurlVirus': (38, 51),
 '4_LeafMold': (102, 121),
 '5_LateBlight': (99, 50)}

In [36]:
# Show the value stored under the first matched key. Relies on `matching_keys`
# and `standard_sizes` left in the kernel by previously executed cells.
standard_sizes[matching_keys[0]]

(38, 51)

In [23]:
# Scratch thresholds as (width, height). Note that the first key is the bare
# id '0' while the other keys carry an '<id>_<name>' form.
standard_sizes = dict([
    ('0', (103, 111)),
    ('1_PowderyMildew', (200, 392)),
    ('2_GrayMold', (331, 421)),
    ('3_YellowLeafCurlVirus', (38, 51)),
    ('4_LeafMold', (102, 121)),
    ('5_LateBlight', (99, 50)),
])

In [11]:
# Class id to look up in the exact-key experiment of the following cell.
class_id = 0

In [12]:
# Exact-key lookup experiment: succeeds only when the mapping has a key that
# is exactly the class id as a string; otherwise nothing is printed.
try:
    standard_size = standard_sizes[str(class_id)]
except KeyError:
    pass
else:
    print(standard_size)

(103, 111)
