In [9]:
cd /workspace/mnt2/data/tomato_except3/origin

/workspace/mnt2/data/tomato_except3/origin


In [2]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Train/Valid Split

In [3]:
# Source folders pooled for the train/validation split, grouped by dataset.
_train_71 = [
    './71_data/Training/0_healthy',
    './71_data/Training/3_YellowLeafCurlVirus',
    './71_data/Training/4_LeafMold',
]
_train_73 = [
    './73_data/Training/0_healthy',
    './73_data/Training/5_LateBlight',
]
# The healthy images of 104_data/Validation are pooled here as well.
_train_104 = [
    './104_data/Training/0_healthy',
    './104_data/Training/1_PowderyMildew',
    './104_data/Training/2_GrayMold',
    './104_data/Validation/0_healthy',
]
images_path = _train_71 + _train_73 + _train_104

# Class names, in the order they are first encountered
class_names_list = []

# Maps each class name to the image paths collected for it
class_to_images = {}

for path in images_path:
    # The class name is the text after the last underscore of the path,
    # e.g. './71_data/Training/0_healthy' -> 'healthy'
    class_name = path.split('_')[-1]

    # Collect the .jpg files (extension matched case-insensitively).
    # os.listdir returns entries in arbitrary order, so sort them: without a
    # stable input order the seeded train/val split is not reproducible.
    imgs = [
        os.path.join(path, fname)
        for fname in sorted(os.listdir(path))
        if fname.lower().endswith('.jpg')
    ]
    if not imgs:
        print(f"No images found in path: {path}")

    # Register the class on first sight, then pool the images of every
    # folder that shares the class name
    if class_name not in class_to_images:
        class_names_list.append(class_name)
        class_to_images[class_name] = []
    class_to_images[class_name].extend(imgs)

# Split each class separately into train and val, so every class gives up
# the same fraction of its images to the validation set
VAL_FRACTION = 0.12  # share of each class held out for validation
SPLIT_SEED = 4885    # fixed seed for the shuffle

train_data = []
val_data = []

for class_name, images in class_to_images.items():
    # With fewer than two images train_test_split raises a ValueError that
    # does not mention the class; fail early with a message that does.
    if len(images) < 2:
        raise ValueError(
            f"Class '{class_name}' has {len(images)} image(s); "
            "at least 2 are needed for a train/val split"
        )

    # All labels are identical within a class, so stratify balances nothing
    # here. NOTE(review): it is kept because removing it may change which
    # files end up in each split.
    labels = [class_name] * len(images)
    train_imgs, val_imgs = train_test_split(
        images,
        test_size=VAL_FRACTION,
        stratify=labels,
        random_state=SPLIT_SEED,
    )
    train_data.extend(train_imgs)
    val_data.extend(val_imgs)

print(f"Total train images: {len(train_data)}")
print(f"Total val images: {len(val_data)}")

Total train images: 36728
Total val images: 5011


In [4]:
# Destination directory of every class, for each of the three splits.
# Outer keys are class names; inner keys are the split names in _SPLIT_KEYS.
_SPLIT_KEYS = ("train", "val", "test")

dir_mapping = {
    "healthy": dict(zip(_SPLIT_KEYS, (
        "./TrainingDataSet/0_healthy/",
        "./ValidationDataSet/0_healthy/",
        "./TestDataSet/0_healthy/",
    ))),
    "PowderyMildew": dict(zip(_SPLIT_KEYS, (
        "./TrainingDataSet/1_PowderyMildew/",
        "./ValidationDataSet/1_PowderyMildew/",
        "./TestDataSet/1_PowderyMildew/",
    ))),
    "GrayMold": dict(zip(_SPLIT_KEYS, (
        "./TrainingDataSet/2_GrayMold/",
        "./ValidationDataSet/2_GrayMold/",
        "./TestDataSet/2_GrayMold/",
    ))),
    "YellowLeafCurlVirus": dict(zip(_SPLIT_KEYS, (
        "./TrainingDataSet/3_YellowLeafCurlVirus/",
        "./ValidationDataSet/3_YellowLeafCurlVirus/",
        "./TestDataSet/3_YellowLeafCurlVirus/",
    ))),
    "LeafMold": dict(zip(_SPLIT_KEYS, (
        "./TrainingDataSet/4_LeafMold/",
        "./ValidationDataSet/4_LeafMold/",
        "./TestDataSet/4_LeafMold/",
    ))),
    "LateBlight": dict(zip(_SPLIT_KEYS, (
        "./TrainingDataSet/5_LateBlight/",
        "./ValidationDataSet/5_LateBlight/",
        "./TestDataSet/5_LateBlight/",
    ))),
}

# Copy the data into the directory that matches its class and split
def copy_files(data, data_type, mapping=None):
    """Copy images, plus any same-named ``.txt`` file beside them, into split folders.

    The class of an image is read from the name of its parent folder: the
    text after the last underscore (``.../0_healthy/a.jpg`` -> ``healthy``).
    Destination directories are created on demand.

    Args:
        data: Iterable of image file paths.
        data_type: Split key looked up in the mapping, e.g. "train", "val"
            or "test".
        mapping: Optional ``{class_name: {data_type: destination_dir}}`` dict.
            When omitted, the module-level ``dir_mapping`` is used.

    Raises:
        KeyError: If the class name or ``data_type`` is not in the mapping.
    """
    if mapping is None:
        mapping = dir_mapping

    # destination path -> source path; used to report name collisions, since
    # several source folders can feed the same destination folder
    copied_from = {}

    for image_path in data:
        # Derive the class name from the image's parent folder
        directory, file_name = os.path.split(image_path)
        class_name = os.path.basename(directory).split('_')[-1]
        destination = mapping[class_name][data_type]

        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(destination, exist_ok=True)

        target = os.path.join(destination, file_name)
        previous = copied_from.get(target)
        if previous is not None and previous != image_path:
            # The copy still happens; the clash is only made visible
            print(f"Warning: {image_path} overwrites {target}, already copied from {previous}")
        copied_from[target] = image_path

        # Copy the image itself
        shutil.copy(image_path, target)

        # Copy the .txt file with the same base name, if there is one
        txt_file_name = os.path.splitext(file_name)[0] + '.txt'
        txt_file_path = os.path.join(directory, txt_file_name)
        if os.path.exists(txt_file_path):
            shutil.copy(txt_file_path, os.path.join(destination, txt_file_name))

# Copy both splits into their destination folders, training set first
for split_key, split_paths in (("train", train_data), ("val", val_data)):
    copy_files(split_paths, split_key)

In [10]:
# Folders to inspect: every class folder of the validation set, followed by
# every class folder of the training set
_class_dirs = (
    "0_healthy",
    "1_PowderyMildew",
    "2_GrayMold",
    "3_YellowLeafCurlVirus",
    "4_LeafMold",
    "5_LateBlight",
)
folders = [
    f"./{dataset_dir}/{class_dir}/"
    for dataset_dir in ("ValidationDataSet", "TrainingDataSet")
    for class_dir in _class_dirs
]

# Report, folder by folder, how many image files and text files it holds
for folder in folders:
    if not os.path.exists(folder):
        print(f"{folder} does not exist!!")
        continue

    names = os.listdir(folder)
    # Extensions are matched case-insensitively
    images = [name for name in names if name.lower().endswith(('.jpg','.jpeg', '.png'))]
    texts = [name for name in names if name.lower().endswith('.txt')]
    print(f"{folder}: {len(images)} images, {len(texts)} text files")

./ValidationDataSet/0_healthy/: 2235 images, 2235 text files
./ValidationDataSet/1_PowderyMildew/: 1412 images, 1412 text files
./ValidationDataSet/2_GrayMold/: 1101 images, 1101 text files
./ValidationDataSet/3_YellowLeafCurlVirus/: 135 images, 135 text files
./ValidationDataSet/4_LeafMold/: 102 images, 102 text files
./ValidationDataSet/5_LateBlight/: 26 images, 26 text files
./TrainingDataSet/0_healthy/: 16386 images, 16386 text files
./TrainingDataSet/1_PowderyMildew/: 10351 images, 10351 text files
./TrainingDataSet/2_GrayMold/: 8071 images, 8071 text files
./TrainingDataSet/3_YellowLeafCurlVirus/: 985 images, 985 text files
./TrainingDataSet/4_LeafMold/: 745 images, 745 text files
./TrainingDataSet/5_LateBlight/: 190 images, 190 text files


In [11]:
# Running [image, text-file] tallies, keyed by the dataset name that must
# appear in the folder path. Validation is checked before Training.
tallies = {"ValidationDataSet": [0, 0], "TrainingDataSet": [0, 0]}

# Count the .jpg and .txt files of every folder and add them to its dataset
for folder in folders:
    if not os.path.exists(folder):
        print(f"{folder} does not exist!")
        continue

    names = os.listdir(folder)
    images = [name for name in names if name.lower().endswith('.jpg')]
    texts = [name for name in names if name.lower().endswith('.txt')]

    # Only the first matching dataset receives the counts; folders that
    # match neither name are listed but not counted
    for dataset_name, tally in tallies.items():
        if dataset_name in folder:
            tally[0] += len(images)
            tally[1] += len(texts)
            break

validation_image_count, validation_text_count = tallies["ValidationDataSet"]
training_image_count, training_text_count = tallies["TrainingDataSet"]

print(f"Total images in ValidationDataSet: {validation_image_count}")
print(f"Total text files in ValidationDataSet: {validation_text_count}")
print(f"Total images in TrainingDataSet: {training_image_count}")
print(f"Total text files in TrainingDataSet: {training_text_count}")
print(f"Total images combined: {validation_image_count + training_image_count}")
print(f"Total text files combined: {validation_text_count + training_text_count}")

Total images in ValidationDataSet: 5011
Total text files in ValidationDataSet: 5011
Total images in TrainingDataSet: 36728
Total text files in TrainingDataSet: 36728
Total images combined: 41739
Total text files combined: 41739


# Test Split

In [None]:
cd /workspace/mnt2/data/tomato_except3/origin/TestDataSet

In [None]:
# Source folders that make up the test set, grouped by dataset
_test_71 = [
    './71_data/Validation/0_healthy',
    './71_data/Validation/3_YellowLeafCurlVirus',
    './71_data/Validation/4_LeafMold',
]
_test_73 = [
    './73_data/Validation/0_healthy',
    './73_data/Validation/5_LateBlight',
]
_test_104 = [
    './104_data/Validation/1_PowderyMildew',
    './104_data/Validation/2_GrayMold',
]
images_path = _test_71 + _test_73 + _test_104

# Class names in first-seen order, and the image paths gathered per class
class_names_list = []
class_to_images = {}

for path in images_path:
    # Class name = text after the last underscore of the folder path
    class_name = path.split('_')[-1]

    # Gather the .jpg files of this folder (extension check ignores case)
    imgs = []
    for fname in os.listdir(path):
        if fname.lower().endswith('.jpg'):
            imgs.append(os.path.join(path, fname))
    if len(imgs) == 0:
        print(f"No images found in path: {path}")

    # Record a class the first time it shows up; images of folders that
    # share a class name are pooled under that name
    if class_name not in class_names_list:
        class_names_list.append(class_name)
    class_to_images.setdefault(class_name, []).extend(imgs)

# Every collected image goes into the test set, class after class
test_data = [
    image
    for class_images in class_to_images.values()
    for image in class_images
]

print(f"Total test images: {len(test_data)}")

In [None]:
# Copy the collected test images into the "test" destination folders
copy_files(data=test_data, data_type="test")

In [None]:
# Folders to inspect: one per class inside the test set
folders = [
    f"./TestDataSet/{class_dir}"
    for class_dir in (
        "0_healthy",
        "1_PowderyMildew",
        "2_GrayMold",
        "3_YellowLeafCurlVirus",
        "4_LeafMold",
        "5_LateBlight",
    )
]

# Report, folder by folder, how many .jpg files and .txt files it holds
for folder in folders:
    if not os.path.exists(folder):
        print(f"{folder} does not exist!!")
        continue

    names = os.listdir(folder)
    images = [name for name in names if name.lower().endswith('.jpg')]
    texts = [name for name in names if name.lower().endswith('.txt')]
    print(f"{folder}: {len(images)} images, {len(texts)} text files")

In [None]:
test_image_count = test_text_count = 0

# Add up the .jpg and .txt files over all test folders
for folder in folders:
    if not os.path.exists(folder):
        continue  # missing folders are skipped without a message

    names = os.listdir(folder)
    images = [name for name in names if name.lower().endswith('.jpg')]
    texts = [name for name in names if name.lower().endswith('.txt')]
    test_image_count += len(images)
    test_text_count += len(texts)

print(f"Total test images combined: {test_image_count}")
print(f"Total test text files combined: {test_text_count}")
