In [8]:
# Equipment class labels: 'AI1' through 'AI22', in numeric order.
target_classes = [f'AI{index}' for index in range(1, 23)]

In [None]:
import os
import shutil
import random
from pathlib import Path
import math

def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole``, or 0.0 when ``whole`` is 0."""
    return part / whole * 100 if whole else 0.0


def split_equipment_dataset(source_dir="dataset/equipment",
                            train_dir="dataset/equipment_train",
                            test_dir="dataset/equipment_test",
                            train_ratio=0.8,
                            classes=None):
    """
    Copy each class's images from ``source_dir`` into per-class train and
    test directories according to ``train_ratio``, printing a summary.

    For every class name, image files (by extension) found in
    ``source_dir/<class>`` are shuffled with the global ``random`` generator;
    the first ``floor(n * train_ratio)`` are copied to ``train_dir/<class>``
    and the rest to ``test_dir/<class>``. Source files are copied, not moved.

    Files already present in the destination directories are not removed, so
    re-running with a different shuffle can leave the same image in both
    train and test; start from empty destination directories for a clean split.

    Args:
        source_dir (str): directory containing one sub-directory per class.
        train_dir (str): destination root for the training split.
        test_dir (str): destination root for the test split.
        train_ratio (float): fraction of each class copied to the train
            split (0.8 = 80%).
        classes (iterable of str, optional): class sub-directory names to
            process. Defaults to the module-level ``target_classes``.

    Returns:
        None. Progress and totals are printed to stdout.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    # Only touch the global when the caller did not supply class names.
    class_names = target_classes if classes is None else classes

    # create directories for train and test
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    total_files = 0
    train_files = 0
    test_files = 0

    for class_name in class_names:
        source_class_dir = os.path.join(source_dir, class_name)

        # isdir (not exists): a stray file with the class name would make
        # os.listdir raise further down.
        if not os.path.isdir(source_class_dir):
            print(f"Warning: {source_class_dir} ไม่พบ!")
            continue

        # create directories for each class
        train_class_dir = os.path.join(train_dir, class_name)
        test_class_dir = os.path.join(test_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        # os.listdir returns entries in arbitrary order; sort first so that a
        # fixed random seed yields the same split on every machine and run.
        image_files = sorted(
            f for f in os.listdir(source_class_dir)
            if f.lower().endswith(image_extensions)
        )

        # random shuffle files
        random.shuffle(image_files)

        # number of images that go to the train split
        train_count = math.floor(len(image_files) * train_ratio)

        # divide files into train and test
        train_images = image_files[:train_count]
        test_images = image_files[train_count:]

        # copy each subset into its destination directory
        for subset, destination in ((train_images, train_class_dir),
                                    (test_images, test_class_dir)):
            for image_file in subset:
                src = os.path.join(source_class_dir, image_file)
                dst = os.path.join(destination, image_file)
                shutil.copy2(src, dst)

        # summarize results; _percent avoids dividing by zero for empty classes
        class_total = len(image_files)
        print(f"Class {class_name}:")
        print(f"  - Total images: {class_total}")
        print(f"  - Train images: {len(train_images)} ({_percent(len(train_images), class_total):.1f}%)")
        print(f"  - Test images: {len(test_images)} ({_percent(len(test_images), class_total):.1f}%)")
        print()

        total_files += class_total
        train_files += len(train_images)
        test_files += len(test_images)

    print("=" * 50)
    print("สรุปรวม:")
    print(f"Total images: {total_files}")
    print(f"Train images: {train_files} ({_percent(train_files, total_files):.1f}%)")
    print(f"Test images: {test_files} ({_percent(test_files, total_files):.1f}%)")
    print(f"Train directory: {train_dir}")
    print(f"Test directory: {test_dir}")

# Seed the global `random` generator so the random.shuffle call inside
# split_equipment_dataset draws a repeatable sequence. The seed is set when
# this cell runs, not on each call: calling the split again without re-running
# this line continues from the current generator state.
random.seed(42)

In [10]:
# Call the function to split the dataset into train and test directories
split_equipment_dataset(
    source_dir="dataset/equipment",
    train_dir="dataset/equipment_train", 
    test_dir="dataset/equipment_test",
    train_ratio=0.8
)

Class AI1:
  - Total images: 126
  - Train images: 100 (79.4%)
  - Test images: 26 (20.6%)

Class AI2:
  - Total images: 209
  - Train images: 167 (79.9%)
  - Test images: 42 (20.1%)

Class AI3:
  - Total images: 128
  - Train images: 102 (79.7%)
  - Test images: 26 (20.3%)

Class AI4:
  - Total images: 165
  - Train images: 132 (80.0%)
  - Test images: 33 (20.0%)

Class AI5:
  - Total images: 133
  - Train images: 106 (79.7%)
  - Test images: 27 (20.3%)

Class AI6:
  - Total images: 103
  - Train images: 82 (79.6%)
  - Test images: 21 (20.4%)

Class AI7:
  - Total images: 112
  - Train images: 89 (79.5%)
  - Test images: 23 (20.5%)

Class AI8:
  - Total images: 109
  - Train images: 87 (79.8%)
  - Test images: 22 (20.2%)

Class AI9:
  - Total images: 162
  - Train images: 129 (79.6%)
  - Test images: 33 (20.4%)

Class AI10:
  - Total images: 138
  - Train images: 110 (79.7%)
  - Test images: 28 (20.3%)

Class AI11:
  - Total images: 77
  - Train images: 61 (79.2%)
  - Test images: 16 

In [None]:
def check_dataset_structure(train_dir="equipment_train", test_dir="equipment_test", classes=None):
    """
    Print the number of image files per class in the train and test directories.

    For each of the two directories, every class sub-directory is scanned for
    files with an image extension and the per-class and total counts are
    printed. Missing directories are reported rather than raising.

    NOTE(review): the default paths here ("equipment_train", "equipment_test")
    differ from split_equipment_dataset's defaults ("dataset/equipment_train",
    "dataset/equipment_test"). Pass the directories used for the split
    explicitly unless the working directory already is ``dataset/``.

    Args:
        train_dir (str): root directory of the training split.
        test_dir (str): root directory of the test split.
        classes (iterable of str, optional): class sub-directory names to
            check. Defaults to the module-level ``target_classes``.

    Returns:
        None. The report is printed to stdout.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    # Materialise once: the names are iterated for both train and test.
    class_names = list(target_classes if classes is None else classes)

    print("โครงสร้าง Dataset หลังจากแบ่ง:")
    print("=" * 50)

    for dataset_type, dataset_dir in [("Train", train_dir), ("Test", test_dir)]:
        print(f"\n{dataset_type} Dataset ({dataset_dir}):")

        if not os.path.isdir(dataset_dir):
            print(f"  ไม่พบ directory: {dataset_dir}")
            continue

        total_images = 0
        for class_name in class_names:
            class_dir = os.path.join(dataset_dir, class_name)
            # isdir (not exists): a plain file with the class name would make
            # os.listdir raise.
            if os.path.isdir(class_dir):
                image_count = sum(
                    1 for f in os.listdir(class_dir)
                    if f.lower().endswith(image_extensions)
                )
                print(f"  - {class_name}: {image_count} images")
                total_images += image_count
            else:
                print(f"  - {class_name}: ไม่พบ directory")

        print(f"  Total: {total_images} images")

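# Call the function to inspect the split (run it after the dataset has been divided).
# Pass the same directories that were used for the split:
# check_dataset_structure(train_dir="dataset/equipment_train", test_dir="dataset/equipment_test")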