In [19]:
import zipfile
import os

def unzip_file(zip_file_path, extract_to):
    # 创建目标目录（如果不存在）
    if not os.path.exists(extract_to):
        os.makedirs(extract_to)

    # 打开ZIP文件并提取所有内容
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
        print(f"已成功解压到: {extract_to}")

# 示例用法
unzip_file('data.zip', 'DATA')

已成功解压到: DATA


In [49]:
import os
from shutil import copy
import random

def create_directory(dir_path):
    """Create a directory if it does not exist."""
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

# Get the names of all folders in the 'photos' directory, excluding .txt files
file_path = './DATA/data/picture'
flower_classes = [cls for cls in os.listdir(file_path) if os.path.isdir(os.path.join(file_path, cls)) and ".txt" not in cls]

# Create 'train' folder and subdirectories for each of the categories
train_dir = 'picture/train'
create_directory(train_dir)
for cls in flower_classes:
    create_directory(os.path.join(train_dir, cls))

# Create 'val' folder and subdirectories for each of the categories
val_dir = 'picture/val'
create_directory(val_dir)
for cls in flower_classes:
    create_directory(os.path.join(val_dir, cls))

# Split ratio: training set : validation set = 9 : 1
split_rate = 0.1

# Iterate through all images of each category and split them into training and validation sets based on the ratio
for cls in flower_classes:
    cls_path = os.path.join(file_path, cls)  # Subdirectory for a specific category
    images = os.listdir(cls_path)  # List of all image names in the directory
    num_images = len(images)
    
    # Randomly select images for the validation set from the images list
    eval_indices = random.sample(images, k=int(num_images * split_rate))  
    
    for index, image in enumerate(images):
        image_path = os.path.join(cls_path, image)  # Full path of the current image
        
        # Check if the current image is in the validation set list
        if image in eval_indices:
            new_path = os.path.join(val_dir, cls)  # Path for the validation set
        else:
            new_path = os.path.join(train_dir, cls)  # Path for the training set
            
        copy(image_path, new_path)  # Copy the image to the new path
        
        # Display processing progress bar
        print("\r[{}] processing [{}/{}]".format(cls, index + 1, num_images), end="")
    
    print()  # New line

# Now create a separate test set from the validation set (previously called test)
test_dir = 'picture/test'
create_directory(test_dir)
for cls in flower_classes:
    create_directory(os.path.join(test_dir, cls))

# Move a portion of images from val to test
for cls in flower_classes:
    cls_val_path = os.path.join(val_dir, cls)  # Path to validation images for this class
    val_images = os.listdir(cls_val_path)  # List of all validation images for this class
    
    # Randomly select images to move to test from validation set
    test_indices = random.sample(val_images, k=int(len(val_images) * split_rate))
    
    for image in test_indices:
        src_image_path = os.path.join(cls_val_path, image)  # Full path of the image to move to test
        dest_image_path = os.path.join(test_dir, cls, image)  # Destination path for test
        
        copy(src_image_path, dest_image_path)  # Copy image to test directory
        
        print(f"Moved {image} from {cls} validation to test.")

print("Processing done!")  # Processing complete message

[neutral] processing [6198/6198]
[fear] processing [5121/5121]
[disgust] processing [547/547]
[happy] processing [8989/8989]
[sad] processing [6077/6077]
[surprise] processing [4002/4002]
[angry] processing [4953/4953]
Moved 29438.jpg from neutral validation to test.
Moved 11354.jpg from neutral validation to test.
Moved 25817.jpg from neutral validation to test.
Moved 25052.jpg from neutral validation to test.
Moved 12650.jpg from neutral validation to test.
Moved 5945.jpg from neutral validation to test.
Moved 23538.jpg from neutral validation to test.
Moved 8763.jpg from neutral validation to test.
Moved 23729.jpg from neutral validation to test.
Moved 22075.jpg from neutral validation to test.
Moved 22688.jpg from neutral validation to test.
Moved 20618.jpg from neutral validation to test.
Moved 15571.jpg from neutral validation to test.
Moved 14983.jpg from neutral validation to test.
Moved 2092.jpg from neutral validation to test.
Moved 18422.jpg from neutral validation to test.
