In [None]:
import os
import shutil
import random
from tqdm import tqdm
import csv

In [None]:
def _split_evenly(total, parts):
    """Split ``total`` into ``parts`` integer quotas that differ by at most one."""
    if parts <= 0:
        return []
    base, extra = divmod(total, parts)
    # The first ``extra`` quotas absorb the remainder so the quotas sum to ``total``.
    return [base + 1 if index < extra else base for index in range(parts)]


def _copy_random_files(source_folder, dest_folder, count):
    """Create ``dest_folder`` and copy up to ``count`` randomly chosen entries of ``source_folder`` into it."""
    os.makedirs(dest_folder)
    # Sort first: os.listdir order is arbitrary, and a stable candidate order
    # is required for a seeded run to pick the same files again.
    candidates = sorted(os.listdir(source_folder))
    for name in random.sample(candidates, min(len(candidates), count)):
        shutil.copy(os.path.join(source_folder, name), os.path.join(dest_folder, name))


def create_subset(source, dest, num_train_images, num_val_images):
    """Create a randomly sampled subset of a dataset folder at ``dest``.

    The following is copied from ``source`` to ``dest``:

    * ``LOC_synset_mapping.txt`` (unchanged),
    * a random sample of the files of every class folder below
      ``ILSVRC/Data/CLS-LOC/train``; ``num_train_images`` is distributed
      evenly over the class folders found (the first folders in sorted order
      receive one extra image when it does not divide evenly),
    * a random sample of ``num_val_images`` files from
      ``ILSVRC/Data/CLS-LOC/val``,
    * a ``LOC_val_solution.csv`` containing the header and only the rows that
      belong to the copied validation images.

    If a folder holds fewer files than requested, all of its files are taken.
    Sampling uses the global ``random`` state; call ``random.seed`` beforehand
    to make the selection repeatable.

    Args:
        source: Root folder of the full dataset.
        dest: Root folder of the subset; must not exist yet.
        num_train_images: Total number of training images to copy.
        num_val_images: Number of validation images to copy.

    Raises:
        FileExistsError: If ``dest`` already exists.
        ValueError: If ``LOC_val_solution.csv`` is empty or has no row for a
            selected validation image.
    """
    # Deliberately no exist_ok: an existing destination is never mixed with a new sample.
    os.makedirs(dest)

    shutil.copy(os.path.join(source, "LOC_synset_mapping.txt"), dest)

    data_subpath = ("ILSVRC", "Data", "CLS-LOC")
    source_train_folder = os.path.join(source, *data_subpath, "train")
    source_val_folder = os.path.join(source, *data_subpath, "val")
    dest_train_folder = os.path.join(dest, *data_subpath, "train")
    dest_val_folder = os.path.join(dest, *data_subpath, "val")

    # Copy train images
    # Only directories are class folders; stray files are ignored.
    class_folders = sorted(
        entry
        for entry in os.listdir(source_train_folder)
        if os.path.isdir(os.path.join(source_train_folder, entry))
    )
    # Derive the quotas from the classes actually present instead of assuming a fixed count.
    quotas = _split_evenly(num_train_images, len(class_folders))
    os.makedirs(dest_train_folder)
    for class_folder, quota in tqdm(list(zip(class_folders, quotas)), unit="class", desc="Copy train images"):
        _copy_random_files(
            os.path.join(source_train_folder, class_folder),
            os.path.join(dest_train_folder, class_folder),
            quota,
        )

    # Copy val images
    source_csv_path = os.path.join(source, "LOC_val_solution.csv")
    with open(source_csv_path, newline="") as csv_source_file:
        csv_reader = csv.reader(csv_source_file)
        header = next(csv_reader, None)
        if header is None:
            raise ValueError(f"{source_csv_path} is empty")
        header[1] = header[1].rstrip()

        # Index the rows by image id for constant-time lookup. Only the first
        # two columns are kept and trailing whitespace of the second is removed.
        rows_by_image = {}
        for row in csv_reader:
            if len(row) < 2:
                continue  # blank or malformed line
            rows_by_image.setdefault(row[0], [row[0], row[1].rstrip()])

    val_images = sorted(os.listdir(source_val_folder))
    selected_val_images = random.sample(val_images, min(len(val_images), num_val_images))

    # Fail before copying anything if a selected image has no row in the CSV.
    unlabeled = [img for img in selected_val_images if os.path.splitext(img)[0] not in rows_by_image]
    if unlabeled:
        raise ValueError(f"No entry in {source_csv_path} for validation images: {unlabeled}")

    os.makedirs(dest_val_folder)
    with open(os.path.join(dest, "LOC_val_solution.csv"), "w", newline="") as csv_dest_file:
        csv_writer = csv.writer(csv_dest_file)
        csv_writer.writerow(header)
        for img in selected_val_images:
            shutil.copy(os.path.join(source_val_folder, img), os.path.join(dest_val_folder, img))
            csv_writer.writerow(rows_by_image[os.path.splitext(img)[0]])

In [None]:
# Run configuration: adjust the paths to the local dataset location.
SOURCE_DIR = r"C:\Users\mariu\Documents\Development\Datasets\imagenet-object-localization-challenge"
DEST_DIR = r"C:\Users\mariu\Documents\Studium\Praktikum\ImageNet_Evaluation_Subset"
NUM_TRAIN_IMAGES = 65536
NUM_VAL_IMAGES = 4096
SEED = 0

# Seed the global `random` state that create_subset samples from, so a rerun
# over an unchanged directory listing can reproduce the same selection.
random.seed(SEED)
create_subset(SOURCE_DIR, DEST_DIR, NUM_TRAIN_IMAGES, NUM_VAL_IMAGES)