In [None]:
# Mount Google Drive at /content/drive (requires the Colab runtime, which
# provides the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Extract the YOLO_NATURAL archive into /content/data.
# -q keeps the cell output short; -o overwrites existing files without
# prompting, so the cell can be re-run unattended.
!unzip -q -o Dataset/YOLO_NATURAL.zip -d /content/data

In [None]:
# Extract the YOLO_AUG archive into /content/data.
# -q keeps the cell output short; -o overwrites existing files without
# prompting, so the cell can be re-run unattended.
!unzip -q -o Dataset/YOLO_AUG.zip -d /content/data

In [None]:
# Extract the YOLO_GAN archive into /content/data.
# -q keeps the cell output short; -o overwrites existing files without
# prompting, so the cell can be re-run unattended.
!unzip -q -o Dataset/YOLO_GAN.zip -d /content/data

In [None]:
# Extract the YOLO_SD archive into /content/data.
# -q keeps the cell output short; -o overwrites existing files without
# prompting, so the cell can be re-run unattended.
!unzip -q -o Dataset/YOLO_SD.zip -d /content/data

In [None]:
import os
import random
import shutil

def extract_images(natural, sd, gan, aug, folder_name, source_root='.', seed=None):
    """Build a mixed YOLO dataset by sampling images from four source datasets.

    The sources are the directories ``YOLO_NATURAL``, ``YOLO_SD``,
    ``YOLO_GAN`` and ``YOLO_AUG`` under ``source_root``; each must contain
    ``images/train`` and ``labels/train``. The image count of the smallest
    source is used as the base size, and every source contributes
    ``round(base * percentage / 100)`` randomly chosen images. Of each
    source's contribution, ``round(count * 0.9)`` images go to the train
    split and the rest to the val split. The base size is printed.

    Args:
        natural: Percentage of the base size to take from ``YOLO_NATURAL``.
        sd: Percentage of the base size to take from ``YOLO_SD``.
        gan: Percentage of the base size to take from ``YOLO_GAN``.
        aug: Percentage of the base size to take from ``YOLO_AUG``.
        folder_name: Output directory. ``images/{train,val}`` and
            ``labels/{train,val}`` are created inside it if missing.
        source_root: Directory containing the four source datasets.
            Defaults to the current working directory.
        seed: Optional seed for the sampling. When given, the same seed and
            the same source files always yield the same selection; when
            ``None`` the module-level ``random`` state is used.

    Raises:
        FileNotFoundError: If a source directory is missing, or a selected
            image has no ``<stem>.txt`` file in the source's ``labels/train``.

    Notes:
        * Files already present in ``folder_name`` are left in place, so
          re-running into the same folder without a fixed ``seed`` adds the
          new selection to the old one.
        * Files keep their original names; identically named files coming
          from different sources overwrite one another in the output.
    """
    sources = [('YOLO_NATURAL', natural),
               ('YOLO_SD', sd),
               ('YOLO_GAN', gan),
               ('YOLO_AUG', aug)]

    # Output layout: <folder_name>/<images|labels>/<train|val>.
    # exist_ok lets the function be re-run against an existing output folder.
    split_dirs = {
        (kind, split): os.path.join(folder_name, kind, split)
        for kind in ('images', 'labels')
        for split in ('train', 'val')
    }
    for path in split_dirs.values():
        os.makedirs(path, exist_ok=True)

    # A private generator keeps seeded runs independent of global random state.
    rng = random if seed is None else random.Random(seed)

    # List each source once. Only regular files count as images, and the
    # listing is sorted because os.listdir order is arbitrary, which would
    # otherwise defeat the seed.
    available = {}
    for name, _ in sources:
        image_dir = os.path.join(source_root, name, 'images', 'train')
        available[name] = sorted(
            entry for entry in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, entry))
        )

    # The smallest source defines the base size all percentages refer to.
    min_images = min(len(files) for files in available.values())
    print(min_images)

    for name, percent in sources:
        image_dir = os.path.join(source_root, name, 'images', 'train')
        label_dir = os.path.join(source_root, name, 'labels', 'train')

        count = round(min_images * percent / 100)
        train_count = round(count * 0.9)

        filenames = available[name]
        rng.shuffle(filenames)

        # The first train_count picks go to train, the remaining ones to val.
        for position, filename in enumerate(filenames[:count]):
            split = 'train' if position < train_count else 'val'
            # splitext copes with extensions of any length (.jpg, .jpeg, ...).
            label_name = os.path.splitext(filename)[0] + '.txt'
            shutil.copy2(os.path.join(image_dir, filename),
                         split_dirs['images', split])
            shutil.copy2(os.path.join(label_dir, label_name),
                         split_dirs['labels', split])

# Build the six dataset mixes. Every number is a percentage of the smallest
# source dataset's image count, passed by keyword so each value is labelled.
extract_images(natural=100, sd=0, gan=0, aug=0, folder_name='natural_only')
extract_images(natural=50, sd=0, gan=0, aug=50, folder_name='natural_aug')
extract_images(natural=20, sd=30, gan=50, aug=0, folder_name='natural_gan_sd')
extract_images(natural=0, sd=70, gan=30, aug=0, folder_name='gan_sd')
extract_images(natural=25, sd=50, gan=0, aug=25, folder_name='natural_sd_aug')
extract_images(natural=15, sd=40, gan=30, aug=15, folder_name='natural_gan_sd_aug')


In [None]:
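# Optional (disabled): install zip/unzip and archive /content/data as natural_only.zip.
# NOTE(review): the archive is named natural_only but the command zips all of
# /content/data — confirm the intended source path before enabling.
# !apt install zip unzip
# !zip -r natural_only /content/data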