In [0]:
import shutil
import os
import numpy as np
import random
import argparse

In [0]:
from shutil import copyfile

In [0]:
def img_train_test_split(img_source_dir, train_size):
    """
    Randomly splits images over a train and validation folder, while preserving the folder structure
    
    Parameters
    ----------
    img_source_dir : string
        Path to the folder with the images to be split. Can be absolute or relative path   
        
    train_size : float
        Proportion of the original images that need to be copied in the subdirectory in the train folder
    """    
    if not (isinstance(img_source_dir, str)):
        raise AttributeError('img_source_dir must be a string')
        
    if not os.path.exists(img_source_dir):
        raise OSError('img_source_dir does not exist')
        
    if not (isinstance(train_size, float)):
        raise AttributeError('train_size must be a float')
        
    # Set up empty folder structure if not exists
    if not os.path.exists('data'):
        os.makedirs('data')
    else:
        if not os.path.exists('data/train'):
            os.makedirs('data/train')
        if not os.path.exists('data/validation'):
            os.makedirs('data/validation')
            
    # Get the subdirectories in the main image folder
    subdirs = [subdir for subdir in os.listdir(img_source_dir) if os.path.isdir(os.path.join(img_source_dir, subdir))]

    for subdir in subdirs:
        subdir_fullpath = os.path.join(img_source_dir, subdir)
        if len(os.listdir(subdir_fullpath)) == 0:
            print(subdir_fullpath + ' is empty')
            break

        train_subdir = os.path.join('data/train', subdir)
        validation_subdir = os.path.join('data/validation', subdir)

        # Create subdirectories in train and validation folders
        if not os.path.exists(train_subdir):
            os.makedirs(train_subdir)

        if not os.path.exists(validation_subdir):
            os.makedirs(validation_subdir)

        train_counter = 0
        validation_counter = 0

        # Randomly assign an image to train or validation folder
        for filename in os.listdir(subdir_fullpath):
            if filename.endswith(".jpg") or filename.endswith(".png"): 
                fileparts = filename.split('.')

                if random.uniform(0, 1) <= train_size:
                    copyfile(os.path.join(subdir_fullpath, filename), os.path.join(train_subdir, str(train_counter) + '.' + fileparts[1]))
                    train_counter += 1
                else:
                    copyfile(os.path.join(subdir_fullpath, filename), os.path.join(validation_subdir, str(validation_counter) + '.' + fileparts[1]))
                    validation_counter += 1
                    
        print('Copied ' + str(train_counter) + ' images to data/train/' + subdir)
        print('Copied ' + str(validation_counter) + ' images to data/validation/' + subdir)

In [0]:
img_train_test_split('/content/drive/My Drive/face_age',0.5)

Copied 84 images to data/train/005
Copied 96 images to data/validation/005
Copied 592 images to data/train/001
Copied 540 images to data/validation/001
Copied 81 images to data/train/009
Copied 78 images to data/validation/009
Copied 142 images to data/train/003
Copied 142 images to data/validation/003
Copied 118 images to data/train/008
Copied 118 images to data/validation/008
Copied 77 images to data/train/010
Copied 63 images to data/validation/010
Copied 232 images to data/train/002
Copied 243 images to data/validation/002
Copied 124 images to data/train/004
Copied 136 images to data/validation/004
Copied 54 images to data/train/006
Copied 60 images to data/validation/006
Copied 60 images to data/train/007
Copied 66 images to data/validation/007
Copied 106 images to data/train/016
Copied 106 images to data/validation/016
Copied 68 images to data/train/014
Copied 69 images to data/validation/014
Copied 70 images to data/train/015
Copied 73 images to data/validation/015
Copied 52 ima

In [0]:
/content/data