# In [1]:
import shutil
import os
import numpy as np
import random
from sklearn.model_selection import train_test_split



class SplitDirTool:
    """Split a directory tree into train/validation/test trees of symlinks.

    Every sub-directory found under ``input_path`` has the files it directly
    contains partitioned into three groups. For each group a symlink to the
    original file is created under ``output_path/train``,
    ``output_path/validation`` or ``output_path/test``, in a sub-directory
    that mirrors the folder's path relative to ``input_path``.

    Attributes set by the constructor: ``train_ratio``, ``validation_ratio``,
    ``test_ratio``, ``src_path``, ``dst_path``, ``train_dst_path``,
    ``validation_dst_path`` and ``test_dst_path``.
    """

    # Ratios closer to zero than this are treated as exactly zero, so that
    # binary floating-point noise does not produce a bogus (or negative) share.
    _RATIO_EPSILON = 1e-9

    def __init__(self, train_ratio, validation_ratio, input_path, output_path):
        """Store the split ratios and derive the three output directories.

        Args:
            train_ratio: Fraction of each folder's files that goes to
                ``train``; must satisfy ``0 < train_ratio <= 1``.
            validation_ratio: Fraction that goes to ``validation``; must be
                non-negative. The remainder goes to ``test``.
            input_path: Directory whose sub-directories are split.
            output_path: Directory that receives the ``train``,
                ``validation`` and ``test`` trees.

        Raises:
            ValueError: If the ratios do not describe a valid partition.
        """
        test_ratio = 1 - train_ratio - validation_ratio
        # e.g. 1 - 0.8 - 0.2 evaluates to about -5.6e-17, not 0.
        if abs(test_ratio) < self._RATIO_EPSILON:
            test_ratio = 0.0
        if not 0 < train_ratio <= 1 or validation_ratio < 0 or test_ratio < 0:
            raise ValueError(
                'train_ratio must be in (0, 1], validation_ratio must be >= 0 '
                'and train_ratio + validation_ratio must not exceed 1 '
                '(got train_ratio=%r, validation_ratio=%r)'
                % (train_ratio, validation_ratio)
            )

        self.train_ratio = train_ratio
        self.validation_ratio = validation_ratio
        self.test_ratio = test_ratio

        self.src_path = input_path
        self.dst_path = output_path

        self.train_dst_path = os.path.join(self.dst_path, 'train')
        self.validation_dst_path = os.path.join(self.dst_path, 'validation')
        self.test_dst_path = os.path.join(self.dst_path, 'test')

    @staticmethod
    def _split(items, second_fraction):
        """Shuffle ``items`` and split them into ``(first, second)`` lists.

        ``second_fraction`` is the share handed to ``train_test_split`` as
        ``test_size``. When there are too few items for that call to leave
        both parts non-empty it raises ``ValueError``; in that case every
        item is kept in the first part instead of aborting the whole run.
        """
        if not items:
            return [], []
        try:
            first, second = train_test_split(
                items, test_size=second_fraction, random_state=1
            )
        except ValueError:
            return list(items), []
        return first, second

    def shuffle_and_distribute_files(self, root, _dir):
        """Partition the files directly inside ``root/_dir``.

        Args:
            root: Directory that contains ``_dir``.
            _dir: Folder name, or relative path, below ``root``.

        Returns:
            A ``(x_train, x_val, x_test)`` tuple of lists of file names.
            All three are empty when the folder is missing or has no files.
        """
        dir_path = os.path.join(root, _dir)
        # Only the first os.walk() entry is needed: the files directly inside
        # dir_path. The default covers a missing or unreadable directory.
        _, _, files = next(os.walk(dir_path), (dir_path, [], []))
        # Directory listing order is arbitrary; sorting makes the seeded
        # shuffle reproducible across runs and machines.
        files = sorted(files)
        if not files:
            return [], [], []

        holdout_ratio = 1 - self.train_ratio
        if holdout_ratio < self._RATIO_EPSILON:
            return files, [], []
        x_train, x_holdout = self._split(files, holdout_ratio)

        # With no test (or no validation) share the whole hold-out set goes
        # to the other group; a second split would need a fraction of 0 or 1.
        if self.test_ratio < self._RATIO_EPSILON:
            return x_train, x_holdout, []
        if self.validation_ratio < self._RATIO_EPSILON:
            return x_train, [], x_holdout

        x_val, x_test = self._split(
            x_holdout,
            self.test_ratio / (self.test_ratio + self.validation_ratio),
        )
        return x_train, x_val, x_test

    def create_link(self, root, _dir, x_train, x_val, x_test):
        """Create the symlinks for one folder in the three output trees.

        Args:
            root: Directory that contains ``_dir``.
            _dir: Folder name, or relative path, below ``root``; the same
                relative path is used below each output tree.
            x_train: File names to link under the ``train`` tree.
            x_val: File names to link under the ``validation`` tree.
            x_test: File names to link under the ``test`` tree.

        Raises:
            FileExistsError: If a destination exists and is not a symlink.
        """
        src_dir = os.path.join(root, _dir)
        targets = (
            (os.path.join(self.train_dst_path, _dir), x_train),
            (os.path.join(self.validation_dst_path, _dir), x_val),
            (os.path.join(self.test_dst_path, _dir), x_test),
        )
        for dst_dir, names in targets:
            os.makedirs(dst_dir, exist_ok=True)
            for name in names:
                link_target = os.path.abspath(os.path.join(src_dir, name))
                link_path = os.path.abspath(os.path.join(dst_dir, name))
                # Replace a stale link from an earlier run; anything that is
                # not a symlink is left alone so os.symlink() reports it.
                if os.path.islink(link_path):
                    os.remove(link_path)
                os.symlink(link_target, link_path)

    def distribute(self):
        """Rebuild the output tree from scratch and link every folder's files.

        The existing output directory is deleted first. Sub-directories of
        the input are processed at every depth, each keeping its path
        relative to the input directory.

        Raises:
            ValueError: If the output directory is the input directory or
                contains it, since deleting the output would delete the input.
        """
        src_root = os.path.normcase(os.path.abspath(self.src_path))
        dst_root = os.path.normcase(os.path.abspath(self.dst_path))
        if src_root == dst_root or src_root.startswith(os.path.join(dst_root, '')):
            raise ValueError(
                'output path %r must not be or contain the input path %r'
                % (self.dst_path, self.src_path)
            )

        shutil.rmtree(self.dst_path, ignore_errors=True)
        os.makedirs(self.dst_path, exist_ok=True)

        for root, dirs, files in os.walk(self.src_path):
            # Never descend into the output tree when it lives inside the
            # input tree, and visit folders in a stable order.
            dirs[:] = sorted(
                d for d in dirs
                if os.path.normcase(os.path.abspath(os.path.join(root, d))) != dst_root
            )
            for _dir in dirs:
                # Use the path relative to the input root so nested folders
                # keep their hierarchy instead of colliding on the basename.
                rel_dir = os.path.relpath(os.path.join(root, _dir), self.src_path)
                x_train, x_val, x_test = self.shuffle_and_distribute_files(
                    self.src_path, rel_dir
                )
                self.create_link(self.src_path, rel_dir, x_train, x_val, x_test)
                

# Split ./images into ./images_out/{train,validation,test}: 60% train,
# 20% validation and the remainder test. distribute() deletes any existing
# ./images_out before rebuilding it.
tool = SplitDirTool(
    train_ratio=0.6,
    validation_ratio=0.2,
    input_path='images',
    output_path='images_out',
)
tool.distribute()


        
            

