In [1]:
import os
import shutil
import argparse
import random as random
from typing import List

In [2]:
# All three paths are relative, so they resolve against the kernel's current
# working directory.

# Source folder that is walked for frames of the 'inside' class.
INSIDE_DIRECTORY: str = "../data-fetch/inside-frames/"
# Source folder that is walked for frames of the 'outside' class.
OUTSIDE_DIRECTORY: str = "../data-fetch/outside-frames/"
# Root of the output tree; frames are copied into <split>/<class>/ below it.
DATA_DIRECTORY: str = "../data_bunch/"

In [3]:
def update_file_location(dir_path: str, name: str, root: str,
                         item: str) -> None:
    """Copy the file ``root/item`` to ``DATA_DIRECTORY/dir_path/name``.

    Parameters
    ----------
    dir_path: str, sub-folder of DATA_DIRECTORY that receives the copy
    name: str, file name given to the copy
    root: str, folder that currently holds the file
    item: str, current file name inside ``root``

    The source file is left in place. The destination folder is not created
    here, so it has to exist before the call.
    """
    source = os.path.join(root, item)
    destination = os.path.join(DATA_DIRECTORY, dir_path, name)
    shutil.copy(source, destination)


def train_test_split(directory: str, feature_class: str,
                     frame_limit: int) -> None:
    """Randomly distribute the .png frames under ``directory`` into splits.

    Every ``.png`` file found while walking ``directory`` is copied into
    ``train/<feature_class>``, ``test/<feature_class>`` or
    ``valid/<feature_class>`` with probabilities of roughly 80% / 10% / 10%.
    Each copy is renamed to ``<n>_<original name>`` where ``n`` is the
    1-based running count of copied frames, so equally named files coming
    from different sub-folders do not overwrite each other.

    Parameters
    ----------
    directory: str, path to original file location
    feature_class: str, IN ['inside', 'outside']
    frame_limit: int, max number of frames to process; at most this many
        frames are copied in total across all sub-folders, and a value
        <= 0 copies nothing
    """
    copied = 0
    for root, _, files in os.walk(directory, topdown=False):
        for item in files:
            # Check before touching the next file and leave the whole walk:
            # a plain `break` would only exit the inner loop and let every
            # following sub-folder contribute another frame past the limit.
            if copied >= frame_limit:
                return
            _, ext = os.path.splitext(item)
            if ext != '.png':
                continue
            copied += 1
            name = f"{copied}_{item}"
            # Cumulative thresholds: [0, .8] -> train, (.8, .9] -> test,
            # everything above -> valid.
            t = random.random()
            if t <= .8:
                split = "train"
            elif t <= .9:
                split = "test"
            else:
                split = "valid"
            update_file_location(f"{split}/{feature_class}", name, root, item)

In [4]:
class Bunchify(object):
    """Builds the train/test/valid folder tree and fills it with frames.

    Parameters
    ----------
    inside_path: str, folder holding the frames of the 'inside' class
    outside_path: str, folder holding the frames of the 'outside' class
    frame_limit: int, frame limit handed to train_test_split for each class
    """

    def __init__(self, inside_path: str, outside_path: str, frame_limit: int):
        self.inside_path = inside_path
        self.outside_path = outside_path
        self.frame_limit = frame_limit

    def create_train_test_directories(self) -> None:
        """Create ``DATA_DIRECTORY/<split>/<category>`` for every combination.

        Folders that already exist are left untouched, so the call can be
        repeated safely.

        Raises
        ------
        FileExistsError: if one of the target paths exists but is not a
            directory
        """
        category_folders: List[str] = ['inside', 'outside']
        split_folders: List[str] = ['train', 'test', 'valid']
        for split in split_folders:
            for category in category_folders:
                new_path = os.path.join(DATA_DIRECTORY, split, category)
                # exist_ok tolerates an existing directory but still raises
                # when a regular file blocks the path, instead of hiding the
                # problem until the first copy into that folder fails.
                os.makedirs(new_path, exist_ok=True)

    def run_split(self) -> None:
        """Run train_test_split once per class, 'inside' first.

        The destination folders are not created here; call
        create_train_test_directories beforehand.
        """
        path_dict = {'inside': self.inside_path, 'outside': self.outside_path}
        for cat_type, cat_path in path_dict.items():
            train_test_split(directory=cat_path,
                             feature_class=cat_type,
                             frame_limit=self.frame_limit)

In [5]:
# Fixed seed for the module-level RNG used by the split. Without it a re-run
# assigns frames to different splits while the earlier copies stay on disk,
# so the same frame can end up in more than one split.
# NOTE(review): repeatability also assumes os.walk yields the files in the
# same order between runs -- confirm on the target filesystem.
SPLIT_SEED: int = 42
# Frame limit handed to the split for each class.
FRAME_LIMIT: int = 3500

random.seed(SPLIT_SEED)

bunch = Bunchify(inside_path=INSIDE_DIRECTORY,
                 outside_path=OUTSIDE_DIRECTORY,
                 frame_limit=FRAME_LIMIT)
bunch.create_train_test_directories()
bunch.run_split()