In [1]:
import os 
import random
import shutil
import pathlib 
import cv2
from tqdm import tqdm
from pathlib import Path
import argparse

In [50]:
def extract_frames(input_directory, output_directory, width = 360, height = 360):
    """Dump every frame of each video in ``input_directory`` as resized JPEGs.

    For each video file found directly inside ``input_directory`` a folder
    named after the video file (extension included) is created under
    ``output_directory``; frames are written there as ``frame0000.jpg``,
    ``frame0001.jpg``, ... in read order.

    Args:
        input_directory: Directory containing the videos (``Path`` or ``str``).
        output_directory: Directory that receives one sub-folder per video.
        width: Width, in pixels, each frame is resized to.
        height: Height, in pixels, each frame is resized to.
    """
    input_directory = Path(input_directory)
    output_directory = Path(output_directory)
    # Compared against the lower-cased suffix so '.MOV', '.Mp4', '.M4V', ...
    # are all recognised.
    video_extensions = {'.mov', '.mp4', '.m4v'}
    for video_file in input_directory.glob("*"):
        if not video_file.is_file():
            continue
        if video_file.suffix.lower() not in video_extensions:
            continue
        frame_directory = output_directory / video_file.name
        frame_directory.mkdir(parents = True, exist_ok = True)
        video_cap = cv2.VideoCapture(str(video_file))
        try:
            count = 0
            success, image = video_cap.read()
            while success:
                image = cv2.resize(image, (width, height))
                # Build the file name separately from the directory so that a
                # '%' or '{' in the video name cannot break the formatting.
                frame_path = frame_directory / "frame{:04d}.jpg".format(count)
                cv2.imwrite(str(frame_path), image)  # save frame as JPEG file
                success, image = video_cap.read()
                count += 1
        finally:
            # Always free the capture handle, even if resizing/writing fails.
            video_cap.release()

def batch_frames(input_directory, output_directory, batch_size = 12):
    """Move frames into numbered folders of ``batch_size`` consecutive files.

    ``input_directory`` and every directory nested below it are visited in
    sorted order. Within each directory the files are sorted by path and
    moved, ``batch_size`` at a time, into ``output_directory/0``,
    ``output_directory/1``, ... Batches never span two source directories,
    and trailing files that do not fill a complete batch are left in place.

    The batch number keeps increasing across source directories, so frames
    from different directories never land in the same output folder (where
    identically named frames would overwrite each other).

    Args:
        input_directory: Root directory holding the frame folders.
        output_directory: Directory that receives the numbered batch folders.
        batch_size: Number of files per batch folder.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    input_directory = Path(input_directory)
    output_directory = Path(output_directory)
    # Materialise and sort the directory list up front: numbering becomes
    # deterministic and the walk is not affected by the moves below.
    # The is_dir() filter matters on Python 3.13+, where "**" also yields files.
    folders = sorted(path for path in input_directory.glob("**") if path.is_dir())
    folder_counter = 0
    for folder in folders:
        files = sorted(file for file in folder.glob("*") if file.is_file())
        # Step through complete batches only; a short remainder is skipped.
        for start in range(0, len(files) - batch_size + 1, batch_size):
            batch_directory = output_directory / str(folder_counter)
            batch_directory.mkdir(parents = True, exist_ok = True)
            for frame in files[start:start + batch_size]:
                shutil.move(str(frame), str(batch_directory / frame.name))
            folder_counter += 1

In [101]:
if __name__ == "__main__":
    # Build the train / test / validation frame datasets under ./Adobe.
    train_input = Path("./Adobe/train")
    train_output = Path("./Adobe/train/frames")
    train_tmp = train_input / "tmp"

    test_input = Path("./Adobe/test")
    test_output = Path("./Adobe/test/frames")
    test_tmp = test_input / "tmp"

    validation_output = Path("./Adobe/validation")
    validation_size = 60  # number of test batches moved to validation
    validation_seed = 0   # fixed seed so the split is reproducible

    # Same pipeline for both splits: dump frames into a scratch folder,
    # regroup them into batch folders, then drop the scratch folder.
    for split_input, split_tmp, split_output in (
        (train_input, train_tmp, train_output),
        (test_input, test_tmp, test_output),
    ):
        split_tmp.mkdir(parents = True, exist_ok = True)
        try:
            extract_frames(split_input, split_tmp)
            batch_frames(split_tmp, split_output)
        finally:
            # Remove the scratch folder even when a step above fails.
            shutil.rmtree(split_tmp)

    # Sort before sampling: glob order depends on the filesystem, so the
    # seed alone would not make the split reproducible.
    test_file = sorted(folder for folder in test_output.glob("*"))
    if len(test_file) < validation_size:
        raise ValueError(
            "need at least {} test batches for validation, found {}".format(
                validation_size, len(test_file)
            )
        )
    sampled = random.Random(validation_seed).sample(test_file, validation_size)
    validation_output.mkdir(parents = True, exist_ok = True)
    for samples in sampled:
        # Move each sampled batch to validation_output/<batch name>; joining
        # the paths with pathlib avoids hand-written separators.
        shutil.move(str(samples), str(validation_output / samples.name))

In [108]:
# Sanity checks: count the entries directly under each split directory.
# NOTE(review): the expected counts are hard-coded, presumably taken from the
# run that produced this dataset -- re-derive them if the data or the
# batching logic changes.
assert sum(1 for entry in train_output.glob("*")) == 303
assert sum(1 for entry in test_output.glob("*")) == 47
assert sum(1 for entry in validation_output.glob("*")) == 60