# Purpose of this notebook
This notebook caches every frame of every video in the training dataset of the NFL Helmet Assignment competition as an image file.  
The frames are resized to the output width and height specified by the global variables below.

# Gratitude
This notebook is inspired by [this notebook by coldfir3](https://www.kaggle.com/coldfir3/nfl-helmet-jpg-dataset-genrator).

In [None]:
import pathlib
import os
import shutil
import cv2
import matplotlib.pyplot as plt

# Size, in pixels, that every extracted frame is scaled to: one quarter of
# 1280x720 in each dimension (presumably the source video resolution).
OUTPUT_IMG_WIDTH = 1280 // 4
OUTPUT_IMG_HEIGHT = 720 // 4

# File extension used for the extracted frame images.
IMG_FORMAT = 'jpg'

In [None]:
# Start from a clean slate: delete every directory found directly inside the
# current working directory. Plain files are left untouched.
for entry in pathlib.Path('.').iterdir():
    if not os.path.isdir(entry):
        continue
    print(f'Removing {entry}')
    shutil.rmtree(entry)

In [None]:
# Write a placeholder text file; per the original author this tricks the Kaggle
# system into letting the notebook output be exported as a dataset.
pathlib.Path('readme.txt').write_text(
    'dummy text to trick the Kaggle system so that we can export the output as dataset'
)

In [None]:
# create the image frames
#
# For every entry of the training folder, run ffmpeg to dump all frames,
# scaled to OUTPUT_IMG_WIDTH x OUTPUT_IMG_HEIGHT, into
# <out_base_dir>/<video name>/<video name>_<frame number>.<IMG_FORMAT>.
import subprocess  # imported here so this cell can launch ffmpeg without a shell

train_vid_dir = '../input/nfl-health-and-safety-helmet-assignment/train'

out_base_dir = './out_frames'
os.makedirs(out_base_dir, exist_ok=True)

path = pathlib.Path(train_vid_dir)

# names of the inputs for which ffmpeg exited with a non-zero status
failed_videos = []

for vid_dir in path.iterdir():
    # NOTE: despite its name, vid_dir is one entry of the train folder that is
    # handed to ffmpeg as the input video.
    vid_name = vid_dir.stem

    # construct the frame file name; ffmpeg replaces %05d with a zero-padded
    # frame counter
    frame_filename = f'{vid_name}_%05d.{IMG_FORMAT}'

    # create the folder for this video
    out_vid_dir = os.path.join(out_base_dir, vid_name)
    os.makedirs(out_vid_dir, exist_ok=True)

    # extract the frames; passing an argument list (no shell) keeps paths that
    # contain spaces or shell metacharacters intact. If the ffmpeg executable
    # itself is missing, subprocess.run raises FileNotFoundError.
    frame_out_dir = os.path.join(out_vid_dir, frame_filename)
    command = [
        'ffmpeg',
        '-i', str(vid_dir),
        '-vf', f'scale={OUTPUT_IMG_WIDTH}:{OUTPUT_IMG_HEIGHT}',
        frame_out_dir,
    ]
    result = subprocess.run(command).returncode

    # keep going on failure (best effort), but do not let it pass silently
    if result != 0:
        failed_videos.append(vid_name)
        print(f'ffmpeg failed for {vid_dir} (exit code {result})')

if failed_videos:
    print(f'Frame extraction failed for {len(failed_videos)} video(s): {failed_videos}')

In [None]:
# zip the output folder
#
# shutil.make_archive writes <out_base_dir>.zip with the folder itself as the
# top-level entry, and raises if archiving fails. The extracted frames are
# therefore only deleted after the archive has actually been written, instead
# of being removed unconditionally after an unchecked shell command.
_archive_root, _archive_folder = os.path.split(os.path.normpath(out_base_dir))
shutil.make_archive(
    os.path.abspath(out_base_dir),
    'zip',
    root_dir=_archive_root or '.',
    base_dir=_archive_folder,
)

# remove the output folder
shutil.rmtree(out_base_dir)