In [1]:
import csv
import glob
import os
import os.path
from subprocess import call

def extract_files():
    """After we have all of our videos split between train and test, and
    all nested within folders representing their classes, we need to
    make a data file that we can reference when training our RNN(s).
    This will let us keep track of image sequences and other parts
    of the training process.
    We'll first need to extract images from each of the videos. We'll
    need to record the following data in the file:
    [train|test], class, filename, nb frames
    Extracting can be done with ffmpeg:
    `ffmpeg -i video.mpg image-%04d.jpg`
    """
    data_file = []
    folders = ['LSA_64/train_videos', 'LSA_64/test_videos']

    for folder in folders:
        class_folders = glob.glob(os.path.join(folder, '*'))

        for vid_class in class_folders:
            class_files = glob.glob(os.path.join(vid_class, '*.avi'))

            for video_path in class_files:
                # Get the parts of the file.
                video_parts = get_video_parts(video_path)

                train_or_test, classname, filename_no_ext, filename = video_parts

                # Only extract if we haven't done it yet. Otherwise, just get
                # the info.
                if not check_already_extracted(video_parts):
                    # Now extract it.
                    src = os.path.join(train_or_test, classname, filename)
                    dest = os.path.join(train_or_test, classname,
                        filename_no_ext + '-%04d.jpg')
                    call(["ffmpeg", "-i", src, dest])

                # Now get how many frames it is.
                nb_frames = get_nb_frames_for_video(video_parts)

                data_file.append([train_or_test, classname, filename_no_ext, nb_frames])

                print("Generated %d frames for %s" % (nb_frames, filename_no_ext))

    with open('LSA_64/data_file.csv', 'w') as fout:
        writer = csv.writer(fout)
        writer.writerows(data_file)

    print("Extracted and wrote %d video files." % (len(data_file)))

def get_nb_frames_for_video(video_parts):
    """Given video parts of an (assumed) already extracted video, return
    the number of frames that were extracted."""
    train_or_test, classname, filename_no_ext, _ = video_parts
    generated_files = glob.glob(os.path.join(train_or_test, classname,
                                filename_no_ext + '*.jpg'))
    return len(generated_files)

def get_video_parts(video_path):
    """Given a full path to a video, return its parts."""
    parts = video_path.split(os.path.sep)
    filename = parts[2]
    filename_no_ext = filename.split('.')[0]
    classname = parts[1]
    train_or_test = parts[0]

    return train_or_test, classname, filename_no_ext, filename

def check_already_extracted(video_parts):
    """Check to see if we created the -0001 frame of this file."""
    train_or_test, classname, filename_no_ext, _ = video_parts
    return bool(os.path.exists(os.path.join(train_or_test, classname,
                               filename_no_ext + '-0001.jpg')))

def main():
    """
    Extract images from videos and build a new file that we
    can use as our data input file. It can have format:
    [train|test], class, filename, nb frames
    """
    extract_files()

if __name__ == '__main__':
    main()

Generated 122 frames for 033_001_001
Generated 122 frames for 033_001_002
Generated 122 frames for 033_001_003
Generated 122 frames for 033_001_004
Generated 122 frames for 033_001_005
Generated 152 frames for 033_002_001
Generated 152 frames for 033_002_002
Generated 152 frames for 033_002_003
Generated 152 frames for 033_002_004
Generated 152 frames for 033_002_005
Generated 122 frames for 033_003_001
Generated 152 frames for 033_003_002
Generated 152 frames for 033_003_003
Generated 122 frames for 033_003_004
Generated 122 frames for 033_003_005
Generated 152 frames for 033_004_001
Generated 122 frames for 033_004_002
Generated 122 frames for 033_004_003
Generated 122 frames for 033_004_004
Generated 122 frames for 033_004_005
Generated 122 frames for 033_005_001
Generated 152 frames for 033_005_002
Generated 152 frames for 033_005_003
Generated 122 frames for 033_005_004
Generated 152 frames for 033_005_005
Generated 152 frames for 033_006_001
Generated 152 frames for 033_006_002
G

Generated 122 frames for 064_006_003
Generated 122 frames for 064_006_004
Generated 92 frames for 064_006_005
Generated 92 frames for 064_007_001
Generated 92 frames for 064_007_002
Generated 92 frames for 064_007_003
Generated 92 frames for 064_007_004
Generated 92 frames for 064_007_005
Generated 92 frames for 064_008_001
Generated 92 frames for 064_008_002
Generated 92 frames for 064_008_003
Generated 122 frames for 064_008_004
Generated 92 frames for 064_008_005
Generated 122 frames for 064_009_001
Generated 122 frames for 064_009_002
Generated 122 frames for 064_009_003
Generated 152 frames for 064_009_004
Generated 122 frames for 064_009_005
Generated 122 frames for 064_010_001
Generated 122 frames for 064_010_002
Generated 122 frames for 064_010_003
Generated 92 frames for 064_010_004
Generated 152 frames for 064_010_005
Generated 118 frames for 007_001_001
Generated 118 frames for 007_001_002
Generated 118 frames for 007_001_003
Generated 118 frames for 007_001_004
Generated 11

Generated 122 frames for 039_002_001
Generated 122 frames for 039_002_002
Generated 122 frames for 039_002_003
Generated 122 frames for 039_002_004
Generated 122 frames for 039_002_005
Generated 152 frames for 039_003_001
Generated 122 frames for 039_003_002
Generated 122 frames for 039_003_003
Generated 122 frames for 039_003_004
Generated 152 frames for 039_003_005
Generated 122 frames for 039_004_001
Generated 122 frames for 039_004_002
Generated 92 frames for 039_004_003
Generated 122 frames for 039_004_004
Generated 92 frames for 039_004_005
Generated 122 frames for 039_005_001
Generated 152 frames for 039_005_002
Generated 122 frames for 039_005_003
Generated 122 frames for 039_005_004
Generated 122 frames for 039_005_005
Generated 122 frames for 039_006_001
Generated 122 frames for 039_006_002
Generated 122 frames for 039_006_003
Generated 122 frames for 039_006_004
Generated 122 frames for 039_006_005
Generated 122 frames for 039_007_001
Generated 92 frames for 039_007_002
Gene

Generated 122 frames for 061_007_004
Generated 122 frames for 061_007_005
Generated 122 frames for 061_008_001
Generated 122 frames for 061_008_002
Generated 92 frames for 061_008_003
Generated 122 frames for 061_008_004
Generated 122 frames for 061_008_005
Generated 122 frames for 061_009_001
Generated 122 frames for 061_009_002
Generated 122 frames for 061_009_003
Generated 152 frames for 061_009_004
Generated 122 frames for 061_009_005
Generated 122 frames for 061_010_001
Generated 122 frames for 061_010_002
Generated 122 frames for 061_010_003
Generated 152 frames for 061_010_004
Generated 122 frames for 061_010_005
Generated 118 frames for 020_001_001
Generated 118 frames for 020_001_002
Generated 88 frames for 020_001_003
Generated 118 frames for 020_001_004
Generated 118 frames for 020_001_005
Generated 88 frames for 020_002_001
Generated 88 frames for 020_002_002
Generated 88 frames for 020_002_003
Generated 88 frames for 020_002_004
Generated 88 frames for 020_002_005
Generate

Generated 122 frames for 063_004_004
Generated 92 frames for 063_004_005
Generated 122 frames for 063_005_001
Generated 122 frames for 063_005_002
Generated 122 frames for 063_005_003
Generated 122 frames for 063_005_004
Generated 122 frames for 063_005_005
Generated 122 frames for 063_006_001
Generated 122 frames for 063_006_002
Generated 92 frames for 063_006_003
Generated 122 frames for 063_006_004
Generated 92 frames for 063_006_005
Generated 92 frames for 063_007_001
Generated 122 frames for 063_007_002
Generated 92 frames for 063_007_003
Generated 122 frames for 063_007_004
Generated 122 frames for 063_007_005
Generated 122 frames for 063_008_001
Generated 92 frames for 063_008_002
Generated 122 frames for 063_008_003
Generated 122 frames for 063_008_004
Generated 122 frames for 063_008_005
Generated 122 frames for 063_009_001
Generated 122 frames for 063_009_002
Generated 122 frames for 063_009_003
Generated 122 frames for 063_009_004
Generated 122 frames for 063_009_005
Generat

Generated 152 frames for 049_009_003
Generated 122 frames for 049_009_004
Generated 122 frames for 049_009_005
Generated 152 frames for 049_010_001
Generated 122 frames for 049_010_002
Generated 152 frames for 049_010_003
Generated 152 frames for 049_010_004
Generated 152 frames for 049_010_005
Generated 122 frames for 050_001_001
Generated 182 frames for 050_001_002
Generated 152 frames for 050_001_003
Generated 152 frames for 050_001_004
Generated 182 frames for 050_001_005
Generated 122 frames for 050_002_001
Generated 152 frames for 050_002_002
Generated 122 frames for 050_002_003
Generated 122 frames for 050_002_004
Generated 152 frames for 050_002_005
Generated 152 frames for 050_003_001
Generated 152 frames for 050_003_002
Generated 122 frames for 050_003_003
Generated 152 frames for 050_003_004
Generated 122 frames for 050_003_005
Generated 122 frames for 050_004_001
Generated 122 frames for 050_004_002
Generated 122 frames for 050_004_003
Generated 122 frames for 050_004_004
G

Generated 122 frames for 062_006_005
Generated 92 frames for 062_007_005
Generated 92 frames for 062_008_005
Generated 122 frames for 062_009_005
Generated 92 frames for 062_010_005
Generated 148 frames for 022_001_005
Generated 118 frames for 022_002_005
Generated 118 frames for 022_003_005
Generated 148 frames for 022_004_005
Generated 118 frames for 022_005_005
Generated 118 frames for 022_006_005
Generated 118 frames for 022_007_005
Generated 118 frames for 022_008_005
Generated 118 frames for 022_009_005
Generated 88 frames for 022_010_005
Generated 122 frames for 063_001_005
Generated 122 frames for 063_002_005
Generated 152 frames for 063_003_005
Generated 92 frames for 063_004_005
Generated 122 frames for 063_005_005
Generated 92 frames for 063_006_005
Generated 122 frames for 063_007_005
Generated 122 frames for 063_008_005
Generated 122 frames for 063_009_005
Generated 182 frames for 063_010_005
Generated 152 frames for 060_001_005
Generated 122 frames for 060_002_005
Generat