In [1]:
import cv2
import os
import re
from scipy.misc import imresize

In [2]:

DATA_DIRECTORY = '../../data'
DATA_INPUT_DIR = os.path.join(DATA_DIRECTORY, 'original', 'movies')
DATA_OUTPUT_DIR = os.path.join(DATA_DIRECTORY, 'fabricated', 'movies')

# Can accept multiple movies at once
#MOVIE_FILES = ['mother.mov']
MOVIE_FILES = os.listdir(DATA_INPUT_DIR)

In [3]:
## Settings

# Will not actually save/manipulate files, it will just show verbose info
DRY_RUN = False
CROP_IMAGES = True
RESIZE = (200, 200)

In [4]:
def crop_image(image):
    height = image.shape[0]
    width = image.shape[1]
    if width > height: # this is a landscape image
        shortest_side = height
        longest_side = width
        diff = longest_side - shortest_side
        half_diff = diff / 2
        
        y_start = 0
        y_end = shortest_side
        x_start = half_diff
        x_end = shortest_side + half_diff
    else: # this is a portrait image
        shortest_side = width
        longest_side = height
        diff = longest_side - shortest_side
        half_diff = diff / 2

        y_start = half_diff
        y_end = shortest_side + half_diff
        x_start = 0
        x_end = shortest_side
        
    return image[int(y_start):int(y_end), int(x_start):int(x_end)]


In [5]:
def get_sign_parts_from_movie_filename(filename):
    file_parts = re.split('\.', filename)
    sign_info_string = '.'.join(file_parts[:-1])
    sign_info_parts = re.split('--', sign_info_string)
    
    if len(sign_info_parts) != 3:
        print('Expecting to split {} into 3 parts but only got {}'.format(filename, len(sign_info_parts)))
        print('Be sure that {} is in the format "signname--authorname--version.mov"'.format(filename))
    
    sign_name = sign_info_parts[0]
    author_name = sign_info_parts[1]
    sign_version = sign_info_parts[2]

    return sign_name, author_name, sign_version
get_sign_parts_from_movie_filename('mother--kj_price--1.mov')

('mother', 'kj_price', '1')

In [6]:
def is_real_file(filename):
    # ignore things like .DS_Store
    # make sure the first letter is not a period (.)
    return filename[0] != '.'
is_real_file('.DS_Store')

False

In [7]:
def ensure_directory_exists(directory):
    if not os.path.isdir(directory):
        print('creating path {}'.format(directory))
        os.makedirs(directory)
    else:
        print('Nothing to do, {} already exists'.format(directory))

In [8]:
def resize_img_array(img):
    return imresize(img, RESIZE)

In [9]:
def save_video_frames_to_image_file(video, path_output_dir, author_name, sign_version):
    count_error = 0
    print('reading "{}" --- saving to "{}"'.format(video, path_output_dir))
    vidcap = cv2.VideoCapture(video)
    count_success = 0
    while vidcap.isOpened():
        read_success, image = vidcap.read()
        if read_success:
            image = crop_image(image)
            image = resize_img_array(image)
            if DRY_RUN:
                write_success = True
            else:
                img_filename = '{}-{}-%d.png'.format(author_name, sign_version) #The "%d" will be auto incremented by what's in the directory
                write_success = cv2.imwrite(os.path.join(path_output_dir, img_filename) % count_success, image)
            if write_success:
                count_success += 1
            else:
                count_error += 1
        else:
            break
    print('found {} images'.format(count_success + count_error))
    print('{} images successfully saved and {} images which could not be saved'.format(count_success, count_error))
    cv2.destroyAllWindows()
    vidcap.release()

In [10]:
def main():
    if DRY_RUN:
        print('this is just a test run (disable "DRY_RUN" to actually save images)')
        print()
    for filename in MOVIE_FILES:
        if not is_real_file(filename):
            continue
        input_file_path = os.path.join(DATA_INPUT_DIR, filename)
        sign_name, author_name, sign_version = get_sign_parts_from_movie_filename(filename)
        output_file_path = os.path.join(DATA_OUTPUT_DIR, sign_name)
        
        ensure_directory_exists(output_file_path)
        save_video_frames_to_image_file(input_file_path, output_file_path, author_name, sign_version)
main()

Nothing to do, ../../data/fabricated/movies/I already exists
reading "../../data/original/movies/I--kj--2.mov" --- saving to "../../data/fabricated/movies/I"


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  


found 209 images
209 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/7 already exists
reading "../../data/original/movies/7--kj--2.mov" --- saving to "../../data/fabricated/movies/7"
found 153 images
153 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/D already exists
reading "../../data/original/movies/D--kj--1.mov" --- saving to "../../data/fabricated/movies/D"
found 163 images
163 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/Z already exists
reading "../../data/original/movies/Z--kj--1.mov" --- saving to "../../data/fabricated/movies/Z"
found 210 images
210 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/W already exists
reading "../../data/original/movies/W--kj--2.mov" --- saving to "../../data/fabricated/movies/W"
found 210 images
210 images 

found 123 images
123 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/P already exists
reading "../../data/original/movies/P--kj--2.mov" --- saving to "../../data/fabricated/movies/P"
found 165 images
165 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/C already exists
reading "../../data/original/movies/C--kj--1.mov" --- saving to "../../data/fabricated/movies/C"
found 205 images
205 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/N already exists
reading "../../data/original/movies/N--kj--2.mov" --- saving to "../../data/fabricated/movies/N"
found 191 images
191 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/M already exists
reading "../../data/original/movies/M--kj--1.mov" --- saving to "../../data/fabricated/movies/M"
found 253 images
253 images 

found 161 images
161 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/nothing already exists
reading "../../data/original/movies/nothing--kj--1.mov" --- saving to "../../data/fabricated/movies/nothing"
found 155 images
155 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/Y already exists
reading "../../data/original/movies/Y--kj--1.mov" --- saving to "../../data/fabricated/movies/Y"
found 123 images
123 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/4 already exists
reading "../../data/original/movies/4--kj--2.mov" --- saving to "../../data/fabricated/movies/4"
found 185 images
185 images successfully saved and 0 images which could not be saved
Nothing to do, ../../data/fabricated/movies/G already exists
reading "../../data/original/movies/G--kj--1.mov" --- saving to "../../data/fabricated/movies/G"
found 113 