# Prepare data

### Imports

In [1]:
from __future__ import print_function, division

import os
from os.path import join as pj
import shutil
from glob import glob

import numpy as np
# Seed NumPy's global RNG for reproducibility. The seed function has to be
# called: assigning to `np.random.seed` would replace the function with an
# integer and leave the generator unseeded.
np.random.seed(0)

import pandas as pd

import matplotlib
%matplotlib inline
from matplotlib import pylab as plt
# %config InlineBackend.figure_format = 'retina'

from matplotlib.patches import Circle
import matplotlib.patheffects as PathEffects

import seaborn as sns

from PIL import Image

import json

from tqdm import tqdm_notebook as tqdm

In [2]:
import cv2

### Handy functions

In [3]:
def list_dir_with_full_paths(dir_path):
    """Return the absolute paths of all entries of `dir_path`, sorted.

    `dir_path` is resolved with `os.path.abspath`; each name reported by
    `os.listdir` is joined onto the resolved directory and the resulting
    list of paths is returned in ascending order.
    """
    root = os.path.abspath(dir_path)
    entry_paths = [os.path.join(root, entry) for entry in os.listdir(root)]
    entry_paths.sort()
    return entry_paths

### Constants

In [4]:
# IMAGE_HEIGHT, IMAGE_WIDTH = 300, 300

In [5]:
# Directory holding the raw training data: the `ideal.txt` label file and the
# `.avi` videos are read from here.
RAW_DATA_DIR = './data/trainset/'
# Output directory for the extracted frames; one sub-directory is created per video.
IMAGES_FROM_VIDEOS_DIR = './data/images_from_videos'

In [6]:
# Output directory for frames grouped by label; it receives a '0' and a '1' sub-directory.
IMAGES_BY_CLASSES_DIR = './data/images_by_class'

### Load switch frames

In [7]:
# Build a mapping from video file name to its switch frame. Each non-empty
# line of `ideal.txt` is read as whitespace-separated fields: the first field
# is the video name and the last field is the integer switch frame.
with open(pj(RAW_DATA_DIR, 'ideal.txt')) as fin:
    video_name_to_switch_frame = dict()
    for line in fin:
        # split() with no argument handles any run of spaces/tabs and yields
        # an empty list for blank lines.
        line_splitted = line.split()
        if not line_splitted:
            continue  # skip blank lines (e.g. trailing newline at end of file)

        video_name, switch_frame = line_splitted[0], int(line_splitted[-1])

        video_name_to_switch_frame[video_name] = switch_frame

### Extract images from videos

In [8]:
def extract_images_from_video(video_path, images_dir, switch_frame):
    """Dump the frames of a video as labelled JPEG files.

    The first frame is read and discarded; every following frame is written
    to `images_dir` as `<count>_<video name>_<label>.jpg`, where `count`
    starts at 0 (zero-padded to three digits) and `<video name>` is the base
    name of `video_path` without its extension.

    Args:
        video_path: path of the video file to read.
        images_dir: directory the JPEG files are written to; it is not
            created here.
        switch_frame: value of `count` from which frames get label 1 (green);
            frames before it get label 0 (red). With -1 every frame gets
            label 0.
    """
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    video_capture = cv2.VideoCapture(video_path)
    try:
        # Mock read: the first frame is thrown away, so `count` 0 refers to
        # the second frame of the file.
        # NOTE(review): presumably this aligns `count` with the indexing used
        # for `switch_frame` in the label file — confirm.
        video_capture.read()

        count = 0
        success, image = video_capture.read()
        while success:
            if count < switch_frame or switch_frame == -1:
                label = 0  # red traffic light
            else:
                label = 1  # green traffic light

            image_path = pj(images_dir, '{:03}_{}_{}.jpg'.format(count, video_name, label))
            cv2.imwrite(image_path, image)

            success, image = video_capture.read()
            count += 1
    finally:
        # Free the capture even when writing a frame raises; otherwise one
        # open capture is leaked per processed video.
        video_capture.release()

In [9]:
# Extract frames from the videos only once: an existing output directory is
# treated as "already done" and the whole step is skipped.
if os.path.exists(IMAGES_FROM_VIDEOS_DIR):
    print('Directory {} already exists!'.format(IMAGES_FROM_VIDEOS_DIR))
else:
    os.mkdir(IMAGES_FROM_VIDEOS_DIR)

    video_paths = [path for path in list_dir_with_full_paths(RAW_DATA_DIR) if path.endswith('.avi')]
    # NOTE(review): the `[1:]` slice skips the first video in sorted order —
    # confirm this is intended.
    for video_path in tqdm(video_paths[1:]):
        video_base_name = os.path.basename(video_path)
        video_stem = os.path.splitext(video_base_name)[0]

        # One sub-directory per video, named after the file without extension.
        images_dir = pj(IMAGES_FROM_VIDEOS_DIR, video_stem)
        os.mkdir(images_dir)

        # The label mapping is keyed by the file name including its extension.
        switch_frame = video_name_to_switch_frame[video_base_name]
        extract_images_from_video(video_path, images_dir, switch_frame)

Directory ./data/images_from_videos already exists!


### Prepare images for classification

In [10]:
def parse_image_name(image_name):
    """Split a frame file name into `(frame, video_name, label)`.

    The name is expected to look like `<frame>_<video name>_<label>.<ext>`.
    The frame number is taken up to the first underscore and the label after
    the last underscore, so the video name itself may contain underscores.

    Args:
        image_name: file name (not a full path) of an extracted frame.

    Returns:
        Tuple `(frame, video_name, label)` with `frame` and `label` as ints
        and `video_name` as a string.

    Raises:
        ValueError: if the name has fewer than two underscores or the frame
            or label part is not an integer.
    """
    stem = os.path.splitext(image_name)[0]  # delete file's extension

    # Peel the frame off the front and the label off the back; whatever is
    # left in the middle is the video name, underscores included.
    frame_part, remainder = stem.split('_', 1)
    video_name, label_part = remainder.rsplit('_', 1)

    return int(frame_part), video_name, int(label_part)

In [11]:
# Collect the paths of all extracted frames; with recursive=True the '**'
# part of the pattern also descends into sub-directories.
all_images_paths = glob(pj(IMAGES_FROM_VIDEOS_DIR, '**/*.jpg'), recursive=True)

In [12]:
# Copy every extracted frame into a folder named after its label ('0' or '1').
# The step runs only when the output directory does not exist yet.
if os.path.exists(IMAGES_BY_CLASSES_DIR):
    print('Directory {} already exists!'.format(IMAGES_BY_CLASSES_DIR))
else:
    os.mkdir(IMAGES_BY_CLASSES_DIR)
    for class_dir_name in ('0', '1'):
        os.mkdir(pj(IMAGES_BY_CLASSES_DIR, class_dir_name))

    for image_path in tqdm(all_images_paths):
        # The label is encoded in the file name; frame and video name are
        # parsed along with it but not used for the copy.
        frame, video_name, label = parse_image_name(os.path.basename(image_path))
        class_dir = pj(IMAGES_BY_CLASSES_DIR, str(label))
        shutil.copy(image_path, class_dir)


