# Building the Neural Network

## Library Imports

In [121]:
#### KERAS IMPORTS ####
from keras import backend as K
K.set_image_dim_ordering('tf')

import keras
from keras.models import Sequential
from keras import layers
from keras.layers import Input
from keras.layers.core import Flatten, Dense, Dropout, Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions, preprocess_input
from keras.applications import InceptionV3, ResNet50
from keras.preprocessing import image
from keras.models import Model

#### OTHER IMPORTS ####
import cv2
import numpy as np
import pandas as pd
import os
from os import listdir
from os.path import isfile, join
import joblib
import re
import skvideo.io
import datetime
import shutil
import uuid
from sklearn.metrics.pairwise import cosine_similarity
import imageio

# from PIL import ImageGrab
import matplotlib.pyplot as plt
from IPython.display import clear_output

% pylab inline
% matplotlib inline

print("BACKEND: ", keras.backend.backend())

Populating the interactive namespace from numpy and matplotlib
BACKEND:  tensorflow


## Functions

In [11]:
def resize_img(orig_img, new_dim):
    '''
    DESCRIPTION:
        - resizes the original image.
    INPUT: 
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
        - new_dim is the base number of pixels for the new image.
    OUTPUT:
        - resized is a numpy array of the resized image.
    '''
    r = float(new_dim) / orig_img.shape[1]
    dim = (new_dim, int(orig_img.shape[0] * r))
    resized = cv2.resize(orig_img, dim, interpolation=cv2.INTER_AREA)
    
    return resized

In [12]:
def rotate_img(orig_img, deg_rot, scale):
    '''
    DESCRIPTION:
        - rotates the original image.
    INPUT: 
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
        - scale (btwn 0 and 1) zooms in on the image. scale (> 1) zooms out on the image. 
        - scale can be used to crop the image based only on the center.
    OUTPUT:
        - rotated_img is a numpy array of the rotated image.
    '''
    (height, width) = orig_img.shape[:2]
    center = (width/2, height/2)
    matrix = cv2.getRotationMatrix2D(center,
                                     angle=deg_rot,
                                     scale=scale)
    rotated_img = cv2.warpAffine(orig_img,
                                 matrix,
                                 (width, height))
    
    return rotated_img

In [13]:
def crop_img(orig_img, h1, h2, w1, w2):
    '''
    DESCRIPTION:
        - crops the original image.
    INPUT: 
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
        - h1 and h2 defines height
        - w1 and w2 defines the width
    OUTPUT:
        - cropped_img is a numpy array of the cropped image.
    '''
    cropped_img = orig_img[h1:h2, w1:w2]

    return cropped_img

In [15]:
def augment(image_path, new_path, repeat=5):
    '''
    DESCRIPTION:
        - randomly augments the image.
    INPUT: 
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
        - repeat is an integer value stating the number of augmented images per clean image.
        - new_path is the relative directory to save the augmented images to.
    OUTPUT:
        - new_img is a numpy array of the augmented image.
    '''    
    img_arr = cv2.imread(image_path)
    
    img_paths = []
    
    for i in range(repeat):
        new_dim = int(img_arr.shape[1] * np.random.uniform(low=0.1, high=0.3))
        new_img_arr = resize_img(img_arr, new_dim)

        deg = np.random.randint(15, 345)
        scale = np.random.uniform(low=1, high=4)
        new_img_arr = rotate_img(new_img_arr, deg, scale)
        
        lower_height = np.random.randint(15, new_img_arr.shape[0])
        lower_width = np.random.randint(15, new_img_arr.shape[1])
        upper_height = np.random.randint(lower_height, 10000)
        upper_width = np.random.randint(lower_width, 10000)

        new_img_arr = crop_img(new_img_arr, h1=lower_height, h2=upper_height, w1=lower_width, w2=upper_width)
        
        if not os.path.isdir(new_path):
            os.mkdir(new_path)
            print("Created {} directory".format(new_path))
            
        new_img_path = os.path.join(new_path, str(uuid.uuid4()) + '.jpg')

        new_img = cv2.imwrite(new_img_path, new_img_arr)
        img_paths.append(new_img_path)
        if not new_img:
            print("Check image path: ", new_img_path)
            img_paths.append('')
    
    return img_paths

In [16]:
def get_files(paths, augment=False, aug_file_path=None):
    '''
    DESCRIPTION:
        - Generates the list of image file paths.
    INPUT:
        - paths is an iterable object with valid directories.
        - If augment is True, then the images in the paths are augmented.
            - aug_file_path must also be specified.
    OUTPUT:
        - If augment is True, then clean_files is a list of clean image file paths 
          and aug_files is a list of augmented file paths.
        - If augment is False, then only one list of file paths are given.
    '''
    clean_files = []
    for path in paths:
        if os.path.isdir(path):
            clean_files += [path + f for f in listdir(path) if isfile(join(path, f))]
        else:
            print("{} is invalid.".format(path))
            
    if augment:
        if aug_file_path:
            aug_files = []
            for item in clean_files:
                aug_img = augment(item, repeat=5, new_path=aug_file_path)
                
            aug_files = [aug_file_path + f for f in listdir(aug_file_path) if isfile(join(aug_file_path, f))]
        else:
            print("Enter in a directory to save augmented images.")
            return clean_files

        return clean_files, aug_files
    
    return clean_files

In [17]:
def move_files(file_paths, perc_list, dir_list):
    '''
    DESCRIPTION:
        - Moves files to specific directories.
    INPUT:
        - file_paths is an iterable object with valid file paths.
        - perc_list is an iterable object with floats that sum to 1.
        - dir_list
    OUTPUT:
        - If augment is True, then clean_files is a list of clean image file paths 
          and aug_files is a list of augmented file paths.
        - If augment is False, then only one list of file paths are given.
    '''
    if len(perc_list) > len(dir_list):
        print("Warning: more percentages ({}) than available directories ({})".format(len(perc_list, len(dir_list))))
    
    if len(perc_list) < len(dir_list):
        print("Error: Too few percentages.")
        return False
    
    for i, d in enumerate(dir_list):
        if not os.path.isdir(d):
            os.mkdir(d)
        
        num_files = int(len(file_paths) * perc_list[i]) - 1
        count = 0
        cycle_count = 0
        for f in file_paths:
            if os.path.isfile(f):
                shutil.move(f, dest)
                count += 1
                
            file_paths.pop(0)
            if count == num_files:
                break
    return True

In [None]:
#### DONT NEED SINCE USING DATA GENERATOR #####
# def get_clean_aug_arrays(tot_count, files):
#     '''
#     DESCRIPTION:
#         - produces a list of numpy arrays for each image in the files list.
#     INPUT: 
#         - tot_count is the number of files to traverse through.
#         - files is the list of image files.
#     OUTPUT:
#         - X is the list of numpy arrays for the clean images.
#         - X_aug is the list of numpy arrays for the augmented images.
#     '''
#     X = []
#     X_aug = []
    
#     if tot_count > len(files):
#         print("tot_count exceeds the number of files.")
#         return False
    
#     for i in range(tot_count):
#         # Convert the clean image
#         clean_img = image.load_img(files[i], target_size=(224,224))
#         clean_img = image.img_to_array(clean_img)
#         if clean_img == None:
#             continue
#         X.append(clean_img)

#         # Augment then convert the new image
#         temp_imgs = augment(files[i])
#         for temp_img in temp_imgs:
#             if temp_img == '':
#                 continue
#             else:
#                 prep_img = image.load_img(temp_img, target_size=(224,224))
#                 prep_img = image.img_to_array(prep_img)
#                 if prep_img == None:
#                     continue
#             X_aug.append(prep_img)

#     if len(X) == len(X_aug):
#         print("Augmenting worked correctly.")
        
#     X = np.array(X)
#     X = preprocess_input(X)
    
#     X_aug = np.array(X_aug)
#     X_aug = preprocess_input(X_aug)
        
#     return X, X_aug

In [None]:
#### NEED TO UPDATE SINCE USING DATA GENERATORS #####

# def get_metrics(y_train_pred, y_test_pred, y_train, y_test):
#     '''
#     DESCRIPTION:
#         - Get the various classification metrics for the training and test data.
#     INPUT:
#         - fit_model must be a pretrained model. 
#         - y_train_pred and y_test_pred must be one dimensional numpy arrays if binary classifier.
#         - y_train and y_test must be one dimensional numpy arrays if binary classifier.
#     OUTPUT:
#         - prints the scores.
#     '''
#     true_pred_train = []
#     true_pred_test = []
    
#     for pred1 in y_train_pred:
#         if pred1[0] > 0.5:
#             true_pred_train.append(1)
#         else:
#             true_pred_train.append(0)
    
#     for pred2 in y_test_pred:
#         if pred2[0] > 0.5:
#             true_pred_test.append(1)
#         else:
#             true_pred_test.append(0)

#     acc_train = accuracy_score(y_train, true_pred_train)
#     acc_test = accuracy_score(y_test, true_pred_test)
    
#     print("ACC TRAIN: ", acc_train)
#     print("ACC TEST: ", acc_test)
#     print("\n")
    
#     pre_train = precision_score(y_train, true_pred_train)
#     pre_test = precision_score(y_test, true_pred_test)

#     print("PRE TRAIN: ", pre_train)
#     print("PRE TEST: ", pre_test)
#     print("\n")
    
#     rec_train = recall_score(y_train, true_pred_train)
#     rec_test = recall_score(y_test, true_pred_test)

#     print("REC TRAIN: ", rec_train)
#     print("REC TEST: ", rec_test)
#     print("\n")    
    
#     f1_train = f1_score(y_train, true_pred_train)
#     f1_test = f1_score(y_test, true_pred_test)

#     print("F1 TRAIN: ", f1_train)
#     print("F1 TEST: ", f1_test)    

In [21]:
def get_generators(train_dir, test_dir, rescale=False, image_gen=None):
    '''
    DESCRIPTION:
        - Creates the data generators for the model.
    INPUT:
        - If the training data needs to be augmented with more data, then set rescale to True.
        - Also, make sure to input a valid ImageDataGenerator object.
    OUTPUT:
        - train_gen and test_gen are outputted
    '''
    if not os.path.isdir(train_dir):
        print("Error: invalid train data directory.")
        return False
    
    if not os.path.isdir(test_dir):
        print("Error: invalid test data directory.")
        return False 
    
    if not rescale:
        train_datagen = image.ImageDataGenerator(rescale=1./255)
    else:
        try:
            train_gen = image_gen
        except:
            print("Please input a valid generator.")
            return False
    test_datagen = image.ImageDataGenerator(rescale=1./255)
    
    train_gen = train_datagen.flow_from_directory(train_dir,
                                                  target_size=(224,224),
                                                  batch_size=100,
                                                  class_mode='binary')
    
    test_gen = test_datagen.flow_from_directory(test_dir,
                                                target_size=(224,224),
                                                batch_size=100,
                                                class_mode='binary')
    
    return train_gen, test_gen

In [22]:
def get_model(input_shape, weights='imagenet'):
    '''
    DESCRIPTION:
        - Compiles the keras VGG16 model.
    INPUT:
        - Input shape should match the backend type:
            - Tensorflow: (224,224,3)
            - Theano: (3,224,224)
    OUTPUT:
        - final_model is outputted.
    '''
    model = VGG16(include_top=False, weights=weights, input_shape=input_shape)
    last = model.output

    # Freeze convolutional layers
    for layer in model.layers:
        layer.trainable = False

    x = Dropout(0.5)(last)
    x = Flatten()(x)
    x = Dense(1)(x)
    preds = Activation(activation='sigmoid')(x)

    final_model = Model(input=model.input, output=preds)

    final_model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                loss='binary_crossentropy', metrics=['accuracy'])
    
    return final_model

In [25]:
def train_model(model, nb_epoch, generators, model_dir):
    '''
    DESCRIPTION:
        - Trains the compiled keras model,
    INPUT:
        - model is a compiled keras model.
        - nb_epoch is the number of epochs to run.
        - generators are the training and validation data generators.
        - model_dir is the directory to save the trained model and weights.
    OUTPUT:
        - the trained model is outputted.
    '''
    train_generator, validation_generator = generators
    
    model.fit_generator(
        train_generator,
        steps_per_epoch=100,
        validation_data=validation_generator,
        validation_steps=10,
        epochs=nb_epoch)
    
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)
    
    model.save(os.path.join(model_dir, 'model.h5'))
    model.save_weights(os.path.join(model_dir,'model_weights.h5'))
    return model

In [87]:
def split_model(model):
    '''
    DESCRIPTION:
        - Splits the top layer of the model with the rest of the model.
    INPUT:
        - model should be pretrained.
    OUTPUT:
        - Returns the bottom_layers and a newly created top_layer.
    '''
    bottom_layers = model
    bottom_layers.layers.pop()
    bottom_layers.layers.pop()
    inp = bottom_layers.input
    out = bottom_layers.layers[-1].output

    bottom_layers = Model(inp, out)
    
    top_layer = Sequential()
    top_layer.add(Dropout(0.5, input_shape=bottom_layers.output_shape))
    top_layer.add(Dense(1))
    top_layer.add(Activation(activation='sigmoid'))

    top_layer.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                loss='binary_crossentropy', metrics=['accuracy'])
    
    return bottom_layers, top_layer

In [18]:
def mse(image_1, image_2):
    '''
    DESCRIPTION:
        - Calculates the mean square error between two images.
    INPUT:
        - image_1 and image_2 are image files that are in a numpy array.
    OUTPUT:
        - Outputs the error (or difference) between the two images.
    '''
    if isinstance(image_1, numpy.ndarray) and isinstance(image_2, numpy.ndarray):
        err = np.sum((image_1.astype("float") - image_2.astype("float")) ** 2)
        err /= float(image_1.shape[0] * image_1.shape[1])
    else:
        print("Input valid numpy arrays")
        return False
    
    return err

In [100]:
# def get_frames(file_path, top_layer, bottom_layers, consecutive, path):
#     '''
#     DESCRIPTION:
#         - Given a video file, this function predicts frame-by-frame if the picture is "good" or "bad".
#     INPUT:
#         - file_path is a valid video file. Must be a string.
#         - model should be fit and able to be predicted on. Ideally should be a binary classifier for this use case.
#         - consecutive should be the number of consecutive good photos the model needs to see before saving the photo.
#     OUTPUT:
#         - prints if the frame is good or bad.
#     '''
# #     vid = skvideo.io.VideoCapture(file_path)
# #     vid = cv2.VideoCapture(file_path)
#     vid = imageio.get_reader(file_path)
#     feature_vec_list = []
#     orig_frames = []
#     good_frames = []
#     curr_feat_vec = []
    
#     good_count = 0
#     good_frames_count = 0

#     try:
#         while True:
#             ret, frame = vid.read()
#             if not ret:
#                 vid.release()
#                 print("Released Video Resource")
#                 break

#             resized = np.array([cv2.resize(frame, (224, 224)).astype(np.float32)])
#             feat_vec = bottom_layers.predict(resized)
#             if not curr_feat_vec:
#                 curr_feat_vec = feat_vec
            
#             if cosine_similarity(curr_feat_vec, feat_vec) > 0.75:
#                 feature_vec_list.append(feat_vec)
#                 orig_frames.append(frame)
#             else:
#                 pred = top_layer.predict(feature_vec_list)
#                 if not os.path.isdir(path):
#                     os.mkdir(path)
                    
#                 file_path = os.path.join(path, str(uuid.uuid4()) + '.jpg')
#                 cv2.imwrite(file_path, orig_frames[np.argmax(pred)])
#                 good_frames.append(orig_frames[np.argmax(pred)])
#                 good_frames_count += 1
#                 print("File Path (CHANGE SCENE): {}".format(file_path))
                
#                 feature_vec_list = []
#                 orig_frames = []
                
#             if len(feature_vec_list) == consecutive:
#                 pred = top_layer.predict(feature_vec_list)
#                 for p in pred:
#                     if p > 0.5:
#                         good_count += 1
#                     else: 
#                         good_count = 0
#                         break
                        
#                 if good_count == consecutive:
#                     file_path = os.path.join(path, str(uuid.uuid4()) + '.jpg')
#                     cv2.imwrite(file_path, orig_frames[np.argmax(pred)])
#                     good_frames.append(orig_frames[np.argmax(pred)])
#                     good_frames_count += 1
#                     print("File Path (CONSECUTIVE): {}".format(file_path))

#                 pred_arr = []
#                 orig_frames = []
#         print("Good Frames Count: {}".format(good_frames_count))
                
#         return good_frames

#     except KeyboardInterrupt:
#         vid.release()
#         print("Released Video Resource")

In [124]:
def get_frames(file_path, top_layer, bottom_layers, consecutive, path):
    '''
    DESCRIPTION:
        - Given a video file, this function predicts frame-by-frame if the picture is "good" or "bad".
    INPUT:
        - file_path is a valid video file. Must be a string.
        - model should be fit and able to be predicted on. Ideally should be a binary classifier for this use case.
        - consecutive should be the number of consecutive good photos the model needs to see before saving the photo.
    OUTPUT:
        - prints if the frame is good or bad.
    '''
    try:
        vid = imageio.get_reader(file_path)
    except:
        print("Invalid video file")
        return None
    
    feature_vec_list = []
    orig_frames = []
    good_frames = []
    curr_feat_vec = []
    
    good_count = 0
    good_frames_count = 0

    for i in range(vid.get_length()):
        frame = vid.get_data(i)

        resized = np.array([cv2.resize(frame, (224, 224)).astype(np.float32)])
        feat_vec = bottom_layers.predict(resized)
        if curr_feat_vec == []:
            curr_feat_vec = feat_vec

        if cosine_similarity(curr_feat_vec, feat_vec) > 0.75:
            feature_vec_list.append(feat_vec)
            orig_frames.append(frame)
        else:
            pred = top_layer.predict(feature_vec_list)
            if not os.path.isdir(path):
                os.mkdir(path)

            file_path = os.path.join(path, str(uuid.uuid4()) + '.jpg')
            cv2.imwrite(file_path, orig_frames[np.argmax(pred)])
            good_frames.append(orig_frames[np.argmax(pred)])
            good_frames_count += 1
            print("File Path (CHANGE SCENE): {}".format(file_path))

            feature_vec_list = []
            orig_frames = []

        if len(feature_vec_list) == consecutive:
            pred = top_layer.predict(feature_vec_list)
            for p in pred:
                if p > 0.5:
                    good_count += 1
                else: 
                    good_count = 0
                    break

            if good_count == consecutive:
                file_path = os.path.join(path, str(uuid.uuid4()) + '.jpg')
                cv2.imwrite(file_path, orig_frames[np.argmax(pred)])
                good_frames.append(orig_frames[np.argmax(pred)])
                good_frames_count += 1
                print("File Path (CONSECUTIVE): {}".format(file_path))

            pred_arr = []
            orig_frames = []
    print("Good Frames Count: {}".format(good_frames_count))

    return good_frames

### Preprocessing the data

In [None]:
# instagram-scraper humansofny, humansofamsterdam, officialhumansofbombay, humansofnewtown, humansofpdx, humansofseoul

In [None]:
clean_files, aug_files = get_clean_files(['data/humansofny',
                                          'data/humansofamsterdam',
                                          'data/officialhumansofbombay',
                                          'data/humansofnewtown',
                                          'data/humansofpdx',
                                          'data/humansofseoul'],
                                          augment=True,
                                          aug_file_path='data/aug_images'
                                        )

### Split 70% of images to training data, 20% of images to test data, and 10% of images to holdout

In [None]:
clean_data_dir = ['data/train_data/clean','data/test_data/clean','data/holdout_data/clean']
aug_data_dir = ['data/train_data/aug','data/test_data/aug','data/holdout_data/aug']

In [None]:
success_clean = move_files(clean_files,
                           [0.7,0.2,0.1],
                           clean_data_dir)

success_aug = move_files(aug_files,
                         [0.7,0.2,0.1],
                         aug_data_dir)

### Turn images into an image generator

In [26]:
generators = get_generators(rescale=False, 
                            image_gen=None, 
                            train_dir='data/train_data', 
                            test_dir='data/test_data')

Found 22021 images belonging to 2 classes.
Found 7786 images belonging to 2 classes.


### Train the model

In [28]:
model = get_model(input_shape=(224,224,3))
fit_model = train_model(model,
                        nb_epoch=10,
                        generators=generators,
                        model_dir='data')



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Video Processing

### Split model into top layer and other layers

In [88]:
bot, top = split_model(model=keras.models.load_model('data/model.h5'))

In [125]:
print("BEFORE GET_FRAMES: ")
print(datetime.datetime.now())

BEFORE GET_FRAMES: 
2017-06-18 00:33:20.882983


In [126]:
snap = get_frames("data/humansofamsterdam/15147725_796232163848807_1530264403381846016_n.mp4",
                  top_layer=top,
                  bottom_layers=bot,
                  consecutive=10,
                  path='data/good_photos')
print("AFTER GET_FRAMES: ")
print(datetime.datetime.now())



ValueError: Error when checking : the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 arrays but instead got the following list of 10 arrays: [array([[ 0.       ,  0.       ,  0.       , ...,  0.       ,  3.4589026,
         0.       ]], dtype=float32), array([[ 0.       ,  0.       ,  0.       , ...,  0.       ,  3.4589026,
         0.    ...

In [128]:
top.input_shape

(None, None, 25088)

In [120]:
for f in v:
    v.r

imageio.core.util.Image

In [None]:
good_photos = cv2.cvtColor(snap[2], cv2.COLOR_BGR2BGRA)
plt.imshow(good_photos)

In [None]:
mse(snap[0],snap[5])