# Building the Neural Network

## Library Imports

In [35]:
#### KERAS IMPORTS ####
from keras import backend as K
K.set_image_dim_ordering('tf')

import keras
from keras.models import Sequential
from keras import layers
from keras.layers import Input
from keras.layers.core import Flatten, Dense, Dropout, Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions, preprocess_input
from keras.applications import InceptionV3, ResNet50
from keras.preprocessing import image
from keras.models import Model

#### SKLEARN IMPORTS ####
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

#### OTHER IMPORTS ####
import cv2
import numpy as np
import pandas as pd
from os import listdir
from os.path import isfile, join
import joblib
import re

# from PIL import ImageGrab
import matplotlib.pyplot as plt

% matplotlib inline

print("BACKEND: ", keras.backend.backend())

('BACKEND: ', u'tensorflow')


## Functions

In [None]:
def prepare_image(image_path, model='vgg_16'):
    '''
    DESCRIPTION:
        - Preprocess an image to be in the correct size
    INPUT:
        - image_path is the location of the image file
        - model is the type of pretrained CNN
    OUTPUT:
        - resized is the resized image
    '''
    if model == 'vgg_16':
        try:
            resized = cv2.resize(cv2.imread(image_path), (224, 224)).astype(np.float32)
        except:
            print("Image cannot be resized: ", image_path)
            return None

        resized[:,:,0] -= 103.939
        resized[:,:,1] -= 116.779
        resized[:,:,2] -= 123.68
        resized = resized.transpose((2,0,1))
        resized = np.expand_dims(resized, axis=0)
    return resized

In [None]:
def resize_img(orig_img, new_dim):
    '''
    DESCRIPTION:
        - resizes the original image.
    INPUT: 
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
        - new_dim is the base number of pixels for the new image.
    OUTPUT:
        - resized is a numpy array of the resized image.
    '''
    r = float(new_dim) / orig_img.shape[1]
    dim = (new_dim, int(orig_img.shape[0] * r))
    resized = cv2.resize(orig_img, dim, interpolation=cv2.INTER_AREA)
    
#     plt.imshow(resized)
#     plt.xticks([])
#     plt.yticks([])
    
    return resized

In [None]:
def rotate_img(orig_img, deg_rot, scale):
    '''
    DESCRIPTION:
        - rotates the original image.
    INPUT: 
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
        - scale (btwn 0 and 1) zooms in on the image. scale (> 1) zooms out on the image. 
        - scale can be used to crop the image based only on the center.
    OUTPUT:
        - rotated_img is a numpy array of the rotated image.
    '''
    (height, width) = orig_img.shape[:2]
    center = (width/2, height/2)
    matrix = cv2.getRotationMatrix2D(center,
                                     angle=deg_rot,
                                     scale=scale)
    rotated_img = cv2.warpAffine(orig_img,
                                 matrix,
                                 (width, height))
#     plt.imshow(rotated_img)
#     plt.xticks([])
#     plt.yticks([])
    
    return rotated_img

In [None]:
def crop_img(orig_img, h1, h2, w1, w2):
    '''
    DESCRIPTION:
        - crops the original image.
    INPUT: 
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
        - h1 and h2 defines height
        - w1 and w2 defines the width
    OUTPUT:
        - cropped_img is a numpy array of the cropped image.
    '''
    cropped_img = orig_img[h1:h2, w1:w2]
    
#     plt.imshow(cropped_img)
#     plt.xticks([])
#     plt.yticks([])

    return cropped_img

In [None]:
def augment(image_path, repeat=5):
    '''
    DESCRIPTION:
        - randomly augments the image.
    INPUT: 
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
    OUTPUT:
        - new_img is a numpy array of the augmented image.
    '''    
    img_arr = cv2.imread(image_path)
    
    img_paths = []
    
    for i in range(repeat):
        new_dim = int(img_arr.shape[1] * np.random.uniform(low=0.1, high=0.3))
        new_img_arr = resize_img(img_arr, new_dim)
    #     print("RESIZE: ", new_img_arr)

        deg = np.random.randint(15, 345)
        scale = np.random.uniform(low=1, high=4)
        new_img_arr = rotate_img(new_img_arr, deg, scale)
    #     print("ROTATE: ", new_img_arr)

        lower_height = np.random.randint(25, new_img_arr.shape[0])
        lower_width = np.random.randint(25, new_img_arr.shape[1])
        upper_height = np.random.randint(lower_height, 10000)
        upper_width = np.random.randint(lower_width, 10000)

        new_img_arr = crop_img(new_img_arr, h1=lower_height, h2=upper_height, w1=lower_width, w2=upper_width)
    #     print("CROP: ", new_img_arr)

        new_img_path = 'data/aug_images/' + 'aug_' + str(i) + "_" + re.sub(r"data/[a-zA-Z]*/", '', image_path)

        new_img = cv2.imwrite(new_img_path, new_img_arr)
        img_paths.append(new_img_path)
        if not new_img:
            print("Check image path: ", new_img_path)
            img_paths.append('')
    
    return img_paths

In [None]:
def get_clean_aug_arrays(tot_count, files):
    '''
    DESCRIPTION:
        - produces a list of numpy arrays for each image in the files list.
    INPUT: 
        - tot_count is the number of files to traverse through.
        - files is the list of image files.
    OUTPUT:
        - X is the list of numpy arrays for the clean images.
        - X_aug is the list of numpy arrays for the augmented images.
    '''
    X = []
    X_aug = []
    
    if tot_count > len(files):
        print("tot_count exceeds the number of files.")
        return False
    
    for i in range(tot_count):
        # Convert the clean image
        clean_img = image.load_img(files[i], target_size=(224,224))
        clean_img = image.img_to_array(clean_img)
        if clean_img == None:
            continue
        X.append(clean_img)

        # Augment then convert the new image
        temp_imgs = augment(files[i])
        for temp_img in temp_imgs:
            if temp_img == '':
                continue
            else:
                prep_img = image.load_img(temp_img, target_size=(224,224))
                prep_img = image.img_to_array(prep_img)
                if prep_img == None:
                    continue
            X_aug.append(prep_img)

    if len(X) == len(X_aug):
        print("Augmenting worked correctly.")
        
    X = np.array(X)
    X = preprocess_input(X)
    
    X_aug = np.array(X_aug)
    X_aug = preprocess_input(X_aug)
        
    return X, X_aug

In [121]:
def get_metrics(fit_model, X_train, X_test, y_train, y_test):
    '''
    DESCRIPTION:
        - Get the various classification metrics for the training and test data.
    INPUT:
        - fit_model must be a pretrained model. 
        - X_train and X_test must be numpy arrays in the correct dimensions.
        - y_train and y_test must be one dimensional numpy arrays if binary classifier.
    OUTPUT:
        - prints the scores.
    '''
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)
    
    true_pred_train = []
    true_pred_test = []
    
    for pred1 in pred_train:
        if pred1[0] > 0.5:
            true_pred_train.append(1)
        else:
            true_pred_train.append(0)
    
    for pred2 in pred_test:
        if pred2[0] > 0.5:
            true_pred_test.append(1)
        else:
            true_pred_test.append(0)

    acc_train = accuracy_score(y_train, true_pred_train)
    acc_test = accuracy_score(y_test, true_pred_test)
    
    print("ACC TRAIN: ", acc_train)
    print("ACC TEST: ", acc_test)
    print("\n")
    
    pre_train = precision_score(y_train, true_pred_train)
    pre_test = precision_score(y_test, true_pred_test)

    print("PRE TRAIN: ", pre_train)
    print("PRE TEST: ", pre_test)
    print("\n")
    
    rec_train = recall_score(y_train, true_pred_train)
    rec_test = recall_score(y_test, true_pred_test)

    print("REC TRAIN: ", rec_train)
    print("REC TEST: ", rec_test)
    print("\n")    
    
    f1_train = f1_score(y_train, true_pred_train)
    f1_test = f1_score(y_test, true_pred_test)

    print("F1 TRAIN: ", f1_train)
    print("F1 TEST: ", f1_test)    

## Preprocessing the data

### Get a list of filenames corresponding to the images

In [None]:
ny = ['data/humansofny/' + f for f in listdir('data/humansofny/') if isfile(join('data/humansofny/', f))]
am = ['data/humansofamsterdam/' + f for f in listdir('data/humansofamsterdam/') if isfile(join('data/humansofamsterdam/', f))]
bo = ['data/humansofbombay/' + f for f in listdir('data/humansofbombay/') if isfile(join('data/humansofbombay/', f))]
nt = ['data/humansofnewtown/' + f for f in listdir('data/humansofnewtown/') if isfile(join('data/humansofnewtown/', f))]
pd = ['data/humansofpdx/' + f for f in listdir('data/humansofpdx/') if isfile(join('data/humansofpdx/', f))]
se = ['data/humansofseoul/' + f for f in listdir('data/humansofseoul/') if isfile(join('data/humansofseoul/', f))]

# files = ny + am + bo + nt + pd + se
files = am
clean_files = [loc for loc in am if '.mp4' not in loc]

### Turn each image into a numpy array

### Create "bad" images by resizing, rotating, and cropping the "good" images

In [None]:
tot_count = int(len(clean_files) * 1)
X, X_aug = get_clean_aug_arrays(tot_count, clean_files)

#### Pickle the X matrix

In [None]:
joblib.dump(X, 'data/X.pkl')
joblib.dump(X_aug, 'data/X_aug.pkl')

#### Load the X matrix

In [2]:
X = joblib.load('data/X.pkl')
X_aug = joblib.load('data/X_aug.pkl')

### Label the photos 
* 1 -> original images
* 0 -> augmented images

In [3]:
y = [1 for _ in range(len(X))]
y_aug = [0 for _ in range(len(X_aug))]

In [83]:
X_tot = np.append(X, X_aug, axis=0)
y_tot = np.append(y, y_aug, axis=0).reshape((-1,1))
# y_tot = keras.utils.np_utils.to_categorical(y_tot, nb_classes=2)

In [84]:
X_train, X_test, y_train, y_test = train_test_split(X_tot, y_tot, test_size=0.2)

### Initialize the model

In [106]:
X_train = X_train.reshape((3470, 224, 224, 3))
X_test = X_test.reshape((868, 224, 224, 3))

In [94]:
model = VGG16(include_top=False, weights='imagenet', input_shape=(224,224,3))
# model = ResNet50(include_top=False, weights='imagenet')
# model = InceptionV3(include_top=False, weights='imagenet', input_shape=(224,224,3))
last = model.output

# Freeze convolutional layers
for layer in model.layers:
    layer.trainable = False
    
x = Dropout(0.5)(last)
x = Flatten()(x)
x = Dense(1)(x)
preds = Activation(activation='sigmoid')(x)

final_model = Model(input=model.input, output=preds)

final_model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
            loss='binary_crossentropy', metrics=['accuracy'])

In [98]:
final_model.fit(X_train, y_train, batch_size=100, nb_epoch=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


ValueError: Error when checking model input: expected input_31 to have shape (None, 224, 224, 3) but got array with shape (868, 3, 224, 224)

In [109]:
score = final_model.evaluate(X_test, y_test, batch_size=100)
print("TEST SCORE: ", score)

('TEST SCORE: ', [0.016284651539159683, 0.99769585473196842])


In [118]:
final_model.save('data/final_model.h5')

In [None]:
final_model = 

In [None]:
y_pred_train = final_model.predict(X_train)
y_pred_test = final_model.predict(X_test)

In [125]:
get_metrics(final_model, X_train, X_test, y_train, y_test)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()