# Building the Neural Network

## Library Imports

In [1]:
#### KERAS IMPORTS ####
from keras import backend as K
K.set_image_dim_ordering('th')

import keras
from keras.models import Sequential
from keras import layers
from keras.layers.core import Flatten, Dense, Dropout, Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD

#### SKLEARN IMPORTS ####
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

#### OTHER IMPORTS ####
import cv2
import numpy as np
import pandas as pd
from os import listdir
from os.path import isfile, join
import joblib
import re

# from PIL import ImageGrab
import matplotlib.pyplot as plt

% matplotlib inline

print("BACKEND: ", keras.backend.backend())

Using TensorFlow backend.


('BACKEND: ', u'tensorflow')


## Functions

In [2]:
def prepare_image(image_path, model='vgg_16'):
    '''
    DESCRIPTION:
        - Load an image from disk and preprocess it for the pretrained CNN.
        - For 'vgg_16' the image is resized to 224x224, cast to float32,
          a fixed offset is subtracted from each of the three channels,
          the channel axis is moved to the front and a batch axis is added.
    INPUT:
        - image_path is the location of the image file
        - model is the type of pretrained CNN (only 'vgg_16' is supported)
    OUTPUT:
        - resized is the preprocessed image with shape (1, 3, 224, 224),
          or None if the image could not be read or resized
    RAISES:
        - ValueError if model is not a supported type
    '''
    if model != 'vgg_16':
        # Fail loudly instead of falling through to an unbound `resized`.
        raise ValueError("Unsupported model type: {!r}".format(model))

    try:
        raw = cv2.imread(image_path)
        if raw is None:
            # cv2.imread reports a missing/unreadable file by returning None.
            raise IOError("cv2.imread returned None")
        resized = cv2.resize(raw, (224, 224)).astype(np.float32)
    except Exception:
        # Best-effort: report and skip the file, but let KeyboardInterrupt /
        # SystemExit propagate (a bare `except:` would swallow those too).
        print("Image cannot be resized: ", image_path)
        return None

    # Per-channel offsets -- presumably the ImageNet channel means used by
    # VGG, in the BGR order cv2 loads images in; TODO confirm.
    resized[:, :, 0] -= 103.939
    resized[:, :, 1] -= 116.779
    resized[:, :, 2] -= 123.68
    resized = resized.transpose((2, 0, 1))     # (H, W, C) -> (C, H, W)
    resized = np.expand_dims(resized, axis=0)  # add the leading batch axis
    return resized

In [3]:
def resize_img(orig_img, new_dim):
    '''
    DESCRIPTION:
        - scales an image to a new width while keeping its aspect ratio.
    INPUT:
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
        - new_dim is the width, in pixels, of the resized image.
    OUTPUT:
        - the resized image, as returned by cv2.resize.
    '''
    # Ratio between the requested width and the current width.
    ratio = float(new_dim) / orig_img.shape[1]
    # cv2.resize takes the target size as (width, height).
    target_size = (new_dim, int(orig_img.shape[0] * ratio))
    return cv2.resize(orig_img, target_size, interpolation=cv2.INTER_AREA)

In [4]:
def rotate_img(orig_img, deg_rot, scale):
    '''
    DESCRIPTION:
        - rotates (and optionally scales) an image about its center.
        - the output keeps the original width and height.
    INPUT:
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
        - deg_rot is the rotation angle handed to cv2.getRotationMatrix2D.
        - scale is the scale factor handed to cv2.getRotationMatrix2D.
          NOTE(review): per the OpenCV docs a factor > 1 should enlarge the
          content (zoom in) and a factor < 1 should shrink it -- confirm.
    OUTPUT:
        - the rotated image, as returned by cv2.warpAffine.
    '''
    img_height, img_width = orig_img.shape[:2]
    transform = cv2.getRotationMatrix2D((img_width/2, img_height/2),
                                        angle=deg_rot,
                                        scale=scale)
    # Output canvas is given as (width, height), same size as the input.
    return cv2.warpAffine(orig_img, transform, (img_width, img_height))

In [5]:
def crop_img(orig_img, h1, h2, w1, w2):
    '''
    DESCRIPTION:
        - cuts a rectangular region out of an image.
    INPUT:
        - orig_img is a numpy array (use cv2.imread() to transform img into numpy array).
        - h1 and h2 are the start and end indices along the first (height) axis.
        - w1 and w2 are the start and end indices along the second (width) axis.
    OUTPUT:
        - the cropped region, obtained by slicing orig_img.
    '''
    row_span = slice(h1, h2)
    col_span = slice(w1, w2)
    return orig_img[row_span, col_span]

In [14]:
def augment(image_path):
    '''
    DESCRIPTION:
        - builds a randomly degraded copy of an image: the image is shrunk,
          rotated/scaled and cropped (in that order), then written to
          'data/aug_imagess/' with an 'aug_' prefix on the file name.
    INPUT:
        - image_path is the location of the original image file.
    OUTPUT:
        - new_img_arr is a numpy array of the augmented image.
        - new_img_path is the path the augmented image was written to.
        - ([], '') is returned when the image cannot be read or written.
    '''
    img_arr = cv2.imread(image_path)
    if img_arr is None:
        # cv2.imread returns None for a missing/unreadable file; without this
        # guard the .shape access below would raise AttributeError.
        print("Image cannot be read: ", image_path)
        return [], ''

    # Step 1: shrink to between 10% and 30% of the original width.
    new_dim = int(img_arr.shape[1] * np.random.uniform(low=0.1, high=0.3))
    new_img_arr = resize_img(img_arr, new_dim)

    # Step 2: rotate by a random angle with a random scale factor.
    deg = np.random.randint(15, 345)
    scale = np.random.uniform(low=1, high=4)
    new_img_arr = rotate_img(new_img_arr, deg, scale)

    # Step 3: crop from a random top-left corner. Upper bounds may exceed the
    # image size (slicing clips them) but must be strictly greater than the
    # lower bounds, otherwise the crop is empty and cannot be written.
    lower_height = np.random.randint(1, new_img_arr.shape[0])
    lower_width = np.random.randint(1, new_img_arr.shape[1])
    upper_height = np.random.randint(lower_height + 1, 10000)
    upper_width = np.random.randint(lower_width + 1, 10000)

    new_img_arr = crop_img(new_img_arr, h1=lower_height, h2=upper_height, w1=lower_width, w2=upper_width)

    # Strip the leading 'data/<folder>/' so only the file name is reused.
    new_img_path = 'data/aug_imagess/' + 'aug_' + re.sub(r"data/[a-zA-Z]*/", '', image_path)

    new_img = cv2.imwrite(new_img_path, new_img_arr)
    if not new_img:
        print("Check image path: ", new_img_path)
        return [], ''

    return new_img_arr, new_img_path

In [7]:
def get_clean_aug_arrays(tot_count, files):
    '''
    DESCRIPTION:
        - produces the preprocessed arrays for each image in the files list,
          together with the preprocessed arrays of an augmented copy of each.
        - files that cannot be preprocessed are skipped; a file whose
          augmentation fails still contributes its clean array.
    INPUT:
        - tot_count is the number of files to traverse through.
        - files is the list of image files.
    OUTPUT:
        - X is a numpy array of the clean images, one (3, 224, 224) entry each.
        - X_aug is a numpy array of the augmented images, same entry shape.
        - False is returned (instead of the pair) if tot_count exceeds len(files).
    '''
    X = []
    X_aug = []

    if tot_count > len(files):
        print("tot_count exceeds the number of files.")
        return False

    for i in range(tot_count):
        # Convert the clean image
        clean_img = prepare_image(files[i])
        # `is None`, not `== None`: comparing a numpy array with == is
        # elementwise and cannot be used as a single truth value.
        if clean_img is None:
            continue
        X.append(clean_img.reshape((3, 224, 224)))

        # Augment then convert the new image
        temp_img_arr, temp_img = augment(files[i])
        # augment() signals failure with ([], ''); test the path, because
        # `array == []` is an elementwise comparison on numpy arrays.
        if not temp_img:
            continue

        prep_img = prepare_image(temp_img)
        if prep_img is None:
            continue
        X_aug.append(prep_img.reshape((3, 224, 224)))

    if len(X) == len(X_aug):
        print("Augmenting worked correctly.")

    X = np.array(X)
    X_aug = np.array(X_aug)

    return X, X_aug

In [8]:
def neural_net(block1, block2, block3, block4, num_classes, weights_path=None):
    '''
    DESCRIPTION:
        - Builds the Sequential network: three convolutional blocks followed
          by a dense classifier head.
    INPUT:
        - block1, block2 and block3 are the number of filters used by every
          conv layer of the first, second and third convolutional block.
        - block4 is the number of units in each of the two hidden dense layers.
        - num_classes is the number of classes in the target variable
          (size of the softmax output layer).
        - weights_path is an optional weights file to load. Default to None.
    OUTPUT:
        - Structured model (not compiled).
    '''
    model = Sequential()

    # (filters, number of padded 3x3 conv layers) for each convolutional block.
    conv_plan = [(block1, 2), (block2, 3), (block3, 3)]

    needs_input_shape = True
    for n_filters, n_convs in conv_plan:
        for _ in range(n_convs):
            if needs_input_shape:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1,1), input_shape=(3,224,224)))
                needs_input_shape = False
            else:
                model.add(ZeroPadding2D((1,1)))
            model.add(Convolution2D(n_filters, (3, 3), activation='relu'))
        # Each block ends with a 2x2 max-pool of stride 2.
        model.add(MaxPooling2D((2,2), strides=(2,2)))

    # Classifier head: two dense + dropout pairs, then the softmax output.
    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(block4, activation='relu'))
        model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    if weights_path:
        model.load_weights(weights_path)

    return model

In [9]:
def get_metrics(fit_model, X_train, X_test, y_train, y_test):
    '''
    DESCRIPTION:
        - Print accuracy, precision, recall and F1 for the training and test data.
    INPUT:
        - fit_model must be a pretrained model exposing predict_classes().
        - X_train and X_test must be numpy arrays in the correct dimensions.
        - y_train and y_test must be one dimensional numpy arrays if binary classifier;
          they are compared directly against the predicted class indices.
    OUTPUT:
        - prints the scores (returns None).
    '''
    # Use the model that was passed in, not whatever global `model` exists.
    pred_train = fit_model.predict_classes(X_train)
    pred_test = fit_model.predict_classes(X_test)

    scorers = [
        ("ACC", accuracy_score),
        ("PRE", precision_score),
        ("REC", recall_score),
        ("F1", f1_score),
    ]

    for position, (label, scorer) in enumerate(scorers):
        if position > 0:
            # Blank separator between metric groups (none after the last one).
            print("\n")
        train_score = scorer(y_train, pred_train)
        test_score = scorer(y_test, pred_test)
        print(label + " TRAIN: ", train_score)
        print(label + " TEST: ", test_score)

## Preprocessing the data

### Get a list of filenames corresponding to the images

In [10]:
def _list_image_files(directory):
    '''Return directory + name for every regular file directly inside directory.'''
    return [directory + f for f in listdir(directory) if isfile(join(directory, f))]

ny = _list_image_files('data/humansofny/')
am = _list_image_files('data/humansofamsterdam/')
bo = _list_image_files('data/humansofbombay/')
nt = _list_image_files('data/humansofnewtown/')
# Named `pdx`, not `pd`: `pd` is the pandas alias from the imports cell and
# rebinding it to a list would break any later pandas call.
pdx = _list_image_files('data/humansofpdx/')
se = _list_image_files('data/humansofseoul/')

# Only the New York set is used for now; the other sets are kept for later.
files = ny # + am + bo + nt + pdx + se

### Turn each image into a numpy array

### Create "bad" images by resizing, rotating, and cropping the "good" images

In [16]:
# Share of the file list to convert; 1 means every file is processed.
SAMPLE_FRACTION = 1
tot_count = int(len(files) * SAMPLE_FRACTION)
X, X_aug = get_clean_aug_arrays(tot_count, files)



Augmenting worked correctly.


#### Pickle the X and X_aug matrices

In [17]:
# Persist both image arrays to disk; the "Load the X matrix" cell below reads
# them back, so the image conversion does not have to be repeated.
joblib.dump(X, 'data/X.pkl')
joblib.dump(X_aug, 'data/X_aug.pkl')

['data/X_aug.pkl']

#### Load the X and X_aug matrices

In [None]:
# Reload the arrays written by the joblib.dump cell.
# NOTE(review): joblib.load unpickles the file, so only load files produced by
# this notebook, never untrusted ones.
X = joblib.load('data/X.pkl')
X_aug = joblib.load('data/X_aug.pkl')

### Label the photos 
* 1 -> original images
* 0 -> augmented images

In [18]:
# One label per image: 1 for every original, 0 for every augmented copy.
y = [1] * len(X)
y_aug = [0] * len(X_aug)

In [19]:
# Stack originals and augmented images (and their labels) into one dataset.
X_tot = np.concatenate((X, X_aug), axis=0)
y_tot = np.concatenate((y, y_aug), axis=0).reshape((-1, 1))
# One-hot encode the 0/1 labels into two columns.
y_tot = keras.utils.to_categorical(y_tot, num_classes=2)

In [20]:
# Hold out 20% of the data for testing. The fixed random_state makes the
# partition identical on every run, so reported scores are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_tot, y_tot, test_size=0.2, random_state=42)

### Initialize the model

In [21]:
# Small network: 16/32/64 filters in the conv blocks, 128 units in the dense
# layers, two output classes.
model = neural_net(
    block1=16,
    block2=32,
    block3=64,
    block4=128,
    num_classes=2,
)
# SGD with Nesterov momentum and a small learning-rate decay.
sgd = SGD(
    lr=0.001,
    decay=1e-6,
    momentum=0.9,
    nesterov=True,
)
model.compile(optimizer=sgd, loss='categorical_crossentropy')

In [None]:
# Same batch size for training and evaluation.
BATCH_SIZE = 200

model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=15)
# NOTE(review): compile() was given no metrics, so this score is presumably
# just the categorical cross-entropy loss on the test set -- confirm.
score = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
print("TEST SCORE: ", score)

Epoch 1/15

In [None]:
# predict_classes returns class indices (1 = original, 0 = augmented), so the
# one-hot targets must be turned back into indices with argmax. Taking column 0
# of the one-hot matrix instead would flip every label (column 0 is 1 for the
# augmented class) and invert all of the scores.
get_metrics(model, X_train, X_test, np.argmax(y_train, axis=1), np.argmax(y_test, axis=1))