In [1]:
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import cv2, os, math, json
import matplotlib.pyplot as plt
from glob import glob
import shutil

In [28]:
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.layers.core import Dense, Flatten, Dropout, Lambda

In [4]:
# Root data directory, relative to the notebook. Later cells build sub-paths by plain
# string concatenation (e.g. path + 'train/face/'), so the trailing slash is required.
path = 'data/'

In [4]:
# Count the regular files (anything that is not a file is ignored) in each training class folder.
face_dir = path + 'train/face/'
no_face_dir = path + 'train/no_face/'
face = sum(1 for entry in os.listdir(face_dir) if os.path.isfile(face_dir + entry))
no_face = sum(1 for entry in os.listdir(no_face_dir) if os.path.isfile(no_face_dir + entry))

In [5]:
# Display the number of files per class as (face, no_face).
face, no_face

(239, 390)

In [5]:
# Batch size for the training generator; the validation generator created further
# down uses twice this value.
batch_size = 4

# Create train, valid and sample folders

In [18]:
# Create the top-level validation and sample folders.
# Each folder is only created when it is missing, so this cell can be re-run safely
# (a bare os.mkdir raises if the folder already exists).
# Parents are listed before their children because os.mkdir does not create parents.
for folder in ('valid', 'sample', 'sample/train', 'sample/valid'):
    if not os.path.isdir(path + folder):
        os.mkdir(path + folder)

In [19]:
# Create one sub-folder per category ('face', 'no_face') under every split folder.
# Folders that already exist are skipped so the cell can be re-run without raising.
for split in ('valid', 'sample/train', 'sample/valid'):
    for category in ('face', 'no_face'):
        target = path + split + '/' + category
        if not os.path.isdir(target):
            os.makedirs(target)

In [22]:
# Hold out one fifth of train/face/*.jpg by MOVING the files into valid/face/.
# - The listing is sorted and the shuffle uses a fixed seed, so the same starting
#   folder always produces the same split.
# - Nothing is moved when valid/face/ already contains images, so re-running the
#   notebook does not keep draining the training set.
if not glob(path + 'valid/face/' + '*.jpg'):
    candidates = sorted(glob(path + 'train/face/' + '*.jpg'))
    shuffled = np.random.RandomState(42).permutation(candidates)
    for src in shuffled[:len(candidates) // 5]:
        shutil.move(src, path + 'valid/face/')

In [23]:
# Hold out one fifth of train/no_face/*.jpg by MOVING the files into valid/no_face/.
# - The listing is sorted and the shuffle uses a fixed seed, so the same starting
#   folder always produces the same split.
# - Nothing is moved when valid/no_face/ already contains images, so re-running the
#   notebook does not keep draining the training set.
if not glob(path + 'valid/no_face/' + '*.jpg'):
    candidates = sorted(glob(path + 'train/no_face/' + '*.jpg'))
    shuffled = np.random.RandomState(42).permutation(candidates)
    for src in shuffled[:len(candidates) // 5]:
        shutil.move(src, path + 'valid/no_face/')

In [26]:
# COPY (not move) one tenth of train/face/*.jpg into sample/train/face/;
# the originals stay in train/face/.
# Sorting the listing and seeding the shuffle makes a re-run pick the same files
# instead of adding a different random tenth to the sample folder each time.
candidates = sorted(glob(path + 'train/face/' + '*.jpg'))
shuffled = np.random.RandomState(42).permutation(candidates)
for src in shuffled[:len(candidates) // 10]:
    shutil.copy2(src, path + 'sample/train/face/')

In [28]:
# COPY (not move) one tenth of train/no_face/*.jpg into sample/train/no_face/;
# the originals stay in train/no_face/.
# Sorting the listing and seeding the shuffle makes a re-run pick the same files
# instead of adding a different random tenth to the sample folder each time.
candidates = sorted(glob(path + 'train/no_face/' + '*.jpg'))
shuffled = np.random.RandomState(42).permutation(candidates)
for src in shuffled[:len(candidates) // 10]:
    shutil.copy2(src, path + 'sample/train/no_face/')

In [27]:
# COPY (not move) one tenth of valid/no_face/*.jpg into sample/valid/no_face/;
# the originals stay in valid/no_face/.
# Sorting the listing and seeding the shuffle makes a re-run pick the same files
# instead of adding a different random tenth to the sample folder each time.
candidates = sorted(glob(path + 'valid/no_face/' + '*.jpg'))
shuffled = np.random.RandomState(42).permutation(candidates)
for src in shuffled[:len(candidates) // 10]:
    shutil.copy2(src, path + 'sample/valid/no_face/')

In [29]:
# COPY (not move) one tenth of valid/face/*.jpg into sample/valid/face/;
# the originals stay in valid/face/.
# Sorting the listing and seeding the shuffle makes a re-run pick the same files
# instead of adding a different random tenth to the sample folder each time.
candidates = sorted(glob(path + 'valid/face/' + '*.jpg'))
shuffled = np.random.RandomState(42).permutation(candidates)
for src in shuffled[:len(candidates) // 10]:
    shutil.copy2(src, path + 'sample/valid/face/')

# Data generators and helper functions

In [6]:
def get_batches(path, gen=None, shuffle=True, batch_size=4, class_mode='categorical',
                target_size=(224,224)):
    """Return the batch iterator produced by `gen.flow_from_directory` for `path`.

    Args:
        path: directory handed straight to `flow_from_directory`.
        gen: object providing `flow_from_directory` (e.g. an ImageDataGenerator).
            When None, a fresh `ImageDataGenerator()` is created for this call.
            The previous default built a single instance when the function was
            defined and shared it between every call.
        shuffle, batch_size, class_mode, target_size: forwarded unchanged as
            keyword arguments.

    Returns:
        Whatever `gen.flow_from_directory` returns.
    """
    if gen is None:
        gen = ImageDataGenerator()
    return gen.flow_from_directory(path, target_size=target_size,
                                   class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)

In [7]:
def onehot(x):
    """Return `x` converted by keras' `to_categorical`."""
    encoded = to_categorical(x)
    return encoded

In [8]:
def get_data(path, target_size=(224,224)):
    """Read everything the generator finds under `path` into one array.

    Builds an unshuffled iterator with batch_size=1 and class_mode=None, pulls
    `n` batches from it (one per item the iterator reports) and concatenates
    them with `np.concatenate`.

    Args:
        path: directory handed to `get_batches`.
        target_size: forwarded to `get_batches`.
    """
    image_iter = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
    chunks = []
    for _ in range(image_iter.n):
        chunks.append(image_iter.next())
    return np.concatenate(chunks)

In [9]:
def get_classes(path):
    """Collect classes, one-hot labels and filenames for the train and valid sets.

    Args:
        path: root directory; 'train' and 'valid' are appended by plain string
            concatenation, so it must end with a path separator.

    Returns:
        A 6-tuple: (valid classes, train classes, one-hot valid labels,
        one-hot train labels, valid filenames, train filenames).
    """
    # The train iterator is created first, then the valid one; neither is shuffled.
    train_iter = get_batches(path + 'train', shuffle=False, batch_size=1)
    valid_iter = get_batches(path + 'valid', shuffle=False, batch_size=1)
    valid_classes, train_classes = valid_iter.classes, train_iter.classes
    return (valid_classes, train_classes,
            onehot(valid_classes), onehot(train_classes),
            valid_iter.filenames, train_iter.filenames)

In [10]:
def split_at(model, layer_type):
    """Split `model.layers` right after the last layer of exactly `layer_type`.

    The match is on the exact type (`type(layer) is layer_type`), so subclasses
    of `layer_type` do not count.

    Returns:
        (head, tail): `head` ends with the last matching layer, `tail` holds
        every layer after it.

    Raises:
        IndexError: if no layer of `layer_type` is present.
    """
    all_layers = model.layers
    matches = []
    for position, layer in enumerate(all_layers):
        if type(layer) is layer_type:
            matches.append(position)
    # matches[-1] raises IndexError when nothing matched.
    cut = matches[-1] + 1
    return all_layers[:cut], all_layers[cut:]

In [11]:
def save_array(fname, arr):
    """Write `arr` to disk as a bcolz carray rooted at `fname`.

    The carray is created with mode='w' and flushed before returning.
    """
    # Imported locally because no cell in this notebook imports bcolz; without
    # this the call fails with NameError.
    import bcolz
    carray = bcolz.carray(arr, rootdir=fname, mode='w')
    carray.flush()


def load_array(fname):
    """Open the bcolz array stored at `fname` and return its full contents (`[:]`)."""
    # Imported locally because no cell in this notebook imports bcolz; without
    # this the call fails with NameError.
    import bcolz
    return bcolz.open(fname)[:]

In [12]:
# Training batches use get_batches' default shuffle=True.
# Validation batches are not shuffled and use twice the training batch size.
batches = get_batches(path+'train', batch_size=batch_size)
val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)

Found 504 images belonging to 2 classes.
Found 125 images belonging to 2 classes.


In [13]:
# Classes, one-hot labels and filenames for the valid and train sets, unpacked in
# the order get_classes returns them (valid first, then train, for each pair).
(val_classes, trn_classes, val_labels, trn_labels, 
    val_filenames, filenames) = get_classes(path)

Found 504 images belonging to 2 classes.
Found 125 images belonging to 2 classes.


In [14]:
# Keep only the last path component of every file name.
# os.path.basename uses the platform's path separator(s) instead of assuming '/'.
raw_filenames = [os.path.basename(f) for f in filenames]
raw_val_filenames = [os.path.basename(f) for f in val_filenames]

In [20]:
# Vgg16 comes from the project-local `vgg16` module, which is not part of this notebook.
# NOTE(review): presumably the constructor builds a pretrained VGG16 model exposed as
# `vgg.model` — confirm in vgg16.py.
from vgg16 import Vgg16
vgg = Vgg16()

In [22]:
# Remove the model's last layer; a new Dense output layer is added in a later cell.
vgg.model.pop()

In [25]:
# Mark every layer currently in the model as non-trainable.
for frozen_layer in vgg.model.layers:
    frozen_layer.trainable = False

In [29]:
# Append a softmax Dense layer with one unit per class reported by the training generator.
vgg.model.add(Dense(batches.num_classes, activation='softmax'))

In [30]:
# Print the layer-by-layer summary of the modified model.
vgg.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_2 (Lambda)            (None, 224, 224, 3)       0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 224, 224, 64)      1792      
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 224, 224, 64)      36928     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 112, 112, 64)      0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 112, 112, 128)     73856     
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 112, 112, 128)     147584    
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 56, 56, 128)       0         
__________

In [31]:
# NOTE(review): Vgg16.compile is defined in vgg16.py (not visible here); presumably it
# compiles `vgg.model` with the project's default optimizer and loss — confirm.
vgg.compile()

In [34]:
# Train for one epoch and evaluate on the validation generator.
# steps_per_epoch / validation_steps count BATCHES, not images. Passing the image
# count (`.n`) made each "epoch" run batch_size times more steps than one pass over
# the data, so the step counts are derived as ceil(n / batch_size) instead.
# float() keeps the division exact under Python 2 as well.
train_steps = int(math.ceil(batches.n / float(batches.batch_size)))
valid_steps = int(math.ceil(val_batches.n / float(val_batches.batch_size)))
# NOTE: training aborts if the generator hits a file it cannot identify as an image
# (see the StopIteration output recorded under this cell); remove or re-encode that file.
vgg.model.fit_generator(batches, steps_per_epoch=train_steps, epochs=1,
                        validation_data=val_batches, validation_steps=valid_steps)

Epoch 1/1


StopIteration: cannot identify image file 'data/train/face/2002_07_20_img_548.jpg'

In [33]:
# Number of training images found by the generator.
batches.n

504