In [1]:
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import cv2, os, math, json
import matplotlib.pyplot as plt
from glob import glob
import shutil

In [3]:
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.layers.core import Dense, Flatten, Dropout, Lambda
from keras.optimizers import Adam
from keras.layers.convolutional import Conv2D, MaxPooling2D, Conv2DTranspose

Using TensorFlow backend.


In [4]:
import bcolz

In [5]:
# Root of the dataset tree. It must end with '/' because every path below is
# built by plain string concatenation (e.g. path + 'train/face/').
# Swap in the commented-out line to point at 'data/' instead of the sample subset.
# path = 'data/'
path = 'data/sample/'

In [5]:
# Count the regular files (sub-directories are skipped) in each training class folder.
face_dir = path + 'train/face/'
no_face_dir = path + 'train/no_face/'
face = sum(1 for entry in os.listdir(face_dir) if os.path.isfile(face_dir + entry))
no_face = sum(1 for entry in os.listdir(no_face_dir) if os.path.isfile(no_face_dir + entry))

In [6]:
# Display the per-class training file counts as a (face, no_face) tuple.
face, no_face

(28204, 780)

In [6]:
# Mini-batch size for the training iterator and the in-memory fits below;
# the validation iterator is created with twice this value.
batch_size = 44

# Create train, valid and sample folders

In [8]:
# Create the validation and sample directory roots.
# os.makedirs(..., exist_ok=True) keeps this cell re-runnable: plain os.mkdir
# raises as soon as one of the directories already exists.
# NOTE(review): these paths are relative to `path`; with path = 'data/sample/'
# this nests a second 'sample/' directory, so run it with `path` set to the
# full dataset root.
for split_root in ('valid', 'sample', 'sample/train', 'sample/valid'):
    os.makedirs(path + split_root, exist_ok=True)

In [9]:
# Create one sub-folder per class ('face', 'no_face') under each split.
# exist_ok=True makes the cell idempotent, so re-running it after a partial
# run no longer fails on directories that are already present.
for split_dir in ('valid', 'sample/train', 'sample/valid'):
    for class_name in ('face', 'no_face'):
        os.makedirs(path + split_dir + '/' + class_name, exist_ok=True)

In [10]:
# Hold out a random fifth of the face images: move them from train/face/ to valid/face/.
# The permutation is unseeded, so the split differs from run to run.
g = glob(path + 'train/face/' + '*.jpg')
shuf = np.random.permutation(g)

n_pick = len(g) // 5
for src in shuf[:n_pick]:
    shutil.move(src, path + 'valid/face/')

In [11]:
# Hold out a random fifth of the no_face images: move them from train/no_face/ to valid/no_face/.
# The permutation is unseeded, so the split differs from run to run.
g = glob(path + 'train/no_face/' + '*.jpg')
shuf = np.random.permutation(g)

n_pick = len(g) // 5
for src in shuf[:n_pick]:
    shutil.move(src, path + 'valid/no_face/')

In [12]:
# Copy (not move) a random tenth of train/face/ into sample/train/face/;
# the originals stay in place, and copy2 also carries over file metadata.
g = glob(path + 'train/face/' + '*.jpg')
shuf = np.random.permutation(g)

n_pick = len(g) // 10
for src in shuf[:n_pick]:
    shutil.copy2(src, path + 'sample/train/face/')

In [13]:
# Copy (not move) a random tenth of train/no_face/ into sample/train/no_face/;
# the originals stay in place, and copy2 also carries over file metadata.
g = glob(path + 'train/no_face/' + '*.jpg')
shuf = np.random.permutation(g)

n_pick = len(g) // 10
for src in shuf[:n_pick]:
    shutil.copy2(src, path + 'sample/train/no_face/')

In [14]:
# Copy (not move) a random tenth of valid/no_face/ into sample/valid/no_face/;
# the originals stay in place, and copy2 also carries over file metadata.
g = glob(path + 'valid/no_face/' + '*.jpg')
shuf = np.random.permutation(g)

n_pick = len(g) // 10
for src in shuf[:n_pick]:
    shutil.copy2(src, path + 'sample/valid/no_face/')

In [15]:
# Copy (not move) a random tenth of valid/face/ into sample/valid/face/;
# the originals stay in place, and copy2 also carries over file metadata.
g = glob(path + 'valid/face/' + '*.jpg')
shuf = np.random.permutation(g)

n_pick = len(g) // 10
for src in shuf[:n_pick]:
    shutil.copy2(src, path + 'sample/valid/face/')

# Data generators and helper functions

In [7]:
def get_batches(path, gen=ImageDataGenerator(), shuffle=True, batch_size=4, class_mode='categorical', 
                target_size=(224,224)):
    """Return the iterator produced by ``gen.flow_from_directory`` for `path`.

    Args:
        path: directory handed to ``flow_from_directory``.
        gen: generator object providing ``flow_from_directory``. The default
            instance is created once, when this function is defined, and is
            shared by every call that omits `gen`.
        shuffle, batch_size, class_mode, target_size: forwarded unchanged as
            keyword arguments.
    """
    flow_options = dict(target_size=target_size, class_mode=class_mode,
                        shuffle=shuffle, batch_size=batch_size)
    return gen.flow_from_directory(path, **flow_options)

In [8]:
def onehot(x):
    """Return `x` encoded by Keras' ``to_categorical`` (thin convenience wrapper)."""
    encoded = to_categorical(x)
    return encoded

In [9]:
def get_data(path, target_size=(224,224)):
    """Read every image under `path` and return them as one concatenated array.

    The images are pulled one at a time (batch_size=1) from an unshuffled,
    label-free iterator and joined along the first axis, so the whole set is
    held in memory at once.
    """
    batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
    chunks = []
    for _ in range(batches.n):
        chunks.append(batches.next())
    return np.concatenate(chunks)

In [10]:
def get_classes(path):
    """Collect class ids, one-hot labels and file names for the train and valid splits.

    Both splits are read unshuffled from ``path + 'train'`` and ``path + 'valid'``.

    Returns:
        (val_classes, trn_classes, val_onehot, trn_onehot, val_filenames, trn_filenames)
    """
    trn_iter = get_batches(path + 'train', shuffle=False, batch_size=1)
    val_iter = get_batches(path + 'valid', shuffle=False, batch_size=1)
    #test_batches = get_batches(path + 'test', shuffle=False, batch_size=1)
    val_ids, trn_ids = val_iter.classes, trn_iter.classes
    val_onehot = onehot(val_ids)
    trn_onehot = onehot(trn_ids)
    return (val_ids, trn_ids, val_onehot, trn_onehot,
            val_iter.filenames, trn_iter.filenames)

In [11]:
def split_at(model, layer_type):
    """Split ``model.layers`` just after the last layer of exactly `layer_type`.

    The type match is exact (``type(layer) is layer_type``), so subclasses do
    not count.

    Returns:
        (head, tail): `head` ends with the last matching layer, `tail` holds
        the remaining layers.

    Raises:
        IndexError: if no layer has exactly that type.
    """
    all_layers = model.layers
    matches = [pos for pos, candidate in enumerate(all_layers)
               if type(candidate) is layer_type]
    cut = matches[-1] + 1
    return all_layers[:cut], all_layers[cut:]

In [12]:
def save_array(fname, arr):
    """Write `arr` to the bcolz root directory `fname` (opened with mode 'w') and flush it."""
    bcolz.carray(arr, rootdir=fname, mode='w').flush()


def load_array(fname):
    """Open the bcolz store at `fname` and return its full ``[:]`` slice."""
    stored = bcolz.open(fname)
    return stored[:]

In [21]:
# Directory iterators over the train and valid splits. Training batches are
# shuffled (the get_batches default); validation is read unshuffled with a
# doubled batch size.
batches = get_batches(path+'train', batch_size=batch_size)
val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)

Found 292 images belonging to 2 classes.
Found 67 images belonging to 2 classes.


In [22]:
# Class ids, one-hot labels and file names for both splits, read from
# unshuffled iterators inside get_classes.
(val_classes, trn_classes, val_labels, trn_labels, 
    val_filenames, filenames) = get_classes(path)

Found 292 images belonging to 2 classes.
Found 67 images belonging to 2 classes.


In [23]:
# Strip the class sub-directory, keeping only the bare file name.
# os.path.basename honours the platform's path separator, whereas
# split('/') leaves the directory attached when paths are joined with '\\'.
raw_filenames = [os.path.basename(f) for f in filenames]
raw_val_filenames = [os.path.basename(f) for f in val_filenames]

In [24]:
# Vgg16 comes from the project-local vgg16 module, which is not shown in this notebook.
# NOTE(review): presumably a pretrained VGG16 wrapper that exposes the Keras
# model as `.model` — confirm against vgg16.py.
from vgg16 import Vgg16
vgg = Vgg16()

In [25]:
# Remove the model's final layer; a new output layer is added a few cells below.
vgg.model.pop()

In [26]:
# Freeze every layer currently in the model. This must run before the new
# output layer is added, so that layer is the only one left trainable.
for layer in vgg.model.layers:
    layer.trainable = False

In [27]:
# New softmax output layer with one unit per class reported by the training iterator.
vgg.model.add(Dense(batches.num_classes, activation='softmax'))

In [28]:
# Print the layer-by-layer summary to check the modified architecture.
vgg.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_2 (Lambda)            (None, 224, 224, 3)       0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 224, 224, 64)      1792      
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 224, 224, 64)      36928     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 112, 112, 64)      0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 112, 112, 128)     73856     
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 112, 112, 128)     147584    
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 56, 56, 128)       0         
__________

In [29]:
# NOTE(review): Vgg16.compile() is defined in vgg16.py (not visible here);
# presumably it compiles vgg.model with that class's default optimizer/loss — confirm.
vgg.compile()

In [30]:
# Train for one epoch straight from the directory iterators.
# steps_per_epoch / validation_steps count *batches*, not samples, so they are
# ceil(n / batch_size). Passing n itself makes every "epoch" loop over the
# data batch_size times.
# float() and int() keep the arithmetic correct under both true and floor division.
train_steps = int(math.ceil(batches.n / float(batches.batch_size)))
valid_steps = int(math.ceil(val_batches.n / float(val_batches.batch_size)))
vgg.model.fit_generator(batches, steps_per_epoch=train_steps, epochs=1,
                        validation_data=val_batches, validation_steps=valid_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f2220ea5a58>

In [31]:
# Load both splits fully into memory as arrays. get_data reads unshuffled,
# the same setting get_classes uses for the labels, so rows and labels are
# expected to line up.
trn = get_data(path + 'train')
val = get_data(path + 'valid')

Found 292 images belonging to 2 classes.
Found 67 images belonging to 2 classes.


In [32]:
# Cache the image arrays on disk with bcolz so they can be reloaded without
# re-reading the image files.
# NOTE(review): assumes path + 'results/' already exists — confirm.
save_array(path+'results/trn.dat', trn)
save_array(path+'results/val.dat', val)

In [33]:
# Reload the cached arrays (load_array returns the full [:] slice of each store).
trn = load_array(path+'results/trn.dat')
val = load_array(path+'results/val.dat')

In [34]:
# Recompile the model with Adam (learning rate 1e-3), categorical
# cross-entropy loss and accuracy as the reported metric.
vgg.model.compile(optimizer=Adam(1e-3),
       loss='categorical_crossentropy', metrics=['accuracy'])

In [35]:
# Train on the in-memory arrays for three epochs.
# `epochs` is the Keras 2 keyword, already used by the fit_generator and
# bn_model.fit calls in this notebook; `nb_epoch` is the deprecated Keras 1 spelling.
vgg.model.fit(trn, trn_labels, batch_size=batch_size, epochs=3, validation_data=(val, val_labels))



Train on 292 samples, validate on 67 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f22206d3a58>

In [36]:
# Checkpoint the weights after this training stage.
vgg.model.save_weights(path+'results/ft1.h5')

In [37]:
# Restore the 'ft1.h5' checkpoint into the model.
vgg.model.load_weights(path+'results/ft1.h5')

In [38]:
def split_at(model, layer_type):
    """Partition ``model.layers`` at the last layer whose type is exactly `layer_type`.

    NOTE(review): this repeats the `split_at` definition given earlier in the
    notebook; the later definition shadows the earlier one. Keep one copy.

    Returns:
        (head, tail): `head` runs up to and including the last matching layer,
        `tail` is everything after it.

    Raises:
        IndexError: if no layer matches.
    """
    stack = model.layers
    hits = [n for n in range(len(stack)) if type(stack[n]) is layer_type]
    boundary = hits[-1] + 1
    head = stack[:boundary]
    tail = stack[boundary:]
    return head, tail

In [39]:
# Split at the last Conv2D layer: conv_layers runs up to and including it,
# fc_layers holds everything after it.
conv_layers,fc_layers = split_at(vgg.model, Conv2D)

In [40]:
# Inspect the first part of the split (ends with the last Conv2D layer).
conv_layers

[<keras.layers.core.Lambda at 0x7f22205aa4e0>,
 <keras.layers.convolutional.Conv2D at 0x7f22205b35f8>,
 <keras.layers.convolutional.Conv2D at 0x7f22205de630>,
 <keras.layers.pooling.MaxPooling2D at 0x7f22204d3978>,
 <keras.layers.convolutional.Conv2D at 0x7f22204cc160>,
 <keras.layers.convolutional.Conv2D at 0x7f22204cc320>,
 <keras.layers.pooling.MaxPooling2D at 0x7f22204e80f0>,
 <keras.layers.convolutional.Conv2D at 0x7f22204df940>,
 <keras.layers.convolutional.Conv2D at 0x7f22204d4cf8>,
 <keras.layers.convolutional.Conv2D at 0x7f222046fe48>,
 <keras.layers.pooling.MaxPooling2D at 0x7f2220575ba8>,
 <keras.layers.convolutional.Conv2D at 0x7f2220fda668>,
 <keras.layers.convolutional.Conv2D at 0x7f2220fda320>,
 <keras.layers.convolutional.Conv2D at 0x7f2220ffdda0>,
 <keras.layers.pooling.MaxPooling2D at 0x7f2220f8f400>,
 <keras.layers.convolutional.Conv2D at 0x7f2220fb2a20>,
 <keras.layers.convolutional.Conv2D at 0x7f2220fb2c50>,
 <keras.layers.convolutional.Conv2D at 0x7f2220f52940>]

In [41]:
# Inspect the remaining layers (everything after the last Conv2D layer).
fc_layers

[<keras.layers.pooling.MaxPooling2D at 0x7f2220f75f60>,
 <keras.layers.core.Flatten at 0x7f2220f19470>,
 <keras.layers.core.Dense at 0x7f2220f19ba8>,
 <keras.layers.core.Dropout at 0x7f2220f3e278>,
 <keras.layers.core.Dense at 0x7f2220f3e3c8>,
 <keras.layers.core.Dropout at 0x7f2220ee7f28>,
 <keras.layers.core.Dense at 0x7f22205b9550>]

In [42]:
# Wrap the convolutional layers in their own model so their outputs can be computed separately.
conv_model = Sequential(conv_layers)

In [43]:
# Run the convolutional layers once over both image arrays; the outputs are
# used below as fixed input features for a separate dense model.
conv_feat = conv_model.predict(trn)
conv_val_feat = conv_model.predict(val)

In [44]:
# Cache the precomputed convolutional features on disk.
save_array(path+'results/conv_val_feat.dat', conv_val_feat)
save_array(path+'results/conv_feat.dat', conv_feat)

In [45]:
# Reload the cached convolutional features.
conv_feat = load_array(path+'results/conv_feat.dat')
conv_val_feat = load_array(path+'results/conv_val_feat.dat')

In [46]:
# Check the shape of the validation features.
conv_val_feat.shape

(67, 14, 14, 512)

In [47]:
def get_bn_layers(p):
    """Build the layer list for a dense classifier fed by the conv features.

    Layout: MaxPooling2D -> Dropout(p/4) -> Flatten -> Dense(512, relu) ->
    Dropout(p) -> Dense(512, relu) -> Dropout(p/2) -> Dense(2, softmax).

    Notes:
        * Reads the module-level `conv_layers` for the input shape.
        * The output layer is hard-coded to 2 units.
        * Despite the name, no batch-normalisation layer is created here.

    Args:
        p: base dropout rate.
    """
    feature_shape = conv_layers[-1].output_shape[1:]
    head = [MaxPooling2D(input_shape=feature_shape), Dropout(p/4), Flatten()]
    # Two identical hidden blocks that differ only in their dropout rate.
    for rate in (p, p/2):
        head += [Dense(512, activation='relu'), Dropout(rate)]
    head.append(Dense(2, activation='softmax'))
    return head

In [48]:
# Base dropout rate passed to get_bn_layers, which also derives p/4 and p/2 from it.
p=0.6

In [49]:
# Build the dense model from the get_bn_layers layer list and compile it with
# Adam (lr 0.001), categorical cross-entropy and accuracy.
bn_model = Sequential(get_bn_layers(p))
bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [50]:
# Train the dense model on the precomputed convolutional features for three
# epochs, validating on the precomputed validation features.
bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, epochs=3, 
             validation_data=(conv_val_feat, val_labels))

Train on 292 samples, validate on 67 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f222054c550>