In [1]:
from keras.engine import  Model
from keras.layers import Flatten, Dense, Input, Dropout
from keras_vggface.vggface import VGGFace
from keras.preprocessing.image import ImageDataGenerator
from keras_vggface import utils
from keras.optimizers import Adam
from keras.preprocessing import image
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
import os
import shutil
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Constants
IMG_SIZE = 200
IMG_DIR = '../project/all_females'
RATING_PATH = './Projekt_SGE_Assessment_ErikK.txt'

### Build model

In [3]:
K.clear_session()

vgg_base = VGGFace(include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling='max')

# Add custom layers
last_layer = vgg_base.get_layer('global_max_pooling2d_1').output
#X = Dropout(0.2)(last_layer)
# x = Flatten(name='flatten')(last_layer)
X = Dense(256, activation='relu', name='fc6')(last_layer)
X = Dropout(0.2)(X)
X = Dense(128, activation='relu', name='fc7')(X)
X = Dropout(0.2)(X)
output = Dense(1, activation='linear')(X)

# this is the model we will train
model = Model(inputs=vgg_base.input, outputs=output)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in vgg_base.layers:
    layer.trainable = False

# Make sure weights are not trainable
print("Trainable weights:")
model.trainable_weights

model.compile(optimizer=Adam(clipnorm=1.0),loss='mean_absolute_error', metrics=['mae'])

model.summary()

Trainable weights:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 200, 200, 3)       0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 200, 200, 64)      1792      
_________________________________________________________________
conv1_2 (Conv2D)             (None, 200, 200, 64)      36928     
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 100, 100, 64)      0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 100, 100, 128)     73856     
_________________________________________________________________
conv2_2 (Conv2D)             (None, 100, 100, 128)     147584    
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 50, 50, 128)       0 

### Prepare data
### **Please remember to delete the train & test folders if you have run this cell before!**

In [4]:
# Read in ratings
ratings = np.genfromtxt(RATING_PATH)

### Distribute images across folders
if not os.path.isdir('./data/train/train'):
    os.makedirs('./data/train/train')
if not os.path.isdir('./data/test/test'):
    os.makedirs('./data/test/test')
        
### Move all images according to ratings
images = sorted(os.listdir(IMG_DIR))

### Make sure lengths of ratings and images correspond 
assert len(ratings) == len(images)

X_train, X_test, y_train, y_test = train_test_split(images, ratings, test_size=0.2)
    
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
        
print("Train-test-split:")
print("X_train: ",X_train.shape)
print("X_test: ",X_test.shape)
print("y_train: ",y_train.shape)
print("y_test: ",y_test.shape)

data_train = np.zeros((X_train.shape[0], IMG_SIZE, IMG_SIZE, 3))
data_test = np.zeros((X_test.shape[0], IMG_SIZE, IMG_SIZE, 3))

for idx, _im in enumerate(X_train):
    # Change the image path with yours.
    _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
    _x = image.img_to_array(_img)
    _x = np.expand_dims(_x, axis=0)
    data_train[idx, :, :, :] = utils.preprocess_input(_x, version=1) / .255 
    
# Same for test data
for idx, _im in enumerate(X_test):   
    _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
    _x = image.img_to_array(_img)
    _x = np.expand_dims(_x, axis=0)
    data_test[idx, :, :, :] = utils.preprocess_input(_x, version=1) / .255

### Loop throgh all images and store them in the dedicated folders
#for img in X_train:
#    src = os.path.join(IMG_DIR, img)
#    dest = os.path.join('./data/train/train', img)
#    shutil.copy(src, dest)

#for img in X_test:
#    src = os.path.join(IMG_DIR, img)
#    dest = os.path.join('./data/test/test', img)
#    shutil.copy(src, dest)

Train-test-split:
X_train:  (454,)
X_test:  (114,)
y_train:  (454,)
y_test:  (114,)


### Data augmentation

In [5]:
datagen = ImageDataGenerator(rescale=1./255, 
                             rotation_range=40,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest',
                             preprocessing_function=utils.preprocess_input)

datagen.fit(data_train)

#train_generator = datagen.flow_from_directory(
#    './data/train',
#    target_size=(IMG_SIZE, IMG_SIZE),
#    batch_size=64,
#    class_mode=None
#)

test_datagen = ImageDataGenerator(rescale=1./255)

test_datagen.fit(data_test)

#test_generator = test_datagen.flow_from_directory(
#    './data/test',
#    target_size=(IMG_SIZE, IMG_SIZE),
#    batch_size=64,
#    class_mode=None
#)

### Train

In [6]:
checkpointer = ModelCheckpoint(filepath='./weights-v4.hdf5', verbose=1, save_best_only=True)
earlystop = EarlyStopping(monitor='val_loss', patience=10, verbose=0)

history = model.fit_generator(
    datagen.flow(data_train, y_train, batch_size=64),
    steps_per_epoch=100,
    epochs=1,
    validation_data=test_datagen.flow(data_test, y_test, batch_size=64),
    callbacks=[checkpointer]
)

Epoch 1/1

Epoch 00001: val_loss improved from inf to 1.10244, saving model to ./weights-v4.hdf5


In [7]:
history = model.fit_generator(
    datagen.flow(data_train, y_train, batch_size=64),
    steps_per_epoch=453//64,
    epochs=500,
    validation_data=test_datagen.flow(data_test, y_test, batch_size=64),
    callbacks=[earlystop, checkpointer]
)

Epoch 1/500

Epoch 00001: val_loss did not improve
Epoch 2/500

Epoch 00002: val_loss did not improve
Epoch 3/500

Epoch 00003: val_loss did not improve
Epoch 4/500

Epoch 00004: val_loss improved from 1.10244 to 1.08519, saving model to ./weights-v4.hdf5
Epoch 5/500

Epoch 00005: val_loss did not improve
Epoch 6/500

Epoch 00006: val_loss did not improve
Epoch 7/500

Epoch 00007: val_loss did not improve
Epoch 8/500

Epoch 00008: val_loss did not improve
Epoch 9/500

Epoch 00009: val_loss did not improve
Epoch 10/500

Epoch 00010: val_loss improved from 1.08519 to 1.07976, saving model to ./weights-v4.hdf5
Epoch 11/500

Epoch 00011: val_loss did not improve
Epoch 12/500

Epoch 00012: val_loss improved from 1.07976 to 1.02673, saving model to ./weights-v4.hdf5
Epoch 13/500

Epoch 00013: val_loss did not improve
Epoch 14/500

Epoch 00014: val_loss did not improve
Epoch 15/500

Epoch 00015: val_loss did not improve
Epoch 16/500

Epoch 00016: val_loss did not improve
Epoch 17/500

Epoch 0

In [8]:
model.save('./models/love_classifierv4.h5')

### Choose candidates from test set

In [9]:
#freeze layers
for layer in model.layers:
    layer.trainable = False

candidates = data_test
candidates = np.expand_dims(candidates, axis=0)
predictions = np.zeros(candidates.shape[1])

for i in range(candidates.shape[1]):
    predictions[i] = model.predict(candidates[:,i])

#sort predictions
predictions = predictions[np.argsort(predictions)]
print(predictions)

[ -44.57193756   24.91769218   34.64692688   44.47531128   45.82447052
   48.15081406   54.00756454   61.11362839   62.72498322   70.58784485
   71.54866791   85.23001099   88.61009979   93.2234726    93.48945618
   98.20302582   98.89201355  112.62640381  113.94422913  114.61132812
  120.41316986  126.86641693  129.6708374   138.25990295  139.81570435
  147.0196991   149.84315491  154.3290863   160.19422913  163.05923462
  172.86106873  175.10250854  176.3145752   177.30130005  183.09207153
  189.90960693  192.25389099  193.46359253  193.55125427  199.19624329
  209.00604248  211.37651062  215.48513794  224.53437805  231.57740784
  234.38848877  235.08442688  244.204422    252.26107788  253.86560059
  259.87078857  260.29437256  292.98849487  299.96310425  301.06692505
  301.48361206  304.7258606   311.63815308  313.17712402  315.8324585
  317.42904663  327.69815063  327.99502563  337.35571289  359.45306396
  359.68722534  365.92700195  378.36444092  384.04888916  385.38565063
  391.8

### ^ Output for regression should be between 0 & 5. Need to change activation of last layer, see v4.1.