In [1]:
from keras.engine import  Model
from keras.layers import Flatten, Dense, Input, Dropout, Activation
from keras_vggface.vggface import VGGFace
from keras.preprocessing.image import ImageDataGenerator
from keras_vggface import utils
from keras.optimizers import Adam
from keras.preprocessing import image
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
import os
import shutil
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Constants
IMG_SIZE = 200
IMG_DIR = '../project/all_females'
RATING_PATH = './Projekt_SGE_Assessment_ErikK.txt'

### Build model

In [3]:
K.clear_session()

#create function for calling clipped ReLU
def create_relu_advanced(max_value=1.):        
    def relu_advanced(x):
        return K.relu(x, max_value=K.cast_to_floatx(max_value))
    return relu_advanced

vgg_base = VGGFace(include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling='max')

# Add custom layers
last_layer = vgg_base.get_layer('global_max_pooling2d_1').output
#X = Dropout(0.2)(last_layer)
# x = Flatten(name='flatten')(last_layer)
X = Dense(512, activation='relu', name='fc6')(last_layer)
X = Dropout(0.2)(X)
#X = Dense(256, activation='relu', name='fc7')(X)
#X = Dropout(0.2)(X)
#X = Dense(128, activation='relu', name='fc8')(X)
#X = Dropout(0.2)(X)
#X = Dense(64, activation='relu', name='fc9')(X)
#X = Dropout(0.2)(X)
#X = Dense(32, activation='relu', name='fc10')(X)
#X = Dropout(0.2)(X)
#X = Dense(16, activation='relu', name='fc11')(X)
#X = Dropout(0.2)(X)
X = Dense(8, activation='relu', name='fc12')(X)
X = Dropout(0.2)(X)
output = Dense(1, activation=create_relu_advanced(max_value=5))(X)

# this is the model we will train
model = Model(inputs=vgg_base.input, outputs=output)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in vgg_base.layers:
    layer.trainable = False

# Make sure weights are not trainable
print("Trainable weights:")
model.trainable_weights

model.compile(optimizer=Adam(lr=0.0001,clipnorm=1.0),loss='mean_absolute_error', metrics=['mae'])

model.summary()

Trainable weights:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 200, 200, 3)       0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 200, 200, 64)      1792      
_________________________________________________________________
conv1_2 (Conv2D)             (None, 200, 200, 64)      36928     
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 100, 100, 64)      0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 100, 100, 128)     73856     
_________________________________________________________________
conv2_2 (Conv2D)             (None, 100, 100, 128)     147584    
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 50, 50, 128)       0 

### Prepare data
### **Please remember to delete the train & test folders if you have run this cell before!**

In [4]:
# Read in ratings
ratings = np.genfromtxt(RATING_PATH)

### Distribute images across folders
if not os.path.isdir('./data/train/train'):
    os.makedirs('./data/train/train')
if not os.path.isdir('./data/test/test'):
    os.makedirs('./data/test/test')
        
### Move all images according to ratings
images = sorted(os.listdir(IMG_DIR))

### Make sure lengths of ratings and images correspond 
assert len(ratings) == len(images)

X_train, X_test, y_train, y_test = train_test_split(images, ratings, test_size=0.2)
    
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
        
print("Train-test-split:")
print("X_train: ",X_train.shape)
print("X_test: ",X_test.shape)
print("y_train: ",y_train.shape)
print("y_test: ",y_test.shape)

data_train = np.zeros((X_train.shape[0], IMG_SIZE, IMG_SIZE, 3))
data_test = np.zeros((X_test.shape[0], IMG_SIZE, IMG_SIZE, 3))

for idx, _im in enumerate(X_train):
    # Change the image path with yours.
    _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
    _x = image.img_to_array(_img)
    _x = np.expand_dims(_x, axis=0)
    data_train[idx, :, :, :] = utils.preprocess_input(_x, version=1) / .255 
    
# Same for test data
for idx, _im in enumerate(X_test):   
    _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
    _x = image.img_to_array(_img)
    _x = np.expand_dims(_x, axis=0)
    data_test[idx, :, :, :] = utils.preprocess_input(_x, version=1) / .255

### Loop throgh all images and store them in the dedicated folders
#for img in X_train:
#    src = os.path.join(IMG_DIR, img)
#    dest = os.path.join('./data/train/train', img)
#    shutil.copy(src, dest)

#for img in X_test:
#    src = os.path.join(IMG_DIR, img)
#    dest = os.path.join('./data/test/test', img)
#    shutil.copy(src, dest)

Train-test-split:
X_train:  (454,)
X_test:  (114,)
y_train:  (454,)
y_test:  (114,)


### Data augmentation

In [5]:
datagen = ImageDataGenerator(rescale=1./255, 
                             rotation_range=40,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest',
                             preprocessing_function=utils.preprocess_input)

datagen.fit(data_train)

#train_generator = datagen.flow_from_directory(
#    './data/train',
#    target_size=(IMG_SIZE, IMG_SIZE),
#    batch_size=64,
#    class_mode=None
#)

test_datagen = ImageDataGenerator(rescale=1./255)

test_datagen.fit(data_test)

#test_generator = test_datagen.flow_from_directory(
#    './data/test',
#    target_size=(IMG_SIZE, IMG_SIZE),
#    batch_size=64,
#    class_mode=None
#)

### Train

In [6]:
checkpointer = ModelCheckpoint(filepath='./weights/weights-v4.2.hdf5', verbose=1, save_best_only=True)
earlystop = EarlyStopping(monitor='val_loss', patience=10, verbose=0)

history = model.fit_generator(
    datagen.flow(data_train, y_train, batch_size=64),
    steps_per_epoch=100,
    epochs=1,
    validation_data=test_datagen.flow(data_test, y_test, batch_size=64),
    callbacks=[checkpointer]
)

Epoch 1/1

Epoch 00001: val_loss improved from inf to 1.27245, saving model to ./weights/weights-v4.2.hdf5


In [7]:
history = model.fit_generator(
    datagen.flow(data_train, y_train, batch_size=64),
    steps_per_epoch=454//64,
    epochs=500,
    validation_data=test_datagen.flow(data_test, y_test, batch_size=64),
    callbacks=[earlystop, checkpointer]
)

Epoch 1/500

Epoch 00001: val_loss improved from 1.27245 to 1.26232, saving model to ./weights/weights-v4.2.hdf5
Epoch 2/500

Epoch 00002: val_loss improved from 1.26232 to 1.24166, saving model to ./weights/weights-v4.2.hdf5
Epoch 3/500

Epoch 00003: val_loss improved from 1.24166 to 1.21924, saving model to ./weights/weights-v4.2.hdf5
Epoch 4/500

Epoch 00004: val_loss did not improve
Epoch 5/500

Epoch 00005: val_loss did not improve
Epoch 6/500

Epoch 00006: val_loss improved from 1.21924 to 1.21543, saving model to ./weights/weights-v4.2.hdf5
Epoch 7/500

Epoch 00007: val_loss improved from 1.21543 to 1.21276, saving model to ./weights/weights-v4.2.hdf5
Epoch 8/500

Epoch 00008: val_loss did not improve
Epoch 9/500

Epoch 00009: val_loss did not improve
Epoch 10/500

Epoch 00010: val_loss did not improve
Epoch 11/500

Epoch 00011: val_loss improved from 1.21276 to 1.17965, saving model to ./weights/weights-v4.2.hdf5
Epoch 12/500

Epoch 00012: val_loss did not improve
Epoch 13/500



Epoch 00037: val_loss did not improve
Epoch 38/500

Epoch 00038: val_loss did not improve
Epoch 39/500

Epoch 00039: val_loss improved from 1.11935 to 1.11807, saving model to ./weights/weights-v4.2.hdf5
Epoch 40/500

Epoch 00040: val_loss did not improve
Epoch 41/500

Epoch 00041: val_loss did not improve
Epoch 42/500

Epoch 00042: val_loss did not improve
Epoch 43/500

Epoch 00043: val_loss did not improve
Epoch 44/500

Epoch 00044: val_loss did not improve
Epoch 45/500

Epoch 00045: val_loss did not improve
Epoch 46/500

Epoch 00046: val_loss did not improve
Epoch 47/500

Epoch 00047: val_loss improved from 1.11807 to 1.10819, saving model to ./weights/weights-v4.2.hdf5
Epoch 48/500

Epoch 00048: val_loss improved from 1.10819 to 1.09718, saving model to ./weights/weights-v4.2.hdf5
Epoch 49/500

Epoch 00049: val_loss improved from 1.09718 to 1.09363, saving model to ./weights/weights-v4.2.hdf5
Epoch 50/500

Epoch 00050: val_loss did not improve
Epoch 51/500

Epoch 00051: val_loss d


Epoch 00075: val_loss did not improve
Epoch 76/500

Epoch 00076: val_loss did not improve
Epoch 77/500

Epoch 00077: val_loss did not improve
Epoch 78/500

Epoch 00078: val_loss did not improve
Epoch 79/500

Epoch 00079: val_loss improved from 1.07705 to 1.05873, saving model to ./weights/weights-v4.2.hdf5
Epoch 80/500

Epoch 00080: val_loss did not improve
Epoch 81/500

Epoch 00081: val_loss did not improve
Epoch 82/500

Epoch 00082: val_loss did not improve
Epoch 83/500

Epoch 00083: val_loss did not improve
Epoch 84/500

Epoch 00084: val_loss did not improve
Epoch 85/500

Epoch 00085: val_loss did not improve
Epoch 86/500

Epoch 00086: val_loss did not improve
Epoch 87/500

Epoch 00087: val_loss did not improve
Epoch 88/500

Epoch 00088: val_loss did not improve
Epoch 89/500

Epoch 00089: val_loss did not improve


In [8]:
model.save('./models/love_classifierv4.2.h5')

### Choose candidates from test set

In [9]:
#freeze layers
for layer in model.layers:
    layer.trainable = False

candidates = data_test
candidates = np.expand_dims(candidates, axis=0)
predictions = np.zeros(candidates.shape[1])

for i in range(candidates.shape[1]):
    predictions[i] = model.predict(candidates[:,i])

#sort predictions
predictions = predictions[np.argsort(predictions)]
print(predictions)
print(y_test)

[0.         0.         0.         0.         0.         0.
 0.         0.         3.32914233 4.4942522  5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         5.
 5.         5.         5.         5.         5.         

### ^ Network should make more differentiated predictions. Like 3.36633825.
See v4.2 for continuation of this approach.