In [1]:
from keras.engine import  Model
from keras.layers import Flatten, Dense, Input, Dropout
from keras_vggface.vggface import VGGFace
from keras.preprocessing.image import ImageDataGenerator
from keras_vggface import utils
from keras.optimizers import Adam
from keras.preprocessing import image
from keras import backend as K
from keras.callbacks import ModelCheckpoint
import numpy as np
import os
import shutil
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Constants
IMG_SIZE = 200
IMG_DIR = '../project/all_females'
RATING_PATH = './Projekt_SGE_Assessment_ErikK.txt'

### Build model

In [3]:
K.clear_session()

vgg_base = VGGFace(include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling='max')

# Add custom layers
last_layer = vgg_base.get_layer('global_max_pooling2d_1').output
X = Dropout(0.2)(last_layer)
# x = Flatten(name='flatten')(last_layer)
X = Dense(256, activation='relu', name='fc6')(X)
X = Dropout(0.2)(X)
X = Dense(128, activation='relu', name='fc7')(X)
X = Dropout(0.2)(X)
output = Dense(1, activation='linear')(X)

# this is the model we will train
model = Model(inputs=vgg_base.input, outputs=output)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in vgg_base.layers:
    layer.trainable = False

# Make sure weights are not trainable
print("Trainable weights:")
model.trainable_weights

model.compile(optimizer=Adam(clipnorm=1.0),loss='mean_absolute_error', metrics=['mae'])

model.summary()

Trainable weights:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 200, 200, 3)       0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 200, 200, 64)      1792      
_________________________________________________________________
conv1_2 (Conv2D)             (None, 200, 200, 64)      36928     
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 100, 100, 64)      0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 100, 100, 128)     73856     
_________________________________________________________________
conv2_2 (Conv2D)             (None, 100, 100, 128)     147584    
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 50, 50, 128)       0 

### Prepare data
### **Please remember to delete the train & test folders if you have run this cell before!**

In [4]:
# Read in ratings
ratings = np.genfromtxt(RATING_PATH)

### Distribute images across folders
if not os.path.isdir('./data/train/train'):
    os.makedirs('./data/train/train')
if not os.path.isdir('./data/test/test'):
    os.makedirs('./data/test/test')
        
### Move all images according to ratings
images = sorted(os.listdir(IMG_DIR))

### Make sure lengths of ratings and images correspond 
assert len(ratings) == len(images)

X_train, X_test, y_train, y_test = train_test_split(images, ratings, test_size=0.2)
    
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
        
print("Train-test-split:")
print("X_train: ",X_train.shape)
print("X_test: ",X_test.shape)
print("y_train: ",y_train.shape)
print("y_test: ",y_test.shape)

data_train = np.zeros((X_train.shape[0], IMG_SIZE, IMG_SIZE, 3))
data_test = np.zeros((X_test.shape[0], IMG_SIZE, IMG_SIZE, 3))

for idx, _im in enumerate(X_train):
    # Change the image path with yours.
    _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
    _x = image.img_to_array(_img)
    _x = np.expand_dims(_x, axis=0)
    data_train[idx, :, :, :] = utils.preprocess_input(_x, version=1) / .255 
    
# Same for test data
for idx, _im in enumerate(X_test):   
    _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
    _x = image.img_to_array(_img)
    _x = np.expand_dims(_x, axis=0)
    data_test[idx, :, :, :] = utils.preprocess_input(_x, version=1) / .255

### Loop throgh all images and store them in the dedicated folders
#for img in X_train:
#    src = os.path.join(IMG_DIR, img)
#    dest = os.path.join('./data/train/train', img)
#    shutil.copy(src, dest)

#for img in X_test:
#    src = os.path.join(IMG_DIR, img)
#    dest = os.path.join('./data/test/test', img)
#    shutil.copy(src, dest)

Train-test-split:
X_train:  (454,)
X_test:  (114,)
y_train:  (454,)
y_test:  (114,)


### Data augmentation

In [5]:
datagen = ImageDataGenerator(rescale=1./255, 
                             rotation_range=40,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest',
                             preprocessing_function=utils.preprocess_input)

datagen.fit(data_train)

#train_generator = datagen.flow_from_directory(
#    './data/train',
#    target_size=(IMG_SIZE, IMG_SIZE),
#    batch_size=64,
#    class_mode=None
#)

test_datagen = ImageDataGenerator(rescale=1./255)

test_datagen.fit(data_test)

#test_generator = test_datagen.flow_from_directory(
#    './data/test',
#    target_size=(IMG_SIZE, IMG_SIZE),
#    batch_size=64,
#    class_mode=None
#)

### Train

In [6]:
checkpointer = ModelCheckpoint(filepath='./weights-v4.hdf5', verbose=1, save_best_only=True)

history = model.fit_generator(
    datagen.flow(data_train, y_train, batch_size=64),
    steps_per_epoch=100,
    epochs=1,
    validation_data=test_datagen.flow(data_test, y_test, batch_size=64),
    callbacks=[checkpointer]
)

Epoch 1/1

Epoch 00001: val_loss improved from inf to 1.25166, saving model to ./weights-v4.hdf5


In [7]:
history = model.fit_generator(
    datagen.flow(data_train, y_train, batch_size=64),
    steps_per_epoch=453//64,
    epochs=100,
    validation_data=test_datagen.flow(data_test, y_test, batch_size=64),
    callbacks=[checkpointer]
)

Epoch 1/100

Epoch 00001: val_loss improved from 1.25166 to 1.20497, saving model to ./weights-v4.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 1.20497 to 1.17674, saving model to ./weights-v4.hdf5
Epoch 3/100

Epoch 00003: val_loss did not improve
Epoch 4/100

Epoch 00004: val_loss did not improve
Epoch 5/100

Epoch 00005: val_loss did not improve
Epoch 6/100

Epoch 00006: val_loss did not improve
Epoch 7/100

Epoch 00007: val_loss did not improve
Epoch 8/100

Epoch 00008: val_loss did not improve
Epoch 9/100

Epoch 00009: val_loss did not improve
Epoch 10/100

Epoch 00010: val_loss did not improve
Epoch 11/100

Epoch 00011: val_loss did not improve
Epoch 12/100

Epoch 00012: val_loss did not improve
Epoch 13/100

Epoch 00013: val_loss did not improve
Epoch 14/100

Epoch 00014: val_loss did not improve
Epoch 15/100

Epoch 00015: val_loss did not improve
Epoch 16/100

Epoch 00016: val_loss did not improve
Epoch 17/100

Epoch 00017: val_loss did not improve
Epoch 18/100

Epoch 0


Epoch 00040: val_loss did not improve
Epoch 41/100

Epoch 00041: val_loss did not improve
Epoch 42/100

Epoch 00042: val_loss did not improve
Epoch 43/100

Epoch 00043: val_loss did not improve
Epoch 44/100

Epoch 00044: val_loss did not improve
Epoch 45/100

Epoch 00045: val_loss did not improve
Epoch 46/100

Epoch 00046: val_loss did not improve
Epoch 47/100

Epoch 00047: val_loss did not improve
Epoch 48/100

Epoch 00048: val_loss did not improve
Epoch 49/100

Epoch 00049: val_loss did not improve
Epoch 50/100

Epoch 00050: val_loss did not improve
Epoch 51/100

Epoch 00051: val_loss did not improve
Epoch 52/100

Epoch 00052: val_loss did not improve
Epoch 53/100

Epoch 00053: val_loss did not improve
Epoch 54/100

Epoch 00054: val_loss did not improve
Epoch 55/100

Epoch 00055: val_loss did not improve
Epoch 56/100

Epoch 00056: val_loss did not improve
Epoch 57/100

Epoch 00057: val_loss did not improve
Epoch 58/100

Epoch 00058: val_loss improved from 1.13711 to 1.13551, saving 


Epoch 00080: val_loss did not improve
Epoch 81/100

Epoch 00081: val_loss did not improve
Epoch 82/100

Epoch 00082: val_loss did not improve
Epoch 83/100

Epoch 00083: val_loss did not improve
Epoch 84/100

Epoch 00084: val_loss did not improve
Epoch 85/100

Epoch 00085: val_loss did not improve
Epoch 86/100

Epoch 00086: val_loss did not improve
Epoch 87/100

Epoch 00087: val_loss did not improve
Epoch 88/100

Epoch 00088: val_loss did not improve
Epoch 89/100

Epoch 00089: val_loss did not improve
Epoch 90/100

Epoch 00090: val_loss did not improve
Epoch 91/100

Epoch 00091: val_loss did not improve
Epoch 92/100

Epoch 00092: val_loss did not improve
Epoch 93/100

Epoch 00093: val_loss did not improve
Epoch 94/100

Epoch 00094: val_loss did not improve
Epoch 95/100

Epoch 00095: val_loss did not improve
Epoch 96/100

Epoch 00096: val_loss did not improve
Epoch 97/100

Epoch 00097: val_loss did not improve
Epoch 98/100

Epoch 00098: val_loss did not improve
Epoch 99/100

Epoch 00099

In [8]:
model.save('./models/love_classifierv4.h5')

### Choose candidates from test set

In [23]:
#freeze layers
for layer in model.layers:
    layer.trainable = False

candidates = data_test
candidates = np.expand_dims(candidates, axis=0)
predictions = np.zeros(candidates.shape[1])

for i in range(candidates.shape[1]):
    predictions[i] = model.predict(candidates[:,i])

#sort predictions
predictions = predictions[np.argsort(predictions)]
print(predictions)

[-3.25601158e+01 -2.73342800e+01 -2.72480888e+01 -2.29784794e+01
 -1.72175522e+01 -1.50077314e+01 -1.25359411e+01 -1.16614618e+01
 -1.08885832e+01 -1.02030315e+01 -7.15376663e+00 -3.69815183e+00
 -2.77232194e+00  8.73221010e-02  3.00755596e+00  8.44986153e+00
  9.98149204e+00  1.98134918e+01  1.99992542e+01  2.02991676e+01
  2.08332500e+01  2.47711964e+01  2.64119987e+01  2.74619751e+01
  3.25136528e+01  3.45265656e+01  3.61453629e+01  3.69692230e+01
  5.16039009e+01  5.25914345e+01  5.55510101e+01  6.14506836e+01
  6.45374680e+01  6.56490860e+01  6.82146606e+01  6.94169846e+01
  7.03983307e+01  7.23446579e+01  7.56925430e+01  8.07255707e+01
  8.27483215e+01  8.81983566e+01  8.84145813e+01  9.27170639e+01
  9.65320587e+01  1.01349007e+02  1.17217354e+02  1.22200035e+02
  1.23237373e+02  1.23272644e+02  1.29443649e+02  1.37959595e+02
  1.46341553e+02  1.49934357e+02  1.53067703e+02  1.67479187e+02
  1.73627853e+02  1.79756958e+02  1.81233932e+02  1.89983658e+02
  1.93127365e+02  2.01829

### ^ Output for regression should be between 0 & 5. Need to change activation of last layer, see v4.1.