In [6]:
from keras.engine import  Model
from keras.layers import Flatten, Dense, Input, Dropout
from keras_vggface.vggface import VGGFace
from keras.preprocessing.image import ImageDataGenerator
from keras_vggface import utils
from keras.optimizers import Adam
from keras.preprocessing import image
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
import os
import shutil
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
# Constants
IMG_SIZE = 200
IMG_DIR = '../project/all_females'
RATING_PATH = './Projekt_SGE_Assessment_ErikK.txt'

### Build model

In [8]:
K.clear_session()

vgg_base = VGGFace(include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling='max')

# Add custom layers
last_layer = vgg_base.get_layer('global_max_pooling2d_1').output
#X = Dropout(0.2)(last_layer)
# x = Flatten(name='flatten')(last_layer)
X = Dense(256, activation='relu', name='fc6')(last_layer)
X = Dropout(0.2)(X)
X = Dense(128, activation='relu', name='fc7')(X)
X = Dropout(0.2)(X)
output = Dense(1, activation='linear')(X)

# this is the model we will train
model = Model(inputs=vgg_base.input, outputs=output)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in vgg_base.layers:
    layer.trainable = False

# Make sure weights are not trainable
print("Trainable weights:")
model.trainable_weights

model.compile(optimizer=Adam(clipnorm=1.0),loss='mean_absolute_error', metrics=['mae'])

model.summary()

Trainable weights:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 200, 200, 3)       0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 200, 200, 64)      1792      
_________________________________________________________________
conv1_2 (Conv2D)             (None, 200, 200, 64)      36928     
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 100, 100, 64)      0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 100, 100, 128)     73856     
_________________________________________________________________
conv2_2 (Conv2D)             (None, 100, 100, 128)     147584    
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 50, 50, 128)       0 

### Prepare data
### **Please remember to delete the train & test folders if you have run this cell before!**

In [9]:
# Read in ratings
ratings = np.genfromtxt(RATING_PATH)
scaler = StandardScaler()

### Distribute images across folders
if not os.path.isdir('./data/train/train'):
    os.makedirs('./data/train/train')
if not os.path.isdir('./data/test/test'):
    os.makedirs('./data/test/test')
        
### Move all images according to ratings
images = sorted(os.listdir(IMG_DIR))

### Make sure lengths of ratings and images correspond 
assert len(ratings) == len(images)

X_train, X_test, y_train, y_test = train_test_split(images, ratings, test_size=0.2)

y_train = scaler.fit_transform(y_train.reshape(-1,1))
y_test = scaler.transform(y_test.reshape(-1,1))
    
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
        
print("Train-test-split:")
print("X_train: ",X_train.shape)
print("X_test: ",X_test.shape)
print("y_train: ",y_train.shape)
print("y_test: ",y_test.shape)

data_train = np.zeros((X_train.shape[0], IMG_SIZE, IMG_SIZE, 3))
data_test = np.zeros((X_test.shape[0], IMG_SIZE, IMG_SIZE, 3))

for idx, _im in enumerate(X_train):
    # Change the image path with yours.
    _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
    _x = image.img_to_array(_img)
    _x = np.expand_dims(_x, axis=0)
    data_train[idx, :, :, :] = utils.preprocess_input(_x, version=1) / .255 
    
# Same for test data
for idx, _im in enumerate(X_test):   
    _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
    _x = image.img_to_array(_img)
    _x = np.expand_dims(_x, axis=0)
    data_test[idx, :, :, :] = utils.preprocess_input(_x, version=1) / .255

### Loop throgh all images and store them in the dedicated folders
#for img in X_train:
#    src = os.path.join(IMG_DIR, img)
#    dest = os.path.join('./data/train/train', img)
#    shutil.copy(src, dest)

#for img in X_test:
#    src = os.path.join(IMG_DIR, img)
#    dest = os.path.join('./data/test/test', img)
#    shutil.copy(src, dest)

Train-test-split:
X_train:  (454,)
X_test:  (114,)
y_train:  (454, 1)
y_test:  (114, 1)


### Data augmentation

In [10]:
datagen = ImageDataGenerator(rescale=1./255, 
                             rotation_range=40,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest',
                             preprocessing_function=utils.preprocess_input)

datagen.fit(data_train)

#train_generator = datagen.flow_from_directory(
#    './data/train',
#    target_size=(IMG_SIZE, IMG_SIZE),
#    batch_size=64,
#    class_mode=None
#)

test_datagen = ImageDataGenerator(rescale=1./255)

test_datagen.fit(data_test)

#test_generator = test_datagen.flow_from_directory(
#    './data/test',
#    target_size=(IMG_SIZE, IMG_SIZE),
#    batch_size=64,
#    class_mode=None
#)

### Train

In [11]:
checkpointer = ModelCheckpoint(filepath='./weights-v4.4.hdf5', verbose=1, save_best_only=True)
earlystop = EarlyStopping(monitor='val_loss', patience=10, verbose=0)

history = model.fit_generator(
    datagen.flow(data_train, y_train, batch_size=64),
    steps_per_epoch=100,
    epochs=1,
    validation_data=test_datagen.flow(data_test, y_test, batch_size=64),
    callbacks=[checkpointer]
)

Epoch 1/1

Epoch 00001: val_loss improved from inf to 0.66096, saving model to ./weights-v4.4.hdf5


In [12]:
history = model.fit_generator(
    datagen.flow(data_train, y_train, batch_size=64),
    steps_per_epoch=453//64,
    epochs=500,
    validation_data=test_datagen.flow(data_test, y_test, batch_size=64),
    callbacks=[earlystop, checkpointer]
)

Epoch 1/500

Epoch 00001: val_loss did not improve
Epoch 2/500

Epoch 00002: val_loss did not improve
Epoch 3/500

Epoch 00003: val_loss did not improve
Epoch 4/500

Epoch 00004: val_loss did not improve
Epoch 5/500

Epoch 00005: val_loss did not improve
Epoch 6/500

Epoch 00006: val_loss did not improve
Epoch 7/500

Epoch 00007: val_loss did not improve
Epoch 8/500

Epoch 00008: val_loss did not improve
Epoch 9/500

Epoch 00009: val_loss did not improve
Epoch 10/500

Epoch 00010: val_loss did not improve
Epoch 11/500

Epoch 00011: val_loss did not improve


In [13]:
model.save('./models/love_classifierv4.4.h5')

### Choose candidates from test set

In [14]:
#freeze layers
#for layer in model.layers:
#    layer.trainable = False

candidates = data_test
candidates = np.expand_dims(candidates, axis=0)
predictions = np.zeros(candidates.shape[1])

for i in range(candidates.shape[1]):
    predictions[i] = model.predict(candidates[:,i])

#sort predictions
predictions = predictions[np.argsort(predictions)]
print(predictions)

[-203.15292358 -196.89613342 -177.4152832  -167.01104736 -161.61357117
 -150.23927307 -148.96322632 -133.67350769 -126.41686249 -126.15959167
 -123.8169632  -122.62071991 -122.06064606 -115.33239746 -115.03014374
 -114.93141174 -113.79706573 -112.21574402 -111.04689026 -108.41350555
 -103.25289917 -102.35009766  -92.88715363  -92.01108551  -90.55640411
  -90.16567993  -85.33348083  -80.0843277   -77.27255249  -76.95850372
  -76.64919281  -70.00288391  -68.67638397  -67.0359726   -66.89862061
  -65.95204926  -63.45209503  -63.11006165  -63.04042435  -61.3217392
  -61.08117676  -60.34743881  -54.58935928  -51.41580582  -49.91259766
  -47.93086243  -45.05858994  -44.03202057  -32.59744644  -28.14681435
  -27.83138657  -27.60424423  -27.12369728  -15.64478874  -14.73303509
   -3.37487602    1.0468601     2.05631661    2.54559994    7.03844547
   10.28412628   12.08301258   18.11764717   23.8874855    27.34967232
   37.44688034   54.39094925   60.35200882   68.33992767   70.9205246
   72.47

### ^ Output for regression should be between 0 & 5. Need to change activation of last layer, see v4.1.

In [15]:
print(y_test)

[[-1.00658718]
 [-1.00658718]
 [-1.00658718]
 [ 0.81530218]
 [ 2.02989509]
 [-1.00658718]
 [ 0.20800572]
 [-1.00658718]
 [-1.00658718]
 [-0.39929073]
 [ 0.20800572]
 [ 0.81530218]
 [-1.00658718]
 [-1.00658718]
 [-1.00658718]
 [ 1.42259863]
 [ 0.81530218]
 [-0.39929073]
 [-1.00658718]
 [ 2.02989509]
 [-1.00658718]
 [-1.00658718]
 [ 0.81530218]
 [ 0.81530218]
 [ 1.42259863]
 [-0.39929073]
 [-1.00658718]
 [ 0.81530218]
 [ 0.81530218]
 [-1.00658718]
 [ 0.20800572]
 [-0.39929073]
 [-0.39929073]
 [-1.00658718]
 [ 1.72624686]
 [-1.00658718]
 [-1.00658718]
 [ 2.02989509]
 [-1.00658718]
 [ 0.20800572]
 [ 0.81530218]
 [-0.39929073]
 [-0.39929073]
 [-0.39929073]
 [-0.39929073]
 [-1.00658718]
 [-1.00658718]
 [ 0.20800572]
 [-0.39929073]
 [-0.39929073]
 [-1.00658718]
 [ 1.42259863]
 [-1.00658718]
 [-1.00658718]
 [-0.39929073]
 [ 0.20800572]
 [ 1.42259863]
 [ 0.81530218]
 [-1.00658718]
 [ 1.42259863]
 [-1.00658718]
 [ 0.81530218]
 [ 1.42259863]
 [-1.00658718]
 [ 0.20800572]
 [-1.00658718]
 [ 2.02989