### Like v3.1 BUT no Dropout after last layer of vgg

In [1]:
from keras.engine import Model
from keras.layers import Flatten, Dense, Input, Dropout
from keras_vggface.vggface import VGGFace
from keras.preprocessing.image import ImageDataGenerator
from keras_vggface import utils
from keras.optimizers import Adam
from keras.preprocessing import image
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.utils import class_weight
import keras
import numpy as np
import os
import shutil
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Constants
IMG_SIZE = 200
BASE_DIR = '../project/all_females'

In [3]:
# Read in ratings
ratings = np.genfromtxt('./Projekt_SGE_Assessment_ErikK.txt')

In [4]:
# Round (obviously imbalanced sets)
ratings = np.round(ratings, 0).astype(np.int8)
print(np.unique(ratings, return_counts=True))

(array([0, 1, 2, 3, 4, 5], dtype=int8), array([197, 128,  75,  70,  50,  48]))


In [5]:
### Distribute images across folders
for r in np.unique(ratings).astype(np.int8):
    if not os.path.isdir('./data/train/{}-stars'.format(r)):
        os.makedirs('./data/train/{}-stars'.format(r))
    if not os.path.isdir('./data/test/{}-stars'.format(r)):
        os.makedirs('./data/test/{}-stars'.format(r))

In [6]:
### Move all images according to ratings
images = sorted(os.listdir(BASE_DIR))

### Make sure lengths of ratings and images correspond 
assert len(ratings) == len(images)

### Sort images according to ratings
ratings = ratings.tolist()
ratings, images = zip(*sorted(zip(ratings,images)))
ratings = np.asarray(ratings)

### Count number of ratings for each category, generate train test split for each category
unique, counts = np.unique(ratings, return_counts=True)

stars = [[] for i in counts]
stared_img = [[] for i in counts]
accum = np.cumsum(counts)
stars[0] = ratings[:accum[0]]
stared_img[0] = images[:accum[0]]

for i in range(1,len(counts)):
    stars[i] = ratings[accum[i-1]:accum[i]]
    stared_img[i] = images[accum[i-1]:accum[i]]

stars = np.asarray(stars)
stared_img = np.asarray(stared_img)

assert(stars.shape == stared_img.shape)

X_train = [[] for i in counts]
X_test = [[] for i in counts]
y_train = [[] for i in counts]
y_test = [[] for i in counts]

for i in range(len(counts)):
    X_train[i], X_test[i], y_train[i], y_test[i] = train_test_split(stared_img[i], stars[i], test_size=0.2)
    
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

        
print("Train-test-split:")
print("X_train: ",X_train.shape)
print("X_test: ",X_test.shape)
print("y_train: ",y_train.shape)
print("y_test: ",y_test.shape)

#check that all subsets of the split have the same rating
for i in range(len(counts)):
    assert(np.sum(y_train[i]) == i*len(y_train[i]))
    assert(np.sum(y_test[i]) == i*len(y_test[i]))

Train-test-split:
X_train:  (6,)
X_test:  (6,)
y_train:  (6,)
y_test:  (6,)


In [7]:
### Loop throgh all images and store them in the dedicated folders
#for i, r in enumerate(ratings.astype(np.int8)):
#    src = os.path.join(BASE_DIR, images[i])
#    dest = os.path.join('./classification/{}-stars'.format(r), images[i])
#    shutil.copy(src, dest)

for i in range(len(X_train)):
    for j in range(len(X_train[i])):
        src = os.path.join(BASE_DIR, X_train[i][j])
        dest = os.path.join('./data/train/{}-stars'.format(y_train[i][j]), X_train[i][j])
        shutil.copy(src, dest)

for i in range(len(X_test)):
    for j in range(len(X_test[i])):
        src = os.path.join(BASE_DIR, X_test[i][j])
        dest = os.path.join('./data/test/{}-stars'.format(y_test[i][j]), X_test[i][j])
        shutil.copy(src, dest)

## Time to use data augmentation with streaming

In [8]:
datagen = ImageDataGenerator(rescale=1./255, 
                             rotation_range=40,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest',
                             preprocessing_function=utils.preprocess_input)

In [9]:
train_generator = datagen.flow_from_directory(
    './data/train/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=64,
    class_mode='categorical'
)

test_generator = datagen.flow_from_directory(
    './data/test/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=64,
    class_mode='categorical'
)

Found 563 images belonging to 6 classes.
Found 275 images belonging to 6 classes.


## Some random model

In [10]:
# Add custom layers
vgg_base = VGGFace(include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling='max')
last_layer = vgg_base.get_layer('global_max_pooling2d_1').output
#X = Dropout(0.2)(last_layer)
X = Dense(256, activation='relu', name='fc6')(last_layer)
X = Dropout(0.2)(X)
X = Dense(128, activation='relu', name='fc7')(X)
X = Dropout(0.2)(X)
output = Dense(len(np.unique(ratings)), activation='softmax')(X)
model = Model(inputs=vgg_base.input, outputs=output)

In [11]:
# Freeze vgg layers
for layer in vgg_base.layers:
    layer.trainable = False

In [12]:
# Compute class weights since we have unbalaned classes
class_weight = class_weight.compute_class_weight('balanced', 
                                                 np.unique(ratings), 
                                                 ratings)

In [13]:
model.compile(optimizer=Adam(clipnorm=1.0),
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

In [14]:
checkpointer = ModelCheckpoint(filepath='./weights-v3.2.hdf5', verbose=1, save_best_only=True)
earlystop = EarlyStopping(monitor='val_loss', patience=10, verbose=0)

In [15]:
history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=1,
    class_weight=class_weight,
    validation_data=test_generator,
    callbacks=[checkpointer]
)

Epoch 1/1

Epoch 00001: val_loss improved from inf to 1.40973, saving model to ./weights-v3.2.hdf5


In [16]:
history = model.fit_generator(
    train_generator,
    steps_per_epoch=453//64,
    epochs=500,
    class_weight=class_weight,
    validation_data=test_generator,
    callbacks=[earlystop, checkpointer]
)

Epoch 1/500

Epoch 00001: val_loss improved from 1.40973 to 1.36643, saving model to ./weights-v3.2.hdf5
Epoch 2/500

Epoch 00002: val_loss improved from 1.36643 to 1.35377, saving model to ./weights-v3.2.hdf5
Epoch 3/500

Epoch 00003: val_loss improved from 1.35377 to 1.33826, saving model to ./weights-v3.2.hdf5
Epoch 4/500

Epoch 00004: val_loss improved from 1.33826 to 1.33182, saving model to ./weights-v3.2.hdf5
Epoch 5/500

Epoch 00005: val_loss improved from 1.33182 to 1.31999, saving model to ./weights-v3.2.hdf5
Epoch 6/500

Epoch 00006: val_loss did not improve
Epoch 7/500

Epoch 00007: val_loss did not improve
Epoch 8/500

Epoch 00008: val_loss improved from 1.31999 to 1.31777, saving model to ./weights-v3.2.hdf5
Epoch 9/500

Epoch 00009: val_loss improved from 1.31777 to 1.30653, saving model to ./weights-v3.2.hdf5
Epoch 10/500

Epoch 00010: val_loss improved from 1.30653 to 1.29229, saving model to ./weights-v3.2.hdf5
Epoch 11/500

Epoch 00011: val_loss improved from 1.29229


Epoch 00042: val_loss did not improve
Epoch 43/500

Epoch 00043: val_loss did not improve
Epoch 44/500

Epoch 00044: val_loss did not improve
Epoch 45/500

Epoch 00045: val_loss did not improve
Epoch 46/500

Epoch 00046: val_loss did not improve
Epoch 47/500

Epoch 00047: val_loss improved from 1.00411 to 0.94649, saving model to ./weights-v3.2.hdf5
Epoch 48/500

Epoch 00048: val_loss did not improve
Epoch 49/500

Epoch 00049: val_loss did not improve
Epoch 50/500

Epoch 00050: val_loss improved from 0.94649 to 0.94300, saving model to ./weights-v3.2.hdf5
Epoch 51/500

Epoch 00051: val_loss improved from 0.94300 to 0.93910, saving model to ./weights-v3.2.hdf5
Epoch 52/500

Epoch 00052: val_loss improved from 0.93910 to 0.90681, saving model to ./weights-v3.2.hdf5
Epoch 53/500

Epoch 00053: val_loss did not improve
Epoch 54/500

Epoch 00054: val_loss did not improve
Epoch 55/500

Epoch 00055: val_loss did not improve
Epoch 56/500

Epoch 00056: val_loss did not improve
Epoch 57/500

Epo

Epoch 87/500

Epoch 00087: val_loss did not improve
Epoch 88/500

Epoch 00088: val_loss did not improve
Epoch 89/500

Epoch 00089: val_loss did not improve
Epoch 90/500

Epoch 00090: val_loss did not improve
Epoch 91/500

Epoch 00091: val_loss did not improve
Epoch 92/500

Epoch 00092: val_loss did not improve
Epoch 93/500

Epoch 00093: val_loss did not improve
Epoch 94/500

Epoch 00094: val_loss did not improve
Epoch 95/500

Epoch 00095: val_loss did not improve
Epoch 96/500

Epoch 00096: val_loss did not improve


In [17]:
model.save('./models/love_classifierv3.2.h5')

### Choose candidates from test set

In [18]:
#freeze layers
#for layer in model.layers:
#    layer.trainable = False

#transform X_test & y_test so all data is in one dimension 
realNumDim = 0
for i in range(X_test.shape[0]):
    for j in range(len(X_test[i])):
        realNumDim += 1
    
#get test data (unmodified)
data_test = np.zeros((realNumDim, IMG_SIZE, IMG_SIZE, 3))    
last_set = 0
for i in range(X_test.shape[0]): #iterate over #stars
    for idx, _im in enumerate(X_test[i]): #iterate over images
        _img = image.load_img(os.path.join(BASE_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
        _x = image.img_to_array(_img)
        _x = np.expand_dims(_x, axis=0)
        data_test[last_set, :, :, :] = utils.preprocess_input(_x, version=1) / .255
        last_set += 1

predictions = model.predict(data_test)
pred_classes = predictions.argmax(axis=-1)

print(pred_classes)
print(y_test)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 1 1 1 1 1 0 1 0 4 1 3 1 1 0 4 1 0 1 1 1 1 4 1 3 1 2 2 4 1 2 2 2 2
 2 2 2 2 4 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 3 4 4 4 4 4 4 4 4 5 5 5 5 5 3
 5 5 5 5]
[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1])
 array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
 array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])
 array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4])
 array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5])]
