### Like v3.2 BUT 1 layer deeper

In [27]:
from keras.engine import Model
from keras.layers import Flatten, Dense, Input, Dropout
from keras_vggface.vggface import VGGFace
from keras.preprocessing.image import ImageDataGenerator
from keras_vggface import utils
from keras.optimizers import Adam
from keras.preprocessing import image
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.utils import class_weight
import keras
import numpy as np
import os
import shutil
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
%matplotlib inline

In [9]:
# Constants
IMG_SIZE = 200
BASE_DIR = '../project/all_females'

In [10]:
# Read in ratings
ratings = np.genfromtxt('./ratings_stefan.txt')

In [11]:
# Round (obviously imbalanced sets)
ratings = np.round(ratings, 0).astype(np.int8)
print(np.unique(ratings, return_counts=True))

(array([0, 1, 2, 3, 4, 5], dtype=int8), array([105, 170, 125,  83,  80,   5]))


In [12]:
### Distribute images across folders
for r in np.unique(ratings).astype(np.int8):
    if not os.path.isdir('./data/train/{}-stars'.format(r)):
        os.makedirs('./data/train/{}-stars'.format(r))
    if not os.path.isdir('./data/test/{}-stars'.format(r)):
        os.makedirs('./data/test/{}-stars'.format(r))

In [13]:
### Move all images according to ratings
images = sorted(os.listdir(BASE_DIR))

### Make sure lengths of ratings and images correspond 
assert len(ratings) == len(images)

### Sort images according to ratings
ratings = ratings.tolist()
ratings, images = zip(*sorted(zip(ratings,images)))
ratings = np.asarray(ratings)

### Count number of ratings for each category, generate train test split for each category
unique, counts = np.unique(ratings, return_counts=True)

stars = [[] for i in counts]
stared_img = [[] for i in counts]
accum = np.cumsum(counts)
stars[0] = ratings[:accum[0]]
stared_img[0] = images[:accum[0]]

for i in range(1,len(counts)):
    stars[i] = ratings[accum[i-1]:accum[i]]
    stared_img[i] = images[accum[i-1]:accum[i]]

stars = np.asarray(stars)
stared_img = np.asarray(stared_img)

assert(stars.shape == stared_img.shape)

#X_train = [[] for i in counts]
#X_test = [[] for i in counts]
#y_train = [[] for i in counts]
#y_test = [[] for i in counts]

for i in range(len(counts)):
    X_train[i], X_test[i], y_train[i], y_test[i] = train_test_split(stared_img[i], stars[i], test_size=0.2)
    
#X_train = np.to_list(X_train)
#X_test = np.to_list(X_test)
#y_train = np.to_list(y_train)
#y_test = np.to_list(y_test)

        
print("Train-test-split:")
print("X_train: ",X_train.shape)
print("X_test: ",X_test.shape)
print("y_train: ",y_train.shape)
print("y_test: ",y_test.shape)

#check that all subsets of the split have the same rating
for i in range(len(counts)):
    assert(np.sum(y_train[i]) == i*len(y_train[i]))
    assert(np.sum(y_test[i]) == i*len(y_test[i]))

Train-test-split:
X_train:  (6,)
X_test:  (6,)
y_train:  (6,)
y_test:  (6,)


In [14]:
### Loop throgh all images and store them in the dedicated folders
#for i, r in enumerate(ratings.astype(np.int8)):
#    src = os.path.join(BASE_DIR, images[i])
#    dest = os.path.join('./classification/{}-stars'.format(r), images[i])
#    shutil.copy(src, dest)

for i in range(len(X_train)):
    for j in range(len(X_train[i])):
        src = os.path.join(BASE_DIR, X_train[i][j])
        dest = os.path.join('./data/train/{}-stars'.format(y_train[i][j]), X_train[i][j])
        shutil.copy(src, dest)

for i in range(len(X_test)):
    for j in range(len(X_test[i])):
        src = os.path.join(BASE_DIR, X_test[i][j])
        dest = os.path.join('./data/test/{}-stars'.format(y_test[i][j]), X_test[i][j])
        shutil.copy(src, dest)

## Time to use data augmentation with streaming

In [15]:
datagen = ImageDataGenerator(rescale=1./255, 
                             rotation_range=40,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest',
                             preprocessing_function=utils.preprocess_input)

In [16]:
train_generator = datagen.flow_from_directory(
    './data/train/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=64,
    class_mode='categorical'#, save_to_dir='aug_data/train', save_prefix='aug_train', save_format='png'
)

test_generator = datagen.flow_from_directory(
    './data/test/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=64,
    class_mode='categorical'#, save_to_dir='aug_data/test', save_prefix='aug_test', save_format='png'
)

Found 454 images belonging to 6 classes.
Found 114 images belonging to 6 classes.


## Some random model

In [17]:
# Add custom layers
vgg_base = VGGFace(include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling='max')
last_layer = vgg_base.get_layer('global_max_pooling2d_1').output
#X = Dropout(0.2)(last_layer)
X = Dense(256, activation='relu', name='fc6')(last_layer)
X = Dropout(0.2)(X)
X = Dense(128, activation='relu', name='fc7')(X)
X = Dropout(0.2)(X)
X = Dense(64, activation='relu', name='fc8')(X)
X = Dropout(0.2)(X)
output = Dense(len(np.unique(ratings)), activation='softmax')(X)
model = Model(inputs=vgg_base.input, outputs=output)

In [18]:
# Freeze vgg layers
for layer in vgg_base.layers:
    layer.trainable = False

In [19]:
# Compute class weights since we have unbalaned classes
class_weight = class_weight.compute_class_weight('balanced', 
                                                 np.unique(ratings), 
                                                 ratings)

In [20]:
model.compile(optimizer=Adam(clipnorm=1.0),
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

In [21]:
checkpointer = ModelCheckpoint(filepath='./weights-v3.2.1-stefan.hdf5', verbose=1, save_best_only=True)
earlystop = EarlyStopping(monitor='val_loss', patience=10, verbose=0)

In [22]:
history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=1,
    class_weight=class_weight,
    validation_data=test_generator,
    callbacks=[checkpointer]
)

Epoch 1/1

Epoch 00001: val_loss improved from inf to 1.40414, saving model to ./weights-v3.2.1-stefan.hdf5


In [23]:
history = model.fit_generator(
    train_generator,
    steps_per_epoch=453//64,
    epochs=500,
    class_weight=class_weight,
    validation_data=test_generator,
    callbacks=[earlystop, checkpointer]
)

Epoch 1/500

Epoch 00001: val_loss improved from 1.40414 to 1.39418, saving model to ./weights-v3.2.1-stefan.hdf5
Epoch 2/500

Epoch 00002: val_loss did not improve
Epoch 3/500

Epoch 00003: val_loss improved from 1.39418 to 1.39394, saving model to ./weights-v3.2.1-stefan.hdf5
Epoch 4/500

Epoch 00004: val_loss improved from 1.39394 to 1.37841, saving model to ./weights-v3.2.1-stefan.hdf5
Epoch 5/500

Epoch 00005: val_loss did not improve
Epoch 6/500

Epoch 00006: val_loss did not improve
Epoch 7/500

Epoch 00007: val_loss did not improve
Epoch 8/500

Epoch 00008: val_loss did not improve
Epoch 9/500

Epoch 00009: val_loss did not improve
Epoch 10/500

Epoch 00010: val_loss did not improve
Epoch 11/500

Epoch 00011: val_loss did not improve
Epoch 12/500

Epoch 00012: val_loss did not improve
Epoch 13/500

Epoch 00013: val_loss did not improve
Epoch 14/500

Epoch 00014: val_loss did not improve


In [24]:
model.save('./models/love_classifierv3.2.1-stefan.h5')

### Choose candidates from test set

In [29]:
#freeze layers
#for layer in model.layers:
#    layer.trainable = False

#transform X_test & y_test so all data is in one dimension 
realNumDim = 0
for i in range(X_test.shape[0]):
    for j in range(len(X_test[i])):
        realNumDim += 1
    
#get test data (unmodified)
data_test = np.zeros((realNumDim, IMG_SIZE, IMG_SIZE, 3))    
last_set = 0
for i in range(X_test.shape[0]): #iterate over #stars
    for idx, _im in enumerate(X_test[i]): #iterate over images
        _img = image.load_img(os.path.join(BASE_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
        _x = image.img_to_array(_img)
        _x = np.expand_dims(_x, axis=0)
        data_test[last_set, :, :, :] = utils.preprocess_input(_x, version=1) / .255
        last_set += 1

predictions = model.predict(data_test)
pred_classes = predictions.argmax(axis=-1)

print(pred_classes)
print(y_test)


[0 1 1 1 1 0 1 1 0 0 1 0 0 0 3 0 0 1 1 0 0 1 0 0 2 1 2 1 1 1 1 1 0 0 0 0 1
 1 1 1 1 1 0 1 3 2 3 0 1 1 1 2 2 2 3 1 4 0 2 3 3 1 4 3 4 0 4 2 2 3 0 0 4 2
 3 2 2 1 1 1 4 3 4 3 1 2 3 4 0 3 4 3 2 4 1 2 2 3 1 1 4 4 2 4 3 2 4 1 0 3 3
 2 3 4]
[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
 array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2])
 array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])
 array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]) array([5])]


ValueError: Found input variables with inconsistent numbers of samples: [6, 114]

### Going 1 layer deeper improves val_acc from 0.3391 (see rater-v3.2-2nd) to 0.4087.

In [36]:
print(y_test[0])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [53]:
a = [y_test[i][j] for i in range(y_test.shape[0]) for j in range(len(y_test[i])) ]
b = [X_test[i][j] for i in range(X_test.shape[0]) for j in range(len(X_test[i])) ]

y_true = np.array(a)
x_test = np.array(b)

In [54]:
confusion_matrix(y_true,pred_classes)

array([[11,  9,  0,  1,  0,  0],
       [ 8, 17,  6,  3,  0,  0],
       [ 4,  5,  6,  5,  5,  0],
       [ 1,  2,  4,  5,  5,  0],
       [ 1,  3,  3,  5,  4,  0],
       [ 0,  0,  0,  0,  1,  0]])

In [55]:
np.where((y_true-pred_classes) == 4)

(array([108]),)

In [56]:
x_test[108]

'Ann_Marino_9_oval.jpg'