In [1]:
from keras.engine import Model
from keras.layers import Flatten, Dense, Input, Dropout
from keras_vggface.vggface import VGGFace
from keras.preprocessing.image import ImageDataGenerator
from keras_vggface import utils
from keras.optimizers import Adam
from keras.preprocessing import image
from keras import backend as K
from sklearn.utils import class_weight
import numpy as np
import os
import shutil
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Constants
IMG_SIZE = 200
BASE_DIR = '../project/all_females'

In [3]:
# Read in ratings
ratings = np.genfromtxt('./Projekt_SGE_Assessment_ErikK.txt')

In [4]:
# Round (obviously imbalanced sets)
ratings = np.round(ratings, 0).astype(np.int8)
print(np.unique(ratings, return_counts=True))

(array([0, 1, 2, 3, 4, 5], dtype=int8), array([197, 128,  75,  70,  50,  48]))


In [5]:
### Distribute images across folders
for r in np.unique(ratings).astype(np.int8):
    if not os.path.isdir('./train/{}-stars'.format(r)):
        os.makedirs('./train/{}-stars'.format(r))
    if not os.path.isdir('./test/{}-stars'.format(r)):
        os.makedirs('./test/{}-stars'.format(r))

In [6]:
### Move all images according to ratings
images = sorted(os.listdir(BASE_DIR))

### Make sure lengths of ratings and images correspond 
assert len(ratings) == len(images)

### Sort images according to ratings
ratings = ratings.tolist()
ratings, images = zip(*sorted(zip(ratings,images)))
ratings = np.asarray(ratings)

### Count number of ratings for each category, generate train test split for each category
unique, counts = np.unique(ratings, return_counts=True)

stars = [[] for i in counts]
stared_img = [[] for i in counts]
accum = np.cumsum(counts)
stars[0] = ratings[:accum[0]]
stared_img[0] = images[:accum[0]]

for i in range(1,len(counts)):
    stars[i] = ratings[accum[i-1]:accum[i]]
    stared_img[i] = images[accum[i-1]:accum[i]]

stars = np.asarray(stars)
stared_img = np.asarray(stared_img)

assert(stars.shape == stared_img.shape)

X_train = [[] for i in counts]
X_test = [[] for i in counts]
y_train = [[] for i in counts]
y_test = [[] for i in counts]

for i in range(len(counts)):
    X_train[i], X_test[i], y_train[i], y_test[i] = train_test_split(stared_img[i], stars[i], test_size=0.2)
    
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

        
print("Train-test-split:")
print("X_train: ",X_train.shape)
print("X_test: ",X_test.shape)
print("y_train: ",y_train.shape)
print("y_test: ",y_test.shape)

#check that all subsets of the split have the same rating
for i in range(len(counts)):
    assert(np.sum(y_train[i]) == i*len(y_train[i]))
    assert(np.sum(y_test[i]) == i*len(y_test[i]))

Train-test-split:
X_train:  (6,)
X_test:  (6,)
y_train:  (6,)
y_test:  (6,)


In [7]:
### Loop throgh all images and store them in the dedicated folders
#for i, r in enumerate(ratings.astype(np.int8)):
#    src = os.path.join(BASE_DIR, images[i])
#    dest = os.path.join('./classification/{}-stars'.format(r), images[i])
#    shutil.copy(src, dest)

for i in range(len(X_train)):
    for j in range(len(X_train[i])):
        src = os.path.join(BASE_DIR, X_train[i][j])
        dest = os.path.join('./train/{}-stars'.format(y_train[i][j]), X_train[i][j])
        shutil.copy(src, dest)

for i in range(len(X_test)):
    for j in range(len(X_test[i])):
        src = os.path.join(BASE_DIR, X_test[i][j])
        dest = os.path.join('./test/{}-stars'.format(y_test[i][j]), X_test[i][j])
        shutil.copy(src, dest)

## Time to use data augmentation with streaming

In [8]:
datagen = ImageDataGenerator(rescale=1./255, 
                             rotation_range=40,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest',
                             preprocessing_function=utils.preprocess_input)

In [9]:
train_generator = datagen.flow_from_directory(
    './train/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=64,
    class_mode='categorical'
)

test_generator = datagen.flow_from_directory(
    './test/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=64,
    class_mode='categorical'
)

Found 1181 images belonging to 6 classes.
Found 474 images belonging to 6 classes.


## Some random model

In [10]:
# Add custom layers
vgg_base = VGGFace(model='senet50', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling='max')
last_layer = vgg_base.get_layer('global_max_pooling2d_1').output
X = Dropout(0.2)(last_layer)
X = Dense(256, activation='relu', name='fc6')(X)
X = Dropout(0.2)(X)
X = Dense(128, activation='relu', name='fc7')(X)
X = Dropout(0.2)(X)
output = Dense(len(np.unique(ratings)), activation='softmax')(X)
model = Model(inputs=vgg_base.input, outputs=output)

Downloading data from https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_tf_notop_senet50.h5


In [11]:
# Freeze vgg layers
for layer in vgg_base.layers:
    layer.trainable = False

In [12]:
# Compute class weights since we have unbalaned classes
class_weight = class_weight.compute_class_weight('balanced', 
                                                 np.unique(ratings), 
                                                 ratings)

In [13]:
model.compile(optimizer=Adam(clipnorm=1.0),
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

In [14]:
history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=1,
    class_weight=class_weight,
    validation_data=test_generator
)

Epoch 1/1


In [15]:
history = model.fit_generator(
    train_generator,
    steps_per_epoch=453//64,
    epochs=10,
    class_weight=class_weight,
    validation_data=test_generator
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
