In [1]:
from keras.engine import Model
from keras.layers import Flatten, Dense, Input, Dropout
from keras_vggface.vggface import VGGFace
from keras.preprocessing.image import ImageDataGenerator
from keras_vggface import utils
from keras.optimizers import Adam
from keras.preprocessing import image
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import to_categorical
from keras.models import load_model
from sklearn.utils import class_weight
import keras
import numpy as np
import os
import shutil
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Constants
IMG_SIZE = 224

IMG_DIR = '../project/eval_data/all_females_combined_train_s'
RATING_PATH = '../project/eval_data/all_females_combined_train_s/train_ratings.txt'

TEST_IMG_DIR = '../project/eval_data/all_females_combined_test_s'
TEST_RATING_PATH = '../project/eval_data/all_females_combined_test_s/test_ratings.txt'

VERSION = 'eval-Ensemble'
MODEL1 = 'eval-OrdReg-comb'
MODEL2 = 'eval-OrdReg-comb-BN-ex-last'

In [3]:
def getMean(train_data):
    #calculate mean values for pixels (RGB)

    data = np.zeros((train_data.shape[0], IMG_SIZE, IMG_SIZE, 3)) 
    for i in range(train_data.shape[0]):
        _img = image.load_img(os.path.join(IMG_DIR, train_data[i]), target_size=(IMG_SIZE,IMG_SIZE))
        data[i,:,:,:] = image.img_to_array(_img)
    mean = np.mean(data, axis=(0, 1, 2))
    print("Pixel means: ",mean)

    return mean

In [4]:
def preprocess(x,mean):
    x[:,:,0] -= mean[0]
    x[:,:,1] -= mean[1]
    x[:,:,2] -= mean[2]

In [5]:
def prepareDataRegression(percentage):
    # Read in ratings
    ratings = np.genfromtxt(RATING_PATH)
        
    ### Move all images according to ratings
    images = sorted(os.listdir(IMG_DIR))
    #remove train_ratings.txt from images
    images.remove('train_ratings.txt')
    #convert to np.array for indexing
    images = np.array(images)

    ### Make sure lengths of ratings and images correspond 
    assert len(ratings) == len(images)

    perm_ratings = np.random.permutation(len(ratings))
    test_len = int(len(ratings) * percentage/100)
    train_len = len(ratings) - test_len

    test_ind = perm_ratings[:test_len]
    train_ind = perm_ratings[test_len:]
    

    #get test & train data
    data_test = np.zeros((len(test_ind), IMG_SIZE, IMG_SIZE, 3))    
    data_train = np.zeros((len(train_ind), IMG_SIZE, IMG_SIZE, 3)) 
    last_set_train = 0
    last_set_test = 0

    mean = getMean(images[train_ind])
    
    for idx, _im in enumerate(images): #iterate over images
        if idx in test_ind:
            _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
            _x = image.img_to_array(_img)
            _x = np.expand_dims(_x, axis=0)
            data_test[last_set_test, :, :, :] = utils.preprocess_input(_x, version=1)#preprocess(_x,mean)# / .255
            last_set_test += 1
        if idx in train_ind:
            _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
            _x = image.img_to_array(_img)
            _x = np.expand_dims(_x, axis=0)
            data_train[last_set_train, :, :, :] = utils.preprocess_input(_x, version=1)#preprocess(_x,mean)# / .255
            last_set_train += 1
    
    return data_train, ratings[train_ind], data_test, ratings[test_ind], ratings, images[test_ind], mean

In [6]:
def prepareDataClassification(percentage):
    # Read in ratings
    ratings = np.genfromtxt(RATING_PATH)
    
    # Round (obviously imbalanced sets)
    ratings_rounded = np.round(ratings, 0).astype(np.int8)
    ratings = to_categorical(ratings_rounded, num_classes=len(np.unique(ratings)))
    
                
    ### Move all images according to ratings
    images = sorted(os.listdir(IMG_DIR))
    #remove train_ratings.txt from images
    images.remove('train_ratings.txt')
    #convert to np.array for indexing
    images = np.array(images)

    ### Make sure lengths of ratings and images correspond 
    assert len(ratings) == len(images)

    perm_ratings = np.random.permutation(len(ratings))
    test_len = int(len(ratings) * percentage/100)
    train_len = len(ratings) - test_len

    test_ind = perm_ratings[:test_len]
    train_ind = perm_ratings[test_len:]
    
    #get test & train data
    data_test = np.zeros((len(test_ind), IMG_SIZE, IMG_SIZE, 3))    
    data_train = np.zeros((len(train_ind), IMG_SIZE, IMG_SIZE, 3)) 
    last_set_train = 0
    last_set_test = 0

    mean = getMean(images[train_ind])
    
    for idx, _im in enumerate(images): #iterate over images
        if idx in test_ind:
            _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
            _x = image.img_to_array(_img)
            _x = np.expand_dims(_x, axis=0)
            data_test[last_set_test, :, :, :] = utils.preprocess_input(_x, version=1)#preprocess(_x,mean)# / .255
            last_set_test += 1
        if idx in train_ind:
            _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
            _x = image.img_to_array(_img)
            _x = np.expand_dims(_x, axis=0)
            data_train[last_set_train, :, :, :] = utils.preprocess_input(_x, version=1)#preprocess(_x,mean)# / .255
            last_set_train += 1
    
    return data_train, ratings[train_ind], data_test, ratings[test_ind], ratings_rounded, images[test_ind], mean
    #return ratings[train_ind], train_ind, ratings[test_ind], test_ind, ratings, images

In [7]:
def prepareDataOrdinalRegression(percentage):
    # Load ratings
    ratings = np.genfromtxt(RATING_PATH)
    
    # Convert ratings to the desired format
    # 1. First round them
    ratings_rounded = np.round(ratings, 1).astype(np.int8)

    # 2. Then encode as suggested by Cheng (2007)
    ratings_prepared = np.zeros((len(ratings_rounded), len(np.unique(ratings_rounded))))
    for i, r in enumerate(ratings_rounded):
        for j in range(r):
            ratings_prepared[i, j] = 1
        
    # 3. Finally, make sure assignment is correct
    assert np.all(np.sum(ratings_prepared, axis=1).astype(np.int8) == ratings_rounded)
    
    ### Move all images according to ratings
    images = sorted(os.listdir(IMG_DIR))
    #remove train_ratings.txt from images
    images.remove('train_ratings.txt')
    #convert to np.array for indexing
    images = np.array(images)

    ### Make sure lengths of ratings and images correspond 
    print(len(ratings))
    print(len(images))
    assert len(ratings) == len(images)
    
    perm_ratings = np.random.permutation(len(ratings))
    test_len = int(len(ratings) * percentage/100)
    train_len = len(ratings) - test_len

    test_ind = perm_ratings[:test_len]
    train_ind = perm_ratings[test_len:]
    
    #get test & train data
    data_test = np.zeros((len(test_ind), IMG_SIZE, IMG_SIZE, 3))    
    data_train = np.zeros((len(train_ind), IMG_SIZE, IMG_SIZE, 3)) 
    last_set_train = 0
    last_set_test = 0

    mean = getMean(images[train_ind])
    
    for idx, _im in enumerate(images): #iterate over images
        if idx in test_ind:
            _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
            _x = image.img_to_array(_img)
            _x = np.expand_dims(_x, axis=0)
            data_test[last_set_test, :, :, :] = utils.preprocess_input(_x, version=1)##preprocess(_x,mean)# / .255
            last_set_test += 1
        if idx in train_ind:
            _img = image.load_img(os.path.join(IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
            _x = image.img_to_array(_img)
            _x = np.expand_dims(_x, axis=0)
            data_train[last_set_train, :, :, :] = utils.preprocess_input(_x, version=1)##preprocess(_x,mean)# / .255
            last_set_train += 1
    
    
    return data_train, ratings_prepared[train_ind], data_test, ratings_prepared[test_ind], ratings_rounded, images[test_ind], mean

In [8]:
def getGenerator(data_train, data_test):
    train_datagen = ImageDataGenerator(rescale=1./255, 
                                rotation_range=40,
                                width_shift_range=0.2,
                                height_shift_range=0.2,
                                shear_range=0.2,
                                zoom_range=0.2,
                                horizontal_flip=True,
                                fill_mode='nearest'
                                      )#preprocessing_function=utils.preprocess_input)

    train_datagen.fit(data_train)
    
    
    test_datagen = ImageDataGenerator(rescale=1./255)
    test_datagen.fit(data_test)

    return train_datagen, test_datagen

In [9]:
def visualize_loss(history):
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(loss) + 1)
    plt.figure(figsize=(15,8))
    plt.plot(epochs, loss, 'bo-', label="Training loss")
    plt.plot(epochs, val_loss, 'b', label="Validation loss")
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

In [10]:
def visualize_acc(history):
    loss = history.history['acc']
    val_loss = history.history['val_acc']
    epochs = range(1, len(loss) + 1)
    plt.figure(figsize=(15,8))
    plt.plot(epochs, loss, 'ro-', label="Training acc")
    plt.plot(epochs, val_loss, 'r', label="Validation acc")
    plt.xlabel('Epochs')
    plt.ylabel('Acc')
    plt.legend()
    plt.show()

In [11]:
def visualize_mae(history):
    loss = history.history['mean_absolute_error']
    val_loss = history.history['val_mean_absolute_error']
    epochs = range(1, len(loss) + 1)
    plt.figure(figsize=(15,8))
    plt.plot(epochs, loss, 'ro-', label="Training mean_absolute_error")
    plt.plot(epochs, val_loss, 'r', label="Validation mean_absolute_error")
    plt.xlabel('Epochs')
    plt.ylabel('MAE')
    plt.legend()
    plt.show()

In [12]:
def recommend(images, pred_classes):
    if type(images) is np.ndarray: 
        images = images.tolist()
    recs = np.where(pred_classes >= 4)
    
    for i in recs[0]:
        img = mpimg.imread(os.path.join(IMG_DIR, images[i]))
        imgplot = plt.imshow(img)
        plt.show()
        plt.clf()

def recommendTest(images, pred_classes):
    if type(images) is np.ndarray: 
        images = images.tolist()
    recs = np.where(pred_classes >= 4)
    
    for i in recs[0]:
        img = mpimg.imread(os.path.join(TEST_IMG_DIR, images[i]))
        imgplot = plt.imshow(img)
        plt.show()
        plt.clf()

### Get data

In [13]:
x_train, y_train, x_val, y_val, ratings, images_val, mean = prepareDataOrdinalRegression(5)
train_generator, test_generator = getGenerator(x_train,x_val)

train_inst = x_train.shape[0]
print("x_train: ",x_train.shape)
print("y_train: ",y_train.shape)
print("x_val: ",x_val.shape)
print("y_val: ",y_val.shape)
print("ratings",ratings.shape)


681
681
Pixel means:  [159.68721602 132.8637165  119.17415002]
x_train:  (647, 224, 224, 3)
y_train:  (647, 6)
x_val:  (34, 224, 224, 3)
y_val:  (34, 6)
ratings (681,)


### Build keras model

In [14]:
#load models which should belong into the ensemble
model1 = keras.models.load_model('./models/love_classifier' + MODEL1 + '.h5')
model2 = keras.models.load_model('./models/love_classifier' + MODEL2 + '.h5')

In [15]:
#let loaded models create predictions for training set
predictions1 = model1.predict(x_train)
pred_classes1 = predictions1.sum(axis=-1)
pred_classes1 = np.round(pred_classes1, 1).astype(np.int8)

y_true = np.array(y_train).sum(axis=-1).astype(np.int8)
#print("Model1:")
#print(y_true)
#print(pred_classes1)

confusion_matrix(y_true,pred_classes1)



predictions2 = model1.predict(x_train)
pred_classes2 = predictions2.sum(axis=-1)
pred_classes2 = np.round(pred_classes2, 1).astype(np.int8)

#print("Model2:")
#print(y_true)
#print(pred_classes2)

confusion_matrix(y_true,pred_classes2)

array([[ 7, 43, 51, 37, 45,  2],
       [ 6, 42, 44, 34, 31,  4],
       [ 2, 26, 26, 27, 33,  4],
       [ 2, 33, 23, 19, 32,  2],
       [ 3,  8, 24, 20, 15,  1],
       [ 0,  0,  0,  0,  1,  0]])

In [16]:
#same for validation set
predictions1_val = model1.predict(x_val)
pred_classes1_val = predictions1_val.sum(axis=-1)
pred_classes1_val = np.round(pred_classes1_val, 1).astype(np.int8)

predictions2_val = model1.predict(x_val)
pred_classes2_val = predictions2_val.sum(axis=-1)
pred_classes2_val = np.round(pred_classes2_val, 1).astype(np.int8)

y_true_val = np.array(y_val).sum(axis=-1).astype(np.int8)

In [17]:
#combine predictions into one np array
x_train_comb = np.hstack((predictions1,predictions2))
x_train_comb = x_train_comb.reshape((x_train_comb.shape[0],2,x_train_comb.shape[1]//2))
print(x_train_comb.shape)

y_train_comb = np.hstack((y_train,y_train))
print(y_train_comb.shape)

#same for validation set
x_val_comb = np.hstack((predictions1_val,predictions2_val))
x_val_comb = x_val_comb.reshape((x_val_comb.shape[0],2,x_val_comb.shape[1]//2))
print(x_val_comb.shape)

y_val_comb = np.expand_dims(y_val,axis=-1)
print(y_val_comb.shape)

(647, 2, 6)
(647, 12)
(34, 2, 6)
(34, 6, 1)


In [18]:
# Add custom layers
inputs = Input(shape=(x_train_comb.shape[1],x_train_comb.shape[2]))
X = Dense(32, activation='relu', name='fc6')(inputs)
X = Dropout(0.2)(X)
X = Dense(16, activation='relu', name='fc7')(X)
X = Dropout(0.2)(X)
output = Dense(len(np.unique(y_true)), activation='softmax')(X)
model = Model(inputs=inputs, outputs=output)

# Freeze vgg layers
#for layer in vgg_base.layers:
#    layer.trainable = False

# Compute class weights since we have unbalaned classes
class_weight = class_weight.compute_class_weight('balanced', 
                                                 np.unique(y_true), 
                                                 y_true)

#print(class_weight)

model.compile(optimizer=Adam(lr= 0.0005,clipnorm=1.0),
              #loss='categorical_crossentropy', 
              #metrics=['accuracy'])
              loss='categorical_crossentropy', metrics=['accuracy'])

checkpointer = ModelCheckpoint(filepath='./weights-' + VERSION + '.hdf5', verbose=1, save_best_only=True)
earlystop = EarlyStopping(monitor='val_loss', patience=10, verbose=0)

In [19]:
history = model.fit(x_train_comb, y_train, batch_size=32, epochs=500, validation_data=(x_val_comb, y_val), class_weight=class_weight, callbacks=[earlystop, checkpointer])

ValueError: Error when checking target: expected dense_1 to have 3 dimensions, but got array with shape (647, 6)

In [None]:
visualize_loss(history)

In [None]:
visualize_acc(history)

In [None]:
model.save('./models/love_classifier' + VERSION + '.h5')

### Predictions: Validation set

In [None]:
predictions = model.predict(x_val)
print(predictions.shape)
pred_classes = predictions.argmax(axis=-1)
pred_classes = np.round(pred_classes, 1).astype(np.int8)
print(pred_classes.shape)

pred_classes = predictions
y_true = y_val

#y_true = np.array(y_test).argmax(axis=-1).astype(np.int8)
print(y_true)
print(pred_classes)

#confusion_matrix(y_true,pred_classes)

In [None]:
recommend(images_val,pred_classes)

### Prediction: Test set

In [None]:
def prepareTestDataRegression():
    # Read in ratings
    ratings = np.genfromtxt(TEST_RATING_PATH)
        
    ### Move all images according to ratings
    images = sorted(os.listdir(TEST_IMG_DIR))
    #remove test_ratings.txt from images
    images.remove('test_ratings.txt')
    #convert to np.array for indexing
    images = np.array(images)

    ### Make sure lengths of ratings and images correspond 
    assert len(ratings) == len(images)

    data = np.zeros((len(images), IMG_SIZE, IMG_SIZE, 3)) 
    last_set = 0
    
    for idx, _im in enumerate(images): #iterate over images
        _img = image.load_img(os.path.join(TEST_IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
        _x = image.img_to_array(_img)
        _x = np.expand_dims(_x, axis=0)
        data[last_set, :, :, :] = utils.preprocess_input(_x, version=1)#preprocess(_x,mean) # / .255
        last_set += 1
    
    return data, images, ratings

In [None]:
def prepareTestDataClassification():
    # Read in ratings
    ratings = np.genfromtxt(TEST_RATING_PATH)
    
    # Round (obviously imbalanced sets)
    ratings_rounded = np.round(ratings, 0).astype(np.int8)
    ratings = to_categorical(ratings_rounded, num_classes=len(np.unique(ratings_rounded)))
    
                
    ### Move all images according to ratings
    images = sorted(os.listdir(TEST_IMG_DIR))
    #remove test_ratings.txt from images
    images.remove('test_ratings.txt')
    #convert to np.array for indexing
    images = np.array(images)

    ### Make sure lengths of ratings and images correspond 
    assert len(ratings) == len(images)

    data = np.zeros((len(images), IMG_SIZE, IMG_SIZE, 3)) 
    last_set = 0
    
    for idx, _im in enumerate(images): #iterate over images
        _img = image.load_img(os.path.join(TEST_IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
        _x = image.img_to_array(_img)
        _x = np.expand_dims(_x, axis=0)
        data[last_set, :, :, :] = utils.preprocess_input(_x, version=1)#preprocess(_x,mean) # / .255
        last_set += 1
    
    return data, images, ratings

In [None]:
def prepareTestDataOrdinalRegression():
    # Load ratings
    ratings = np.genfromtxt(TEST_RATING_PATH)
    
    # Convert ratings to the desired format
    # 1. First round them
    ratings_rounded = np.round(ratings, 1).astype(np.int8)

    # 2. Then encode as suggested by Cheng (2007)
    ratings_prepared = np.zeros((len(ratings_rounded), len(np.unique(ratings_rounded))))
    for i, r in enumerate(ratings_rounded):
        for j in range(r):
            ratings_prepared[i, j] = 1
        
    # 3. Finally, make sure assignment is correct
    assert np.all(np.sum(ratings_prepared, axis=1).astype(np.int8) == ratings_rounded)
    
    ### Move all images according to ratings
    images = sorted(os.listdir(TEST_IMG_DIR))
    #remove test_ratings.txt from images
    images.remove('test_ratings.txt')

    ### Make sure lengths of ratings and images correspond 
    assert len(ratings) == len(images)
    
    data = np.zeros((len(images), IMG_SIZE, IMG_SIZE, 3)) 
    last_set = 0
    
    for idx, _im in enumerate(images): #iterate over images
        _img = image.load_img(os.path.join(TEST_IMG_DIR, _im), target_size=(IMG_SIZE,IMG_SIZE))
        _x = image.img_to_array(_img)
        _x = np.expand_dims(_x, axis=0)
        data[last_set, :, :, :] = utils.preprocess_input(_x, version=1)#preprocess(_x,mean) # / .255
        last_set += 1
    
    return data, images, ratings

In [None]:
x_test, images_test, ratings_test = prepareTestDataRegression()

predictions = model.predict(x_test)
pred_classes = predictions.argmax(axis=-1)
pred_classes = np.round(pred_classes, 1).astype(np.int8)

pred_classes = predictions
y_true = ratings_test

#y_true = np.array(ratings_val).astype(np.int8)
print(y_true)
print(pred_classes)

#confusion_matrix(y_true,pred_classes)

In [None]:
recommendTest(images_test,pred_classes)