In [1]:
%matplotlib inline

import matplotlib
import os
import random
import pandas as pd
import imageio
import augmentation
import cv2
import shutil

from scipy.stats.mstats import linregress
from clint.textui import progress
from random import uniform
from keras.models import Sequential
from keras.layers import Convolution2D, Dense, Flatten, MaxPooling2D, Dropout
from keras.layers import BatchNormalization
from keras.utils import normalize

matplotlib.use('Agg')

In [2]:
def parse_csv(CSV):
    df = pd.read_csv(CSV, delim_whitespace=False, header=0, index_col=0)
    return df

In [None]:
class_names = ['test', 'validate', 'train']
root_dir = 'Images'
source_dir = 'images'

for c in class_names:
    os.mkdir(os.path.join(root_dir, c))
    
for c in class_names:
    images = [x for x in os.listdir(source_dir)]
    if c == 'train':
        selected_images = random.sample(images, len(images))
    else: 
        selected_images = random.sample(images, 630)
    for image in selected_images:
        source_path = os.path.join(source_dir, image)
        target_path = os.path.join(root_dir, c, image)
        shutil.move(source_path, target_path) 

In [3]:
def load_data_into_memory(DIR, ANNO, ATTRIBUTE, normalize=True, rollaxis=True):
    if DIR[:-1] != '/': DIR += '/'
    df = parse_csv(ANNO)
    files = filter(lambda x: x in df.index.values, os.listdir(DIR))
    X, y = [], []
    for image_path in files:
        img = imageio.imread(DIR + image_path)
        if normalize: img = img.astype('float32') / 255.
        if rollaxis: img.shape = (1,150,130)
        else: img.shape = (150,130,1)
        X.append(img)
        mu = df[ATTRIBUTE][image_path]
        y.append(mu)
    y = np.array(y)
    y = y - min(y)
    y = np.float32(y / max(y))

    x, y = np.array(X), np.array(y)
    print("Loaded {} images into memory", len(y))
    return x,y


In [4]:
def data_generator(x, y, batch_size, space, sampling_factor=3, sampling_intercept=2, weighted_sampling=False, augment=False):
    if weighted_sampling:

        def get_bin_index(bin_edges, value):
            for index in range(len(bin_edges)):
                if value <= bin_edges[index + 1]:
                    return index
            return index

        hist, bin_edges = np.histogram(y, bins=200)
        most = max(hist)
        hist_norm = hist / float(most)
        hist_norm_inv = (1. - hist_norm)
        hist_norm_inv = hist_norm_inv ** sampling_factor + 10 ** -sampling_intercept
        probs = []
        for y_ in y:
            index = get_bin_index(bin_edges, y_)
            probs.append(hist_norm_inv[index])

        should_sample = lambda pctprob: uniform(0,1) <= pctprob

    i = 0
    while True:
        Xbatch, ybatch, in_batch = [], [], 0
        while in_batch < batch_size:
            if weighted_sampling:
                while not should_sample(probs[i]):
                    i = i + 1 if i + 1 < len(y) else 0
            if augment: x_ = augmentation.applyRandomAugmentation(x[i], space)
            else: x_ = x[i]
            x_ = x_.astype('float32') / 255.
            x_.shape = (1, 150, 130)
            Xbatch.append(x_)
            ybatch.append(y[i])
            in_batch += 1
            i = i + 1 if i + 1 < len(y) else 0

        yield np.array(Xbatch), np.array(ybatch)

In [5]:
def vgg_variant(space):
    model = Sequential()

    for outputs in space['conv0filters']:
        model.add(Convolution2D(outputs, (3, 3), padding='same', input_shape=(1, 150, 130), kernel_initializer='glorot_uniform',
                                use_bias=True, activation='relu'))
        model.add(Convolution2D(outputs, (3, 3), padding='same', use_bias=True, activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))

    for outputs in space['conv1filters']:
        model.add(Convolution2D(outputs, 3, 3, padding='same', kernel_initializer='glorot_uniform', use_bias=True, activation='relu'))
        model.add(Convolution2D(outputs, 3, 3, padding='same', kernel_initializer='glorot_uniform', use_bias=True, activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))

    for outputs in space['conv2filters']:
        model.add(Convolution2D(outputs, 3, 3, padding='same', kernel_initializer='glorot_uniform', use_bias=True, activation='relu'))
        model.add(Convolution2D(outputs, 3, 3, padding='same', kernel_initializer='glorot_uniform', use_bias=True, activation='relu'))
        model.add(Convolution2D(outputs, 3, 3, padding='same', kernel_initializer='glorot_uniform', use_bias=True, activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))

    model.add(Flatten())

    for _ in range(int(space['num_fc'])):
        model.add(Dense(int(space['fcoutput']), kernel_initializer='glorot_uniform', use_bias=True, activation='relu'))
        model.add(Dropout(space['dropout']))

    model.add(Dense(1, kernel_initializer='glorot_uniform', use_bias=True))
    model.summary()
    return model

In [6]:
def get_Rsquared(y, predicted):
    m, b, r, p, e = linregress(y=y, x=predicted)
    r2 = r**2
    return r2

In [7]:
def get_metrics(model, x, y):
    predicted = model.predict(x)
    r2 = get_Rsquared(y, predicted)
    return r2

In [8]:
def train(Xtrain, ytrain, Xtrain_norm, ytrain_norm, Xvalidate, yvalidate, space):
    import sys
    from keras.optimizers import RMSprop
    from keras.callbacks import Callback

    class CorrelationEarlyStopping(Callback):
        def __init__(self, monitor='validate', patience=0, delta=.001):
            """
            :param monitor: 'validate' or 'train'
            :param patience: how many epochs to wait
            :param delta: by how much the monitored value has to be greater than the last maximum
            """
            self.rvalues = {'train': [], 'validate': []}
            self.monitor = monitor  # validate, train
            self.patience = patience
            self.delta = delta
            self.wait = 0
            self.best = 0
            self.num_epochs = 0
            self.best_model = None

        def on_epoch_end(self, epoch, logs={}):
            r2 = get_metrics(self.model, x=Xtrain_norm, y=ytrain_norm)
            self.rvalues['train'].append(r2)
            r2 = get_metrics(self.model, x=Xvalidate, y=yvalidate)
            self.rvalues['validate'].append(r2)
            print ('\n\tTrain r2: {}\n\tValidate r2: {}\n'.format(self.rvalues['train'][-1], self.rvalues['validate'][-1]))
            sys.stdout.flush()

            if self.rvalues[self.monitor][-1] - self.delta >= self.best:
                self.best = self.rvalues[self.monitor][-1]
                self.wait = 0
                self.num_epochs = epoch
                self.best_model = self.model
            else:
                if self.wait >= self.patience:
                    self.num_epochs = epoch - self.patience
                    self.model.stop_training = True
                else:
                    self.num_epochs = epoch
                    self.wait += 1

    model = vgg_variant(space)
    lr = 10**(-space['learning_rate'])
    rmsprop = RMSprop(lr=lr, rho=0.9, epsilon=1e-08)
    model.compile(loss='mean_squared_error', optimizer=rmsprop)
    monitor = CorrelationEarlyStopping(monitor='validate', patience=6, delta=0.01)
    gen = data_generator(Xtrain, ytrain, batch_size=space['batch_size'], space=space,
                         weighted_sampling=space['weighted_sampling'], augment=space['augment'],
                         sampling_factor=space['sampling_factor'], sampling_intercept=space['sampling_intercept'])
    model_history = model.fit_generator(gen, space['samples_per_epoch'], 50, 1, [monitor], (Xvalidate, yvalidate))
    return monitor.best_model, monitor.rvalues, model_history


In [9]:
if __name__ == '__main__':

    import numpy as np
    import sys
    import json

    ATTRIBUTE = 'Trustworthiness'

    ANNO = 'Annotations/' + ATTRIBUTE + '/annotations.csv'
    TRAIN_DIR = 'Images/'+'train'
    VAL_DIR = 'Images/'  + 'validate'
    TEST_DIR = 'Images/' + 'test'

    SPACE_FILE = 'Spaces/' + ATTRIBUTE + '_space.json'
    MODEL_PATH = 'Models/' + ATTRIBUTE + '.h5'

    print('Loading Train Data')
    Xtrain, ytrain = load_data_into_memory(DIR=TRAIN_DIR, ANNO=ANNO, ATTRIBUTE=ATTRIBUTE, normalize=False, rollaxis=False)
    print('Loading Train Data Again')
    Xtrain_norm, ytrain_norm = load_data_into_memory(DIR=TRAIN_DIR, ANNO=ANNO, ATTRIBUTE=ATTRIBUTE, normalize=True, rollaxis=True)
    print('Loading Validation Data')
    Xvalidate, yvalidate = load_data_into_memory(DIR=VAL_DIR, ANNO=ANNO, ATTRIBUTE=ATTRIBUTE, normalize=True, rollaxis=True)

    with open(SPACE_FILE, 'r') as f:
        opt_params = json.load(f)
        model, results, model_history = train(Xtrain, ytrain, Xtrain_norm, ytrain_norm, Xvalidate, yvalidate, opt_params)
        model.save(MODEL_PATH)
        

Loading Train Data
Loaded {} images into memory 5041
Loading Train Data Again
Loaded {} images into memory 5041
Loading Validation Data
Loaded {} images into memory 630
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 1, 150, 64)        74944     
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 1, 150, 64)        36928     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 1, 75, 64)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 1, 75, 64)         36928     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 1, 75, 64)         36928     
_________________________________________________________________
max_pooling2d_1 (Ma

In [10]:
loss = model_history.history['loss']
val_loss = sig_history.history['val_loss']
epochs = range(1, len(loss)+ 1)
plt.plot(epochs, loss, 'y', label = 'Training loss')
plt.plot(epochs, val_loss, 'r', label = 'Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

NameError: name 'sig_history' is not defined

In [None]:
import tensorflow as tf
new_model = tf.keras.models.load_model('Models/' + 'IQ' + '.h5')
new_model.summary()

In [None]:
Xtest, ytest = load_data_into_memory(DIR=TEST_DIR, ANNO=ANNO, ATTRIBUTE=ATTRIBUTE, normalize=True, rollaxis=True)
r2 = get_metrics(new_model, x=Xtest, y=ytest)
print(r2)