In [1]:
import sys
import os

# Workaround to make packages work in both Jupyter notebook and Python
module_root_name = "AgeEstimator"
module_paths = [
    os.path.abspath(os.path.join('..')),
    os.path.abspath(os.path.join('../..')),
    os.path.abspath(os.path.join('../../..'))
]
module_paths = list(filter(lambda x: x.endswith(module_root_name), module_paths))
module_path = module_paths[0] if len(module_paths) == 1 else ""
if module_path not in sys.path:
    sys.path.append(module_path)

from server.models.cnn.data_loader import DataLoader

In [2]:
from tensorflow.keras import utils
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import *
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
import tensorflow as tf
import matplotlib.image as img
import numpy as np
import pandas
import multiprocessing

In [3]:
batch_size = 64
old_weights_path = "old_vgg_weights.hdf5"
best_weights_path = "best_vgg_weights.hdf5"

In [4]:
def get_img_generators():
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)
    valid_datagen = ImageDataGenerator(rescale=1./255)
    test_datagen = ImageDataGenerator(rescale=1./255)
    return train_datagen, valid_datagen, test_datagen

In [5]:
def get_dataframe(x, y, name):
    # Stack to [[img, label], ...] matrix
    stk = np.column_stack((x, y))
    
    # Save as csv
    np.savetxt("%s.csv" % (name), stk, fmt="%s", delimiter=",", comments="", header="FilePath,Age")
    
    # `flow_from_dataframe` requires loading labels as string
    df = pandas.read_csv("./%s.csv" % (name), dtype=str)
    
    return df

In [6]:
def to_generator(datagen, dataframe, directory, batch_size=batch_size):
    g = datagen.flow_from_dataframe(
        dataframe=dataframe,
        directory=directory,
        x_col="FilePath",
        y_col="Age",
        target_size=(250, 250),
        batch_size=batch_size,
        class_mode='sparse')

    # tf dataset doesn't work ...
#     n_class = len(np.unique(np.array(dataframe["Age"])))
#     tf_g = tf.data.Dataset.from_generator(lambda: g,
#         output_types=(tf.float32, tf.float32),
#         output_shapes=(
#             tf.TensorShape([batch_size, 250, 250, 3]), 
#             tf.TensorShape([batch_size,])
#         )
#     )

    return g

In [7]:
def get_callbacks(log_dir):
    from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau

    es = EarlyStopping(
        monitor='val_loss',
        mode='min',
        verbose=1,
        patience=6)
    
    tb = TensorBoard(
        log_dir=log_dir,
        histogram_freq=0,
        write_graph=True,
        write_grads=False,
        write_images=False,
        embeddings_freq=0,
        embeddings_layer_names=None,
        embeddings_metadata=None,
        embeddings_data=None,
        update_freq='epoch')
    
    mc = ModelCheckpoint(
        best_weights_path,
        monitor='val_loss',
        mode='min',
        save_weights_only=True,
        save_best_only=True)
    
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=5,
        min_lr=0.001)
    
    return [es, mc, tb, reduce_lr]

In [8]:
def get_log_dir():
    log_i = 0
    log_dir = "logs/run_"
    
    while os.path.exists(log_dir + str(log_i)):
        log_i += 1

    return log_dir + str(log_i)

In [9]:
def get_model():
    from tensorflow.keras.applications import VGG16

#     x = x_in = Input((250, 250, 3), name="input")
#     x = Conv2D(32, (3,3), padding="valid",  name="fe0")(x)
#     x = Activation("relu", name="r0")(x)
#     x = MaxPooling2D(2,2,name="mp0")(x)
#     x = Conv2D(64, (3,3), padding="valid", name="fe1")(x)
#     x = Activation("relu", name="r1")(x)
#     x = MaxPooling2D(2,2,name="mp1")(x)
#     x = Conv2D(128, (3,3), padding="valid", name="fe2")(x)
#     x = Activation("relu", name="r2")(x)
#     x = MaxPooling2D(2,2,name="mp2")(x)
#     x = Flatten(name="fl")(x)
#     x = Dropout(0.5, name="d5")(x)
#     x = Dense(512, name="d1", activation="relu")(x)
#     x = Dense(1, name="d2")(x)
#     m = Model(inputs=x_in, outputs=x)

    vgg16 = VGG16(weights="imagenet", include_top=False, input_shape=(250, 250, 3))
    vgg16.trainable = False
    vgg16.summary()

    x = x_in = Input((250, 250, 3), name="input")
    x = vgg16(x)
    x = Flatten(name="fl")(x)
    x = Dropout(0.5, name="dr1")(x)
    x = Dense(512, name="d1", activation="relu")(x)
    x = Dense(1, name="d2")(x)
    m = Model(inputs=x_in, outputs=x)

    m.summary()
    
    return m

In [10]:
def train(train_generator, valid_generator, train_len, valid_len):
    epochs = 1
    
    model = get_model()
    optimizer = Adam(lr=0.001)
        
    model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=['mae'])
    
    if os.path.exists(old_weights_path):
        model.load_weights(old_weights_path)
    
    log_dir = get_log_dir()
    callbacks = get_callbacks(log_dir)

    model.fit(
        x=train_generator,
        steps_per_epoch=train_len // batch_size,
        epochs=epochs,
        verbose=1,
        validation_data=valid_generator,
        validation_steps=valid_len // batch_size,
        callbacks=callbacks,
        workers=max(2, multiprocessing.cpu_count() - 2),
        use_multiprocessing=True
    )
    
    model.save_weights(old_weights_path)

    return model

In [11]:
def main():
    dl = DataLoader()
    x_train, y_train = dl.load_train()
    x_valid, y_valid = dl.load_valid()
    x_test, y_test = dl.load_test()
    
    train_datagen, valid_datagen, test_datagen = get_img_generators()
    train_df = get_dataframe(x_train, y_train, "train")
    valid_df = get_dataframe(x_valid, y_valid, "valid")
    test_df = get_dataframe(x_test, y_test, "test")
    
    train_generator = to_generator(train_datagen, train_df, dl.train_dir)
    valid_generator = to_generator(valid_datagen, valid_df, dl.valid_dir)
    test_generator = to_generator(test_datagen, test_df, dl.test_dir)
    
    train_len = len(x_train)
    valid_len = len(x_valid)
    test_len = len(x_test)
    
    trained_model = train(train_generator, valid_generator, train_len, valid_len)
    
    evaluation = trained_model.evaluate(
        x=test_generator, steps=len(y_test) // batch_size)
    y_hat = trained_model.predict(
        x=test_generator, steps=len(y_test) // batch_size)
    
    print(evaluation)
    
    return evaluation, y_hat, y_test

In [12]:
res = main()

Found 119780 validated image filenames belonging to 102 classes.
Found 29944 validated image filenames belonging to 96 classes.
Found 37430 validated image filenames belonging to 98 classes.
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 250, 250, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 250, 250, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 250, 250, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 125, 125, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 125, 125, 128)     73856     
_________________________________________________________________
bl

Process Keras_worker_ForkPoolWorker-3:
Process Keras_worker_ForkPoolWorker-4:
Process Keras_worker_ForkPoolWorker-2:
Process Keras_worker_ForkPoolWorker-5:
Process Keras_worker_ForkPoolWorker-1:
Process Keras_worker_ForkPoolWorker-6:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/daniel/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/daniel/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/daniel/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/daniel/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/daniel/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/multiprocessing/process.py", li



KeyboardInterrupt: 