In [26]:
import tensorflow as tf
from tensorflow import keras
from keras import layers
import numpy as np
from preprocessing import get_preprocessed_dataset

In [27]:
# Side length in pixels requested for the (square) images; reused below when
# the models' input shapes are defined.
img_size = 256

# Unpack the six values returned by the project's preprocessing helper.
# NOTE(review): n_max presumably caps the number of samples loaded -- confirm
# in preprocessing.get_preprocessed_dataset.
(
    X_train,
    gender_train,
    age_train,
    X_test,
    gender_test,
    age_test,
) = get_preprocessed_dataset("/data", n_max=200, new_size=(img_size, img_size))

In [28]:
"""
Functions to generate model architectures:
"""

# function to create a standard CNN network
# idea: extend this function so that it's easy to change the architecture
def CNN_classic():
    """Build the baseline convolutional network with a single sigmoid output.

    Layer stack, in order: rescaling by 1/255, three stages of
    Conv2D (3x3 kernel, 'same' padding, ReLU) followed by MaxPooling2D with
    16, 32 and 64 filters, then Flatten, Dense(128, ReLU) and Dense(1, sigmoid).
    No input shape is declared here.

    Returns:
        An uncompiled ``keras.Sequential`` model named "CNN_classic".
    """
    # Alternative (not used): layers.Rescaling(scale = 1./127.5, offset = -1)
    # would make the scaling consistent with the transfer learning network.
    stack = [layers.Rescaling(1./255)]

    # Three identical conv + pool stages that differ only in filter count.
    for n_filters in (16, 32, 64):
        stack.append(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
        stack.append(layers.MaxPooling2D())

    # Classifier on top of the flattened feature maps.
    stack.append(layers.Flatten())
    stack.append(layers.Dense(128, activation='relu'))
    stack.append(layers.Dense(1, activation = 'sigmoid'))

    return keras.Sequential(stack, name = "CNN_classic")

def CNN_transfer(img_size):
    """Build a single-output sigmoid classifier on a frozen, pretrained Xception.

    Follows the Keras guide https://keras.io/guides/transfer_learning/.

    Args:
        img_size: Height and width (in pixels) of the square, 3-channel input.

    Returns:
        An uncompiled ``keras.Model`` named "CNN_transfer". Only the final
        Dense layer is trainable; the Xception base is frozen.
    """
    # Convolutional base from the Keras applications API, without its
    # ImageNet classification head.
    base_model = keras.applications.Xception(
        weights="imagenet",  # Load weights pre-trained on ImageNet.
        input_shape=(img_size, img_size, 3),
        include_top=False,
    )

    # Freeze the pretrained weights so only the new head is trained.
    base_model.trainable = False

    # `inputs` rather than `input`, so the builtin is not shadowed.
    inputs = layers.Input(shape = (img_size,img_size,3))

    # Maps x -> x / 127.5 - 1 (i.e. [0, 255] -> [-1, 1]).
    # NOTE(review): assumes the images arrive with pixel values in [0, 255] -- confirm.
    x = keras.layers.Rescaling(scale=1 / 127.5, offset=-1)(inputs)

    # training=False keeps the base (notably its BatchNormalization layers) in
    # inference mode, as the guide recommends; this matters if the base is
    # later unfrozen for fine-tuning.
    x = base_model(x, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(1, activation="sigmoid")(x)

    return keras.Model(inputs, outputs, name = "CNN_transfer")



# function to create the architecture of the multitask network
def CNN_multitask(img_size):
    """Build a two-headed CNN with a shared trunk and 'gender' / 'age' outputs.

    The shared trunk rescales the input to x / 127.5 - 1, applies three
    3x3 'same'-padded ReLU convolutions (16, 32, 64 filters) with max pooling
    after the first two, flattens, and ends in Dense(128, ReLU). Each head is
    Dense(256) -> Dense(128) -> output layer.

    Args:
        img_size: Height and width (in pixels) of the square, 3-channel input.

    Returns:
        An uncompiled ``tf.keras.Model`` named "CNN_multitask" whose outputs
        are, in order, 'gender' (1 unit, sigmoid) and 'age' (8 units, softmax).
    """
    L = tf.keras.layers

    def task_head(features, n_outputs, activation, name):
        # Two hidden Dense layers followed by the named output layer.
        hidden = L.Dense(256, activation='relu')(features)
        hidden = L.Dense(128, activation='relu')(hidden)
        return L.Dense(n_outputs, activation=activation, name=name)(hidden)

    inputs = L.Input(shape=(img_size, img_size, 3), name='input')
    shared = keras.layers.Rescaling(scale=1 / 127.5, offset=-1)(inputs)

    # First two conv stages are each followed by pooling.
    for n_filters in (16, 32):
        shared = L.Conv2D(n_filters, 3, padding = "same", activation="relu")(shared)
        shared = L.MaxPooling2D()(shared)

    # NOTE(review): unlike CNN_classic, the third conv is not followed by a
    # pooling layer -- confirm this is intentional.
    shared = L.Conv2D(64, 3, padding = "same", activation="relu")(shared)
    shared = L.Flatten()(shared)
    shared = L.Dense(128, activation='relu')(shared)

    # Heads are created gender first, then age, and both branch off the same trunk.
    gender_out = task_head(shared, 1, 'sigmoid', 'gender')
    age_out = task_head(shared, 8, 'softmax', 'age')

    return tf.keras.Model(inputs = inputs, outputs = [gender_out, age_out], name = "CNN_multitask")

In [29]:
"""
Helper functions
"""

# fit all the models to their respective datasets
# ** More arguments to adjust fitting procedure **
def fit_models(model_data, no_epochs, verbose = 0):
    """Fit every model in ``model_data`` and collect what each ``fit`` returns.

    Args:
        model_data: Mapping from a model object (must expose ``name`` and
            ``fit``) to a 4-item sequence
            ``(X_train, y_train, X_test, y_test)``.
        no_epochs: Forwarded to ``fit`` as ``epochs``.
        verbose: Forwarded to ``fit`` as ``verbose``.

    Returns:
        dict mapping each model's ``name`` to the value returned by its
        ``fit`` call. Models that share a name overwrite each other's entry.
    """
    results = {}
    for net, dataset in model_data.items():
        print(f"Training on Model: {net.name}")
        train_x, train_y, held_out_x, held_out_y = dataset

        # The third and fourth items are passed to fit() as validation data.
        fit_result = net.fit(
            train_x,
            train_y,
            epochs = no_epochs,
            verbose = verbose,
            validation_data = (held_out_x, held_out_y),
        )
        results[net.name] = fit_result
        print()
    return results

def compile_model(model, loss=None, optimizer = 'adam', metrics = None):
    """Compile ``model`` in place with the given loss, optimizer and metrics.

    The defaults are resolved per call instead of in the signature, so no
    loss instance or metrics list is shared between models.

    Args:
        model: Object exposing a Keras-style ``compile`` method.
        loss: Loss (or per-output mapping of losses) forwarded to
            ``model.compile``. ``None`` selects a fresh
            ``keras.losses.BinaryCrossentropy()``.
        optimizer: Optimizer forwarded to ``model.compile``.
        metrics: Metrics forwarded to ``model.compile``. ``None`` selects a
            fresh ``['accuracy']`` list.

    Returns:
        None. The model is modified in place.
    """
    if loss is None:
        loss = keras.losses.BinaryCrossentropy()
    if metrics is None:
        metrics = ['accuracy']

    model.compile(
        optimizer = optimizer,
        loss = loss,
        metrics = metrics)

def cross_validate(data_train, n_folds = 5, shuffle = False, seed = None):
    """Split the positions of ``data_train`` into k-fold train/test index pairs.

    Args:
        data_train: Any object supporting ``len()``; only its length is used.
        n_folds: Number of folds. Fold sizes differ by at most one element
            (``np.array_split`` semantics).
        shuffle: If True, the indices are permuted before being split.
        seed: Optional seed for a reproducible shuffle. When given, a
            dedicated ``np.random.default_rng(seed)`` generator is used.
            When None, NumPy's global random state is used, as before.
            Ignored when ``shuffle`` is False.

    Returns:
        A list of ``n_folds`` tuples ``(train_idx, test_idx)`` of integer
        index arrays. Each ``test_idx`` is one fold and ``train_idx`` holds
        all remaining indices in the same (possibly shuffled) order.
    """
    idx = np.arange(len(data_train))

    if shuffle:
        if seed is None:
            # Falls back to the global RNG, so np.random.seed(...) still applies.
            np.random.shuffle(idx)
        else:
            np.random.default_rng(seed).shuffle(idx)

    # Equal-sized (or almost equal-sized) folds.
    folds = np.array_split(idx, n_folds)

    # For each fold, the training indices are everything not in that fold.
    return [(idx[~np.isin(idx, fold)], fold) for fold in folds]


In [30]:
# Build the transfer-learning model and print its layer / parameter summary
# so the frozen base and the trainable head can be inspected.
model = CNN_transfer(img_size)
model.summary()

Model: "CNN_transfer"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_14 (InputLayer)       [(None, 256, 256, 3)]     0         
                                                                 
 rescaling_11 (Rescaling)    (None, 256, 256, 3)       0         
                                                                 
 xception (Functional)       (None, 8, 8, 2048)        20861480  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense_41 (Dense)            (None, 1)                 2049      
                                                                 
Total params: 20,863,529
Trainable params: 2,049
Non-trainable params: 20,861,480
______________________________________

In [31]:
# Build and compile the three networks. The classic and transfer models use
# compile_model's default loss; the multitask model needs one loss per head.
model_classic = CNN_classic()
compile_model(model_classic)

model_multitask = CNN_multitask(img_size)
# Keys match the names of the multitask model's two output layers.
multitask_loss = {'gender': keras.losses.BinaryCrossentropy(),
                  'age': keras.losses.SparseCategoricalCrossentropy()}
compile_model(model_multitask, multitask_loss)

model_transfer = CNN_transfer(img_size)
compile_model(model_transfer)

# model -> [X_train, y_train, X_test, y_test]; the multitask model gets a
# (gender, age) label pair, one entry per output head.
model_data = {
    model_classic: [X_train, gender_train, X_test, gender_test],
    model_multitask: [X_train, (gender_train, age_train), X_test, (gender_test, age_test)],
    model_transfer: [X_train, gender_train, X_test, gender_test],
}

# Keep the returned histories in a variable so later cells can use them,
# rather than leaving them only in the cell output.
histories = fit_models(model_data, 2, verbose = 1)
histories

              

Training on Model: CNN_classic
Epoch 1/2
Epoch 2/2

Training on Model: CNN_multitask
Epoch 1/2
Epoch 2/2

Training on Model: CNN_transfer
Epoch 1/2
Epoch 2/2



{'CNN_classic': <keras.callbacks.History at 0x1660183a0>,
 'CNN_multitask': <keras.callbacks.History at 0x16501bd60>,
 'CNN_transfer': <keras.callbacks.History at 0x1667e9520>}