In [None]:
import numpy as np
import pickle
import json
import os
import yaml

# from silence_tensorflow import silence_tensorflow
# silence_tensorflow()
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow_addons.metrics import MatthewsCorrelationCoefficient
from tensorflow_addons.optimizers import SGDW, MultiOptimizer, Adam
import tensorflow as tf

from utils.datasets import get_generators, create_classifier_dataset
from utils.train import lr_scheduler
from utils.models import resnet_cifar, resnet, vae
from utils.misc import log_config
from config.datasets_config import DATASETS_CONFIG

In [2]:
def configure_saving():
    """
    Derive the save directory from the global ``config`` and create it on disk.

    Joins ``config['root_save_dir']`` and ``config['model_name']``, stores the result in
    ``config['save_dir']`` and creates that directory. When the directory already exists,
    the user is asked interactively whether to continue.

    @raise ValueError:      if the directory already exists and the user does not confirm
    """
    # Generate save directory and store in config
    save_dir = os.path.join(config['root_save_dir'], config['model_name'])
    config['save_dir'] = save_dir

    # Create save directory (if it does not exist)
    try:
        os.makedirs(save_dir, exist_ok=False)
    except FileExistsError:
        answer = input('save_dir already exists, continue? (Y/n)  >> ')
        # Accept 'Y', 'y' and 'yes' (surrounding whitespace ignored). Anything else,
        # including an empty answer, aborts so that existing results are never
        # overwritten by accident.
        if answer.strip().lower() not in ('y', 'yes'):
            raise ValueError(
                "save_dir {!r} already exists and continuing was not confirmed".format(save_dir)
            ) from None

In [3]:
def load_datasets():
    """
    Build the batched train, validation and test datasets from the global configuration.

    Side effects: copies the split settings from ``config`` into the global ``dataset_config``
    and stores 'classes', 'num_classes', 'class_weight' and 'steps' in ``config``.

    @return:    list holding the train, validation and test dataset (in that order)
    """
    # Hand the split settings over to the dataset configuration
    for split_key in ('train_split', 'validation_split'):
        dataset_config[split_key] = config[split_key]

    # Create one generator per split; samples are yielded one at a time and batched below
    train_gen, val_gen, test_gen = get_generators(
        ['train', 'val', 'test'],
        config['image_shape'],
        batch_size=1,
        random_seed=config['random_seed'],
        dataset_config=dataset_config
    )
    class_names = list(train_gen.class_indices.keys())
    num_classes = len(class_names)
    config['classes'] = class_names
    config['num_classes'] = num_classes

    # Optionally read the class weights and re-key them: original name -> group -> class index
    class_weight = None
    if config['use_class_weight']:
        weight_path = os.path.join(dataset_config['dataset_dir'], 'class_weight.json')
        with open(weight_path, 'r') as weight_file:
            raw_weights = json.load(weight_file)
        groups = dataset_config['groups']
        group_weights = {
            groups[name]: weight
            for name, weight in raw_weights.items()
            if name in groups.keys()
        }
        class_weight = {
            train_gen.class_indices[group]: weight
            for group, weight in group_weights.items()
        }
        print('Using class weights:', class_weight)
    config['class_weight'] = class_weight

    # Wrap every generator in a batched, prefetching dataset and record its steps per epoch
    datasets = []
    steps = []
    for generator in (train_gen, val_gen, test_gen):
        dataset = create_classifier_dataset(generator, config['image_shape'], num_classes)
        dataset = dataset.batch(config['batch_size']).prefetch(config['prefetch'])

        steps.append(len(generator) // config['batch_size'])
        datasets.append(dataset)
    config['steps'] = steps

    return datasets

In [None]:
def get_optimizer(config_dict, base_lr):
    """
    Build the optimizer selected in the configuration.

    @param config_dict:     the configuration; 'lr_scheduler' and 'optimizer' are always read,
                            'epochs' and 'steps' only for the 'cosine' scheduler
    @param base_lr:         base learning rate of the optimizer
    @return:                an Adam or SGDW optimizer
    @raise ValueError:      if 'lr_scheduler' or 'optimizer' holds an unsupported value
    """
    scheduler_name = config_dict['lr_scheduler']
    if scheduler_name == 'cosine':
        lr_fn = lr_scheduler.get_decay_fn(
            base_lr=base_lr,
            epochs=config_dict['epochs'],
            steps_per_epoch=config_dict['steps'][0]  # first entry = training split
        )
    elif scheduler_name == 'plateau':
        # ReduceLROnPlateau is a Keras *callback*, not a learning-rate schedule, so it cannot
        # be passed as an optimizer's learning_rate. Start from the constant base_lr here;
        # any plateau-based reduction has to be applied by a callback given to model.fit().
        lr_fn = base_lr
    else:
        raise ValueError(
            "Unsupported lr_scheduler {!r} (expected 'cosine' or 'plateau')".format(scheduler_name)
        )

    optimizer_name = config_dict['optimizer']
    if optimizer_name == 'adam':
        return Adam(learning_rate=lr_fn)
    elif optimizer_name == 'sgdw':
        return SGDW(learning_rate=lr_fn, momentum=0.9, weight_decay=0)
    else:
        raise ValueError(
            "Unsupported optimizer {!r} (expected 'adam' or 'sgdw')".format(optimizer_name)
        )


def load_model(config_dict, evaluation=False):
    """
    Build and compile the classification model inside a MirroredStrategy scope.

    @param config_dict:     the configuration for the model; reads 'gpu_used', 'model_type',
                            'encoder_lr', 'head_lr' and 'num_classes' (plus whatever the
                            selected builder and get_optimizer read)
    @param evaluation:      whether or not the model is loaded for testing (currently unused)
    @return:                classification model
    """
    # 'gpu_used' is passed as the strategy's devices argument
    strategy = tf.distribute.MirroredStrategy(config_dict['gpu_used'])
    print('Number of devices:', strategy.num_replicas_in_sync)

    with strategy.scope():
        # Build model (and load pretrained weights)
        model_build_functions = {
            'cifar': resnet_cifar.get_classifier,
            'resnet50': resnet.get_classifier,
            'vae': vae.get_classifier
        }
        # Use the configuration that was passed in, not the notebook-global `config`
        model = model_build_functions[config_dict['model_type']](config_dict)

        # Set up optimizers: separate learning rates for encoder and classification head
        optimizers_and_layers = [
            (get_optimizer(config_dict, config_dict['encoder_lr']), model.layers[1]),  # encoder
            (get_optimizer(config_dict, config_dict['head_lr']), model.layers[2])      # classification head
        ]
        optimizer = MultiOptimizer(optimizers_and_layers)

        # Print model summary and compile model
        print()
        model.summary()
        model.compile(
            optimizer=optimizer,
            loss='categorical_crossentropy',
            metrics=[
                'acc',
                tf.keras.metrics.TopKCategoricalAccuracy(k=2, name="top_2_accuracy"),
                MatthewsCorrelationCoefficient(num_classes=config_dict['num_classes'], name='MCC')
            ]
        )

    return model

In [36]:
# Read the classifier configuration and select the matching dataset configuration
with open('config/classifier_config.yaml') as config_file:
    config = yaml.safe_load(config_file)
dataset_config = DATASETS_CONFIG[config['dataset_type']]

# Seed NumPy and TensorFlow with the configured random seed
np.random.seed(config['random_seed'])
tf.random.set_seed(config['random_seed'])

# Settings of this run: a VAE-based classifier without pretrained encoder weights
config.update(
    model_type='vae',
    model_name='supervised_vae',
    encoder_weights_path=None,
)

# config['pretrained_dir'] = 'trained_models/vaes/vae_100'
# config['encoder_weights_path'] = os.path.join(config['pretrained_dir'], 'encoder.h5')

In [37]:
config['encoder_trainable'] = True  # not implemented
# NOTE(review): get_optimizer reads the key 'lr_scheduler'; the key set here is spelled
# 'lr_schedular', so this assignment does not reach get_optimizer.
config['lr_schedular'] = 'plateau'  # not implemented
config['optimizer'] = 'adam'  # read by get_optimizer

# Learning rates (and extra settings) depend on the model type
if config['model_type'] == 'vae':
    config.update(latent_dim=512, head_lr=5e-4, encoder_lr=5e-4)

elif config['model_type'].startswith('barlow'):
    config.update(head_lr=0.5, encoder_lr=0.5)

elif config['model_type'] == 'supervised':
    # Fully supervised training starts without pretrained weights
    config.update(
        head_lr=0.01,
        encoder_lr=0.01,
        pretrained_dir=None,
        encoder_weights_path=None,
    )

In [40]:
# Prepare the save directory (sets config['save_dir'])
configure_saving()

# Build the datasets first, then the model (load_model relies on values load_datasets stores in config)
datasets = load_datasets()
model = load_model(config_dict=config)

# Checkpoint the weights whenever the validation accuracy reaches a new maximum
mc = ModelCheckpoint(
    os.path.join(config['save_dir'], 'classifier.h5'),
    monitor='val_acc',
    mode='max',
    verbose=1,
    save_best_only=True,
    save_weights_only=True
)
callbacks = [mc]

# Early stopping is optional and, when enabled, runs before the checkpoint callback
if config['patience'] is not None:
    es = EarlyStopping(monitor='val_acc', mode='max', verbose=1, patience=config['patience'])
    callbacks.insert(0, es)

# Print and save the configuration
log_config(config, save_config=True)

save_dir already exists, continue? (Y/n)  >> Y
Found 11883 validated image filenames belonging to 8 classes.
Found 17829 validated image filenames belonging to 8 classes.
Found 20679 validated image filenames belonging to 8 classes.
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
Number of devices: 4

Model: "functional_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 224, 224, 3)]     0         
_________________________________________________________________
projector (Sequential)       (None, 1024)              14945472  
_________________________________________________________________
dense_18 (Dense)             (None, 8)                 8200      
Total params: 14

In [None]:
# Fit on the training split (index 0) and validate on the validation split (index 1)
history = model.fit(
    x=datasets[0],
    validation_data=datasets[1],
    steps_per_epoch=config['steps'][0],
    validation_steps=config['steps'][1],
    epochs=config['epochs'],
    class_weight=config['class_weight'],
    callbacks=callbacks
)

Epoch 1/30
Epoch 00001: val_acc improved from -inf to 0.42759, saving model to trained_models/classifiers/supervised_vae/classifier.h5
Epoch 2/30
Epoch 00002: val_acc improved from 0.42759 to 0.42873, saving model to trained_models/classifiers/supervised_vae/classifier.h5
Epoch 3/30
Epoch 00003: val_acc did not improve from 0.42873
Epoch 4/30
Epoch 00004: val_acc did not improve from 0.42873
Epoch 5/30
Epoch 00005: val_acc did not improve from 0.42873
Epoch 6/30
Epoch 00006: val_acc improved from 0.42873 to 0.42963, saving model to trained_models/classifiers/supervised_vae/classifier.h5
Epoch 7/30
Epoch 00007: val_acc improved from 0.42963 to 0.43167, saving model to trained_models/classifiers/supervised_vae/classifier.h5
Epoch 8/30
Epoch 00008: val_acc improved from 0.43167 to 0.43399, saving model to trained_models/classifiers/supervised_vae/classifier.h5
Epoch 9/30
Epoch 00009: val_acc improved from 0.43399 to 0.48319, saving model to trained_models/classifiers/supervised_vae/classi

Epoch 20/30
Epoch 00020: val_acc did not improve from 0.68139
Epoch 21/30
Epoch 00021: val_acc did not improve from 0.68139
Epoch 22/30
Epoch 00022: val_acc did not improve from 0.68139
Epoch 23/30
Epoch 00023: val_acc improved from 0.68139 to 0.68212, saving model to trained_models/classifiers/supervised_vae/classifier.h5
Epoch 24/30
Epoch 00024: val_acc did not improve from 0.68212
Epoch 25/30
Epoch 00025: val_acc did not improve from 0.68212
Epoch 26/30
Epoch 00026: val_acc did not improve from 0.68212
Epoch 27/30
Epoch 00027: val_acc did not improve from 0.68212
Epoch 28/30
Epoch 00028: val_acc did not improve from 0.68212
Epoch 29/30

In [30]:
# NOTE(review): configure_saving() was already called in the setup cell above, and this cell's
# execution count (30) is lower than that cell's (40), so it was run out of order. The string
# recorded as this cell's output does not match the current function, which has no return
# statement -- presumably left over from an earlier version; TODO confirm and remove this cell.
configure_saving()

save_dir already exists, continue? (Y/n)  >> Y


'trained_models/classifiers/vae_100'

In [33]:
# Persist the per-epoch training history
history_path = os.path.join(config['save_dir'], 'history.pickle')
with open(history_path, 'wb') as history_file:
    pickle.dump(history.history, history_file)

# Restore the best checkpoint and export the encoder (layer 1) weights on their own
checkpoint_path = os.path.join(config['save_dir'], 'classifier.h5')
encoder_path = os.path.join(config['save_dir'], 'encoder.h5')
model.load_weights(checkpoint_path)
model.layers[1].save_weights(encoder_path)

# Evaluate on the test split (index 2); the result is displayed as the cell output
model.evaluate(datasets[2], steps=config['steps'][2])



[0.8594774007797241, 0.6761718988418579, 0.8863769769668579, 0.516628623008728]