In [1]:
import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator, DirectoryIterator
from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization, Dropout, Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import CSVLogger, EarlyStopping
from tensorflow.keras import Model
from tensorflow.errors import ResourceExhaustedError

import numpy as np
import matplotlib.pyplot as plt
import os
from pathlib import Path


def calc_class_weights(train_iterator):
    """
    Calculate class weighs dictionary to use as input for the cnn training. This is useful if the training set is
    imbalanced.

    The weight of class "i" is calculated as the number of samples in the most populated class divided by the number of
    samples in class i (max_class_frequency / class_frequency).
    Note that the class weights are capped at 10. This is done in order to avoid placing too much weight on
    small fraction of the dataset. For the same reason, the weight is set to 1 for any class in the training set that
    contains fewer than 5 samples.

    :param class_counts: A list with the number of files for each class.
    :return:
    """

    # Fixed parameters
    class_counts = np.unique(train_iterator.classes, return_counts=True)
    class_weights = []
    max_freq = max(class_counts[1])
    class_weights = [max_freq / count for count in class_counts[1]]
    
    print("Classes: " + str(class_counts[0]))
    print("Samples per class: " + str(class_counts[1]))
    print("Class weights: " + str(class_weights))

    return class_weights


def unfreeze_layers(model, last_fixed_layer):
    # Retrieve the index of the last fixed layer and add 1 so that it is also set to not trainable
    first_trainable = model.layers.index(model.get_layer(last_fixed_layer)) + 1

    # Set which layers are trainable.
    for layer_idx, layer in enumerate(model.layers):
        if not isinstance(layer, BatchNormalization):
            layer.trainable = layer_idx >= first_trainable
    return model


def build_model(optimiser, last_fixed_layer):
    model = EfficientNetB4(include_top=False, input_shape=(300, 400, 3), weights="imagenet")

    # Freeze the pretrained weights
    model.trainable = False

    # Rebuild top
    x = GlobalAveragePooling2D(name="avg_pool")(model.output)
    x = BatchNormalization()(x)

    top_dropout_rate = 0.2
    x = Dropout(top_dropout_rate, name="top_dropout")(x)
    outputs = Dense(6, activation="softmax", name="pred")(x)

    model = unfreeze_layers(model, last_fixed_layer)
    
    # Compile
    model = Model(model.input, outputs, name="EfficientNet")
    model.compile(
        optimizer=optimiser, loss="categorical_crossentropy", metrics=["accuracy"]
    )
    return model


def train_model(rotation, shear, zoom, brightness, lr, last_fixed_layer, batch_size, idx):
    model_name = f'efficientnetb4_{idx}'
    if os.path.exists(Path('.') / (model_name + '.h5')):
        print(f'{model_name} already trained')
        return
    print(f'Now training {model_name}')
    
    train_generator = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True,
        rotation_range=rotation,
        shear_range=shear,
        zoom_range=zoom,
        brightness_range=brightness,
        fill_mode='nearest',
        preprocessing_function=preprocess_input,
    )
    train_iterator = train_generator.flow_from_directory(
        '/home/ubuntu/store/barankin-neurips/full/train', 
        target_size=(300, 400),
        class_mode='categorical',
        batch_size=batch_size,
        follow_links=True,
        interpolation='bilinear',
    )
    loss_weights = calc_class_weights(train_iterator)

    optimiser = Adam(lr=lr)
    model = build_model(optimiser, last_fixed_layer)
    
    logger = CSVLogger(model_name + '.csv')

    model.fit(
        x=train_iterator,
        batch_size=batch_size,
        epochs=20,
        verbose=True,
        class_weight=dict(zip(range(6), loss_weights)),
        workers=8,
        callbacks=[logger]
    )
    model.save(model_name + '.h5')


for idx in range(5):
    train_model(15, 0.5, 0.5, [0.5, 1], 0.001, 'block6d_add', 64, idx)

Now training efficientnetb4_0
Found 3214 images belonging to 6 classes.
Classes: [0 1 2 3 4 5]
Samples per class: [1177  165  975  113  178  606]
Class weights: [1.0, 7.133333333333334, 1.2071794871794872, 10.415929203539823, 6.612359550561798, 1.9422442244224423]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Now training efficientnetb4_1
Found 3214 images belonging to 6 classes.
Classes: [0 1 2 3 4 5]
Samples per class: [1177  165  975  113  178  606]
Class weights: [1.0, 7.133333333333334, 1.2071794871794872, 10.415929203539823, 6.612359550561798, 1.9422442244224423]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Now training efficientnetb4_2


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



## Validate

In [1]:
import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator, DirectoryIterator
from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import CSVLogger, EarlyStopping
from tensorflow.keras import Model
from tensorflow.errors import ResourceExhaustedError

import numpy as np
import os
import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd
import glob

base_path = "/home/ubuntu/store/efficientnet-final-size"
model_names = glob.glob("/home/ubuntu/store/efficientnet-final-size/*.h5")

for model_path in model_names:
    model_name = Path(model_path).stem
#     if os.path.exists(Path(base_path) / (model_name + '_preds.csv')):
#         print(f'{model_name} already validated')
#         continue
    print('Now validating', model_name)
    valid_generator = ImageDataGenerator(
        fill_mode='nearest',
        preprocessing_function=preprocess_input
    )
    valid_iterator = valid_generator.flow_from_directory(
        '/home/ubuntu/store/DermX-test-set/test', 
        batch_size=8, 
        target_size=(300, 400),
        class_mode='categorical',
        follow_links=True,
        interpolation='bilinear',
        shuffle=False
    )
    
    model = load_model(Path(model_path))
    preds = [np.argmax(pred) for pred in model.predict(valid_iterator)]
    actual = valid_iterator.labels
    preds_df = pd.DataFrame.from_dict({'actual': actual, 'pred': preds, 'filenames': valid_iterator.filenames}).to_pickle(Path(base_path) / (model_name + '_preds.csv'))
    

Now validating efficientnetb4_4
Found 566 images belonging to 6 classes.
Now validating efficientnetb4_0
Found 566 images belonging to 6 classes.
Now validating efficientnetb4_2
Found 566 images belonging to 6 classes.
Now validating efficientnetb4_3
Found 566 images belonging to 6 classes.
Now validating efficientnetb4_1
Found 566 images belonging to 6 classes.


## Compare

In [4]:
import glob
import pandas as pd
from pathlib import Path
from sklearn.metrics import classification_report

base_path = "/home/ubuntu/store/efficientnet-final-size"
model_preds = glob.glob("/home/ubuntu/store/efficientnet-final-size/*_preds.csv")
model_comparison_dict = {}

for model_pred in model_preds:
    model_preds_df = pd.read_pickle(Path(model_pred))
    model_comparison_dict[Path(model_pred).stem] = classification_report(
        model_preds_df['actual'], 
        model_preds_df['pred'],
        labels=[0, 1, 2, 3, 4, 5],
        target_names=['acne', 'actinic_keratosis', 'psoriasis_no_pustular', 'seborrheic_dermatitis', 'vitiligo', 'wart'],
        output_dict=True
    )['macro avg']
    model_comparison_dict[Path(model_pred).stem]['accuracy'] = len(model_preds_df[model_preds_df['actual'] == model_preds_df['pred']]) / len(model_preds_df)
    
model_comparison_df = pd.DataFrame.from_dict(model_comparison_dict, orient='index')
model_comparison_df

Unnamed: 0,precision,recall,f1-score,support,accuracy
efficientnetb4_3_preds,0.460444,0.460606,0.436414,566,0.485866
efficientnetb4_2_preds,0.450772,0.451591,0.442826,566,0.477032
efficientnetb4_1_preds,0.434676,0.456591,0.438718,566,0.482332
efficientnetb4_0_preds,0.46871,0.404031,0.382892,566,0.427562
efficientnetb4_4_preds,0.373043,0.224015,0.153004,566,0.236749
