In [36]:
from config import *
import tensorflow as tf
from tensorflow.data.experimental import AUTOTUNE
import tensorflow.keras.backend as K
import numpy as np
import random
from glob import glob
import time
import matplotlib.pyplot as plt
import efficientnet.tfkeras as efn
import scipy as sp
from functools import partial
from sklearn import metrics

# Training

Now that the data has been preprocessed, we are ready to train our model. To consume the TFRecords we created previously, we will use the TensorFlow Dataset API (`tf.data`).

## Table of Contents
1. [Creating the dataset](#Creating-the-dataset)
2. [Model and Kappa optimizer](#Model-and-Kappa-optimizer)
3. [Training](#Training)

## Creating the dataset

In [2]:
# Side length (pixels) of the square images; selects the preprocessed
# tfrecords directory and defines the model input shape.
IMAGE_SIZE = 224
# Name of the preprocessing variant; used as a path component when globbing tfrecords.
PREPROCESSING = 'subtract_median'
# Probability with which each individual augmentation is applied in `augment`.
AUGMENT_P = .25
# Dropout rate placed before the regression head in `Baseline`.
DROPOUT_P = .2
BATCH_SIZE = 64
# Shuffle buffer holds ten batches' worth of examples.
SHUFFLE_BUFFER = BATCH_SIZE * 10
# Seed passed to the tf.data shuffle ops.
SEED = 10011

In [3]:
def test_speed(ds):
    begin = time.time()
    i = 0
    for x in ds:
        i+=1
    for x in ds:
        pass
    end = time.time()
    elapsed = (end-begin)/2
    result = '{:.3f} seconds for {} images or batches, {:.3f} ms/image or batch'.format(elapsed, i, 1000*elapsed/i)
    print(result)
    return x

In [4]:
# Map each dataset name to the sorted list of its tfrecords files for the
# configured preprocessing variant and image size.
files = {
    ds: sorted(glob(os.path.join(DATA_DIR, ds, 'preproc', PREPROCESSING, f'{IMAGE_SIZE}', '*.tfrecords')))
    for ds in DATASETS
}

In [5]:
# Validation uses the APTOS file at sorted index 1; training uses the APTOS
# file at index 0 plus every file of every other dataset.
val_files = [files['aptos'][1]]
train_files = [files['aptos'][0]]
for ds in DATASETS:
    if ds != 'aptos':
        train_files += files[ds]
# Shuffle with a dedicated, seeded generator so the file order is
# reproducible across kernel restarts without reseeding the global RNG.
random.Random(SEED).shuffle(train_files)

In [6]:
# Raw record datasets; the training files are read sequentially
# (num_parallel_reads=None), in the order of `train_files`.
train_ds = tf.data.TFRecordDataset(filenames=train_files, num_parallel_reads=None)
val_ds = tf.data.TFRecordDataset(filenames=val_files)

In [7]:
# _ = test_speed(val_ds)

In [8]:
# Deserialize each record. `parse_function` is not defined in this notebook;
# presumably it comes from `config` -- the later cells rely on it yielding a
# dict with 'diagnosis', 'id' and 'img' entries.
train_ds = train_ds.map(parse_function, AUTOTUNE)
val_ds = val_ds.map(parse_function, AUTOTUNE)

In [9]:
# _ = test_speed(val_ds)

In [10]:
# Shuffle both pipelines with the same buffer size and seed.
train_ds = train_ds.shuffle(buffer_size=SHUFFLE_BUFFER, seed=SEED)
val_ds = val_ds.shuffle(buffer_size=SHUFFLE_BUFFER, seed=SEED)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [None]:
def _decode_example(example):
    """Cast the label to float32 and decode the JPEG bytes to a float32 image."""
    image = tf.io.decode_jpeg(example['img'])
    return {
        'diagnosis': tf.cast(example['diagnosis'], dtype=tf.float32),
        'id': example['id'],
        'img': tf.image.convert_image_dtype(image, dtype=tf.float32),
    }


# The same decoding is applied to both splits.
train_ds = train_ds.map(_decode_example, AUTOTUNE)
val_ds = val_ds.map(_decode_example, AUTOTUNE)

In [12]:
# Repeat the training data indefinitely; epoch length is set by
# `steps_per_epoch` in the fit calls below.
train_ds = train_ds.repeat()

In [14]:
@tf.function
def rotate(x):
    """Rotate an image by a random non-zero multiple of 90 degrees.

    Args:
        x: image tensor accepted by `tf.image.rot90`.

    Returns:
        The image rotated by 90, 180 or 270 degrees.
    """
    # `maxval` is exclusive for integer dtypes, so 4 is needed for k to cover
    # {1, 2, 3}; with maxval=3 the 270-degree rotation was never drawn.
    quarter_turns = tf.random.uniform(shape=[], minval=1, maxval=4, dtype=tf.int32)
    return tf.image.rot90(x, k=quarter_turns)

In [15]:
@tf.function
def flip(x):
    """Return `x` after a random left-right flip (`tf.image.random_flip_left_right`)."""
    maybe_flipped = tf.image.random_flip_left_right(image=x)
    return maybe_flipped

In [16]:
@tf.function
def color(x):
    """Apply random hue, saturation, brightness and contrast jitter, in that order."""
    hue_shifted = tf.image.random_hue(x, max_delta=0.08)
    saturated = tf.image.random_saturation(hue_shifted, lower=0.6, upper=1.6)
    brightened = tf.image.random_brightness(saturated, max_delta=0.05)
    return tf.image.random_contrast(brightened, lower=0.7, upper=1.3)

In [17]:
# Augmentations in application order; `augment` iterates this list and
# `Model.__init__` logs the function names.
augmentations = [rotate, flip, color]

In [18]:
@tf.function
def augment(x):
    """Apply every function in `augmentations`, each independently with probability AUGMENT_P.

    Args:
        x: tensor passed through the augmentation functions (presumably a
            decoded float image -- confirm against the calling pipeline).

    Returns:
        The tensor after zero or more augmentations were applied.
    """
    for f in augmentations:
        # One uniform draw per augmentation decides whether it is applied;
        # otherwise `x` passes through unchanged.
        x = tf.cond(tf.random.uniform([], 0, 1) < AUGMENT_P, lambda: f(x), lambda: x)
    return x

In [20]:
def _as_train_pair(example):
    """Build the (features, targets) pair for training, augmenting the image."""
    features = {'img': augment(example['img'])}
    targets = {'diagnosis': example['diagnosis']}
    return features, targets


def _as_val_pair(example):
    """Build the (features, targets) pair for validation; keeps 'id', no augmentation."""
    features = {'img': example['img'], 'id': example['id']}
    targets = {'diagnosis': example['diagnosis']}
    return features, targets


train_ds = train_ds.map(_as_train_pair, AUTOTUNE)
val_ds = val_ds.map(_as_val_pair, AUTOTUNE)

In [22]:
# Group examples into batches of BATCH_SIZE (the last validation batch may be smaller).
train_ds = train_ds.batch(BATCH_SIZE)
val_ds = val_ds.batch(BATCH_SIZE)

In [23]:
# Prefetch batches so input preparation can overlap with consumption.
train_ds = train_ds.prefetch(AUTOTUNE)
val_ds = val_ds.prefetch(AUTOTUNE)

In [24]:
# Time the finished validation pipeline; the printed count is the number of batches.
_ = test_speed(val_ds)

0.689 seconds for 26 images or batches, 26.511 ms/image or batch


## Model and Kappa optimizer

In [25]:
class OptimizedRounder:
    """Learn thresholds that turn continuous predictions into ordinal grades.

    `fit` searches (Nelder-Mead) for the thresholds that maximise the
    quadratic-weighted Cohen's kappa; `predict` applies thresholds to
    predictions. Both use the same thresholding rule, so the labels produced
    by `predict` are exactly the ones the optimiser scored.
    """

    def __init__(self):
        # Scalar placeholder until `fit` stores an array of thresholds.
        self.coefficients = 0

    @staticmethod
    def _apply_thresholds(X, coef):
        """Label each value with the index of the first threshold it is below.

        Values not below any threshold (including NaN) receive `len(coef)`.
        For increasing thresholds this matches `np.digitize(X, coef)`, but it
        stays well-defined when the optimiser proposes unsorted thresholds.
        """
        values = np.asarray(X)
        n_thresholds = len(coef)
        labels = np.full(values.shape, n_thresholds, dtype=np.intp)
        # Assign from the last threshold to the first so that the lowest
        # matching index wins, mirroring an if/elif chain.
        for grade in reversed(range(n_thresholds)):
            labels[values < coef[grade]] = grade
        return labels

    def _kappa_loss(self, coef, X, y):
        """Return the negative quadratic-weighted kappa of thresholded `X` vs `y`."""
        # Imported locally: a later notebook cell rebinds the global name
        # `metrics` to a dict, which would break a module-level lookup.
        from sklearn.metrics import cohen_kappa_score

        # Keep the labels in the dtype of the predictions, as a copy of X would be.
        X_p = self._apply_thresholds(X, coef).astype(np.asarray(X).dtype)
        return -cohen_kappa_score(y, X_p, weights='quadratic')

    def fit(self, X, y):
        """Optimise the thresholds on predictions `X` against true labels `y`.

        The result is stored in `self.coefficients`.
        """
        loss_partial = partial(self._kappa_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coefficients = sp.optimize.minimize(loss_partial, initial_coef, method='nelder-mead')['x']

    def predict(self, X, coef=None):
        """Convert predictions `X` to integer grades.

        Args:
            X: array-like of continuous predictions.
            coef: thresholds to apply; defaults to the fitted
                `self.coefficients`.

        Returns:
            Integer array shaped like `X` with values in `0..len(coef)`.

        Raises:
            ValueError: if `coef` is omitted and `fit` has not been called.
        """
        if coef is None:
            coef = self.coefficients
            if np.ndim(coef) == 0:
                raise ValueError('no thresholds available: call fit() first or pass coef explicitly')
        return self._apply_thresholds(X, coef)

In [26]:
class Model:
    """Wrapper around a Keras model with a training log, checkpoints and a kappa rounder.

    Subclasses are expected to set `self.model` to a compiled Keras model.
    `self.log` is a plain dict describing the run; it is pickled next to
    every checkpoint and restored by `Model.load`.
    """

    def __init__(self, load=False, name='', image_size=IMAGE_SIZE, preprocessing=PREPROCESSING, **kwargs):
        """Create the rounder and, unless loading, a fresh log and directory tree.

        Args:
            load: when True, skip log/directory creation (`Model.load`
                fills in `log` and `model` afterwards).
            name: model family name; becomes a directory under MODELS_DIR.
            image_size: recorded in the log.
            preprocessing: recorded in the log.
            **kwargs: accepted and ignored.
        """
        self.optimizer = OptimizedRounder()
        if not load:
            self.log = {}
            self.log['description'] = []
            self.log['trained_for'] = 0
            self.log['name'] = name
            self.log['preprocessing'] = preprocessing
            self.log['augmentations'] = [f.__name__ for f in augmentations]
            self.log['birthday'] = get_time('%m-%d_%H-%M-%S')
            self.log['image_size'] = image_size
            self.log['augment_p'] = AUGMENT_P
            self.log['dropout_p'] = DROPOUT_P
            self.log['batch_size'] = BATCH_SIZE
            self.log['family_dir'] = os.path.join(MODELS_DIR, '{}'.format(name))
            self.log['model_dir'] = os.path.join(self.log['family_dir'], self.log['birthday'])
            self.log['tb_dir'] = os.path.join(self.log['model_dir'], 'tb')
            self.log['checkpoints_dir'] = os.path.join(self.log['model_dir'], 'model_checkpoints')
            # makedirs also creates missing parents (e.g. MODELS_DIR itself)
            # and tolerates directories that already exist.
            for key in ('family_dir', 'model_dir', 'tb_dir', 'checkpoints_dir'):
                os.makedirs(self.log[key], exist_ok=True)
            self.log['history'] = []

    def update_history(self, history):
        """Append a Keras history object to the log."""
        self.log['history'].append(history)

    def fit(self, *args, **kwargs):
        """Train the wrapped Keras model and record the run in the log.

        All arguments are forwarded to `self.model.fit`. When
        `steps_per_epoch` is passed as a keyword, `log['trained_for']` grows
        by `steps_per_epoch * epochs` (epochs defaults to 1, as in Keras).

        Returns:
            The history object returned by Keras.
        """
        history = self.model.fit(*args, **kwargs)
        self.update_history(history)
        # Use .get so a call without these keywords does not raise KeyError
        # after training has already finished.
        steps = kwargs.get('steps_per_epoch')
        if steps is not None:
            self.log['trained_for'] += steps * kwargs.get('epochs', 1)
        return history

    def predict(self, ds, with_truth):
        """Run the model over `ds`.

        Args:
            ds: dataset to predict on. With `with_truth=True` it must yield
                `(features, targets)` pairs containing `features['img']` and
                `targets['diagnosis']`.
            with_truth: whether to also collect the true labels.

        Returns:
            `(truth, preds)` as 1-D numpy arrays when `with_truth` is True,
            otherwise the flattened output of `self.model.predict(ds)`.
        """
        if with_truth:
            truth = []
            preds = []
            # Iterate the dataset that was passed in (not a notebook global),
            # so labels and predictions always come from the same batches.
            for x, y in ds:
                truth.extend(y['diagnosis'].numpy())
                preds.extend(self(x['img'])['diagnosis'].numpy().flatten())
            return np.array(truth), np.array(preds)
        else:
            return self.model.predict(ds).flatten()

    def fit_optimizer(self, truth, preds):
        """Fit the kappa rounder's thresholds on `preds` against `truth`."""
        self.optimizer.fit(preds, truth)

    def optimize_predictions(self, preds):
        """Convert continuous predictions to grades using the fitted thresholds."""
        return self.optimizer.predict(preds, self.optimizer.coefficients)

    def calculate_kappa(self, truth, preds):
        """Return the quadratic-weighted Cohen's kappa between `truth` and `preds`."""
        # Imported locally: a later notebook cell rebinds the global name
        # `metrics` to a dict, which would break a module-level lookup.
        from sklearn.metrics import cohen_kappa_score
        return cohen_kappa_score(truth, preds, weights='quadratic')

    def save(self, description=None):
        """Write a new numbered checkpoint (model.h5 + log.pkl).

        Args:
            description: optional text appended to `log['description']`.
        """
        if description:
            self.log['description'].append(description)
        # The next checkpoint index is the number of existing checkpoint directories.
        i = len(glob(os.path.join(self.log['checkpoints_dir'], '*/')))
        path = os.path.join(self.log['checkpoints_dir'], 'checkpoint_{:02d}'.format(i))
        os.mkdir(path)
        self.log['optimizer_coefficients'] = self.optimizer.coefficients
        # Only the optimizer config is stored in the log; the model file is
        # written without optimizer state.
        self.log['optimizer'] = self.model.optimizer.get_config()
        self.model.save(os.path.join(path, 'model.h5'), overwrite=False, include_optimizer=False)
        save_pickle(self.log, os.path.join(path, 'log.pkl'))

    def __call__(self, *args, **kwargs):
        """Call the wrapped Keras model directly."""
        return self.model(*args, **kwargs)

    @staticmethod
    def load(family, birthday, checkpoint, custom_objects=None):
        """Restore a model, its log and its rounder thresholds from a checkpoint.

        Args:
            family: model family name (directory under MODELS_DIR).
            birthday: creation timestamp string of the run.
            checkpoint: integer checkpoint index.
            custom_objects: forwarded to `tf.keras.models.load_model`.

        Returns:
            A `Model` with `log`, `model` (recompiled) and rounder
            coefficients restored.

        Raises:
            ValueError: if MACHINE is neither 'kaggle' nor 'local'.
        """
        model = Model(load=True)
        checkpoint_name = 'checkpoint_{:02d}'.format(checkpoint)
        if MACHINE == 'kaggle':
            # Dataset directory name: birthday without '-' and with '_' turned into '-'.
            dataset_suffix = ''.join([c for c in birthday if c != '-']).replace('_', '-')
            path = os.path.join(SRC_DIR, 'aptos-{}-{}'.format(family, dataset_suffix),
                                f'{birthday}', 'model_checkpoints', checkpoint_name)
        elif MACHINE == 'local':
            path = os.path.join(MODELS_DIR, family, birthday, 'model_checkpoints', checkpoint_name)
        else:
            # Previously this fell through and failed later with an unbound `path`.
            raise ValueError('unsupported MACHINE: {!r}'.format(MACHINE))
        # NOTE(review): `load_pickle` is defined outside this notebook; if it
        # unpickles the file, only load checkpoints from trusted sources.
        model.log = load_pickle(os.path.join(path, 'log.pkl'))
        model.model = tf.keras.models.load_model(os.path.join(path, 'model.h5'), compile=False,
                                                 custom_objects=custom_objects)
        optimizer = tf.keras.optimizers.get(model.log['optimizer']['name']).from_config(model.log['optimizer'])
        model.model.compile(optimizer=optimizer, loss=fix_customs(model.log['loss']),
                            metrics=fix_customs(model.log['metrics']))
        model.optimizer.coefficients = model.log['optimizer_coefficients']
        return model

In [27]:
class Baseline(Model):
    """EfficientNet-B0 feature extractor (frozen) with a single-unit regression head."""

    def __init__(self, **kwargs):
        """Create the run log via `Model.__init__` and build the network.

        Args:
            **kwargs: forwarded to `Model.__init__` (e.g. `image_size`).
        """
        name = 'baseline'
        super().__init__(name=name, **kwargs)
        self.model = self.build_model()
        self.log['description'].append('using efficientnetb0 base as pretrained and training the top layer with regression')

    def build_model(self):
        """Build and compile the Keras model; records loss and metrics in the log.

        Returns:
            A compiled `tf.keras` model mapping `{'img': ...}` to
            `{'diagnosis': ...}`.
        """
        optimizer = 'adam'
        loss = {'diagnosis': 'mean_squared_error'}
        metric_names = {'diagnosis': ['r2']}
        self.log['loss'] = loss
        self.log['metrics'] = metric_names

        # Build from the logged image size so the network always matches
        # what the log (and any `image_size` passed to the constructor) says.
        image_size = self.log['image_size']
        input_shape = (image_size, image_size, 3)

        img_input = tf.keras.layers.Input(shape=input_shape, name='img')

        B0_base = efn.EfficientNetB0(include_top=False, weights='imagenet',
                                     input_shape=input_shape, pooling='avg')
        # Only the head is trained initially.
        B0_base.trainable = False

        x = B0_base(img_input)
        x = tf.keras.layers.Dropout(rate=DROPOUT_P, name='dropout_1')(x)
        # Linear output: the diagnosis is predicted by regression.
        out = tf.keras.layers.Dense(1, activation=None, name='diagnosis')(x)

        model = tf.keras.models.Model(inputs={'img': img_input}, outputs={'diagnosis': out}, name='Baseline')
        model.compile(optimizer=optimizer, loss=fix_customs(loss), metrics=fix_customs(metric_names))

        return model

## Training

In [28]:
# Instantiating also creates the run's directory tree under MODELS_DIR.
model = Baseline()

In [29]:
# TensorBoard callback writing to this run's 'tb' directory, updated every batch.
tb = tf.keras.callbacks.TensorBoard(log_dir = model.log['tb_dir'], write_graph=False, update_freq='batch')

In [None]:
# First phase: the base is frozen (see Baseline.build_model), so only the head trains.
# validation_steps=26 matches the batch count reported by test_speed(val_ds) above.
history=model.fit(train_ds, steps_per_epoch=300, epochs=20, validation_data=val_ds, validation_steps=26,
                           callbacks = [tb], shuffle=False, verbose=1)


Train for 300 steps, validate for 26 steps
Epoch 1/20
Epoch 2/20

In [None]:
# Compile settings reused by the alternating training phases below.
optimizer = 'adam'
loss = {'diagnosis': 'mean_squared_error'}
# NOTE(review): this rebinds the global name `metrics`, which the import cell
# bound to the `sklearn.metrics` module. Code that looks up
# `metrics.cohen_kappa_score` after this cell runs will see this dict
# instead -- re-run the import cell first or rename this variable
# (together with its uses in the following cells).
metrics = {'diagnosis': ['r2']}

In [None]:
# Phase: fine-tune the base network (layers[1]) with the regression head
# (layers[-1]) frozen. Changing `trainable` requires a recompile to take
# effect; compile() does not reinitialise weights, so no snapshot/restore of
# `model.model.weights` is needed (and assigning to that attribute is
# believed to fail, as Keras exposes it as a read-only property).
model.model.layers[1].trainable = True
model.model.layers[-1].trainable = False
model.model.compile(optimizer='adam', loss=fix_customs(loss), metrics=fix_customs(metrics))
history = model.fit(train_ds, steps_per_epoch=300, epochs=10, validation_data=val_ds, validation_steps=26,
                    callbacks=[tb], shuffle=False, verbose=1)


In [None]:
# Phase: train the regression head (layers[-1]) with the base network
# (layers[1]) frozen. Changing `trainable` requires a recompile to take
# effect; compile() does not reinitialise weights, so no snapshot/restore of
# `model.model.weights` is needed (and assigning to that attribute is
# believed to fail, as Keras exposes it as a read-only property).
model.model.layers[1].trainable = False
model.model.layers[-1].trainable = True
model.model.compile(optimizer='adam', loss=fix_customs(loss), metrics=fix_customs(metrics))
history = model.fit(train_ds, steps_per_epoch=300, epochs=20, validation_data=val_ds, validation_steps=26,
                    callbacks=[tb], shuffle=False, verbose=1)

In [None]:
# Phase: fine-tune the base network (layers[1]) again with the regression
# head (layers[-1]) frozen. Changing `trainable` requires a recompile to take
# effect; compile() does not reinitialise weights, so no snapshot/restore of
# `model.model.weights` is needed (and assigning to that attribute is
# believed to fail, as Keras exposes it as a read-only property).
model.model.layers[1].trainable = True
model.model.layers[-1].trainable = False
model.model.compile(optimizer='adam', loss=fix_customs(loss), metrics=fix_customs(metrics))
history = model.fit(train_ds, steps_per_epoch=300, epochs=10, validation_data=val_ds, validation_steps=26,
                    callbacks=[tb], shuffle=False, verbose=1)


In [None]:
# Phase: final training of the regression head (layers[-1]) with the base
# network (layers[1]) frozen. Changing `trainable` requires a recompile to
# take effect; compile() does not reinitialise weights, so no
# snapshot/restore of `model.model.weights` is needed (and assigning to that
# attribute is believed to fail, as Keras exposes it as a read-only property).
model.model.layers[1].trainable = False
model.model.layers[-1].trainable = True
model.model.compile(optimizer='adam', loss=fix_customs(loss), metrics=fix_customs(metrics))
history = model.fit(train_ds, steps_per_epoch=300, epochs=20, validation_data=val_ds, validation_steps=26,
                    callbacks=[tb], shuffle=False, verbose=1)

In [None]:
# Checkpoint the trained weights and the log before fitting the kappa thresholds.
model.save(description='back and forth training, top-base-top-base-top')

In [None]:
# Collect true labels and raw regression outputs over the validation set.
truth, preds = model.predict(val_ds, with_truth=True)

In [None]:
# Fit the rounding thresholds that maximise quadratic-weighted kappa on these predictions.
model.fit_optimizer(truth, preds)

In [None]:
# Convert the continuous predictions to integer grades with the fitted thresholds.
optimized_preds =  model.optimize_predictions(preds)

In [None]:
# Quadratic-weighted kappa on the same validation data the thresholds were fitted on.
kappa = model.calculate_kappa(truth, optimized_preds)

In [None]:
# Second checkpoint: same weights, now with the fitted thresholds stored in the log.
model.save(description='kappa {:.4f} after training back and forth'.format(kappa))

In [None]:
# Wait 30 seconds before the shutdown cell below runs.
time.sleep(30)

In [None]:
# WARNING: powers off the machine. Under "Run All" nothing after this cell
# (including the Tests section) will execute.
!sudo poweroff

## Tests

In [34]:
# Reload checkpoint 0 of a specific earlier run, identified by its birthday timestamp.
model = Model.load(family='baseline', birthday='08-29_18-05-53', checkpoint=0)

In [34]:
# Collect true labels and raw regression outputs from the reloaded model.
truth, preds = model.predict(val_ds, with_truth=True)

In [37]:
# Re-fit the rounding thresholds on the reloaded model's validation predictions.
model.fit_optimizer(truth, preds)

In [38]:
# Convert the continuous predictions to integer grades with the fitted thresholds.
optimized_preds =  model.optimize_predictions(preds)

In [39]:
# Quadratic-weighted kappa of the thresholded predictions against the true labels.
kappa = model.calculate_kappa(truth, optimized_preds)

In [40]:
# Validation kappa; the thresholds were fitted on this same data, so the value is optimistic.
print(kappa)

0.8279213458450164
