# Overview

Based on Wei Hao's awesome training pipeline [3]

# Features
 
- GPU with optional Mixed precision support
- TPU support *IN PROGRESS*
- Wide range of supported backend models via Keras Applications models
- Wide range of supported optimizers from TensorFlow and TensorFlow Addons
- Wide range of supported learning rate scheduling strategies via TensorFlow, TensorFlow experimental and TensorFlow Addons
- RandAug augmentation support with UNetAug added as a custom aug strategy *IN PROGRESS*
- Overall cleanup and parameterization of the pipeline



Below you can train EfficientNetB7 (efnB7) relatively fast, at ~18 seconds per epoch with an image size of 600, which lets this notebook train large models for more epochs within the 9-hour limit.

Don't forget to turn on the GPU or TPU :)


In [None]:
# Install the keras-applications package from the local input directory.
# --no-index stops pip from contacting a package index; stdout is discarded.
!pip install ../input/kerasapplications/keras-team-keras-applications-3b180cb -f ./ --no-index >/dev/null

In [None]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import tensorflow as tf 
import matplotlib.pyplot as plt 
from tqdm.notebook import tqdm 
from tensorflow.keras.layers import (
    Dense, Dropout, Activation, Flatten, Input, BatchNormalization, GlobalAveragePooling2D, Add, Conv2D, AveragePooling2D, 
    LeakyReLU, Concatenate 
)
from tensorflow.keras import Model
from tensorflow.keras.utils import Sequence
import tensorflow.keras.backend as K
import tensorflow.keras.applications as tfka

from sklearn.model_selection import train_test_split, KFold
import seaborn as sns
from tensorflow.keras.mixed_precision import experimental as mixed_precision
import tensorflow_addons as tfa
from tensorflow_addons.optimizers import AdamW

In [None]:
# Pick the distribution strategy: TPU when a TPU cluster can be resolved,
# otherwise a MirroredStrategy. The GPU flag records which path was taken and
# is read later to decide whether GPU-only setup should run.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection; the except branch below handles ValueError
    print("Running on TPU ", tpu.cluster_spec().as_dict()["worker"])
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
    GPU=False
except ValueError:
    # No TPU available: fall back to the mirrored (CPU/GPU) strategy.
    print("Not connected to a TPU runtime. Using CPU/GPU strategy")
    strategy = tf.distribute.MirroredStrategy()
    GPU=True

# Training Parameters



In [None]:
# --- Run length and batch geometry -----------------------------------------
EPOCHS = 10  # epochs run for every fold

# 4 images per replica, scaled by the number of replicas in the strategy.
BATCH_SIZE = strategy.num_replicas_in_sync * 4
print(f'Batch size = {BATCH_SIZE}')

# Square image side. If you are training EfficientNet please read
# https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/
IM_SIZE = 512

# --- Backbone ----------------------------------------------------------------
# Supported models are listed at
# https://www.tensorflow.org/api_docs/python/tf/keras/applications
BASE_MODEL = 'EfficientNetB1'

# Arguments handed to build_model, e.g. drop_connect_rate=0.4 for EfficientNet
# (its default is 0.2).
BASE_MODEL_KWARGS = {
    'drop_connect_rate': 0.2,
    'input_shape': (IM_SIZE, IM_SIZE, 1),
    'include_top': False,
    'weights': None,
}

# --- Cross-validation --------------------------------------------------------
NFOLD = 5  # number of folds in K-fold cross-validation (CV)

# --- Learning-rate schedule (names must exist in schedules_mapper) -----------
START_LR = 1e-2 * strategy.num_replicas_in_sync  # initial learning rate
FIRST_DECAY_STEPS = 10.0

LR_STRATEGY = 'CosineDecayRestarts'
LR_KWARGS = {
    'initial_learning_rate': START_LR,
    'first_decay_steps': FIRST_DECAY_STEPS,
}

# --- Optimizer (names must exist in optimizers_mapper) -----------------------
WD = 0.01  # weight decay
OPTIMIZER = 'AdamW'
OPTIMIZER_KWARGS = {'weight_decay': WD}

# --- Checkpointing, precision and early stopping -----------------------------
SAVE_BEST = True  # keep only the best weights as judged by validation loss

MIXED_PRECISION = True

# NOTE: the name is misspelled ("EALRY") but is kept because the training cell
# reads it under this exact name.
EALRY_STOPPING_PATIENCE = 40

# --- Steps per epoch (scaled inversely with the batch size) ------------------
TRAIN_STEPS = int((8 / BATCH_SIZE) * 32)  # training steps per epoch

VAL_STEPS = int((8 / BATCH_SIZE) * 16)  # validation steps per epoch

# --- Input locations ---------------------------------------------------------
DATA_DIR = [
    "../input/osic-pulmonary-fibrosis-progression",
    "../input/osic-pulmonary-fibrosis-progression-lungs-mask",
]



import datetime

def get_now_string():
    """Return the current local time formatted as ``YYYYMMDD_HHMMSS``.

    The fractional-second part is dropped.
    """
    return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Checkpoints for this run go into a directory named after the current
# timestamp (with a trailing slash so file names can be appended directly).
OUTPUT_DIR = f"{get_now_string()}/"
os.mkdir(OUTPUT_DIR)
print("saving models to :", OUTPUT_DIR)

In [None]:
# Supported optimizers: class name -> module the class is looked up in.
optimizers_mapper = {
    **dict.fromkeys(
        (
            "AdamW",
            "ConditionalGradient",
            "LAMB",
            "LazyAdam",
            "NovoGrad",
            "RectifiedAdam",
            "SGDW",
            "SWA",
            "Yogi",
        ),
        tfa.optimizers,
    ),
    **dict.fromkeys(
        (
            "Adadelta",
            "Adagrad",
            "Adam",
            "Adamax",
            "Ftrl",
            "Nadam",
            "RMSprop",
            "SGD",
        ),
        tf.keras.optimizers,
    ),
}

# Supported learning-rate schedules: class name -> module the class is looked
# up in.
schedules_mapper = {
    **dict.fromkeys(
        (
            "CyclicalLearningRate",
            "ExponentialCyclicalLearningRate",
            "Triangular2CyclicalLearningRate",
            "TriangularCyclicalLearningRate",
        ),
        tfa.optimizers,
    ),
    **dict.fromkeys(
        (
            "ExponentialDecay",
            "InverseTimeDecay",
            "PiecewiseConstantDecay",
            "PolynomialDecay",
        ),
        tf.keras.optimizers.schedules,
    ),
    **dict.fromkeys(
        (
            "CosineDecay",
            "CosineDecayRestarts",
            "LinearCosineDecay",
            "NoisyLinearCosineDecay",
        ),
        tf.keras.experimental,
    ),
}

In [None]:
# Load the training table. Other cells read its Patient, Weeks, FVC, Age, Sex
# and SmokingStatus columns.
train = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv') 

In [None]:
# Preview the first rows of the training table.
train.head()

In [None]:
# Show the distinct SmokingStatus values; get_tab encodes these categories.
train.SmokingStatus.unique()

In [None]:
def get_tab(df):
    """Encode one patient's tabular data as a 4-element feature vector.

    Only the first row of ``df`` is read.

    Layout of the returned array:
        [0] age, scaled as ``(Age - 30) / 30``
        [1] sex flag: 0 for male, 1 otherwise
        [2:4] two-bit smoking code:
              'Never smoked' -> (0, 0), 'Ex-smoker' -> (1, 1),
              'Currently smokes' -> (0, 1), anything else -> (1, 0)

    Args:
        df: table with ``Age``, ``Sex`` and ``SmokingStatus`` columns and at
            least one row.

    Returns:
        ``np.ndarray`` of length 4.
    """
    age = (df.Age.values[0] - 30) / 30

    # Compare case-insensitively: an exact match on lowercase 'male' sends a
    # capitalised 'Male' down the "not male" branch, which makes the flag
    # identical for every patient when the data uses capitalised labels.
    is_male = str(df.Sex.values[0]).strip().lower() == 'male'
    sex_flag = 0 if is_male else 1

    smoking_codes = {
        'Never smoked': (0, 0),
        'Ex-smoker': (1, 1),
        'Currently smokes': (0, 1),
    }
    smoking_bits = smoking_codes.get(df.SmokingStatus.values[0], (1, 0))

    return np.array([age, sex_flag, *smoking_bits])

In [None]:
# Per-patient training targets and features.
A = {}    # patient id -> slope of a least-squares line fitted to FVC over Weeks
TAB = {}  # patient id -> tabular feature vector produced by get_tab
P = []    # patient ids, in the order they are visited below
# Iterate the unique ids directly (no enumerate wrapper) so tqdm can read the
# length and show a total.
for p in tqdm(train.Patient.unique()):
    sub = train.loc[train.Patient == p, :]
    fvc = sub.FVC.values
    weeks = sub.Weeks.values
    # Design matrix [weeks, 1]: the least-squares solution is (slope, intercept).
    c = np.vstack([weeks, np.ones(len(weeks))]).T
    # rcond=None selects NumPy's current default cutoff explicitly, which
    # avoids the FutureWarning older NumPy versions emit when it is omitted.
    a, b = np.linalg.lstsq(c, fvc, rcond=None)[0]

    A[p] = a  # only the slope is kept; the intercept b is not used
    TAB[p] = get_tab(sub)
    P.append(p)

In [None]:
def get_img(path):
    """Load a DICOM file and return its pixels, rescaled and resized.

    The pixel array has ``RescaleIntercept`` subtracted, is divided by
    ``RescaleSlope * 1000`` and is then resized to ``IM_SIZE`` x ``IM_SIZE``.
    """
    dicom = pydicom.dcmread(path)
    shifted = dicom.pixel_array - dicom.RescaleIntercept
    scaled = shifted / (dicom.RescaleSlope * 1000)
    return cv2.resize(scaled, (IM_SIZE, IM_SIZE))

In [None]:
# For every lung-mask file, collect:
#   x: mean grey level of the mask image
#   y: the file's number divided by the largest file number of that patient
# File names are parsed by dropping their last four characters (presumably a
# three-letter extension such as ".jpg" — confirm against the dataset).
x, y = [], []
mask_root = '../input/osic-pulmonary-fibrosis-progression-lungs-mask/mask_noise/mask_noise'
for p in tqdm(train.Patient.unique()):
    try:
        ldir = os.listdir(f'{mask_root}/{p}/')
        numb = [float(i[:-4]) for i in ldir]
        top = max(numb)  # hoisted: constant for the whole patient
        for i, n in zip(ldir, numb):
            # Compute both values before appending so that a failure can
            # never leave x and y with different lengths.
            mask_mean = cv2.imread(f'{mask_root}/{p}/{i}', 0).mean()
            position = n / top
            x.append(mask_mean)
            y.append(position)
    except (OSError, ValueError, AttributeError, ZeroDivisionError, cv2.error):
        # Best effort, as before: patients without a mask directory, with
        # unparsable file names, or with unreadable images (cv2.imread
        # returns None) are skipped from the point of failure. Unrelated
        # errors and KeyboardInterrupt are no longer swallowed.
        continue

In [None]:
class IGenerator(Sequence):
    """Keras ``Sequence`` that yields randomly sampled training batches.

    Each batch item is one randomly chosen image file of one randomly chosen
    patient, paired with that patient's tabular vector; the target is the
    patient's value in ``a``.

    Args:
        keys: patient ids to sample from (ids in ``BAD_ID`` are removed).
        a: mapping patient id -> regression target.
        tab: mapping patient id -> tabular feature vector.
        batch_size: number of patients drawn (with replacement) per batch.
    """

    # Patient ids excluded from sampling.
    BAD_ID = ['ID00011637202177653955184', 'ID00052637202186188008618']

    def __init__(self, keys, a, tab, batch_size=BATCH_SIZE):
        super().__init__()
        self.keys = [k for k in keys if k not in self.BAD_ID]
        self.a = a
        self.tab = tab
        self.batch_size = batch_size

        # patient id -> file names in that patient's image directory.
        # Iterating unique ids lists each directory once instead of once per
        # row of the training table; the resulting dict is the same.
        self.train_data = {
            p: os.listdir(f'../input/osic-pulmonary-fibrosis-progression/train/{p}/')
            for p in train.Patient.unique()
        }

    def __len__(self):
        # Nominal length only: batches are random draws, not indexed slices.
        return 1000

    def __getitem__(self, idx):
        """Return ``([images, tabular], targets)`` for one random batch.

        ``idx`` is ignored. Samples that fail to load are reported and
        skipped, so a batch can hold fewer than ``batch_size`` items.
        """
        x = []
        a, tab = [], []
        keys = np.random.choice(self.keys, size=self.batch_size)
        for k in keys:
            # Reset per sample so the failure report never reads an unbound
            # name or a file name left over from a previous sample.
            i = None
            try:
                i = np.random.choice(self.train_data[k], size=1)[0]
                img = get_img(f'../input/osic-pulmonary-fibrosis-progression/train/{k}/{i}')
                target = self.a[k]
                features = self.tab[k]
            except Exception:
                # Best effort: report the offending patient/file and move on.
                print(k, i)
                continue
            # Append only after everything succeeded so the three lists
            # always stay the same length.
            x.append(img)
            a.append(target)
            tab.append(features)

        x, a, tab = np.array(x), np.array(a), np.array(tab)
        x = np.expand_dims(x, axis=-1)  # add a trailing channel axis
        return [x, tab], a

In [None]:
def build_model(base_model=BASE_MODEL,**kwargs):
    """Build the two-input regression model (image + 4 tabular features).

    Args:
        base_model: name of a model constructor in
            ``tensorflow.keras.applications``.
        **kwargs: must contain ``input_shape``, which defines the image input
            and is removed before the remaining arguments are forwarded to
            the backbone constructor.

    Returns:
        A Keras ``Model`` taking ``[image, tabular]`` and producing one
        float32 value per sample.
    """
    image_in = Input(shape=kwargs.pop('input_shape'))
    backbone = getattr(tfka, base_model)(**kwargs)

    # Image branch: backbone features pooled to a vector.
    features = backbone(image_in)
    features = GlobalAveragePooling2D()(features)

    # Tabular branch: the 4 features pass through Gaussian noise.
    tab_in = Input(shape=(4,))
    noisy_tab = tf.keras.layers.GaussianNoise(0.2)(tab_in)

    merged = Concatenate()([features, noisy_tab])
    merged = Dropout(0.5)(merged)

    # Explicit f32 output because of mixed precision
    prediction = Dense(1, dtype='float32')(merged)

    return Model([image_in, tab_in], prediction)

# Training

In [None]:

# Mixed precision is only switched on for the GPU path, so the loss-scaling
# wrapper further down is gated by the same condition; otherwise the optimizer
# would be loss-scaled while the model still runs under the default policy.
use_mixed_precision = GPU and MIXED_PRECISION

if GPU:
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.compat.v1.Session(config=config)
    if use_mixed_precision:
        policy = mixed_precision.Policy('mixed_float16')
        mixed_precision.set_policy(policy)

# Unshuffled folds are deterministic by construction. random_state is left
# out because scikit-learn rejects it when shuffle=False (a warning in older
# releases, a ValueError in newer ones); the resulting splits are unchanged.
kf = KFold(n_splits=NFOLD, shuffle=False)
P = np.array(P)
subs = []
folds_history = []  # one Keras history dict per fold
with strategy.scope():

    for fold, (tr_idx, val_idx) in enumerate(kf.split(P)):
        print('#####################')
        print('####### Fold %i ######'%fold)
        print('#####################')

        print('Training...')

        # Stop when val_loss has not improved by at least min_delta for
        # `patience` epochs, and roll back to the best weights.
        er = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            min_delta=1e-3,
            patience=EALRY_STOPPING_PATIENCE,
            verbose=1,
            mode="auto",
            baseline=None,
            restore_best_weights=True,
        )

        # One checkpoint file per fold inside OUTPUT_DIR. A single f-string
        # is used so a '%' in OUTPUT_DIR cannot break the formatting.
        cpt = tf.keras.callbacks.ModelCheckpoint(
            filepath=f'{OUTPUT_DIR}fold-{fold}.h5',
            monitor='val_loss',
            verbose=1,
            save_best_only=SAVE_BEST,
            mode='auto'
        )

        # Look up the schedule class by name and hand it to the
        # LearningRateScheduler callback.
        lrs = getattr(schedules_mapper[LR_STRATEGY], LR_STRATEGY)(**LR_KWARGS)
        lrs_call_back = tf.keras.callbacks.LearningRateScheduler(lrs)
        callbacks = [lrs_call_back, cpt, er]

        # Look up the optimizer class by name.
        optimizer = getattr(optimizers_mapper[OPTIMIZER], OPTIMIZER)(**OPTIMIZER_KWARGS)
        # Loss scaling for GPU mixed precision
        if use_mixed_precision:
            optimizer = mixed_precision.LossScaleOptimizer(optimizer, loss_scale='dynamic')

        # A fresh model per fold, trained on the fold's training patients and
        # validated on its held-out patients.
        model = build_model(**BASE_MODEL_KWARGS)
        model.compile(optimizer=optimizer, loss="mae")
        history = model.fit(
            IGenerator(keys=P[tr_idx], a=A, tab=TAB),
            steps_per_epoch=TRAIN_STEPS,
            validation_data=IGenerator(keys=P[val_idx], a=A, tab=TAB),
            validation_steps=VAL_STEPS,
            callbacks=callbacks,
            epochs=EPOCHS,
        )
        folds_history.append(history.history)
        print('Training done!')

# CV Evaluation

In [None]:
# Score each fold by its best validation loss when only the best checkpoint
# is kept, otherwise by the validation loss of its final epoch, then average
# the per-fold scores.
fold_scores = [
    np.min(fold_log['val_loss']) if SAVE_BEST else fold_log['val_loss'][-1]
    for fold_log in folds_history
]
mean_val_loss = np.mean(fold_scores)
print('Our mean CV MAE is: ' + str(mean_val_loss))


# Future Work

Please suggest ideas for future work in the comments.

# References

[1] Michael Kazachok's Linear Decay (based on ResNet CNN)
     Model that uses images can be found at: https://www.kaggle.com/miklgr500/linear-decay-based-on-resnet-cnn
     
[2] Ulrich GOUE's Osic-Multiple-Quantile-Regression-Starter
     Model that uses tabular data can be found at: https://www.kaggle.com/ulrich07/osic-multiple-quantile-regression-starter

[3] Wei Hao Khoong's K-Fold TF-EfficientNet Models (Training) https://www.kaggle.com/khoongweihao/k-fold-tf-efficientnet-models-training

[4] Jeremy Howard's AMAZING Fast.ai course https://docs.fast.ai/

[5] Yixing Fu's awesome guide "Image classification via fine-tuning with EfficientNet" https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/

