In [None]:
import keras
from keras.layers import Dense, Dropout, Input, MaxPooling2D, Conv2D, Flatten, MaxPool2D, AveragePooling2D, GlobalAveragePooling2D, BatchNormalization
from keras.models import Sequential, Model
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam, SGD
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
from keras.applications.densenet import DenseNet121


In [None]:
# Mount Google Drive so files stored there are reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Drive directory that is globbed further down for the training *.tfrec files.
path = "/content/drive/MyDrive/cassava/train_tfrecords"

In [None]:
%cd drive/MyDrive/cassava/train_tfrecords/

/content/drive/MyDrive/cassava/train_tfrecords


In [None]:
from data_loader import initialize_dataset

In [None]:
# Get a list of the training record files.
# glob returns paths in an arbitrary, filesystem-dependent order, so sort them
# to make the train/validation split reproducible across runs.
tfrecord_files = sorted(glob.glob(os.path.join(path, "*.tfrec")))

# Fail early with a clear message instead of building loaders from an empty list.
if not tfrecord_files:
    raise FileNotFoundError("No .tfrec files found in %s" % path)

# Define the number of files for training, 85% of files
n_file_train = int(0.85 * len(tfrecord_files))

# Split the record files into 85% training and 15% validation
tfrecord_files_train = tfrecord_files[:n_file_train]
tfrecord_files_valid = tfrecord_files[n_file_train:]

In [None]:
BATCH_SIZE = 32
IMG_SIZE = (256, 256)

# Training loader: random flipping and rotations are switched on.
dataset_train = initialize_dataset(
    tfrecord_files_train,
    batch_size=BATCH_SIZE,
    img_size=IMG_SIZE,
    flip=True,
    rot=(-1, 1),
)

# Validation loader: no flipping or rotation arguments are passed.
# NOTE(review): batch_size is not passed here either, so initialize_dataset's
# own default applies -- confirm that is intended.
dataset_valid = initialize_dataset(tfrecord_files_valid, img_size=IMG_SIZE)

In [None]:
def save_model(model, model_name='checkpoint1.h5', save_dir=None):
  """Save a model to disk and print where it was written.

  Args:
    model: Object exposing ``save(filepath, overwrite=...)``, e.g. a Keras model.
    model_name: File name to create inside ``save_dir``. Defaults to
      ``'checkpoint1.h5'``, the name this function always used before.
    save_dir: Directory to write into. When ``None`` (default) the file goes
      to ``saved_models`` under the current working directory. The directory
      is created if it does not exist yet.
  """
  import os  # local import keeps the function usable when its cell is run on its own

  if save_dir is None:
    save_dir = os.path.join(os.getcwd(), 'saved_models')

  # exist_ok=True replaces the separate isdir() check and avoids the
  # check-then-create race between the two calls.
  os.makedirs(save_dir, exist_ok=True)

  # Save model and weights
  model_path = os.path.join(save_dir, model_name)
  model.save(model_path, overwrite=True)
  print('Saved trained model at %s ' % model_path)

In [None]:
# Our most basic DenseNet model: DenseNet121 followed by a light head (global average
# pooling, batch norm, dropout and a fully connected softmax output). No layers frozen.
# Performs surprisingly well, with val loss of 0.47 and val accuracy of 0.84

# Adam with an exponentially decaying learning rate: starts at 5e-5 and is
# multiplied by 0.9 for every 10000 optimizer steps.
initial_lr = 5e-5
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_lr,
    decay_steps=10000,
    decay_rate=0.9,
)
opt = Adam(learning_rate=lr_schedule)

# Stop when val_loss has not improved for 5 epochs and restore the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

def build_model():
  """Build the baseline classifier: DenseNet121 plus a small softmax head.

  The backbone is DenseNet121 without its top, initialised from the
  "imagenet" weights and sized for ``(*IMG_SIZE, 3)`` inputs. On top of its
  output sit global average pooling, batch normalisation, dropout (0.2) and a
  5-unit softmax layer.

  NOTE(review): the input pipeline lives in data_loader, which is not visible
  here -- confirm it applies the preprocessing the ImageNet DenseNet weights expect.

  Returns:
    An uncompiled ``Model`` mapping the backbone input to the 5 class probabilities.
  """
  backbone = DenseNet121(include_top=False, weights="imagenet", input_shape=(*IMG_SIZE, 3))

  head = GlobalAveragePooling2D()(backbone.output)
  head = BatchNormalization()(head)
  head = Dropout(0.2)(head)
  predictions = Dense(5, activation='softmax')(head)

  return Model(inputs=backbone.input, outputs=predictions)

In [None]:
# Instantiate the baseline network and compile it with the Adam optimizer
# configured earlier, sparse categorical cross-entropy loss and an accuracy metric.
model = build_model()
model.compile(optimizer=opt, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# model.summary()

In [None]:
# Train the baseline for at most 100 epochs; early_stop ends the run earlier.
train_hist = model.fit(
    dataset_train,
    validation_data=dataset_valid,
    epochs=100,
    callbacks=[early_stop],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Restoring model weights from the end of the best epoch.
Epoch 00013: early stopping


In [None]:
# A more complex variant of the model above: two fully-connected hidden layers
# sit in front of the output layer.
# Dropout is used more often and at a slightly higher rate to mitigate overfitting.
# Val loss as low as 0.43, and val accuracy pushes up to 0.85

# Same learning-rate schedule as the baseline run: 5e-5, times 0.9 per 10000 steps.
initial_lr = 5e-5
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_lr,
    decay_steps=10000,
    decay_rate=0.9,
)

# Disabled alternative to the schedule above:
# reduce_lr = ReduceLROnPlateau(
#     monitor="val_loss",
#     factor=0.5,
#     patience=5,
#     verbose=1,
#     min_lr=1e-6
# )

opt = Adam(learning_rate=lr_schedule)

# Stop when val_loss has not improved for 5 epochs and restore the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

def build_model2():
  """Build the larger classifier: DenseNet121 plus a two-hidden-layer head.

  The backbone is DenseNet121 without its top, initialised from the
  "imagenet" weights and sized for ``(*IMG_SIZE, 3)`` inputs. The head
  consists of nine layers applied in order to the backbone output: global
  average pooling, batch norm, dropout, Dense(1024, relu), dropout,
  Dense(512, relu), batch norm, dropout and a 5-unit softmax output. All
  dropout rates are 0.4 and both hidden Dense layers use he_normal init.

  Returns:
    An uncompiled ``Model`` mapping the backbone input to the 5 class probabilities.
  """
  backbone = DenseNet121(include_top=False, weights="imagenet", input_shape=(*IMG_SIZE, 3))

  # Layers are created in the same order in which they are applied.
  head_layers = [
      GlobalAveragePooling2D(),
      BatchNormalization(),
      Dropout(0.4),
      Dense(1024, activation='relu', kernel_initializer="he_normal"),
      Dropout(0.4),
      Dense(512, activation='relu', kernel_initializer="he_normal"),
      BatchNormalization(),
      Dropout(0.4),
      Dense(5, activation='softmax'),
  ]

  tensor = backbone.output
  for head_layer in head_layers:
    tensor = head_layer(tensor)

  return Model(inputs=backbone.input, outputs=tensor)

In [None]:
# Instantiate the larger network and compile it with the current Adam optimizer,
# sparse categorical cross-entropy loss and an accuracy metric.
model2 = build_model2()
model2.compile(optimizer=opt, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# model2.summary()

In [None]:
# Train the larger model for at most 100 epochs; early_stop ends the run earlier.
train_hist2 = model2.fit(
    dataset_train,
    validation_data=dataset_valid,
    epochs=100,
    callbacks=[early_stop],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Restoring model weights from the end of the best epoch.
Epoch 00014: early stopping


In [None]:
# See how transfer learning does if we freeze the base (DenseNet) layers
# and train only the head.
model_freeze_base = build_model2()

# Freeze all DenseNet layers: everything except the last 9 layers, which are
# the head layers build_model2 stacks on top of the DenseNet output.
for layer in model_freeze_base.layers[:-9]:
  layer.trainable = False

# This run starts from a larger learning rate (0.01), decayed by 0.9 per 10000 steps.
initial_lr_transfer = 0.01
lr_schedule_transfer = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_lr_transfer,
    decay_steps=10000,
    decay_rate=0.9,
)
opt_transfer = Adam(learning_rate=lr_schedule_transfer)

model_freeze_base.compile(
    optimizer=opt_transfer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# model_freeze_base.summary()

In [None]:
# Train the frozen-base model for at most 100 epochs.
# Result: the model clearly underfits.
train_freeze_base = model_freeze_base.fit(
    dataset_train,
    validation_data=dataset_valid,
    epochs=100,
    callbacks=[early_stop],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Restoring model weights from the end of the best epoch.
Epoch 00011: early stopping


In [None]:
# Try training the model with partial freezing of the DenseNet layers.
model_freeze_partial = build_model2()

# Freeze the first 12 layers of the network; every later layer stays trainable.
for layer in model_freeze_partial.layers[:12]:
  layer.trainable = False

# Give this model its own optimizer instead of reusing `opt`: that instance has
# already been used to train model2, so its step counter (and therefore the
# decayed learning rate from lr_schedule) and its per-variable state carry over.
# Some Keras releases also raise an error when an optimizer is reused for a
# different model's variables.
opt_partial = keras.optimizers.Adam(learning_rate=lr_schedule)

model_freeze_partial.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=opt_partial,
    metrics=["accuracy"]
)

In [None]:
# Train the partially frozen model for at most 100 epochs.
# Performance is not far off from the unfrozen model:
# minimum val loss slightly higher at 0.45, val accuracy around the same.
train_freeze_partial = model_freeze_partial.fit(
    dataset_train,
    validation_data=dataset_valid,
    epochs=100,
    callbacks=[early_stop],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Restoring model weights from the end of the best epoch.
Epoch 00017: early stopping


In [None]:
# Persist the partially frozen model via save_model, which writes
# saved_models/checkpoint1.h5 under the current working directory.
save_model(model_freeze_partial)

Saved trained model at /content/drive/My Drive/cassava/train_tfrecords/saved_models/checkpoint1.h5 


In [None]:
# Reload the saved model from disk; the relative path is resolved against the
# current working directory.
checkpoint = keras.models.load_model("saved_models/checkpoint1.h5")

In [None]:
# Unfreeze previously frozen layers, but keep batch norm layers frozen.
# A layer is treated as batch norm when its name contains "bn" or "batch".
for layer in checkpoint.layers:
  layer.trainable = not ("bn" in layer.name or "batch" in layer.name)

In [None]:
# Fine-tuning uses a smaller learning rate: 1e-5, decayed by 0.9 per 10000 steps.
initial_lr2 = 1e-5
lr_schedule2 = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_lr2,
    decay_steps=10000,
    decay_rate=0.9,
)
opt_fine = Adam(learning_rate=lr_schedule2)

# Recompile the reloaded model with the new optimizer.
checkpoint.compile(
    optimizer=opt_fine,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Fine-tune the reloaded model for at most 100 epochs.
# Val accuracy is pushed up to 0.86, but val loss does not outperform the original unfrozen model.
# Given the imbalanced data set, this 1 percent increase in val accuracy with no
# decrease at all in val loss might not be significant.
train_freeze_unfreeze2 = checkpoint.fit(
    dataset_train,
    validation_data=dataset_valid,
    epochs=100,
    callbacks=[early_stop],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Restoring model weights from the end of the best epoch.
Epoch 00011: early stopping


In [None]:
# Try training the model with some partial freezing, trial 2.
# Further trials are not shown in the notebook to avoid clutter, but as we increase
# the number of frozen blocks in the base DenseNet model, validation performance
# starts to noticeably go down.
model_freeze_partial2 = build_model2()

# Freeze the first 33 layers of the network; every later layer stays trainable.
for layer in model_freeze_partial2.layers[:33]:
  layer.trainable = False

# Give this model its own optimizer instead of reusing `opt`: that instance has
# already been used to train earlier models, so its step counter (and therefore
# the decayed learning rate from lr_schedule) and its per-variable state carry
# over. Some Keras releases also raise an error when an optimizer is reused for
# a different model's variables.
opt_partial2 = keras.optimizers.Adam(learning_rate=lr_schedule)

model_freeze_partial2.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=opt_partial2,
    metrics=["accuracy"]
)

In [None]:
# Train the second partially frozen model for at most 100 epochs; early_stop ends the run earlier.
train_freeze_partial2 = model_freeze_partial2.fit(
    dataset_train,
    validation_data=dataset_valid,
    epochs=100,
    callbacks=[early_stop],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Restoring model weights from the end of the best epoch.
Epoch 00019: early stopping
