# Transfer Learning

## Imports

In [None]:
import os
import shutil

import pandas as pd
from keras.models import load_model
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator


## Use Latest EfficientNet Weights

Apply the latest pre-trained EfficientNetB4 weights. I used [efficientnet_weight_update_util.py](https://github.com/keras-team/keras/blob/master/keras/applications/efficientnet_weight_update_util.py) to convert the ckpt file to a h5 file.

In [None]:
SIZE = 384

base_model = keras.applications.EfficientNetB4(
    # "NoisyStudent + RA" weights ImageNet.
    # Source: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet
    weights="/workspace/efficientnetb4_notop.h5",
    input_shape=(SIZE, SIZE, 3),
    include_top=False,
)


## Freeze Base Model

Freeze the base model to ensure all the learning from the pre-trained ImageNet dataset does not get destroyed during initial training. Initially, only the top layer will be trained.

In [5]:
# Freeze base model
base_model.trainable = False


## Rebuild Top Layer

In [6]:
# Create inputs with correct shape
inputs = keras.Input(shape=(SIZE, SIZE, 3))

x = base_model(inputs, training=False)

# Add pooling layer
x = keras.layers.GlobalAveragePooling2D()(x)

# Add final dense layer
outputs = keras.layers.Dense(units=2, activation="softmax")(x)
# outputs = keras.layers.Dense(units=1, activation='sigmoid')(x)
# outputs = keras.layers.Dense(units=1)(x)

# Combine inputs and outputs to create model
model = keras.Model(inputs, outputs)


In [7]:
model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 384, 384, 3)]     0         
                                                                 
 efficientnetb4 (Functional)  (None, 12, 12, 1792)     17673823  
                                                                 
 global_average_pooling2d (G  (None, 1792)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 2)                 3586      
                                                                 
Total params: 17,677,409
Trainable params: 3,586
Non-trainable params: 17,673,823
_________________________________________________________________


## Compile Model

Compile model with loss and metric options.

In [8]:
# Binary crossentropy and binary accuracy
# model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=True), metrics=[keras.metrics.BinaryAccuracy()])

model.compile(loss="categorical_crossentropy", metrics=["accuracy"])


## Split Data

Split the data for train and validation. 90% for training and 10% for validation.

In [None]:
df = pd.read_csv('./train.csv')

# Split data into train and validation sets with stratification
train_df, valid_df = train_test_split(
    df, test_size=0.1, random_state=777, stratify=df["label"]
)

# Create a directory for each label (CE and LAA)
for label in df["label"].unique():
    os.makedirs(f"/workspace/train_data_cropped/train/{label}", exist_ok=True)
    os.makedirs(f"/workspace/train_data_cropped/valid/{label}", exist_ok=True)

# Move training images to subdirectories
train_df[["label", "image_id"]].apply(
    lambda x: shutil.move(
        f"/workspace/train_data_cropped/{x['image_id']}.jpeg",
        f"/workspace/train_data_cropped/train/{x['label']}/{x['image_id']}.jpeg",
    ),
    axis=1,
)

# Move validation images to subdirectories
valid_df[["label", "image_id"]].apply(
    lambda x: shutil.move(
        f"/workspace/train_data_cropped/train/{x['image_id']}.jpeg",
        f"/workspace/train_data_cropped/valid/{x['label']}/{x['image_id']}.jpeg",
    ),
    axis=1,
)


## Augment the data

Now, load the train and validation datasets. Ensuring that the `target_size` is matching the height and width for input of the model.

In [6]:
# Create a data generator for augmenting the training data.
datagen_train = ImageDataGenerator(
    samplewise_center=True,  # set each sample mean to 0
    rotation_range=8,  # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range=0.2,  # Randomly zoom image
    width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=True,  # randomly flip images horizontally
    vertical_flip=True,  # randomly flip images vertically
)

# Validation data does not need to be augmented.
datagen_valid = ImageDataGenerator(samplewise_center=True)


In [9]:
# Load and iterate training dataset
train_it = datagen_train.flow_from_directory(
    "/workspace/train_data_cropped/train/",
    target_size=(SIZE, SIZE),
    color_mode="rgb",
    class_mode="categorical",
    # class_mode="binary",
)

# load and iterate validation dataset
valid_it = datagen_valid.flow_from_directory(
    "/workspace/train_data_cropped/val/",
    target_size=(SIZE, SIZE),
    color_mode="rgb",
    class_mode="categorical",
    # class_mode="binary",
)


Found 578 images belonging to 2 classes.
Found 176 images belonging to 2 classes.


## Train the Model

Time to train the model! Since the base model is frozen, only the top layer is being trained here.

In [11]:
model.fit(
    train_it,
    validation_data=valid_it,
    steps_per_epoch=train_it.samples / train_it.batch_size,
    validation_steps=valid_it.samples / valid_it.batch_size,
    epochs=7,
)


Epoch 1/7


2022-10-05 11:52:12.089449: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600


Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.History at 0x7f94544ee6a0>

## Evaluate the Model

In [12]:
model.evaluate(valid_it, steps=valid_it.samples / valid_it.batch_size)




[0.6109561324119568, 0.7329545617103577]

## Fine-Tuning the Model

Now the model will be re-trained again. However, this time I unfreeze the entire model, and train all the layers with a small learning rate. 

In [13]:
# Unfreeze the base model
base_model.trainable = False

# Recompile the model with a low learning rate.
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=0.00001),  
    loss="categorical_crossentropy",
    metrics=["accuracy"],
    # loss=keras.losses.BinaryCrossentropy(from_logits=True),
    # metrics=[keras.metrics.BinaryAccuracy()]
)


In [16]:
model.fit(
    train_it,
    steps_per_epoch=12,
    validation_data=valid_it,
    validation_steps=4,
    epochs=5,
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f943f8d9c40>

In [17]:
model.evaluate(valid_it, steps=valid_it.samples / valid_it.batch_size)




[0.5930337309837341, 0.7386363744735718]

## Save Model

In [20]:
model.save("TransferLearn_EfficientNet_Mayo.h5")
