# Transfer Learning Tutorial

# Load Packages

We will begin by loading the necessary packages. 

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K

import zipfile 

# Helper Functions

The functions below are used for merging the contents of Keras history objects, and for displaying training curves. We will use these after each training run. 

In [None]:
def merge_history(hlist):
    """Concatenate the per-epoch metrics of several Keras History objects.

    Parameters
    ----------
    hlist : sequence
        Objects exposing a ``history`` dict that maps a metric name to a
        list of per-epoch values, given in the order the runs were executed.

    Returns
    -------
    dict
        Maps every metric name found in the first entry to a new list holding
        that metric's values from all entries, in order. An empty ``hlist``
        yields an empty dict.

    Raises
    ------
    KeyError
        If a later entry lacks a metric that the first entry has.
    """
    history = {}
    if not hlist:
        return history

    # Only the first run's metric names are used; every later run must
    # provide the same names.
    for k in hlist[0].history:
        merged = []
        for h in hlist:
            # extend() keeps the merge linear in the total number of epochs.
            merged.extend(h.history[k])
        history[k] = merged
    return history

def vis_training(h, start=1):
    """Plot training and validation curves for each metric in a history dict.

    One subplot is drawn per training metric (every key that does not start
    with ``'val_'``). The matching ``'val_'`` series is overlaid when present.

    Parameters
    ----------
    h : dict
        Maps metric names to lists of per-epoch values, such as the output of
        ``merge_history``. Must contain the key ``'loss'``.
    start : int, default 1
        First epoch (1-based) to show; earlier epochs are left off the plot.

    Raises
    ------
    ValueError
        If ``start`` is smaller than 1.
    KeyError
        If ``h`` has no ``'loss'`` entry.
    """
    if start < 1:
        raise ValueError('start must be at least 1 (epochs are 1-based)')

    epoch_range = range(start, len(h['loss']) + 1)
    s = slice(start - 1, None)

    # Pick training metrics by name instead of assuming they are the first
    # half of the keys; this tolerates keys without a validation twin and
    # does not depend on key order.
    metrics = [k for k in h if not k.startswith('val_')]
    n = len(metrics)

    plt.figure(figsize=[14, 4])

    for i, k in enumerate(metrics):
        plt.subplot(1, n, i + 1)
        plt.plot(epoch_range, h[k][s], label='Training')
        if 'val_' + k in h:
            plt.plot(epoch_range, h['val_' + k][s], label='Validation')
        plt.xlabel('Epoch'); plt.ylabel(k); plt.title(k)
        plt.grid()
        plt.legend()

    plt.tight_layout()
    plt.show()


# Load DataFrame

We will now load information about training images into a Pandas DataFrame.

In [None]:
# Load the image-id / label table. dtype=str makes pandas read every column as
# text, so the has_cactus labels are strings rather than integers.
train = pd.read_csv("../input/aerial-cactus-identification/train.csv", dtype=str)
# (rows, columns) of the label table.
print(train.shape)

In [None]:
# Show the first ten rows of the label table.
train.head(10)

# Label Distribution

Next, we will check the distribution of the labels in our training set. 

In [None]:
# Keep the label column on its own.
y_train = train['has_cactus']

# Fraction of the labeled images that fall into each class.
train['has_cactus'].value_counts().div(len(train)).to_frame()

# Extract Images

The image files for this dataset are compressed into zip files. We will now extract the training images.

In [None]:
# Unpack the training images. extractall() is called without a target, so the
# files land relative to the current working directory. The context manager
# closes the archive handle as soon as extraction finishes (or fails).
with zipfile.ZipFile('/kaggle/input/aerial-cactus-identification/train.zip') as zip_ref:
    zip_ref.extractall()

In [None]:
# Folder, relative to the working directory, that holds the extracted images.
train_path = "train/"
# Count the directory entries as a check on the extraction.
print('Number of Training Images:', len(os.listdir(train_path)))

# View Sample of Images

Next, we will view a sample of training images. 

In [None]:
# Draw 16 random labeled images in a 4x4 grid, each captioned with its class.
# reset_index() renumbers the sampled rows from 0 so the loop index can be
# used directly as the subplot position.
sample = train.sample(n=16).reset_index()

plt.figure(figsize=(8,8))

for i, row in sample.iterrows():

    # Build the path from train_path so this cell reads from the same
    # directory as the image count above and the data loaders below.
    img = mpimg.imread(os.path.join(train_path, row.id))
    label = row.has_cactus

    plt.subplot(4,4,i+1)
    plt.imshow(img)
    # Caption placed just above the image's top-left corner.
    plt.text(0, -5, f'Class {label}', color='k')

    plt.axis('off')

plt.tight_layout()
plt.show()


# Data Generators

In this section, we will split the labeled observations into training and validation sets. We will then create data loaders to feed the images into our neural network during training.

In [None]:
# Hold out 20% of the labeled rows for validation. The split is stratified on
# the label column, and the fixed random_state makes it repeatable.
train_df, valid_df = train_test_split(
    train,
    test_size=0.2,
    stratify=train['has_cactus'],
    random_state=1,
)

print(train_df.shape)
print(valid_df.shape)

In [None]:
# Both generators only multiply pixel values by 1/255 (which maps 0-255
# intensities into the 0-1 range); no augmentation is configured.
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
valid_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
BATCH_SIZE = 64

# Both loaders take file names from the 'id' column and labels from the
# 'has_cactus' column, read the files from train_path, and yield shuffled
# batches of 32x32 images with categorical labels.

# Loader over the training split.
train_loader = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=train_path,
    x_col='id',
    y_col='has_cactus',
    target_size=(32, 32),
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=1,
)

# Loader over the validation split, configured the same way.
valid_loader = valid_datagen.flow_from_dataframe(
    dataframe=valid_df,
    directory=train_path,
    x_col='id',
    y_col='has_cactus',
    target_size=(32, 32),
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=1,
)


In [None]:
# The length of each loader is used as the number of steps per epoch for
# training and for validation respectively.
TR_STEPS, VA_STEPS = len(train_loader), len(valid_loader)

print(TR_STEPS, VA_STEPS, sep='\n')

# Build Network

In this section, we will construct our neural network. For feature extraction, we will use the [VGG16](https://www.geeksforgeeks.org/vgg-16-cnn-model/) model, as trained on the [ImageNet](https://www.image-net.org/) dataset. 

In the cell below, we will load the pretrained VGG16 model into a variable named `base_model`. We will set `include_top=False` to indicate that we only wish to use the convolutional blocks that appear before the `Flatten()` layer. We will not include the dense layers composing the classifier at the top of the network. Instead, we will design and train our own classifier. 

We set the `input_shape` parameter to indicate the shape of the images that we will be feeding into the network. 

Finally, we set the `trainable` attribute of the model to `False`. This tells Keras that we do not wish to update the weights of the base model during training. We only wish to train the new classifier that we will design. 

In [None]:
# VGG16 with ImageNet weights and without its dense classifier top, sized for
# 32x32 RGB inputs.
base_model = tf.keras.applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(32, 32, 3),
)

# Freeze the pretrained weights so that training only updates the layers
# added on top of this model.
base_model.trainable = False

Before moving forward, let's take a look at the structure of our base model. Notice that it consists of 5 convolutional blocks, some of which contain 2 convolutional layers, and some of which contain 3. Also note that none of the weights in the model are trainable, since we froze them in the previous cell. 

In [None]:
# Print the layer-by-layer structure and parameter counts of the base model.
base_model.summary()

VGG16 is one of many pretrained models that we could have used. Common choices include VGG16, VGG19, ResNet50, and InceptionV3. A full list of the pretrained models provided by Keras can be found here: [Keras Applications](https://keras.io/api/applications/)

We are now ready to build a classifier for our neural network. In the cell below, we include `base_model` in the network as if it were a single layer. 

In [None]:
# Assemble the network layer by layer: the frozen VGG16 feature extractor
# first, then the new classifier head.
cnn = Sequential()
cnn.add(base_model)

# Turn the convolutional feature maps into a flat vector for the dense layers.
cnn.add(Flatten())

# Two dense layers, each followed by 50% dropout.
cnn.add(Dense(64, activation='relu'))
cnn.add(Dropout(0.5))
cnn.add(Dense(32, activation='relu'))
cnn.add(Dropout(0.5))

# Batch normalization, then a two-unit softmax output (one unit per class).
cnn.add(BatchNormalization())
cnn.add(Dense(2, activation='softmax'))

cnn.summary()

# Train Network

We are now ready to train the network.

In [None]:
# Adam with a learning rate of 0.001; accuracy and AUC are tracked alongside
# the categorical cross-entropy loss.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
cnn.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC()],
)

## Training Run 1

In [None]:
%%time 

# Run 1: 10 epochs, validating on the held-out split after every epoch.
h1 = cnn.fit(
    train_loader,
    epochs=10,
    steps_per_epoch=TR_STEPS,
    validation_data=valid_loader,
    validation_steps=VA_STEPS,
    verbose=1,
)

In [None]:
# Convert the single History object to a plain dict and plot every epoch.
history = merge_history([h1])
vis_training(history)

## Training Run 2

In [None]:
# Lower the learning rate of the already-compiled optimizer in place (from the
# 0.001 it was created with to 0.0001), so no recompile is needed.
# NOTE(review): backend.set_value may be unavailable in newer Keras releases —
# confirm against the installed version.
K.set_value(cnn.optimizer.learning_rate, 0.0001)

In [None]:
%%time 

# Run 2: 5 more epochs on the same loaders, continuing from the current weights.
h2 = cnn.fit(
    train_loader,
    epochs=5,
    steps_per_epoch=TR_STEPS,
    validation_data=valid_loader,
    validation_steps=VA_STEPS,
    verbose=1,
)

In [None]:
# Join both runs into one timeline. start=10 leaves the first nine epochs off
# the plot (presumably so the later part of the curves is easier to read).
history = merge_history([h1, h2])
vis_training(history, start=10)

## Training Run 3 (Fine-Tuning)


In [None]:
# Unfreeze the VGG16 weights so that they are updated as well.
base_model.trainable = True

# Recompile with a fresh Adam optimizer at a learning rate of 0.00001, keeping
# the same loss and the same kinds of metrics as before.
opt = tf.keras.optimizers.Adam(learning_rate=0.00001)
cnn.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC()],
)

In [None]:
%%time 

# Run 3: 10 epochs on the same loaders with the recompiled model.
h3 = cnn.fit(
    train_loader,
    epochs=10,
    steps_per_epoch=TR_STEPS,
    validation_data=valid_loader,
    validation_steps=VA_STEPS,
    verbose=1,
)

In [None]:
# The recompile before run 3 created a second unnamed AUC metric, which is
# logged under a suffixed key such as 'auc_1' instead of 'auc'. Copy those
# values to the un-suffixed keys so merge_history can line h3 up with h1/h2.
# The suffixed key is looked up rather than hard-coded because the number
# depends on how many AUC metrics the session has created so far (for example
# after re-running a compile cell).
for _name in ('auc', 'val_auc'):
    if _name not in h3.history:
        _suffixed = [k for k in h3.history if k.startswith(_name + '_')]
        if not _suffixed:
            raise KeyError(f'no {_name!r} metric found in h3.history')
        h3.history[_name] = h3.history[_suffixed[0]]

In [None]:
# Join all three runs into one timeline and plot it from epoch 10 onward.
history = merge_history([h1, h2, h3])
vis_training(history, start=10)