In [None]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
from PIL import Image

tfk = tf.keras
tfkl = tf.keras.layers

In [None]:
# Fix the seed of every random number generator in use so that results can be
# reproduced from one run to the next

seed = 42

# NOTE(review): PYTHONHASHSEED is read when an interpreter starts, so setting it
# here should only affect Python processes launched afterwards - confirm intent
os.environ['PYTHONHASHSEED'] = str(seed)

# Python, NumPy and TensorFlow (TF2 and TF1-compat API) generators, in turn
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed, tf.compat.v1.set_random_seed):
    _seed_fn(seed)

In [None]:
# Folder holding the original training images (Kaggle input path)
dataset_dir = '/kaggle/input/training/training'
# Folder that receives the split / oversampled copy of the dataset
oversampled_dataset_dir = 'oversampled_dataset'

In [None]:
# Install split-folders, the tool used to split the provided dataset and to
# oversample it

In [None]:
pip install split-folders

In [None]:
# Split the dataset into training and validation sets. `oversample = True` asks
# split-folders to balance the size of the classes by duplicating images

import splitfolders

splitfolders.fixed(dataset_dir,
                   output = oversampled_dataset_dir,
                   seed = seed,
                   fixed = 220, # Number of images held out for validation in each class
                   oversample = True,
                   group_prefix = None
                  )

# Derive the split folders from `oversampled_dataset_dir` instead of repeating
# its value, so the three paths cannot drift apart
train_dataset_dir = os.path.join(oversampled_dataset_dir, "train")
validation_dataset_dir = os.path.join(oversampled_dataset_dir, "val")

# Historical (misleading) name: it designates the TRAINING split. It is kept
# because other cells of the notebook still refer to it
test_dataset_dir = train_dataset_dir

In [None]:
# Shape of the images fed to the network: (height, width, channels)
input_shape = (256, 256, 3)

# Mini-batch size and maximum number of epochs of each training run
batch_size, epochs = 16, 200

In [None]:
# Implement the data generators for the training and the validation data. Both
# apply the VGG-16 preprocessing function, but random data augmentation is
# applied to the training images only: validation metrics (and the early
# stopping that monitors them) must be measured on unaltered images

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import preprocess_input

# Training generator: random geometric augmentation + VGG-16 preprocessing
data_generator = ImageDataGenerator(
    rotation_range = 50,
    height_shift_range = 30,
    width_shift_range = 30,
    zoom_range = 0.25,
    shear_range = 10.0,
    horizontal_flip = True,
    vertical_flip = True,
    preprocessing_function = preprocess_input
)

# Validation generator: VGG-16 preprocessing only, no random transformation
validation_data_generator = ImageDataGenerator(
    preprocessing_function = preprocess_input
)

# Spatial size of the generated images, kept in sync with the model input
image_size = tuple(input_shape[:2])

# Obtain a data generator with the 'ImageDataGenerator.flow_from_directory' method
# (despite its name, `test_dataset_dir` points to the training split)
train_data = data_generator.flow_from_directory(directory=test_dataset_dir,
                                                target_size=image_size,
                                                color_mode='rgb',
                                                classes=None, # can be set to labels
                                                class_mode='categorical',
                                                batch_size=batch_size,
                                                shuffle=True,
                                                seed=seed)

# Validation images are read in a fixed order: shuffling brings nothing when
# evaluating and a stable order keeps predictions aligned with the labels
validation_data = validation_data_generator.flow_from_directory(directory=validation_dataset_dir,
                                                                target_size=image_size,
                                                                color_mode='rgb',
                                                                classes=None, # can be set to labels
                                                                class_mode='categorical',
                                                                batch_size=batch_size,
                                                                shuffle=False,
                                                                seed=seed)

In [None]:
# Print out some sample images in order to see how the data augmentation works

def get_sample_image(generator):
  """Draw the first image of the next batch produced by `generator`.

  The batches are expected to hold images that went through the VGG-16
  `preprocess_input` function (RGB -> BGR conversion followed by the
  subtraction of the ImageNet channel means). That transformation is undone
  before plotting, otherwise the negative values would wrap around when cast
  to `uint8` and the colour channels would be displayed swapped.

  Args:
    generator: Iterator yielding `(images, targets)` batches, where `targets`
      holds one-hot encoded labels.

  Returns:
    None. A new matplotlib figure showing the image is created.
  """
  batch = next(generator)

  # Visualize only the first sample of the batch
  image = np.asarray(batch[0][0], dtype=np.float64)
  target_idx = int(np.argmax(batch[1][0]))

  # Undo the VGG-16 preprocessing: add back the ImageNet means (BGR order),
  # then flip the channel axis to return to RGB
  imagenet_bgr_mean = np.array([103.939, 116.779, 123.68])
  restored = (image + imagenet_bgr_mean)[..., ::-1]
  restored = np.clip(np.rint(restored), 0, 255).astype(np.uint8)

  plt.figure(figsize=(6, 4))
  plt.imshow(restored)
  plt.title(f'Class index: {target_idx}')


# Preview this many samples, one figure per sample, taken from the training generator
sample_images_to_show = 10
for _ in range(sample_images_to_show):
    get_sample_image(train_data)

In [None]:
# Load VGG-16 without its fully-connected head (include_top = False): only the
# feature extraction part, initialised with the ImageNet weights, is kept

vgg16_options = dict(include_top=False, weights="imagenet", input_shape=input_shape)
supernet = tfk.applications.VGG16(**vgg16_options)

In [None]:
# Transfer learning: the pre-trained supernet is frozen and used as a feature
# extractor, only the layers stacked on top of it are trained

supernet.trainable = False

# Model input followed by additive Gaussian noise
# NOTE(review): the generators feed VGG-preprocessed pixels (not rescaled to
# [0, 1]); confirm that a standard deviation of 0.35 is the intended magnitude
input_layer = tfk.Input(name='Input', shape=input_shape)
gaussian_noise = tfkl.GaussianNoise(0.35, name='GaussianNoise')
noise_layer_one = gaussian_noise(input_layer)

# Frozen convolutional base
supernet_output = supernet(noise_layer_one)

# Collapse the spatial dimensions of the feature maps, then regularise
global_pooling = tfkl.GlobalAveragePooling2D(name='AveragePoolingLayer')
pooling_layer = global_pooling(supernet_output)
pooling_dropout = tfkl.Dropout(rate=0.3, seed=seed)
pooling_layer = pooling_dropout(pooling_layer)

# Hidden dense layer of the classifier, followed by a second dropout
classifier_dense = tfkl.Dense(
    units=256,
    name='Classifier',
    kernel_initializer=tfk.initializers.GlorotUniform(seed=seed),
    activation='relu',
)
classifier_layer = classifier_dense(pooling_layer)
classifier_dropout = tfkl.Dropout(rate=0.3, seed=seed)
classifier_layer = classifier_dropout(classifier_layer)

# Softmax output with 14 units
output_dense = tfkl.Dense(
    units=14,
    activation='softmax',
    kernel_initializer=tfk.initializers.GlorotUniform(seed=seed),
    name='Output',
)
output_layer = output_dense(classifier_layer)

# Assemble the model from its input and output tensors
model = tfk.Model(name='model', inputs=input_layer, outputs=output_layer)

In [None]:
# Model compilation + summary
# `metrics` is passed as a list, the documented form of the argument: a bare
# string is rejected by recent Keras versions

model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(),
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Utility function to create folders and callbacks for training


from datetime import datetime

def create_folders_and_callbacks(model_name, monitor='val_loss', patience=10):
  """Create the experiment folder of a training run and build its callbacks.

  A folder named `<model_name>_<timestamp>` is created inside `challenge_one`
  (relative to the current working directory). The folder is only created
  here: none of the returned callbacks writes into it.

  Args:
    model_name: Prefix of the name given to the experiment folder.
    monitor: Quantity watched by the early stopping callback.
    patience: Number of epochs without improvement of `monitor` tolerated
      before training is stopped.

  Returns:
    A list holding a single `EarlyStopping` callback created with
    `restore_best_weights=True`.
  """
  exps_dir = 'challenge_one'
  now = datetime.now().strftime('%b%d_%H-%M-%S')
  exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))

  # `makedirs` also creates the missing parent (`exps_dir`); `exist_ok=True`
  # replaces the separate existence checks
  os.makedirs(exp_dir, exist_ok=True)

  # Early Stopping
  es_callback = tf.keras.callbacks.EarlyStopping(
      monitor=monitor,
      patience=patience,
      restore_best_weights=True,
  )

  return [es_callback]

In [None]:
# Only let TensorFlow log messages of level WARNING or above
tf.get_logger().setLevel('WARNING')

# Experiment folder + callback list of this training run
aug_callbacks = create_folders_and_callbacks('Transfer_Learning')

# Train the layers on top of the frozen base; every epoch consumes 512 batches
# of the training generator. Only the dictionary of per-epoch metrics is kept
history = model.fit(
    train_data,
    steps_per_epoch=512,
    epochs=epochs,
    callbacks=aug_callbacks,
    validation_data=validation_data,
).history

In [None]:
# Write the trained model to disk, then drop the in-memory reference to it
model.save(filepath='challenge_one/Transfer_Learning_Noise')
del model

In [None]:
# Fine-tuning starts from the model saved at the end of the transfer learning
# phase: load it back from disk
ft_model = tfk.models.load_model(filepath="challenge_one/Transfer_Learning_Noise")

In [None]:
# Number of leading VGG-16 layers that stay frozen during fine-tuning
layers_frozen = 14

# Look the VGG-16 base up once, make it trainable as a whole, then freeze
# again its first `layers_frozen` layers
vgg_base = ft_model.get_layer('vgg16')
vgg_base.trainable = True
for layer in vgg_base.layers[:layers_frozen]:
  layer.trainable = False

ft_model.summary()

In [None]:
# Compile the model again so that the new `trainable` flags are taken into
# account, with a lower learning rate (1e-4) for fine-tuning.
# `metrics` is passed as a list, the documented form of the argument: a bare
# string is rejected by recent Keras versions
ft_model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)

In [None]:
# Fine-tuning run: same generators, same epoch budget and the same callback
# list (`aug_callbacks`) as the first training run. Only the dictionary of
# per-epoch metrics is kept
ft_history = ft_model.fit(
    train_data,
    steps_per_epoch=512,
    epochs=epochs,
    callbacks=aug_callbacks,
    validation_data=validation_data,
).history

In [None]:
# Write the fine-tuned model to disk
ft_model.save(filepath='challenge_one/Fine_Tuning_Noise')