In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import random
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from PIL import Image
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
from os import walk
from datetime import datetime
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Short aliases for the Keras namespaces used throughout the notebook
tfk = tf.keras
tfkl = tf.keras.layers
# Print the TensorFlow version in use for this run
print(tf.__version__)

In [3]:
# List the entries of the dataset root (shown as the cell output)
os.listdir('/kaggle/input/homework1')

In [4]:
# Dataset folders
dataset_dir = '/kaggle/input/homework1/training'
# Joining with an empty last component appends a trailing path separator to dataset_dir
training_dir = os.path.join(dataset_dir, '')

In [5]:
# Fix every random number generator used in this notebook to the same seed
seed = 42

# NOTE: PYTHONHASHSEED is read when an interpreter starts, so this assignment
# is only picked up by processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(seed)

# Python's `random`, NumPy, and TensorFlow (v2 and compat.v1 entry points)
for set_seed in (random.seed,
                 np.random.seed,
                 tf.random.set_seed,
                 tf.compat.v1.set_random_seed):
    set_seed(seed)

In [16]:
# Utility function to create folders and callbacks for training
from datetime import datetime

def create_folders_and_callbacks(model_name):
    """Create the experiment folders and the Keras callbacks used for training.

    A timestamped experiment directory ``data_experiments/<model_name>_<timestamp>``
    is created with two sub-folders: ``ckpts`` (model checkpoints) and
    ``tb_logs`` (TensorBoard logs).

    Note: the path is built with ``os.path.join``, so when ``model_name`` is an
    absolute path the ``data_experiments`` prefix is discarded and the
    experiment directory is created at that absolute location instead.

    Args:
        model_name: Name (or path prefix) that identifies the experiment.

    Returns:
        A list with three callbacks, in this order: ``ModelCheckpoint``,
        ``TensorBoard`` and ``EarlyStopping``.
    """
    exps_dir = 'data_experiments'

    now = datetime.now().strftime('%b%d_%H-%M-%S')
    exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
    ckpt_dir = os.path.join(exp_dir, 'ckpts')
    tb_dir = os.path.join(exp_dir, 'tb_logs')

    # exist_ok=True replaces the check-then-create pattern: it is shorter and
    # does not fail if the folder appears between the check and the creation.
    for folder in (exps_dir, exp_dir, ckpt_dir, tb_dir):
        os.makedirs(folder, exist_ok=True)

    # Checkpointing and early stopping must agree on what "best" means;
    # otherwise the checkpoint on disk (default: lowest val_loss) and the
    # weights restored by early stopping (highest val_accuracy) can come from
    # different epochs.
    monitor = 'val_accuracy'
    mode = 'max'

    callbacks = []

    # Model checkpoint
    # ----------------
    ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(ckpt_dir, 'cp.ckpt'),
        monitor=monitor,
        mode=mode,
        save_weights_only=False,  # True to save only weights
        save_best_only=True)      # True to save only the best epoch
    callbacks.append(ckpt_callback)

    # Visualize Learning on Tensorboard
    # ---------------------------------
    # By default shows losses and metrics for both training and validation
    tb_callback = tf.keras.callbacks.TensorBoard(
        log_dir=tb_dir,
        profile_batch=0,
        histogram_freq=1)  # if > 0 (epochs) shows weights histograms
    callbacks.append(tb_callback)

    # Early Stopping
    # --------------
    es_callback = tf.keras.callbacks.EarlyStopping(
        monitor=monitor,
        mode=mode,
        patience=15,
        restore_best_weights=True)
    callbacks.append(es_callback)

    return callbacks

In [5]:
# Load ResNet152 with ImageNet weights and without its classification head,
# to be used as the feature extractor for transfer learning
base_model = tfk.applications.resnet.ResNet152(
    weights='imagenet',
    include_top=False,
    input_shape=(256, 256, 3),
)

In [7]:
#calling the preprocess function from resnet
from tensorflow.keras.applications.resnet import preprocess_input

In [8]:
# Data generators for the train/validation split.
# Random augmentation is applied to the training subset only: validation images
# receive just the ResNet preprocessing, so validation metrics are measured on
# undistorted images and are comparable from epoch to epoch.
# Both generators must share the same `validation_split` so that the 'training'
# and 'validation' subsets taken from `training_dir` remain disjoint.
validation_split = 0.1

aug_train_data_gen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                        validation_split=validation_split,
                                        rotation_range=30,
                                        shear_range=0.2,
                                        height_shift_range=30,  # values >= 1 are pixels
                                        width_shift_range=30,   # values >= 1 are pixels
                                        zoom_range=0.1,
                                        horizontal_flip=True,
                                        vertical_flip=True,
                                        fill_mode='nearest')

# Same preprocessing and split, but no random transformations
validation_data_gen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                         validation_split=validation_split)

# Options shared by both directory iterators
flow_options = dict(directory=training_dir,
                    target_size=(256, 256),
                    color_mode='rgb',
                    classes=None,  # can be set to labels
                    class_mode='categorical',
                    batch_size=16,
                    seed=seed)

aug_train_gen = aug_train_data_gen.flow_from_directory(subset='training',
                                                       shuffle=True,
                                                       **flow_options)

# No shuffling for validation: the order does not affect the metrics and a
# fixed order keeps predictions aligned with the iterator's file order.
aug_validation_gen = validation_data_gen.flow_from_directory(subset='validation',
                                                             shuffle=False,
                                                             **flow_options)

In [9]:
# Input image shape (height, width, channels) and the maximum number of training epochs
input_shape = (256, 256, 3)
epochs = 200

In [10]:
# Transfer learning: freeze the pretrained base model and stack our own
# classifier on top of it, so only the new head is trained.
base_model.trainable = False

inputs = tfkl.Input(shape=input_shape)
features = base_model(inputs)

# Reduce the spatial feature maps to a single vector per image
pooled = tfkl.GlobalMaxPooling2D()(features)
flattened = tfkl.Flatten(name='Flattening')(pooled)

# Fully connected head with dropout before and after the hidden layer
hidden_in = tfkl.Dropout(0.3, seed=seed)(flattened)
hidden = tfkl.Dense(
    units=512,
    activation='relu',
    kernel_initializer=tfk.initializers.GlorotUniform(seed),
)(hidden_in)
hidden_out = tfkl.Dropout(0.2, seed=seed)(hidden)

# 14-way softmax output layer
outputs = tfkl.Dense(
    units=14,
    activation='softmax',
    kernel_initializer=tfk.initializers.GlorotUniform(seed),
    name='Output',
)(hidden_out)

model = tfk.Model(inputs, outputs)

In [12]:
# Compile the model: categorical cross-entropy matches the one-hot labels
# produced by the generators (class_mode='categorical').
# `metrics` is passed as a list, which is the documented form; a bare string
# is not accepted by every Keras version.
model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(),
    metrics=['accuracy'],
)
model.summary()

In [13]:
# Prepare the experiment folders and callbacks for this run.
# NOTE: model_name is an absolute path, so the experiment folder is created at
# that location rather than under the relative 'data_experiments' directory.
aug_callbacks = create_folders_and_callbacks(
    model_name='/kaggle/output/homework1/CNN_Aug_resnet152'
)

# Train the model and keep only the per-epoch metrics dictionary
training_run = model.fit(
    aug_train_gen,
    validation_data=aug_validation_gen,
    epochs=epochs,
    callbacks=aug_callbacks,
)
history = training_run.history

In [15]:
# Save the in-memory model to the output folder.
# NOTE(review): this is the best epoch only if EarlyStopping ended the run and
# restored its best weights (restore_best_weights=True) — confirm for runs that
# reach the full epoch count.
model.save("/kaggle/output/homework1/CNN_Aug_resnet152")

In [16]:
# Zip the saved model folder into a single archive in the current directory so it can be downloaded
!zip -r CNN_Aug_resnet152.zip /kaggle/output/homework1/CNN_Aug_resnet152

In [17]:
# Training vs. validation loss per epoch
fig, ax = plt.subplots()
ax.plot(history['loss'], label='train')
ax.plot(history['val_loss'], label='validation')
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

In [18]:
# Training vs. validation accuracy per epoch
plt.plot(history['accuracy'])
plt.plot(history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')  # the y-axis shows accuracy, not loss
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()