In [None]:
# -*- coding: utf-8 -*-
"""
Name: Shane Quinn
Student Number: R00144107
Email: shane.quinn1@mycit.ie
Course: MSc Artificial Intelligence
Module: Deep Learning
Date: 01/05/2021
"""

# from google.colab import drive
# drive.mount('/content/gdrive')

# !unzip "/content/gdrive/My Drive/datasets/earth_data.zip" -d "./"

# !ls


import numpy as np
import h5py
import matplotlib.pyplot as plt
from tensorflow.python.client import device_lib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import load_model
from sklearn.metrics import accuracy_score
import functools
import time
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import f1_score


def exec_time(func):
    """
    Decorator that reports how long each call to the wrapped function takes.

    The elapsed time is measured with time.perf_counter and printed once the
    wrapped function has returned. The wrapped function's return value is
    passed through unchanged.

    Parameters
    ----------
    func : FUNCTION
        Function whose execution time is measured and printed.

    Returns
    -------
    FUNCTION
        Wrapper that forwards all positional and keyword arguments to func.
    """

    @functools.wraps(func)
    def timed_call(*args, **kwargs):
        began = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - began
        print("Execution Time: ", elapsed)
        return result

    return timed_call


def part_a_1_data_augmentation(X, y):
    """
    Build an augmenting batch iterator over the training images and labels.

    The images are passed through a Keras ImageDataGenerator configured with
    shear, zoom, rotation and horizontal/vertical flips, and served in
    batches of 32.

    Parameters
    ----------
    X : Numpy Array
        Training data.
    y : Numpy Array
        Training target classes.

    Returns
    -------
    train_generator : Training generator object
        Iterator returned by ImageDataGenerator.flow; feeds augmented
        image batches to the NN.

    """

    augmentation_settings = dict(
        shear_range=0.1,
        zoom_range=0.3,
        rotation_range=20,
        horizontal_flip=True,
        vertical_flip=True,
    )
    augmenter = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_settings)

    return augmenter.flow(X, y, batch_size=32)


def plotAccLoss(H, NUM_EPOCHS):
    """
    Plot training/validation loss and accuracy curves on one figure.
    (Adapted from the lecture notes.)

    Parameters
    ----------
    H : object with a ``history`` mapping
        Read for the keys "loss", "val_loss", "accuracy" and
        "val_accuracy"; each series is plotted against the epoch index.
    NUM_EPOCHS : int
        Number of epochs; the x axis runs over 0 .. NUM_EPOCHS - 1.

    Returns
    -------
    None.

    """

    # (key in H.history, legend label), drawn in this order
    curves = (
        ("loss", "train_loss"),
        ("val_loss", "val_loss"),
        ("accuracy", "train_acc"),
        ("val_accuracy", "val_acc"),
    )

    plt.style.use("ggplot")
    plt.figure()
    for history_key, legend_label in curves:
        plt.plot(np.arange(0, NUM_EPOCHS), H.history[history_key], label=legend_label)
    plt.title("Training Loss and Accuracy: ")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    plt.show()
  
  
def fine_tune(X, y, X_val, y_val):
    """
    Create transfer learning model using VGG -> Flatten -> 254 Densely connected -> SoftMax

    Phase 1: train the densely connected layers with VGG frozen, checkpointing
    the weights with the lowest validation loss, then reload that checkpoint.

    Phase 2: unfreeze the block3-block5 convolution layers of VGG and continue
    training with a lower learning rate, then reload the checkpoint again.

    Loss/accuracy curves are plotted after each phase.

    Parameters
    ----------
    X : Numpy Array
        Training data. The VGG base is built for input shape (64, 64, 3).
    y : Numpy Array
        Training target classes (integer labels, as required by the
        sparse categorical cross-entropy loss).
    X_val : Numpy Array
        Test data.
    y_val : Numpy Array
        Test target classes.

    Returns
    -------
    model : tf.keras.models.Sequential
        The fine-tuned model, loaded with the weights stored in the
        checkpoint file.

    """

    NUM_EPOCHS = 20
    check_name = 'weights.hdf5'

    # VGG16 layers that become trainable in phase 2. Each name is its own
    # entry so that every listed layer is matched by the membership test below.
    layers_trainable = {
        'block3_conv1', 'block3_conv2', 'block3_conv3',
        'block4_conv1', 'block4_conv2', 'block4_conv3',
        'block5_conv1', 'block5_conv2', 'block5_conv3',
    }

    vggModel = tf.keras.applications.VGG16(weights='imagenet', include_top=False, input_shape=(64, 64, 3))
    checkpoint = tf.keras.callbacks.ModelCheckpoint(check_name, monitor='val_loss',
                                                    mode='min', save_best_only=True, verbose=1)

    # Phase 1: the convolutional base stays frozen, only the new head learns.
    vggModel.trainable = False
    model = tf.keras.models.Sequential()
    model.add(vggModel)
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(254, activation='relu'))
    # Softmax so the 9 outputs form a probability distribution, which is what
    # sparse_categorical_crossentropy expects for single-label classification.
    model.add(tf.keras.layers.Dense(9, activation='softmax'))
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    image_gen = part_a_1_data_augmentation(X, y)
    history = model.fit(image_gen, epochs=NUM_EPOCHS, validation_data=(X_val, y_val), callbacks=[checkpoint])
    plotAccLoss(history, NUM_EPOCHS)
    # Continue from the checkpointed weights rather than the last epoch's.
    model.load_weights(check_name)

    # Phase 2: unfreeze only the selected convolution layers of VGG.
    vggModel.trainable = True
    for layer in vggModel.layers:
        layer.trainable = layer.name in layers_trainable

    # Recompile so the new trainable flags take effect; the learning rate is
    # 10x smaller than in phase 1 for the pre-trained weights.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # The same checkpoint callback (and file) is reused for this phase.
    history = model.fit(image_gen, epochs=NUM_EPOCHS, validation_data=(X_val, y_val), callbacks=[checkpoint])
    model.load_weights(check_name)
    plotAccLoss(history, NUM_EPOCHS)

    return model



@exec_time
def main():
    """
    Fine tune model defined in fine_tune() method, save model
    Can load saved model in and evaluate performance

    Loads the dataset with loadDataH5(), trains via fine_tune() and saves the
    resulting model to 'VGG_Dense_NN.h5'. Total run time is printed by the
    exec_time decorator.

    Returns
    -------
    None.

    """

    X, y, X_val, y_val = loadDataH5()
    model_name = 'VGG_Dense_NN.h5'

    model = fine_tune(X, y, X_val, y_val)
    model.save(model_name)

    # Uncomment below to load and test model previously saved
    # model = load_model(model_name)
    # results = model.evaluate(X_val, y_val, batch_size=32)
    # print(results)



    
    
    
def loadDataH5(path='earth_data.h5'):
    """
    Extract dataset (supplied in assignment)

    Parameters
    ----------
    path : str, optional
        Location of the HDF5 file. Defaults to 'earth_data.h5' in the
        current working directory.

    Returns
    -------
    trainX : NUMPY ARRAY
        Training data.
    trainY : NUMPY ARRAY
        Training target class values.
    valX : NUMPY ARRAY
        Test data.
    valY : NUMPY ARRAY
        Test target class values.

    Raises
    ------
    KeyError
        If any of the datasets 'trainX', 'trainY', 'valX' or 'valY' is
        missing from the file.
    """

    dataset_names = ('trainX', 'trainY', 'valX', 'valY')

    with h5py.File(path, 'r') as hf:
        # Index the file instead of using .get(): a missing dataset then
        # raises KeyError here rather than yielding np.array(None).
        # The arrays are materialised while the file is still open.
        trainX, trainY, valX, valY = [np.array(hf[name]) for name in dataset_names]

    return trainX, trainY, valX, valY


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__=="__main__":
    main() 

Mounted at /content/gdrive
Archive:  /content/gdrive/My Drive/datasets/earth_data.zip
  inflating: ./earth_data.h5         
earth_data.h5  gdrive  sample_data
Local Devices: 
 [name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 16701630876767657105
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14674281152
locality {
  bus_id: 1
  links {
  }
}
incarnation: 14555843857171873624
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
]
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 2, 2, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 2048)  

KeyboardInterrupt: ignored