<a href="https://colab.research.google.com/github/msharovar/ECGR6119/blob/main/midterm_msharova.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import tensorflow as tf
import math
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.models import Model
from keras.layers import Input, Convolution2D, MaxPool2D, Dense, Flatten, Dropout
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
#from keras.utils.vis_utils import plot_model
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from pathlib import Path
import pandas as pd
import numpy as np
from datetime import datetime
import os
import shutil
import zipfile
from random import seed
from random import random
from tensorflow.keras import layers
from tensorflow.keras.applications.vgg16 import VGG16
from keras.applications.mobilenet_v2 import MobileNetV2
from keras import backend as K

# Print the devices TensorFlow reports for this runtime (CPU and any GPU),
# as a quick check of what hardware the cell is running on.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

# --- Dataset and checkpoint locations ---
# Root folder that the archive is extracted into (trailing slash is relied on
# by the string concatenations elsewhere in this file).
dataset_directory = 'dogs_vs_cats/'
# Filepath template handed to the ModelCheckpoint callback.
checkpoint_path = "cp-{epoch:04d}.ckpt"
# Directory part of the template; empty string here, i.e. the working directory.
checkpoint_dir = os.path.split(checkpoint_path)[0]

# --- Image geometry ---
img_width = 128
img_height = 128
img_depth = 3

# --- Training hyper-parameters ---
num_classes = 2
# Fraction of the labelled images that unzip() moves into the test folder.
train_valid_split = 0.25
n_batch_size = 50
number_epochs = 100

def unzip():
    """Extract the dogs-vs-cats archive and split it into train/test folders.

    Expects ``dogs-vs-cats.zip`` in the current working directory. The outer
    archive is extracted into ``dataset_directory``; the inner ``train.zip``
    is extracted into a temporary ``init/`` staging folder. Every labelled
    image (file name starting with ``cat`` or ``dog``) is then moved into::

        dogs_vs_cats
        - train
        --- dogs
        --- cats
        - test
        --- dogs
        --- cats

    About ``train_valid_split`` of the images go to ``test/``; the rest go
    to ``train/``. The random generator is seeded and the file names are
    processed in sorted order, so the split is the same on every run.

    Raises:
        FileNotFoundError: if ``dogs-vs-cats.zip`` or the inner ``train.zip``
            is missing.
        OSError: if the staging folder still contains files after the split
            (files whose names start with neither ``cat`` nor ``dog``).
    """
    print("Directory Path:", Path().absolute())

    staging_dir = dataset_directory + 'init/'
    labelled_dir = staging_dir + 'train/'

    # extract dataset from zip files (the `with` blocks close the archives)
    with zipfile.ZipFile('dogs-vs-cats.zip', 'r') as outer_zip:
        outer_zip.extractall(dataset_directory)
    with zipfile.ZipFile(dataset_directory + 'train.zip', 'r') as train_zip:
        train_zip.extractall(staging_dir)

    # delete the inner zip files; skip any the outer archive did not contain
    for inner_zip in ('train.zip', 'test1.zip'):
        inner_zip_path = dataset_directory + inner_zip
        if os.path.isfile(inner_zip_path):
            os.remove(inner_zip_path)

    # create the train/test x cats/dogs tree; exist_ok lets a previously
    # interrupted run be repeated without failing on existing folders
    for split_dir in ('train/', 'test/'):
        for label_dir in ('cats/', 'dogs/'):
            os.makedirs(dataset_directory + split_dir + label_dir, exist_ok=True)

    # seed random number generator and define the ratio used for train/test
    seed(1)
    test_ratio = train_valid_split

    # split labelled images into train/test data.
    # os.listdir returns names in arbitrary order, so sort them: otherwise the
    # seeded random sequence would be paired with different files on each run.
    for file_name in sorted(os.listdir(labelled_dir)):
        # one draw per directory entry, whether or not the entry is moved
        goes_to_test = random() < test_ratio
        if file_name.startswith('cat'):
            label_dir = 'cats/'
        elif file_name.startswith('dog'):
            label_dir = 'dogs/'
        else:
            # not a labelled image; leave it in place
            continue
        split_dir = 'test/' if goes_to_test else 'train/'
        shutil.move(labelled_dir + file_name,
                    dataset_directory + split_dir + label_dir + file_name)

    # remove the (now empty) staging folders
    os.rmdir(labelled_dir)
    os.rmdir(staging_dir)

# Resize and augment each image in the cat/dog dataset using ImageDataGenerator
def prep_dataset_DataImageLoader():
    """Build the directory-backed batch generators for training and testing.

    Images are read from ``dataset_directory + 'train/'`` and
    ``dataset_directory + 'test/'`` (the folders created by ``unzip``),
    resized to ``(img_width, img_height)``, loaded as RGB and rescaled by
    1/255. Only the training generator applies augmentation (horizontal and
    vertical flips, rotations up to 90 degrees) and shuffling; the test
    generator keeps the files in order.

    Returns:
        tuple: ``(train_generator, test_generator)``, both yielding batches
        of ``n_batch_size`` images with binary labels.
    """
    # augment train set
    train_datagen = ImageDataGenerator(rescale=1./255,
                                       horizontal_flip=True,
                                       vertical_flip=True,
                                       rotation_range=90,
                                       fill_mode='nearest')

    # do not augment test set
    test_datagen = ImageDataGenerator(rescale=1./255)

    # Paths are derived from dataset_directory so they stay in step with the
    # folders that unzip() creates.
    train_generator = train_datagen.flow_from_directory(
        directory=dataset_directory + 'train/',
        target_size=(img_width, img_height),
        color_mode="rgb",
        batch_size=n_batch_size,
        class_mode="binary",
        shuffle=True,
        seed=42)

    test_generator = test_datagen.flow_from_directory(
        directory=dataset_directory + 'test/',
        target_size=(img_width, img_height),
        color_mode="rgb",
        batch_size=n_batch_size,
        class_mode='binary',
        shuffle=False,
        seed=42)

    return train_generator, test_generator

def recall_m(y_true, y_pred):
    """Recall over the given tensors: matched positives / labelled positives."""
    # label * prediction, clipped to [0, 1] and rounded, marks the matched positives
    matched = K.round(K.clip(y_true * y_pred, 0, 1))
    labelled = K.round(K.clip(y_true, 0, 1))
    # epsilon keeps the division defined when there are no labelled positives
    return K.sum(matched) / (K.sum(labelled) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the given tensors: matched positives / predicted positives."""
    # label * prediction, clipped to [0, 1] and rounded, marks the matched positives
    matched = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    # epsilon keeps the division defined when nothing is predicted positive
    return K.sum(matched) / (K.sum(predicted) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score built from precision_m and recall_m on the same tensors."""
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    # epsilon guards against 0/0 when both precision and recall are zero
    ratio = (p*r)/(p+r+K.epsilon())
    return 2*ratio

def define_VGG16_model(train_generator, valid_generator):
    """Build a frozen-VGG16 binary classifier, train or restore it, evaluate and save.

    The model is an ImageNet-pretrained VGG16 base (no top, not trainable)
    followed by Flatten, two Dense(1024, relu) layers with dropout, and a
    single sigmoid output unit.

    If ``tf.train.latest_checkpoint(checkpoint_dir)`` finds a checkpoint, its
    weights are loaded and training is skipped. Otherwise the model is
    trained for ``number_epochs`` epochs and its weights are saved every
    5 epochs to ``checkpoint_path``. In both cases the model is then
    evaluated on ``valid_generator``, the metrics are printed, and the model
    is saved to ``model_A.keras``.

    Args:
        train_generator: batch iterator for training; must expose ``n``
            (sample count) and ``batch_size``.
        valid_generator: batch iterator used for the final evaluation.
    """
    # Number of batches per epoch, taken from the generator itself so the
    # checkpoint interval stays correct if the dataset size changes.
    n_batches = train_generator.n / train_generator.batch_size
    print(n_batches)
    n_batches = math.ceil(n_batches)    # round up the number of batches to the nearest whole integer

    latest = tf.train.latest_checkpoint(checkpoint_dir)

    # import VGG16 model without its top layer
    input_tensor = Input(shape=(img_width, img_height, img_depth))
    base_model = applications.vgg16.VGG16(input_tensor=input_tensor,
                                          include_top=False,
                                          weights='imagenet')

    # Add a prediction head
    model = tf.keras.Sequential([
        base_model,
        Flatten(),
        Dropout(0.49),
        Dense(1024, activation='relu'),
        Dropout(0.19),
        Dense(1024, activation='relu'),
        Dropout(0.09),
        Dense(1, activation='sigmoid')])

    # freeze the pretrained base so only the new head is trained
    base_model.trainable = False

    # compile model (`learning_rate` replaces the deprecated `lr` argument)
    opt = Adam(learning_rate=0.0001)
    model.compile(optimizer=opt, loss='binary_crossentropy',
                  metrics=['acc', f1_m, precision_m, recall_m])

    if latest is not None:
        print("Loading weights")
        model.load_weights(latest)

    # Display model
    model.summary()

    if latest is None:
        # Create a callback that saves the model's weights every 5 epochs
        # (save_freq is counted in batches)
        cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                         verbose=1,
                                                         save_weights_only=True,
                                                         save_freq=5*n_batches)

        # fit model; batch_size is not passed because the generator already
        # yields complete batches
        model.fit(train_generator, epochs=number_epochs, callbacks=[cp_callback])

    # evaluate returns the loss followed by the metrics in compile() order
    loss, accuracy, f1_score, precision, recall = model.evaluate(valid_generator)

    print(loss)
    print(accuracy)
    print(f1_score)
    print(precision)
    print(recall)

    # save model
    model.save('model_A.keras')

def define_generator_model(train_generator, valid_generator):
    """Placeholder for a second model builder; not implemented yet.

    Currently only copies the module-level ``n_batch_size`` into a local
    variable and returns ``None``. Neither argument is used.
    """
    batch_size = n_batch_size


# Prepare the dataset on first run (detected by the absence of the train/dogs
# folder), then build the generators and train/evaluate the VGG16 model.
dogs_train_dir = dataset_directory + 'train/dogs'
dir_exists = os.path.isdir(dogs_train_dir)
print(dogs_train_dir)
if not dir_exists:
    unzip()

train, test = prep_dataset_DataImageLoader()
define_VGG16_model(train, test)


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2261843973140733400
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15240134656
locality {
  bus_id: 1
  links {
  }
}
incarnation: 9054711171413746034
physical_device_desc: "device: 0, name: Tesla V100-SXM2-16GB, pci bus id: 0000:00:04.0, compute capability: 7.0"
xla_global_id: 416903419
]
dogs_vs_cats/train/dogs
Directory Path: /content
Found 18697 images belonging to 2 classes.
Found 6303 images belonging to 2 classes.
373.94
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5




Model: "sequential_65"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 4, 4, 512)         14714688  
                                                                 
 flatten_13 (Flatten)        (None, 8192)              0         
                                                                 
 dropout (Dropout)           (None, 8192)              0         
                                                                 
 dense_26 (Dense)            (None, 1024)              8389632   
                                                                 
 dropout_1 (Dropout)         (None, 1024)              0         
                                                                 
 dense_27 (Dense)            (None, 1024)              1049600   
                                                                 
 dropout_2 (Dropout)         (None, 1024)            