In [None]:
import warnings 
warnings.filterwarnings('ignore')

# basic libraries
import zipfile
import glob
import os
import pandas as pd
import numpy as np
from PIL import Image


# plotting and visualizations
import matplotlib 
import matplotlib.pyplot as plt 
%matplotlib inline
import seaborn as sns 
import missingno as msno

# preprocessing
from keras.preprocessing.image import (ImageDataGenerator, 
                                       img_to_array, 
                                       array_to_img, 
                                       load_img)

from sklearn.model_selection import train_test_split

# metrics
from sklearn.metrics import (confusion_matrix, 
                             classification_report, 
                             accuracy_score, 
                             f1_score, 
                             roc_auc_score)
# modeling
import tensorflow as tf
from keras.models import Model,Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Dense,Flatten

from keras.applications import resnet50
from keras.applications.resnet50 import preprocess_input
from keras import optimizers
from keras import regularizers
from keras.callbacks import EarlyStopping,LearningRateScheduler


from keras import backend as K
K.clear_session()

# model plotting
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model

# misc
from sklearn.utils import shuffle


In [None]:
# Extract the train and test zip archives into the `data` folder.
# NOTE(review): archives are extracted into the relative folder 'data', while
# the following cell reads from the absolute '/data/...' paths - confirm that
# both refer to the same location in the runtime environment.
print('Extracting the data from dataset.....\n')
zip_files = glob.glob('/dogs-vs-cats/*.zip')

print('{} files found in the input directory\n'.format(len(zip_files)))

for archive_path in zip_files:
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall('data')
    # os.path.basename copes with the platform's path separator, unlike a
    # manual split on '/'.
    print('{} is extracted\n'.format(os.path.basename(archive_path)))

print('Extraction is completed\n')


In [None]:
# Directories holding the extracted training and test images.
train_dir = '/data/train/'
test_dir = '/data/test1/'

# Report how many entries each directory contains.
print('Total Images in Train, and Test Data...')
print('\nNo. of Train Images: {}'.format(len(os.listdir(train_dir))))
print('No. of Test Images: {}\n'.format(len(os.listdir(test_dir))))

In [None]:
# category and filepath extraction helper functions
def category(path):
    """Return the label prefix of every entry found in ``path``.

    The label is the text before the first '.' of each name returned by
    ``os.listdir(path)`` (the whole name when it contains no '.').
    The result follows the order in which ``os.listdir`` yields the entries.
    """
    labels = []
    for entry in os.listdir(path):
        prefix, _, _ = entry.partition('.')
        labels.append(prefix)
    return labels

def filename(path):
    """Return the names of all entries in ``path`` as a list.

    The names are exactly what ``os.listdir(path)`` reports, in the same order.
    """
    return os.listdir(path)

# image names and labels
x_train_imgname = filename(train_dir)
x_test_imgname = filename(test_dir)
# Derive the labels from the very same listing as the file names: the label is
# the text before the first '.' of each name. A second, independent directory
# listing is not guaranteed to come back in the same order, which could
# silently misalign names and labels.
y_train_label = [img_name.split('.')[0] for img_name in x_train_imgname]

# creation of total dataframe and submission dataframe
print('Image data is storing into dataframes...\n')
train_image_df = pd.DataFrame({'filename': x_train_imgname, 'category': y_train_label})
submission_image_df = pd.DataFrame({'filename': x_test_imgname})

print('Training image names and labels are read to train_image_df\n')
print('Testing image names are read to submission_image_df\n')


In [None]:
# helper functions
def img_path(directory):
    """Collect ID, full path and category for every entry in ``directory``.

    Each entry name is split on '.'; the first piece is taken as the category
    and the second piece as the ID. A name without any '.' raises IndexError.

    Returns
    -------
    tuple of three lists ``(ID_no, paths, cate)`` aligned by position, in the
    order ``os.listdir(directory)`` yields the entries.
    """
    ids, full_paths, labels = [], [], []
    for entry in os.listdir(directory):
        pieces = entry.split('.')
        full_paths.append(os.path.join(directory, entry))
        labels.append(pieces[0])
        ids.append(pieces[1])
    return ids, full_paths, labels

def showImages(data, num_row=3, num_col=3, name='any', subtitle='off'):
    """Show a randomly ordered grid of images from ``data``.

    Parameters
    ----------
    data : DataFrame
        Must provide the columns 'Category', 'img_paths' and 'ID_no'.
    num_row, num_col : int
        Grid dimensions; a 1x1 grid is supported.
    name : str
        'dog' or 'cat' restricts the grid to that category; any other value
        draws from all rows.
    subtitle : str
        'on' puts the image's 'ID_no' above each image.

    Returns
    -------
    None. The figure is drawn through matplotlib.
    """
    # Select the rows to draw from.
    if name in ('dog', 'cat'):
        subset = data[data['Category'] == name]
    else:
        subset = data

    X_rand, Y_rand = shuffle(subset['img_paths'], subset['ID_no'])

    # squeeze=False always yields a 2-D array of axes, so ravel() also works
    # for a single row, a single column or a 1x1 grid.
    fig, grid = plt.subplots(num_row, num_col, figsize=(12, 12), dpi=100, squeeze=False)
    fig.patch.set_facecolor('#f5f6f6')
    fig.subplots_adjust(wspace=0)  # needed only once, not once per cell

    n_available = len(X_rand)
    for idx, cell in enumerate(grid.ravel()):
        # Cells beyond the number of available images stay blank instead of
        # raising IndexError.
        if idx < n_available:
            cell.imshow(load_img(X_rand.iloc[idx], target_size=(125, 125)))
            if subtitle == 'on':
                cell.set_title("{}".format(Y_rand.iloc[idx]))
        cell.axis('off')

    fig.text(0.1, 0.93, 'Hey Siri! is it a Cat or Dog?: {}s from Training Data'.format(name.capitalize()), {'fontfamily': 'serif', 'size': 18, 'weight': 'bold'})

    return None


In [None]:
# Gather IDs, full paths and categories of the training images.
# `train_images` holds the list of category labels here.
ID_no, img_paths, train_images = img_path(train_dir)

print('\nDataframe is creating for training image visualization in a grid...\n')
# Dataframe used for the image-grid visualization.
visual_df = pd.DataFrame(dict(ID_no=ID_no, Category=train_images, img_paths=img_paths))

print('\n')
print(visual_df.head(5))

print('\nDone!\n')


In [None]:
# Data split into train, validation and hold-out data.
# A fixed seed keeps the three subsets identical across kernel restarts, so
# training runs and evaluation numbers stay comparable.
SPLIT_SEED = 42

# 4% of the labelled images are set aside as a hold-out set ...
train_valid_df, test_df = train_test_split(train_image_df, test_size=0.04,
                                           random_state=SPLIT_SEED)
# ... and 20% of the remainder is used for validation.
train_df, valid_df = train_test_split(train_valid_df, test_size=0.2,
                                      random_state=SPLIT_SEED)

train_images = train_df.shape[0]
valid_images = valid_df.shape[0]
holdon_images = test_df.shape[0]
test_images = submission_image_df.shape[0]

print('\nNumber of Training Images: ' + str(train_images))
print('\nNumber of Validating Images: ' + str(valid_images))
print('\nNumber of Holdon Images: ' + str(holdon_images))
print('\nNumber of Testing Images: ' + str(test_images))


In [None]:
img_size = 224
batch_size = 128

# Plain prints are used here: the `color` helper the original messages relied
# on is not defined anywhere in this notebook and raised NameError on a fresh
# kernel.
print('\nPreparing train and validation images for training...')

# Dataframe iterators without data augmentation.
# NOTE(review): the generators are created with no arguments, so presumably no
# pixel rescaling is applied before the images reach the model - confirm this
# is intended.
train_map = ImageDataGenerator()
valid_map = ImageDataGenerator()
test_map = ImageDataGenerator()

# Creating the dataframe iterators for fitting
vani_train_data = train_map.flow_from_dataframe(
    train_df, train_dir,
    x_col='filename',
    y_col='category',
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='categorical')

vani_valid_data = valid_map.flow_from_dataframe(
    valid_df, train_dir,
    x_col='filename',
    y_col='category',
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='categorical')

# The hold-out images also live in train_dir; they are served without labels
# and in a fixed order (shuffle=False).
vani_test_data = test_map.flow_from_dataframe(
    test_df, train_dir,
    x_col='filename',
    y_col=None,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

print('\nDone!')

In [None]:
# Building the model computational graph.
# Five convolutional stages, each made of two 3x3 'same'-padded ReLU
# convolutions followed by 2x2 max pooling; the last two stages are followed
# by dropout. A dense head with a 2-way softmax sits on top.
# Each entry: (number of filters, dropout rate after the stage or None).
conv_stages = [(16, None), (32, None), (64, None), (128, 0.3), (256, 0.3)]

vani_model = Sequential()
for stage_idx, (n_filters, drop_rate) in enumerate(conv_stages):
    if stage_idx == 0:
        # The input shape follows `img_size`, so the model stays in sync with
        # the size of the images produced by the data iterators.
        vani_model.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same',
                              input_shape=(img_size, img_size, 3)))
    else:
        vani_model.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
    vani_model.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
    vani_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    if drop_rate is not None:
        vani_model.add(Dropout(drop_rate))

# Classification head
vani_model.add(Flatten())
vani_model.add(Dense(512, activation='relu'))
vani_model.add(Dropout(0.5))
vani_model.add(Dense(2, activation='softmax'))

print('\nVanilla Model layers and output shapes with params...\n')
vani_model.summary()

In [None]:
# compiling model with loss, opt, metrics
loss = 'categorical_crossentropy'
# Use the optimizer from the same `keras` package the model is built with,
# rather than mixing in `tf.keras`.
opt = optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-07)
metrics = ['accuracy']

vani_model.compile(loss=loss, optimizer=opt, metrics=metrics)

print('Training on Vanilla CNN has started ....\n')
# Fitting the model on the training dataset.
# Step counts are taken from the iterators themselves, so they do not depend
# on the `train_images` / `valid_images` globals (`train_images` is also bound
# to a list of labels by an earlier cell).
vani_history = vani_model.fit(vani_train_data, epochs=15,
                              validation_data=vani_valid_data,
                              validation_steps=vani_valid_data.samples // batch_size,
                              steps_per_epoch=vani_train_data.samples // batch_size)

print('\nDone!\n')


In [None]:
from pathlib import Path

In [None]:
# Target directory for the trained model; change to the desired path.
model_save_path = Path("/")
# Create the directory (and any missing parents) if it does not exist yet.
model_save_path.mkdir(parents=True, exist_ok=True)

# Full path of the HDF5 file the model is written to.
model_filename = "vani_model.h5"
model_filepath = model_save_path.joinpath(model_filename)

vani_model.save(model_filepath)