In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os, cv2, random, time, shutil
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tqdm import tqdm
np.random.seed(42)

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.utils import plot_model
%matplotlib inline 

import keras
from keras.models import Sequential
from keras import backend
from keras.applications import ResNet50
from keras.applications.vgg16 import VGG16
from keras.applications.inception_v3 import InceptionV3,preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.models import Model, load_model
from keras.layers import  AveragePooling2D, GlobalAveragePooling2D,Dropout
from keras.layers import Dense, Flatten, Dropout, Lambda, Input, Concatenate, concatenate
from keras.utils import to_categorical

In [None]:
# Input archives shipped with the competition and the working folders used below.
train_zip_dir = '/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip'
test_zip_dir = '/kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip'
extract_dir = '/kaggle/working/extracted_data'
train_dir = '/kaggle/working/train'
test_dir = '/kaggle/working/test'

# One sub-folder per label under train_dir and a single 'test_data' sub-folder
# under test_dir; exist_ok keeps the cell re-runnable.
for folder in (train_dir+'/dog', train_dir+'/cat', test_dir+'/test_data'):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Unpack the train and test archives into the same scratch folder (extract_dir).
import zipfile
for archive_path in (train_zip_dir, test_zip_dir):
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

In [None]:
def plot_images(rows, columns, images, image_dir=None):
    '''
    Display a rows x columns grid of images read from disk.

    INPUTS:
        rows: number of rows in the grid
        columns: number of images displayed in each row
        images: sequence of image file names (relative to image_dir); only the
                first rows*columns entries are drawn
        image_dir: folder that contains the files; defaults to
                   extract_dir + '/train/'

    Cells for which no image is available (too few names, or a file that
    OpenCV cannot read) are left empty instead of raising.
    '''
    if image_dir is None:
        image_dir = extract_dir+'/train/'

    # squeeze=False keeps the axes array 2-D even when rows or columns is 1,
    # so x[i,j] indexing below is always valid.
    fig, x = plt.subplots(rows, columns, squeeze=False,
                          constrained_layout=True, figsize=(15,8))
    plt.setp(x, xticks=[], yticks=[])
    for i in range(rows):
        for j in range(columns):
            index = i*columns+j
            if index >= len(images):
                continue
            img = cv2.imread(os.path.join(image_dir, images[index]))
            if img is None:
                # cv2.imread signals an unreadable file by returning None
                continue
            # OpenCV loads pixels as BGR while matplotlib expects RGB
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            x[i,j].imshow(cv2.resize(img, (150,150)))
            
# Preview a square grid built from the first file names found in the
# extracted training folder.
num_columns = 5
train_data = os.listdir(extract_dir+'/train/')[:100]
plot_images(rows=num_columns, columns=num_columns, images=train_data)

In [None]:
#Function to copy the images to their corresponding folders and drop the extracted originals:
def move_files(train_path,test_path):
    """Sort the extracted images into the folder layout used for training.

    Every file in ``train_path`` whose name contains 'dog' or 'cat' is copied
    into ``train_dir/dog`` or ``train_dir/cat``; any other file is reported
    and skipped. Every file in ``test_path`` is copied into
    ``test_dir/test_data``. Finally the whole ``extract_dir`` tree is deleted.

    Args:
        train_path: folder holding the extracted training images.
        test_path: folder holding the extracted test images.

    Paths are assembled with os.path.join, so the arguments work with or
    without a trailing separator.
    """
    print('Moving Training Files ..')
    # NOTE(review): the pause presumably lets the message flush before the
    # progress bar is drawn - confirm it is still needed.
    time.sleep(1)
    for i in tqdm(os.listdir(train_path)):
        source = os.path.join(train_path, i)
        if 'dog' in i:
            shutil.copyfile(source, os.path.join(train_dir, 'dog', i))
        elif 'cat' in i:
            shutil.copyfile(source, os.path.join(train_dir, 'cat', i))
        else:
            print('unkown File', i)

    print('Moving Testing Files ..')
    time.sleep(1)
    for i in tqdm(os.listdir(test_path)):
        shutil.copyfile(os.path.join(test_path, i),
                        os.path.join(test_dir, 'test_data', i))
    #Delete original data
    shutil.rmtree(extract_dir)
        
# Distribute the extracted images into the class folders.
move_files(train_path=extract_dir+'/train/', test_path=extract_dir+'/test/')

In [None]:
#Setting Image and model parameters
# Image size used by the test generator further down; 299x299 is the size
# this notebook also uses for its InceptionV3 input.
Image_width,Image_height = 299,299
batch_size=64
total_samples = 25000
val_split=0.2
# Keep the sample counts as integers so that n_train//batch_size and
# n_val//batch_size are valid (integer) step counts.
n_val=int(round(total_samples*val_split))
n_train=total_samples-n_val
num_classes = 2
print(n_train,n_val)

In [None]:
# Define data pre-processing
# Training images are rescaled to [0, 1] and randomly rotated / flipped.
train_image_gen = ImageDataGenerator(rescale=1/255,rotation_range=20,horizontal_flip=True,validation_split=val_split)
# Validation images only get the rescale: sharing the augmenting generator
# would also rotate/flip the validation subset. The same validation_split on
# the same directory selects the complementary part of the file list.
val_image_gen = ImageDataGenerator(rescale=1/255,validation_split=val_split)

# Same spatial size as the test generator (299x299); without an explicit
# target_size Keras would resize to its 256x256 default.
generator_image_size = (299, 299)

#Data loader to load each batch on the RAM at each step.
train_generator = train_image_gen.flow_from_directory(train_dir,
                                                      target_size=generator_image_size,
                                                      batch_size=batch_size,seed=42,subset='training',
                                                      shuffle = True)

val_generator = val_image_gen.flow_from_directory(train_dir,
                                                  target_size=generator_image_size,
                                                  batch_size=batch_size,seed=42,subset='validation',
                                                  shuffle = True)


In [None]:
from keras.callbacks import EarlyStopping,ModelCheckpoint,TensorBoard
#Prepare call backs
# Write the best model to its own file. The original target was extract_dir,
# the scratch folder the extraction cell writes into and move_files deletes.
checkpoint_path = '/kaggle/working/best_model.h5'
# Keep only the weights with the lowest validation loss seen so far.
savemodel=keras.callbacks.ModelCheckpoint(checkpoint_path,monitor='val_loss', mode='min',save_best_only=True,verbose=1)
# Halve the learning rate after 3 epochs without val_accuracy improvement (floor 1e-5).
LR_callback = keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', patience=3, verbose=2, factor=.5, min_lr=.00001)
# Stop after 7 stagnant epochs and roll back to the best weights.
EarlyStop_callback = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=7, restore_best_weights=True)
my_callback=[savemodel,EarlyStop_callback, LR_callback]

In [None]:
def create_base_model(MODEL, img_size, lambda_fun = None):
    """Wrap a pretrained backbone as a feature extractor.

    Args:
        MODEL: constructor called with ``input_tensor``, ``weights``,
            ``include_top`` and ``pooling`` keyword arguments.
        img_size: indexable pair whose first two items, followed by 3
            channels, form the input shape.
        lambda_fun: optional callable applied to the input through a
            ``Lambda`` layer before it reaches the backbone.

    Returns:
        A ``Model`` mapping the new input to the backbone output
        (ImageNet weights, no top, average pooling).
    """
    image_input = Input(shape = (img_size[0], img_size[1], 3))
    backbone_input = Lambda(lambda_fun)(image_input) if lambda_fun else image_input

    backbone = MODEL(input_tensor = backbone_input,
                     weights = 'imagenet',
                     include_top = False,
                     pooling = 'avg')

    return Model(image_input, backbone.output)

In [None]:
# Each backbone gets its own preprocessing function. The module-level
# preprocess_input comes from keras.applications.inception_v3 and is only
# appropriate for InceptionV3.
from keras.applications.vgg16 import preprocess_input as vgg16_preprocess_input
from keras.applications.resnet50 import preprocess_input as resnet50_preprocess_input

# NOTE(review): the Keras preprocess_input functions are documented for raw
# 0-255 pixel values, while the generators in this notebook rescale by 1/255.
# Confirm the input range before training this model.
model1 = create_base_model(VGG16, (224, 224), vgg16_preprocess_input)
model2 = create_base_model(ResNet50, (224, 224), resnet50_preprocess_input)
model3 = create_base_model(InceptionV3, (299, 299), preprocess_input)
# Frozen feature extractors: only the classification head below is trained.
model1.trainable = False
model2.trainable = False
model3.trainable = False

# Two inputs: a 224x224 image shared by VGG16 / ResNet50 and a 299x299 image
# for InceptionV3.
inpA = Input(shape = (224, 224, 3))
inpB = Input(shape = (299, 299, 3))
out1 = model1(inpA)
out2 = model2(inpA)
out3 = model3(inpB)

# Concatenate the pooled features and classify with a single sigmoid unit.
x = Concatenate()([out1, out2, out3])
x = Dropout(0.6)(x)
x = Dense(1, activation='sigmoid')(x)
multiple_pretained_model = Model([inpA, inpB], x)

multiple_pretained_model.compile(loss = 'binary_crossentropy',
                          optimizer = 'rmsprop',
                          metrics = ['accuracy'])

multiple_pretained_model.summary()

ResNet50

In [None]:
# ResNet50 feature extractor (ImageNet weights, global max pooling) with a
# softmax head. The generators in this notebook use the default class_mode,
# i.e. one-hot labels with num_classes columns, so the head needs one unit per
# class, like model_2 and model_3.
model_1 = Sequential()
model_1.add(ResNet50(include_top = False, pooling = 'max', weights = 'imagenet'))
model_1.add(Dense(num_classes, activation = 'softmax'))

# Freeze the backbone; only the Dense head is trained.
model_1.layers[0].trainable = False
model_1.compile(optimizer = 'adam', metrics = ['accuracy'], loss = 'binary_crossentropy')

VGG16

In [None]:
# VGG16 backbone (ImageNet weights, global max pooling) followed by a
# two-unit softmax head.
vgg16_backbone = VGG16(include_top = False, pooling = 'max', weights = 'imagenet')
# Freeze the backbone before compiling; only the Dense head is trained.
vgg16_backbone.trainable = False

model_2 = Sequential([
    vgg16_backbone,
    Dense(2, activation = 'softmax'),
])
model_2.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

InceptionV3

In [None]:
# InceptionV3 without its classification top, followed by
# global average pooling -> dropout -> softmax over num_classes.
InceptionV3_base_model = InceptionV3(weights='imagenet', include_top=False)
pooled_features = GlobalAveragePooling2D()(InceptionV3_base_model.output)
x = Dropout(0.2)(pooled_features)
final_pred = Dense(num_classes, activation='softmax')(x)
model_3 = Model(inputs=InceptionV3_base_model.input, outputs=final_pred)

In [None]:
# Freeze the first 276 layers (the original note puts the network at 311
# layers) so the low- and mid-level feature extractors stay fixed; every layer
# from index 276 onwards remains trainable.
layer_to_Freeze=276
for position, layer in enumerate(model_3.layers):
    layer.trainable = position >= layer_to_Freeze

# SGD with Nesterov momentum and a small learning-rate decay.
sgd = SGD(lr = 0.01, decay = 1e-6, momentum = 0.9, nesterov = True)
model_3.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [None]:
#Training ...
# Train model_3: it is the model whose history is plotted and that produces the
# submission below, and it takes the single image input the generators yield.
# (multiple_pretained_model expects two differently sized inputs and cannot be
# fed by train_generator.)
# int(...) guarantees integer step counts even if n_train / n_val are floats.
history_transfer_learning = model_3.fit_generator(train_generator,epochs=5,
                                                steps_per_epoch=int(n_train//batch_size),
                                                validation_data=val_generator,
                                                validation_steps=int(n_val//batch_size),callbacks=my_callback)

In [None]:
# Plot the curves from the History object returned by the training call, so the
# figures always describe the run that actually happened (model_3.history is
# only populated if model_3 itself was fit).
loss = pd.DataFrame(history_transfer_learning.history)

loss_ax = loss[['loss', 'val_loss']].plot(title='Loss per epoch')
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')

accuracy_ax = loss[['accuracy', 'val_accuracy']].plot(title='Accuracy per epoch')
accuracy_ax.set_xlabel('epoch')
accuracy_ax.set_ylabel('accuracy');

In [None]:
#Use evaluate() or evaluate_generator() to check your model accuracy on validation set.
# Evaluate model_3, the model used for the predictions below, and label the
# numbers as validation metrics since val_generator is the data source.
score = model_3.evaluate_generator(val_generator,verbose=1)
print('Validation loss: ', score[0])
print('Validation accuracy', score[1])

In [None]:
# Test-time pipeline: rescale only. Files are read one at a time and in a
# fixed order (shuffle=False) with no labels (class_mode=None).
test_image_gen = ImageDataGenerator(rescale=1/255)
test_generator = test_image_gen.flow_from_directory(
    test_dir,
    target_size=(Image_width,Image_height),
    batch_size=1,
    seed=42,
    class_mode=None,
    shuffle=False,
)

In [None]:
# Predict class probabilities for every image served by test_generator.
y_pred = model_3.predict_generator(test_generator, verbose=1)

In [None]:
# Column 1 of the clipped predictions is written as the label.
# NOTE(review): presumably this is the 'dog' probability, as Keras orders the
# class folders alphanumerically - confirm with train_generator.class_indices.
clipped_label = y_pred.clip(min=0.02,max=0.98)[:,1]
submission = pd.DataFrame({'id':pd.Series(test_generator.filenames),'label':pd.Series(clipped_label)})
# Raw string avoids the invalid '\d' escape; expand=False returns a Series
# (the first run of digits in each file name) instead of a one-column frame.
submission['id'] = pd.to_numeric(submission['id'].str.extract(r'(\d+)', expand=False))
submission.to_csv("Submission_InceptionV3.csv",index=False)

In [None]:
# Show the first 10 rows of the submission frame as a quick sanity check.
submission.head(10)