In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from matplotlib import style
style.use('dark_background')
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#pip install --user --upgrade tensorflow-gpu

# Extracting the zip archives

In [None]:
import zipfile
# Extract the training archive into /tmp. The context manager closes the
# archive handle even if extraction fails part-way through.
base_dir_train='/kaggle/input/dogs-vs-cats/train.zip'
with zipfile.ZipFile(base_dir_train,'r') as local_zip:
    local_zip.extractall('/tmp')


In [None]:
# Extract the test archive into /tmp. The context manager closes the
# archive handle even if extraction fails part-way through.
base_dir_test='/kaggle/input/dogs-vs-cats/test1.zip'
with zipfile.ZipFile(base_dir_test,'r') as local_zip:
    local_zip.extractall('/tmp')

In [None]:
# List the file names under /tmp/train and preview the first nine.
dir_train='/tmp/train'
train_images=os.listdir(dir_train)
train_images[:9]

In [None]:
# List the file names under /tmp/test1 and preview the first nine.
dir_test='/tmp/test1'
test_images=os.listdir(dir_test)
test_images[:9]

# Let's look at some sample images

In [None]:
from keras.preprocessing.image import load_img
# Show one dog and one cat picture from the training folder side by side.
img1=load_img(dir_train+'/dog.890.jpg')
img2=load_img(dir_train+'/cat.8375.jpg')
f,(ax1,ax2)=plt.subplots(nrows=1,ncols=2,figsize=(15,15))
for axis,picture in zip((ax1,ax2),(img1,img2)):
    axis.imshow(picture)
plt.show()

In [None]:
# Show two pictures from the test folder side by side.
img1=load_img(dir_test+'/4644.jpg')
img2=load_img(dir_test+'/8044.jpg')
f,(ax1,ax2)=plt.subplots(nrows=1,ncols=2,figsize=(15,15))
for axis,picture in zip((ax1,ax2),(img1,img2)):
    axis.imshow(picture)
plt.show()

# Labelling the train images

In [None]:
# Derive a binary label from each file name: the prefix before the first '.'
# is compared with 'dog' (label 1); every other prefix gets label 0.
filename=train_images
categories=[1 if name.split('.')[0]=='dog' else 0 for name in filename]

train=pd.DataFrame({'filename':filename,'category':categories})
train[0:10]

# Splitting the dataset into train set and validation set

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the labelled rows for validation (fixed seed), then give
# both parts a fresh 0..n-1 index.
train_df,validate_df=(
    part.reset_index(drop=True)
    for part in train_test_split(train,test_size=0.2,random_state=0)
)

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras import applications
from keras.layers import Conv2D,Dense,Dropout,Activation,Flatten,BatchNormalization,MaxPooling2D,GlobalAveragePooling2D,GlobalMaxPooling2D


In [None]:
# Image geometry and training settings used by the cells that follow.
img_width=img_height=150
img_size=(img_width,img_height)
batch_size=16
epochs=15
# Number of rows in the training and validation splits.
nb_train_samples=len(train_df)
nb_validation_samples=len(validate_df)

In [None]:
# Load VGG16 without its classifier top, initialised with ImageNet weights.
base_model=applications.VGG16(weights='imagenet',include_top=False)
print('VGG16 is loaded....')

# Freeze the first 11 layers; every layer from index 11 onwards stays trainable.
for position,layer in enumerate(base_model.layers):
    layer.trainable=position>=11
base_model.summary()

In [None]:
# Classifier head: global average pooling, then three
# Dense -> BatchNormalization -> Dropout(0.5) stages and one sigmoid output unit.
top_model=Sequential()
top_model.add(GlobalAveragePooling2D(input_shape=base_model.output_shape[1:]))
for units in (512,256,128):
    top_model.add(Dense(units,activation='relu'))
    top_model.add(BatchNormalization())
    top_model.add(Dropout(0.5))
top_model.add(Dense(1,activation='sigmoid'))
top_model.summary()


# top_model = Sequential()  
# top_model.add(GlobalAveragePooling2D(input_shape=base_model.output_shape[1:]))
# top_model.add(Dense(no_of_classes, activation='softmax')) 
# top_model.summary()

In [None]:
from keras import optimizers
# Stack the VGG16 base and the classifier head into a single model.
model=Sequential()
model.add(base_model)
model.add(top_model)
model.summary()

# `metrics` is passed as a list (the documented form; a bare string is not
# accepted by every Keras version), and the optimizer uses `learning_rate`,
# the current name of the deprecated `lr` argument.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(learning_rate=1e-4, momentum=0.9),
              metrics=['accuracy'])

In [None]:
# Display the current value of `epochs`.
epochs

In [None]:
# Training pipeline: scale pixel values by 1/255 and apply random augmentation.
train_datagen=ImageDataGenerator(
    rescale=1/255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
# shuffle=True so each epoch visits the training rows in a new order; with
# shuffle=False every epoch replays the identical batch sequence.
train_generator=train_datagen.flow_from_dataframe(
    train_df,
    '/tmp/train',
    x_col='filename',
    y_col='category',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='raw',
    shuffle=True
)
# Validation pipeline: rescaling only. The order stays fixed (shuffle=False)
# so predictions line up row-for-row with validate_df.
valid_datagen=ImageDataGenerator(rescale=1/255)
valid_generator=valid_datagen.flow_from_dataframe(
    validate_df,
    '/tmp/train',
    x_col='filename',
    y_col='category',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='raw',
    shuffle=False
)
    

In [None]:
# Display the number of rows in the validation split.
nb_validation_samples

In [None]:
# Display the training split DataFrame.
train_df

In [None]:
# Display the configured batch size.
batch_size

In [None]:
from keras import backend as K

In [None]:
import math
from keras.callbacks import ModelCheckpoint
# Save the model to best_model.hdf5 only when the monitored quantity improves
# (save_best_only=True; no explicit `monitor` is given, so the Keras default applies).
best_model_path='best_model.hdf5'
checkpointer=ModelCheckpoint(best_model_path,save_best_only=True,verbose=1)

# Whole batches per epoch; floor division leaves out a trailing partial batch.
train_size=nb_train_samples//batch_size
validation_size=nb_validation_samples//batch_size

# The checkpoint callback has to be passed to the training call; without it
# the callback never runs and best_model.hdf5 is never written.
# NOTE: the epoch count is hard-coded to 20 here; the notebook-level `epochs`
# variable is not used by this call.
history=model.fit_generator(train_generator,
                            epochs=20,
                            validation_data=valid_generator,
                            steps_per_epoch=train_size,
                            validation_steps=validation_size,
                            callbacks=[checkpointer])


In [None]:
# Training and validation accuracy per epoch for the fine-tuning run.
for curve in ('accuracy','val_accuracy'):
    plt.plot(history.history[curve])
plt.title('Training vs Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(['Training Accuracy','Validation Accuracy'],loc='lower right')
plt.show()

In [None]:
# Training and validation loss per epoch for the fine-tuning run.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.xlabel('Epochs')
plt.ylabel('Loss')
# The title now names the quantity actually plotted (it previously said "accuracy").
plt.title('Training vs Validation loss')
plt.legend(['Training Loss','Validation Loss'],loc='upper right')
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [None]:
# Ground-truth labels of the validation split.
y_val=validate_df['category']
# `steps` must be an integer batch count; np.ceil returns a float, so cast it.
# Rounding up includes the final partial batch.
val_steps=int(np.ceil(nb_validation_samples/batch_size))
val_pred=model.predict_generator(valid_generator,steps=val_steps)
# Turn the sigmoid outputs into hard 0/1 labels at a 0.5 cut-off.
thresh=0.5
val_pred=np.where(val_pred>thresh,1,0)

In [None]:
#y_val=y_val.replace({'dog':1,'cat':0})#

In [None]:

# Confusion matrix of the validation labels against the thresholded predictions.
cm=confusion_matrix(y_val,val_pred)
f,ax=plt.subplots(figsize=(8,8))
# `linewidths` is seaborn's documented keyword (and matches the later heatmap
# cell); fmt='d' prints the integer counts without a trailing ".0".
sns.heatmap(cm,annot=True,linewidths=0.01,cmap='Blues',linecolor='gray',fmt='d',ax=ax)
plt.xlabel('Predicted Label')
plt.ylabel('Actual Label')
plt.title('Confusion matrix')
plt.show()

In [None]:
# Overall accuracy followed by the per-class precision / recall report.
accScore=accuracy_score(y_val,val_pred)
print(f' Accuracy Score is {accScore}',end='\n\n')
print('classification report is ---->',classification_report(y_val,val_pred),sep='\n')

In [None]:
# Display the training split DataFrame.
train_df

In [None]:
# One row per test image file name; nb_samples is the number of rows.
test_df=pd.DataFrame({'filename':test_images})
nb_samples=len(test_df)
test_df

In [None]:
# Unlabelled test pipeline: rescaling only, fixed order (shuffle=False) so the
# predictions can be matched back to the rows of test_df.
test_datagen=ImageDataGenerator(rescale=1/255)
test_generator=test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='/tmp/test1',
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=img_size,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# `steps` must be an integer batch count; np.ceil returns a float, so cast it.
# Rounding up includes the final partial batch.
test_steps=int(np.ceil(nb_samples/batch_size))
test_pred=model.predict_generator(test_generator,steps=test_steps)
test_pred

In [None]:
# Convert the predicted scores to hard 0/1 labels at a 0.5 cut-off.
thresh=0.5
test_pred=(test_pred>thresh).astype(int)
test_pred[:5]

In [None]:
# Preview the first ten test file names.
test_df['filename'][0:10]

In [None]:
# Preview the first ten thresholded test predictions.
test_pred[0:10]

In [None]:
# Attach the predicted labels to the test frame as a 'category' column.
test_df=test_df.assign(category=test_pred)
test_df

In [None]:
# Submission frame: `id` is the file name up to its first '.', `label` is the
# predicted category. Written without the index column.
Sub_df=pd.DataFrame({
    'id':test_df['filename'].str.split('.').str[0],
    'label':test_df['category'],
})
Sub_df.to_csv('CatsVsDogsVGGfinetunning.csv',index=False)

In [None]:
# Preview the first 50 submission rows.
Sub_df.head(50)

# Using the power of bottleneck features

In [None]:


from keras.callbacks import EarlyStopping,ReduceLROnPlateau
# Stop training after 10 epochs without improvement of the monitored quantity
# (no explicit `monitor` is given, so the Keras default applies).
earlystop = EarlyStopping(patience=10)

# Halve the learning rate after 2 epochs without validation-accuracy
# improvement, never going below 1e-5. The metric is logged as 'val_accuracy'
# (the same key the plotting cells read from history); the old 'val_acc' name
# is never produced, so the callback could not trigger.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=2,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)
callbacks = [earlystop,learning_rate_reduction]



In [None]:
# filename=train_images
# categories=[]
# for files in filename:
#     category=files.split('.')[0]
#     if category=='dog':
#         categories.append(1)
#     else:
#         categories.append(0)

# train=pd.DataFrame({'filename':filename,'category':categories})


In [None]:
# train_df,validate_df=train_test_split(train,test_size=0.2,random_state=0)
# train_df=train_df.reset_index(drop=True)
# validate_df=validate_df.reset_index(drop=True)

In [None]:

import math
# NOTE: `model` is rebound here to a VGG16 without its classifier top; the
# model built earlier in the notebook is no longer reachable under this name.
model=applications.VGG16(include_top=False,weights='imagenet')
top_model_weights_path='final_cnn_code.h5'

# Training images: rescaled and randomly augmented, read in fixed order
# (shuffle=False) so feature rows stay aligned with the rows of train_df.
train_datagen1=ImageDataGenerator(
    rotation_range=15,
    rescale=1/255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1
)
train_generator1=train_datagen1.flow_from_dataframe(
    train_df,
    '/tmp/train/',
    x_col='filename',
    y_col='category',
    target_size=img_size,
    class_mode='raw',
    batch_size=batch_size,
    shuffle=False
)
# Round up so the final partial batch is included.
train_size=int(math.ceil(nb_train_samples/batch_size))
bottleneck_features_train=model.predict_generator(train_generator1,train_size)
# Pass the path straight to np.save so no file handle is left open.
np.save('bottleneck_features_train.npy',bottleneck_features_train)

# Validation images: rescaling only, same fixed ordering.
valid_datagen1=ImageDataGenerator(
    rescale=1/255
)
validation_generator1=valid_datagen1.flow_from_dataframe(
    validate_df,
    '/tmp/train/',
    x_col='filename',
    y_col='category',
    target_size=img_size,
    class_mode='raw',
    batch_size=batch_size,
    shuffle=False
)
validate_size=int(math.ceil(nb_validation_samples/batch_size))
bottleneck_features_validate=model.predict_generator(validation_generator1,validate_size)
np.save('bottleneck_features_validate.npy',bottleneck_features_validate)


In [None]:
# Display the training split DataFrame.
train_df

In [None]:


# Label columns of each split, the epoch budget for the head, and the split sizes.
train_labels,validation_labels=train_df['category'],validate_df['category']
epochs=50
total_train,total_validate=len(train_df),len(validate_df)



In [None]:
# Display the validation split DataFrame.
validate_df

In [None]:
# Display the number of rows in the validation split.
total_validate

In [None]:

# Load the saved VGG16 features; np.load receives the path directly so no
# file handle is left open.
train_data=np.load('bottleneck_features_train.npy')
validation_data=np.load('bottleneck_features_validate.npy')
# Convert the label Series to plain arrays. The previous bare `.to_numpy()`
# calls discarded their result; np.asarray is also safe if this cell is re-run.
train_labels=np.asarray(train_labels)
validation_labels=np.asarray(validation_labels)

# Dense classifier on the flattened features: four
# Dense -> BatchNormalization -> Dropout(0.5) stages and one sigmoid output unit.
model1=Sequential()
model1.add(Flatten(input_shape=train_data.shape[1:]))
for units in (1024,512,256,128):
    model1.add(Dense(units, activation='relu'))
    model1.add(BatchNormalization())
    model1.add(Dropout(0.5))
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam',
              loss='binary_crossentropy', metrics=['accuracy'])

# In-memory arrays are batched by `batch_size` alone. steps_per_epoch and
# validation_steps are meant for generator/dataset inputs; combined with
# batch_size they are rejected by some Keras versions, and their floor
# division left the last partial validation batch out.
history=model1.fit(train_data, train_labels,
                   epochs=epochs,
                   batch_size=batch_size,
                   validation_data=(validation_data, validation_labels),
                   callbacks=callbacks)
model1.save_weights('final_cnn_code.h5')



In [None]:
# Training and validation accuracy per epoch for the bottleneck classifier.
for curve in ('accuracy','val_accuracy'):
    plt.plot(history.history[curve])
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')
plt.legend(['training_accuracy','Validation_accuracy'],loc='upper left')
plt.show()

In [None]:

# Training and validation loss per epoch for the bottleneck classifier.
for curve in ('loss','val_loss'):
    plt.plot(history.history[curve])
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.legend(['training_loss','validation_loss'],loc='upper left')
plt.show()

In [None]:
# %% [code]
# Reload the saved head weights and score them on the validation features.
model1.load_weights('final_cnn_code.h5')

eval_metrics=model1.evaluate(
    validation_data, validation_labels, batch_size=batch_size, verbose=1)
eval_loss, eval_accuracy = eval_metrics

for template, value in (("Accuracy: {:.2f}%", eval_accuracy * 100),
                        ("Loss: {}", eval_loss)):
    print(template.format(value))



In [None]:
# %% [code]
# Predict on the validation features and convert the scores to hard 0/1
# labels at a 0.5 cut-off.
y_val=validate_df['category']
y_pred=model1.predict(validation_data)
thresh=0.5
y_final=(y_pred>thresh).astype(int)




In [None]:
# %% [code]
# Confusion matrix of the validation labels against the thresholded predictions.
cm=confusion_matrix(y_val,y_final)
f,ax=plt.subplots(figsize=(8,8))
# fmt='d' prints the integer counts without a trailing ".0".
sns.heatmap(cm,annot=True,linewidths=0.01,cmap='Blues',linecolor='gray',fmt='d',ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel('Actual Label')
plt.title('Confusion matrix')
plt.show()



In [None]:
# %% [code]
# Overall accuracy followed by the per-class precision / recall report.
accScore=accuracy_score(y_val,y_final)
print(f' Accuracy score is {accScore}',end='\n\n')
classificationreport=classification_report(y_val,y_final)
print(f' Classification Report is --->',classificationreport,sep='\n')



In [None]:
# %% [code]
# Rebuild the test frame (one row per test image file name) and count its rows.
test_filename=test_images
test_df=pd.DataFrame({'filename':test_filename})
nb_samples=len(test_df)

# Unlabelled test pipeline: rescaling only, fixed order (shuffle=False) so the
# predictions can be matched back to the rows of test_df.
test_datagen=ImageDataGenerator(rescale=1/255)
test_generator=test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='/tmp/test1/',
    x_col='filename',
    y_col=None,
    target_size=img_size,
    class_mode=None,
    batch_size=batch_size,
    shuffle=False,
)
test_df

In [None]:
# One prediction step per batch, rounded up so the final partial batch is included.
test_size=int(math.ceil(nb_samples/batch_size))
bottleneck_features_test=model.predict_generator(test_generator,test_size)
# Give np.save / np.load the path directly so no file handle is left open.
np.save('bottleneck_features_test.npy',bottleneck_features_test)
test_data=np.load('bottleneck_features_test.npy')

# Score the saved test features with the trained head.
y_pred=model1.predict(test_data)

y_pred[0:20]

In [None]:
# Convert the predicted scores to hard 0/1 labels at a 0.5 cut-off and store
# them as the 'category' column of the test frame.
threshold = 0.5
test_df = test_df.assign(category=(y_pred > threshold).astype(int))

test_df

In [None]:
# Submission frame: `id` is the file name up to its first '.', `label` is the
# predicted category. Written without the index column.
Sub_df1=pd.DataFrame({
    'id':test_df['filename'].str.split('.').str[0],
    'label':test_df['category'],
})
Sub_df1.to_csv('Bottleneck_CatsVsDogs.csv',index=False)