In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))
        




In [None]:
# Standard library
import random

# Third-party
import tensorflow.keras
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator, load_img
from sklearn.model_selection import train_test_split

# Show what is available in the input directory.
print(os.listdir("../input/"))

In [None]:
import zipfile

In [None]:

# Unpack the training and test archives into the current working directory.
for archive_path in ("../input/dogs-vs-cats/train.zip",
                     "../input/dogs-vs-cats/test1.zip"):
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")

In [None]:
# Preview the extracted training files. A short, sorted sample keeps the cell
# output readable instead of printing one line for every file in the directory.
sorted(os.listdir("./train"))[:10]

In [None]:
# Build the filename/label table from the training directory.
# The label comes from the filename prefix (the text before the first "."):
# 'dog' -> 1, anything else -> 0 (i.e. Cat is 0, Dog is 1).
filenames = os.listdir("./train")
categories = [1 if name.split(".")[0] == 'dog' else 0 for name in filenames]

df = pd.DataFrame({'filename': filenames, 'category': categories})

In [None]:
# Preview the first rows of the filename/category table.
df.head()

In [None]:
# Bar chart of how many images fall into each category.
class_counts = df['category'].value_counts()
class_counts.plot(kind='bar', color='g')

In [None]:
# Import the Image submodule explicitly: a bare `import PIL` does not load
# `PIL.Image`, so `PIL.Image.open` only works if another library happened to
# import it first.
import PIL.Image

# Open one sample image to inspect its dimensions; the context manager closes
# the underlying file once the size has been read.
with PIL.Image.open("./train/dog.9374.jpg") as image:
    width,height=image.size

print("Width and Height are: ",width,"and ",height)

In [None]:
# Pick one training image at random, display it, and print its label row.
random_file = random.choice(filenames)
img = load_img(os.path.join("./train/", random_file))
plt.imshow(img)

sample_row = df[df['filename'] == random_file]
print(sample_row['category'])


 # Creating Model

In [None]:
# Configuration shared by the model definition and the data generators.
FAST_RUN = False  # not referenced by the cells visible in this notebook

# Images are resized to a square of this size with 3 colour channels.
image_width, image_height = 128, 128
image_channels = 3
image_size = (image_width, image_height)

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dropout,Flatten,Dense, Activation,BatchNormalization


# CNN classifier: three Conv -> BatchNorm -> MaxPool -> Dropout stages with
# 32, 64 and 128 filters, followed by a dense head with a 2-way softmax output.
mymodel = Sequential()

# Stage 1 also declares the input shape.
# NOTE(review): Keras documents image input as (height, width, channels); the
# width/height order used here is harmless only because both are equal.
mymodel.add(Conv2D(32, (3, 3), activation='relu',
                   input_shape=(image_width, image_height, image_channels)))
mymodel.add(BatchNormalization())
mymodel.add(MaxPooling2D(pool_size=(2, 2)))
mymodel.add(Dropout(0.25))

# Stages 2 and 3 repeat the same pattern with more filters.
for n_filters in (64, 128):
    mymodel.add(Conv2D(n_filters, (3, 3), activation='relu'))
    mymodel.add(BatchNormalization())
    mymodel.add(MaxPooling2D(pool_size=(2, 2)))
    mymodel.add(Dropout(0.25))

# Classifier head: flatten, one hidden dense layer, then one unit per class.
head_layers = (
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(2, activation='softmax'),
)
for layer in head_layers:
    mymodel.add(layer)

mymodel.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

mymodel.summary()

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Define EarlyStopping to halt training when the monitored metric (validation loss by default) has not improved for 10 consecutive epochs

In [None]:
# Stop training once validation loss (the Keras default monitor, spelled out
# here) has failed to improve for 10 consecutive epochs.
# NOTE(review): the training cell runs epochs=10, so a patience of 10 cannot
# trigger within that run — confirm whether a smaller patience was intended.
earlystop = EarlyStopping(monitor='val_loss', patience=10)

In [None]:
# Halve the learning rate whenever validation loss has not improved for
# 2 epochs, never going below 1e-5; verbose=1 logs each reduction.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=0.00001,
    verbose=1,
)

In [None]:
# Callbacks passed to training: early stopping plus learning-rate reduction.
callbacks=[earlystop,learning_rate_reduction]

# Prepare Data For Training
## 1. Convert Numeric Labels to Class Names (the image generators one-hot encode them via `class_mode='categorical'`)

In [None]:
# Swap the numeric labels for class-name strings; the generators created later
# read this column with class_mode='categorical'.
label_names = {0: 'cat', 1: 'dog'}
df['category'] = df['category'].replace(label_names)

In [None]:
# Check that the category column now holds 'cat' / 'dog' strings.
df.head()

In [None]:
# Hold out 20% of the labelled images for validation (fixed seed so the split
# is repeatable) and renumber both parts from 0.
print(df.shape[0])
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.20, random_state=42)
)
print(train_df.shape[0])
print(validate_df.shape[0])

In [None]:
# Preview the first rows of the training split.
train_df.head()

In [None]:
# Class balance of the training split.
train_class_counts = train_df['category'].value_counts()
train_class_counts.plot(kind='bar')

In [None]:
# Batch size shared by all generators, plus the row counts of each split.
batch_size = 15
train_df_number = len(train_df)
validate_df_number = len(validate_df)
print(validate_df_number)

In [None]:
# Training-time augmentation: pixel values are rescaled to [0, 1] and each
# image is randomly rotated, sheared, zoomed, shifted and flipped.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

In [None]:
# Stream augmented training batches from ./train, using the filename and
# category columns of train_df.
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    './train',
    x_col='filename',
    y_col='category',
    target_size=image_size,
    class_mode='categorical',
    batch_size=batch_size,
)

In [None]:
# Validation images are only rescaled — no augmentation is applied.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Same column mapping and image size as the training generator.
validation_flow_options = dict(
    x_col='filename',
    y_col='category',
    target_size=image_size,
    class_mode='categorical',
    batch_size=batch_size,
)
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df, "./train/", **validation_flow_options
)

In [None]:
# Train the model on the augmented training generator and evaluate on the
# validation generator after every epoch.
epochs=10

# `fit` accepts generators directly; `fit_generator` is deprecated.
# Step counts are left to Keras, which uses len(generator) for these iterators.
# The previous floor division (n // batch_size) silently skipped the final
# partial batch of both the training and the validation data.
history=mymodel.fit(train_generator,
                    epochs=epochs,
                    validation_data=validation_generator,
                    callbacks=callbacks)

In [None]:
# Write the trained weights to mymodel.h5 (save_weights stores weights only).
mymodel.save_weights("mymodel.h5")

In [None]:
# Dump the per-epoch metrics recorded during training (plotted in the next cell).
print(history.history)

In [None]:
# Plot training vs. validation loss (top) and accuracy (bottom) per epoch.
fig,(ax1, ax2)=plt.subplots(2,1,figsize=(12,12))

# Use the number of epochs actually recorded rather than `epochs`: training
# can stop early, leaving a shorter history. Epochs are numbered from 1 so the
# ticks line up with every plotted point, including the first one.
epoch_numbers=np.arange(1,len(history.history['loss'])+1)

ax1.plot(epoch_numbers,history.history['loss'],color='b',label='Training Loss')
ax1.plot(epoch_numbers,history.history['val_loss'],color='r', label='Validation Loss')
ax1.set_xticks(epoch_numbers)
ax1.set_yticks(np.arange(0,1,0.1))
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
# Each axes needs its own legend; plt.legend() only affects the current
# (last) axes, which left the loss panel without one.
ax1.legend(loc='best',shadow=True)

ax2.plot(epoch_numbers,history.history['accuracy'],color='g',label='Training Accuracy')
ax2.plot(epoch_numbers,history.history['val_accuracy'],color='r',label='Validation Accuracy')
ax2.set_xticks(epoch_numbers)
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy')
ax2.legend(loc='best',shadow=True)

plt.tight_layout()
plt.show()

In [None]:
# Collect the unlabelled test image filenames into a single-column frame.
test_filenames = os.listdir('./test1/')
test_df = pd.DataFrame({'filename': test_filenames})

In [None]:
# Number of test images; used to size the prediction run.
nb_samples=test_df.shape[0]

# Keep the preview as the last expression — a notebook only displays the final
# expression of a cell, so a head() call placed before an assignment shows nothing.
test_df.head()

In [None]:
# Test images are only rescaled. There are no labels (y_col/class_mode are
# None) and shuffle=False keeps predictions in the same order as test_df.
test_gen = ImageDataGenerator(rescale=1./255)

test_generator = test_gen.flow_from_dataframe(
    test_df,
    "./test1",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=image_size,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Run inference over the whole test generator.
# `predict` accepts generators directly; `predict_generator` is deprecated.
# np.ceil returns a float, so the step count is cast to int; rounding up
# ensures the final partial batch is included.
predict=mymodel.predict(test_generator,steps=int(np.ceil(nb_samples/batch_size)))

print(predict)

In [None]:
# Take the index of the highest-scoring class for each test image.
predicted_index = predict.argmax(axis=-1)
test_df['category'] = predicted_index

In [None]:
# Preview the predicted class indices.
test_df.head()

In [None]:
# Invert the generator's class-name -> index mapping and use it to turn the
# predicted indices back into class names.
label_map = {index: name for name, index in train_generator.class_indices.items()}
test_df['category'] = test_df['category'].replace(label_map)

In [None]:
# Preview the predictions after mapping indices to class names.
test_df.head()

In [None]:
# Encode the class names as the numeric labels written to the submission:
# cat -> 0, dog -> 1.
class_to_id = {'cat': 0, 'dog': 1}
test_df['category'] = test_df['category'].replace(class_to_id)

In [None]:
# Distribution of predicted labels over the test set.
prediction_counts = test_df['category'].value_counts()
prediction_counts.plot.bar()

In [None]:
# Build the submission table: `id` is the filename up to its first ".",
# `label` is the predicted category. Written without the index column.
submission_df = pd.DataFrame({
    'id': test_df['filename'].str.split('.').str[0],
    'label': test_df['category'],
})
submission_df.to_csv('submission.csv', index=False)