In [None]:


import warnings
warnings.filterwarnings("ignore")

import zipfile
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau

In [None]:
# Unpack both competition archives into the same working directory.
for archive_path in ("../input/dogs-vs-cats/train.zip", "../input/dogs-vs-cats/test1.zip"):
    with zipfile.ZipFile(archive_path, 'r') as zip_:
        zip_.extractall("/kaggle/files/")

In [None]:
# Every entry of the training folder becomes one row; the label is read from
# the filename text that precedes the first '.'.
filenames = os.listdir("/kaggle/files/train/")
categories = []
for filename in filenames:
    category = filename.split('.')[0]
    # 1 for a 'dog' prefix, 0 for anything else.
    categories.append(1 if category == 'dog' else 0)

df = pd.DataFrame({'filename': filenames, 'category': categories})

In [None]:
# Peek at the first rows of the filename/label table.
df.head()

In [None]:
# Peek at the last rows of the filename/label table.
df.tail()

In [None]:
# Bar chart of how many rows carry each label value.
df['category'].value_counts().plot.bar()


In [None]:
# Model and layers are all taken from tensorflow.keras. Building a standalone
# `keras` Sequential and adding `tensorflow.keras` layers to it mixes two Keras
# implementations, which can fail depending on the installed versions.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

model = Sequential()

# Three convolutional stages (32 -> 64 -> 128 filters, 3x3 kernels, ReLU), each
# followed by batch normalization, 2x2 max pooling and 25% dropout.
# The first stage declares the 128x128 RGB input shape.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=[128, 128, 3]))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Classifier head: flatten, one 512-unit hidden layer with 30% dropout.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))

# Two mutually exclusive classes trained with categorical cross-entropy need a
# softmax output so the two scores form a probability distribution; independent
# sigmoid units do not guarantee that.
model.add(Dense(2, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

model.summary()

In [None]:
# Swap the numeric codes for class-name strings; this column is later passed
# as y_col to the image generators.
label_names = {0: 'cat', 1: 'dog'}
df["category"] = df["category"].replace(label_names)

In [None]:
# Check that the category column now holds the class-name strings.
df.head()

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state makes the
# split repeatable across runs.
train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42)

In [None]:
# Drop the index labels inherited from df so train_df is numbered from 0 again.
train_df = train_df.reset_index(drop=True)

In [None]:
# Peek at the first rows of the training split.
train_df.head()

In [None]:
# Bar chart of the class counts in the training split.
train_df['category'].value_counts().plot.bar()

In [None]:
# Peek at the first rows of the validation split.
validate_df.head()

In [None]:
# Bar chart of the class counts in the validation split.
validate_df['category'].value_counts().plot.bar()

In [None]:
# Batch size and per-split row counts; the training cell divides the counts by
# the batch size to get the number of steps.
batch_size = 15
total_train = len(train_df)
total_validate = len(validate_df)

In [None]:
# Report (rows, columns) for the training split, then the validation split.
for split_frame in (train_df, validate_df):
    print(split_frame.shape)

In [None]:
# Training-time preprocessing: scale pixel values by 1/255 and augment with
# random horizontal flips, rotations (range 20) and zoom (range 0.2).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=20,
    zoom_range=0.2,
)

In [None]:
# Stream augmented training batches: filenames are resolved against the
# training folder, images are resized to 128x128 and labels are taken from
# the 'category' column.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="/kaggle/files/train/",
    x_col='filename',
    y_col='category',
    class_mode='categorical',
    target_size=[128, 128],
    batch_size=batch_size,
)

In [None]:
# Validation images are only rescaled (no augmentation) so the metrics are
# measured on unmodified pictures.
validation_datagen = ImageDataGenerator(rescale=1./255)

# The validation rows come from the same folder as the training rows.
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory="/kaggle/files/train/",
    x_col='filename',
    y_col='category',
    class_mode='categorical',
    target_size=[128,128],
    batch_size=batch_size,
)

In [None]:
epochs = 5

# Model.fit accepts generators directly; fit_generator is deprecated and no
# longer available in recent Keras releases.
# Step counts use floor division, so a trailing partial batch is not counted
# towards an epoch.
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=total_train // batch_size,
    validation_data=validation_generator,
    validation_steps=total_validate // batch_size,
)

In [None]:
# Plot loss (top) and accuracy (bottom) per epoch for both splits.
# The x axis uses 1-based epoch numbers derived from the recorded history, so
# the curves and the tick marks line up.
epoch_numbers = np.arange(1, len(history.history['loss']) + 1)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))

ax1.plot(epoch_numbers, history.history['loss'], color='b', label="Training loss")
ax1.plot(epoch_numbers, history.history['val_loss'], color='r', label="validation loss")
ax1.set_xticks(epoch_numbers)
ax1.set_yticks(np.arange(0, 1, 0.1))
ax1.set_xlabel("Epoch")
ax1.set_ylabel("Loss")
# Each axes needs its own legend; plt.legend() would only annotate the last one.
ax1.legend(loc='best', shadow=True)

ax2.plot(epoch_numbers, history.history['accuracy'], color='b', label="Training accuracy")
ax2.plot(epoch_numbers, history.history['val_accuracy'], color='r',label="Validation accuracy")
ax2.set_xticks(epoch_numbers)
ax2.set_xlabel("Epoch")
ax2.set_ylabel("Accuracy")
legend = ax2.legend(loc='best', shadow=True)

plt.tight_layout()
plt.show()

In [None]:
test_filenames = os.listdir("/kaggle/files/train/") 
test_df = pd.DataFrame({                                       
    'filename': test_filenames                                 
})
nb_samples = test_df.shape[0] 

In [None]:
test_gen = ImageDataGenerator(rescale=1./255)

test_generator = test_gen.flow_from_dataframe(
    test_df, 
    "/kaggle/files/train/", 
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=[128,128],
    batch_size=batch_size,
    shuffle=False
)


In [None]:
# Model.predict accepts the generator directly (predict_generator is deprecated
# and no longer available in recent Keras releases). The step count is rounded
# up so the final partial batch is included, and cast to int because np.ceil
# returns a float.
predict = model.predict(test_generator, steps=int(np.ceil(nb_samples / batch_size)))

In [None]:
# Index of the highest-scoring output column for each predicted row.
np.argmax(predict, axis=-1)

In [None]:
# Store the winning output index of every prediction row as the category.
test_df['category'] = predict.argmax(axis=-1)

In [None]:
# Inspect the predicted class indices.
test_df['category']

In [None]:
# Invert the generator's class-name -> index mapping to index -> class-name.
{index: name for name, index in train_generator.class_indices.items()}

In [None]:
# index -> class-name lookup, built by inverting the training generator's
# class_indices mapping.
label_map = {index: name for name, index in train_generator.class_indices.items()}

# Translate the predicted indices into class names.
predicted_names = test_df['category'].replace(label_map)
test_df['category'] = predicted_names

In [None]:
# Inspect the filenames together with their predicted class names.
test_df

In [None]:
# Convert the class names back to integer codes (dog -> 1, cat -> 0); this
# column is what the submission cell copies into 'label'.
test_df['category'] = test_df['category'].replace({ 'dog': 1, 'cat': 0 })

In [None]:
# Bar chart of how many images were predicted as each class.
test_df['category'].value_counts().plot.bar()

In [None]:
# Show the first 18 prediction rows.
test_df.head(18)

In [None]:
# Build the two-column submission table directly: 'id' is the filename text
# before the first '.', 'label' is the predicted integer category.
submission_df = pd.DataFrame({
    'id': test_df['filename'].str.split('.').str[0],
    'label': test_df['category'],
})
submission_df.to_csv('submission.csv', index=False)