In [None]:
import warnings
warnings.filterwarnings("ignore")

import zipfile
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau

In [None]:
%%time
with zipfile.ZipFile("../input/dogs-vs-cats/train.zip",'r') as z:
    z.extractall("/kaggle/files/")
with zipfile.ZipFile("../input/dogs-vs-cats/test1.zip",'r') as z:
    z.extractall("/kaggle/files/")

In [None]:
# Train data: the label is taken from the filename prefix before the first '.'.
# A 'dog' prefix is encoded as 1, every other prefix as 0.
files = os.listdir("/kaggle/files/train/")
images01 = [1 if name.split('.')[0] == 'dog' else 0 for name in files]

In [None]:
# One row per training image: its filename and its numeric label.
df = pd.DataFrame(dict(file = files, category = images01))
df.head()

In [None]:
# Number of training images per label, to check the class balance.
df.loc[:, 'category'].value_counts()

In [None]:
# CNN classifier for 128x128 RGB images with a two-unit output layer
# (one unit per class, matching the one-hot labels used for training).
model = keras.Sequential()

# Four convolutional stages. Every stage is
#   Conv2D(3x3, relu, 'same') -> BatchNormalization -> MaxPool2D(2) -> Dropout(0.2)
# and only the number of filters changes: 32, 64, 128, 256.
# The first stage is written out because it also declares the input shape.
model.add(keras.layers.Conv2D(32, 3, input_shape = [128, 128, 3], activation = 'relu', padding = 'same'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.MaxPool2D(2))
model.add(keras.layers.Dropout(0.2))

for n_filters in (64, 128, 256):
    model.add(keras.layers.Conv2D(n_filters, 3, activation = 'relu', padding = 'same'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPool2D(2))
    model.add(keras.layers.Dropout(0.2))

# Classification head.
model.add(keras.layers.Flatten())

model.add(keras.layers.Dense(1024, activation = 'relu'))
model.add(keras.layers.Dropout(0.25))

# softmax (not sigmoid): categorical_crossentropy treats the outputs as a
# probability distribution over mutually exclusive classes, so the two units
# must sum to 1. Independent sigmoid units do not guarantee that.
model.add(keras.layers.Dense(2, activation = 'softmax'))

model.compile(loss = 'categorical_crossentropy', optimizer = 'rmsprop', metrics = ['accuracy'])
model.summary()

In [None]:
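# NOTE: these callbacks are currently disabled and are not passed to the training call.
# The monitored key is 'val_accuracy', matching the history keys plotted later in this notebook.
# earlystop = EarlyStopping(patience = 3)
# learning_rate_reduction = ReduceLROnPlateau(monitor = 'val_accuracy', 
#                                             patience = 2, 
#                                             #verbose = 1, 
#                                             factor = 0.5) 
#                                             #min_lr = 0.0001)
# callbacks = [earlystop, learning_rate_reduction]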

In [None]:
# Convert the numeric labels to class-name strings; this column is what the
# image generators read through y_col with class_mode = 'categorical'.
# replace() is used instead of map() because it leaves values that are not
# keys of the mapping untouched: re-running this cell keeps the existing
# 'cat'/'dog' strings, whereas map() would turn them all into NaN.
df['category'] = df['category'].replace({0 : 'cat', 1 : 'dog'})

In [None]:
# Hold out 20% of the labelled rows for validation (fixed seed, so the split is
# repeatable) and give both parts a fresh 0..n-1 index.
train, val_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.2, random_state=42)
)

In [None]:
# Row counts of the two splits, plus the batch size used by every generator below.
batch_size = 10
total_train, total_valid = len(train), len(val_df)
print(total_train)
print(total_valid)

In [None]:
# Training pipeline: pixel values are multiplied by 1/255 and images are
# augmented with random rotations (range 20) and horizontal flips.
train_data = ImageDataGenerator(rescale = 1./255,
                                rotation_range = 20,
                                horizontal_flip = True)

# Images are read from disk by filename, resized to 128x128 and served in
# batches together with one-hot ('categorical') labels.
train_generator = train_data.flow_from_dataframe(
    dataframe = train,
    directory = '/kaggle/files/train/',
    x_col = 'file',
    y_col = 'category',
    target_size = [128,128],
    class_mode = 'categorical',
    batch_size = batch_size)

In [None]:
# Validation pipeline: same rescaling as for training, but no augmentation.
valid_data = ImageDataGenerator(rescale = 1./255)

# The validation images live in the same directory as the training images;
# only the rows listed in val_df are read.
valid_generator = valid_data.flow_from_dataframe(
    dataframe = val_df,
    directory = '/kaggle/files/train/',
    x_col = 'file',
    y_col = 'category',
    target_size = [128,128],
    class_mode = 'categorical',
    batch_size = batch_size)

In [None]:
# Train for a fixed number of epochs and keep the per-epoch metrics in `history`.
# Model.fit accepts generators directly; fit_generator is deprecated in
# TensorFlow 2.x and is only a thin wrapper around fit.
epoch = 5
history = model.fit(
    train_generator,
    epochs = epoch,
    validation_data = valid_generator,
    # validation_steps is intentionally left unset, as before.
    steps_per_epoch = total_train // batch_size)

In [None]:
# Show the values recorded during training; the plots below read the
# 'loss', 'val_loss', 'accuracy' and 'val_accuracy' entries from it.
history.history

In [None]:
# Training vs. validation loss per epoch.
# The curves are plotted against explicit 1-based epoch numbers so that the
# x ticks line up with the data points (previously the points sat at 0..epoch-1
# while the ticks were placed at 1..epoch-1).
epochs_run = np.arange(1, len(history.history['loss']) + 1)
plt.plot(epochs_run, history.history['loss'], color = 'red', label = "train loss")
plt.plot(epochs_run, history.history['val_loss'], color = 'pink', label = "valid loss")
plt.xticks(epochs_run)
# y ticks are left to matplotlib: fixed ticks from np.arange(0, 1, 0.1) stop at
# 0.9 and would leave any loss above that unlabelled.
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
# As with the loss plot, x values are explicit 1-based epoch numbers so the
# ticks coincide with the plotted points instead of being shifted by one.
epochs_run = np.arange(1, len(history.history['accuracy']) + 1)
plt.plot(epochs_run, history.history['accuracy'], color = 'red', label = "train accuracy")
plt.plot(epochs_run, history.history['val_accuracy'], color = 'pink', label = "valid accuracy")
plt.xticks(epochs_run)
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.legend()
plt.show()

In [None]:
# Test data: one row per file found in the extracted test directory
# (filenames only; this frame has no label column yet).
files = os.listdir("/kaggle/files/test1/")
test_df = pd.DataFrame(dict(file = files))
samples = len(test_df)
samples

In [None]:
# Test pipeline: same rescaling as training/validation, no augmentation.
test_data = ImageDataGenerator(rescale=1./255)

# class_mode = None makes the generator yield image batches only, so no label
# column is needed: y_col is set to None instead of naming a column that does
# not exist in test_df.
# shuffle = False keeps the batches in the same order as the rows of test_df,
# which is what allows predictions to be written back row by row afterwards.
test_generator = test_data.flow_from_dataframe(
                test_df, 
                "/kaggle/files/test1", 
                x_col = 'file',
                y_col = None,
                class_mode = None,
                target_size = [128,128],
                batch_size = batch_size,
                shuffle = False)

In [None]:
# Run the model over every test batch. Model.predict accepts generators
# directly (predict_generator is deprecated in TensorFlow 2.x).
# The step count is rounded up so the final, partial batch is included, and
# converted to int because np.ceil returns a float.
predict = model.predict(test_generator, steps = int(np.ceil(samples / batch_size)))

In [None]:
# The predicted class is the index of the larger of the two output units.
# The result is already an integer label, so no string -> int replacement is
# needed (the former .replace({'dog': 1, 'cat': 0}) never matched anything).
# NOTE(review): presumably index 0 = 'cat' and 1 = 'dog'; confirm against
# train_generator.class_indices.
test_df['category'] = np.argmax(predict, axis = -1)
test_df

In [None]:
# Build the submission table directly from test_df:
#   id    - the part of the filename before the first '.'
#   label - the predicted class index
# and write it to submission.csv without the DataFrame index.
output = pd.DataFrame({
    'id': test_df['file'].str.split('.').str[0],
    'label': test_df['category'],
})
output.to_csv('submission.csv', index=False)

In [None]:
# Display the submission table (columns 'id' and 'label') as a final check.
output