In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Print the full path of every file found beneath the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

In [None]:
# import package
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import random
import os
from zipfile import ZipFile
from skimage.io import imread, imshow
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential,load_model 
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Flatten,Dense, Dropout, Activation, BatchNormalization

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint,ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import load_img, img_to_array

import warnings
# Installs a blanket 'ignore' filter: warnings raised through the warnings
# module (including deprecation notices) are hidden from this point on.
warnings.filterwarnings('ignore')

In [None]:
# import train and test data
trainDataPath = "/kaggle/input/dogs-vs-cats/train.zip"
testDataPath = "/kaggle/input/dogs-vs-cats/test1.zip"

# Extract both archives into the current working directory. The context
# managers close each archive's file handle as soon as extraction finishes.
with ZipFile(trainDataPath, mode="r") as train_archive:
    train_archive.extractall()
with ZipFile(testDataPath, mode="r") as test_archive:
    test_archive.extractall()

# Folders the rest of the notebook reads images from.
# NOTE(review): assumes the archives unpack into these folder names - confirm.
train_path = './train'
test_path = './test1'

In [None]:
# set parameter
SIZE = 128          # edge length used when loading images for display
IMG_SHAPE = SIZE    # edge length fed to the generators and the model input
CHANNELS = 3        # colour channels per image
BATCH_SIZE = 150    # images per generator batch
EPOCHS = 50         # upper bound on training epochs

In [None]:
# preparing training data

# os.listdir returns entries in arbitrary order; sorting gives the frame a
# stable row order so the seeded train/validation split is repeatable.
filenames = sorted(os.listdir(train_path))

# The label is whatever precedes the first dot in the file name
# (the notebook later filters on 'cat' and 'dog').
categories = [filename.split('.')[0] for filename in filenames]

df = pd.DataFrame({
    'Image': filenames,
    'Category': categories})
df.head()

In [None]:
# preparing test data
# One row per file found in the test folder; there is no label column.
test_files = os.listdir(test_path)
df_test = pd.DataFrame(test_files, columns=['Image'])

df_test.head()

In [None]:
# check the total data
# Bar chart of how many training images fall under each Category value.
sns.countplot(data=df, x='Category')
plt.show()

In [None]:
# split training data and validation data
# Hold out 20% of the rows for validation (fixed seed), then renumber both
# parts from zero.
train_df, validation_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.20, random_state=50)
)

In [None]:
# Cat files: names whose text before the first dot is 'cat'
cat_files = [name for name in filenames if name.partition('.')[0] == 'cat']
print(len(cat_files))
# Dog files: names whose text before the first dot is 'dog'
dog_files = [name for name in filenames if name.partition('.')[0] == 'dog']
print(len(dog_files))

In [None]:
# data augmentation - training data
# Pixel values are scaled to [0, 1]; the remaining options apply random
# rotations, shifts, shear, zoom and horizontal flips to the training images.
image_gen_train = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=45,
    shear_range=0.1,
    zoom_range=0.3,
    width_shift_range=0.3,
    height_shift_range=0.3,
    fill_mode='nearest',
)

# Batches are read from train_path using the file names and labels in train_df.
train_dataset = image_gen_train.flow_from_dataframe(
    dataframe=train_df,
    directory=train_path,
    x_col='Image',
    y_col='Category',
    class_mode='categorical',
    target_size=(IMG_SHAPE, IMG_SHAPE),
    batch_size=BATCH_SIZE,
)

In [None]:
# validation data generator
# Only rescaling is applied here; no random augmentation on validation images.
image_gen_val = ImageDataGenerator(rescale=1./255)

# Validation rows point at files in the same folder as the training rows.
validation_dataset = image_gen_val.flow_from_dataframe(
    dataframe=validation_df,
    directory=train_path,
    x_col='Image',
    y_col='Category',
    class_mode='categorical',
    target_size=(IMG_SHAPE, IMG_SHAPE),
    batch_size=BATCH_SIZE,
)

In [None]:
# test data generator
# Only rescaling is applied to the test images.
test_datagen = ImageDataGenerator(rescale=1./255)

# No labels are supplied (y_col / class_mode are None) and shuffling is off,
# so batches follow the row order of df_test.
test_dataset = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    directory=test_path,
    x_col='Image',
    y_col=None,
    class_mode=None,
    shuffle=False,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    batch_size=BATCH_SIZE,
)

In [None]:
#Init Model
# Four conv -> max-pool -> dropout stages (dropout rate rising from 0.2 to
# 0.5), followed by a 128-unit dense layer and a 2-way softmax output.
model = Sequential([
    # Stage 1
    Conv2D(32, (3, 3), input_shape=(IMG_SHAPE, IMG_SHAPE, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.2),

    # Stage 2
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.3),

    # Stage 3
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.4),

    # Stage 4
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.5),

    # Classifier head
    Flatten(),
    Dense(units=128, activation='relu'),
    Dense(2, activation='softmax'),
])

model.summary()


In [None]:
# model compile
# Adam optimizer with categorical cross-entropy loss; accuracy is tracked
# as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
#call backs
checkpoint_filepath = 'best_weights.hdf5'

# Stop once val_accuracy has gone 10 epochs without improving, and roll the
# model back to its best weights. The patience has to be smaller than the
# epoch budget: with patience equal to the 50-epoch limit the callback could
# not stop training before the run ended on its own.
earlyStop = EarlyStopping(monitor='val_accuracy', patience=10,
                          verbose=1, mode='auto', restore_best_weights=True)

# Save the full model (not just weights) to checkpoint_filepath, overwriting
# it only when val_accuracy improves.
checkpoint = ModelCheckpoint(filepath=checkpoint_filepath,
                             save_weights_only=False,
                             monitor='val_accuracy',
                             mode='auto',
                             save_best_only=True)
callbacks = [earlyStop, checkpoint]

In [None]:
# Debug output: prints the EarlyStopping callback object.
print(earlyStop)

In [None]:
# Model Fitting
# Train for at most EPOCHS epochs, evaluating on the validation generator
# after each one; the returned history feeds the curves plotted below.
history = model.fit(
    x=train_dataset,
    epochs=EPOCHS,
    callbacks=callbacks,
    validation_data=validation_dataset,
)

In [None]:
# Per-epoch metrics recorded during training.
loss = history.history['loss']
val_loss = history.history['val_loss']
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
epochs_range = range(len(acc))

fig = plt.figure(figsize=(15, 15))

# Left panel: accuracy curves.
ax_acc = fig.add_subplot(2, 2, 1)
ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

# Right panel: loss curves.
ax_loss = fig.add_subplot(2, 2, 2)
ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
plt.show()

In [None]:
# test model

# Run the model over the test generator.
predict = model.predict(test_dataset)

# Keep the index of the highest-scoring output per image. The test generator
# is unshuffled, so results are assigned to df_test row by row.
df_test['category'] = predict.argmax(axis=-1)
df_test.head()

In [None]:
# Invert the generator's name -> index mapping so predicted indices can be
# turned back into class names.
label_map = {index: name for name, index in train_dataset.class_indices.items()}
df_test['category'] = df_test['category'].replace(label_map)

In [None]:
#show result of predicted
# Display the first 15 test images, each captioned with its file name and
# predicted category.
sample_test = df_test.head(15)
plt.figure(figsize=(12, 24))
# Number the panels with enumerate rather than the DataFrame index labels, so
# the layout stays valid even if df_test is ever re-indexed or filtered.
for position, (filename, category) in enumerate(
        zip(sample_test['Image'], sample_test['category']), start=1):
    img = load_img(os.path.join(test_path, filename), target_size=(SIZE, SIZE))
    plt.subplot(6, 3, position)
    plt.imshow(img)
    plt.xlabel(filename + '(' + "{}".format(category) + ')')
plt.tight_layout()
plt.show()

In [None]:
#submit

# Build the two-column submission frame directly from df_test:
#   id    - the file name up to its first dot
#   label - the predicted category
# NOTE(review): labels are written exactly as stored in df_test['category'];
# confirm this matches the format the competition expects.
submission_df = pd.DataFrame({
    'id': df_test['Image'].str.split('.').str[0],
    'label': df_test['category'],
})
submission_df.to_csv('submission.csv', index=False)