# Best article for this problem

https://gsurma.medium.com/image-classifier-cats-vs-dogs-with-convolutional-neural-networks-cnns-and-google-colabs-4e9af21ae7a8

# Import Libraries

In [None]:
from keras.preprocessing.image import ImageDataGenerator, load_img
import numpy as np
import pandas as pd
import cv2
from matplotlib import pyplot as plt
from keras.models import Sequential
from keras.layers import Conv2D, Dropout, Dense, MaxPooling2D, BatchNormalization, Flatten
import random
import os
from zipfile import ZipFile
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from PIL import Image
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Get Data from Zipfile

In [None]:
# Unpack both competition archives into the current working directory.
# Each archive is opened in a `with` block so its file handle is closed as
# soon as extraction finishes instead of staying open for the whole session.
for archive_path in ("../input/dogs-vs-cats/train.zip",
                     "../input/dogs-vs-cats/test1.zip"):
    with ZipFile(archive_path, "r") as archive:
        archive.extractall()

# Define Constants

In [None]:
# Directories the image files are read from.
train_dir, test_dir = "./train/", "./test1/"

# Size and channel count of the images fed to the network.
IMG_WIDTH = IMG_HEIGHT = 128
IMG_CHANNEL = 3
IMG_SIZE = (IMG_WIDTH, IMG_HEIGHT)

# Mini-batch size and the upper bound on training epochs.
batch_size, epochs = 64, 50

# Prepare Data

In [None]:
# Label every training file from its name: a file whose first
# dot-separated token is "dog" is a dog, anything else is treated as a cat.
Dtrain = os.listdir(train_dir)
categories = [
    "dog" if name.split(".")[0] == "dog" else "cat"
    for name in Dtrain
]

# One row per file: its name and the label derived above.
df = pd.DataFrame({"filename": Dtrain, "category": categories})

In [None]:
# Print the first and the last rows of the labelled table.
for preview in (df.head(), df.tail()):
    print(preview)

# Plot Data

In [None]:
# Bar chart of the label counts; the title spells out the exact number of
# cats and dogs and the y label shows the total number of rows.
df["category"].value_counts().plot.bar()
cat_total = len(df[df["category"] == "cat"])
dog_total = len(df[df["category"] == "dog"])
plt.title("{} cats | {} dogs".format(cat_total, dog_total), fontsize=18)
plt.ylabel(len(df), fontsize=14)
plt.show()

# Build Model

In [None]:
# Convolutional classifier: three Conv2D -> MaxPooling2D -> Dropout stages
# (32, 64 and 128 filters) followed by a 1024-unit dense layer and a
# two-unit output layer.
model = Sequential()

model.add(Conv2D(32, (3, 3), activation="relu",
                 input_shape=(IMG_WIDTH, IMG_HEIGHT, IMG_CHANNEL)))
model.add(MaxPooling2D(2, 2))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D(2, 2))
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), activation="relu"))
model.add(MaxPooling2D(2, 2))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(1024, activation="relu"))
model.add(Dropout(0.25))

# The generators in this file use class_mode='categorical', i.e. one-hot
# labels over two mutually exclusive classes. A softmax output trained with
# categorical cross-entropy matches that encoding; the previous
# sigmoid + binary cross-entropy pairing treated the two outputs as
# independent labels and made the 'accuracy' metric element-wise.
model.add(Dense(2, activation="softmax"))

# `learning_rate` replaces the deprecated `lr` keyword.
# NOTE(review): newer Keras optimizers also reject `decay` - drop it if the
# installed version complains; confirm against the runtime in use.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001, decay=1e-6),
    metrics=['accuracy'],
)

model.summary()

# Show sample

In [None]:
# Pick one training file at random and display it.
sample_name = random.choice(df["filename"])
sample_path = train_dir + sample_name
img = load_img(sample_path)
plt.imshow(img)

# Split Data to Train and Validation set

In [None]:
# Hold out 20% of the labelled rows for validation (fixed seed, so the split
# is reproducible) and renumber both frames from zero.
train_df, validate_df = (
    subset.reset_index(drop=True)
    for subset in train_test_split(df, test_size=0.2, random_state=42)
)

# Plot Train and Validation

In [None]:
# Label counts of the training split; the y label shows the split size and
# the title the exact number of dogs and cats.
train_df["category"].value_counts().plot.bar()
train_dogs = len(train_df[train_df["category"] == "dog"])
train_cats = len(train_df[train_df["category"] == "cat"])
plt.ylabel(len(train_df), fontsize=14)
plt.title("{} Dogs  |  {} Cats".format(train_dogs, train_cats), fontsize=18)
plt.show()

In [None]:
# Label counts of the validation split; the y label shows the split size and
# the title the exact number of cats and dogs.
validate_df["category"].value_counts().plot.bar()
validation_cats = len(validate_df[validate_df["category"] == "cat"])
validation_dogs = len(validate_df[validate_df["category"] == "dog"])
# The y label previously reported len(train_df) (copy-paste slip); it must
# describe the frame that is actually plotted.
plt.ylabel(len(validate_df), fontsize=14)
plt.title("{} Cats  |  {} Dogs".format(validation_cats, validation_dogs), fontsize=18)
plt.show()

# Callbacks

In [None]:
# Stop training when the callback's default monitored quantity has not
# improved for 10 consecutive epochs.
earlystop = EarlyStopping(patience=10)

# Halve the learning rate (never going below 1e-5) when validation accuracy
# has not improved for 2 epochs. The metric is logged as 'val_accuracy' -
# the same key the plotting and reporting cells of this file read from
# `history.history` - so monitoring 'val_acc' would never find a value and
# the learning rate would never be reduced.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=2,
    verbose=1,
    factor=0.5,
    min_lr=0.00001,
)

callbacks = [earlystop, learning_rate_reduction]

# Training Generator

In [None]:
# Augmentation applied to training images: pixel values are scaled to
# [0, 1] and each image is randomly rotated, sheared, zoomed, shifted and
# horizontally flipped.
train_datagen = ImageDataGenerator(
    rotation_range=20,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

# Stream shuffled, one-hot labelled batches of the training split.
# The directory comes from the `train_dir` constant rather than a repeated
# literal, matching how the test generator uses `test_dir`.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=train_dir,
    x_col='filename',
    y_col='category',
    target_size=IMG_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
    color_mode="rgb",
    shuffle=True,
)

# Validation Generator

In [None]:
# Validation images are only rescaled to [0, 1]; no augmentation is applied.
validation_datagen = ImageDataGenerator(rescale=1./255)

# The validation rows were split off the training data, so their files live
# in `train_dir` (constant used instead of a repeated literal).
# shuffle=False: the fit call only runs len(validate_df) // batch size
# validation steps, so a fixed order makes every epoch score the same
# images and keeps the metrics the callbacks watch comparable.
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory=train_dir,
    x_col='filename',
    y_col='category',
    target_size=IMG_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
    color_mode="rgb",
    shuffle=False,
)


# Example Generator

In [None]:
# A generator over a single randomly chosen training image, run through the
# training augmentation pipeline so its effect can be inspected.
example_df = train_df.sample(n=1).reset_index(drop=True)

# The directory comes from the `train_dir` constant rather than a repeated
# literal, matching how the test generator uses `test_dir`.
example_generator = train_datagen.flow_from_dataframe(
    dataframe=example_df,
    directory=train_dir,
    x_col='filename',
    y_col='category',
    target_size=IMG_SIZE,
    class_mode='categorical',
    color_mode="rgb",
    shuffle=True,
)

# Show how Generator works

In [None]:
# Draw eight batches from the example generator and show the first image of
# each one in a 2 x 4 grid.
plt.figure(figsize=(12, 12))
for slot in range(1, 9):
    plt.subplot(2, 4, slot)
    augmented_batch, _ = next(example_generator)
    plt.imshow(augmented_batch[0])
plt.tight_layout()
plt.show()

# Fit Model

In [None]:
# Train the network on the augmented training stream and score it on the
# validation stream after every epoch.
# `Model.fit` accepts generators directly (this file already calls
# `model.predict` with one), so the deprecated `fit_generator` is not needed.
# Step counts are derived from `batch_size` - the value the generators were
# built with - instead of a repeated literal 64, so they stay in sync if the
# batch size is changed.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_df) // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validate_df) // batch_size,
    callbacks=callbacks,
)

# Save model's weights

In [None]:
# Persist the trained weights (weights only, not the full model) to an HDF5 file.
model.save_weights(filepath="CatsVsDogs.h5")

# Visualize Training

In [None]:
# Plot the loss (top) and accuracy (bottom) curves recorded during training.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))

# Early stopping can end training before `epochs` is reached, so the x axis
# is built from the number of epochs actually recorded, numbered from 1.
epochs_run = len(history.history["loss"])
epoch_axis = np.arange(1, epochs_run + 1)

ax1.plot(epoch_axis, history.history["loss"], color="red", label="Training loss")
ax1.plot(epoch_axis, history.history["val_loss"], color="b", label="Validation loss")
ax1.set_xticks(epoch_axis)
ax1.set_yticks(np.arange(0, 1, 0.1))
# `plt.legend` only decorates the current (last) axes, which left the loss
# panel without a legend; each panel now gets its own.
ax1.legend(loc="best", shadow=True)

ax2.plot(epoch_axis, history.history["accuracy"], color="red", label="Training accuracy")
ax2.plot(epoch_axis, history.history["val_accuracy"], color="b", label="Validation accuracy")
ax2.set_xticks(epoch_axis)
ax2.set_yticks(np.arange(0, 1, 0.1))
ax2.legend(loc="best", shadow=True)

plt.tight_layout()
plt.show()

# Prepare Test set

In [None]:
# Collect the test file names into a single-column table.
Dtest = os.listdir(test_dir)
df_test = pd.DataFrame(Dtest, columns=["filename"])

# Test Generator

In [None]:
# Test images get the same [0, 1] scaling as the training and validation
# images. `rescale` must be passed by keyword: the first positional
# parameter of ImageDataGenerator is `featurewise_center`, so the previous
# `ImageDataGenerator(1./255)` left the pixels unscaled.
test_dataGen = ImageDataGenerator(rescale=1./255)

# Unlabelled stream over the test files (class_mode=None, y_col=None).
# shuffle=False keeps the batches in `df_test` row order; the predictions
# are later written back to `df_test` by position, so a shuffled stream
# would attach them to the wrong file names.
test_gen = test_dataGen.flow_from_dataframe(
    df_test,
    test_dir,
    target_size=IMG_SIZE,
    x_col="filename",
    y_col=None,
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
    color_mode="rgb",
)

# Predict Model

In [None]:
# Score every test image, then keep the index of the highest-scoring output
# along the last axis as the predicted class index.
predicted = model.predict(test_gen)
df_test["category"] = predicted.argmax(axis=-1)

# Show model accuracy

In [None]:
# Report the metrics recorded for the last training epoch.
# The second figure is read from 'val_accuracy', i.e. it is measured on the
# validation split - the test set has no labels to score against - so it is
# labelled as validation accuracy rather than test accuracy.
final_train_accuracy = history.history['accuracy'][-1]
final_validation_accuracy = history.history['val_accuracy'][-1]
print("Train Accuracy:{:.3f}".format(final_train_accuracy))
print("Validation Accuracy:{:.3f}".format(final_validation_accuracy))

# Plot Test Data

In [None]:
# Translate predicted class indices back to class names.
# The index of each class is read from the training generator instead of
# being hard-coded: flow_from_dataframe numbers the classes in alphanumeric
# order (cat -> 0, dog -> 1), so the previous literal map {1: "cat", 0: "dog"}
# swapped the two labels.
index_to_label = {
    index: label for label, index in train_generator.class_indices.items()
}
df_test["category"] = df_test["category"].replace(index_to_label)

# Bar chart of the predicted labels; the y label shows the number of test
# rows and the title the exact number of predicted dogs and cats.
df_test['category'].value_counts().plot.bar()
predicted_dogs = len(df_test[df_test['category'] == 'dog'])
predicted_cats = len(df_test[df_test['category'] == 'cat'])
plt.ylabel(len(df_test), fontsize=14)
plt.title("{} Dogs  |  {} Cats".format(predicted_dogs, predicted_cats), fontsize=18)
plt.show()

# Show the result

In [None]:
# Show the first 18 test images in a 6 x 3 grid, each captioned with its
# predicted category.
sample_test = df_test.head(18)
plt.figure(figsize=(12, 24))
# The grid slot comes from enumerate rather than from the DataFrame index
# label, so the layout stays valid even if `df_test` is not indexed 0..n-1.
for position, (_, row) in enumerate(sample_test.iterrows(), start=1):
    img = load_img(test_dir + row['filename'], target_size=IMG_SIZE)
    plt.subplot(6, 3, position)
    plt.imshow(img)
    plt.xlabel("{}".format(row['category']))
plt.tight_layout()
plt.show()

# Submission

In [None]:
# Build the submission table: one row per test file with
#   id    - the file name up to its first dot
#   label - the predicted class as a number
# At this point `category` holds the strings "cat"/"dog"; they are converted
# to numbers instead of being written out as text.
# NOTE(review): the encoding 1 = dog, 0 = cat is taken from the competition's
# sample submission - confirm against the competition page.
submission_df = pd.DataFrame({
    'id': df_test['filename'].str.split('.').str[0],
    'label': df_test['category'].map({'cat': 0, 'dog': 1}),
})
submission_df.to_csv('submission.csv', index=False)