# Extracting zip files

In [None]:
import os
import zipfile

# List what is available under ../input before unpacking.
print(os.listdir("../input"))

# Unpack the training archive, then the test archive, into the current directory.
for archive_path in ("../input/dogs-vs-cats/train.zip", "../input/dogs-vs-cats/test1.zip"):
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")

# Importing libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

# Defining train path

In [None]:
# Base working directory and the sub-folder that holds the training images.
main_dir, train_dir = "/kaggle/working/", "train"
path = os.path.join(main_dir, train_dir)

In [None]:
# Test images are read from the "test1" folder under the same base directory as the training set.
test_dir = "test1"
test_path = os.path.join(main_dir, test_dir)

In [None]:
# function for labels
def label(img):
    """Map an image filename to its class id.

    The text before the first '.' in ``img`` selects the class:
    'cat' gives 1 and 'dog' gives 0. Any other prefix yields None.
    """
    prefix, _, _ = img.partition('.')
    return {'cat': 1, 'dog': 0}.get(prefix)

# Defining train data

In [None]:
from tqdm import tqdm
import cv2
import os
import numpy as np
from random import shuffle

# Side length (in pixels) that every training image is resized to.
IMG_SIZE = 50

# Filled in place by create_train_data(); entries at the same index belong together.
train_data = []
train_labels = []
def create_train_data(path):
    """Load every labelled image in ``path`` into the module-level training lists.

    Each file is read as grayscale, resized to IMG_SIZE x IMG_SIZE and appended to
    ``train_data``; its class id from ``label`` is appended to ``train_labels``.
    Files whose name ``label`` does not recognise, or that OpenCV cannot decode,
    are skipped so the two lists stay aligned and contain only usable entries.

    Calling this more than once appends again; the lists are not cleared.
    """
    for filename in os.listdir(path):
        class_id = label(filename)
        if class_id is None:
            # label() returns None for names that start with neither 'cat' nor 'dog';
            # a None entry would turn the label array into an object array.
            continue
        pixels = cv2.imread(os.path.join(path, filename), cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread reports an unreadable file by returning None instead of raising,
            # and cv2.resize would then fail on it.
            continue
        train_data.append(cv2.resize(pixels, (IMG_SIZE, IMG_SIZE)))
        train_labels.append(class_id)

# Defining X and y

In [None]:
# Fill train_data/train_labels, then pack them into arrays for Keras.
# The reshape adds a trailing channel axis of size 1 (grayscale) and uses IMG_SIZE,
# the same constant the images were resized with, instead of a repeated literal.
create_train_data(path)
X = np.array(train_data).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y = np.array(train_labels)

In [None]:
# Scale the pixel values by 1/255. This cell rebinds X to its own result, so running it
# a second time divides again; re-create X first if the cell has to be re-run.
X = X/255.0

# TF libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation, Conv2D, MaxPooling2D, BatchNormalization

# Defining model

In [None]:
# Small CNN: two Conv -> BatchNorm -> MaxPool -> Dropout stages followed by a dense head
# that ends in a single sigmoid unit.
model1 = Sequential([
    # Stage 1: 32 filters; the input shape is taken from X without its batch axis.
    Conv2D(32, (3, 3), activation='relu', input_shape=X.shape[1:]),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: 64 filters.
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head.
    Flatten(),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Print the layer-by-layer summary of model1 as a sanity check before compiling.
model1.summary()

In [None]:
# Adam optimizer (selected by name), binary cross-entropy loss, and accuracy as the reported metric.
model1.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Wandb for tracking model's performance

In [None]:
%%capture
# Install the wandb client; the %%capture cell magic above hides pip's output.
# NOTE(review): no version is pinned, so the installed release may change between runs.
!pip install wandb

# Importing Wandb

In [None]:
import wandb
from wandb.keras import WandbCallback

# Log in to Weights & Biases before any run is initialised.
# NOTE(review): presumably this needs an API key from the user or the environment - confirm.
wandb.login()

# Initialising Wandb

In [None]:

# Start a W&B run under the 'cats-vs-dogs' project. The entity is hard-coded to the
# author's account, so anyone else running this notebook needs to change it.
wandb.init(entity='pratikraut_', project='cats-vs-dogs')


# Fitting model

In [None]:
# Keras documents `callbacks` as a list of callback instances, so the single W&B
# callback is wrapped in a list rather than passed bare.
my_callbacks = [WandbCallback()]
# validation_split=0.2 holds out part of X/y for per-epoch validation metrics.
model1.fit(X, y, epochs=20, batch_size=32, validation_split=0.2, callbacks=my_callbacks)

# Preparing test data

In [None]:
# Filled in place by process_test_data(); entries at the same index belong together.
test_data = []
idd = []
def process_test_data(test_path):
    """Load every image in ``test_path`` into ``test_data`` and record its id in ``idd``.

    Images are read as grayscale and resized to IMG_SIZE x IMG_SIZE, the same size
    used for the training images, so they match the network's input shape. The id
    is the filename text before the first '.'. Files OpenCV cannot decode are skipped.

    Calling this more than once appends again; the lists are not cleared.
    """
    for filename in os.listdir(test_path):
        pixels = cv2.imread(os.path.join(test_path, filename), cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread reports an unreadable file by returning None instead of raising.
            continue
        # Resize to the training resolution; any other size would not line up with the
        # (IMG_SIZE, IMG_SIZE, 1) samples the model was built for.
        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        test_data.append(np.array(pixels))
        idd.append(filename.split('.')[0])

In [None]:
# Load the test images, then shape and scale them the same way as the training array:
# a trailing grayscale channel axis, sides of IMG_SIZE, pixel values divided by 255.
process_test_data(test_path)
X_test = np.array(test_data).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
X_test = X_test / 255.0

# Predicting on test set

In [None]:
# Run model1 over the prepared test array; its last layer is a single sigmoid unit,
# so each prediction is one value per image.
predictions = model1.predict(X_test)

# Evaluating model

In [None]:
# No labels are loaded for the test images, so loss/accuracy cannot be measured on X_test.
# Score the model on the samples held out by validation_split=0.2 during fit instead:
# Keras takes that split from the last 20% of X/y, before shuffling.
val_start = int(len(X) * (1 - 0.2))
loss, accuracy = model1.evaluate(X[val_start:], y[val_start:])
print('Validation Error Rate: ', round((1-accuracy)*100, 2))

# Defining 2nd model with different units

In [None]:
# Second CNN: same layout as model1 but wider (64/128 filters) with one 512-unit
# dense layer that is batch-normalised and followed by a stronger dropout.
model2 = Sequential()

model2.add(Conv2D(64,(3,3), activation = 'relu', input_shape = X.shape[1:]))
model2.add(BatchNormalization())
model2.add(MaxPooling2D(pool_size = (2,2)))
model2.add(Dropout(0.25))

model2.add(Conv2D(128,(3,3), activation = 'relu'))
model2.add(BatchNormalization())
model2.add(MaxPooling2D(pool_size = (2,2)))
model2.add(Dropout(0.25))


model2.add(Flatten())
model2.add(Dense(512, activation = 'relu'))
model2.add(BatchNormalization())
model2.add(Dropout(0.5))
# One sigmoid unit: y holds a single 0/1 label per image and the model is trained with
# binary_crossentropy, which a 2-unit softmax output does not match.
model2.add(Dense(1, activation = 'sigmoid'))

In [None]:
# Print the layer-by-layer summary of model2 for comparison with model1.
model2.summary()

In [None]:
# Same training configuration as model1: Adam optimizer, binary cross-entropy loss, accuracy metric.
model2.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Keras documents `callbacks` as a list of callback instances, so the single W&B
# callback is wrapped in a list rather than passed bare.
my_callbacks = [WandbCallback()]
# Same schedule as model1 except for the larger batch size of 64.
model2.fit(X, y, epochs=20, batch_size=64, validation_split=0.2, callbacks=my_callbacks)

# Preparing data for new model

In [None]:
import pandas as pd
# Table of training files and their class as a string: '1' for names starting with 'dog',
# '0' for everything else. Note this is the reverse of label(), which maps 'cat' to 1.
filenames = os.listdir("/kaggle/working/train")
categories = ['1' if name.split('.')[0] == 'dog' else '0' for name in filenames]

df = pd.DataFrame({'filename': filenames, 'category': categories})
df.head()

In [None]:
# Bar chart of how many files fall into each category, to check the class balance.
df['category'].value_counts().plot.bar()

# Defining model - VGG16

In [None]:
from keras.models import Sequential
from keras import layers
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation,GlobalMaxPooling2D
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.applications import VGG16
from keras.models import Model

# Input resolution for the VGG16-based model (RGB, hence 3 channels).
image_size = 224
input_shape = (image_size, image_size, 3)

epochs = 5
batch_size = 16

# VGG16 convolutional base with ImageNet weights and without its original classifier head.
pre_trained_model = VGG16(input_shape=input_shape, include_top=False, weights="imagenet")

# Freeze the first 15 layers; the remaining layers stay trainable for fine-tuning.
for layer in pre_trained_model.layers[:15]:
    layer.trainable = False

for layer in pre_trained_model.layers[15:]:
    layer.trainable = True

last_layer = pre_trained_model.get_layer('block5_pool')
last_output = last_layer.output

# Global max pooling reduces each feature map to a single value (this is pooling,
# not a Flatten), giving one feature vector per image
x = GlobalMaxPooling2D()(last_output)
# Add a fully connected layer with 512 hidden units and ReLU activation
x = Dense(512, activation='relu')(x)
# Add a dropout rate of 0.5
x = Dropout(0.5)(x)
# Add a final sigmoid layer for classification
x = Dense(1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)

# `learning_rate` is the supported keyword for the optimizer; `lr` is a deprecated alias.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(learning_rate=1e-4, momentum=0.9),
              metrics=['accuracy'])

model.summary()

# Splitting the data

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the files for validation. The fixed random_state makes the split
# identical on every run, and drop=True stops the old row index from being added
# to the frames as an extra 'index' column.
train_df, validate_df = train_test_split(df, test_size=0.1, random_state=42)
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)

# validate_df = validate_df.sample(n=100).reset_index() # use for fast testing code purpose
# train_df = train_df.sample(n=1800).reset_index() # use for fast testing code purpose

# Row counts, used later to derive the number of steps per epoch.
total_train = train_df.shape[0]
total_validate = validate_df.shape[0]

# Using Image data generator

In [None]:
# Augmentation settings for the training images: pixel rescaling plus random rotation,
# shear, zoom, shift and horizontal flip.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Stream batches from disk using the filename/category columns of train_df.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="/kaggle/working/train",
    x_col='filename',
    y_col='category',
    target_size=(image_size, image_size),
    class_mode='binary',
    batch_size=batch_size,
)

In [None]:
# Validation images are only rescaled, with no augmentation.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Stream batches from the same image folder, restricted to the rows of validate_df.
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory="/kaggle/working/train",
    x_col='filename',
    y_col='category',
    target_size=(image_size, image_size),
    class_mode='binary',
    batch_size=batch_size,
)

# Fitting the data

In [None]:
# Model.fit accepts generators directly; fit_generator is deprecated in favour of it.
# The step counts are the number of full batches in each data frame.
history = model.fit(
    train_generator,
    epochs=10,
    steps_per_epoch=total_train // batch_size,
    validation_data=validation_generator,
    validation_steps=total_validate // batch_size,
)

# Saving the model

In [None]:
from keras.models import load_model 
# Write the fine-tuned model to "model.h5" in the current directory.
model.save("model.h5") 

# Evaluating model

In [None]:
# Model.evaluate accepts generators directly; evaluate_generator is deprecated in favour of it.
# The data comes from validation_generator, i.e. the held-out 10% of the training files.
loss, accuracy = model.evaluate(validation_generator, steps=total_validate//batch_size, workers=12)
print("Test: accuracy = %f  ;  loss = %f " % (accuracy, loss))

# Model behaviour

I trained 4 models for this dataset and I will share what I observed while training these models.

**Model 1**

The model is a simple network with image size = 80,

2 convolution layers and 2 pooling layers respectively, with ReLU activation.

3 Dense layers: 2 of them have ReLU activation and 1 has sigmoid activation.

Epochs = 20

With this network I achieved,
Train accuracy = 98.6%,
Train  loss = 0.036.

Test accuracy = 74.4%,
Test loss = 1.686.

Here I observed that the loss was increasing with each epoch, and it looked like overfitting.
At first I thought that the image size of 80 might have something to do with the overfitting and the increasing loss, so I trained a second model.

**Model 2**

Model is a simple network with Image size = 100,
2 convolution layers and 2 pooling layers respectively and activation ReLu,
3 Dense layers, 2 of them has ReLu activation and 1 is having sigmoid activation.

Epochs = 20

With this network I achieved,
Train accuracy = 99.4%,
Train  loss = 0.018.

Test accuracy = 77.2%,
Test loss = 1.55.

The loss was still increasing even after I changed the image size, which suggests that the increasing loss and the overfitting are not related to the image size.

**Model 3**
Keeping the image size = 100, as it appeared to have no relation to the overfitting or the loss.
Model is a simple network with Image size = 100,
2 convolution layers and added Batch Normalization to the convolution layers and a Dropout = 0.25
2 pooling layers respectively and activation ReLu.
3 Dense layers, 2 of them has ReLu activation and 1 is having sigmoid activation.

Epochs = 20

With this network I achieved,
Train accuracy = 91.8%
Train  loss = 0.197

Test accuracy = 76.2%
Test loss = 0.636

With the addition of Batch Normalization and Dropout to the network, the loss was no longer increasing; it decreased (which means we are on the right track), but the network still overfits, so that part is still off track.
Why did Batch Normalization work?
We have 12500 images of cats in our dataset, and the images obviously contain different noise (one image has a cat and a pillow, a second image has just a cat in it, so the pillow is the noise).
Now, every time a batch of size n is fed into the network, it receives images that differ somewhat from those in the previous batches.
So the hidden layers see different images every time, and it is harder for them to learn from them.
This change in the distribution of the inputs is called covariate shift (for hidden layers, internal covariate shift).
To overcome this problem we use Batch Normalization, and as we saw, it worked.
It's like practising Java every day for some period, which makes it easier to learn, whereas learning many languages at once makes it harder to learn anything from them.


**Model 4**
Now, we still have the overfitting problem, so I tried changing the filters and thought of decreasing the image size.

Keeping the image size = 50.
2 convolution layers and added Batch Normalization to the convolution layers and a Dropout = 0.25
2 pooling layers respectively and activation ReLu.
3 Dense layers, 2 of them has ReLu activation and 1 is having sigmoid activation.

Epochs = 20

With this network I achieved,
Train accuracy = 99.07%
Train  loss = 0.02

Test accuracy = 99.73%
Test loss = 0.0094

It looks like the model is not overfitting the data and is better compared to Model 3.
Then I tried to evaluate the model on the test data and I got 100% accuracy, so I think we are still in the overfitting bubble.

In the last try I used a different architecture, i.e. VGG16, and it solved the overfitting problem.

# You can see plots over here... 
https://wandb.ai/pratikraut_/cats-vs-dogs?workspace=user-pratikraut_

# I'm a beginner at this and want to learn more about it. I have shared my understanding and am hoping to get feedback and corrections.