<a href="https://colab.research.google.com/github/sairamkiran9/cat-and-dog-classification/blob/master/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Code

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from glob import glob
import numpy as np
import pandas as pd 
import random
import time 
import os
import cv2
import matplotlib.pyplot as plt
from scipy import ndimage

In [None]:
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [None]:
# Image geometry used by the data generators (target size) and by the
# network's input layer (width, height, channels).
IMAGE_WIDTH = 128
IMAGE_HEIGHT = 128
IMAGE_CHANNELS = 3
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)

In [None]:
# Loading the dataset: install the Kaggle CLI, then open Colab's upload widget.
# NOTE(review): a later cell copies `kaggle.json`, so presumably that is the
# file to upload here — confirm.
! pip install -q kaggle         
from google.colab import files
files.upload()

In [None]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the credentials file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the files of the `dogs-vs-cats` competition with the Kaggle CLI.
!kaggle competitions download -c dogs-vs-cats

In [None]:
! mkdir train
! mkdir test1
! unzip train.zip -d train
! unzip test1.zip -d test1

In [None]:
# Extract dataset.zip into ./dataset.
# NOTE(review): no visible cell produces dataset.zip and ./dataset is not read
# by any later cell shown here — confirm this cell is still needed.
! mkdir dataset
! unzip dataset.zip -d dataset

In [None]:
# Inspect what the test archive was extracted to.
os.listdir("/content/test1")

In [None]:
# Collect every path inside the test image folder, report how many there are
# and preview the first ten.
test_image = "/content/test1/test1/"
test_images = glob(os.path.join(test_image, '*'))
print(len(test_images))
test_images[:10]

In [None]:
# Collect every path inside the training image folder, report how many there
# are and preview the first ten.
train_image = "/content/train/train/"
train_images = glob(os.path.join(train_image, '*'))
print(len(train_images))
train_images[:10]

In [None]:
# Build the labelled file table. A file whose name starts with the token 'dog'
# (text before the first '.') is labelled 1; every other file is labelled 0.
filenames = os.listdir("/content/train/train/")
categories = [1 if filename.split('.')[0] == 'dog' else 0 for filename in filenames]

df = pd.DataFrame({'filename': filenames, 'category': categories})

In [None]:
# Pick one training file at random, print its size and display it.
sample = random.choice(filenames)
image = load_img(os.path.join("/content/train/train/", sample))
print(image.size)
plt.imshow(image)

In [None]:
# The generators below use class_mode='categorical', so turn the numeric labels
# into class-name strings before splitting.
df["category"] = df["category"].replace({0: 'cat', 1: 'dog'})

# 80/20 split with a fixed seed; both parts get a fresh 0..n-1 index.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.20, random_state=42)
)

In [None]:
# Batch size used by every generator, and the row counts of both splits.
batch_size = 32
total_train = len(train_df)
total_validate = len(validate_df)

**Data Augmentation**

In [None]:
# Augmentation pipeline for the training images: pixel values are rescaled to
# [0, 1] and each image is randomly rotated, sheared, zoomed, shifted and
# horizontally flipped.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1
)

# Stream the training rows from disk in batches.
# shuffle=True: the row order is re-drawn every epoch, so the network does not
# see the same batches in the same sequence each time (the previous
# shuffle=False fixed the order for the whole run). The seed keeps runs
# repeatable.
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    "/content/train/train/",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
    seed=42
)

In [None]:
# Validation images are only rescaled — no augmentation — and are served in
# dataframe order (shuffle=False), so predictions line up with validate_df rows.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory="/content/train/train/",
    x_col='filename',
    y_col='category',
    class_mode='categorical',
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# One random training row, pushed through the augmentation pipeline so its
# transformed variants can be previewed.
example_df = train_df.sample(n=1).reset_index(drop=True)
example_generator = train_datagen.flow_from_dataframe(
    dataframe=example_df,
    directory="/content/train/train/",
    x_col='filename',
    y_col='category',
    class_mode='categorical',
    target_size=IMAGE_SIZE,
)

In [None]:
# Draw 15 batches from the example generator and show the first image of each
# in a 5x3 grid.
plt.figure(figsize=(12, 12))
for i in range(15):
    plt.subplot(5, 3, i + 1)
    X_batch, Y_batch = next(example_generator)
    image = X_batch[0]
    plt.imshow(image)
plt.tight_layout()
plt.show()

In [None]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard

# TensorBoard event files (with weight histograms every epoch) go here.
log_dir = "/content/logs/"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Keep only the weights of the epoch with the highest validation accuracy.
mc = ModelCheckpoint('/content/best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

# Halve the learning rate after 10 epochs without a val_accuracy improvement.
# min_lr must be BELOW the optimizer's starting rate: the model is compiled with
# optimizer='adam' (default learning rate 0.001), so the previous min_lr=0.001
# clamped every reduction back to the starting value and the callback never
# had any effect.
lr = ReduceLROnPlateau(monitor='val_accuracy', mode='max', patience=10, verbose=1, factor=0.5, min_lr=1e-5)

In [None]:
# Callbacks handed to training: TensorBoard logging, learning-rate reduction
# on plateau, and best-model checkpointing.
callbacks=[tensorboard_callback, lr, mc]

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

# CNN classifier: four Conv -> BatchNorm -> MaxPool -> Dropout stages of
# increasing width, followed by a dense head with a 2-way softmax.
model = Sequential()

for stage, filters in enumerate((32, 64, 128, 512)):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = (
        {'input_shape': (IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)} if stage == 0 else {}
    )
    model.add(Conv2D(filters, (3, 3), activation='relu', **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# Classifier head.
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))  # one output per class: cat, dog

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()


In [None]:
# Train for up to 50 epochs.
# `Model.fit` accepts the data generators directly; `fit_generator` is the
# deprecated spelling of the same call.
# len(generator) is the number of batches needed to cover the whole dataframe,
# including a final partial batch. The previous `total // batch_size` dropped
# that partial batch, so the monitored validation metrics skipped the last few
# validation rows.
history = model.fit(
    train_generator,
    epochs=50,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=callbacks
)

In [None]:
# Persist the per-epoch metrics as CSV.
# A dedicated name is used so the labelled-files frame `df` built earlier is
# not overwritten (re-running the split cell after this one used to fail
# because `df` no longer had a 'category' column).
history_df = pd.DataFrame(history.history)
history_df.to_csv(r'/content/history1.csv', index=False, header=True)

**Analysis of the model**

In [None]:
from matplotlib import pyplot
def summarize_diagnostics(history):
    """Plot the learning curves of a training run as two separate figures.

    The first figure shows the loss, the second the accuracy; in each one the
    training series is blue and the validation series (labelled "test-…") is
    orange.

    Args:
        history: object whose ``history`` attribute is a mapping containing
            the keys 'loss', 'val_loss', 'accuracy' and 'val_accuracy', each
            holding one value per epoch.
    """
    panels = (
        ('Cross Entropy Loss', 'loss', 'Loss'),
        ('Classification Accuracy', 'accuracy', 'Accuracy'),
    )
    for title, metric, y_label in panels:
        _, axis = pyplot.subplots(1, 1, figsize=(10, 4))
        axis.set_title(title)
        axis.plot(history.history[metric], color='blue', label="train-" + metric)
        axis.plot(history.history['val_' + metric], color='orange', label="test-" + metric)
        axis.legend(loc='best', shadow=True)
        axis.set_xlabel('Epochs')
        axis.set_ylabel(y_label)
        axis.grid()
        pyplot.show()

In [None]:
# Accuracy on the full validation set. `Model.evaluate` takes the generator
# directly; `evaluate_generator` is the deprecated spelling of the same call.
_, acc = model.evaluate(validation_generator, steps=len(validation_generator), verbose=0)
print('> %.3f' % (acc * 100.0))
# learning curves
summarize_diagnostics(history)

In [None]:
# Class probabilities for every validation row, in validate_df order.
# The step count comes from the generator itself instead of a hard-coded 5000
# (which silently broke as soon as the split size changed) and is an int rather
# than the float returned by np.ceil. reset() rewinds the generator so the
# first prediction corresponds to the first row.
validation_generator.reset()
predict = model.predict(validation_generator, steps=len(validation_generator))

In [None]:
# Predicted class index per validation row (argmax over the softmax outputs).
test_label = pd.DataFrame({'category': np.argmax(predict, axis=-1)})
# Convert the true labels back to integers so they are comparable with the
# predicted indices.
validate_df['category'] = validate_df['category'].replace({'dog': 1, 'cat': 0})

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import itertools
# Confusion matrix of true validation labels against predicted labels.
cnf_matrix = confusion_matrix(
    y_true=validate_df['category'],
    y_pred=test_label['category'],
)
print(cnf_matrix)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Oranges):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D NumPy array; rows are labelled 'True label' and columns
            'Predicted label'.
        classes: tick labels, given in the same order as the rows/columns
            of ``cm``.
        normalize: when True, each row is divided by its sum and the cells
            are printed with two decimals; otherwise cells are printed as
            integers.
        title: title of the plot.
        cmap: matplotlib colour map used for the heat map.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells above half of the maximum are drawn in the dark end of the colour
    # map, so their text is white; lighter cells keep black text. (Both
    # branches used to be black, which made the switch pointless.)
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
# confusion_matrix orders its rows/columns by sorted label value, i.e. 0 (cat)
# first and 1 (dog) second, so the tick labels must be given in that order.
# The previous ['dog=1', 'cat=0'] labelled the two classes the wrong way round.
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=['cat=0', 'dog=1'], normalize=False, title='Confusion matrix')

In [None]:
# Per-class precision / recall / F1 on the validation set.
class_report = classification_report(
    y_true=validate_df['category'],
    y_pred=test_label['category'],
)
print(class_report)

### **Testing**

In [None]:
# Table of the test file names; nb_samples is its row count.
test_filenames = os.listdir("/content/test1/test1")
test_df = pd.DataFrame({'filename': test_filenames})
nb_samples = len(test_df)
nb_samples

In [None]:
# Test images have no labels (y_col / class_mode are None); they are only
# rescaled and served in test_df order so predictions can be written back
# row by row.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_df,
    directory="/content/test1/test1/",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    shuffle=False,
)

**Loading the model**<br>

In [None]:
import tensorflow as tf
# Reload the model from the path the ModelCheckpoint callback writes to.
new_model = tf.keras.models.load_model('/content/best_model.h5')

In [None]:
# Predict the class of every test image.
# `Model.predict` takes the generator directly (`predict_generator` is the
# deprecated spelling), and len(test_generator) is an integer batch count —
# the previous np.ceil(...) passed a float as `steps`.
test_generator.reset()
predict = new_model.predict(test_generator, steps=len(test_generator))
test_df['category'] = np.argmax(predict, axis=-1)
# Map class index -> class name using the training generator's mapping, then
# class name -> the 0/1 encoding (cat=0, dog=1).
label_map = dict((v,k) for k,v in train_generator.class_indices.items())
test_df['category'] = test_df['category'].replace(label_map)
test_df['category'] = test_df['category'].replace({ 'dog': 1, 'cat': 0 })

In [None]:
import random
# Show nine consecutive test images in a 3x3 grid, each captioned with its file
# name and predicted class.
# random.randint includes BOTH end points, so the start index is capped at
# len(test_df) - 9; the previous hard-coded upper bound of 12500 could start
# at or past the end of the table and yield an empty or partial grid.
i = random.randint(0, max(len(test_df) - 9, 0))
sample_test = test_df.iloc[i:i+9]
plt.figure(figsize=(8,8))
# enumerate gives grid positions 1..9 in row order, independent of the
# dataframe index values.
for position, (_, row) in enumerate(sample_test.iterrows(), start=1):
    filename = row['filename']
    category = row['category']
    name = "dog" if category == 1 else "cat"
    img = load_img("/content/test1/test1/"+filename, target_size=IMAGE_SIZE)
    plt.subplot(3, 3, position)
    plt.imshow(img)
    plt.xlabel(filename + '( ' + "{}".format(name) + ' )' )
plt.tight_layout()
plt.show()

**Tensorboard visualisation**

In [None]:
%load_ext tensorboard       #loading the tensorboard api

In [None]:
%tensorboard --logdir='/content/logs/train/'      #For traning data

In [None]:
%tensorboard --logdir='/content/logs/validation/'     #For validation data