# Dogs vs Cats

## 1. Importing Necessary Libraries

In [None]:
# utilities
import numpy as np # linear algebra
import pandas as pd # data processing
import pickle
import random

# visualization
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

# sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# tensorflow
import tensorflow as tf

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D

from tensorflow.keras.regularizers import l2
from keras.preprocessing.image import ImageDataGenerator

# ignoring warnings
# (filterwarnings('ignore') is called without a category, so it applies to
# every warning raised from here on)
import warnings
warnings.filterwarnings('ignore')

import os
# show the entries of the input directory
print(os.listdir("../input"))

## 2. Preprocessing

### Extracting zip files

In [None]:
from zipfile import ZipFile

# Unpack both competition archives into the current working directory:
# the training images first, then the test images.
for archive_path in ("../input/dogs-vs-cats/train.zip",
                     "../input/dogs-vs-cats/test1.zip"):
    with ZipFile(archive_path, "r") as archive:
        archive.extractall(".")

# Uncomment to inspect the extracted training files:
#print(os.listdir("/kaggle/working/train"))

### Turning train images to numpy arrays

In [None]:
# Directory holding the extracted training images (the archives are extracted
# into the working directory, presumably /kaggle/working).
path = "/kaggle/working/train"

# Accumulators filled in place by create_train_data(): X collects the image
# arrays and y the matching integer labels.
X = []
y = []


def convert(category):
    """Map a category name to its integer label.

    Args:
        category: Category string taken from a file name.

    Returns:
        1 when *category* is exactly ``'dog'``, 0 for anything else.
    """
    return int(category == 'dog')

# This function reads the unzipped images and appends them to the X and y lists.
def create_train_data(path):
    """Load every image under *path* into the module-level ``X`` and ``y`` lists.

    Each file is read as a grayscale image, resized to 80x80 and appended to
    ``X``. Its label, obtained by passing the text before the first "." of the
    file name to ``convert``, is appended to ``y`` at the same position.

    Files that OpenCV cannot decode are skipped with a printed notice instead
    of crashing inside ``cv2.resize``. Entries are visited in sorted order so
    that repeated runs build the lists in the same order.

    Args:
        path: Directory containing the training images.
    """
    # os.listdir() returns names in arbitrary order; sort for reproducibility.
    for file_name in sorted(os.listdir(path)):
        img = cv2.imread(os.path.join(path, file_name), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports an unreadable file by returning None.
            print("Skipping unreadable file:", file_name)
            continue
        X.append(cv2.resize(img, dsize=(80, 80)))
        y.append(convert(file_name.split(".")[0]))

In [None]:
# Fill the module-level X and y lists from the extracted training images.
create_train_data(path)

### Reshaping the data to be used

#### We reshape the X array to (samples, 80, 80, 1) so that it can be fed to the convolutional layers of the network.

In [None]:
# Stack the images into a single array with a trailing channel axis of
# size 1, and turn the label list into an array as well.
X = np.reshape(np.array(X), (-1, 80, 80, 1))
y = np.array(y)

### Saving the arrays and downloading them, so we can use them later

#### We can save the X and y arrays so that we don't have to rebuild them from the images next time, and we can download them.

In [None]:
# Serialize both arrays into the current working directory. The context
# managers make sure each file is flushed and closed once it has been written
# (the original left both handles open).
with open("x_train", "wb") as x_file:
    pickle.dump(X, x_file)
with open("y_train", "wb") as y_file:
    pickle.dump(y, y_file)

os.chdir(r'../working')
from IPython.display import FileLink
# Kept as the last expression of the cell so the notebook displays the link.
FileLink(r'y_train')

### Let's look at some images

#### Here we create a list of the numbers 0 to 24999, which we use as indices into the image array.

In [None]:
# One index per loaded image. Deriving the length from X keeps every index in
# range whatever the number of images that were actually read.
num_list = list(range(len(X)))

#### Here we choose a random number from the list and show the image at that index.

In [None]:
# Pick a random index from num_list and display that image with a gray colormap.
i = random.choice(num_list)
plt.imshow(X[i], cmap = "gray")

#### A dog

In [None]:
# Draw another random index and display that image the same way.
i = random.choice(num_list)
plt.imshow(X[i], cmap = "gray")

#### A cat

### Normalizing

#### Here we normalize the data by scaling the pixel values to the range [0, 1], which helps training converge faster.

In [None]:
# Scale the pixel values by 1/255. Casting to float32 first keeps the result at
# half the size of the float64 array that `X / 255` creates from an integer
# image array.
X = X.astype("float32") / 255.0

### Splitting data to train and validation set

#### Here we split the data into a training set (80%) and a validation set (20%).

In [None]:
# Hold out 20% of the samples for validation; random_state is fixed so the
# shuffling is seeded the same way on every run.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Show the shapes of the four arrays produced by the split.
X_train.shape, X_val.shape, y_train.shape, y_val.shape

## 3. Modeling and Predicting

#### Here we are constructing our CNN layers. We use Conv2D, MaxPooling2D, Dropout, Flatten and Dense layers.

In [None]:
# Three convolutional stages with 32, 64 and 128 filters. Every stage is two
# 3x3 ReLU convolutions followed by 2x2 max pooling and 40% dropout; only the
# very first convolution declares the 80x80x1 input shape.
model = Sequential()
for stage, n_filters in enumerate((32, 64, 128)):
    first_conv_kwargs = {"input_shape": (80, 80, 1)} if stage == 0 else {}
    model.add(Conv2D(n_filters, (3, 3), activation="relu", **first_conv_kwargs))
    model.add(Conv2D(n_filters, (3, 3), activation="relu"))
    model.add(MaxPooling2D(2, 2))
    model.add(Dropout(0.4))

# Classifier head: flatten, one L2-regularized dense layer with 50% dropout,
# then a single sigmoid unit for the binary output.
model.add(Flatten())
model.add(Dense(128, activation="relu", kernel_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(1, activation="sigmoid"))

#### Here we configure the model for training by choosing the optimizer, the loss function and the metric.

In [None]:
# Adam optimizer with binary cross-entropy loss; accuracy is tracked as the
# only metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

#### Here we save the history of fitting the data.

In [None]:
# Train for 50 epochs in batches of 256, evaluating on the validation split
# after every epoch. The returned object is kept for the plots further down.
history = model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=50,
    verbose=1,
    validation_data=(X_val, y_val),
)

#### Let's see the accuracy.

In [None]:
# evaluate() is unpacked into two values here: the first (the loss) is
# discarded, the second is the accuracy metric set in compile(). It is printed
# as a percentage with two decimals.
_, accuracy = model.evaluate(X_val, y_val)
print("Accuracy: %.2f" % (accuracy*100))

#### Let's visualize model accuracy and model loss.

In [None]:
# Plot the per-epoch training and validation curves, one figure per metric.
# The second curve comes from the validation data passed to fit(), so it is
# labelled "Validation" rather than "Test".
for metric in ("accuracy", "loss"):
    plt.plot(history.history[metric])
    plt.plot(history.history["val_" + metric])
    plt.title("Model " + metric)
    plt.ylabel(metric.capitalize())
    plt.xlabel("Epoch")
    plt.legend(["Train", "Validation"], loc = "upper left")
    plt.show()

#### Let's see the confusion matrix.

In [None]:
# Sequential.predict_classes() was removed in TensorFlow 2.6, so threshold the
# sigmoid output at 0.5 ourselves to obtain the 0/1 class labels.
val_pred = (model.predict(X_val) > 0.5).astype("int32").ravel()

# confusion_matrix expects (y_true, y_pred): rows are the true labels and
# columns the predicted ones. fmt="d" prints the counts as plain integers.
sns.heatmap(confusion_matrix(y_val, val_pred), cmap = "viridis", annot = True, fmt = "d")

#### It's time to predict the test data. The loading function is almost the same as the one we used for the training data.

In [None]:
# Directory the test images are read from.
test_path = "/kaggle/working/test1"

# Filled in place by create_test_data(): the resized images and, at the same
# positions, the id taken from each file name.
X_test, id_line = [], []


def create_test_data(path):
    """Load every image under *path* into the module-level ``X_test`` list.

    Each file is read as a grayscale image and resized to 80x80. The text
    before the first "." of the file name is recorded in ``id_line`` at the
    same position, so each prediction can be matched back to its id.

    Files that OpenCV cannot decode are skipped with a printed notice instead
    of crashing inside ``cv2.resize``; a skipped file gets no entry in either
    list, which keeps the two lists aligned. Entries are visited in sorted
    order so that repeated runs produce the same ordering.

    Args:
        path: Directory containing the test images.
    """
    # os.listdir() returns names in arbitrary order; sort for reproducibility.
    for file_name in sorted(os.listdir(path)):
        img = cv2.imread(os.path.join(path, file_name), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports an unreadable file by returning None.
            print("Skipping unreadable file:", file_name)
            continue
        id_line.append(file_name.split(".")[0])
        X_test.append(cv2.resize(img, dsize=(80, 80)))

In [None]:
# Fill the module-level X_test and id_line lists from the extracted test images.
create_test_data(test_path)

### Reshaping

In [None]:
# Stack the test images into one array with a trailing channel axis of size 1,
# the same layout used for the training array.
X_test = np.reshape(np.array(X_test), (-1, 80, 80, 1))

### Normalizing

In [None]:
# Scale the pixel values by 1/255. Casting to float32 first keeps the result at
# half the size of the float64 array that `X_test / 255` creates from an
# integer image array.
X_test = X_test.astype("float32") / 255.0

### Predicting Classes

In [None]:
# Sequential.predict_classes() was removed in TensorFlow 2.6; thresholding the
# sigmoid output at 0.5 yields the same 0/1 labels on old and new versions.
pred = (model.predict(X_test) > 0.5).astype("int32")
# Collapse the (n, 1) predictions to a flat vector for the DataFrame column.
pred = pred.flatten()

### Saving to DataFrame

In [None]:
# One row per test image: the id taken from its file name and the predicted
# label (1 = dog, 0 = cat, matching the training labels).
df = pd.DataFrame({"id": id_line, "label": pred})

### Saving the DataFrame

In [None]:
# Write the predictions to result.csv without the DataFrame index column.
df.to_csv("result.csv", index = False)

### Let's look at some images and predict them

In [None]:
def predict_class(to_predict):
    """Display a single 80x80 grayscale image and return the model's label.

    Args:
        to_predict: Array holding one image (80 * 80 values in total); it is
            expected to be preprocessed like the images the model was
            trained on.

    Returns:
        ``"dog"`` when the model's sigmoid output is above 0.5, otherwise
        ``"cat"``.
    """
    batch = to_predict.reshape(1, 80, 80, 1)
    # Sequential.predict_classes() was removed in TensorFlow 2.6; apply the
    # same 0.5 threshold to the predicted probability instead.
    probability = model.predict(batch)[0][0]
    # Pass a plain 2-D array to imshow for the single-channel image.
    plt.imshow(batch.reshape(80, 80), cmap="gray")
    plt.show()
    return "dog" if probability > 0.5 else "cat"

In [None]:
# Display the test image at index 30 and return the label predicted for it.
predict_class(X_test[30])

In [None]:
# Display the test image at index 2532 and return the label predicted for it.
predict_class(X_test[2532])