In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Extract zip files
import zipfile
def extract_images(filePath, dest="."):
    """Extract every member of a zip archive into a directory.

    Args:
        filePath: Path to the ``.zip`` archive to unpack.
        dest: Directory that receives the extracted files. Defaults to the
            current working directory, which is what the function always
            used before this parameter existed.

    Raises:
        FileNotFoundError: If ``filePath`` does not exist.
        zipfile.BadZipFile: If ``filePath`` is not a valid zip archive.
    """
    with zipfile.ZipFile(filePath, "r") as archive:
        archive.extractall(dest)
# Unpack the competition's train and test archives
for archive in (
    '/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip',
    '/kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip',
):
    extract_images(archive)

In [None]:
# plot to see how it looks
from matplotlib import pyplot as plt
%matplotlib inline
# Class names indexed by integer label: 0 -> cat, 1 -> dog
classes = ['cat', 'dog']
def plot_sample(x, y, index):
    """Show one image from ``x`` with its class name as the x-axis label.

    Args:
        x: Indexable collection of images; ``x[index]`` is handed to
            ``plt.imshow``.
        y: Indexable collection of integer labels; ``y[index]`` is used as an
            index into the module-level ``classes`` list.
        index: Position of the sample to display.

    Note:
        A later cell in this notebook defines ``plot_sample`` again with a
        different signature (``data, isTrain``); once that cell runs it
        replaces this definition.
    """
    plt.figure(figsize=(15,2))
    # Use the ``x`` argument. The previous version read a global ``train_x``
    # that is never defined in this notebook, so every call raised NameError.
    plt.imshow(x[index])
    plt.xlabel(classes[y[index]])

In [None]:
# Derive an integer class label (cat -> 0, dog -> 1) from the third-from-last dot-separated part of the image file name
def label_pet_image(img):
    """Return the integer class encoded in an image file name.

    The species is read from the third-from-last dot-separated field of
    ``img``: ``'cat'`` maps to 0 and ``'dog'`` maps to 1. Any other value
    yields ``None``.
    """
    species = img.split('.')[-3]
    return {'cat': 0, 'dog': 1}.get(species)

In [None]:
# Process the data (both train and test set)
from tqdm import tqdm
import cv2
from random import shuffle
# Side length, in pixels, that every image is resized to
IMG_SIZE = 64
# Upper bound on the number of training files that get loaded
SAMPLE_SIZE = 20000
def process_data(DATA_FOLDER, isTrain=True):
    """Load, resize and label the images found in a folder.

    Args:
        DATA_FOLDER: Directory containing the image files.
        isTrain: When True, at most ``SAMPLE_SIZE`` randomly chosen files are
            loaded and each label comes from ``label_pet_image``. When False,
            every file is loaded and the label is the part of the file name
            before the first dot.

    Returns:
        A shuffled list of ``[image, label]`` pairs, where ``image`` is the
        ``IMG_SIZE`` x ``IMG_SIZE`` array produced by ``cv2.resize`` (in the
        channel order ``cv2.imread`` returns). Files that cannot be read or
        resized are reported on stdout and left out of the result.
    """
    image_list = os.listdir(DATA_FOLDER)
    if isTrain:
        # os.listdir returns entries in arbitrary order, so shuffle before
        # truncating: the subset is then a random sample instead of whatever
        # the filesystem happens to list first.
        shuffle(image_list)
        image_list = image_list[:SAMPLE_SIZE]

    data_df = []  # accumulated [image, label] pairs
    for file_name in tqdm(image_list):
        path = os.path.join(DATA_FOLDER, file_name)
        if isTrain:
            label = label_pet_image(file_name)
        else:
            label = file_name.split('.')[0]
        # Keep the decoded image in its own variable so the file name is still
        # available for the error report below.
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        try:
            if image is None:
                # cv2.imread signals an unreadable file by returning None
                # rather than raising; surface that with a clear message.
                raise ValueError("cv2.imread returned None (unreadable or non-image file)")
            image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
            data_df.append([np.array(image), label])
        except Exception as e:
            print("Image with issue name, path, isTrain, label", file_name, path, isTrain, label)
            print(str(e))
    shuffle(data_df)
    return data_df

In [None]:
# Load the labelled training images
train = process_data('./train', isTrain=True)

In [None]:
# Load the test images (labels are file-name prefixes, not classes)
test = process_data('./test', isTrain=False)

In [None]:
# plot to see how it looks
from matplotlib import pyplot as plt
%matplotlib inline
def plot_sample(data, isTrain=True):
    """Display one ``[image, label]`` pair as produced by ``process_data``.

    Args:
        data: Sequence whose first item is the image array and whose second
            item is the label.
        isTrain: When True, ``data[1]`` is treated as an integer class index
            and the matching name from ``classes`` is shown as the x-axis
            label. When False, no label is drawn.
    """
    img = np.asarray(data[0])
    if img.ndim == 3 and img.shape[-1] == 3:
        # Images loaded with cv2.imread are BGR, while plt.imshow expects RGB.
        # Reverse the channel axis so colours are displayed correctly.
        img = img[..., ::-1]
    plt.figure(figsize=(15,2))
    plt.imshow(img)
    if isTrain:
        plt.xlabel(classes[data[1]])

In [None]:
# Show the first training sample together with its class name
plot_sample(train[0], isTrain=True)

In [None]:
# Show one test sample; test data has no class label to draw
plot_sample(test[4], isTrain=False)

In [None]:
# Stack the loaded [image, label] pairs into arrays for training and inference
X = np.array([image for image, _ in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
y = np.array([label for _, label in train])
test_X = np.array([image for image, _ in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)

In [None]:
# Scale pixel values into the 0-1 range by dividing by 255.
# Casting to float32 first keeps the arrays at half the size of the float64
# result that a plain `X/255` produces.
# NOTE: this cell rebinds X and test_X, so running it twice scales the data twice.
X = X.astype(np.float32) / 255
test_X = test_X.astype(np.float32) / 255

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for validation. A fixed random_state makes the
# split identical on every run, so validation scores are comparable.
train_X, validation_X, train_y, validation_y = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
# Display the training labels returned by the split above
train_y

In [None]:
import tensorflow as tf
from tensorflow import keras
# Baseline model: a fully connected network (ANN) on the flattened pixels
ann = keras.Sequential()
ann.add(keras.layers.Flatten(input_shape=(IMG_SIZE, IMG_SIZE, 3)))  # 3-D image -> 1-D vector
for units in (3000, 1000, 100):
    # three hidden layers of decreasing width
    ann.add(keras.layers.Dense(units, activation='relu'))
ann.add(keras.layers.Dense(1, activation='sigmoid'))  # single output in (0, 1)
ann.compile(
    optimizer='SGD',
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)
ann.fit(train_X, train_y, epochs=10)

In [None]:
# Score the ANN on the held-out validation split
ann.evaluate(x=validation_X, y=validation_y)

In [None]:
# ANN predictions for the test images
pred_y = ann.predict(x=test_X)

In [None]:
# First ten predictions
pred_y[:10]

In [None]:
# Display the first ten test images
for idx in range(10):
    plot_sample(test[idx], isTrain=False)

In [None]:
# Convolutional network (CNN) for the same binary classification task
cnn = keras.Sequential([
    # First convolution + pooling stage (IMG_SIZE is reused as the filter count)
    keras.layers.Conv2D(filters=IMG_SIZE, kernel_size=(3,3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE,3)),
    keras.layers.MaxPooling2D(2,2),
    # Second convolution + pooling stage
    keras.layers.Conv2D(filters=IMG_SIZE, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(2,2),
    # Dense classifier head
    keras.layers.Flatten(), # flatten the feature maps to 1-D
    keras.layers.Dense(IMG_SIZE, activation='relu'),
    # A single output unit must use sigmoid: softmax normalises across units,
    # so with one unit it always outputs 1.0 and the model cannot learn.
    keras.layers.Dense(1, activation='sigmoid')
])
cnn.compile(optimizer='SGD', loss= keras.losses.BinaryCrossentropy(), metrics=['accuracy'])
cnn.fit(train_X, train_y, epochs=5)

In [None]:
# Score the CNN on the held-out validation split
cnn.evaluate(x=validation_X, y=validation_y)

In [None]:
# Predict with the CNN trained above. This cell previously called
# ann.predict, so the CNN's predictions were never actually produced.
pred_y = cnn.predict(test_X)

In [None]:
# First ten predictions
pred_y[:10]

In [None]:
# Display the first ten test images
for idx in range(10):
    plot_sample(test[idx], isTrain=False)