In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, AvgPool2D, BatchNormalization, Reshape
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
import matplotlib.pyplot as plt
import glob
import os
from PIL import Image
from zipfile import ZipFile
import tensorflow as tf
from tensorflow import keras

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found in it.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# File Loading
# https://www.kaggle.com/sohier/getting-started-loading-the-images
# https://www.kaggle.com/vanausloos/how-to-read-images-in-python
# Better File Loading
# https://www.kaggle.com/romazlobin/cats-vs-dogs

# Unpack both competition archives into a writable scratch directory
# (the input directory is read-only).
base_dir = '../input/dogs-vs-cats-redux-kernels-edition'
for archive_name in ('train.zip', 'test.zip'):
    with ZipFile(os.path.join(base_dir, archive_name)) as archive:
        archive.extractall('../data')

# Folders the images are read from afterwards
# (presumably the top-level folders inside the two archives -- verify after extraction).
train_dir = '../data/train'
test_dir = '../data/test'

In [None]:
# glob returns paths in an arbitrary, filesystem-dependent order. Sort them first so
# that the seeded split below produces the same partition on every run and machine.
all_train_files = sorted(glob.glob(os.path.join(train_dir, '*.jpg')))
# No test_size is given, so train_test_split uses its default proportions;
# random_state pins the shuffle.
train_list, val_list = train_test_split(all_train_files, random_state=42)

In [None]:
# Preview: show up to six training images in a 2x3 grid, each titled with its file path.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(18, 12))
for ax, img_path in zip(axes.flat, train_list):
    ax.imshow(Image.open(img_path))
    ax.set_title(img_path)

In [None]:
# plot to see how it looks
from matplotlib import pyplot as plt
%matplotlib inline
# Class names indexed by the integer label (0 -> cat, 1 -> dog).
classes = ['cat','dog']
def plot_sample(data, isTrain=True, bgr=True):
    """Display one processed sample, optionally with its class name.

    Parameters
    ----------
    data : sequence
        ``[image, label]`` pair; ``data[0]`` is the image to draw.
    isTrain : bool
        When true, ``data[1]`` is used as an index into ``classes`` and the
        resulting name is shown as the x-axis label.
    bgr : bool
        When true (default) and the image is a 3-channel NumPy array, the
        channel axis is reversed before drawing. Images loaded with
        ``cv2.imread`` are in B, G, R order while ``plt.imshow`` expects
        R, G, B, so without the swap red and blue are exchanged on screen.
        Pass ``False`` for images that are already RGB.
    """
    img = data[0]
    if bgr and isinstance(img, np.ndarray) and img.ndim == 3 and img.shape[2] == 3:
        # Reverse only the channel axis (BGR -> RGB); the pixel grid is untouched.
        img = img[:, :, ::-1]
    plt.figure(figsize=(15,2))
    plt.imshow(img)
    if isTrain:
        plt.xlabel(classes[data[1]])
    
# Derive the integer class label from a training file name such as 'cat.123.jpg'.
def label_pet_image(img):
    """Map a training-image file name to its integer class label.

    The class name is the third dot-separated field from the end of the file
    name (``'cat.123.jpg'`` -> ``'cat'``). Only the final path component is
    inspected, so a full path such as ``'../data/train/cat.123.jpg'`` works too.

    Parameters
    ----------
    img : str
        File name or path of the image.

    Returns
    -------
    int or None
        ``0`` for cat, ``1`` for dog, ``None`` when the name has fewer than
        three dot-separated fields or names neither class.
    """
    parts = os.path.basename(img).split('.')
    if len(parts) < 3:
        # Not of the form <class>.<id>.<ext>; there is no class field to read.
        return None
    pet = parts[-3]
    if pet == 'cat':
        return 0
    if pet == 'dog':
        return 1
    return None
    
# Process the data (both train and test set)
from tqdm import tqdm
import cv2
from random import shuffle
# Side length in pixels: process_data resizes every image to IMG_SIZE x IMG_SIZE.
IMG_SIZE=64
# set sample size
# Upper bound on how many training files process_data reads (not applied when isTrain is False).
SAMPLE_SIZE=10000
def process_data(DATA_FOLDER, isTrain=True):
    """Load, resize and label the images found in ``DATA_FOLDER``.

    Parameters
    ----------
    DATA_FOLDER : str
        Directory containing the image files.
    isTrain : bool
        When true, at most ``SAMPLE_SIZE`` files are read and each is labelled
        with ``label_pet_image``. Otherwise every file is read and the label is
        the part of the file name before the first dot.

    Returns
    -------
    list
        ``[image_array, label]`` pairs in random order. Each image is resized
        to ``IMG_SIZE`` x ``IMG_SIZE``; channels are in OpenCV's B, G, R order.
        Files that cannot be read or resized are reported and skipped.
    """
    # Read images from extracted directory.
    # NOTE: os.listdir returns entries in arbitrary order, so which files end up in the
    # training sample is not guaranteed to be the same across runs or environments.
    image_list = os.listdir(DATA_FOLDER)
    filtered_image_list = image_list[:SAMPLE_SIZE] if isTrain else image_list
    data_df = []  # despite the name, a plain list of [image, label] pairs
    for img_name in tqdm(filtered_image_list):
        path = os.path.join(DATA_FOLDER, img_name)
        if isTrain:
            label = label_pet_image(img_name)
        else:
            label = img_name.split('.')[0]
        # Keep the file name and the pixel data in separate variables so that the
        # diagnostics below report the file name, not the decoded array.
        pixels = cv2.imread(path, cv2.IMREAD_COLOR)
        if pixels is None:
            # cv2.imread signals an unreadable file by returning None instead of raising.
            print("Image with issue name, path, isTrain, label", img_name, path, isTrain, label)
            print("cv2.imread could not read the file")
            continue
        try:
            pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        except Exception as e:
            # Best effort: report the problematic file and carry on with the rest.
            print("Image with issue name, path, isTrain, label", img_name, path, isTrain, label)
            print(str(e))
            continue
        data_df.append([np.array(pixels), label])
    shuffle(data_df)
    return data_df

In [None]:
# Build the labelled training sample from the extracted training folder
# (train_dir is '../data/train').
train = process_data(train_dir)

In [None]:
# Sanity check: draw the first sample of the shuffled training list with its class name.
plot_sample(train[0])

In [None]:
# Prepare the train data
# Stack the images into one array of shape (N, IMG_SIZE, IMG_SIZE, 3). Building it as
# float32 keeps the normalised array at half the size it would have if the integer
# pixel array were divided directly (NumPy would promote that result to float64).
X = np.array([i[0] for i in train], dtype=np.float32).reshape(-1,IMG_SIZE,IMG_SIZE,3)
y = np.array([i[1] for i in train])

# normalize our data in 0-1
X = X/255

# split into train/validation
#from sklearn.model_selection import train_test_split
#train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size=0.3)


#X

In [None]:
# https://www.kaggle.com/cdeotte/how-to-choose-cnn-architecture-mnist

# GLOBAL VARIABLES
def _annealed_lr(epoch):
    """Return the learning rate for an epoch index: 1e-3 reduced by 5% per epoch."""
    return 1e-3 * 0.95 ** epoch

# Learning-rate callback passed to every fit() call in the experiments.
annealer = LearningRateScheduler(_annealed_lr, verbose=0)
# Line styles for the history plots: four styles repeated three times (12 entries).
styles = [':', '-.', '--', '-'] * 3

In [None]:
# Experiment #1 - how many Convo2D layers?
# Network j contains j+1 convolution/pooling pairs (24, then 48, then 64 filters),
# followed by the same dense head in every case.
nets = 3
model = []

for j in range(nets):
    net = Sequential()
    net.add(Conv2D(24, kernel_size=5, padding='same', activation='relu',
                   input_shape=(IMG_SIZE, IMG_SIZE, 3)))
    net.add(MaxPool2D())
    if j >= 1:
        net.add(Conv2D(48, kernel_size=5, padding='same', activation='relu'))
        net.add(MaxPool2D())
    if j >= 2:
        net.add(Conv2D(64, kernel_size=5, padding='same', activation='relu'))
        net.add(MaxPool2D(padding='same'))
    net.add(Flatten())
    net.add(Dense(4096, activation='relu'))
    # Single sigmoid unit + binary cross-entropy, because the labels are a single 0/1 column:
    #https://stackoverflow.com/questions/61742556/valueerror-shapes-none-1-and-none-2-are-incompatible
    #https://stackoverflow.com/questions/48851558/tensorflow-estimator-valueerror-logits-and-labels-must-have-the-same-shape
    net.add(Dense(1, activation='sigmoid'))
    net.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    model.append(net)

In [None]:
# CREATE VALIDATION SET
# Hold out roughly one third of the sample for validation. The split is seeded so that
# re-running the notebook compares the networks on the same partition instead of a
# fresh random one each time.
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(X, y, test_size = 0.333, random_state=42)
print(X_train2.shape)
print(Y_train2.shape)

In [None]:
# TRAIN NETWORKS
# Fit every network of the experiment on the same split and report the best
# training / validation accuracy it reached over all epochs.
names = ["(C-P)x1","(C-P)x2","(C-P)x3"]
epochs = 20
history = []
for j in range(nets):
    run = model[j].fit(
        X_train2, Y_train2,
        batch_size=80,
        epochs=epochs,
        validation_data=(X_val2, Y_val2),
        callbacks=[annealer],
        verbose=0,
    )
    history.append(run)
    # The metric keys are 'accuracy' / 'val_accuracy' (not 'acc' / 'val_acc'):
    # https://towardsdatascience.com/fixing-the-keyerror-acc-and-keyerror-val-acc-errors-in-keras-2-3-x-or-newer-b29b52609af9
    best_train_acc = max(run.history['accuracy'])
    best_val_acc = max(run.history['val_accuracy'])
    print("CNN {0}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
        names[j], epochs, best_train_acc, best_val_acc))
    
# 2000
# CNN (C-P)x1: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.63964
# CNN (C-P)x2: Epochs=20, Train accuracy=0.99625, Validation accuracy=0.65165
# CNN (C-P)x3: Epochs=20, Train accuracy=0.91904, Validation accuracy=0.71471

# 10000
# CNN (C-P)x1: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.70961
# CNN (C-P)x2: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.73664
# CNN (C-P)x3: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.78769

In [None]:
# PLOT 
def plot_history(history, names, ylim=(.7, 1)):
    """Plot the validation accuracy per epoch of several training runs.

    Parameters
    ----------
    history : sequence
        Objects returned by ``fit``; each must expose a ``history`` dict with
        a ``'val_accuracy'`` entry.
    names : list of str
        Legend labels, one per run and in the same order. Surplus labels are
        ignored.
    ylim : pair of float or None
        Lower and upper y-axis limits. Defaults to ``(.7, 1)``; curves below
        the lower limit are not visible. Pass ``None`` to let matplotlib
        choose the limits.
    """
    plt.figure(figsize=(15,5))
    # Iterate over the runs actually passed in rather than a global counter, so the
    # function also works when the caller's list and that counter disagree.
    for i, run in enumerate(history):
        # Wrap around so more runs than available styles cannot raise IndexError.
        plt.plot(run.history['val_accuracy'], linestyle=styles[i % len(styles)])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(names[:len(history)], loc='upper left')
    axes = plt.gca()
    if ylim is not None:
        axes.set_ylim(list(ylim))
    plt.show()

## Summary
From the above experiment, it seems that 3 pairs of convolution-subsampling layers is the best option.  I'm not sure what this means for computational complexity, but that's what I'm going with for now.

# 2. How many feature maps?
In the previous experiment, we decided that 3 pairs is the way to go. How many feature maps should we include? For example, we could do
 * 784 - [**8**C5-P2] - [**16**C5-P2] - 256 - 10
 * 784 - [**16**C5-P2] - [**32**C5-P2] - 256 - 10
 * 784 - [**24**C5-P2] - [**48**C5-P2] - 256 - 10
 * 784 - [**32**C5-P2] - [**64**C5-P2] - 256 - 10
 * 784 - [**48**C5-P2] - [**96**C5-P2] - 256 - 10  
 * 784 - [**64**C5-P2] - [**128**C5-P2] - 256 - 10  

In [None]:
# BUILD CONVOLUTIONAL NEURAL NETWORKS
# Six networks with three convolution/pooling pairs each; the filter counts of all
# three convolutions grow with the network index j, the dense head stays the same.
nets = 6
model = []
for j in range(nets):
    net = Sequential()
    net.add(Conv2D(j*8+24, kernel_size=5, activation='relu',
                   input_shape=(IMG_SIZE, IMG_SIZE, 3)))
    net.add(MaxPool2D())
    net.add(Conv2D(j*16+48, kernel_size=5, activation='relu'))
    net.add(MaxPool2D())
    net.add(Conv2D(j*32+64, kernel_size=5, activation='relu'))
    net.add(MaxPool2D())
    net.add(Flatten())
    net.add(Dense(4096, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    model.append(net)

In [None]:
# CREATE VALIDATION SET
# Seeded so that every network, and every re-run of this cell, is evaluated on the
# same partition; an unseeded split makes run-to-run differences hard to interpret.
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(X, y, test_size = 0.333, random_state=42)
# TRAIN NETWORKS
history = [0] * nets
# NOTE(review): these labels do not match the filter counts the models are built with
# (the first Conv2D of network j has j*8+24 filters, i.e. 24, 32, ..., 64). They are left
# unchanged so the printed lines stay comparable with previously recorded results.
names = ["8 maps","16 maps","24 maps","32 maps","48 maps","64 maps"]
epochs = 20
for j in range(nets):
    history[j] = model[j].fit(X_train2,Y_train2, batch_size=80, epochs = epochs, 
        validation_data = (X_val2,Y_val2), callbacks=[annealer], verbose=0)
    # Report the best accuracy reached in any epoch, not the final-epoch value.
    print("CNN {0}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
        names[j],epochs,max(history[j].history['accuracy']),max(history[j].history['val_accuracy']) ))

In [None]:
# Plot the per-epoch validation accuracy of each network trained in the previous cell.
plot_history(history, names)

## Results

```
CNN 8 maps: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.75766
CNN 16 maps: Epochs=20, Train accuracy=0.71499, Validation accuracy=0.66306
CNN 24 maps: Epochs=20, Train accuracy=0.99955, Validation accuracy=0.77628
CNN 32 maps: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.77147
CNN 48 maps: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.76517
CNN 64 maps: Epochs=20, Train accuracy=0.99760, Validation accuracy=0.78709
```

The more the better! Although there's not a huge difference over the long haul (and what the hell happened with 16 maps?).  Going to run it again... 

```
CNN 8 maps: Epochs=20, Train accuracy=0.98756, Validation accuracy=0.77568
CNN 16 maps: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.78859
CNN 24 maps: Epochs=20, Train accuracy=0.99565, Validation accuracy=0.77628
CNN 32 maps: Epochs=20, Train accuracy=0.98516, Validation accuracy=0.77477
CNN 48 maps: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.78288
CNN 64 maps: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.77327
```

This time they all lined up - in fact 16 was the best!  Because this isn't having much impact on overall score, I'm going to go with the least expensive route, 8 maps.

# 3. How large a dense layer?

In [None]:
# Build CNNs with variations of Dense Layer
# The convolutional stack is held fixed at 24/48/64 filters (the j=0 configuration of
# the feature-map experiment), so the dense layer is the only thing that differs
# between networks. Letting the filter counts grow with j as well would confound the
# comparison.
nets = 6
model = [0] *nets
for j in range(nets):
    model[j] = Sequential()
    model[j].add(Conv2D(24,kernel_size=5,activation='relu',input_shape=(IMG_SIZE,IMG_SIZE,3)))
    model[j].add(MaxPool2D())
    model[j].add(Conv2D(48,kernel_size=5,activation='relu'))
    model[j].add(MaxPool2D())
    model[j].add(Conv2D(64,kernel_size=5,activation='relu'))
    model[j].add(MaxPool2D())
    model[j].add(Flatten())
    # Network 0 has no hidden dense layer; network j>0 gets 2**(j+4) units (32, 64, ..., 512).
    if j>0:
        model[j].add(Dense(2**(j+4), activation='relu'))
    model[j].add(Dense(1, activation='sigmoid'))
    model[j].compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]) 

In [None]:
# CREATE VALIDATION SET
# Seeded so that every network, and every re-run of this cell, is evaluated on the
# same partition.
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(X, y, test_size = 0.333, random_state=42)
# TRAIN NETWORKS
# fit() continues from a model's current weights, so rebuild the models before
# re-running this cell; otherwise the reported accuracies come from accumulated training.
history = [0] * nets
# One label per network: keep only as many labels as there are models so that the
# legend cannot receive more labels than plotted lines.
names = ["0N","32N","64N","128N","256N","512N","1024N","2048N"][:nets]
epochs = 50
for j in range(nets):
    history[j] = model[j].fit(X_train2,Y_train2, batch_size=80, epochs = epochs, 
        validation_data = (X_val2,Y_val2), callbacks=[annealer], verbose=0)
    # Report the best accuracy reached in any epoch, not the final-epoch value.
    print("CNN {0}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
        names[j],epochs,max(history[j].history['accuracy']),max(history[j].history['val_accuracy']) ))
    


In [None]:
# Plot the per-epoch validation accuracy of each network trained in the previous cell.
plot_history(history, names)

## Results

First run it appears 128 is the best (although they are all really close?!)

```
CNN 0N: Epochs=20, Train accuracy=0.95292, Validation accuracy=0.76486
CNN 32N: Epochs=20, Train accuracy=0.99730, Validation accuracy=0.78979
CNN 64N: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.79069
CNN 128N: Epochs=20, Train accuracy=0.99985, Validation accuracy=0.79850
CNN 256N: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.77988
CNN 512N: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.78679
```

Running again...

This time, 32 took the crown...but why were the results so much better?  uhhhhh.  I'll bet I didn't rebuild the models!  Going to run one more time, they should get even better.  Perhaps low epochs are good for tweaking params, then you go high epochs once you've settled on something?

```
CNN 0N: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.86306
CNN 32N: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.89249
CNN 64N: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.87838
CNN 128N: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.87538
CNN 256N: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.87838
CNN 512N: Epochs=20, Train accuracy=1.00000, Validation accuracy=0.87988
```

Rebuilt, and ran 50 epochs.  64N is the clear winner, and I'm sticking with it!

```
CNN 0N: Epochs=50, Train accuracy=1.00000, Validation accuracy=0.77628
CNN 32N: Epochs=50, Train accuracy=1.00000, Validation accuracy=0.77778
CNN 64N: Epochs=50, Train accuracy=1.00000, Validation accuracy=0.79159
CNN 128N: Epochs=50, Train accuracy=1.00000, Validation accuracy=0.78318
CNN 256N: Epochs=50, Train accuracy=1.00000, Validation accuracy=0.78198
CNN 512N: Epochs=50, Train accuracy=1.00000, Validation accuracy=0.73814
```