In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file in it.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!unzip ../input/dogs-vs-cats-redux-kernels-edition/test.zip
!unzip ../input/dogs-vs-cats-redux-kernels-edition/train.zip
!ls -ltr train | head

In [None]:
# Cloud AutoML
# Google Cloud project identifier, kept as a notebook-level constant.
PROJECT_ID = 'automl-exercise-307804'
from google.cloud import automl_v1beta1 as automl
# Instantiate the AutoML client with default arguments.
# NOTE(review): neither PROJECT_ID nor automl_client is referenced by any other
# cell visible in this notebook — confirm whether this cell is still needed.
automl_client = automl.AutoMlClient()

In [None]:
import os, cv2, re, random
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
from keras import layers, models, optimizers
from keras import backend as K
from sklearn.model_selection import train_test_split

In [None]:
TRAIN_DIR = 'train/'
TEST_DIR = 'test/'

# Read the training directory once, then split its entries by the class
# name embedded in each file name.
train_files = os.listdir(TRAIN_DIR)
train_dogs = ['./train/{}'.format(name) for name in train_files if 'dog' in name]
train_cats = ['./train/{}'.format(name) for name in train_files if 'cat' in name]

# Test files carry no class name, so every entry is kept.
test_images_dogs_cats = ['./test/{}'.format(name) for name in os.listdir(TEST_DIR)]

# Cell output: (number of dog images, number of cat images, number of test images).
len(train_dogs), len(train_cats), len(test_images_dogs_cats)

In [None]:
def atoi(text):
    """Return ``int(text)`` when ``text`` consists only of digits, else ``text`` unchanged."""
    if text.isdigit():
        return int(text)
    return text

def natural_keys(text):
    """
    Build a sort key that orders embedded numbers numerically.

    The string is split into alternating non-digit / digit runs (the capture
    group keeps the digit runs in the result) and every digit run is turned
    into an int, so that e.g. 'dog.2.jpg' sorts before 'dog.10.jpg'.

    Parameters
    ----------
    text : str
        The string (typically a file path) to build the key for.

    Returns
    -------
    list
        Alternating str and int chunks of ``text``.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    return [atoi(chunk) for chunk in re.split(r'(\d+)', text)]

# Number of images taken from each class for the training subset.
SAMPLES_PER_CLASS = 1300

# Sort every path list in natural order (numeric parts compared as numbers).
for paths in (train_dogs, train_cats, test_images_dogs_cats):
    paths.sort(key=natural_keys)

# Training subset: the first SAMPLES_PER_CLASS dogs followed by the first SAMPLES_PER_CLASS cats.
train_images_dogs_cats = train_dogs[:SAMPLES_PER_CLASS] + train_cats[:SAMPLES_PER_CLASS]

# Cell output: (size of the training subset, number of test images).
len(train_images_dogs_cats), len(test_images_dogs_cats)

In [None]:
img_width = 150
img_height = 150
def prepare_data(list_of_images):
    """
    Load and resize images, deriving a label for each from its file name.

    Parameters
    ----------
    list_of_images : iterable of str
        Paths of the image files to load.

    Returns
    -------
    x : list
        One image array per input path, in RGB channel order, resized with
        cubic interpolation to img_width x img_height.
    y : list of int
        1 for every file whose name contains 'dog', 0 for every file whose
        name contains 'cat'. Files matching neither (such as unlabeled test
        images) add no label, so y can be shorter than x or empty.

    Raises
    ------
    ValueError
        If an image file cannot be read.
    """
    x = [] # images as arrays
    y = [] # labels

    for path in list_of_images:
        pixels = cv2.imread(path)
        # cv2.imread reports failure by returning None rather than raising;
        # stop here with the offending path instead of failing inside cv2.resize.
        if pixels is None:
            raise ValueError('Could not read image: {}'.format(path))
        # OpenCV decodes into BGR channel order; convert to RGB so the images
        # display with correct colours in matplotlib.
        pixels = cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB)
        x.append(cv2.resize(pixels, (img_width, img_height), interpolation=cv2.INTER_CUBIC))

        # Match on the file name only, so a directory name containing
        # 'dog' or 'cat' cannot decide the label.
        name = os.path.basename(path)
        if 'dog' in name:
            y.append(1)
        elif 'cat' in name:
            y.append(0)
    return x, y

# Load and resize the selected training images; Y holds 1 for dog and 0 for cat.
X, Y = prepare_data(train_images_dogs_cats)
print(K.image_data_format())
# The first Keras layer below is declared with input_shape (img_width, img_height, 3);
# the trailing '3' is the number of colour channels.
# NOTE(review): this assumes the value printed above is 'channels_last' — confirm in the cell output.

In [None]:
# Hold out 20% of the labelled images for validation; the fixed random_state
# makes the split repeatable.
X_train, X_val, Y_train, Y_val = train_test_split(
    X, Y,
    test_size=0.2,
    random_state=1,
)
nb_train_samples, nb_validation_samples = len(X_train), len(X_val)
batch_size = 16

In [None]:
# Experiment 1: three Conv -> ReLU -> MaxPool stages followed by a small dense head.
# The model is named through the public constructor argument.
model = models.Sequential(name='CNN_Exp1')

# channels_last input shape is (rows, cols, channels), i.e. (height, width, 3).
model.add(layers.Conv2D(32, (3, 3), input_shape=(img_height, img_width, 3)))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(32, (3, 3)))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(64, (3, 3)))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Classifier head: a single sigmoid unit gives the probability of the label-1 class.
model.add(layers.Flatten())
model.add(layers.Dense(64))
model.add(layers.Activation('relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1))
model.add(layers.Activation('sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

model.summary()

In [None]:
# Training images: rescale pixel values by 1/255 and apply random
# shear / zoom / horizontal-flip augmentation.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# Held-out images are only rescaled: random augmentation at evaluation time
# would make the validation metrics noisy and non-repeatable.
val_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow(np.array(X_train), Y_train, batch_size=batch_size)
# Evaluation gains nothing from shuffling, so keep the validation order fixed.
validation_generator = val_datagen.flow(np.array(X_val), Y_val, batch_size=batch_size, shuffle=False)

# Show sample from train
from matplotlib import pyplot as plt
# 2 x 4 grid: each panel shows the first image of a freshly drawn training
# batch, and that image's label is printed to the cell output.
plt.figure(figsize=(12,12))
for panel in range(1, 9):
    plt.subplot(2, 4, panel)
    x_batch, y_batch = next(iter(train_generator))
    print(y_batch[0])
    plt.imshow(x_batch[0])
plt.tight_layout()
plt.show()

In [None]:
%%time
history = model.fit_generator(
    train_generator, 
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=15,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size
)

In [None]:
X_test, Y_test = prepare_data(test_images_dogs_cats) #Y_test in this case will be []
# Test-time generator: rescale only (no random augmentation), and shuffle=False
# because flow() shuffles by default — predictions must stay in the same order
# as test_images_dogs_cats for the submission ids to line up.
test_datagen = ImageDataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow(np.array(X_test), batch_size=batch_size, shuffle=False)

In [None]:
prediction_probabilities = model.predict(test_generator, verbose=1)
# Row i gets id i + 1.
# NOTE(review): assumes the naturally sorted test files are numbered consecutively from 1 — confirm.
counter = range(1, len(test_images_dogs_cats) + 1)
# The model ends in a single sigmoid unit, so predict yields one value per image;
# flatten to a 1-D float column instead of round-tripping through strings.
submission = pd.DataFrame({
    "id": counter,
    "label": prediction_probabilities.ravel().astype(float),
})

submission.to_csv("dogsVScats_exp1.csv", index = False)
submission.head()

### Experiment 2

In [None]:
# Experiment 2: four Conv(ReLU) -> MaxPool stages with widening filters (32 -> 128),
# dropout before a 512-unit dense layer, and a sigmoid output.
# The model is named through the public constructor argument.
model = models.Sequential(name='CNN_Exp2')

# channels_last input shape is (rows, cols, channels), i.e. (height, width, 3).
model.add(layers.Conv2D(32, (3, 3), activation = 'relu', input_shape = (img_height, img_width, 3)))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation = 'relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation = 'relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(512, activation = 'relu'))
model.add(layers.Dense(1, activation = 'sigmoid'))

model.compile(loss='binary_crossentropy',optimizer='rmsprop',metrics=['accuracy'])
model.summary()

In [None]:
# Build new iterators over the same train / validation arrays for this experiment.
train_generator = train_datagen.flow(np.array(X_train), Y_train, batch_size=batch_size)
validation_generator = val_datagen.flow(np.array(X_val), Y_val, batch_size=batch_size)

In [None]:
%%time
history = model.fit_generator(
    train_generator, 
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=15,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size
)

In [None]:
# Test-time generator: rescale only (no random augmentation), and shuffle=False
# because flow() shuffles by default — predictions must stay in the same order
# as test_images_dogs_cats for the submission ids to line up.
test_generator = ImageDataGenerator(rescale=1. / 255).flow(
    np.array(X_test), batch_size=batch_size, shuffle=False)
prediction_probabilities = model.predict(test_generator, verbose=1)
# Row i gets id i + 1.
# NOTE(review): assumes the naturally sorted test files are numbered consecutively from 1 — confirm.
counter = range(1, len(test_images_dogs_cats) + 1)
# The model ends in a single sigmoid unit, so predict yields one value per image;
# flatten to a 1-D float column instead of round-tripping through strings.
submission = pd.DataFrame({
    "id": counter,
    "label": prediction_probabilities.ravel().astype(float),
})

submission.to_csv("dogsVScats_exp2.csv", index = False)
submission.head()

### Experiment 3

In [None]:
# Experiment 3: three Conv(ReLU) -> BatchNorm -> MaxPool -> Dropout stages,
# then a 512-unit dense layer with BatchNorm and dropout, and a sigmoid output.
# The model is named through the public constructor argument.
model = models.Sequential(name='CNN_Exp3')

# channels_last input shape is (rows, cols, channels), i.e. (height, width, 3).
model.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(img_height, img_width, 3)))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2,2)))
model.add(layers.Dropout(0.25))

model.add(layers.Conv2D(64,(3,3),activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2,2)))
model.add(layers.Dropout(0.25))

model.add(layers.Conv2D(128,(3,3),activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2,2)))
model.add(layers.Dropout(0.25))

model.add(layers.Flatten())
model.add(layers.Dense(512,activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='rmsprop',metrics=['accuracy'])
model.summary()

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
# Stop training once the monitored quantity (EarlyStopping's default) has
# not improved for 10 consecutive epochs.
earlystop = EarlyStopping(patience = 10)
# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged under the key 'val_accuracy'; monitoring a key that is never logged
# would leave the learning-rate schedule inactive.
# Halve the learning rate after 2 epochs without improvement, down to min_lr.
learning_rate_reduction = ReduceLROnPlateau(monitor = 'val_accuracy',patience = 2,verbose = 1,factor = 0.5,min_lr = 0.00001)
callbacks = [earlystop,learning_rate_reduction]

# Build new iterators over the same train / validation arrays for this experiment.
train_generator = train_datagen.flow(np.array(X_train), Y_train, batch_size=batch_size)
validation_generator = val_datagen.flow(np.array(X_val), Y_val, batch_size=batch_size)

In [None]:
%%time
history = model.fit_generator(
    train_generator, 
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=15,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    callbacks=callbacks
)

In [None]:
# Test-time generator: rescale only (no random augmentation), and shuffle=False
# because flow() shuffles by default — predictions must stay in the same order
# as test_images_dogs_cats for the submission ids to line up.
test_generator = ImageDataGenerator(rescale=1. / 255).flow(
    np.array(X_test), batch_size=batch_size, shuffle=False)
prediction_probabilities = model.predict(test_generator, verbose=1)
# Row i gets id i + 1.
# NOTE(review): assumes the naturally sorted test files are numbered consecutively from 1 — confirm.
counter = range(1, len(test_images_dogs_cats) + 1)
# The model ends in a single sigmoid unit, so predict yields one value per image;
# flatten to a 1-D float column instead of round-tripping through strings.
submission = pd.DataFrame({
    "id": counter,
    "label": prediction_probabilities.ravel().astype(float),
})

submission.to_csv("dogsVScats_exp3.csv", index = False)
submission.head()