In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input mount, then echo each
# full path on its own line.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing required libraries

In [None]:
import zipfile
import os
import pandas as pd
import tensorflow as tf
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.max_columns", None)
import keras_tuner as kt

## Extracting input files

In [None]:
def extract_zip_file(file_path, dest_dir="."):
    """Extract every member of a zip archive into a directory.

    Args:
        file_path: Path of the zip archive to read.
        dest_dir: Directory the members are extracted into. Defaults to
            ``"."`` (the current working directory), which is where the
            archive was always unpacked before this parameter existed.
    """
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        zip_ref.extractall(dest_dir)

In [None]:
# Unpack the competition archives into the working directory: test first,
# then train.
for archive_path in (
    "../input/dogs-vs-cats-redux-kernels-edition/test.zip",
    "../input/dogs-vs-cats-redux-kernels-edition/train.zip",
):
    extract_zip_file(archive_path)

## Constructing Train Dataframe

In [None]:
def construct_train_df(train_dir="./train"):
    """Build a dataframe of training image paths and their binary labels.

    Args:
        train_dir: Root directory that is walked recursively for files.
            Defaults to ``"./train"``.

    Returns:
        pd.DataFrame with two columns: ``file_path`` (path of each file
        found) and ``is_dog`` (1 when the file name contains ``"dog"``,
        otherwise 0). Rows are sorted by ``file_path``. When no files are
        found the frame is empty but still has both columns.
    """
    image_list = []
    for dirname, _, filenames in os.walk(train_dir):
        for filename in filenames:
            is_dog = 1 if "dog" in filename else 0
            # Join with the directory currently being walked so that files
            # in nested folders get a path that actually exists.
            image_list.append({"file_path": os.path.join(dirname, filename), 'is_dog': is_dog})
    # Directory listings come back in arbitrary order; sorting makes the row
    # order (and therefore any seeded split downstream) reproducible.
    image_list.sort(key=lambda record: record["file_path"])
    return pd.DataFrame(image_list, columns=["file_path", "is_dog"])

In [None]:
# Index of the training set: one row per file found by construct_train_df(),
# holding the file path and the is_dog label.
train_df = construct_train_df()

## Constructing train and test data

In [None]:
# Load every training image, resize it to the 64x64 input the models expect,
# and scale pixel values to [0, 1]. x collects images, y the matching labels.
x, y = [], []
for file_path, is_dog in zip(train_df['file_path'], train_df['is_dog']):
    image = cv2.imread(file_path)
    # cv2.imread reports a missing/corrupt file by returning None instead of
    # raising; fail here with the offending path rather than inside resize.
    if image is None:
        raise ValueError(f"Could not read training image: {file_path}")
    image = cv2.resize(image, (64, 64))
    # Convert to float32 before scaling: `image / 255` would produce float64
    # and double the memory held by the full image array.
    x.append(image.astype(np.float32) / 255)
    y.append(is_dog)

## Converting to NumPy arrays so that TensorFlow can work with them

In [None]:
# Turn the Python lists of images and labels into NumPy arrays.
x = np.array(x)
y = np.array(y)

In [None]:
# Hold out 20% of the samples; the fixed random_state keeps the shuffle
# identical between runs for the same input order.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=1,
    test_size=0.2,
)

## Building a deep learning model

In [None]:
# Baseline network declared as a single layer list: two convolution/pooling
# stages followed by a small dense head with one sigmoid output.
# Note: the name `model` is rebound later in the notebook when the tuner's
# best model is assigned to it.
model = Sequential([
    # CNN
    Conv2D(
        filters=12,
        kernel_size=(6, 6),
        activation='relu',
        kernel_initializer='he_uniform',
        input_shape=(64, 64, 3),
    ),
    MaxPooling2D(4, 4),
    Conv2D(
        filters=10,
        kernel_size=(3, 3),
        activation='relu',
        kernel_initializer='he_uniform',
    ),
    MaxPooling2D(2, 2),
    # ANN
    Flatten(),
    Dense(12, activation='relu', kernel_initializer='he_uniform'),
    Dense(1, activation='sigmoid', kernel_initializer='glorot_uniform'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

## Hyperparameter tuning

In [None]:
def build_model(hp):
    """Assemble and compile a CNN whose architecture is sampled from `hp`.

    Hyperparameters registered on `hp`, in this order:
        cnn_layers: number of Conv2D(3x3, relu) + MaxPooling2D(2x2) stages,
            between 1 and 3.
        filters_{i}: filter count of stage i, 32 to 128 in steps of 32.
        mlp_layers: number of Dense(relu) layers after flattening, 1 to 5.
        units_{i}: width of dense layer i, 32 to 128 in steps of 3.
        dropout: whether a Dropout(0.5) is inserted before the output layer.

    Args:
        hp: Hyperparameter container exposing ``Int`` and ``Boolean``.

    Returns:
        A compiled Model that takes (64, 64, 3) inputs and ends in a single
        sigmoid unit, using binary cross-entropy loss, the adam optimizer
        and the accuracy metric.
    """
    inputs = tf.keras.Input(shape=(64, 64, 3))
    tensor = inputs

    # Convolutional feature extractor.
    conv_stage_count = hp.Int("cnn_layers", 1, 3)
    for i in range(conv_stage_count):
        n_filters = hp.Int(f"filters_{i}", 32, 128, step=32)
        tensor = Conv2D(n_filters, kernel_size=(3,3), activation='relu')(tensor)
        tensor = MaxPooling2D(pool_size=(2,2))(tensor)
    tensor = Flatten()(tensor)

    # Fully connected head.
    dense_layer_count = hp.Int("mlp_layers", 1, 5, 1)
    for i in range(dense_layer_count):
        # NOTE(review): step=3 here versus step=32 for the filters above;
        # possibly a typo for 32. Kept as-is; confirm the intended grid.
        n_units = hp.Int(f'units_{i}', 32, 128, step=3)
        tensor = Dense(units=n_units, activation='relu')(tensor)

    # Optional regularisation, applied once after the last hidden layer.
    if hp.Boolean("dropout"):
        tensor = Dropout(0.5)(tensor)

    outputs = Dense(units=1, activation='sigmoid')(tensor)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [None]:
# NOTE(review): disabled scratch code for smoke-testing build_model(). It sets
# a "model_type" hyperparameter, which build_model() never reads, so both
# halves would build from the same search space; looks stale.
# hp = kt.HyperParameters()
# hp.values["model_type"] = "cnn"
# model = build_model(hp)
# model(x_train[:12])
# model.summary()

# hp.values["model_type"] = "mlp"
# model = build_model(hp)
# model(x_train[:12])
# model.summary()

In [None]:
# Random search over build_model's hyperparameters: 10 trials ranked by
# validation accuracy. overwrite=True is passed so earlier results stored
# under ./tuning are not reused.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    directory="./tuning",
    overwrite=True,
)

In [None]:
# Run the search: 10 epochs per trial, with 20% of the training split used
# for validation and TensorBoard logs written to ./tensorboard.
tuner.search(
    x_train,
    y_train,
    epochs=10,
    validation_split=0.2,
    callbacks=[tf.keras.callbacks.TensorBoard("./tensorboard")],
)

In [None]:
# Keep the first model returned by the tuner (presumably the best-scoring
# trial; confirm against the keras_tuner docs). This rebinds `model`,
# discarding the baseline network built earlier in the notebook.
model = tuner.get_best_models()[0]

In [None]:
# Print the layer-by-layer summary of the model taken from the tuner.
model.summary()

## Fitting the model

In [None]:
# Train the tuner-selected model for 5 more epochs on the training split,
# using the held-out split (x_test, y_test) as validation data.
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    validation_data=(x_test, y_test),
)

## Constructing test dataframe

In [None]:
def construct_test_df(test_dir="./test"):
    """Build a dataframe listing the test image paths in id order.

    Args:
        test_dir: Root directory that is walked recursively for files.
            Defaults to ``"./test"``.

    Returns:
        pd.DataFrame with a single ``file_path`` column. Files whose name
        (without extension) is an integer come first, sorted numerically by
        that integer; any other files follow, sorted by path. The frame is
        empty when no files are found.
    """
    def _id_order(path):
        # "10.jpg" must sort after "2.jpg", so compare numeric stems as ints.
        stem = os.path.splitext(os.path.basename(path))[0]
        return (0, int(stem), path) if stem.isdecimal() else (1, 0, path)

    # Join with the directory currently being walked so that files in nested
    # folders get a path that actually exists.
    x = [
        os.path.join(dirname, filename)
        for dirname, _, filenames in os.walk(test_dir)
        for filename in filenames
    ]
    # Directory listings come back in arbitrary order; a fixed numeric order
    # keeps row position aligned with the image id.
    x.sort(key=_id_order)
    return pd.DataFrame({'file_path': x})

In [None]:
# Index of the test set: one row per file found by construct_test_df().
test_df = construct_test_df()

In [None]:
# Load every test image with the same preprocessing as the training images
# (64x64 resize, pixel values scaled to [0, 1]), keeping test_df's row order.
test_images = []
for file_path in test_df['file_path']:
    image = cv2.imread(file_path)
    # cv2.imread reports a missing/corrupt file by returning None instead of
    # raising. Skipping it would shift every later prediction off its row,
    # so fail here and name the offending path.
    if image is None:
        raise ValueError(f"Could not read test image: {file_path}")
    image = cv2.resize(image, (64, 64))
    # Convert to float32 before scaling: `image / 255` would produce float64
    # and double the memory held by the full image array.
    test_images.append(image.astype(np.float32) / 255)

In [None]:
# Stack the per-image arrays into a single NumPy array for model.predict().
test_images = np.array(test_images)

## Predicting the test data

In [None]:
# Run the trained model on the test images; the final layer is a sigmoid, so
# each prediction lies between 0 and 1.
y_pred = model.predict(test_images)

In [None]:
# Display the shape of the prediction array as the cell output.
y_pred.shape

In [None]:
# Flatten the predictions into a 1-D array, one score per test image.
dog = y_pred.reshape(-1)

## Creating Submission Dataframe

In [None]:
# Take each id from the image's own file name instead of its row position:
# test_df rows follow directory listing order, which need not be 1..N.
# Assumes test files are named "<integer id>.<ext>"; int() raises ValueError
# otherwise. TODO confirm against the dataset.
test_ids = [
    int(os.path.splitext(os.path.basename(path))[0])
    for path in test_df['file_path']
]
# NOTE(review): labels are hard 0/1 decisions at a 0.5 threshold. If the
# competition is scored on predicted probabilities (e.g. log loss),
# submitting `dog` itself would be the better choice; confirm.
submission_df = (
    pd.DataFrame({'id': test_ids, 'label': (dog > 0.5).astype('int')})
    .sort_values('id')
    .reset_index(drop=True)
)

In [None]:
# Write the submission table to the Kaggle working directory, omitting the
# dataframe index column.
submission_df.to_csv("/kaggle/working/submission.csv", index=False)