<a href="https://www.kaggle.com/sanjayanbu/dogs-cats-competition?scriptVersionId=89001734" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(folder, name) for name in names]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats-redux-kernels-edition/sample_submission.csv
/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip
/kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip


## Importing required libraries

In [2]:
import zipfile
import os
import pandas as pd
import tensorflow as tf
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.max_columns", None)
import keras_tuner as kt

## Extracting input files

In [3]:
def extract_zip_file(file_path, dest_dir="."):
    """Extract every member of a zip archive into a directory.

    Args:
        file_path: Path to the ``.zip`` archive to unpack.
        dest_dir: Directory that receives the extracted members. Defaults to
            the current working directory, which is what this function always
            used before the parameter existed.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        zipfile.BadZipFile: If ``file_path`` is not a valid zip archive.
    """
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        zip_ref.extractall(dest_dir)

In [4]:
# Unpack both competition archives into the working directory; later cells
# read the extracted images from ./test and ./train.
for archive_path in (
    "../input/dogs-vs-cats-redux-kernels-edition/test.zip",
    "../input/dogs-vs-cats-redux-kernels-edition/train.zip",
):
    extract_zip_file(archive_path)

## Constructing Train Dataframe

In [5]:
def construct_train_df(train_dir="./train"):
    """Build a DataFrame listing every training image and its label.

    Args:
        train_dir: Directory that is walked recursively for image files.
            Defaults to ``"./train"``.

    Returns:
        A DataFrame with one row per file found and two columns:
        ``file_path`` (the directory the file was found in joined with its
        name) and ``is_dog`` (1 when the file name contains ``"dog"``,
        otherwise 0). Rows are sorted by ``file_path`` so the result is
        reproducible across runs. If no files are found, the frame is empty
        but still has both columns.
    """
    image_list = []
    for dirname, _, filenames in os.walk(train_dir):
        for filename in filenames:
            image_list.append({
                # Join with the directory actually being walked so that files
                # in nested folders get a path that really exists.
                "file_path": os.path.join(dirname, filename),
                "is_dog": 1 if "dog" in filename else 0,
            })
    # os.walk yields entries in arbitrary order; sort so that the downstream
    # seeded train/test split selects the same images on every run.
    image_list.sort(key=lambda record: record["file_path"])
    return pd.DataFrame(image_list, columns=["file_path", "is_dog"])

In [6]:
# Table of training image paths (file_path) and their labels (is_dog).
train_df = construct_train_df()

## Loading the training images and labels

In [7]:
# Load every training image, resize it to 64x64, and scale pixels to [0, 1].
x, y = [], []
for file_path, is_dog in zip(train_df['file_path'], train_df['is_dog']):
    image = cv2.imread(file_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque resize error.
    if image is None:
        raise ValueError(f"Could not read training image: {file_path}")
    image = cv2.resize(image, (64, 64))
    # float32 is the precision Keras trains in; dividing the uint8 image
    # directly would produce float64 and double the memory of the dataset.
    x.append(image.astype(np.float32) / 255.0)
    y.append(is_dog)

## Converting to NumPy arrays so that TensorFlow can work with them

In [8]:
# Stack the per-image lists into single arrays: x holds images, y holds labels.
x = np.array(x)
y = np.array(y)

In [9]:
# Hold out 20% of the labelled images for evaluation; the fixed seed keeps
# the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

## Building a deep learning model

In [10]:
# Baseline network: two convolution/pooling stages followed by a small dense
# head ending in one sigmoid unit. Note that `model` is rebound to the tuner's
# best model in a later cell.
model = Sequential([
    # CNN
    Conv2D(
        filters=12,
        kernel_size=(6, 6),
        activation='relu',
        kernel_initializer='he_uniform',
        input_shape=(64, 64, 3),
    ),
    MaxPooling2D(pool_size=4, strides=4),
    Conv2D(
        filters=10,
        kernel_size=(3, 3),
        activation='relu',
        kernel_initializer='he_uniform',
    ),
    MaxPooling2D(pool_size=2, strides=2),
    # ANN
    Flatten(),
    Dense(12, activation='relu', kernel_initializer='he_uniform'),
    Dense(1, activation='sigmoid', kernel_initializer='glorot_uniform'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 59, 59, 12)        1308      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 12)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 12, 12, 10)        1090      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 10)          0         
_________________________________________________________________
flatten (Flatten)            (None, 360)               0         
_________________________________________________________________
dense (Dense)                (None, 12)                4332      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 1


User settings:

   KMP_AFFINITY=granularity=fine,noverbose,compact,1,0
   KMP_BLOCKTIME=0
   KMP_DUPLICATE_LIB_OK=True
   KMP_INIT_AT_FORK=FALSE
   KMP_SETTINGS=1

Effective settings:

   KMP_ABORT_DELAY=0
   KMP_ADAPTIVE_LOCK_PROPS='1,1024'
   KMP_ALIGN_ALLOC=64
   KMP_ALL_THREADPRIVATE=128
   KMP_ATOMIC_MODE=2
   KMP_BLOCKTIME=0
   KMP_CPUINFO_FILE: value is not defined
   KMP_DETERMINISTIC_REDUCTION=false
   KMP_DEVICE_THREAD_LIMIT=2147483647
   KMP_DISP_NUM_BUFFERS=7
   KMP_DUPLICATE_LIB_OK=true
   KMP_ENABLE_TASK_THROTTLING=true
   KMP_FORCE_REDUCTION: value is not defined
   KMP_FOREIGN_THREADS_THREADPRIVATE=true
   KMP_FORKJOIN_BARRIER='2,2'
   KMP_FORKJOIN_BARRIER_PATTERN='hyper,hyper'
   KMP_GTID_MODE=3
   KMP_HANDLE_SIGNALS=false
   KMP_HOT_TEAMS_MAX_LEVEL=1
   KMP_HOT_TEAMS_MODE=0
   KMP_INIT_AT_FORK=true
   KMP_LIBRARY=throughput
   KMP_LOCK_KIND=queuing
   KMP_MALLOC_POOL_INCR=1M
   KMP_NUM_LOCKS_IN_BLOCK=1
   KMP_PLAIN_BARRIER='2,2'
   KMP_PLAIN_BARRIER_PATTERN='hyper,hy

## Hyperparameter tuning

In [11]:
def build_model(hp):
    """Build and compile a CNN whose architecture is sampled from ``hp``.

    Hyperparameters requested from ``hp``:
        cnn_layers: number of Conv2D + MaxPooling2D stages (1 to 3).
        filters_{i}: filters in conv stage ``i`` (32 to 128, step 32).
        mlp_layers: number of hidden Dense layers (1 to 5).
        units_{i}: units in hidden Dense layer ``i`` (32 to 128, step 3).
        dropout: whether a Dropout(0.5) layer precedes the output layer.

    Args:
        hp: Object providing ``Int`` and ``Boolean`` methods for sampling
            hyperparameters (the tuner passes one in for each trial).

    Returns:
        A compiled Keras ``Model`` taking 64x64x3 inputs and emitting a single
        sigmoid output, using binary cross-entropy loss, the Adam optimizer
        and the accuracy metric.
    """
    inputs = tf.keras.Input(shape=(64, 64, 3))
    features = inputs

    # Convolutional feature extractor.
    n_conv_stages = hp.Int("cnn_layers", 1, 3)
    for stage in range(n_conv_stages):
        n_filters = hp.Int(f"filters_{stage}", 32, 128, step=32)
        features = Conv2D(n_filters, kernel_size=(3,3), activation='relu')(features)
        features = MaxPooling2D(pool_size=(2,2))(features)

    features = Flatten()(features)

    # Fully connected head.
    n_dense_layers = hp.Int("mlp_layers", 1, 5, 1)
    for layer in range(n_dense_layers):
        # NOTE(review): step=3 here vs. step=32 for the filters; possibly a
        # typo for 32, left unchanged. TODO confirm intent.
        n_units = hp.Int(f'units_{layer}', 32, 128, step=3)
        features = Dense(units=n_units, activation='relu')(features)

    use_dropout = hp.Boolean("dropout")
    if use_dropout:
        features = Dropout(0.5)(features)

    outputs = Dense(units=1, activation='sigmoid')(features)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')
    return model

In [12]:
# hp = kt.HyperParameters()
# hp.values["model_type"] = "cnn"
# model = build_model(hp)
# model(x_train[:12])
# model.summary()

# hp.values["model_type"] = "mlp"
# model = build_model(hp)
# model(x_train[:12])
# model.summary()

In [13]:
# Random search over build_model's hyperparameters, ranked by validation
# accuracy; any previous results in ./tuning are overwritten.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    directory="./tuning",
    overwrite=True,
)

In [14]:
# Run the search: each trial trains for 10 epochs, validates on 20% of the
# training split, and logs to ./tensorboard.
tensorboard_cb = tf.keras.callbacks.TensorBoard("./tensorboard")
tuner.search(
    x_train,
    y_train,
    epochs=10,
    validation_split=0.2,
    callbacks=[tensorboard_cb],
)

Trial 10 Complete [00h 05m 16s]
val_accuracy: 0.7992500066757202

Best val_accuracy So Far: 0.8230000138282776
Total elapsed time: 01h 39m 24s


In [15]:
# Keep the top-ranked model from the search; this rebinds `model`.
best_models = tuner.get_best_models()
model = best_models[0]

In [16]:
# Print the layer-by-layer architecture of the model selected by the tuner.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 64, 64, 3)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 62, 62, 64)        1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 31, 31, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 29, 29, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 32)        18464     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 32)          0     

## Fitting the model

In [17]:
# Train the selected model for 5 more epochs on the training split, reporting
# metrics on the held-out split after each epoch.
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    validation_data=(x_test, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Constructing test dataframe

In [18]:
def construct_test_df(test_dir="./test"):
    """Build a DataFrame listing every test image in submission order.

    Args:
        test_dir: Directory that is walked recursively for image files.
            Defaults to ``"./test"``.

    Returns:
        A DataFrame with a single ``file_path`` column. Files whose name
        (without extension) is an integer come first, ordered by that integer,
        so ``1.jpg, 2.jpg, ..., 10.jpg`` keep their numeric order; any other
        files follow, ordered by path. The frame is empty (but still has the
        column) if no files are found.
    """
    def _submission_order(path):
        # Numeric stems sort by value, not lexicographically ("2" < "10").
        stem = os.path.splitext(os.path.basename(path))[0]
        if stem.isascii() and stem.isdigit():
            return (0, int(stem), path)
        return (1, 0, path)

    # Join with the directory actually being walked so files in nested folders
    # get a path that really exists.
    paths = [
        os.path.join(dirname, filename)
        for dirname, _, filenames in os.walk(test_dir)
        for filename in filenames
    ]
    # os.walk yields files in arbitrary order; the submission cell assigns ids
    # by row position, so the rows must follow the numeric file ids.
    paths.sort(key=_submission_order)
    return pd.DataFrame({'file_path': paths})

In [19]:
# Table of test image paths (file_path column) used for prediction.
test_df = construct_test_df()

In [20]:
# Load every test image with the same preprocessing as the training images:
# resize to 64x64 and scale pixels to [0, 1].
test_images = []
for file_path in test_df['file_path']:
    image = cv2.imread(file_path)
    # cv2.imread signals failure by returning None instead of raising. Skipping
    # the file would misalign predictions with test_df rows, so fail loudly.
    if image is None:
        raise ValueError(f"Could not read test image: {file_path}")
    image = cv2.resize(image, (64, 64))
    # float32 is the precision Keras computes in; dividing the uint8 image
    # directly would produce float64 and double the memory used.
    test_images.append(image.astype(np.float32) / 255.0)

In [21]:
# Stack the list of per-image arrays into one array for model.predict.
test_images = np.array(test_images)

## Predicting the test data

In [22]:
# Sigmoid output of the model for every test image (one value per image).
y_pred = model.predict(test_images)

In [23]:
# Sanity check: expect one row per test image and a single output column.
y_pred.shape

(12500, 1)

In [24]:
# Flatten the (n_images, 1) predictions into a 1-D vector of dog scores.
dog = y_pred.ravel()

## Creating Submission Dataframe

In [25]:
# Take each id from its file name rather than from the row position, so every
# prediction is paired with the image it was computed for, whatever order the
# files were listed in.
# Assumes test files are named "<id>.<ext>" as in this competition's test.zip;
# int() raises ValueError for any other name.
test_ids = [
    int(os.path.splitext(os.path.basename(path))[0])
    for path in test_df['file_path']
]

# The competition is scored with log loss on the predicted probability of
# "dog", so submit probabilities rather than hard 0/1 labels. Clipping away
# from 0 and 1 bounds the penalty for a confidently wrong prediction.
submission_df = (
    pd.DataFrame({'id': test_ids, 'label': np.clip(dog, 0.01, 0.99)})
    .sort_values('id')
    .reset_index(drop=True)
)

In [26]:
# Write the submission file to the Kaggle working directory, without the
# DataFrame index column.
submission_df.to_csv("/kaggle/working/submission.csv", index=False)