In [None]:
import os
import shutil
import zipfile

# TensorFlow's native logging reads this variable when the library is loaded,
# so it must be set *before* `import tensorflow` to have any effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow log outputs

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

### Unzip train.zip to working directory

In [None]:
# Extract every member of the training archive into the working directory;
# the context manager closes the archive once extraction is done.
with zipfile.ZipFile("../input/dogs-vs-cats/train.zip", mode='r') as train_archive:
    train_archive.extractall(path="./")

### List the files in the train directory
As the listing below shows, each image's class is encoded in its file name (the name contains either `cat` or `dog`).

In [None]:
train_path = "./train/"

# Scan the directory once and reuse the listing for both the file count and
# the five-entry preview (os.listdir returns entries in arbitrary order).
train_files = os.listdir(train_path)
print(len(train_files))
for file_name in train_files[:5]:
    print(file_name)

[image_dataset_from_directory](https://www.tensorflow.org/api_docs/python/tf/keras/utils/image_dataset_from_directory) expects a main directory containing one subfolder per class, with each class's images placed inside its subfolder.

In [None]:
# Create the following directory structure (one subfolder per class):
# dataset/
# ...dog/
# ......dog_image_1.jpg
# ......dog_image_2.jpg
# ...cat/
# ......cat_image_1.jpg
# ......cat_image_2.jpg

dataset_root = "./dataset"

# A file is moved to the first label found in its name, checked in this order.
class_labels = ("cat", "dog")

for label in class_labels:
    # exist_ok=True keeps this cell re-runnable: without it a second run
    # raises FileExistsError because the folders are already there.
    os.makedirs(os.path.join(dataset_root, label), exist_ok=True)

for file_name in os.listdir(train_path):
    for label in class_labels:
        if label in file_name:
            shutil.move(
                os.path.join(train_path, file_name),
                os.path.join(dataset_root, label, file_name),
            )
            break  # files matching neither label are left where they are

### Create dataset

In [None]:
# Arguments shared by the training and validation loaders. Both calls must use
# the same validation_split and seed so that the two subsets come from the
# same shuffled file list and do not overlap.
split_options = dict(
    directory="./dataset",  # root directory; class subfolders are discovered beneath it
    image_size=(150, 150),
    label_mode="binary",
    validation_split=0.2,
    batch_size=32,
    seed=1,
)

train_dataset = tf.keras.utils.image_dataset_from_directory(
    subset="training", **split_options
)
val_dataset = tf.keras.utils.image_dataset_from_directory(
    subset="validation", **split_options
)


# Improve performance of data loading; see: https://www.tensorflow.org/guide/data_performance
AUTOTUNE = tf.data.AUTOTUNE

# cache() replays elements in the order of the first pass, so the loader's own
# per-epoch shuffling is lost once the cache is filled. Shuffling after the
# cache restores a different order every epoch. The dataset is already batched
# here, so the buffer size counts batches rather than individual images.
train_dataset = train_dataset.cache().shuffle(1000).prefetch(AUTOTUNE)
val_dataset = val_dataset.cache().prefetch(AUTOTUNE)

### CNN Model with Regularization

In [None]:
# Random image transformations (flip, rotation, zoom, contrast) bundled into a
# single Sequential model so they can be inserted into the network as one layer.
augmentation_layers = [
    layers.RandomFlip(mode="horizontal"),
    layers.RandomRotation(factor=0.2),
    layers.RandomZoom(height_factor=0.2),
    layers.RandomContrast(factor=0.2),
]
data_augmentation = keras.Sequential(augmentation_layers)

In [None]:
# (filters, kernel_size) of each convolutional stage, in network order.
conv_stages = [(32, 5), (64, 3), (128, 3), (256, 3)]

# (units, dropout_rate) of each hidden dense stage; None means no dropout.
dense_stages = [(256, 0.4), (128, 0.3), (64, None)]

# Input pipeline: 150x150 RGB image -> augmentation -> rescale between 0 and 1.
network_layers = [
    layers.Input(shape=(150, 150, 3)),
    data_augmentation,
    layers.Rescaling(scale=1./255),
]

# Feature extractor: every stage is Conv2D -> MaxPooling2D -> BatchNormalization.
for n_filters, kernel in conv_stages:
    network_layers += [
        layers.Conv2D(n_filters, kernel, activation="relu"),
        layers.MaxPooling2D(),
        layers.BatchNormalization(),
    ]

network_layers.append(layers.Flatten())

# Classifier head: Dense -> BatchNormalization, followed by Dropout where a rate is given.
for n_units, drop_rate in dense_stages:
    network_layers += [
        layers.Dense(n_units, activation="relu"),
        layers.BatchNormalization(),
    ]
    if drop_rate is not None:
        network_layers.append(layers.Dropout(drop_rate))

# Single sigmoid unit for binary classification.
network_layers.append(layers.Dense(1, activation="sigmoid"))

model = keras.Sequential(network_layers)

In [None]:
# Configure training: Adam with a learning rate of 0.01, binary cross-entropy
# as the loss, and accuracy reported as the metric.
adam_optimizer = keras.optimizers.Adam(learning_rate=0.01)
bce_loss = keras.losses.BinaryCrossentropy()

model.compile(optimizer=adam_optimizer, loss=bce_loss, metrics=["accuracy"])

### Callbacks 

In [None]:
# Training callbacks; all three monitor the validation loss.
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau,
)

# Stop once val_loss has gone 15 epochs without improving, restoring the best weights.
stop_early = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Halve the learning rate after 5 epochs without val_loss improvement (floor: 1e-7).
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)

# Write the model to disk only when val_loss improves on the best value so far.
save_best = ModelCheckpoint(filepath='./best_model.h5', monitor='val_loss', save_best_only=True)

callbacks = [stop_early, reduce_lr, save_best]

In [None]:
# Train for up to 100 epochs, evaluating on the validation set after each one.
# The returned History object is kept so the per-epoch metrics can be
# inspected or plotted afterwards instead of being discarded.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=100,
    callbacks=callbacks,
)