In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import shutil
import matplotlib
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers, callbacks, regularizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental import preprocessing

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory


# for dirname, _, filenames in os.walk('/kaggle/input/'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Walk the read-only input directory and print the full path of every file in it.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input/')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
# Extract zip files
import zipfile
def extract_images(filePath, dest="/kaggle/working"):
    """Extract every member of a zip archive into a directory.

    Args:
        filePath: Path to the ``.zip`` archive to read.
        dest: Directory the archive is unpacked into. Defaults to
            ``/kaggle/working``, which is where this function always
            extracted to before the parameter was added.

    Raises:
        FileNotFoundError: If ``filePath`` does not exist.
        zipfile.BadZipFile: If ``filePath`` is not a valid zip archive.
    """
    with zipfile.ZipFile(filePath, "r") as archive:
        archive.extractall(dest)
# Unpack both competition archives (training images, then test images).
for archive in (
    '/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip',
    '/kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip',
):
    extract_images(archive)

In [None]:
# One sub-folder per class inside the extracted training directory.
cats_folder = '/kaggle/working/train/cats/'
dogs_folder = '/kaggle/working/train/dogs/'

# Start from empty class folders: first drop anything a previous run left
# behind, then recreate both directories.
for stale_folder in (cats_folder, dogs_folder):
    if os.path.exists(stale_folder):
        shutil.rmtree(stale_folder)

for fresh_folder in (cats_folder, dogs_folder):
    os.mkdir(fresh_folder)

In [None]:
source = '/kaggle/working/train/'

# Sort the extracted training images into their class folders based on the
# file name. Directories (including the class folders themselves) are skipped.
allfiles = os.listdir(source)

for f in allfiles:
    src_path = os.path.join(source, f)
    if not os.path.isfile(src_path):
        continue
    # `elif` guarantees a file is moved at most once: with two independent
    # `if`s, a name containing both substrings would be moved to the cats
    # folder and then fail on the second move because the source is gone.
    if 'cat' in f:
        shutil.move(src_path, os.path.join(cats_folder, f))
    elif 'dog' in f:
        shutil.move(src_path, os.path.join(dogs_folder, f))

In [None]:
# Validation tree: a parent folder with one sub-folder per class.
valid_folder = '/kaggle/working/valid'
valid_cats_folder = '/kaggle/working/valid/cats'
valid_dogs_folder = '/kaggle/working/valid/dogs'

# Removing the parent also removes both class folders beneath it, so a single
# rmtree is enough to reset the whole tree before it is rebuilt.
if os.path.exists(valid_folder):
    shutil.rmtree(valid_folder)

for new_dir in (valid_folder, valid_cats_folder, valid_dogs_folder):
    os.mkdir(new_dir)

In [None]:
def move_tail_to_validation(class_folder, valid_class_folder, keep=10000):
    """Move all but the first `keep` files of a class folder into validation.

    Files are taken in sorted name order. `os.listdir` alone returns entries in
    arbitrary order, which would make the train/validation split differ from
    run to run.

    Args:
        class_folder: Directory currently holding every image of one class.
        valid_class_folder: Directory that receives the held-out images.
        keep: Number of files (in sorted order) that stay in `class_folder`.
    """
    for name in sorted(os.listdir(class_folder))[keep:]:
        file_path = os.path.join(class_folder, name)
        if os.path.isfile(file_path):
            shutil.move(file_path, os.path.join(valid_class_folder, name))


move_tail_to_validation(cats_folder, valid_cats_folder)
move_tail_to_validation(dogs_folder, valid_dogs_folder)

In [None]:
train_dir = '/kaggle/working/train'
validation_dir = '/kaggle/working/valid'
test_dir = '/kaggle/working/test'

batch_size = 64
img_height = 256
img_width = 256

# Options shared by all three loaders.
loader_options = dict(
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    interpolation='nearest',
)
# Train and validation additionally get a binary label inferred from the
# class sub-folder each image sits in.
labelled_options = dict(labels='inferred', label_mode='binary', **loader_options)

train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir, shuffle=True, **labelled_options)

val_ds = tf.keras.utils.image_dataset_from_directory(
    validation_dir, shuffle=False, **labelled_options)

# The test images are loaded without labels and without shuffling.
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir, labels=None, shuffle=False, **loader_options)

In [None]:
# Class names as reported by the training and the test dataset objects.
class_names = train_ds.class_names
test_class_names = test_ds.class_names

print(class_names)
print(test_class_names)

In [None]:
num_classes = len(class_names)

# Optional early stopping, currently disabled. To enable it, uncomment the
# block below and the `callbacks=` line in `model.fit`.
# early_stopping = callbacks.EarlyStopping(
#     min_delta=0.001, # minimum amount of change to count as an improvement
#     patience=7, # how many epochs to wait before stopping
#     restore_best_weights=True,
# )
# An L2 penalty can be tried on a layer with kernel_regularizer=regularizers.l2(0.01).

# Small CNN for binary classification: augmentation -> rescale -> three
# convolutional stages (BatchNorm, max-pool, light dropout) -> a tiny dense
# head ending in a single sigmoid unit.
model = Sequential([
    layers.InputLayer(input_shape=[img_height, img_width, 3]),
    # Random augmentation. The stable `layers.*` classes are used instead of the
    # deprecated `layers.experimental.preprocessing` aliases, matching the
    # `layers.Rescaling` call below.
    layers.RandomContrast(factor=0.5),
    layers.RandomFlip(mode='horizontal_and_vertical'),
    layers.RandomRotation(0.20),
    # Scale pixel values by 1/255.
    layers.Rescaling(1./255),
    layers.BatchNormalization(),
    # Stage 1: 32 filters, 5x5 kernel.
    layers.Conv2D(filters=32, kernel_size=5, activation="relu", padding='same'),
    layers.BatchNormalization(),
    layers.MaxPool2D(),
    layers.Dropout(0.1),
    # Stage 2: 64 filters, 3x3 kernel.
    layers.Conv2D(filters=64, kernel_size=3, activation="relu", padding='same'),
    layers.BatchNormalization(),
    layers.MaxPool2D(),
    layers.Dropout(0.1),
    # Stage 3: two stacked 128-filter 3x3 convolutions.
    layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding='same'),
    layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding='same'),
    layers.BatchNormalization(),
    layers.MaxPool2D(),
    layers.Dropout(0.1),
    # Classifier head: a single sigmoid output to go with the binary labels.
    layers.Flatten(),
    layers.Dense(units=8, activation="relu"),
    layers.Dense(units=1, activation="sigmoid")
])

model.compile(
    # NOTE(review): epsilon=0.01 is far larger than the Keras default (1e-7) —
    # confirm this is intentional.
    optimizer=tf.keras.optimizers.Adam(epsilon=0.01),
    loss='binary_crossentropy',
    metrics=['binary_accuracy']
)


model.summary()

epochs=50
history = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs,
#   callbacks=[early_stopping]
)

In [None]:
# Per-epoch metrics recorded by `model.fit`.
acc = history.history['binary_accuracy']
val_acc = history.history['val_binary_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of epochs that actually ran rather than the requested count:
# if training stops early, the history is shorter than `epochs` and plotting
# against range(epochs) would fail with mismatched x/y lengths.
epochs_range = range(len(acc))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')
ax_acc.set_xlabel('Epoch')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('Epoch')

plt.show()

In [None]:
# history_frame = pd.DataFrame(history.history)
# history_frame

# Test processing.
# `test_ds` is already batched, so no `batch_size` is passed to `predict`
# (Keras asks callers not to specify it for dataset inputs).
pred_labels = model.predict(test_ds)
# The model has one sigmoid output per image; flatten the (N, 1) result before
# thresholding. Assumes class index 1 is 'dogs' (inferred labels follow the
# alphanumeric order of the class folder names) — TODO confirm against class_names.
pred_str_labels = ['dog' if prob > 0.5 else 'cat' for prob in np.ravel(pred_labels)]
# pred_str_labels[:10]

In [None]:
# plt.figure(figsize=(300,300))
# for img in test_ds.take(1):
#     for i in range(64):
#         ax = plt.subplot(64, 1, i + 1)
#         plt.imshow(img[i].numpy().astype("uint8"))
#         plt.title(pred_str_labels[i])
#         plt.axis("off")
#     plt.show()

In [None]:
submission_df = pd.read_csv('/kaggle/input/dogs-vs-cats-redux-kernels-edition/sample_submission.csv')

# Predictions come back in the order the dataset yielded the images. With
# shuffle=False Keras orders files by sorted path ("1.jpg", "10.jpg",
# "100.jpg", ...), NOT by numeric id, so prediction k must be paired with the
# k-th file of that order rather than with id k + 1.
test_paths = getattr(test_ds, 'file_paths', None)
if test_paths is None:
    # Fallback: reproduce the sorted order from the directory listing.
    test_paths = sorted(os.listdir("/kaggle/working/test"))
test_filenames = [os.path.basename(path) for path in test_paths]

probabilities = np.ravel(pred_labels)
if len(test_filenames) != len(probabilities):
    raise ValueError(
        f"{len(test_filenames)} test files but {len(probabilities)} predictions"
    )

# File names are "<id>.<ext>"; the numeric stem is the submission id.
test_ids = np.array([int(os.path.splitext(name)[0]) for name in test_filenames])

# As in the original code, row i of the sample submission is assumed to hold
# id i + 1 — TODO confirm against the CSV.
submission_df.loc[test_ids - 1, 'label'] = probabilities
submission_df.to_csv('/kaggle/working/submission.csv', index=False)
submission_df.head()