In [None]:
import os
import zipfile
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
import tensorflow.keras.applications as zoo
import tensorflow.keras.layers as L
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import accuracy_score

In [None]:
# Unpack the competition archives into the working directory.
# The standard-library zipfile module (already imported by this notebook) is
# used instead of shelling out to `unzip`, so the cell does not depend on an
# external binary being installed and behaves the same on every platform.
for archive_path in ("../input/dogs-vs-cats/train.zip",
                     "../input/dogs-vs-cats/test1.zip"):
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall(".")

In [None]:
# Build the labelled index of training images: one row per file in ./train,
# with the text before the first dot of the file name used as the class label
# (presumably names such as "cat.0.jpg" / "dog.0.jpg" -- verify against the data).
#
# os.listdir() returns entries in arbitrary, filesystem-dependent order, so the
# names are sorted to give the frame a stable row order. Without that, the
# seeded train/validation/test split would differ from machine to machine.
filenames = sorted(os.listdir("./train"))
categories = [filename.split('.')[0] for filename in filenames]

df = pd.DataFrame({
    'filename': filenames,
    'category': categories
})

In [None]:
# Hold out 7000 images from the labelled data, then cut that hold-out in half
# to obtain the validation and test sets. Both splits are stratified on the
# label so every subset keeps the class proportions of the full data set, and
# both use a fixed seed so the partition is repeatable.
train_df, val_test_df = train_test_split(
    df,
    test_size=7000,
    random_state=42,
    stratify=df['category'],
)
val_df, test_df = train_test_split(
    val_test_df,
    test_size=0.5,
    random_state=42,
    stratify=val_test_df['category'],
)

In [None]:
# Show the class balance of each split as a bar chart.
# value_counts() orders its result by frequency, not by label, so the counts
# are re-indexed to a fixed class order; otherwise a split in which dogs
# outnumber cats would draw the dog count under the 'cat' label.
class_order = ['cat', 'dog']
fig, ax = plt.subplots(1, 3, figsize=(18, 5))
splits = [('Train', train_df), ('Validation', val_df), ('Test', test_df)]
for axis, (title, split_df) in zip(ax, splits):
    counts = split_df.category.value_counts().reindex(class_order, fill_value=0)
    axis.bar(class_order, counts, color=['b', 'r'])
    axis.set_title(title)
plt.show()

In [None]:
# Settings for the VGG16 experiment in the cells that follow.
# Height/width the generators resize images to (target_size); with a channel
# dimension of 3 appended it is also the input_shape given to the backbone.
IMAGE_SIZE = (256, 256)
# Number of images per batch produced by the data generators.
BATCH_SIZE = 32
# Passed as `patience` to the EarlyStopping callback.
EARLY_STOP_PATIENCE = 7

In [None]:
# Input pipelines for the VGG16 experiment.
#
# The pretrained VGG16 weights expect images prepared by the model's own
# preprocess_input (channel reordering and mean-centering, without scaling to
# [0, 1]). Feeding 1/255-rescaled pixels puts the inputs on a different scale
# from what the frozen convolutional filters were trained on, so the matching
# preprocessing function is applied to every image instead.
data_generator = ImageDataGenerator(
    preprocessing_function=zoo.vgg16.preprocess_input
)

# Training batches: shuffled (generator default) with one-hot labels.
train = data_generator.flow_from_dataframe(
    train_df,
    "./train",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Validation batches: same labelled format as the training batches.
val = data_generator.flow_from_dataframe(
    val_df,
    "./train",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Test batches: images only (class_mode=None) and unshuffled, so the order of
# the predictions matches the row order of test_df.
test = data_generator.flow_from_dataframe(
    test_df,
    "./train",
    x_col='filename',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)

In [None]:
# Convolutional backbone: VGG16 without its classification head, frozen so
# that only the layers stacked on top of it are trained.
base = zoo.VGG16(
    input_shape=(*IMAGE_SIZE, 3),
    include_top=False,
)
base.trainable = False
base.summary()

In [None]:
# Classifier: frozen backbone -> pooled features -> one hidden Dense block ->
# two output units. The last Dense layer has no activation, i.e. it emits raw
# scores; the loss in the next cell is configured with from_logits=True.
model = tf.keras.Sequential([
    base,
    L.GlobalAveragePooling2D(),
    L.Flatten(),
    L.BatchNormalization(),
    L.ReLU(),
    L.Dense(256),
    L.Dropout(0.3),
    L.BatchNormalization(),
    L.ReLU(),
    L.Dropout(0.3),
    L.Dense(2),
])
model.summary()

In [None]:
# Loss works on raw scores (from_logits=True) against one-hot labels.
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
opt = tf.keras.optimizers.Adam()
model.compile(optimizer=opt, loss=loss, metrics=['acc'])

# Stop once validation accuracy has not improved for EARLY_STOP_PATIENCE
# epochs, and roll the model back to its best weights.
early_stop = EarlyStopping(
    monitor='val_acc',
    patience=EARLY_STOP_PATIENCE,
    restore_best_weights=True,
)

# Snapshot of the epoch with the best *training* accuracy.
checkpoint = ModelCheckpoint(
    './vgg16_best.hdf5',
    monitor='acc',
    mode='auto',
    save_best_only=True,
)

# Snapshot of the epoch with the best validation loss ('val_loss' is the
# callback's default monitor, spelled out here for clarity).
checkpoint_val = ModelCheckpoint(
    './vgg16_best_val.hdf5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
)

In [None]:
# First training stage: fit the classifier head on top of the frozen backbone.
# `batch_size` is deliberately not passed: the generators already yield
# batches of BATCH_SIZE images, and Keras documents that batch_size must not
# be specified for generator / Sequence inputs (some versions raise on it).
history1 = model.fit(
    train,
    validation_data=val,
    epochs=35,
    callbacks=[early_stop, checkpoint, checkpoint_val]
)

In [None]:
# Second training stage: continue training for up to 25 more epochs.
# NOTE(review): the backbone is (re)set to frozen here, so this stage trains
# the same head as the first one. If fine-tuning the backbone was intended,
# it would need `base.trainable = True` followed by a fresh model.compile()
# (ideally with a small learning rate) -- confirm the intent.
base.trainable = False
# As above, `batch_size` is omitted because the generators define the batch
# size and Keras does not accept it for generator / Sequence inputs.
history2 = model.fit(
    train,
    validation_data=val,
    epochs=25,
    callbacks=[early_stop, checkpoint, checkpoint_val]
)

In [None]:
# Concatenate the per-epoch metrics of both training stages so each curve
# runs continuously across them.
first, second = history1.history, history2.history
train_loss = first['loss'] + second['loss']
val_loss = first['val_loss'] + second['val_loss']
train_acc = first['acc'] + second['acc']
val_acc = first['val_acc'] + second['val_acc']

# Two panels in the top row of a 2x2 grid: loss on the left, accuracy on the
# right, each with the training and validation curve.
fig = plt.figure(figsize=(12, 10))
panels = [
    (221, 'Model loss', 'Loss', train_loss, val_loss),
    (222, 'Model accuracy', 'Accuracy', train_acc, val_acc),
]
for position, title, y_label, train_curve, val_curve in panels:
    axes = fig.add_subplot(position)
    axes.plot(train_curve)
    axes.plot(val_curve)
    axes.set_title(title)
    axes.set_ylabel(y_label)
    axes.set_xlabel('Epoch')
    axes.legend(['Train', 'Val'])
plt.show()

In [None]:
# Evaluate the checkpoint with the best validation loss on the held-out test set.
model.load_weights('./vgg16_best_val.hdf5')

# The model outputs one raw score per class; the predicted class is the index
# of the largest score (a softmax would not change which index is largest).
logits = model.predict(test)
pred = np.argmax(logits, axis=-1)

# Encode the true labels with the same label -> index mapping the training
# generator used, rather than assuming that 'dog' is class 1. The test
# generator is unshuffled, so predictions line up with the rows of test_df.
true = test_df['category'].map(train.class_indices)
accuracy_score(true, pred)

In [None]:
# Settings for the InceptionV3 experiment in the cells that follow
# (these overwrite the values used for the VGG16 experiment above).
# Height/width the generators resize images to (target_size); with a channel
# dimension of 3 appended it is also the input_shape given to the backbone.
IMAGE_SIZE = (299, 299)
# Number of images per batch produced by the data generators.
BATCH_SIZE = 32
# Passed as `patience` to the EarlyStopping callback.
EARLY_STOP_PATIENCE = 7

In [None]:
# Input pipelines for the InceptionV3 experiment.
#
# The pretrained InceptionV3 weights expect pixels scaled to [-1, 1] by the
# model's own preprocess_input. Rescaling by 1/255 yields [0, 1] instead,
# which does not match what the frozen backbone was trained on, so the
# matching preprocessing function is applied to every image.
data_generator = ImageDataGenerator(
    preprocessing_function=zoo.inception_v3.preprocess_input
)

# Training batches: shuffled (generator default) with one-hot labels.
train = data_generator.flow_from_dataframe(
    train_df,
    "./train",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Validation batches: same labelled format as the training batches.
val = data_generator.flow_from_dataframe(
    val_df,
    "./train",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Test batches: images only (class_mode=None) and unshuffled, so the order of
# the predictions matches the row order of test_df.
test = data_generator.flow_from_dataframe(
    test_df,
    "./train",
    x_col='filename',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)

In [None]:
# Convolutional backbone: InceptionV3 without its classification head, frozen
# so that only the layers stacked on top of it are trained.
base = zoo.InceptionV3(
    input_shape=(*IMAGE_SIZE, 3),
    include_top=False,
)
base.trainable = False
base.summary()

In [None]:
# Classifier: frozen backbone -> pooled features -> one hidden Dense block ->
# two output units. The last Dense layer has no activation, i.e. it emits raw
# scores; the loss in the next cell is configured with from_logits=True.
model = tf.keras.Sequential([
    base,
    L.GlobalAveragePooling2D(),
    L.Flatten(),
    L.BatchNormalization(),
    L.ReLU(),
    L.Dense(512),
    L.Dropout(0.5),
    L.BatchNormalization(),
    L.ReLU(),
    L.Dropout(0.5),
    L.Dense(2),
])
model.summary()

In [None]:
# Loss works on raw scores (from_logits=True) against one-hot labels.
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
opt = tf.keras.optimizers.Adam()
model.compile(optimizer=opt, loss=loss, metrics=['acc'])

# Stop once validation accuracy has not improved for EARLY_STOP_PATIENCE
# epochs, and roll the model back to its best weights.
early_stop = EarlyStopping(
    monitor='val_acc',
    patience=EARLY_STOP_PATIENCE,
    restore_best_weights=True,
)

# Snapshot of the epoch with the best *training* accuracy.
checkpoint = ModelCheckpoint(
    './incV3_best.hdf5',
    monitor='acc',
    mode='auto',
    save_best_only=True,
)

# Snapshot of the epoch with the best validation loss ('val_loss' is the
# callback's default monitor, spelled out here for clarity).
checkpoint_val = ModelCheckpoint(
    './incV3_best_val.hdf5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
)

In [None]:
# Train the classifier head on top of the frozen InceptionV3 backbone.
# `batch_size` is deliberately not passed: the generators already yield
# batches of BATCH_SIZE images, and Keras documents that batch_size must not
# be specified for generator / Sequence inputs (some versions raise on it).
history1 = model.fit(
    train,
    validation_data=val,
    epochs=10,
    callbacks=[early_stop, checkpoint, checkpoint_val]
)

In [None]:
# Per-epoch metrics recorded during the InceptionV3 training run.
metrics = history1.history
train_loss, val_loss = metrics['loss'], metrics['val_loss']
train_acc, val_acc = metrics['acc'], metrics['val_acc']

# Two panels in the top row of a 2x2 grid: loss on the left, accuracy on the
# right, each with the training and validation curve.
fig = plt.figure(figsize=(12, 10))
panels = [
    (221, 'Model loss', 'Loss', train_loss, val_loss),
    (222, 'Model accuracy', 'Accuracy', train_acc, val_acc),
]
for position, title, y_label, train_curve, val_curve in panels:
    axes = fig.add_subplot(position)
    axes.plot(train_curve)
    axes.plot(val_curve)
    axes.set_title(title)
    axes.set_ylabel(y_label)
    axes.set_xlabel('Epoch')
    axes.legend(['Train', 'Val'])
plt.show()

In [None]:
# Evaluate the checkpoint with the best validation loss on the held-out test set.
model.load_weights('./incV3_best_val.hdf5')

# The model outputs one raw score per class; the predicted class is the index
# of the largest score (a softmax would not change which index is largest).
logits = model.predict(test)
pred = np.argmax(logits, axis=-1)

# Encode the true labels with the same label -> index mapping the training
# generator used, rather than assuming that 'dog' is class 1. The test
# generator is unshuffled, so predictions line up with the rows of test_df.
true = test_df['category'].map(train.class_indices)
accuracy_score(true, pred)