In [41]:
import os, shutil
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras import (models, layers, optimizers)
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

import plotly.express as ex
import plotly.graph_objects as go

In [42]:
# Convolutional base of VGG16 without its classifier head. Keras caches the
# downloaded ImageNet weights under ~/.keras/models.
base_model = VGG16(
    include_top=False,
    weights="imagenet",
    input_shape=(150, 150, 3),
)
base_model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 150, 150, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 150, 150, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 150, 150, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 75, 75, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 75, 75, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 75, 75, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 37, 37, 128)       0     

In [43]:
# Root of the processed dataset; the notebook reads images from its
# train/dogs and train/cats sub-directories.
dataset_path: str = "../data/processed"

In [44]:
# Print how many training files each class directory holds (dogs, then cats).
for _subdir in ("/train/dogs", "/train/cats"):
    print(len(os.listdir(dataset_path + _subdir)))

1414
1422


In [45]:
BATCH_SIZE: int = 16  # batch size for both the image dataset and model.fit
IMG_SIZE: int = 150  # images are resized to IMG_SIZE x IMG_SIZE when loaded

In [54]:
def extract_features(directory, sample_count):
    """Run images from ``<directory>/train`` through ``base_model`` and collect the outputs.

    Args:
        directory: Dataset root. Images are loaded from its ``train``
            sub-directory with ``image_dataset_from_directory``.
        sample_count: Maximum number of images to process.

    Returns:
        A ``(features, labels, class_names)`` tuple. ``features`` has shape
        ``(n, 4, 4, 512)`` and ``labels`` has shape ``(n,)``, where ``n`` is
        ``sample_count``, or the number of images actually read if the dataset
        is smaller. ``class_names`` is the dataset's ``class_names`` attribute.
    """
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    labels = np.zeros(shape=(sample_count,))
    dataset = tf.keras.utils.image_dataset_from_directory(
        directory + "/train",
        batch_size=BATCH_SIZE,
        image_size=(IMG_SIZE, IMG_SIZE),
        seed=42,
    )

    filled = 0  # number of rows of `features` / `labels` written so far
    for input_batch, labels_batch in dataset:
        if filled >= sample_count:
            break
        # Trim the batch so the final write exactly fills the preallocated
        # arrays. Previously a full batch that did not fit raised a shape
        # error that a bare `except` swallowed, leaving the tail rows all-zero.
        take = min(int(input_batch.shape[0]), sample_count - filled)
        # The ImageNet VGG16 weights expect inputs passed through
        # preprocess_input; the dataset yields raw 0-255 RGB pixels.
        batch_inputs = preprocess_input(input_batch[:take])
        batch_features = base_model(batch_inputs, training=False)
        features[filled:filled + take] = batch_features.numpy()
        labels[filled:filled + take] = labels_batch[:take].numpy()
        filled += take

    # Drop rows that were never written (dataset smaller than sample_count).
    return features[:filled], labels[:filled], dataset.class_names

In [55]:
# `features` keeps the whole (features, labels, class_names) tuple; later
# cells index into it.
features = extract_features(directory=dataset_path, sample_count=2500)
print(*(len(part) for part in features[:2]))

Found 2836 files belonging to 2 classes.
2500 2500


In [56]:
# The first 2000 extracted samples are used for training; everything after
# them (500 of the 2500 extracted) is held out for validation.
train_features = features[0][:2000]
train_labels = features[1][:2000]
validation_features = features[0][2000:]
validation_labels = features[1][2000:]

In [57]:
# Flatten each (4, 4, 512) feature map into one 8192-wide row for the dense
# classifier. -1 lets NumPy infer the number of rows, so the split sizes are
# not repeated here; the feature width is still enforced.
train_features = np.reshape(train_features, (-1, 4 * 4 * 512))
validation_features = np.reshape(validation_features, (-1, 4 * 4 * 512))

In [64]:
# Dense classifier trained on the flattened 4*4*512 feature vectors:
# one hidden layer with dropout, then a single sigmoid unit for the two classes.
model = models.Sequential([
    layers.Dense(256, activation="relu", input_dim=4 * 4 * 512),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid"),
])
model.compile(
    loss="binary_crossentropy",
    optimizer=optimizers.RMSprop(learning_rate=2e-5),
    metrics=["acc"],  # later cells read the 'acc' / 'val_acc' history keys
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 4, 4, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 8192)              0         
                                                                 
 dense_2 (Dense)             (None, 256)               2097408   
                                                                 
 dense_3 (Dense)             (None, 1)                 257       
                                                                 
Total params: 16,812,353
Trainable params: 16,812,353
Non-trainable params: 0
_________________________________________________________________


In [59]:
# Fit the classifier on the extracted features, evaluating on the held-out
# validation split after every epoch.
history = model.fit(
    x=train_features,
    y=train_labels,
    batch_size=BATCH_SIZE,
    epochs=30,
    validation_data=(validation_features, validation_labels),
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [60]:
# Persist the trained classifier next to the notebook.
model.save(filepath="./model_transfer.h5")

In [61]:
# Reload the saved model instead of retraining:
# model = models.load_model("./model_transfer.h5")

In [62]:
# Per-epoch metrics recorded by model.fit; `loss`, `val_loss` and `epochs`
# are reused by the loss plot in the next cell.
acc, vall_acc, loss, val_loss = (
    history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
)
epochs = list(range(1, len(acc) + 1))

fig = go.Figure(
    data=[
        go.Scatter(x=epochs, y=acc, name="Training Accuracy"),
        go.Scatter(x=epochs, y=vall_acc, name="Validation Accuracy"),
    ]
)
fig.update_layout(
    title="Training and Validation Accuracy",
    xaxis_title="Epochs",
    yaxis_title="Accuracy",
)
fig.show()

In [63]:
# Training vs. validation loss per epoch, using the lists built in the
# accuracy-plot cell.
fig = go.Figure(
    data=[
        go.Scatter(x=epochs, y=loss, name="Training Loss"),
        go.Scatter(x=epochs, y=val_loss, name="Validation Loss"),
    ]
)
fig.update_layout(
    title="Training and Validation Loss",
    xaxis_title="Epochs",
    yaxis_title="Loss",
)
fig.show()