In [1]:
from tensorflow.keras.applications import VGG16

In [1]:
!pip install split-folders


In [1]:
import splitfolders
# Split the raw flowers dataset into train/val/test folders (50% / 25% / 25%)
# under the output directory; the fixed seed keeps the split reproducible.
splitfolders.ratio(
    '/kaggle/input/flowers-recognition/flowers',
    output="/kaggle/output/data",
    seed=1337,
    ratio=(0.5, 0.25, 0.25),
)

#### Define convolutional base

In [1]:
# VGG16 convolutional base: ImageNet weights, no fully connected classifier
# on top (include_top=False), fed with 150x150 RGB image tensors.
conv_base = VGG16(include_top=False, weights='imagenet', input_shape=(150, 150, 3))
conv_base.summary()

### Fast feature extraction _without_ data augmentation

In [1]:
import os
import numpy as np

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [1]:
import os

# Root folder holding the train/val/test split.
base_dir = '/kaggle/output/data'

# One sub-folder per split.
TRAIN_DIR, VALIDATION_DIR, TEST_DIR = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'val', 'test')
)
print(TRAIN_DIR)

# No augmentation here: pixel values are only rescaled to [0, 1].
batch_size = 20
datagen = ImageDataGenerator(rescale=1./255)

#### Extract features from convolution base

In [1]:
def extract_features(directory, sample_count):
    """Run images from `directory` through `conv_base` and collect the outputs.

    Uses the module-level `datagen`, `batch_size` and `conv_base`.

    Args:
        directory: Folder with one sub-folder per class.
        sample_count: Number of images to extract features for. Must not
            exceed the number of images found in `directory`.

    Returns:
        A `(features, labels)` tuple of NumPy arrays with shapes
        `(sample_count, 4, 4, 512)` and `(sample_count, num_classes)`
        (one-hot labels).

    Raises:
        ValueError: If `sample_count` is larger than the number of images
            found, which would otherwise make the endless generator wrap
            around and yield the same images again.
    """
    generator = datagen.flow_from_directory(
        directory,
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='categorical'
    )
    if sample_count > generator.samples:
        raise ValueError(
            'sample_count (%d) exceeds the %d images found in %s'
            % (sample_count, generator.samples, directory)
        )

    # (4, 4, 512) is the shape of the output of the last layer of the conv_base.
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    # Label width follows the number of class folders instead of a hard-coded 5.
    labels = np.zeros(shape=(sample_count, generator.num_classes))
    if sample_count <= 0:
        return features, labels

    filled = 0
    for inputs_batch, labels_batch in generator:
        # Batches can be shorter than `batch_size` (end of an epoch) or longer
        # than the room left in the output arrays, so write by running offset
        # and truncate instead of assuming `i * batch_size` slices line up.
        take = min(len(inputs_batch), sample_count - filled)
        features[filled:filled + take] = conv_base.predict(inputs_batch[:take])
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
        if filled >= sample_count:
            # Since generators yield data indefinitely in a loop, we must
            # `break` once the requested number of images has been seen.
            break
    return features, labels
    
# Pre-compute conv-base features once per split. The sample counts are
# hard-coded; presumably they match the number of images in each split
# folder - verify against the dataset.
train_features, train_labels = extract_features(directory=TRAIN_DIR, sample_count=2158)
validation_features, validation_labels = extract_features(directory=VALIDATION_DIR, sample_count=1079)
test_features, test_labels = extract_features(directory=TEST_DIR, sample_count=1080)

In [1]:
# Shapes of the extracted features and labels for every split.
print(
    train_features.shape, validation_features.shape, test_features.shape,
    train_labels.shape, validation_labels.shape, test_labels.shape,
)

At this point we need to flatten the feature arrays in order to feed them to the classifier, exactly as a `Flatten` layer would do. Currently, the features have shape (`samples, 4, 4, 512`), so the new arrays will have shape (`samples, (4 * 4 * 512)`).

In [1]:
# Flatten each (4, 4, 512) feature map into a single vector. The row count is
# taken from the array itself, so it cannot drift out of sync with the sample
# counts used during feature extraction.
train_features = np.reshape(train_features, (len(train_features), 4 * 4 * 512))
validation_features = np.reshape(validation_features, (len(validation_features), 4 * 4 * 512))
test_features = np.reshape(test_features, (len(test_features), 4 * 4 * 512))

In [1]:
# Shapes after flattening: features should now be 2-D, labels are unchanged.
print(
    train_features.shape, validation_features.shape, test_features.shape,
    train_labels.shape, validation_labels.shape, test_labels.shape,
)

#### Define and train the densely connected classifier

Training should be very fast, since we are dealing with a small network.

In [1]:
from tensorflow.keras import models, layers, optimizers

# Small densely connected classifier trained on the pre-computed,
# flattened conv-base features.
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=4*4*512))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(5, activation='softmax'))  # one output per flower class

model.compile(
    loss='categorical_crossentropy',
    # `learning_rate` replaces the deprecated `lr` keyword, which current
    # Keras releases no longer accept.
    optimizer=optimizers.RMSprop(learning_rate=2e-5),
    metrics=['accuracy']
)

history = model.fit(
    train_features,
    train_labels,
    epochs=30,
    batch_size=20,
    validation_data=(validation_features, validation_labels)
)

#### Plot loss and accuracy

In [1]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded while training the dense classifier.
acc, val_acc = history.history['accuracy'], history.history['val_accuracy']
loss, val_loss = history.history['loss'], history.history['val_loss']

epochs = range(len(acc))

# One figure per metric: dots for training, a solid line for validation.
for metric_name, train_curve, val_curve in (
    ('accuracy', acc, val_acc),
    ('loss', loss, val_loss),
):
    if metric_name == 'loss':
        plt.figure()  # draw the loss curves on a separate figure
    plt.plot(epochs, train_curve, 'bo', label='Training ' + metric_name)
    plt.plot(epochs, val_curve, 'b', label='Validation ' + metric_name)
    plt.title('Training and validation ' + metric_name)
    plt.legend()

plt.show()

We obtain a good 90% validation accuracy, which outperforms the CNN we built from scratch in the previous notebook. On the other hand, because data augmentation is not possible with this approach, we also start to overfit quite early, despite using `Dropout` with a fairly large rate.

### Feature extraction _with_ data augmentation

This technique allows data augmentation during training, but it is also expensive. It is absolutely not recommended without at least one GPU, since we will be dealing with more than 16 million parameters.

#### Add a densely connected classifier on top of the convolutional base

By _freezing_ the `conv_base` we reduce the number of weights that will be trained: only those that belong to the classifier on top.

In [1]:
from tensorflow.keras import models, layers, optimizers
# Fresh VGG16 base: ImageNet weights, no fully connected classifier on top,
# 150x150 RGB inputs.
conv_base = VGG16(include_top=False, weights='imagenet', input_shape=(150, 150, 3))

# Freeze the base so that only the classifier added below is trained.
conv_base.trainable = False

# End-to-end model: frozen conv base followed by the dense classifier.
model = models.Sequential([
    conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(5, activation='softmax'),
])

model.summary()

#### Setup generators with data augmentation

In [1]:
# Random transformations applied to training images only.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Both generators read images the same way; only the source folder differs.
flow_options = dict(target_size=(150, 150), batch_size=20, class_mode='categorical')
train_generator = train_datagen.flow_from_directory(TRAIN_DIR, **flow_options)
validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR, **flow_options)

#### Compile and train the model

In [1]:
# Train the classifier on top of the frozen conv base with augmented data.
model.compile(
    # `learning_rate` replaces the deprecated `lr` keyword, which current
    # Keras releases no longer accept.
    optimizer=optimizers.RMSprop(learning_rate=2e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    train_generator,
    steps_per_epoch=100,   # batches drawn from the training generator per epoch
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50    # batches drawn from the validation generator per epoch
)

In [1]:
# Save the model trained with the frozen conv base to an .h5 file.
model.save(filepath='/kaggle/output/models/PretrainedFlowerRecognition.h5')

In [1]:
# Per-epoch metrics from the augmented feature-extraction run.
recorded = history.history
acc = recorded['accuracy']
val_acc = recorded['val_accuracy']
loss = recorded['loss']
val_loss = recorded['val_loss']

epochs = range(len(acc))

# Accuracy on the current figure.
plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.legend()
plt.title('Training and validation accuracy')

# Loss on a new figure.
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.legend()
plt.title('Training and validation loss')

plt.show()

#### Unfreeze selected layers

In [1]:
# Unfreeze every layer of conv_base from "block4_conv1" to the end;
# all layers before it stay frozen.
conv_base.trainable = True
set_trainable = False

for layer in conv_base.layers:
    # The flag flips to True at "block4_conv1" and stays True afterwards.
    set_trainable = set_trainable or layer.name == "block4_conv1"
    layer.trainable = set_trainable

conv_base.summary()

#### Compile and fine-tune the network

Now we can start fine-tuning our network. We will do this with the RMSprop optimizer, using a very low learning rate. The reason for using a low learning rate is that we want to limit the magnitude of the modifications we make to the representations of the layers that we are fine-tuning (here, every layer from `block4_conv1` onward). Updates that are too large may harm these representations.

In [1]:
# Re-compile so the changed `trainable` flags take effect, then fine-tune
# with a very low learning rate to keep weight updates small.
model.compile(
    # `learning_rate` replaces the deprecated `lr` keyword, which current
    # Keras releases no longer accept.
    optimizer=optimizers.RMSprop(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    train_generator,
    steps_per_epoch=100,   # batches drawn from the training generator per epoch
    epochs=20,
    validation_data=validation_generator,
    validation_steps=50    # batches drawn from the validation generator per epoch
)

In [1]:
# Save the fine-tuned model to an .h5 file.
model.save(filepath='/kaggle/output/models/PretrainedFlowerRecognitionFineTuned.h5')

#### Plot metrics

In [1]:
# Per-epoch metrics from the fine-tuning run. These names are reused by the
# smoothed plots further down.
acc, val_acc, loss, val_loss = (
    history.history[key]
    for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

epochs = range(len(acc))

# Accuracy: dots for training, solid line for validation.
plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()

# Loss, on its own figure.
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

These curves look very noisy. To make them more readable, we can smooth them by replacing every loss and accuracy with exponential moving averages of these quantities. Here's a trivial utility function to do this:

In [1]:
def smooth_curve(points, factor=0.8):
    """Return an exponential moving average of `points`.

    The first value is kept as is; each later value is blended with the
    previous smoothed value as `previous * factor + point * (1 - factor)`.

    Args:
        points: Iterable of numbers to smooth.
        factor: Weight given to the previous smoothed value.

    Returns:
        A new list with one smoothed value per input point.
    """
    result = []
    running = None
    for index, value in enumerate(points):
        if index == 0:
            running = value  # nothing to average with yet
        else:
            running = running * factor + value * (1 - factor)
        result.append(running)
    return result

# Smoothed accuracy curves on the current figure.
smoothed_acc = smooth_curve(acc)
smoothed_val_acc = smooth_curve(val_acc)
plt.plot(epochs, smoothed_acc, 'bo', label='Smoothed training acc')
plt.plot(epochs, smoothed_val_acc, 'b', label='Smoothed validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Smoothed loss curves on a new figure.
plt.figure()
smoothed_loss = smooth_curve(loss)
smoothed_val_loss = smooth_curve(val_loss)
plt.plot(epochs, smoothed_loss, 'bo', label='Smoothed training loss')
plt.plot(epochs, smoothed_val_loss, 'b', label='Smoothed validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

These curves look much cleaner and more stable. We are seeing a nice 4% absolute improvement.

Note that **the loss curve does not show any real improvement** (in fact, it is deteriorating). How could accuracy improve if the loss isn't decreasing? The answer is simple: what we display is an average of pointwise loss values, but what actually matters for accuracy is the distribution of the loss values, not their average, since accuracy is the result of a binary thresholding of the class probability predicted by the model. The model may still be improving even if this isn't reflected in the average loss.

#### Evaluate the model on test data

In [1]:
# Test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=(150, 150),
    batch_size=20,
    class_mode='categorical',
    shuffle=False  # fixed order: every run scores the same images
)

# Evaluate one full pass over the test set. A fixed `steps=50` would score
# only 50 * 20 = 1000 images, leaving out part of the split whenever it
# holds more than that.
test_loss, test_accuracy = model.evaluate(test_generator, steps=len(test_generator))
print('test accuracy:', test_accuracy)