In [1]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras import applications
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import optimizers
import matplotlib.pyplot as plt

In [2]:
# dimensions of our images.
img_width, img_height = 330, 467

top_model_weights_path = '/home/mksnkv/models/top_tuned/bottleneck_fc_model_documents_5class.h5'
train_data_dir = '/home/mksnkv/Documents/classification/documents_5class_divided/train'
validation_data_dir = '/home/mksnkv/Documents/classification/documents_5class_divided/validation'
evaluation_data_dir = '/home/mksnkv/Documents/classification/documents_5class_divided/evaluation'
nb_train_samples = 3750
nb_validation_samples = 1500
nb_evaluation_samples = 1500
epochs = 20
batch_size = 50


def save_bottlebeck_features():
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')

    generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=True)
    bottleneck_features_train = model.predict_generator(
        generator, nb_train_samples // batch_size)
    np.save(open('bottleneck_features_train.npy', 'wb'),
            bottleneck_features_train)

    generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=True)
    bottleneck_features_validation = model.predict_generator(
        generator, nb_validation_samples // batch_size)
    np.save(open('bottleneck_features_validation.npy', 'wb'),
            bottleneck_features_validation)

In [3]:
def plot(model, history):
    plot_model(model, to_file='model.png')
    # Plot training & validation accuracy values
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

    # Plot training & validation loss values
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

In [7]:
def train_top_model():
    train_data = np.load(open('bottleneck_features_train.npy', 'rb'))
    print(train_data.shape)
    train_labels = np.array(
        [0] * (nb_train_samples // 5) + [1] * (nb_train_samples // 5) + [2] * (nb_train_samples // 5) + [3] * (nb_train_samples // 5) + [4] * (nb_train_samples // 5))
    train_labels = to_categorical(train_labels)
    print(train_labels.shape)
    validation_data = np.load(open('bottleneck_features_validation.npy', 'rb'))
    print(validation_data.shape)
    validation_labels = np.array(
        [0] * (nb_validation_samples // 5) + [1] * (nb_validation_samples // 5) + [2] * (nb_validation_samples // 5) + [3] * (nb_validation_samples // 5) + [4] * (nb_validation_samples // 5))
    validation_labels = to_categorical(validation_labels)
    print(validation_labels.shape)
    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(5, activation='softmax'))

    custom_adam = optimizers.Adam(lr = 0.0001)
    model.compile(optimizer=custom_adam,
                  loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save(top_model_weights_path)

In [5]:
save_bottlebeck_features()

Found 3750 images belonging to 5 classes.
Found 1500 images belonging to 5 classes.


In [9]:
model, history = train_top_model()

(3750, 10, 14, 512)
(3750, 5)
(1500, 10, 14, 512)
(1500, 5)
Train on 3750 samples, validate on 1500 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


TypeError: cannot unpack non-iterable NoneType object

In [10]:
plot(model, history)

NameError: name 'model' is not defined

In [None]:
def evaluation(model):
    datagen = ImageDataGenerator(rescale=1. / 255)
    generator = datagen.flow_from_directory(
        evaluation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    model1 = applications.VGG16(include_top=False, weights='imagenet')
    features = model1.predict_generator(
        generator, nb_evaluation_samples // batch_size)
    return features

In [None]:
features = evaluation(model)

In [None]:
evaluation_labels = np.array(
        [0] * (nb_evaluation_samples // 5) + [1] * (nb_evaluation_samples // 5) + [2] * (nb_evaluation_samples // 5) + [3] * (nb_evaluation_samples // 5) + [4] * (nb_evaluation_samples // 5))
model.test_on_batch(features, evaluation_labels)

In [None]:
model.predict_on_batch(features)