# Download VGG16

In [None]:
from keras.applications import VGG16

# Load the ImageNet-trained VGG16 network. include_top=False leaves out the
# fully connected classifier head (which ends in the softmax layer), so the
# model returns convolutional feature maps instead of class probabilities.
pretrained_model = VGG16(weights='imagenet', include_top=False)

# Print the layer-by-layer architecture of the loaded network.
pretrained_model.summary()

# Definition

In [None]:
import numpy as np 
from sklearn.model_selection import train_test_split
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, InputLayer, BatchNormalization, Dropout
from keras.utils import np_utils
import pandas as pd
from tensorflow.keras import backend as K
from keras.utils import to_categorical


# inputFile is the path of the .npy file that stores the 4d input array.
# outputFile is the path of the .npy file that stores the target class of each input property.
    # ! outputFile is only read; it isn't a file we write to the directory !
# testSize is the proportion of samples held out from training; the held-out
# samples are then split equally into a validation set and a test set.
    # Eg: 0.3 means 70% of the input are training samples, 15% are validation samples and 15% are test samples.
# numOfClass is the total number of groups a property can belong to.
    # Eg: 3 represents three groups: group 0, group 1 and group 2
def Run_CNN_Model(inputFile, outputFile, testSize, numOfClass):
    """Train a dense classifier on VGG16 features and score the test split.

    The images are split into training, validation and test sets, passed
    through the module-level ``pretrained_model`` to obtain feature maps, and
    a small fully connected network is trained on those features.

    Args:
        inputFile: Path of the ``.npy`` file holding the 4d image array
            (samples first), e.g. ``'ImageArray02(1).npy'``.
        outputFile: Path of the ``.npy`` file holding the class label of each
            sample, e.g. ``'Classification_abbrev03.npy'``. The file is only
            read, never written.
        testSize: Fraction of the samples held out from training. The held-out
            samples are split equally into validation and test sets.
        numOfClass: Total number of classes. When it is not 2, the labels are
            shifted down by one so that they start from 0.

    Returns:
        A pandas DataFrame with one row per test sample: one column per class
        holding the softmax output, a ``'predict'`` column with the index of
        the largest output, and an ``'actual'`` column with the true label.

    Raises:
        ValueError: If the loaded input array is not 4-dimensional.
    """
    # load the 4d input arrays and 1d output array
    X = np.load(inputFile)
    Y = np.load(outputFile)
    if X.ndim != 4:
        raise ValueError(
            'expected a 4d input array, got %d dimension(s)' % X.ndim
        )

    # make output value starting from 0 for conventional purpose
    # (skipped for the two-class case, as in the original data convention)
    if numOfClass != 2:
        Y = Y - 1

    # hold out `testSize` of the samples, then halve them into val and test
    X_train, X_val_and_test, y_train, y_val_and_test = train_test_split(
        X, Y, test_size=testSize
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_val_and_test, y_val_and_test, test_size=0.5
    )

    # normalizing the data to help with the training; every split must get the
    # same scaling, otherwise validation/test inputs differ from training ones
    X_train = X_train.astype('float32') / 255
    X_val = X_val.astype('float32') / 255
    X_test = X_test.astype('float32') / 255

    # one-hot encode the targets used for fitting
    n_classes = numOfClass
    train_target = to_categorical(y_train, n_classes)
    val_target = to_categorical(y_val, n_classes)

    # extract features for every split; the classifier below is trained on
    # VGG16 feature maps, so the test images need the same transformation
    vgg_features_train = pretrained_model.predict(X_train)
    vgg_features_val = pretrained_model.predict(X_val)
    vgg_features_test = pretrained_model.predict(X_test)

    model2 = Sequential()
    # take the input shape from the extracted features rather than assuming
    # the (7, 7, 512) maps that only 224x224 images produce
    model2.add(Flatten(input_shape=vgg_features_train.shape[1:]))
    model2.add(Dense(100, activation='relu'))
    model2.add(Dropout(0.5))
    model2.add(BatchNormalization())
    # one output unit per class so the softmax matches the one-hot targets
    model2.add(Dense(n_classes, activation='softmax'))

    # compile the model
    model2.compile(optimizer='adam', metrics=['accuracy'], loss='categorical_crossentropy')

    model2.summary()

    # train model using features generated from VGG16 model
    model2.fit(
        vgg_features_train,
        train_target,
        epochs=50,
        batch_size=128,
        validation_data=(vgg_features_val, val_target),
    )

    # softmax output of the last layer for every test sample
    layerOutput = model2.predict(vgg_features_test)
    print(layerOutput.shape)

    # put all information into dataframe
    result = pd.DataFrame(layerOutput)
    # idxmax must run before 'actual' is added so only class columns compete
    result['predict'] = result.idxmax(axis='columns')
    result['actual'] = y_test

    return result

# Example

In [None]:
# Run the pipeline on the first dataset: 30% of the samples are held out and
# the labels fall into two classes.
a = Run_CNN_Model(
    inputFile='ImageArray01.npy',
    outputFile='Classification_abbrev01.npy',
    testSize=0.3,
    numOfClass=2,
)