# Compute and store feature vectors for all images in the database

In [25]:
import sys

from tensorflow import keras
from keras import models
import pandas as pd
import numpy as np
from tqdm import tqdm

In [26]:
# Load the complete model from its saved directory and print how many
# top-level layers it has. The cells below split this model into two parts
# using model_comp.layers[-2] (encoder output) and model_comp.layers[-1]
# (classifier).
model_comp = models.load_model('../models/model_v5_0/')
print(len(model_comp.layers))

133


In [27]:
# Separate the feature extractor (the xception model base): a sub-model that
# runs from the complete model's input to the output of its second-to-last
# layer, followed by global average pooling so that each image is reduced to
# a single flat feature vector.
encoder = models.Sequential([
    models.Model(model_comp.input, model_comp.layers[-2].output),
    keras.layers.GlobalAveragePooling2D(),
])
# The encoder is only used for prediction in this notebook, never trained.
encoder.trainable = False
encoder.output_shape

(None, 2048)

In [28]:
# Separate the classifier: a model that starts at the input of the sub-layer
# at index 1 of the complete model's final layer and ends at that final
# layer's output.
# NOTE(review): this skips sub-layer 0 of the final layer, presumably because
# the encoder above already applies the pooling step - confirm against the
# model definition.
classifier = models.Model(
    inputs=model_comp.layers[-1].layers[1].input,
    outputs=model_comp.layers[-1].output,
)
# The classifier is only used for prediction in this notebook, never trained.
classifier.trainable = False
classifier.output_shape

(None, 11)

In [22]:
def measure_accuracy(y_actual, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Both inputs are converted to NumPy arrays first, so labels are compared
    by position. This makes plain lists work (a list == list comparison would
    otherwise collapse to a single bool) and avoids index alignment when
    pandas Series are passed.

    Parameters
    ----------
    y_actual : array-like
        True labels.
    y_pred : array-like
        Predicted labels, same shape as ``y_actual``.

    Returns
    -------
    float
        Accuracy in the range [0, 1].

    Raises
    ------
    ValueError
        If the inputs have different shapes or are empty.
    """
    actual = np.asarray(y_actual)
    predicted = np.asarray(y_pred)
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_actual and y_pred must have the same shape, "
            f"got {actual.shape} and {predicted.shape}"
        )
    if actual.size == 0:
        raise ValueError("accuracy is undefined for empty inputs")
    # The original computed this ratio but never returned it.
    return float(np.mean(actual == predicted))

# Compute the encoder features for a batch of image files.
def get_feature(img_paths):
    """Return one encoder feature vector per image path.

    Every image is loaded at 224x224, converted to a float32 array and scaled
    by 1/255; the whole batch is then passed through ``encoder`` in a single
    ``predict`` call.

    Parameters
    ----------
    img_paths : iterable
        Paths of the image files to encode.

    Returns
    -------
    list
        The rows of the encoder output, one per input path, in input order.
    """
    image_utils = keras.preprocessing.image

    def _load_scaled(path):
        # Load and scale a single image.
        pixels = image_utils.img_to_array(
            image_utils.load_img(path, target_size=(224,224))
        )
        return pixels.astype('float32')/255.0

    pixel_batch = np.array([_load_scaled(path) for path in img_paths])
    return list(encoder.predict(pixel_batch, verbose=0))

In [24]:
# Load the database: a CSV of image annotations read into a DataFrame.
# Later cells read the image paths from its 'image_name' column.
# The trailing expression displays the column names.
database = pd.read_csv('../dataset/anno/img_label_database2.csv')
database.columns

Index(['image_name', 'category_label'], dtype='object')
32 (2048,)


In [None]:
features = []
y_pred = []
batch = 32
n = len(database)

# Compute the features of all images in the database, one batch at a time.
# list.extend appends in place; `features = features + ...` re-copied the
# whole list on every batch. Slicing past the end of the frame is clamped,
# so no explicit min() is needed for the last batch.
for i in tqdm(range(0, n, batch)):
    features.extend(get_feature(database['image_name'].iloc[i:i + batch]))

# Plain column assignment instead of DataFrame.insert so the cell can be
# re-run: insert raises if the column already exists.
database['feature'] = features

# Compute the predictions of all images in the database.
for i in tqdm(range(0, n, batch)):
    # np.stack turns the per-image vectors into one 2-D array; np.array on a
    # Series of arrays would give a 1-D object array instead.
    batch_features = np.stack(features[i:i + batch])
    batch_scores = classifier.predict(batch_features, verbose=0)
    y_pred.extend(np.argmax(batch_scores, axis=1).tolist())

# Change the predictions to the labels as per the label map obtained from the
# data generator: position = class index predicted by the model, value = label.
label_map = [0,1,10,2,3,4,5,6,7,8,9]
y_pred = [label_map[class_index] for class_index in y_pred]

database['y_pred'] = y_pred

# Compute the accuracy of the predictions on the database.
# The label column is 'category_label' (see the column listing printed when
# the database was loaded); 'categorical_label' does not exist.
measure_accuracy(database['category_label'], database['y_pred'])

I got 76% accuracy on the whole database, but note that this database is a superset of the training set, so this figure is not a held-out estimate of the model's accuracy.

In [None]:
# Save the database, including the feature vectors, to disk.
feature_file_path = '../dataset/features.csv'
# to_csv needs a single-character separator, so use an explicit tab escape.
# Raising NumPy's print threshold makes every element of each feature vector
# get written: by default arrays with more than 1000 elements are abbreviated
# with "..." when converted to text, which would drop most of each vector.
with np.printoptions(threshold=sys.maxsize):
    database.to_csv(feature_file_path, sep='\t')