## Get features from training images

`features`: a dictionary mapping each `image_id` to a NumPy array of that image's features. <br>
The image features dictionary is saved to 'features.pkl'.

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so files stored
# on Drive can be read through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pickle
from os import listdir, getcwd
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.inception_v3 import preprocess_input
from keras.models import Model

In [None]:
# Extract the image features of the training dataset once, up front.
# This is an optimization to save time: the unchanging features don't have to
# be regenerated every time we want to test a modification of our model.

def extractFeatures(filename, features_model):
  """Compute the feature output of `features_model` for one image file.

  Args:
    filename: Path of the image to load.
    features_model: Model whose `predict` method is called on the
      preprocessed single-image batch.

  Returns:
    The value returned by `features_model.predict` for the one-image batch.
  """
  # Load the photo resized to 299x299 and turn its pixels into a numpy array.
  pixels = img_to_array(load_img(filename, target_size=(299, 299)))
  # Prepend a batch axis: (height, width, channels) -> (1, height, width, channels).
  batch = pixels.reshape((1,) + pixels.shape[:3])
  # Prepare the batch for the CNN (normalizes to [-1, 1], the format of the
  # images InceptionV3 was trained on).
  batch = preprocess_input(batch)
  return features_model.predict(batch, verbose=0)

def extractAllImagesFeatures(imgs_folder):
  """Extract InceptionV3 features for every image file in a folder.

  Args:
    imgs_folder: Path of the directory that holds the images.

  Returns:
    A dict mapping each image id (the file name up to its first '.') to the
    features returned by `extractFeatures` for that image.
  """
  # Imported here because the file-level imports only bring in
  # `listdir` and `getcwd` from `os`.
  from os.path import isfile, join

  # Load the complete CNN, then re-wire it so that its output is the
  # second-to-last layer. This drops the final classification (softmax)
  # layer, which is unnecessary because we are not classifying.
  base_model = InceptionV3()
  features_model = Model(inputs=base_model.inputs, outputs=base_model.layers[-2].output)

  all_features_dict = dict()
  for name in listdir(imgs_folder):
    filename = join(imgs_folder, name)
    # Skip sub-directories (e.g. notebook checkpoint folders): they cannot be
    # loaded as an image and would otherwise abort the whole extraction.
    if not isfile(filename):
      continue
    # The image id is the file name without its extension.
    image_id = name.split('.')[0]
    all_features_dict[image_id] = extractFeatures(filename, features_model)
  return all_features_dict

In [None]:
def extractAndSaveTrainingFeatures(images_folder):
  """Extract the features of all images in a folder and pickle them.

  The features dictionary returned by `extractAllImagesFeatures` is written
  to 'features.pkl', a path relative to the current working directory.

  Args:
    images_folder: Path of the directory that holds the training images.
  """
  # Get the features for all training images.
  all_features_dict = extractAllImagesFeatures(images_folder)
  # Use a context manager so the file is flushed and closed even if
  # pickling fails part-way through.
  with open('features.pkl', 'wb') as features_file:
    pickle.dump(all_features_dict, features_file)

# Entry point: extract and save the features for `images_folder`.
# NOTE(review): `images_folder` is not defined anywhere in the visible source,
# so this call would raise NameError whenever the guard is true. Confirm it is
# defined elsewhere, or pass the dataset path explicitly.
if __name__ == "__main__":
  extractAndSaveTrainingFeatures(images_folder)

In [None]:
# Run the extraction on the Flickr8k image folder under the mounted Drive path;
# the result is written to 'features.pkl' in the current working directory.
extractAndSaveTrainingFeatures('/content/drive/MyDrive/ImageCaptioningProject/Flickr8k_Dataset')