<a href="https://colab.research.google.com/github/smgbx/Image_Captioning/blob/main/GetFlickerImagesFeatures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

GETS FEATURES FROM TRAINING IMAGES.

`features`: a dictionary mapping each image ID (the image file name up to its first `.`) to a NumPy array holding the features extracted for that image.

Saves image features dictionary to 'features.pkl'

In [None]:
# Mount Google Drive at /content/drive so the dataset folder and the output
# pickle (both stored under "My Drive") are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pickle
from os import listdir, getcwd
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.inception_v3 import preprocess_input
from keras.models import Model

In [None]:
# Extract training dataset image features.
# Optimization to save time, so the model doesn't have to regenerate
# unchanging features when we want to test modifications of our model.

def extract_training_image_features(imgs_folder):
  """Extract InceptionV3 features for every image file in a folder.

  The complete InceptionV3 network is loaded and then truncated at its
  second-to-last layer, so the final classification (softmax) layer is
  dropped and the model outputs the activations that feed that layer.

  Args:
    imgs_folder: Path of the directory that contains the image files.

  Returns:
    A dict mapping each image id (the file name up to its first '.') to the
    array returned by `predict` for that single image. The leading batch
    dimension of size 1 is kept.

  Notes:
    Sub-directories and hidden entries (names starting with '.', such as
    '.ipynb_checkpoints' or '.DS_Store') are skipped rather than being passed
    to the image loader, where they would either raise or be stored under an
    empty image id.
  """
  # Local import: the notebook's import cell only pulls `listdir`/`getcwd` from os.
  import os

  # Load the full CNN, then cut it just before the classification layer.
  full_model = InceptionV3()
  features_model = Model(inputs=full_model.inputs, outputs=full_model.layers[-2].output)

  all_features_dict = dict()
  # Sorted so the dictionary's insertion order is reproducible across runs.
  for name in sorted(listdir(imgs_folder)):
    filename = os.path.join(imgs_folder, name)
    # Ignore anything that cannot be an image file of the dataset.
    if name.startswith('.') or not os.path.isfile(filename):
      continue
    # InceptionV3 expects (299, 299) size images
    image = load_img(filename, target_size=(299, 299))
    # convert image pixels to numpy array
    image = img_to_array(image)
    # add the leading batch dimension expected by the model
    image = np.expand_dims(image, axis=0)
    # Normalizes the array to the range [-1, 1], matching the format of the
    # images used to train InceptionV3
    image = preprocess_input(image)
    img_features = features_model.predict(image, verbose=0)
    # image id = file name without its extension
    image_id = name.split('.')[0]
    all_features_dict[image_id] = img_features
  return all_features_dict

# Extract features from all Flickr8k images and cache them on Drive.
# Both paths are built from the same absolute Drive location so this cell
# does not depend on the notebook's current working directory.
project_dir = '/content/drive/My Drive/ImageCaptioningProject'
flicker_folder = project_dir + '/Flickr8k_Dataset'
all_features_dict = extract_training_image_features(flicker_folder)
print('Number of extracted features: %d' % len(all_features_dict))
# Save to file; the context manager guarantees the handle is flushed and
# closed even if pickling fails part-way.
with open(project_dir + '/features.pkl', 'wb') as features_file:
  pickle.dump(all_features_dict, features_file)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5
Extracted Features: 8091
