In [33]:
import pickle

import numpy as np
from keras.applications.inception_v3 import InceptionV3,preprocess_input
from keras.layers import Dense,BatchNormalization,Dropout,Embedding,RepeatVector
from keras.preprocessing.image import load_img, img_to_array
from keras.models import Sequential
from keras.models import Model

from pycocotools.coco import COCO

In [13]:
# Root folder of the COCO data and the split used throughout this notebook.
dataDir = 'coco'
dataType = 'train2014'

# Caption annotation file for the chosen split, located under dataDir.
_ANN_TEMPLATE = '{}/annotations/captions_{}.json'
annFile = _ANN_TEMPLATE.format(dataDir, dataType)

In [14]:
# Load the caption annotations from annFile (the loader prints its progress below).
coco=COCO(annFile)

loading annotations into memory...
Done (t=0.93s)
creating index...
index created!


In [15]:
# Load InceptionV3 with ImageNet weights. It is used here purely as a feature
# extractor, so its final softmax layer is not needed.
inception = InceptionV3(weights="imagenet")

In [None]:
# Print the layer-by-layer summary of the loaded network.
inception.summary()

In [17]:
# Remove the trailing softmax layer from the layer list, then mark every
# remaining layer as non-trainable.
# NOTE(review): on some Keras versions `model.layers` may be a fresh copy, in
# which case this pop() would not change the model -- confirm for the installed version.
inception.layers.pop()

for frozen_layer in inception.layers:
    frozen_layer.trainable = False

In [18]:
# Inspect the output tensor of the second-to-last layer in the layer list.
inception.layers[-2].output

<tf.Tensor 'mixed10/concat:0' shape=(None, 8, 8, 2048) dtype=float32>

##### image ==> [Inception + Embedding] ==> feature-extracted image vector (the model built below outputs shape (None, 2048); a 300-d embedding step is not applied in this notebook)

In [19]:
# Build the feature-extraction model: same input as InceptionV3, output taken
# from the global-average-pooling layer that feeds the softmax classifier.
# - `inputs=` / `outputs=` are the Keras 2 keyword names; the legacy
#   `input=` / `output=` spellings are rejected by newer Keras releases.
# - The layer is selected by name rather than by list position, so the result
#   does not depend on whether an earlier `layers.pop()` changed the layer list.
#   NOTE(review): 'avg_pool' is the name used by the stock Keras InceptionV3
#   definition -- confirm with inception.summary() if a custom build is used.
final_model = Model(inputs=inception.input,
                    outputs=inception.get_layer('avg_pool').output)

  


In [None]:
# Print the layer-by-layer summary of the feature-extraction model.
final_model.summary()

In [21]:
# Size every image is resized to when it is loaded (passed to load_img as target_size).
TARGET_SIZE = (299, 299)

In [22]:
def getImgPath(imgId, data_dir=None, data_type=None):
    """Build the path of a COCO image file from its image id.

    The file name follows the pattern
    ``<data_dir>/<data_type>/COCO_<data_type>_<id zero-padded to 12 chars>.jpg``.

    Args:
        imgId: Image id (int or str). It is converted with ``str`` and
            left-padded with zeros to 12 characters; longer ids are kept as is.
        data_dir: Root data folder. Defaults to the notebook-level ``dataDir``.
        data_type: Dataset split name. Defaults to the notebook-level ``dataType``.

    Returns:
        The image path as a string.
    """
    # Fall back to the notebook configuration only when no override is given,
    # so existing single-argument calls behave exactly as before.
    if data_dir is None:
        data_dir = dataDir
    if data_type is None:
        data_type = dataType

    # rjust never truncates, matching the previous manual "0" * n padding.
    img_name = str(imgId).rjust(12, "0")
    return "{}/{}/COCO_{}_{}.jpg".format(data_dir, data_type, data_type, img_name)

In [23]:
# Quick check of the generated path for one image id.
getImgPath(1036)

'coco/train2014/COCO_train2014_000000001036.jpg'

In [30]:
# Image feature extraction: run each image through final_model and keep the
# resulting feature vector, keyed by image id.
train_image_extracted = dict()

# Full run: imgIds = coco.getImgIds()
imgIds = [151, 260, 307, 404, 450, 491, 514, 529, 575, 671] # dummy list because I don't have all images extracted

# Ids whose image could not be processed, kept so a partial result is visible
# instead of being silently saved as if it were complete.
failed_img_ids = []

for imgId in imgIds:
    # Handle errors per image: a single missing or unreadable file must not
    # abort the extraction of every image that comes after it.
    try:
        img = load_img(getImgPath(imgId), target_size=TARGET_SIZE)
        # Convert the image to an array and apply the InceptionV3 preprocessing
        img_array = img_to_array(img)
        nimage = preprocess_input(img_array)
        # Add the leading batch dimension expected by predict()
        nimage = np.expand_dims(nimage, axis=0)
        fea_vec = final_model.predict(nimage)
        # Flatten the single-row prediction, e.g. (1, 2048) -> (2048,)
        train_image_extracted[imgId] = np.reshape(fea_vec, fea_vec.shape[1])
    except Exception as e:
        print("Exception got for image {} :- \n".format(imgId), e)
        failed_img_ids.append(imgId)

if failed_img_ids:
    print("Images skipped because of errors:", failed_img_ids)

In [31]:
# Display the extracted feature vectors, keyed by image id.
train_image_extracted

{151: array([0.2930465 , 0.2663404 , 0.3843394 , ..., 0.14990748, 0.29548055,
        0.20103858], dtype=float32),
 260: array([1.5420294 , 0.23749   , 0.14261298, ..., 0.11536592, 0.9824455 ,
        0.08451668], dtype=float32),
 307: array([0.440467  , 0.5049137 , 0.147666  , ..., 0.22392224, 0.21844909,
        0.2886581 ], dtype=float32),
 404: array([0.23801179, 0.07262233, 0.3812816 , ..., 0.2133198 , 0.7977216 ,
        0.03980841], dtype=float32),
 450: array([0.00927397, 1.1330208 , 0.07134381, ..., 0.85767764, 0.6659479 ,
        0.19884247], dtype=float32),
 491: array([0.23473062, 0.26051185, 0.8018033 , ..., 0.28381902, 0.24237214,
        0.2986067 ], dtype=float32),
 514: array([0.2874124 , 0.20027205, 0.79848576, ..., 0.12744518, 0.0127114 ,
        0.5092769 ], dtype=float32),
 529: array([0.20294471, 0.7958152 , 0.24508199, ..., 0.3041519 , 0.39853734,
        0.23544852], dtype=float32),
 575: array([0.57837427, 0.6224444 , 0.01861988, ..., 0.37554434, 0.2275149 ,
  

In [37]:
def save_embedding_matrix(matrix, path="train_image_extracted.pkl"):
    """Pickle ``matrix`` to ``path``.

    Args:
        matrix: Any picklable object; in this notebook it is the dict mapping
            image ids to their extracted feature vectors.
        path: Destination file. Defaults to ``"train_image_extracted.pkl"`` in
            the current working directory. An existing file is overwritten.

    Note:
        Only load the resulting file with ``pickle.load`` if it comes from a
        trusted source.
    """
    with open(path, "wb") as f:
        pickle.dump(matrix, f)

# Persist the extracted features to disk.
save_embedding_matrix(train_image_extracted)

In [38]:
# Confirm the shape of the feature-extraction model's output.
print(final_model.output.shape)

(None, 2048)
