In [17]:
import pickle
import time
import numpy as np
from keras.applications.inception_v3 import InceptionV3,preprocess_input
from keras.layers import Dense,BatchNormalization,Dropout,Embedding,RepeatVector
from keras.preprocessing.image import load_img, img_to_array
from keras.models import Sequential
from keras.models import Model

from pycocotools.coco import COCO
from tqdm import tqdm

In [18]:
# Root folder of the COCO download and the split processed in this notebook.
dataDir = 'coco'
dataType = 'train2014'
# Caption annotation file that belongs to the chosen split.
annFile = '/'.join([dataDir, 'annotations', 'captions_{}.json'.format(dataType)])

In [19]:
# Load the caption annotations and build the COCO index used for id lookups below.
coco = COCO(annotation_file=annFile)

loading annotations into memory...
Done (t=1.43s)
creating index...
index created!


In [20]:
# Load InceptionV3 with ImageNet weights, classification head included.
# The network is only used as a feature extractor here, so its final softmax
# layer is of no use to us.
inception = InceptionV3(include_top=True, weights='imagenet')

In [None]:
# Print the layer-by-layer summary of the loaded InceptionV3 model.
inception.summary()

In [21]:
# Drop the last entry (the softmax layer) from the model's layer list, then
# mark every remaining layer as non-trainable.
# NOTE(review): this relies on `inception.layers` being the model's own mutable
# list -- confirm that holds for the installed Keras version.
inception.layers.pop()

for frozen_layer in inception.layers:
    frozen_layer.trainable = False

In [23]:
# Display the output tensor of the second-to-last entry in `inception.layers`.
inception.layers[-2].output

<tf.Tensor 'mixed10/concat:0' shape=(None, 8, 8, 2048) dtype=float32>

##### image ==> [InceptionV3 without its softmax layer] ==> extracted feature vector of shape (None, 2048)

In [24]:
# Build the feature extractor: same input as InceptionV3, output taken from the
# global average pooling layer that feeds the softmax classifier, giving one
# 2048-dimensional vector per image.
# The layer is looked up by name so the result does not depend on whether the
# earlier `inception.layers.pop()` really altered the layer list, and the
# Keras 2 keyword names `inputs`/`outputs` replace the deprecated `input`/`output`.
final_model = Model(inputs=inception.input, outputs=inception.get_layer('avg_pool').output)

  


In [None]:
# Print the layer-by-layer summary of the feature-extraction model.
final_model.summary()

In [25]:
# Every image is resized to this square size by `load_img` before it is fed to
# the network (299x299 is InceptionV3's default input resolution).
INCEPTION_SIDE = 299
TARGET_SIZE = (INCEPTION_SIDE, INCEPTION_SIDE)

In [26]:
def getImgPath(imgId):
    """Return the path of the JPEG file for a COCO image id.

    The id is converted to text, left-padded with zeros to 12 characters
    (longer ids are kept as they are) and inserted into
    ``<dataDir>/<dataType>/COCO_<dataType>_<padded id>.jpg``, where ``dataDir``
    and ``dataType`` are the notebook-level dataset settings.
    """
    imgName = str(imgId).rjust(12, "0")
    fileName = "COCO_{}_{}.jpg".format(dataType, imgName)
    return "{}/{}/{}".format(dataDir, dataType, fileName)

In [27]:
# Sanity check: show the path built for one image id.
getImgPath(1036)

'coco/train2014/COCO_train2014_000000001036.jpg'

#### Finding corrupt images
Images that were not extracted properly may raise "**image file is truncated (x bytes not processed)**" when they are loaded.

Loop for finding all corrupt images and adding them to a list

This may take 40-45 minutes, depending on your machine's specs.

In [8]:
# Try to load every image once and collect the ids of those that cannot be read.
corrupt_images = []
start = time.time()
imgIds = coco.getImgIds()

for imgId in tqdm(imgIds, ascii=True, desc="Finding Corrupt Images"):
    try:
        # The loaded image itself is not needed; only whether loading succeeds.
        load_img(getImgPath(imgId), target_size=TARGET_SIZE)
    except OSError:
        # Unreadable files surface here as OSError; remember the id and move on.
        corrupt_images.append(imgId)

print("Processed {} images in {:.2f}s".format(len(imgIds), time.time() - start))
print("Found {} corrupt images".format(len(corrupt_images)))

print("Path of corrupted files:")
for imgId in corrupt_images:
    print(getImgPath(imgId))
corrupt_images

Finding Corrupt Images: 100%|##########| 82783/82783 [43:25<00:00, 31.78it/s]  

Processed 82783 images in 2605.21s
Found 1 corrupt images





#### Remove corrupt images' descriptions.

Load the `coco_descriptions.pkl` file we created in the `pycocoDescriptions` notebook, remove the captions of the corrupt images, and save the result as a new pickle, `corruption_free_coco_descriptions.pkl`.

Note: use the new file when training.

In [13]:
# Load the caption dictionary, drop the entries of the corrupt images and save
# the cleaned dictionary under a new file name.
# NOTE: pickle.load can execute arbitrary code; only load pickle files you created yourself.
with open('coco_descriptions.pkl', 'rb') as f:
    descriptions = pickle.load(f)

print("Size before removing corrupt images: {}".format(len(descriptions)))
for imgId in corrupt_images:
    # A default is passed so an id without an entry does not raise KeyError.
    descriptions.pop(imgId, None)
print("Size after removing corrupt images: {}".format(len(descriptions)))

print("Saving new file with corrupt image captions removed...")
with open('corruption_free_coco_descriptions.pkl', 'wb') as f:
    pickle.dump(descriptions, f)

Size before removing corrupt images: 82783
Size after removing corrupt images: 82782
Saving new file with corrupt image captions removed...


#### Removing corrupt image Ids from `coco.getImgIds()`

In [15]:
# Start again from the full id list and keep only the ids that are not corrupt.
all_img_ids = coco.getImgIds()
print(len(all_img_ids))

# Set membership keeps the filter linear and, unlike list.remove, does not
# raise when a corrupt id is absent from the list.
corrupt_id_set = set(corrupt_images)
imgIds = [img_id for img_id in all_img_ids if img_id not in corrupt_id_set]

print(len(imgIds))

82783
82782


#### Generating `train_image_extracted.pkl`

In [None]:
# Image embedding: run every image through the feature extractor and keep the
# resulting 2048-dimensional vector, keyed by image id.
train_image_extracted = dict()
# Ids of images that could not be read; they are skipped instead of ending the run.
failed_images = []

for imgId in tqdm(imgIds, ascii=True, desc="Generating Input Matrix"):
    try:
        img = load_img(getImgPath(imgId), target_size=TARGET_SIZE)
    except OSError as e:
        # One unreadable file should not stop the extraction of all the others.
        print("Exception got :- \n", e)
        failed_images.append(imgId)
        continue
    # Converting image to array
    img_array = img_to_array(img)
    nimage = preprocess_input(img_array)
    # Adding a leading batch dimension, since predict works on batches
    nimage = np.expand_dims(nimage, axis=0)
    fea_vec = final_model.predict(nimage)
    train_image_extracted[imgId] = np.reshape(fea_vec, fea_vec.shape[1])  # reshape from (1, 2048) to (2048, )

# Any other error is left to propagate so that a partial result is never
# mistaken for a complete one.
if failed_images:
    print("Skipped {} unreadable images: {}".format(len(failed_images), failed_images))

In [37]:
def save_embedding_matrix(matrix, path="train_image_extracted.pkl"):
    """Pickle ``matrix`` to ``path``.

    Args:
        matrix: Any picklable object; in this notebook, the dict that maps an
            image id to its extracted feature vector.
        path: Destination file. Defaults to ``train_image_extracted.pkl`` in
            the current working directory. An existing file is overwritten.
    """
    with open(path, "wb") as f:
        pickle.dump(matrix, f)

# Write the extracted image features to disk.
save_embedding_matrix(train_image_extracted)

In [38]:
# Show the shape of the feature extractor's output tensor.
print(final_model.output.shape)

(None, 2048)
