In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pickle
import os
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model

In [2]:
# Root folder of the dataset. Set the FLICKR8K_DATA_PATH environment variable to
# point the notebook at another location without editing this cell; when it is
# unset the original hard-coded local folder is used.
DATA_PATH = os.environ.get("FLICKR8K_DATA_PATH", r"D:\GBC2\DL1\Project\Data\flickr_8k")
# os.path.join inserts the separator of the current OS instead of a literal "\".
IMAGE_PATH = os.path.join(DATA_PATH, "images")
CAPTION_PATH = os.path.join(DATA_PATH, "captions")

In [3]:
def image_encodings(path, progress_every=820):
    """Encode every file in a folder with VGG16 cut at its second-to-last layer.

    VGG16 is loaded with ImageNet weights and re-wrapped so that its output is
    ``layers[-2].output`` instead of the final layer. Each file in ``path`` is
    loaded at 224x224, preprocessed with ``preprocess_input`` and passed
    through that truncated model as a batch of one.

    Args:
        path: Directory to scan. Every entry returned by ``os.listdir`` is
            handed to ``load_img``, so the folder should contain only images.
        progress_every: Print a progress line each time the running index is a
            non-zero multiple of this value. Defaults to 820, the interval
            that was previously hard-coded; pass 0 or None to disable.

    Returns:
        dict mapping each file name with its final extension removed to the
        array returned by ``model.predict`` for that file.
    """
    base = VGG16(include_top=True, weights='imagenet')
    model = Model(inputs=base.inputs, outputs=base.layers[-2].output)
    # summary() prints the table itself and returns None; wrapping it in
    # print() only adds a stray "None" line to the output.
    model.summary()

    # List the folder once so the reported count and the loop agree, and sort
    # so the processing order does not depend on the filesystem.
    names = sorted(os.listdir(path))
    total_imgs = len(names)
    print(f"Number of images in the folder: {total_imgs}")

    encodings = {}
    for n, name in enumerate(names):
        img = load_img(os.path.join(path, name), target_size=(224, 224))
        pixels = img_to_array(img)
        # Prepend a batch axis of length 1 before preprocessing/prediction.
        batch = preprocess_input(pixels.reshape((1, *pixels.shape)))
        # splitext drops only the last extension, so names with extra dots
        # ("a.b.jpg") keep their full stem and cannot collide on "a".
        key = os.path.splitext(name)[0]
        encodings[key] = model.predict(batch, verbose=0)
        if progress_every and n != 0 and n % progress_every == 0:
            print(f"{int((n/total_imgs)*100)}% images processed...")

    return encodings

In [4]:
# Run the encoder over every file in IMAGE_PATH; the result is a dict keyed by
# file-name stem whose values are the arrays returned by model.predict.
img_enc_dic = image_encodings(IMAGE_PATH)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [5]:
# Persist the encodings so they can be reloaded without recomputing them.
# The context manager flushes and closes the file as soon as the dump finishes,
# instead of leaving the handle open until garbage collection.
with open('img_enc_dic.pkl', 'wb') as pkl_file:
    pickle.dump(img_enc_dic, pkl_file)

In [2]:
# Reload the cached encodings (e.g. after a kernel restart) and bind them to a
# name so later cells can use them; the file is closed when the block exits.
# NOTE: pickle.load can execute arbitrary code — only load a file this notebook
# wrote itself, never one from an untrusted source.
with open('img_enc_dic.pkl', 'rb') as pkl_file:
    img_enc_dic = pickle.load(pkl_file)
img_enc_dic

{'1000268201_693b08cb0e': array([[2.507475, 0.      , 0.      , ..., 0.      , 0.      , 0.      ]],
       dtype=float32),
 '1001773457_577c3a7d70': array([[0.        , 0.        , 0.49414718, ..., 0.        , 0.        ,
         0.        ]], dtype=float32),
 '1002674143_1b742ab4b8': array([[1.4937935 , 0.        , 0.53567517, ..., 2.3152394 , 3.7418277 ,
         0.        ]], dtype=float32),
 '1003163366_44323f5815': array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 '1007129816_e794419615': array([[0.        , 0.09227666, 0.        , ..., 0.        , 0.        ,
         0.06528968]], dtype=float32),
 '1007320043_627395c3d8': array([[0.       , 0.       , 0.       , ..., 0.       , 3.3390145,
         0.       ]], dtype=float32),
 '1009434119_febe49276a': array([[2.0965168, 2.1192534, 3.5619888, ..., 0.6424278, 2.7142017,
         0.       ]], dtype=float32),
 '1012212859_01547e3f17': array([[0.        , 0.        , 0.98737115, ..., 0.        , 1.4932511 ,
         0.8612848