In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from keras_preprocessing.image import ImageDataGenerator

In [8]:
RUN_ON_KAGGLE = False

# Directories and filenames.
# Only the input/output roots differ between Kaggle and a local checkout;
# every other location is derived from those two roots.
if RUN_ON_KAGGLE:
    INPUT_PATH, OUTPUT_PATH = "../input/flickr8k/", "./"
else:
    INPUT_PATH = "../data/raw/flickr8k/"
    OUTPUT_PATH = "../data/interim/aida-image-captioning-inceptresnetv2_v2/"

IMAGE_PATH = f"{INPUT_PATH}Images/"               # raw input images
OUTPUT_IMAGE_PATH = f"{OUTPUT_PATH}Images/"       # destination of the extracted features
CAPTIONS_FILE = f"{INPUT_PATH}captions.txt"       # CSV with (image, caption) rows

In [9]:
# Load the captions file into a dataframe with one row per (image, caption) pair.
# Each image id has 5 captions associated with it, therefore the total dataset
# should have 40455 samples.

captions_df = pd.read_csv(CAPTIONS_FILE)

# Show complete captions instead of truncating them. `None` means "no limit";
# the former `-1` sentinel is deprecated since pandas 1.0 and rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)
print(captions_df.shape)
captions_df.head()

(40455, 2)


  


Unnamed: 0,image,caption
0,1000268201_693b08cb0e.jpg,A child in a pink dress is climbing up a set of stairs in an entry way .
1,1000268201_693b08cb0e.jpg,A girl going into a wooden building .
2,1000268201_693b08cb0e.jpg,A little girl climbing into a wooden playhouse .
3,1000268201_693b08cb0e.jpg,A little girl climbing the stairs to her playhouse .
4,1000268201_693b08cb0e.jpg,A little girl in a pink dress going into a wooden cabin .


In [10]:
# Keep a single row per image (its first caption); the captions themselves are
# not needed for feature extraction, only the unique image filenames.
captions_agg_df = captions_df.groupby("image", as_index=False).first()
captions_agg_df.head()

Unnamed: 0,image,caption
0,1000268201_693b08cb0e.jpg,A child in a pink dress is climbing up a set of stairs in an entry way .
1,1001773457_577c3a7d70.jpg,A black dog and a spotted dog are fighting
2,1002674143_1b742ab4b8.jpg,A little girl covered in paint sits in front of a painted rainbow with her hands in a bowl .
3,1003163366_44323f5815.jpg,A man lays on a bench while his dog sits by him .
4,1007129816_e794419615.jpg,A man in an orange hat starring at something .


In [11]:
# No `rescale` here: pixel scaling is applied later by the model-specific
# `preprocess_input` call in the feature-extraction cell.
datagen = ImageDataGenerator()

# shuffle=False keeps the batches in dataframe order, which the extraction cell
# relies on to match each feature array to its image filename.
train_generator = datagen.flow_from_dataframe(
    dataframe=captions_agg_df,
    directory=IMAGE_PATH,
    x_col="image",
    y_col="caption",
    class_mode="raw",
    target_size=(299, 299),
    shuffle=False,
)


Found 8091 validated image filenames.


In [12]:
# ImageNet-pretrained InceptionResNetV2 without its classification head,
# wrapped so that calling it returns the output of the last remaining layer.
image_model = tf.keras.applications.InceptionResNetV2(weights='imagenet', include_top=False)

new_input, hidden_layer = image_model.input, image_model.layers[-1].output
image_features_extract_model = tf.keras.Model(inputs=new_input, outputs=hidden_layer)

In [14]:
# Create the output directories. Unlike `!mkdir`, this is idempotent (safe under
# Restart & Run All), creates missing parent directories and does not depend on
# the shell. OUTPUT_IMAGE_PATH lives inside OUTPUT_PATH, but both are created
# explicitly to keep the intent obvious.
os.makedirs(OUTPUT_PATH, exist_ok=True)
os.makedirs(OUTPUT_IMAGE_PATH, exist_ok=True)

In [None]:
# Extract InceptionResNetV2 features for every image and cache each one as a
# .npy file named after the image.
#
# Keras image iterators loop forever when used directly in a `for` statement,
# so exactly one pass over the data is made by indexing the batches explicitly.
# Filenames are taken from the generator itself so that they stay aligned with
# the batches even if the generator dropped invalid files from the dataframe.
filenames = train_generator.filenames  # same order as the batches (shuffle=False)

i = 0  # running index of the current image across all batches
for batch_index in range(len(train_generator)):
    X, _ = train_generator[batch_index]
    # Use the preprocessing that belongs to the model family actually in use.
    img = tf.keras.applications.inception_resnet_v2.preprocess_input(X)
    raw_features = image_features_extract_model(img)
    # Flatten the spatial grid: (batch, h, w, channels) -> (batch, h*w, channels)
    batch_features = tf.reshape(raw_features,
                                (raw_features.shape[0], -1, raw_features.shape[3]))
    if i == 0:
        # One-off shape report for the first batch
        print(f"X.shape: {X.shape}")
        print(f"img.shape: {img.shape}")
        print(f"batch_features.shape: {raw_features.shape}")
        print(f"reshaped batch_features.shape: {batch_features.shape}")

    for bf in batch_features:
        filename = OUTPUT_IMAGE_PATH + filenames[i].replace(".jpg", ".npy")
        print(f"filename: {filename}")
        np.save(filename, bf.numpy())
        i += 1

# Every validated image must have produced exactly one feature file.
assert i == len(filenames), f"saved {i} feature files for {len(filenames)} images"