In [2]:
from os import listdir
import string
from pickle import dump

from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_resnet_v2 import InceptionResNetV2

from keras.preprocessing.image import load_img, img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.models import Model

Using TensorFlow backend.


In [0]:
# feature extraction
def feature_extraction(directory):
	"""Run every image in `directory` through VGG16 and collect the activations.

	The network is re-wired to stop at its second-to-last layer, so the final
	classification layer is not part of the output.

	Args:
		directory: path of a folder; every entry returned by `listdir` is
			loaded as an image, so it should contain image files only.

	Returns:
		dict mapping image id (the file name up to its first '.') to the
		array returned by `model.predict` for that single image.
	"""
	model = VGG16()
	# NOTE(review): VGG19, ResNet50, InceptionV3 and InceptionResNetV2 are
	# imported at the top of the file as alternative backbones. Swapping one in
	# presumably also needs its own preprocess_input / input size -- confirm
	# before switching.

	# Cut the network just before its last layer by indexing layers[-2].
	# The previous `model.layers.pop()` + `layers[-1]` idiom depends on
	# `layers` being the live internal list; on Keras versions where it is not,
	# the pop is a no-op and the class scores would be returned instead of the
	# penultimate features. Indexing [-2] selects the same layer either way.
	model = Model(inputs=model.inputs, outputs=model.layers[-2].output)

	features = dict()
	for name in listdir(directory):
		img = load_img(directory + '/' + name, target_size=(224, 224))
		img = img_to_array(img)
		# prepend a batch axis of size 1: predict() is fed one image at a time
		img = img.reshape((1, img.shape[0], img.shape[1], img.shape[2]))
		img_id = name.split('.')[0]
		features[img_id] = model.predict(preprocess_input(img), verbose=0)
	return features


In [0]:
# extracting features and dumping the features into a pickle file
directory = 'Flicker8k_Dataset'
features = feature_extraction(directory)
print('Number of Features Extracted:', len(features))
# context manager: the pickle is flushed and the handle closed even if dump() fails
with open('features.pkl', 'wb') as pkl_file:
	dump(features, pkl_file)

In [0]:
#extracting and cleaning up the descriptions
def load_file(name):
	"""Return the whole content of the text file at `name`.

	Args:
		name: path of the file to read (opened in text mode, default encoding).

	Returns:
		The file content as a single string.
	"""
	# `with` guarantees the handle is closed even when read() raises
	with open(name, 'r') as f:
		return f.read()


def extract_descriptions(file):
	"""Group captions by image id.

	Every line of `file` is split on whitespace; the first token is the image
	name and the remaining tokens form the caption. The image id is the image
	name cut at its first '.'. Lines with fewer than two tokens (blank,
	whitespace-only, or an image name without a caption) are skipped.

	Args:
		file: full text of the token file (a string, not a path).

	Returns:
		dict mapping image id -> list of caption strings, in order of appearance.
	"""
	d_map = {}
	for r in file.split('\n'):
		word = r.split()
		# Check the token count, not the raw line length: a whitespace-only
		# line is >= 2 characters long but has no tokens, so indexing word[0]
		# would raise IndexError.
		if len(word) < 2:
			continue
		img_id, img_desc = word[0], word[1:]
		img_id = img_id.split('.')[0]
		d_map.setdefault(img_id, []).append(' '.join(img_desc))
	return d_map

def get_vocab(text):
	"""Collect the set of distinct whitespace-separated words in all descriptions.

	Args:
		text: dict mapping image id -> list of description strings.

	Returns:
		set of every word that occurs in any description.
	"""
	doc = set()
	# plain loops instead of a comprehension used only for its side effects
	for desc_list in text.values():
		for txt in desc_list:
			doc.update(txt.split())
	return doc

def proc_descriptions(text):
	"""Normalise every description in `text` in place.

	Each description is split on whitespace; every token is lower-cased and
	stripped of `string.punctuation`. Tokens that are then shorter than two
	characters or not purely alphabetic are dropped. The surviving tokens are
	re-joined with single spaces and written back into the same list slot.

	Args:
		text: dict mapping image id -> list of description strings (mutated).

	Returns:
		None; the lists inside `text` are updated in place.
	"""
	strip_punct = str.maketrans('', '', string.punctuation)
	for desc_list in text.values():
		for idx, raw in enumerate(desc_list):
			tokens = [w.lower().translate(strip_punct) for w in raw.split()]
			# The length / isalpha filters must run AFTER punctuation removal.
			# Filtering first made translate() a no-op and discarded whole
			# words such as "dog's" or "t-shirt" instead of cleaning them.
			desc_list[idx] = ' '.join(w for w in tokens if len(w) > 1 and w.isalpha())


def save_desc(text, name):
	"""Write all descriptions to the file `name`, one per line.

	Each line has the form '<image id> <description>'. Lines are joined with
	'\\n' and no trailing newline is written.

	Args:
		text: dict mapping image id -> list of description strings.
		name: path of the output file (overwritten if it exists).
	"""
	rows = [k + ' ' + desc for k, desc_list in text.items() for desc in desc_list]
	# `with` closes the handle even when write() raises
	with open(name, 'w') as f:
		f.write('\n'.join(rows))

In [0]:
tokenfile = 'Flickr8k_text/Flickr8k.token.txt'
# read the raw token file into one string
tokens = load_file(tokenfile)
# group the captions by image id: {image id: [caption, ...]}
descriptions = extract_descriptions(tokens)
print('Loaded Descriptions:', len(descriptions))
# clean the captions; proc_descriptions rewrites the lists in place and returns nothing
proc_descriptions(descriptions)
# set of distinct words across the cleaned captions
vocab = get_vocab(descriptions)
print('Size of Vocabulary:', len(vocab))
# write the cleaned captions to disk, one '<image id> <caption>' per line
save_desc(descriptions, 'descriptions.txt')