<a href="https://colab.research.google.com/github/mjain125/AutomaticImageCaptioning/blob/main/automatic_image_captioning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read the dataset files from paths under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
from numpy import array
import pandas as pd
import matplotlib.pyplot as plt

In [15]:
# loading doc into memory
def load_doc(filename):
	"""Read a whole text file into memory and return its contents.

	Args:
		filename: Path of the text file to read.

	Returns:
		The complete file contents as a single string.

	Raises:
		OSError: If the file cannot be opened or read.
	"""
	# The context manager closes the handle even when read() raises,
	# which the manual open()/close() pair did not guarantee.
	with open(filename, 'r') as file:
		return file.read()

# Location of the Flickr8k caption file under the mounted Drive folder.
filename = "/content/drive/MyDrive/flicker8k/Flickr_Data/Flickr_TextData/Flickr8k.token.txt"
# loading descriptions
doc = load_doc(filename)
# Preview only the first 300 characters instead of printing the whole file.
print(doc[:300])

1000268201_693b08cb0e.jpg#0	A child in a pink dress is climbing up a set of stairs in an entry way .
1000268201_693b08cb0e.jpg#1	A girl going into a wooden building .
1000268201_693b08cb0e.jpg#2	A little girl climbing into a wooden playhouse .
1000268201_693b08cb0e.jpg#3	A little girl climbing the s


In [17]:
def load_descriptions(doc):
	"""Parse raw caption-file text into a mapping of image id -> captions.

	Each usable line holds an image reference followed by whitespace and the
	caption text. The image id is the part of the first token that precedes
	its first '.', so every caption of one image ends up under the same key.

	Args:
		doc: Full text of the caption file as one string.

	Returns:
		dict mapping each image id to the list of its caption strings, in
		file order. Runs of whitespace inside a caption are collapsed to
		single spaces.
	"""
	mapping = dict()
	for line in doc.split('\n'):
		# splitting the line on whitespace
		tokens = line.split()
		# A usable line needs an image reference AND at least one caption
		# word. Testing the tokens rather than the raw line length also
		# covers whitespace-only lines, which used to reach tokens[0] and
		# raise IndexError.
		if len(tokens) < 2:
			continue
		# first token is the image reference, the rest is the caption
		image_ref, caption_words = tokens[0], tokens[1:]
		# keep only the part before the first '.' as the image id
		image_id = image_ref.split('.')[0]
		# setdefault creates the caption list the first time an id is seen
		mapping.setdefault(image_id, []).append(' '.join(caption_words))
	return mapping

# parsing descriptions
# Turns the raw text in `doc` into the image-id -> captions mapping used by later cells.
descriptions = load_descriptions(doc)
# len() of the mapping counts distinct image ids, not individual captions.
print('Loaded: %d ' % len(descriptions))

Loaded: 8092 


In [18]:
# Peek at five image ids (positions 10-14 in the mapping's key order).
list(descriptions.keys())[10:15]


['101654506_8eb26cfb60',
 '101669240_b2d3e7f17b',
 '1016887272_03199f49c4',
 '1019077836_6fc9b15408',
 '1019604187_d087bf9a5f']

In [19]:
# Show every caption stored for a single image id.
descriptions['101654506_8eb26cfb60']

['A brown and white dog is running through the snow .',
 'A dog is running in the snow',
 'A dog running through snow .',
 'a white and brown dog is running through a snow covered field .',
 'The white and brown dog is running over the surface of the snow .']

In [10]:
import string
def clean_descriptions(descriptions):
	"""Normalise every caption in `descriptions` in place.

	Each caption is split on whitespace, and every word is lower-cased and
	stripped of ASCII punctuation. Words that end up shorter than two
	characters, or that are not purely alphabetic, are dropped. The
	surviving words are re-joined with single spaces and written back over
	the original caption.

	Args:
		descriptions: dict mapping image id to a list of caption strings;
			the lists are modified in place.

	Returns:
		None. The result is the mutated `descriptions` argument.
	"""
	# translation table that deletes every punctuation character
	strip_punct = str.maketrans('', '', string.punctuation)
	for captions in descriptions.values():
		for idx, caption in enumerate(captions):
			# lower-case first, then strip punctuation, token by token
			normalised = (tok.lower().translate(strip_punct) for tok in caption.split())
			# drop one-letter leftovers such as 'a' and anything containing non-letters
			kept = [tok for tok in normalised if len(tok) > 1 and tok.isalpha()]
			captions[idx] = ' '.join(kept)

# cleaning descriptions
# NOTE: this rewrites the caption lists inside `descriptions` in place and returns nothing,
# so from here on `descriptions` holds the cleaned text and the raw captions are gone.
clean_descriptions(descriptions)

In [11]:
# Look up one image id to inspect its captions after clean_descriptions() has run.
descriptions['101654506_8eb26cfb60']

['brown and white dog is running through the snow',
 'dog is running in the snow',
 'dog running through snow',
 'white and brown dog is running through snow covered field',
 'the white and brown dog is running over the surface of the snow']

In [12]:
# converting the loaded descriptions into a vocabulary of words
def to_vocabulary(descriptions):
	"""Collect the set of distinct words used across all captions.

	Args:
		descriptions: dict mapping image id to a list of caption strings.

	Returns:
		set of every whitespace-separated word that occurs in any caption.
	"""
	words = set()
	# Plain loops instead of a list comprehension run only for its side
	# effect, which built and discarded a list of None values.
	for desc_list in descriptions.values():
		for desc in desc_list:
			words.update(desc.split())
	return words

# summarizing vocabulary
# Built from whatever `descriptions` currently holds, so the count depends on
# whether the cleaning cell has already been run.
vocabulary = to_vocabulary(descriptions)
print('Original Vocabulary len: %d' % len(vocabulary))

Original Vocabulary len: 8764


In [20]:
# saving descriptions to file, one per line
def save_descriptions(descriptions, filename):
	"""Write all captions to a text file, one '<image id> <caption>' per line.

	Args:
		descriptions: dict mapping image id to a list of caption strings.
		filename: Path of the output file; an existing file is overwritten.

	Returns:
		None.

	Raises:
		OSError: If the file cannot be opened or written.
	"""
	lines = [
		key + ' ' + desc
		for key, desc_list in descriptions.items()
		for desc in desc_list
	]
	# Lines are newline-separated with no trailing newline after the last one.
	data = '\n'.join(lines)
	# The context manager closes the handle even when write() raises,
	# which the manual open()/close() pair did not guarantee.
	with open(filename, 'w') as file:
		file.write(data)

# Relative path: the file is written to the kernel's current working directory.
save_descriptions(descriptions, 'descriptions.txt')

In [22]:
# loading a pre-defined list of photo identifiers
def load_set(filename):
	"""Load a file of image filenames and return the set of image identifiers.

	The file is read with load_doc() and split on newlines. Empty lines are
	ignored; for every other line the identifier is the text before its
	first '.'.

	Args:
		filename: Path of the text file listing one image filename per line.

	Returns:
		set of identifier strings (duplicates collapse).
	"""
	contents = load_doc(filename)
	# only the empty string is falsy, so this skips exactly the zero-length lines
	return {entry.split('.')[0] for entry in contents.split('\n') if entry}

# loading training dataset (6K)
# NOTE: this rebinds the global `filename`, which an earlier cell pointed at the caption file.
filename = '/content/drive/MyDrive/flicker8k/Flickr_Data/Flickr_TextData/Flickr_8k.trainImages.txt'
train = load_set(filename)
# `train` is a set, so this prints the number of distinct identifiers.
print('Dataset: %d' % len(train))

Dataset: 6000
