In [1]:
from os import listdir
from pickle import dump
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
import string

Prepare Photo Data

In [2]:
# Folder holding the photos; its entries are listed and loaded one by one
# in the feature-extraction loop. The path is relative to the working directory.
directory = 'Flicker8k_Dataset'

Extract features from each photo in Flicker8k_Dataset

In [3]:
# We use the Oxford Visual Geometry Group (VGG) network, instantiated here
# with its default arguments.
model = VGG16()

In [4]:
# Re-structure the model.
# VGG is built for photo classification, but we need the internal
# representation of a photo *before* classification, so the final
# (classification) layer is dropped and the model ends at the layer before it.
# The layer is selected by name ('fc2', the second-to-last layer of the stock
# VGG16 with its top included) instead of by position (layers[-2]) so that
# re-running this cell is idempotent: a positional index would strip one more
# layer on every re-execution because `model` is rebound to its own truncation.
model = Model(inputs=model.inputs, outputs=model.get_layer('fc2').output)

In [5]:
# Show the layer-by-layer summary of the truncated model.
# Model.summary() prints the table itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" to the output.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [6]:
# Map each image id (file name up to the first '.') to the output of the
# truncated model for that photo.
features = {}
for name in listdir(directory):
    filename = directory + '/' + name
    # Load the photo at 224x224 and convert its pixels to a numpy array.
    image = img_to_array(load_img(filename, target_size=(224, 224)))
    # Prepend a batch axis of size 1, then apply the VGG-specific preprocessing.
    image = preprocess_input(image.reshape((1,) + image.shape))
    # Run the model on the single-image batch to obtain its features.
    feature = model.predict(image, verbose=0)
    # Key the result by the image id.
    image_id = name.split('.')[0]
    features[image_id] = feature

In [7]:
# Report how many entries the features dict holds (one per distinct image id).
print('Extracted Features: %d' % len(features))

Extracted Features: 8091


In [8]:
# Save the extracted features to disk with pickle.
# The context manager guarantees the file is flushed and closed even if
# pickling fails; passing a bare open(...) to dump() never closes the handle
# explicitly.
with open('features.pkl', 'wb') as features_file:
    dump(features, features_file)

Prepare Text Data

In [9]:
# Path of the caption (token) file that is read in the next step.
filename = 'Flickr8k_text/Flickr8k.token.txt'

In [10]:
# Load the raw descriptions: read the whole caption file into one string.
# Using a context manager ensures the handle is closed even if read() raises.
with open(filename, 'r') as file:
    doc = file.read()

In [12]:
# Extract the descriptions for each image: build a dict mapping an image id
# to the list of its caption strings.
descriptions = dict()
for line in doc.split('\n'):
    # Split the line on whitespace.
    tokens = line.split()
    # Skip lines that are too short, and also lines that yield no tokens at
    # all (e.g. whitespace-only lines), which would otherwise raise
    # IndexError on tokens[0] below.
    if len(line) < 2 or not tokens:
        continue
    # The first token is the image id, the remaining tokens are the description.
    image_id, image_desc = tokens[0], tokens[1:]
    # Drop the extension (everything from the first '.' onwards).
    image_id = image_id.split('.')[0]
    # Convert the description tokens back into a single string.
    image_desc = ' '.join(image_desc)
    # Create the list on first sight of this id, then store the description.
    descriptions.setdefault(image_id, []).append(image_desc)

In [13]:
# Report the number of distinct image ids that have at least one description.
print('Loaded: %d ' % len(descriptions))

Loaded: 8092 


In [17]:
def clean_descriptions(descriptions):
    """Normalize every caption in ``descriptions`` in place.

    Each caption is split on whitespace, lower-cased and stripped of the
    characters in ``string.punctuation``. Tokens that end up one character
    or shorter (e.g. a hanging 's' or 'a') and tokens that are not purely
    alphabetic (e.g. containing digits) are discarded. The surviving tokens
    are re-joined with single spaces.

    Args:
        descriptions: dict mapping a key to a list of caption strings. The
            lists are modified in place.

    Returns:
        None.
    """
    # Translation table that deletes every punctuation character.
    strip_punct = str.maketrans('', '', string.punctuation)
    for captions in descriptions.values():
        for idx, caption in enumerate(captions):
            # Tokenize, lower-case and remove punctuation from each token.
            words = (token.lower().translate(strip_punct) for token in caption.split())
            # Keep only alphabetic tokens longer than one character and
            # store the cleaned caption back as a string.
            captions[idx] = ' '.join(w for w in words if len(w) > 1 and w.isalpha())

In [18]:
# Clean the descriptions. clean_descriptions rewrites the caption lists
# inside `descriptions` in place and returns nothing.
clean_descriptions(descriptions)

In [19]:
# Convert the loaded descriptions into a vocabulary: the set of all distinct
# words that occur in any caption.
vocabulary = set()
for key in descriptions.keys():
    # A plain loop is used instead of a list comprehension run only for its
    # side effects, which would build and discard a list of None values.
    for d in descriptions[key]:
        vocabulary.update(d.split())

In [20]:
# Report the number of distinct words collected in the vocabulary set.
print('Vocabulary Size: %d' % len(vocabulary))

Vocabulary Size: 8763


In [21]:
# Save the descriptions to a text file, one "<image id> <caption>" per line.
lines = list()
for key, desc_list in descriptions.items():
    for desc in desc_list:
        lines.append(key + ' ' + desc)
data = '\n'.join(lines)
# The context manager closes the file even if the write fails.
with open('descriptions.txt', 'w') as file:
    file.write(data)