In [2]:
import argparse
import json
import time
import datetime
import numpy as np
import code
import os
import cPickle as pickle
import math
import scipy.io

from imagernn.solver import Solver
from imagernn.imagernn_utils import decodeGenerator, eval_split

In [18]:
# Load the trained checkpoint and unpack the pieces used further down:
# the training parameters, the model, and the two vocabulary mappings.
root_path = "D:/HSE/neuraltalk/"
checkpoint_path = root_path + "flickr8k_cnn_lstm_v1.p"
print('loading checkpoint %s' % (checkpoint_path, ))
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load checkpoints that come from a trusted source.
# The with-block closes the file handle as soon as the checkpoint is read,
# instead of leaving it open until garbage collection.
with open(checkpoint_path, 'rb') as checkpoint_file:
    checkpoint = pickle.load(checkpoint_file)
checkpoint_params = checkpoint['params']
dataset = checkpoint_params['dataset']
model = checkpoint['model']
misc = {}
misc['wordtoix'] = checkpoint['wordtoix']
ixtoword = checkpoint['ixtoword']

loading checkpoint D:/HSE/neuraltalk/flickr8k_cnn_lstm_v1.p


In [7]:
# Result container, later dumped to JSON for visualizing the results.
# It carries the checkpoint's training parameters plus an (initially empty)
# list of per-image entries.
# (disabled in the original) blob['params'] = params
blob = {
    'checkpoint_params': checkpoint_params,
    'imgblobs': [],
}

In [10]:
# Read tasks.txt from the example_images folder; each line of the file
# becomes one entry of img_names.
tasks_path = os.path.join(root_path + "example_images", 'tasks.txt')
# The with-block closes the file handle once the contents are read,
# instead of leaving it open until garbage collection.
with open(tasks_path, 'r') as tasks_file:
    img_names = tasks_file.read().splitlines()

In [11]:
# Read the precomputed image features stored in vgg_feats.mat under root_path.
features_path = os.path.join(root_path, 'vgg_feats.mat')
features_struct = scipy.io.loadmat(features_path)

# The 'feats' entry holds the feature matrix; the original note describes it
# as a 4096 x N numpy array of features.
features = features_struct['feats']

# D is the first dimension of the matrix, N the second.
D, N = features.shape

In [13]:
# Iterate over all images and predict a sentence for each one: the n-th
# feature column is decoded and the top prediction is recorded in
# blob['imgblobs'] together with the image path.
BatchGenerator = decodeGenerator(checkpoint_params)

# Fail before any decoding starts if there are fewer image names than feature
# columns; otherwise img_names[n] raises IndexError part-way through the run.
if len(img_names) < N:
    raise ValueError('found %d image names but %d feature columns'
                     % (len(img_names), N))

# Start from an empty list so that re-running this cell replaces the previous
# results instead of appending a second copy of every image.
blob['imgblobs'] = []

# The decoding options are the same for every image, so build them once.
kwparams = { 'beam_size' : 20 }

for n in xrange(N):
    print('image %d/%d:' % (n, N))

    # encode the image: its feature column plus the matching file name
    img = {}
    img['feat'] = features[:, n]
    img['local_file_path'] = img_names[n]

    # perform the work. heavy lifting happens inside
    Ys = BatchGenerator.predict([{'image':img}], model, checkpoint_params, **kwparams)

    # build up the output
    img_blob = {}
    img_blob['img_path'] = img['local_file_path']

    # encode the top prediction
    top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
    # per the original note, predictions are sorted with the highest on top
    top_prediction = top_predictions[0]
    # top_prediction[0] is used as the log-probability and top_prediction[1] as
    # the word indices; per the original note, ix 0 is the END token, so skip it
    candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
    print('PRED: (%f) %s' % (top_prediction[0], candidate))
    img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]}
    blob['imgblobs'].append(img_blob)

image 0/16:
PRED: (-7.245608) a dog plays with a toy
image 1/16:
PRED: (-6.825829) a dog runs through a field
image 2/16:
PRED: (-6.071394) a dog runs through the woods
image 3/16:
PRED: (-7.696322) a dog plays with a toy
image 4/16:
PRED: (-10.154738) a man and a woman sit on a bench
image 5/16:
PRED: (-7.413065) a dog plays with a toy
image 6/16:
PRED: (-10.011646) a black and white dog is climbing a tree
image 7/16:
PRED: (-11.486791) a child in a red jacket is standing in the snow
image 8/16:
PRED: (-11.969720) a man in a white shirt is playing a guitar
image 9/16:
PRED: (-8.722684) a man and a woman are posing for a picture
image 10/16:
PRED: (-6.831288) a group of people pose for a picture
image 11/16:
PRED: (-9.867244) a man and a woman sit at a table
image 12/16:
PRED: (-8.609164) a man and a woman sit on a bench
image 13/16:
PRED: (-9.136447) a man is standing in the water
image 14/16:
PRED: (-8.172481) a group of people are gathered together
image 15/16:
PRED: (-8.821453) a m

In [14]:
# Dump the result struct to result_struct.json under root_path.
save_file = os.path.join(root_path, 'result_struct.json')
print('writing predictions to %s...' % (save_file, ))
# The with-block flushes and closes the file as soon as the dump finishes,
# so the JSON is complete on disk before any later cell reads it.
with open(save_file, 'w') as json_file:
    json.dump(blob, json_file)

writing predictions to D:/HSE/neuraltalk/example_images\result_struct.json...


In [16]:
# Dump an HTML page with one entry per image: the image itself followed by
# the log-probability and text of its predicted sentence.
# NOTE(review): img_path and text are interpolated without HTML escaping.
html_parts = []
for img in blob['imgblobs']:
    html_parts.append('<img src="%s" height="400"><br>' % (img['img_path'], ))
    html_parts.append('(%f) %s <br><br>' % (img['candidate']['logprob'], img['candidate']['text']))
# Join once at the end instead of growing the string with repeated +=.
html = ''.join(html_parts)
html_file = os.path.join(root_path, 'result.html')
print('writing html result file to %s...' % (html_file, ))
# The with-block flushes and closes the file as soon as the page is written.
with open(html_file, 'w') as html_out:
    html_out.write(html)

writing html result file to D:/HSE/neuraltalk/example_images\result.html...
