In [1]:
# cd to the project root (edit the path in the next cell to match your checkout)

In [2]:
cd ~/mva/recvis/project/cmn/

/Users/quentinleroy/mva/recvis/project/cmn


In [3]:
from __future__ import absolute_import, division, print_function

import sys
# Set before TensorFlow is imported below: an empty CUDA_VISIBLE_DEVICES hides
# every GPU from this process, so the model runs on CPU (it does NOT select GPU 0).
import os; os.environ['CUDA_VISIBLE_DEVICES'] = ''  # hide all GPUs -> CPU only

import tensorflow as tf
import numpy as np
import skimage.io
import skimage.transform

from models import visgeno_attention_model, spatial_feat, fastrcnn_vgg_net
from util.visgeno_rel_train.rel_data_reader import DataReader
from util import loss, eval_tools, text_processing

In [4]:
################################################################################
# Parameters
################################################################################

# Model Params
T = 20  # first dimension of the text_seq_batch placeholder (presumably max tokens per expression)
# num_vocab, embed_dim and lstm_dim are passed positionally to visgeno_attbilstm_net.
num_vocab = 72704  # NOTE(review): the vocabulary file name says 72700 — confirm the 4 extra ids
embed_dim = 300
lstm_dim = 1000

# Data Params
# Alternative image database, kept for reference:
# imdb_file = './exp-visgeno-rel/data/imdb/imdb_val.npy'
imdb_file = './exp-unrel/unrel_imdb.npy'
vocab_file = './word_embedding/vocabulary_72700.txt'
# Handed to the DataReader and added back to the image before it is displayed.
im_mean = visgeno_attention_model.fastrcnn_vgg_net.channel_mean

# Snapshot Params
# Checkpoint that the Saver restores into the session.
model_file = './downloaded_models/visgeno_attbilstm_strong_iter_360000.tfmodel'

# Directory that receives the saved figures; created on demand by the loop cell.
visualize_dir = './exp-visgeno-rel/results/unrel_attbilstm_strong_iter_360000.val/'

In [4]:
################################################################################
# Network
################################################################################

# Input placeholders; the loop cell feeds them from the DataReader batch.
# Exactly one image per run, two free dimensions (presumably height and width), 3 channels.
im_batch = tf.placeholder(tf.float32, [1, None, None, 3])
# One row of 5 values per box; the loop cell uses columns 1: as the 4 box coordinates
# (column 0 is presumably a batch index — TODO confirm).
bbox_batch = tf.placeholder(tf.float32, [None, 5])
# 5 values per row — presumably per-box spatial features; TODO confirm against spatial_feat.
spatial_batch = tf.placeholder(tf.float32, [None, 5])
# Token ids with T rows; the loop cell selects one expression as column k and
# treats ids > 0 as non-padding.
text_seq_batch = tf.placeholder(tf.int32, [T, None])

In [5]:
# Build the network graph. `scores` is evaluated in the loop cell, where row k is
# argmax'ed and the winning index is decoded into a pair of box indices.
# NOTE(review): the meaning of the two trailing positional False flags is not
# visible here — check the signature of visgeno_attbilstm_net.
scores = visgeno_attention_model.visgeno_attbilstm_net(im_batch, bbox_batch, spatial_batch,
    text_seq_batch, num_vocab, embed_dim, lstm_dim, False, False)

In [6]:
# Seed NumPy's global RNG (presumably so any sampling inside the reader is repeatable).
np.random.seed(3)
# Batch source for the loop cell, created with shuffling disabled. The traceback in
# this notebook's recorded output shows the reader loading batches on a background
# prefetch thread (run_prefetch in rel_data_reader.py).
# NOTE(review): the 10000 caps presumably mean "do not truncate boxes/relationships" — confirm.
reader = DataReader(imdb_file, vocab_file, im_mean, shuffle=False, max_bbox_num=10000, max_rel_num=10000)

Loading ROI data from file...Done.


In [7]:
################################################################################
# Snapshot and log
################################################################################

# Snapshot saver
# Created after the network cell has built the graph; used below only to restore.
snapshot_saver = tf.train.Saver()

# Start Session
# (session config with GPU memory growth, kept for reference)
# sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))

sess = tf.Session()

# tf.reset_default_graph()


# Load the pretrained weights from model_file into the session; no variable
# initializer is run, the checkpoint is the only source of values here.
snapshot_saver.restore(sess, model_file)

In [8]:
import matplotlib.pyplot as plt
%matplotlib inline

# Load the vocabulary: one entry per line of vocab_file, surrounding whitespace
# stripped, so that the list position is the word id. The context manager closes
# the file handle right away instead of leaking it until garbage collection.
with open(vocab_file) as vocab_f:
    vocab_list = [w.strip() for w in vocab_f]
def vocab_indices2sentence(indices):
    """Turn a sequence of vocabulary ids into a space-separated sentence.

    Ids equal to 0 are dropped; every other id is looked up in the module-level
    `vocab_list`.
    """
    tokens = []
    for idx in indices:
        if idx == 0:
            continue
        tokens.append(vocab_list[idx])
    return ' '.join(tokens)
def print_bbox(bboxes, style='r-', color='#00FF00', linewidth=5):
    """Outline one or more boxes on the current matplotlib axes.

    `bboxes` is reshaped to rows of (xmin, ymin, xmax, ymax). Each rectangle is
    grown by `linewidth + 3` on every side before it is drawn as a closed
    polyline with the given `style`, `color` and `linewidth`.
    """
    margin = linewidth + 3
    for x0, y0, x1, y1 in np.array(bboxes).reshape((-1, 4)):
        left, top = x0 - margin, y0 - margin
        right, bottom = x1 + margin, y1 + margin
        plt.plot([left, right, right, left, left],
                 [top, top, bottom, bottom, top], style, color=color, linewidth=linewidth)

In [None]:
################################################################################
# Visualization loop (forward passes only; no parameters are updated)
################################################################################

# The attention heat-map is padded with empty columns up to at least this width.
MIN_ATTENTION_COLS = 10
# When True, every figure is also rendered in the notebook before it is closed.
SHOW_FIGURES_INLINE = True


def _finish_figure(fig, out_path):
    """Save `fig` to `out_path`, optionally display it, then close it.

    Closing matters here: the loop creates three figures per batch, and figures
    that are never closed stay in memory until the cell finishes.
    """
    fig.savefig(out_path)
    if SHOW_FIGURES_INLINE:
        plt.show()
    plt.close(fig)


def _save_box_pair_figure(im, bboxes, pair_idx, title, out_path):
    """Overlay the box pair encoded by `pair_idx` on `im` and save the figure.

    `pair_idx` is decoded as first * len(bboxes) + second: box `first` is drawn
    as a solid red outline, box `second` as a dashed green outline.
    """
    fig = plt.figure(figsize=(8, 6))
    plt.imshow(im)
    print_bbox(bboxes[pair_idx // len(bboxes)], '-', color='#FF0000')
    print_bbox(bboxes[pair_idx % len(bboxes)], '--', color='#00FF00')
    plt.title(title)
    # 10-pixel margin around the image so outlines on the border stay visible.
    plt.axis([-10, im.shape[1]+10, -10, im.shape[0]+10])
    plt.gca().invert_yaxis()
    plt.axis('off')
    _finish_figure(fig, out_path)


# makedirs (not mkdir): the output path is nested and its parents may not exist yet.
if not os.path.isdir(visualize_dir):
    os.makedirs(visualize_dir)

# The collection lookup does not depend on the batch, so do it once.
attention_probs = tf.get_collection("attention_probs")

# NOTE(review): the recorded output of this cell shows the reader's prefetch thread
# dying on a 4-channel image (shape (1000, 523, 4)) inside prepare_batch.load_one_batch,
# after which the loop appears to block in read_batch() ("waiting for file input").
# That has to be fixed in the data reader (e.g. by dropping the alpha channel);
# it cannot be handled from this cell.
for n_iter in range(reader.num_batch):
    batch = reader.read_batch()

    # Skip batches that do not contain any relationship. The loop variable is
    # driven by range(), so there is nothing to increment by hand.
    if batch is None:
        continue

    print('\tthis batch: N_lang = %d, N_bbox = %d' %
          (batch['expr_obj1_batch'].shape[1], batch['bbox_batch'].shape[0]))

    # Visualize the middle expression of the batch.
    k = batch['expr_obj1_batch'].shape[1] // 2

    # Forward pass only: fetch the pair scores and the three attention maps.
    scores_val, ((probs_obj1, probs_obj2, probs_rel),) = sess.run(
        (scores, attention_probs),
        feed_dict={
            im_batch            : batch['im_batch'],
            bbox_batch          : batch['bbox_batch'],
            spatial_batch       : batch['spatial_batch'],
            text_seq_batch      : batch['text_seq_batch']
        })

    token_ids = batch['text_seq_batch'][:, k]
    expr = vocab_indices2sentence(token_ids)
    is_not_pad = token_ids > 0
    words = [vocab_list[idx] for idx in token_ids[is_not_pad]]

    # Undo the mean subtraction so the image can be displayed.
    im = (batch['im_batch'][0] + im_mean).astype(np.uint8)
    # Columns 1: are the four box coordinates consumed by print_bbox.
    bboxes = batch['bbox_batch'][:, 1:]

    # --- Attention heat-map: one row per attention map, one column per word ---
    # Widen the grid for expressions longer than MIN_ATTENTION_COLS instead of
    # failing on a negative padding width.
    n_cols = max(MIN_ATTENTION_COLS, len(words))
    n_pad = n_cols - len(words)
    fig = plt.figure(figsize=(8, 6))
    plt.xticks(np.arange(n_cols), words + ['']*n_pad, rotation=90, fontsize=20)
    plt.yticks([0, 1, 2], ['$a_{subj}$', '$a_{rel}$  ', '$a_{obj}$  '], fontsize=28)
    # Row order (obj1, rel, obj2) matches the y tick labels above.
    attention_mat = np.hstack((probs_obj1[is_not_pad, k], probs_rel[is_not_pad, k], probs_obj2[is_not_pad, k])).T
    attention_mat = np.hstack((attention_mat, np.zeros((3, n_pad), attention_mat.dtype)))
    plt.imshow(attention_mat, interpolation='nearest', cmap='Reds')
    plt.colorbar()
    _finish_figure(fig, os.path.join(visualize_dir, '%08d_att.png' % n_iter))

    # --- Ground-truth and predicted box pairs ---
    _save_box_pair_figure(im, bboxes, batch['label_batch'][k],
                          expr + ' (ground-truth)',
                          os.path.join(visualize_dir, '%08d_gt.png' % n_iter))
    _save_box_pair_figure(im, bboxes, np.argmax(scores_val[k]),
                          expr + ' (prediction)',
                          os.path.join(visualize_dir, '%08d_pred.png' % n_iter))

data reader: epoch = 0, batch = 5 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 6 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 7 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 8 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 9 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 10 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 11 / 1071
	this batch: N_lang = 1, N_bbox = 2




data reader: epoch = 0, batch = 12 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 13 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 14 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 15 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 16 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 17 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 18 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 19 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 20 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 21 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 22 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 23 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 24 / 1071
	this batch: N_lang = 

data reader: epoch = 0, batch = 117 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 118 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 119 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 120 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 121 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 122 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 123 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 124 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 125 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 126 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 127 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 128 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 129 / 1071
	this bat

data reader: epoch = 0, batch = 221 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 222 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 223 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 224 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 225 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 226 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 227 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 228 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 229 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 230 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 231 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 232 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 233 / 1071
	this bat

data reader: epoch = 0, batch = 325 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 326 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 327 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 328 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 329 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 330 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 331 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 332 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 333 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 334 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 335 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 336 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 337 / 1071
	this bat

data reader: epoch = 0, batch = 429 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 430 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 431 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 432 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 433 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 434 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 435 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 436 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 437 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 438 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 439 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 440 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 441 / 1071
	this bat

data reader: epoch = 0, batch = 533 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 534 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 535 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 536 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 537 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 538 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 539 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 540 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 541 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 542 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 543 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 544 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 545 / 1071
	this bat

data reader: epoch = 0, batch = 637 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 638 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 639 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 640 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 641 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 642 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 643 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 644 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 645 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 646 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 647 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 648 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 649 / 1071
	this bat

data reader: epoch = 0, batch = 741 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 742 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 743 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 744 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 745 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 746 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 747 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 748 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 749 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 750 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 751 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 752 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 753 / 1071
	this bat

data reader: epoch = 0, batch = 845 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 846 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 847 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 848 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 849 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 850 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 851 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 852 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 853 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 854 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 855 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 856 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 857 / 1071
	this bat

data reader: epoch = 0, batch = 949 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 950 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 951 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 952 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 953 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 954 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 955 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 956 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 957 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 958 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 959 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 960 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 961 / 1071
	this bat

Exception in thread Thread-4:
Traceback (most recent call last):
  File "/anaconda/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/anaconda/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/quentinleroy/mva/recvis/project/cmn/util/visgeno_rel_train/rel_data_reader.py", line 28, in run_prefetch
    vocab_dict, T, max_bbox_num, max_rel_num)
  File "/Users/quentinleroy/mva/recvis/project/cmn/util/visgeno_rel_train/prepare_batch.py", line 25, in load_one_batch
    im_processed = im_resized*255 - im_mean
ValueError: operands could not be broadcast together with shapes (1000,523,4) (3,) 



data reader: epoch = 0, batch = 1033 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 1034 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 1035 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 1036 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 1037 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 1038 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 1039 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 1040 / 1071
	this batch: N_lang = 1, N_bbox = 2
data reader: epoch = 0, batch = 1041 / 1071
data reader: waiting for file input (IO is slow)...
