In [4]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

from collections import OrderedDict
import tensorflow as tf
from keras.utils import to_categorical
import numpy as np
import json
from nltk.tokenize import word_tokenize
with open('config.json') as config_file:
    config = json.load(config_file)
data_path = config['data_path']

Using TensorFlow backend.


In [5]:
new_model = tf.keras.models.load_model('three_gram_lstm_loss_2.8469_accuracy_0.4227.h5')

In [6]:
train_dat = np.load('train_label_tokenized.npy', allow_pickle=True)
test_dat = np.load('test_label_tokenized.npy', allow_pickle=True)

In [7]:

combined_data = np.load('combined_label_tokenized.npy', allow_pickle=True)
combined_data = list(combined_data)

vocab = set()
for sent in combined_data:
    for w in sent:
        vocab.add(w.lower())
vocab.add('')

vocab_size = len(vocab)

train_dat = list(train_dat)
train_data = []
for data_point in train_dat:
    d_point = [w.lower() for w in data_point[::-1]]
    train_data.append(d_point)
test_dat = list(test_dat)
test_data = []
for data_point in test_dat:
    d_point = [w.lower() for w in data_point[::-1]]
    test_data.append(d_point)

processed_data = []
for data_point in combined_data:
    processed_data_point = [w.lower() for w in data_point[::-1]]
    processed_data.append(processed_data_point)

two_gram_train = []
for sentence in train_data:
    sentence.insert(0, '')
    sentence.append('')
    for i in range(len(sentence)-2):
        two_gram_train.append([sentence[i:i+2], sentence[i+2]])
two_gram_test = []
for sentence in test_data:
    sentence.insert(0, '')
    sentence.append('')
    for i in range(len(sentence)-2):
        two_gram_test.append([sentence[i:i+2], sentence[i+2]])

word_to_idx = {}
for sentence in processed_data:
    for word in sentence:
        if word.lower() not in word_to_idx:
            word_to_idx[word] = len(word_to_idx)
word_to_idx[''] = len(word_to_idx)

two_gram_train_inputs = []
two_gram_train_outputs = []
for sent, next_word in two_gram_train:
    sentence_in = np.array([word_to_idx[w] for w in sent])
    two_gram_train_inputs.append(sentence_in)
    next_word_out = np.array([word_to_idx[next_word]])
    two_gram_train_outputs.append(next_word_out)
two_gram_train_inputs = np.array(two_gram_train_inputs)
two_gram_train_outputs = to_categorical(two_gram_train_outputs, num_classes=vocab_size)


In [8]:
three_gram_train = []
for sentence in train_data:
    sentence.insert(0, '')
    sentence.insert(0, '')
    sentence.append('')
    for i in range(len(sentence)-3):
        three_gram_train.append([sentence[i:i+3], sentence[i+3]])
three_gram_test = []
for sentence in test_data:
    sentence.insert(0, '')
    sentence.insert(0, '')
    sentence.append('')
    for i in range(len(sentence)-3):
        three_gram_test.append([sentence[i:i+3], sentence[i+3]])

three_gram_train_inputs = []
three_gram_train_outputs = []
for sent, next_word in three_gram_train:
    sentence_in = np.array([word_to_idx[w] for w in sent])
    three_gram_train_inputs.append(sentence_in)
    next_word_out = np.array([word_to_idx[next_word]])
    three_gram_train_outputs.append(next_word_out)
three_gram_train_inputs = np.array(three_gram_train_inputs)
three_gram_train_outputs = to_categorical(three_gram_train_outputs, num_classes=vocab_size)

In [9]:
idx_to_word = {}
for word in word_to_idx:
    idx = word_to_idx[word]
    idx_to_word[idx] = word


In [50]:
top5 = 0
top1 = 0
for _ in range(1000):
    i = np.random.randint(len(two_gram_train_inputs))
    out = new_model.predict(two_gram_train_inputs[i])
    next_words = out[0].argsort()[-5:][::-1]
    label = np.where(two_gram_train_outputs[i] == 1)[0][0]
    if label == next_words[0]:
        top1 += 1
    if label in next_words:
        top5 += 1
    if _ % 50 == 49:
        print(f'checkpoint {i} : accuracy {top1/_}, top5 accuracy: {top5/_}')
print(f'accuracy {top1/1000}, top5 accuracy: {top5/1000}')

checkpoint 140228 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 236978 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 297067 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 226930 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 362567 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 327061 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 276977 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 218941 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 89325 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 236814 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 176466 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 367680 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 241366 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 196275 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 302921 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 16054 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 93165 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 139883 : accuracy 0.0, top5 accuracy: 0.0
checkpoint 152657 : accuracy 0.0, top5 accuracy: 

In [9]:
# print(three_gram_train_inputs[34780])
# print(three_gram_train_inputs[34780].shape)
# print(type(three_gram_train_inputs[34780]))
test = np.array([[9904, 9904,   44]])
print(test.shape)
out = new_model.predict(test)

(1, 3)


In [12]:
out.shape

(1, 9905)

In [11]:
next_words = out[0].argsort()[-5:][::-1]
for i in next_words:
    word = idx_to_word[str(i)]
    print(i,word, word in vocab, word_to_idx[word])
print(out[0][3100])

9904  True 9904
8 left True 8
25 right True 25
23 little True 23
11 the True 11
3.6278555e-10


In [13]:
three_gram_train_inputs[34780]

array([  46,   29, 1655])

In [11]:
index = np.where(two_gram_train_outputs[15604] == 1)[0][0]
print(index)
print(idx_to_word[index])

64
bottom


In [70]:
with open('word_to_idx_vice_versa.json', 'w') as fp:
    saved_token_to_id_and_reverse = {
        'word_to_idx': word_to_idx,
        'idx_to_word': idx_to_word
    }
    json.dump(saved_token_to_id_and_reverse, fp)

In [1]:
import json
f = open('word_to_idx_vice_versa.json')
tokenizer = json.load(f)
word_to_idx = tokenizer['word_to_idx']
idx_to_word = tokenizer['idx_to_word']

In [2]:
word_to_idx['zebra']

4

In [3]:
import sys
import pandas as pd
import json
import numpy as np
from helper import *
import argparse
from rsa import RSA
import matplotlib.pyplot as plt
import os
import tensorflow as tf
with open('config.json') as config_file:
    config = json.load(config_file)
data_path = config['data_path']


file_id = 21540#3278#182

df = pd.read_csv(os.path.join(data_path,f'refCOCO/train/attr_tables/attr_{file_id}.tsv'), encoding='utf-8',sep='\t')

with open(os.path.join(data_path,f'refCOCO/train/labels/lab_{file_id}.json')) as json_file:
    label = json.load(json_file)
refs = [[r] for r in label['ref_sents']]
img_id = df['image_id'][0]
filename = os.path.join(data_path, f'refCOCO/train/imgs_by_id/{img_id}.jpg')
image = plt.imread(filename)
# get relations generated from graph faster-RCNN
rel_load = np.load(f'./train_relation_extraction.npy', allow_pickle=True)
generated_relations = rel_load[file_id]

# add lstm model to rsa.
lstm = tf.keras.models.load_model('three_gram_lstm_loss_2.8469_accuracy_0.4227.h5')

box_data = df[['box_alias', 'x1','y1','w','h']]
fig,ax = plt.subplots(1)
img = image

# ax.imshow(img)
rng = [i for i in range(len(box_data))]
for i in [4]:#rng[:]:
    name, x,y,w,h = list(box_data.iloc[i,:])
    ax = draw_box_obj(name,x,y,w,h,img,ax)

print(label['ref_sents'])
bbox = label['bbox'][0]
sentence = label['ref_sents'][0]
fig,ax_true_label = plt.subplots(1)
ax_true_label.imshow(img)
draw_box_obj(sentence,bbox[0],bbox[1],bbox[2],bbox[3],img,ax_true_label)

rsa_agent = RSA(df, generated_relations=generated_relations, \
                model=lstm, word_to_idx=word_to_idx, idx_to_word=idx_to_word)

# output = rsa_agent.full_speaker('woman-2')

['CENTER SCATEBOARDER', 'person in middle', 'the person in the middle']


In [4]:
output = rsa_agent.full_speaker('woman-2')

  return result/np.sum(result)


0.27857507409142507 girl
0.06629854490214701 the first from right


In [4]:
output

['girl', 'the first from right']

In [8]:
x = tf.constant(10, dtype=np.float32)

In [12]:
help(x.numpy)

Help on method numpy in module tensorflow.python.framework.ops:

numpy() method of tensorflow.python.framework.ops.EagerTensor instance
    Copy of the contents of this Tensor into a NumPy array or scalar.
    
    Unlike NumPy arrays, Tensors are immutable, so this method has to copy
    the contents to ensure safety. Use `memoryview` to get a readonly
    view of the contents without doing a copy:
    
    >>> t = tf.constant([42])
    >>> np.array(memoryview(t))
    array([42], dtype=int32)
    
    Note that `memoryview` is only zero-copy for Tensors on CPU. If a Tensor
    is on GPU, it will have to be transferred to CPU first in order for
    `memoryview` to work.
    
    Returns:
      A NumPy array of the same shape and dtype or a NumPy scalar, if this
      Tensor has rank 0.
    
    Raises:
      ValueError: If the dtype of this Tensor does not have a compatible
        NumPy dtype.

