# Imports

In [43]:
from transformers import RobertaTokenizer, TFRobertaModel
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
import re
import numpy as np
import random

DATA_BASE_PATH = './data'
KB_PATH = './data/kb.txt'

# KB embdedding model

## kb data loader

In [2]:
class KBManager:
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.entity_map = {}
        self.relation_map = {}
        self._build_vocabs()
        
    @staticmethod
    def extend_vocab(word, vocab):
        if word not in vocab:
            vocab[word] = len(vocab)
    
    def _build_vocabs(self):
        with open(self.data_dir, 'r') as f:
            data = f.read()
            lines = data.strip().split('\n')
            for line in lines:
                subj, rel, obj = line.split('|')
                self.extend_vocab(subj, self.entity_map)
                self.extend_vocab(obj, self.entity_map)
                self.extend_vocab(rel, self.relation_map)
    
    
    def load_er_vocab(self):
        result = {}
        with open(self.data_dir, 'r') as f:
            data = f.read()
            lines = data.strip().split('\n')
            for line in lines:
                subj, rel, obj = line.split('|')
                subj_idx = self.entity_map[subj]
                rel_idx = self.relation_map[rel]
                obj_idx = self.entity_map[obj]
                
                er_tuple = (subj_idx, rel_idx)
                if er_tuple in result:
                    result[er_tuple].append(obj_idx)
                else:
                    result[er_tuple] = [obj_idx]
        return result
                
    

kb_mgr = KBManager(KB_PATH)


## model definition

In [5]:
EMBEDDING_DIM = 512

class KBGModel(keras.Model):
    
    def __init__(self, entity_dim, relation_dim, hidden_dim):
        super(KBGModel, self).__init__()
    
        self.entity_dim = entity_dim
        self.relation_dim = relation_dim
        self.hidden_dim = hidden_dim
        
        self.entity_encoder = keras.layers.Embedding(
            self.entity_dim,
            self.hidden_dim,
            embeddings_regularizer=keras.regularizers.l2(0.1)
        )
        
        self.relation_encoder = keras.layers.Embedding(
            self.relation_dim,
            self.hidden_dim,
            input_shape=(),
        )
        
        self.head_bn = keras.layers.BatchNormalization()
        self.head_drpout = keras.layers.Dropout(0.3)
        self.rel_drpout = keras.layers.Dropout(0.4)
        self.score_bn = keras.layers.BatchNormalization()
        self.output_drpout = keras.layers.Dropout(0.5)
        
    def get_score(self, head, relation, entity_encoder):
        
        assert self.hidden_dim % 2 == 0
        hidden_dim_slice = int(self.hidden_dim/2)

        head_norm = self.head_bn(tf.reshape(head, (-1, hidden_dim_slice, 2)))
        head_drp = self.head_drpout(head_norm)

        head_drp = tf.reshape(head_drp, (-1, self.hidden_dim))

        re_head = tf.slice(head_drp, [0, 0], [-1, hidden_dim_slice])
        im_head = tf.slice(head_drp, [0, hidden_dim_slice], [-1, -1])

        relation_drp = self.rel_drpout(tf.squeeze(relation))
        re_relation = tf.slice(relation_drp, [0, 0], [-1, hidden_dim_slice])
        im_relation = tf.slice(relation_drp, [0, hidden_dim_slice], [-1, -1])

        re_tail = tf.slice(tf.squeeze(entity_encoder.weights), [0, 0], [-1, hidden_dim_slice])
        im_tail = tf.slice(tf.squeeze(entity_encoder.weights), [0, hidden_dim_slice], [-1, -1])

        re_score = re_head * re_relation - im_head * im_relation
        im_score = re_head * im_relation + im_head * re_relation

        score = tf.stack([re_score, im_score], axis=1)
        score_bn = self.score_bn(score)
        score_drp = self.output_drpout(score_bn)

        score_drp = tf.reshape(score_drp, (-1, self.hidden_dim))
        re_score = tf.slice(score_drp, [0, 0], [-1, hidden_dim_slice])
        im_score = tf.slice(score_drp, [0, hidden_dim_slice], [-1, -1])

        scores = tf.add(
            tf.matmul(re_score, re_tail, transpose_b=True),
            tf.matmul(im_score, im_tail, transpose_b=True)
        )

        return scores
        
    def call(self, subj_ids, rel_ids):
        entity_embedding = self.entity_encoder(subj_ids)
        rel_embedding = self.relation_encoder(rel_ids)
        
        scores = self.get_score(entity_embedding, rel_embedding, self.entity_encoder)
        prediction = tf.sigmoid(scores)
        
        return prediction

entity_dim = len(kb_mgr.entity_map)
relation_dim = len(kb_mgr.relation_map)
kg_model = KBGModel(entity_dim, relation_dim, EMBEDDING_DIM)

kg_model.load_weights('data/complex/saved_models/complex')

kg_model(
    np.array([425, 77]),
    np.array([1,2]),
    training=False
)

kg_model.summary()

Model: "kbg_model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      multiple                  22135808  
_________________________________________________________________
embedding_3 (Embedding)      multiple                  4608      
_________________________________________________________________
batch_normalization_2 (Batch multiple                  8         
_________________________________________________________________
dropout_3 (Dropout)          multiple                  0         
_________________________________________________________________
dropout_4 (Dropout)          multiple                  0         
_________________________________________________________________
batch_normalization_3 (Batch multiple                  1024      
_________________________________________________________________
dropout_5 (Dropout)          multiple                  

In [6]:
kb_mgr.entity_map

{'Kismet': 0,
 'William Dieterle': 1,
 'Edward Knoblock': 2,
 'Marlene Dietrich': 3,
 'Edward Arnold': 4,
 'Ronald Colman': 5,
 'James Craig': 6,
 '1944': 7,
 'English': 8,
 'bd-r': 9,
 'Flags of Our Fathers': 10,
 'Clint Eastwood': 11,
 'Paul Haggis': 12,
 'Ron Powers': 13,
 'James Bradley': 14,
 '2006': 15,
 'War': 16,
 'famous': 17,
 'world war ii': 18,
 'war': 19,
 'r': 20,
 'clint eastwood': 21,
 'american': 22,
 'iwo jima': 23,
 'flag': 24,
 'The Bride Wore Black': 25,
 'François Truffaut': 26,
 'Cornell Woolrich': 27,
 'Jeanne Moreau': 28,
 'Michel Bouquet': 29,
 'Charles Denner': 30,
 '1968': 31,
 'French': 32,
 'revenge': 33,
 'wedding': 34,
 'françois truffaut': 35,
 'black': 36,
 'bride': 37,
 'Dirty Filthy Love': 38,
 'Adrian Shergold': 39,
 'Jeff Pope': 40,
 'Michael Sheen': 41,
 'Claudie Blakley': 42,
 'Anastasia Griffith': 43,
 'Adrian Bower': 44,
 '2004': 45,
 'Drama': 46,
 'The Dark Horse': 47,
 'Alfred E. Green': 48,
 'Bette Davis': 49,
 'Warren William': 50,
 '1932':

# EmbedKGQA

## Dataset loader

In [7]:
class QA:
    def __init__(self, question, answers, question_entity):
        self.question = question
        self.answers = answers
        self.question_entity = question_entity
        
    def __repr__(self):
        return "{}\n{}\n{}\n".format(self.question, self.question_entity, self.answers)
        
def read_dataset(path):
    dataset = []
    with open(path, 'r') as f:
        data_lines = f.read().strip().split('\n')
        for line in data_lines:
            question_raw, ans_raw = line.split('\t')
            q_entity = re.search(r'\[.+\]', question_raw).group().strip('[]')
            question = question_raw.replace(']', '').replace('[', '')
            answers = ans_raw.strip().split('|')
            dataset.append(QA(
                question=question,
                answers=answers,
                question_entity=q_entity
            ))
    random.shuffle(dataset)
    return dataset

train_dataset = read_dataset('./data/1-hop/qa_train.txt')
valid_dataset = read_dataset('./data/1-hop/qa_dev.txt')
test_dataset = read_dataset('./data/1-hop/qa_test.txt')

print(test_dataset[1])
print(len(train_dataset), len(valid_dataset), len(test_dataset))

what sort of film is Alpha Dog
Alpha Dog
['Drama', 'Crime']

96106 9992 9947


## Dataset generator

In [8]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
q_embedder = TFRobertaModel.from_pretrained('roberta-base')

inputs = tokenizer(
    ["Hello, my dog is cute and nice", "hey man!"],
    return_tensors="tf",
    padding=True
)
outputs = q_embedder(inputs)
last_hidden_states = outputs[0]

last_hidden_states.numpy()[:, 0, :].shape

I0817 21:02:36.340466 140365751764800 tokenization_utils_base.py:1254] loading file https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json from cache at /home/navid/.cache/torch/transformers/d0c5776499adc1ded22493fae699da0971c1ee4c2587111707a4d177d20257a2.ef00af9e673c7160b4d41cfda1f48c5f4cba57d5142754525572a846a1ab1b9b
I0817 21:02:36.342034 140365751764800 tokenization_utils_base.py:1254] loading file https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt from cache at /home/navid/.cache/torch/transformers/b35e7cd126cd4229a746b5d5c29a749e8e84438b14bcdb575950584fe33207e8.70bec105b4158ed9a1747fea67a43f5dee97855c64d62b6ec3742f4cfdb5feda
I0817 21:02:37.813474 140365751764800 configuration_utils.py:264] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-config.json from cache at /home/navid/.cache/torch/transformers/e1a2a406b5a05063c31f4dfdee7608986ba7c6393f7f79db5e69dcd197208534.117c81977c5979de8c088352e74ec

(2, 768)

In [9]:
def get_batch(dataset, target_dim, batch_size=64):
    batch_idx = 0
    entity_map = kb_mgr.entity_map
    
    remainder = len(dataset) % batch_size
    if remainder != 0:
        dataset = dataset[:-remainder]
        
    while batch_idx < len(dataset):
        batch = dataset[batch_idx:batch_idx+batch_size]
        
        qs = list(map(lambda qa: qa.question, batch))
        tokens = tokenizer(
            qs,
            return_tensors="tf",
            padding=True
        )
        
        q_embedds = q_embedder(tokens)
        q_embedds = q_embedds[0].numpy()[:, 0, :]
        
        entities = map(lambda qa: qa.question_entity, batch)
        entity_ids = list(map(lambda e: entity_map[e], entities))
        entity_embedds = kg_model.entity_encoder(np.array(entity_ids)).numpy()
        
        labels_list = map(lambda qa: qa.answers, batch)
        
        Xs = [q_embedds, entity_embedds]
        Ys = np.zeros((batch_size, target_dim))
        for idx, labels in enumerate(labels_list):
            targets = list(map(lambda ans: entity_map[ans], labels))
            Ys[idx][targets] = 1
        
        yield Xs, Ys
        batch_idx += batch_size
                    
next(iter(get_batch(
    [QA(
        question='hey Kismet',
        question_entity='Kismet',
        answers=['James Bradley']
    )],
    target_dim=20,
    batch_size=1)
))


([array([[-5.68278097e-02,  7.45838210e-02, -1.55453430e-02,
          -1.18482023e-01,  5.48469238e-02, -1.20152578e-01,
          -1.99890602e-02,  1.69902574e-02,  3.33256572e-02,
          -5.86458184e-02, -7.99875893e-03,  3.83028537e-02,
           3.51682790e-02, -3.10699511e-02,  9.35465693e-02,
           3.14213783e-02, -8.18562806e-02,  2.19148360e-02,
           3.11452597e-02, -4.50817607e-02, -9.96649936e-02,
           3.61345410e-02, -5.69843017e-02,  1.17892370e-01,
           7.67162256e-03,  3.29993814e-02,  7.09764063e-02,
           9.56672356e-02, -3.93315926e-02, -1.65858027e-03,
          -6.66877814e-03, -1.92997921e-02,  2.99060810e-02,
          -5.10813892e-02,  4.07264791e-02,  5.32149039e-02,
           6.59779608e-02,  3.00106639e-03, -1.24427065e-01,
           6.19131327e-03, -2.71666963e-02,  3.82598937e-02,
           1.68873724e-02,  3.36392894e-02,  6.67385608e-02,
           5.12889214e-02,  1.33828791e-02,  2.48748660e-02,
          -4.09413986e-0

## Model definition

In [10]:

class EmbedKGQA(keras.Model):
    def __init__(self, graph_embedding_model):
        super(EmbedKGQA, self).__init__()
        self.fc1 = keras.layers.Dense(512, input_shape=(768,))
        self.fc2 = keras.layers.Dense(512)
        self.fc3 = keras.layers.Dense(512)
        self.graph_embedding_model = graph_embedding_model
        self.graph_embedding_model.trainable = False
    
    
    def call(self, q_embeddings, q_entity_embeddings):
        question_complex = self.fc1(q_embeddings)
        question_complex = self.fc2(question_complex)
        question_complex = self.fc3(question_complex)
        
        scores = self.graph_embedding_model.get_score(
            q_entity_embeddings,
            question_complex,
            self.graph_embedding_model.entity_encoder
        )
        
        prediction = tf.sigmoid(scores)
        return prediction
        
    
embedKGQA = EmbedKGQA(kg_model)

embedKGQA(
    np.ones((10, 768)),
    np.ones((10, 512)),
    training=False
)

embedKGQA.summary()

W0817 21:02:43.413469 140365751764800 base_layer.py:1790] Layer embed_kgqa is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

W0817 21:02:43.424565 140365751764800 base_layer.py:1790] Layer batch_normalization_2 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer c

Model: "embed_kgqa"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  393728    
_________________________________________________________________
dense_1 (Dense)              multiple                  262656    
_________________________________________________________________
dense_2 (Dense)              multiple                  262656    
_________________________________________________________________
kbg_model_1 (KBGModel)       multiple                  22141448  
Total params: 23,060,488
Trainable params: 919,040
Non-trainable params: 22,141,448
_________________________________________________________________


## Training functions

In [11]:
loss_fn = keras.losses.BinaryCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam()
train_loss = keras.metrics.Mean(name='train_loss')
validation_loss = keras.metrics.Mean(name='validation_loss')

@tf.function
def train_step(q_embeddings, q_entity_embeddings, targets):
    with tf.GradientTape() as tape:
        predictions = embedKGQA(
            q_embeddings,
            q_entity_embeddings,
            training=True
        )
        loss = loss_fn(y_true=targets, y_pred=predictions)
    grads = tape.gradient(loss, embedKGQA.trainable_variables)
    optimizer.apply_gradients(zip(grads, embedKGQA.trainable_variables))
    train_loss(loss)
    
@tf.function
def validation_step(q_embeddings, q_entity_embeddings, targets):
    predictions = embedKGQA(
        q_embeddings,
        q_entity_embeddings,
        training=False
    )
    loss = loss_fn(y_true=targets, y_pred=predictions)
    validation_loss(loss)

## Training epochs

In [None]:
EPOCHS = 10
BATCH_SIZE = 64
VALIDATION_BATCH_SIZE = 2048
TRAIN_LOG_STEP = 20
entity_dim = len(kb_mgr.entity_map)

for epoch in range(EPOCHS):
    train_loss.reset_states()
    validation_loss.reset_states()
    
    iteration = 0
    for x, y in get_batch(train_dataset, entity_dim, batch_size=BATCH_SIZE):
        train_step(x[0], x[1], y)
        
        if not iteration % TRAIN_LOG_STEP:
            print('training loss in iteration {}: {}'.format(iteration, train_loss.result()))
        
        iteration += 1
        
    for x, y in get_batch(valid_dataset, entity_dim, batch_size=VALIDATION_BATCH_SIZE):
        validation_step(x[0], x[1], y)
    
    print("epoch:{} validation_loss:{}".format(epoch, validation_loss.result()))

## Loading model

In [14]:
embedKGQA.load_weights('data/saved_models/embedkgqa')
embedKGQA.summary()

Model: "embed_kgqa"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  393728    
_________________________________________________________________
dense_1 (Dense)              multiple                  262656    
_________________________________________________________________
dense_2 (Dense)              multiple                  262656    
_________________________________________________________________
kbg_model_1 (KBGModel)       multiple                  22141448  
Total params: 23,060,488
Trainable params: 919,040
Non-trainable params: 22,141,448
_________________________________________________________________


# Evaluate on a question

In [56]:
import random

thr = 0.05

keys = np.array(list(kb_mgr.entity_map.keys()))
idx = random.randint(1, len(test_dataset)-2)
qa1, qa2 = test_dataset[idx], test_dataset[idx+1]

print("question: {}\nanswers: {}".format(qa1.question, qa1.answers))
print("question: {}\nanswers: {}".format(qa2.question, qa2.answers))

x, y = next(iter(get_batch(
    [qa1, qa2],
    target_dim=entity_dim,
    batch_size=2)
))

preds = embedKGQA(
    x[0],
    x[1],
    training=False
).numpy()

preds_idxs = np.where(preds >= thr, 1.0, 0.0).astype('bool')
y = y.astype('bool')

np.sum(preds_idxs), np.sum(y)
for i, pred_idx in enumerate(preds_idxs):
    pred = keys[pred_idx]
    print("model answer: {}".format(pred))
    label = keys[y[i]]
    print("true answer: {}".format(label))

question: what words describe film The Illusionist
answers: ['magic', 'edward norton', 'paul giamatti', 'jessica biel', 'vienna', 'neil burger']
question: the movie Herbie Goes Bananas starred who
answers: ['Cloris Leachman']
model answer: ['jessica biel' 'edward norton' 'vienna' 'neil burger' 'sylvain chomet']
true answer: ['jessica biel' 'magic' 'paul giamatti' 'edward norton' 'vienna'
 'neil burger']
model answer: ['Cloris Leachman']
true answer: ['Cloris Leachman']


# Evaluate on test set

## "answers include our prediction" accuracy

In [48]:
k = 3
hits = 0
total = 0
batch_size = 100

for x, y in tqdm(get_batch(test_dataset, entity_dim, batch_size=batch_size)):
    total += batch_size
    
    predictions = embedKGQA(
        x[0],
        x[1],
        training=False
    )
    
    best_guess = np.argmax(predictions, axis=1)
    y = np.argpartition(y, -k)[:, -k:]
    for idx, guess in enumerate(best_guess):
        if guess in y[idx]:
            hits += 1
    
    print('accuracy: {:.2f}'.format(hits/total))
    





0it [00:00, ?it/s][A[A[A[A



1it [00:03,  3.49s/it][A[A[A[A

accuracy: 0.95






2it [00:06,  3.45s/it][A[A[A[A

accuracy: 0.94






3it [00:09,  3.34s/it][A[A[A[A

accuracy: 0.92






4it [00:12,  3.25s/it][A[A[A[A

accuracy: 0.91






5it [00:16,  3.29s/it][A[A[A[A

accuracy: 0.91






6it [00:19,  3.16s/it][A[A[A[A

accuracy: 0.91






7it [00:22,  3.18s/it][A[A[A[A

accuracy: 0.90






8it [00:25,  3.12s/it][A[A[A[A

accuracy: 0.90






9it [00:28,  3.14s/it][A[A[A[A

accuracy: 0.90






10it [00:32,  3.33s/it][A[A[A[A

accuracy: 0.89






11it [00:35,  3.24s/it][A[A[A[A

accuracy: 0.89






12it [00:38,  3.21s/it][A[A[A[A

accuracy: 0.90






13it [00:41,  3.13s/it][A[A[A[A

accuracy: 0.90






14it [00:44,  3.09s/it][A[A[A[A

accuracy: 0.90






15it [00:48,  3.23s/it][A[A[A[A

accuracy: 0.90






16it [00:51,  3.18s/it][A[A[A[A

accuracy: 0.89






17it [00:54,  3.15s/it][A[A[A[A

accuracy: 0.89






18it [00:57,  3.16s/it][A[A[A[A

accuracy: 0.89






19it [01:00,  3.13s/it][A[A[A[A

accuracy: 0.89






20it [01:03,  3.10s/it][A[A[A[A

accuracy: 0.89






21it [01:06,  3.07s/it][A[A[A[A

accuracy: 0.89






22it [01:09,  3.12s/it][A[A[A[A

accuracy: 0.89






23it [01:13,  3.34s/it][A[A[A[A

accuracy: 0.89






24it [01:16,  3.18s/it][A[A[A[A

accuracy: 0.89






25it [01:19,  3.07s/it][A[A[A[A

accuracy: 0.89






26it [01:22,  3.06s/it][A[A[A[A

accuracy: 0.89






27it [01:25,  3.12s/it][A[A[A[A

accuracy: 0.88






28it [01:28,  3.03s/it][A[A[A[A

accuracy: 0.88






29it [01:31,  2.99s/it][A[A[A[A

accuracy: 0.88






30it [01:34,  3.09s/it][A[A[A[A

accuracy: 0.88






31it [01:37,  3.00s/it][A[A[A[A

accuracy: 0.88






32it [01:40,  3.01s/it][A[A[A[A

accuracy: 0.88






33it [01:44,  3.23s/it][A[A[A[A

accuracy: 0.88






34it [01:47,  3.15s/it][A[A[A[A

accuracy: 0.88






35it [01:49,  3.05s/it][A[A[A[A

accuracy: 0.88






36it [01:52,  3.04s/it][A[A[A[A

accuracy: 0.88






37it [01:55,  2.92s/it][A[A[A[A

accuracy: 0.88






38it [01:58,  2.95s/it][A[A[A[A

accuracy: 0.88






39it [02:01,  3.03s/it][A[A[A[A

accuracy: 0.88






40it [02:05,  3.18s/it][A[A[A[A

accuracy: 0.88






41it [02:08,  3.29s/it][A[A[A[A

accuracy: 0.88






42it [02:12,  3.40s/it][A[A[A[A

accuracy: 0.88






43it [02:15,  3.25s/it][A[A[A[A

accuracy: 0.88






44it [02:18,  3.24s/it][A[A[A[A

accuracy: 0.88






45it [02:22,  3.39s/it][A[A[A[A

accuracy: 0.88






46it [02:26,  3.57s/it][A[A[A[A

accuracy: 0.88






47it [02:30,  3.69s/it][A[A[A[A

accuracy: 0.88






48it [02:33,  3.41s/it][A[A[A[A

accuracy: 0.88






49it [02:37,  3.61s/it][A[A[A[A

accuracy: 0.88






50it [02:40,  3.55s/it][A[A[A[A

accuracy: 0.88






51it [02:44,  3.67s/it][A[A[A[A

accuracy: 0.88






52it [02:47,  3.54s/it][A[A[A[A

accuracy: 0.88






53it [02:51,  3.50s/it][A[A[A[A

accuracy: 0.88






54it [02:54,  3.38s/it][A[A[A[A

accuracy: 0.88






55it [02:57,  3.30s/it][A[A[A[A

accuracy: 0.88






56it [03:00,  3.29s/it][A[A[A[A

accuracy: 0.88






57it [03:04,  3.46s/it][A[A[A[A

accuracy: 0.88






58it [03:08,  3.49s/it][A[A[A[A

accuracy: 0.88






59it [03:11,  3.35s/it][A[A[A[A

accuracy: 0.88






60it [03:14,  3.26s/it][A[A[A[A

accuracy: 0.88






61it [03:17,  3.23s/it][A[A[A[A

accuracy: 0.88






62it [03:21,  3.53s/it][A[A[A[A

accuracy: 0.88






63it [03:24,  3.44s/it][A[A[A[A

accuracy: 0.88






64it [03:28,  3.51s/it][A[A[A[A

accuracy: 0.88






65it [03:31,  3.46s/it][A[A[A[A

accuracy: 0.88






66it [03:35,  3.63s/it][A[A[A[A

accuracy: 0.88






67it [03:39,  3.64s/it][A[A[A[A

accuracy: 0.88






68it [03:43,  3.76s/it][A[A[A[A

accuracy: 0.88






69it [03:47,  3.78s/it][A[A[A[A

accuracy: 0.88






70it [03:50,  3.71s/it][A[A[A[A

accuracy: 0.88






71it [03:54,  3.63s/it][A[A[A[A

accuracy: 0.88






72it [03:57,  3.56s/it][A[A[A[A

accuracy: 0.88






73it [04:00,  3.40s/it][A[A[A[A

accuracy: 0.88






74it [04:03,  3.24s/it][A[A[A[A

accuracy: 0.88






75it [04:07,  3.56s/it][A[A[A[A

accuracy: 0.88


KeyboardInterrupt: 

## Exact answers

In [57]:
k = 3
hits = 0
total = 0
batch_size = 100
threshold = 0.05

for x, y in tqdm(get_batch(test_dataset, entity_dim, batch_size=batch_size)):
    total += batch_size
    
    predictions = embedKGQA(
        x[0],
        x[1],
        training=False
    )
    
    preds_idxs = np.where(predictions >= thr, 1.0, 0.0).astype('int')
    for idx, guess in enumerate(preds_idxs):
        if np.array_equal(guess, y[idx]):
            hits += 1
    
    print('accuracy: {:.2f}'.format(hits/total))








0it [00:00, ?it/s][A[A[A[A[A[A[A






1it [00:02,  2.45s/it][A[A[A[A[A[A[A

accuracy: 0.54









2it [00:04,  2.34s/it][A[A[A[A[A[A[A

accuracy: 0.55









3it [00:06,  2.19s/it][A[A[A[A[A[A[A

accuracy: 0.56









4it [00:08,  2.10s/it][A[A[A[A[A[A[A

accuracy: 0.56









5it [00:10,  2.14s/it][A[A[A[A[A[A[A

accuracy: 0.58









6it [00:12,  2.19s/it][A[A[A[A[A[A[A

accuracy: 0.56


KeyboardInterrupt: 