evaluate-qa.py
from transformers import AutoTokenizer, AutoModel
import json
import tensorflow as tf
import torch

MODEL = "bert-base-multilingual-cased"

data = json.load(open("qa_datasets/construction-qset-v1.json", encoding="utf-8"))
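# Assumed record shape, inferred from the keys used below (hypothetical sample):
#   [{"qid": 12, "label": "permits",
#     "question": "Do I need a permit to build a retaining wall?"}, ...]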
qids = []
dataset = []
for item in data:
    if item['qid'] in qids:
        # Only evaluate labels that map to more than one qid: with a single
        # qid per label, nearest-neighbour retrieval is trivially correct
        # every time, so measuring similarity there is pointless.
        qids_set = {x['qid'] for x in data if x['label'] == item['label']}
        if len(qids_set) > 1:
            dataset.append(item)
    else:
        qids.append(item['qid'])
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModel.from_pretrained(MODEL)
model.eval()

def embed(question):
    # Flatten the padded last hidden state into a single vector; truncation=True
    # keeps every question at exactly 128 tokens so the shapes always match.
    tokens = tokenizer(question, padding="max_length", truncation=True,
                       max_length=128, return_tensors='pt')
    with torch.no_grad():
        return tf.constant(model(**tokens).last_hidden_state.reshape(1, -1).tolist())

acc = c = 0
total = len(dataset)
for qa_test in dataset:
    max_cosine = qid = 0
    c += 1
    a = embed(qa_test['question'])
    for qa_train in [x for x in dataset
                     if x['label'] == qa_test['label'] and x['question'] != qa_test['question']]:
        b = embed(qa_train['question'])
        # Cosine similarity between the two flattened embeddings.
        similarity = tf.reduce_sum(a[:, tf.newaxis] * b, axis=-1)
        similarity /= tf.norm(a[:, tf.newaxis], axis=-1) * tf.norm(b, axis=-1)
        cosine = float(similarity.numpy())
        # Questions sharing a label are near-duplicates, so a cosine above 0.95
        # is already a confident match; stop scanning the rest of the set.
        if cosine > 0.95:
            qid = qa_train['qid']
            break
        elif max_cosine < cosine:
            qid = qa_train['qid']
            max_cosine = cosine
    if qid == qa_test['qid']:
        acc += 1
    print(c, acc / c)
print("Accuracy is {}".format(acc / total))