In [1]:
import torch
import matplotlib.pyplot as plt

In [2]:
from transformers import BertTokenizer, get_linear_schedule_with_warmup, BertConfig, BertForMaskedLM
# Checkpoint identifier handed to both from_pretrained calls below.
model_path = 'dmis-lab/biobert-base-cased-v1.2'
# NOTE(review): neither `tokenizer` nor `bert_lm` is referenced by any other
# cell visible in this notebook — confirm they are still needed before paying
# the cost of loading them.
tokenizer = BertTokenizer.from_pretrained(model_path)
bert_lm = BertForMaskedLM.from_pretrained(model_path)

In [None]:
# Directory holding the experiment artifacts. The trailing slash is required:
# file names are appended to this string with plain `+` concatenation.
exp_path = '../exp/0mp_unseen_path2/'

In [None]:
# Read the three saved artifacts from the experiment directory in one pass.
# NOTE(review): torch.load unpickles its input — only point exp_path at files
# you trust.
pack, name_array, triples = (
    torch.load(exp_path + file_name)
    for file_name in ('pack.bin', 'name.bin', 'triples.bin')
)
# Number of entities = number of entries in the name table.
ent_total = len(name_array)

In [None]:
def get_nei(triples, max_length, ent_total):
    """Compute, for every entity, the set of entities exactly k hops away.

    Edges are taken from ``triples`` and treated as undirected; the relation
    component of each triple is ignored.

    Args:
        triples: iterable of ``(head, relation, tail)`` where head and tail are
            entity ids usable as keys in ``range(ent_total)``.
        max_length: largest hop distance to compute (0 returns only the
            trivial 0-hop level).
        ent_total: number of entities; ids are ``0 .. ent_total - 1``.

    Returns:
        A list ``neis`` of ``max_length + 1`` dicts. ``neis[k][e]`` is the set
        of entities whose shortest undirected distance to ``e`` is exactly
        ``k`` (so ``neis[0][e] == {e}``).

    Raises:
        KeyError: if a triple mentions an id outside ``range(ent_total)``.
    """
    neis = [{} for _ in range(max_length + 1)]  # neis[i] stores i-hop neighbors
    neis[0] = {e: {e} for e in range(ent_total)}
    if max_length == 0:
        return neis

    # Direct (1-hop) adjacency, undirected.
    one_hop = neis[1]
    for e in range(ent_total):
        one_hop[e] = set()
    for h, _rel, t in triples:
        one_hop[h].add(t)
        one_hop[t].add(h)

    # First build cumulative sets: everything reachable by walking through a
    # direct neighbor and then up to (length - 1) further hops.
    for length in range(2, max_length + 1):
        previous = neis[length - 1]
        current = neis[length]
        for center in range(ent_total):
            reach = set(one_hop[center])
            for mid in one_hop[center]:
                reach |= previous[mid]
            current[center] = reach

    # Then strip every closer level from each level, farthest level first, so
    # the levels being subtracted are still in their cumulative form. The
    # bounds follow max_length instead of a fixed 5/6, which previously broke
    # any call with max_length != 5.
    for length in range(max_length, 0, -1):
        farther = neis[length]
        for shorter in range(length - 1, -1, -1):
            closer = neis[shorter]
            for e in range(ent_total):
                farther[e] -= closer[e]

    return neis
# Precompute hop-distance neighbor sets for hop counts 0..5; later cells look
# entities up as neis[depth][entity_id].
neis = get_nei(triples, 5, ent_total)

In [None]:
# Column 0 of pack['labels'] is used as each example's label entity id
# (presumably the gold entity — confirm against the code that wrote pack.bin).
labels = pack['labels'][:, 0]
N = len(labels)

# Number of highest-ranked predictions scored per example. Kept in one place so
# the slice, the normalisation and the printed message cannot drift apart.
top_k = 1
ranked = pack['idx']

# results[d] = mean fraction of an example's top-k predictions that sit exactly
# d hops from its label. The hop range follows len(neis) rather than a literal.
results = []
for depth in range(len(neis)):
    hop_sets = neis[depth]
    result = 0
    for i in range(N):
        topk = ranked[i][:top_k].tolist()
        result += len(hop_sets[int(labels[i])].intersection(topk)) / top_k
    result /= N
    print(f'{depth}-hop neighbor in top{top_k} = {result}')
    results.append(result)

# Anything missing from 1.0 is a prediction farther than the largest hop
# distance in neis (or disconnected from the label).
print('sum = ', sum(results))

0-hop neighbor in top1 = 0.8976798143851508
1-hop neighbor in top1 = 0.018329466357308585
2-hop neighbor in top1 = 0.02157772621809745
3-hop neighbor in top1 = 0.00580046403712297
4-hop neighbor in top1 = 0.010904872389791183
5-hop neighbor in top1 = 0.0034802784222737818
sum =  0.9577726218097448


In [None]:
from collections import defaultdict

# child -> direct parents, and the inverse parent -> direct children, built
# from the 'is_a' triples (head is_a tail).
child2parent = defaultdict(set)
parent2children = defaultdict(set)
for h, r, t in triples:
    if r == 'is_a':
        child2parent[h].add(t)
        parent2children[t].add(h)

# Entity count; not needed by the grouping below but kept because it is a
# notebook-level name other cells may read.
E = len(name_array)

# Ordered pairs (a, c), a != c, of entities sharing at least one direct parent.
# Enumerating pairs per parent avoids testing every one of the E*E entity
# pairs. Assumes every 'is_a' head id lies in range(E), which get_nei already
# requires of these same triples.
siblings = set()
for children in parent2children.values():
    for a in children:
        for c in children:
            if a != c:
                siblings.add((a, c))

# grandpas holds (entity, its grandparent); grandsons holds the reversed pair
# (grandparent, entity). `.get` keeps lookups from inserting empty entries into
# the defaultdict while it is being iterated.
grandpas = set()
grandsons = set()
for child, parents in child2parent.items():
    for parent in parents:
        for grandparent in child2parent.get(parent, ()):
            grandpas.add((child, grandparent))
            grandsons.add((grandparent, child))

In [None]:
# Break down the examples whose top-1 prediction is exactly `depth` hops from
# the label by the is_a relationship between label and prediction.
depth = 2

results = {'grandpa': 0, 'grandson': 0, 'sibling': 0, 'other': 0}
cnt = 0  # examples whose top-1 prediction is at this hop distance

for i in range(N):
    gold = int(labels[i])
    top1 = int(pack['idx'][i][0])
    if top1 not in neis[depth][gold]:
        continue
    cnt += 1
    pair = (gold, top1)
    # Categories are checked in priority order; a pair counts once.
    if pair in grandpas:
        results['grandpa'] += 1
    elif pair in grandsons:
        results['grandson'] += 1
    elif pair in siblings:
        results['sibling'] += 1
    else:
        results['other'] += 1
        # Surface unexplained cases for manual inspection.
        print(i, gold, top1)

# Normalise to fractions; report zeros instead of dividing by zero when no
# prediction landed at this hop distance.
result = {k: (v / cnt if cnt else 0.0) for k, v in results.items()}
print(f'{depth}-hop: pred is label\'s {result}')

2-hop: pred is label's {'grandpa': 0.012944983818770227, 'grandson': 0.006472491909385114, 'sibling': 0.9805825242718447, 'other': 0.0}
