In [7]:
import json
import os

In [8]:
with open('unlabeled.json', 'r') as f:
    unlabeled = json.load(f)

In [9]:
with open('iters/iter=0/prediction_0.json', 'r') as f:
    pred0 = json.load(f)
with open('iters/iter=0/prediction_1.json', 'r') as f:
    pred1 = json.load(f)

In [17]:
def convert_to_word_atoms(prediction, model_num):
    all_atoms = []
    for row in prediction:
        atoms = []
        tokens = row['tokens']
        entities = row['entities']
        relations = row['relations']
        for ent in entities:
            c = 'atom({}("{}"), {}).'.format(ent[2].lower(), "+".join(tokens[ent[0]:ent[1]]), model_num)
            atoms.append(c)
        for rel in relations:
            c = 'atom({}("{}", "{}"), {}).'.format(''.join([rel[4].split('_')[0].lower(), *rel[4].split('_')[1:]]), 
                                    "+".join(tokens[rel[0]:rel[1]]), 
                                    "+".join(tokens[rel[2]:rel[3]]), model_num)
            atoms.append(c)
        all_atoms.append(atoms)
    return all_atoms

In [11]:
all_atoms0 = convert_to_word_atoms(pred0)
all_atoms1 = convert_to_word_atoms(pred1)

In [18]:
for r in range(4):
    os.makedirs(f'iters_atoms/iter={r}', exist_ok=True)
    with open(f'iters/iter={r}/prediction_0.json', 'r') as f:
        pred0 = json.load(f)
    with open(f'iters/iter={r}/prediction_1.json', 'r') as f:
        pred1 = json.load(f)
    all_atoms0 = convert_to_word_atoms(pred0)
    all_atoms1 = convert_to_word_atoms(pred1)
    with open(f'iters_atoms/iter={r}/all_atoms_0.json', 'w') as f:
        json.dump(all_atoms0, f)
    with open(f'iters_atoms/iter={r}/all_atoms_1.json', 'w') as f:
        json.dump(all_atoms1, f)

TypeError: convert_to_word_atoms() missing 1 required positional argument: 'model_num'

In [19]:
import shutil

if os.path.exists('iters_atoms'):
    shutil.rmtree('iters_atoms')
for r in range(4):
    os.makedirs(f'iters_atoms/iter={r}', exist_ok=True)
    with open(f'iters/iter={r}/prediction_0.json', 'r') as f:
        pred0 = json.load(f)
    with open(f'iters/iter={r}/prediction_1.json', 'r') as f:
        pred1 = json.load(f)
    all_atoms0 = convert_to_word_atoms(pred0, model_num=1)
    all_atoms1 = convert_to_word_atoms(pred1, model_num=2)
    
    for i, (atoms0, atoms1) in enumerate(zip(all_atoms0, all_atoms1)):
        with open(f'iters_atoms/iter={r}/{i}.1.txt', 'w') as f:
            f.writelines(map(lambda x: x+'\n', atoms0))
        with open(f'iters_atoms/iter={r}/{i}.2.txt', 'w') as f:
            f.writelines(map(lambda x: x+'\n', atoms1))

In [12]:
match_idx = []
unmatch_idx = []
for i, (atoms0, atoms1) in enumerate(zip(all_atoms0, all_atoms1)):
    if set(atoms0) == set(atoms1):
        match_idx.append(i)
    else:
        unmatch_idx.append(i)

In [13]:
len(match_idx)

242

In [14]:
len(all_atoms0)

646

In [15]:
incorrect_idx = []
for i in match_idx:
    pred = [tuple(e) for e in pred0[i]['entities']]
    gt = pred0[i]['entity_gts']
    gt = [tuple(e[:3]) for e in gt]
    if set(pred) != set(gt):
        incorrect_idx.append(i)

In [26]:
len(incorrect_idx)

64

In [39]:
pred0[48]

{'tokens': ['`',
  '`',
  'Please',
  'don',
  "'t",
  'execute',
  'him',
  ',',
  'please',
  ',',
  "'",
  "'",
  'said',
  'Mrs.',
  'Cicippio',
  ',',
  'who',
  'works',
  'at',
  'the',
  'American',
  'Embassy',
  'in',
  'east',
  'Beirut.'],
 'entities': [[13, 15, 'Peop'], [20, 22, 'Org'], [24, 25, 'Loc']],
 'relations': [[20, 22, 24, 25, 'OrgBased_In'],
  [13, 15, 20, 22, 'Work_For'],
  [13, 15, 24, 25, 'Live_In']],
 'entity_gts': [[13, 15, 'Peop', 'Mrs. Cicippio'],
  [20, 22, 'Loc', 'American Embassy'],
  [24, 25, 'Loc', 'Beirut.']],
 'relation_gts': [[13, 15, 24, 25, 'Live_In', 'Mrs. Cicippio', 'Beirut.'],
  [20, 22, 24, 25, 'Located_In', 'American Embassy', 'Beirut.']],
 'entities_': [[13, 15, 'Peop', 'Mrs. Cicippio', [1.0, 1.0]],
  [20, 22, 'Org', 'American Embassy', [0.9951382279396057, 1.0]],
  [24, 25, 'Loc', 'Beirut.', [1.0]]],
 'relations_': [[20,
   22,
   24,
   25,
   'OrgBased_In',
   'American Embassy',
   'Beirut.',
   [[0.9809473752975464], [0.999995827674865

In [105]:
k = 642
print('entities: ', pred0[k]['entity_gts'])
print('==========================')
print('relations: ', pred0[k]['relation_gts'])

entities:  [[0, 1, 'Other', 'An-a-one'], [18, 20, 'Peop', 'Lawrence Welk'], [21, 23, 'Loc', 'North Dakota'], [34, 35, 'Org', 'Congress']]
relations:  [[18, 20, 21, 23, 'Live_In', 'Lawrence Welk', 'North Dakota']]


In [107]:
17 / len(incorrect_idx)

0.265625

In [73]:
interesting_cases = [1, 2, 34, 75]

In [28]:
for i, idx in enumerate(incorrect_idx):
    print(idx, all_atoms0[idx])

4 ['Peop(John Wilkes Booth)', 'Peop(Abraham Lincoln)', 'Kill(John Wilkes Booth, Abraham Lincoln)']
44 ['Loc(U.S.)', 'Peop(Marvin Feuerwerger)', 'Org(Washington Institute)', 'Org(Near East Policy)', 'Work_For(Marvin Feuerwerger, Near East Policy)', 'Work_For(Marvin Feuerwerger, Washington Institute)']
48 ['Peop(Mrs. Cicippio)', 'Org(American Embassy)', 'Loc(Beirut.)', 'OrgBased_In(American Embassy, Beirut.)', 'Work_For(Mrs. Cicippio, American Embassy)', 'Live_In(Mrs. Cicippio, Beirut.)']
54 ['Peop(William Leonard Jennings)', 'Loc(Yorkshire)', 'Peop(Stephen)', 'Other(Dec.)']
62 ['Other(80 mph)', 'Loc(Covington)', 'Loc(Okla.)', 'Located_In(Covington, Okla.)']
70 ['Other(Jan.)', 'Peop(Elvis Presley)', 'Loc(Tupelo , Miss)', 'Live_In(Elvis Presley, Tupelo , Miss)']
87 ['Peop(Birand)', 'Loc(Germany)']
93 ['Peop(Emily Dickinson)', 'Loc(Amherst , Mass .)', 'Live_In(Emily Dickinson, Amherst , Mass .)']
126 ['Loc(Modesto)', 'Peop(George Lucas)', 'Live_In(George Lucas, Modesto)']
129 ['Peop(Sevket

In [16]:
for i, idx in enumerate(unmatch_idx):
    print(idx, all_atoms0[idx])
    print(idx, all_atoms1[idx])
    print('=======================================')

0 ['atom(peop("Annie+Oakley")).', 'atom(peop("Phoebe+Ann+Moses")).', 'atom(loc("Willowdell+,+Darke+County")).', 'atom(liveIn("Phoebe+Ann+Moses", "Willowdell+,+Darke+County")).']
0 ['atom(peop("Annie+Oakley")).', 'atom(peop("Phoebe+Ann+Moses")).', 'atom(loc("Willowdell")).', 'atom(loc("Darke+County")).', 'atom(liveIn("Phoebe+Ann+Moses", "Darke+County")).', 'atom(liveIn("Phoebe+Ann+Moses", "Willowdell")).', 'atom(liveIn("Annie+Oakley", "Willowdell")).']
1 ['atom(loc("Anhui")).', 'atom(loc("Beijing")).', 'atom(org("XINHUA")).', 'atom(other("1313+GMT")).', 'atom(other("11+Feb+94")).', 'atom(orgbasedIn("XINHUA", "Anhui")).', 'atom(orgbasedIn("XINHUA", "Beijing")).']
1 ['atom(loc("Beijing")).', 'atom(org("XINHUA")).', 'atom(other("1313+GMT")).', 'atom(other("11+Feb+94")).', 'atom(orgbasedIn("XINHUA", "Beijing")).']
2 ['atom(peop("Shi+Liming")).', 'atom(org("Institute+of+Zoology")).', 'atom(loc("Kunming")).', 'atom(liveIn("Shi+Liming", "Kunming")).', 'atom(workFor("Shi+Liming", "Institute+of+

In [71]:
k = 75
all_atoms1[k]

['Loc(Cape+Town)',
 'Org(Supreme+Court)',
 'Peop(J.H.+Berman)',
 'Loc(Port+Nolloth)',
 'Work_For(J.H.+Berman, Cape+Town)',
 'Live_In(J.H.+Berman, Port+Nolloth)',
 'Work_For(J.H.+Berman, Supreme+Court)']

In [None]:
'Loc(Cape+Town)', 
'Org(Supreme+Court)', 
'Peop(J.H.+Berman)', 
'Loc(Port+Nolloth)', 
'Work_For(J.H.+Berman, Supreme+Court)', 
'OrgBased_In(Supreme+Court, Cape+Town)', 
'Live_In(J.H.+Berman, Port+Nolloth)'

In [72]:
print('entities: ', pred1[k]['entity_gts'])
print('==========================')
print('relations: ', pred1[k]['relation_gts'])

entities:  [[0, 2, 'Loc', 'Cape Town'], [2, 4, 'Org', 'Supreme Court'], [5, 7, 'Peop', 'J.H. Berman'], [10, 12, 'Loc', 'Port Nolloth']]
relations:  [[2, 4, 0, 2, 'OrgBased_In', 'Supreme Court', 'Cape Town'], [5, 7, 0, 2, 'Live_In', 'J.H. Berman', 'Cape Town'], [5, 7, 2, 4, 'Work_For', 'J.H. Berman', 'Supreme Court']]


In [19]:
pred1[k]

{'tokens': ['Meanwhile',
  ',',
  'Shi',
  'Liming',
  'at',
  'the',
  'Institute',
  'of',
  'Zoology',
  'of',
  'Kunming',
  'found',
  'that',
  'pandas',
  'lack',
  'variety',
  'in',
  'their',
  'protein',
  'heredity',
  ',',
  'which',
  'may',
  'serve',
  'as',
  'one',
  'of',
  'the',
  'major',
  'reasons',
  'for',
  'pandas',
  "'",
  'near',
  'extinction',
  '.'],
 'entities': [[2, 4, 'Peop'], [10, 11, 'Loc']],
 'relations': [[2, 4, 10, 11, 'Live_In']],
 'entity_gts': [[2, 4, 'Peop', 'Shi Liming'],
  [6, 9, 'Org', 'Institute of Zoology'],
  [10, 11, 'Loc', 'Kunming']],
 'relation_gts': [[2,
   4,
   6,
   9,
   'Work_For',
   'Shi Liming',
   'Institute of Zoology'],
  [6, 9, 10, 11, 'OrgBased_In', 'Institute of Zoology', 'Kunming']],
 'entities_': [[2, 4, 'Peop', 'Shi Liming', [1.0, 1.0]],
  [10, 11, 'Loc', 'Kunming', [1.0]]],
 'relations_': [[2,
   4,
   10,
   11,
   'Live_In',
   'Shi Liming',
   'Kunming',
   [[0.9999998807907104], [1.0]]]],
 'agg_probs': 0.999