In [6]:
import pickle
from graphviz import Digraph
from collections import defaultdict

In [5]:
with open('semlink.pickle', 'rb') as f:
    semlink = pickle.load(f)

In [3]:
with open('deepbank.pickle', 'rb') as f:
    deepbank = pickle.load(f)

In [93]:
deepbank = [sentence for sentence in deepbank if sentence and sentence.id not in (20455028, 20296014, 21286044)]

In [94]:
semlink_dict = defaultdict(list)
for instance in semlink:
    semlink_dict[instance.id].append(instance)

In [95]:
from data_classes import Sentence
sentences = [Sentence(sentence.id, sentence, semlink_dict[sentence.id]) for sentence in deepbank if sentence]

In [96]:
len(sentences)

15103

In [97]:
semlink[0]

SemlinkInstance(id=20001001, token='join', verb='join-v', frame='Cause_to_amalgamate', args={'ARG0': 'Agent', 'ARG1': 'Part_1'})

In [98]:
def display_graph(sentence):
    g = Digraph(str(sentence.id), directory='out/')
    eds = sentence.deepbank.eds
    predicate_map = dict()
    edge_map = dict()
    tokens = [val[0] for val in sentence.deepbank.tokens]
    for instance in sentence.semlink:
        try:
            token_lnk = [token for token in sentence.deepbank.tokens if instance.token in token[0]][0][1]
        except IndexError as e:
            raise Exception(f'''
            {sentence.id}\n
            {sentence.deepbank.sentence}\n
            {sentence.deepbank.tokens}\n
            {instance.token}
            ''') from e
        start, end = token_lnk.split(':')
        try:
            predicate_id = [node for node in eds.nodes if node.cfrom >= int(start[1:]) and node.cto >= int(end[:-1])][0].id
        except IndexError as e:
            raise Exception(f'''
            {instance}\n
            {sentence.deepbank.tokens}\n
            {token_lnk}
            ''') from e
        predicate_map[predicate_id] = instance.frame
        for i, edge in enumerate(eds.edges):
            if edge[0] == predicate_id:
                try:
                    arg = edge[1][:-1] + str(int(edge[1][-1]) - 1)
                    if arg in instance.args:
                        edge_map[i] = instance.args[arg]
                except ValueError:
                    pass
    for node in eds.nodes:
        if node.id in predicate_map:
            g.node(node.id, label=f'{node.predicate}-fn.{predicate_map[node.id]}', color='red')
        else:
            g.node(node.id, label=node.predicate)
    for i, edge in enumerate(eds.edges):
        if i in edge_map:
            g.edge(edge[0], edge[2], label=f'{edge[1]}-fn.{edge_map[i]}', color='blue')
        else:
            g.edge(edge[0], edge[2], label=edge[1])
    g.render()

In [99]:
from tqdm.contrib.concurrent import process_map
process_map(display_graph, sentences, chunksize=10)

  0%|          | 0/15103 [00:00<?, ?it/s]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [72]:
semlink_dict[20455028]

[SemlinkInstance(id=20455028, token='caused', verb='cause-v', frame='Causation', args={'ARG1': 'Effect', 'ARG0': 'Cause'})]

In [76]:
[sentence for sentence in deepbank if sentence and sentence.id == 20455028]

[DeepbankSentence(id=20455028, sentence='General Dynamics was sued by the families of five Navy divers who were killed in 1982 after they re-entered a submarine through a diving chamber.', tokens=[('General', '<0:7>'), ('Dynamics', '<8:16>'), ('was', '<17:20>'), ('sued', '<21:25>'), ('by', '<26:28>'), ('the', '<29:32>'), ('families', '<33:41>'), ('of', '<42:44>'), ('five', '<45:49>'), ('Navy', '<50:54>'), ('divers', '<55:61>'), ('who', '<62:65>'), ('were', '<66:70>'), ('killed', '<71:77>'), ('in', '<78:80>'), ('1982', '<81:85>'), ('after', '<86:91>'), ('they', '<92:96>'), ('re-entered', '<97:107>'), ('a', '<108:109>'), ('submarine', '<110:119>'), ('through', '<120:127>'), ('a', '<128:129>'), ('diving', '<130:136>'), ('chamber', '<137:144>'), ('.', '<144:145>')], eds=<EDS object (proper_q compound proper_q named named _sue_v_1 parg_d _the_q _family_n_of udef_q card compound proper_q named _diver_n_1 _kill_v_1 parg_d _in_p_temp proper_q yofc _after_x_h pron pronoun_q _enter_v_1 _re-_a_ag