In [1]:
import os

# Move the working directory one level up so that the relative paths used in
# later cells (e.g. 'RE/assets/...') are resolved from there.
# NOTE(review): the target is relative to the current directory, so executing
# this cell a second time in the same kernel moves up one more level.
os.chdir('..')

In [2]:
import json
import pandas as pd

from RE.RelationExtractor import RelationExtractor

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import spacy

# Ask spaCy to use a GPU if it can and print the boolean it returns.
print('Using GPU: ', spacy.prefer_gpu())

Using GPU:  True


### Read data

In [4]:
# Path of the annotation file read below, relative to the current working directory.
data_path = 'RE/assets/annotations_test.jsonl'

In [5]:
def read_jsonl(data_path):
    """Load a JSON Lines file into a pandas DataFrame.

    Every non-blank line of the file is parsed as one JSON document and
    becomes one row of the result, in file order.

    Parameters
    ----------
    data_path : str or path-like
        Location of the ``.jsonl`` file.

    Returns
    -------
    pandas.DataFrame
        One row per parsed record; empty if the file holds no records.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8; decode explicitly instead of relying on the
    # platform's locale encoding.
    with open(data_path, 'r', encoding='utf-8') as f:
        # Stream the file line by line and skip blank/whitespace-only lines
        # (for example an empty line at the end), which json.loads rejects.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)

In [6]:
# Parse the annotation file into a DataFrame.
data = read_jsonl(data_path)

In [7]:
# Preview the first rows to check the columns that were loaded.
data.head()

Unnamed: 0,text,spans,tokens,relations,answer,meta
0,"additionally, whereas incubation of adipose-ti...","[{'id': 35091, 'text': 'adipose-tissue', 'star...","[{'text': 'additionally', 'start': 0, 'end': 1...","[{'head': 8, 'child': 18, 'head_span': {'start...",accept,{'source': 'unkown'}
1,in comparison to a pressed echinaceae-preparat...,"[{'id': 17727, 'text': 'fec', 'start': 51, 'to...","[{'text': 'in', 'start': 0, 'end': 2, 'id': 0,...","[{'head': 10, 'child': 15, 'head_span': {'star...",accept,{'source': 'unkown'}
2,dna damage and replication stress activate th...,"[{'id': 35408, 'text': 'dna', 'start': 1, 'tok...","[{'text': ' ', 'start': 0, 'end': 1, 'id': 0, ...","[{'head': 2, 'child': 9, 'head_span': {'start'...",accept,{'source': 'unkown'}
3,"when compared to controls, the density of 125i...","[{'id': 35071, 'text': '125i-ngf', 'start': 42...","[{'text': 'when', 'start': 0, 'end': 4, 'id': ...","[{'head': 11, 'child': 21, 'head_span': {'star...",accept,{'source': 'unkown'}
4,we establish that rnf169 binds to ubiquitylate...,"[{'id': 18566, 'text': 'rnf169', 'start': 18, ...","[{'text': 'we', 'start': 0, 'end': 2, 'id': 0,...","[{'head': 4, 'child': 10, 'head_span': {'start...",accept,{'source': 'unkown'}


In [8]:
# Plain Python list holding the `text` column, in row order.
texts = data.text.values.tolist()

In [9]:
# For every row of `data`, reduce each annotated span dict to a
# (start, end, label) triple; the outer list follows the row order of `data`.
spans = [
    [(ann['start'], ann['end'], ann['label']) for ann in row_spans]
    for row_spans in data.spans
]

In [10]:
# Show the (start, end, label) triples built for the first row.
spans[0]

[(36, 50, 'TISSUE'),
 (93, 121, 'TARGET'),
 (142, 168, 'TARGET'),
 (185, 191, 'TARGET'),
 (326, 345, 'CHEMICAL')]

### Predict

In [11]:
# Build the extractor with the 'transformer' model when spaCy reports a usable
# GPU and with the 'tok2vec' model otherwise.
# NOTE(review): `re` is also the name of the standard-library regex module;
# this binding would shadow it if that module were imported in the notebook.
re = RelationExtractor(model_re='transformer' if spacy.prefer_gpu() else 'tok2vec')

In [14]:
# Run the extractor on every annotated sentence, handing it the annotated
# spans of the same row through `ents`. The call prints the spans and the
# predicted relations itself (see the cell output).
for row_idx, sentence in enumerate(texts):
    print('\n\n\n')
    print('Text : ', sentence)
    preds = re.get_predictions(sentence, threshold=0.4, disable_ner=True, ents=spans[row_idx])





Text :  additionally, whereas incubation of adipose-tissue extracts from fed steers did not activate endogenous glycogen synthase (through a presumed phosphoprotein phosphatase mechanism), the enzyme from starved or re-fed (up to 3 days re-feeding) steers was reversibly activated as measured by changes in the value for the a0.5 for glucose 6-phosphate
spans: [(5, 'adipose-tissue', 'TISSUE'), (15, 'endogenous glycogen synthase', 'TARGET'), (22, 'phosphoprotein phosphatase', 'TARGET'), (28, 'enzyme', 'TARGET'), (59, 'glucose 6-phosphate', 'CHEMICAL')]
entities: ('adipose-tissue', 'endogenous glycogen synthase') --> predicted relation: Activate




Text :  in comparison to a pressed echinaceae-preparation, fec activated mouse macrophages secrete interleukin-6 and tumor-necrosis-factor and kill protozoa, fungi and bacteria, with higher efficiency
spans: [(9, 'fec', 'CHEMICAL'), (12, 'macrophages', 'TISSUE'), (14, 'interleukin-6', 'TARGET'), (16, 'tumor-necrosis-factor', 'TARGET')]
enti

### Try with simple sentences

In [15]:
# Hand-written probe sentences: active/passive voice, negation, and one or two
# objects (B1, optionally B2) for the subject A1.
texts_test = [
    'A1 inhibits B1',
    'A1 inhibited B1',
    'A1 has been inhibited by B1',
    'A1 does not inhibit B1',
    'A1 and B1 and not related',
    'A1 is inhibited by B1',
    'A1 and B1 does not share an inhibition relation',
    'A1 and B1 share an inhibition relation',
    "A1 isn't inhibited by B1",
    'A1 inhibits B1 and B2',
    'A1 is inhibited by B1 and inhibits B2',
    'A1 inhibits B1 and not B2',
    'A1 inhibits B1 and reduces B2',
    'A1 does not inhibit B1 but it reduces B2',
]

# One list of (start, end, 'TARGET') character spans per sentence. A1 and B1
# are looked up in every sentence; B2 is added only where it occurs. Each
# placeholder name is two characters long, hence the `+ 2`.
spans_test = []
for sentence in texts_test:
    names = ['A1', 'B1', 'B2'] if 'B2' in sentence else ['A1', 'B1']
    spans_test.append(
        [(sentence.index(name), sentence.index(name) + 2, 'TARGET') for name in names]
    )

In [16]:
# Run the extractor on each probe sentence together with its hand-built spans;
# the two lists are built in lockstep, so they are walked pairwise.
for sentence, sentence_spans in zip(texts_test, spans_test):
    print('\n')
    print('Text : ', sentence)
    preds = re.get_predictions(
        sentence,
        threshold=0.5,
        disable_ner=True,
        ents=sentence_spans,
    )



Text :  A1 inhibits B1
spans: [(0, 'A1', 'TARGET'), (2, 'B1', 'TARGET')]
entities: ('A1', 'B1') --> predicted relation: Inhibit


Text :  A1 inhibited B1
spans: [(0, 'A1', 'TARGET'), (2, 'B1', 'TARGET')]
entities: ('A1', 'B1') --> predicted relation: Inhibit


Text :  A1 has been inhibited by B1
spans: [(0, 'A1', 'TARGET'), (5, 'B1', 'TARGET')]
entities: ('B1', 'A1') --> predicted relation: Inhibit


Text :  A1 does not inhibit B1
spans: [(0, 'A1', 'TARGET'), (4, 'B1', 'TARGET')]
entities: ('A1', 'B1') --> predicted relation: Inhibit


Text :  A1 and B1 and not related
spans: [(0, 'A1', 'TARGET'), (2, 'B1', 'TARGET')]


Text :  A1 is inhibited by B1
spans: [(0, 'A1', 'TARGET'), (4, 'B1', 'TARGET')]
entities: ('B1', 'A1') --> predicted relation: Inhibit


Text :  A1 and B1 does not share an inhibition relation
spans: [(0, 'A1', 'TARGET'), (2, 'B1', 'TARGET')]


Text :  A1 and B1 share an inhibition relation
spans: [(0, 'A1', 'TARGET'), (2, 'B1', 'TARGET')]


Text :  A1 isn't inhibited

In [75]:
# Single free-text example: two entity mentions are substituted into a sentence
# and then located again to obtain their character offsets.
# NOTE(review): this rebinds `spans`, which an earlier cell uses for the
# per-row span lists of the annotated data.
spans_text = ['keratin', 'myosin']
text = f"""
{spans_text[0]} is also expressed in human heart and can associate with {spans_text[1]} to suppress 
its current amplitude and slow the deactivation gating process
"""
# (start, end, label) for the first occurrence of each mention in `text`.
spans = [
    (text.index(mention), text.index(mention) + len(mention), 'TARGET')
    for mention in spans_text
]

In [76]:
# Positional call. Judging by the keyword calls in the earlier cells, the
# arguments are presumably (text, threshold, disable_ner, ents) — TODO confirm
# against the signature of RelationExtractor.get_predictions.
re.get_predictions(text, 0.5, True, spans)

spans: [(1, 'keratin', 'TARGET'), (12, 'myosin', 'TARGET')]
entities: ('keratin', 'myosin') --> predicted relation: Bind


In [26]:
# Load the train and test annotation files into one DataFrame each.
# NOTE(review): this cell's execution count is out of sequence with the cells
# above; confirm the notebook still runs top to bottom on a fresh kernel.
annotations_train, annotations_test = map(
    read_jsonl,
    ('RE/assets/annotations_train.jsonl', 'RE/assets/annotations_test.jsonl'),
)