In [1]:
import json
import os
import pickle
import warnings

import utils

from ontology_processing import OntologyProc
from rdf_processing import RDFProc
from report_processing import ReportProc
from bionlp import BioNLP

In [2]:
# filter warnings produced by spacy on similarity between empty vectors
warnings.filterwarnings('ignore')

In [3]:
## set instances - this part needs to be uploaded as a request server (I guess?)

# Path of the binary embeddings file handed to BioNLP through `biofast`
# (presumably the BioWordVec fastText model, judging by the file name - TODO confirm).
# The original notebook hard-coded a path inside one user's home directory; it is kept
# as the fallback so existing setups behave the same, while other machines can point to
# their own copy by setting the BIOWORDVEC_PATH environment variable before starting the kernel.
biofast_path = os.environ.get(
    'BIOWORDVEC_PATH',
    '/home/ims/Desktop/Marchesin/EXAMODE_COLON/embeddings/BioWordVec_PubMed_MIMICIII_d200.bin',
)

# ontology processing
exa_proc = OntologyProc(ontology_path='./ontology/examode.owl', hiearchies_path='./hierarchy_relations.txt')
# rdf processing
rdf_proc = RDFProc()
# report processing
report_proc = ReportProc()
# biomedical nlp processing
bio_proc = BioNLP(biospacy="en_core_sci_lg", biofast=biofast_path,
                  rules='./rules.txt', dysplasia_mappings='./dysplasia_mappings.txt')




In [24]:
## load datasets

# read the pickled Radboud translated reports (2nd dataset) into `reports`
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source
reports_pkl = './trans_reports/colon/radboud/radboud_translated_reports_2ndDS.pkl'
with open(reports_pkl, 'rb') as pkl_file:
    reports = pickle.load(pkl_file)

In [20]:
## user-defined parameters

# define disease use case
# (read by the later cells that restrict the rules/ontology, link entities, and build the rdf graphs)
use_case = 'colon'

In [8]:
## restrict resources based on the ontology use case

# narrow the hand-crafted rules and dysplasia mappings to the chosen use case
bio_proc.restrict2use_case(use_case=use_case)
# narrow the ontology to the chosen use case; the result is reused by the linking cell
exa_use_case = exa_proc.restrict2use_case(use_case=use_case)
# lower-case the 'label' entries of the restricted ontology ...
lowercased_labels = [concept_label.lower() for concept_label in exa_use_case['label'].tolist()]
# ... and let the nlp processor turn them into the concept labels used for linking
exa_labels = bio_proc.process_ontology_concepts(labels=lowercased_labels)

In [25]:
## link report text to ontology concepts, merging the 'structured' and 'nlp' parts

# pre-process the Radboud reports; the call yields three values, unpacked below
# NOTE(review): `reports` is rebound to the processed output, so re-running this cell
# without reloading the pickle feeds already-processed reports back in
processed = report_proc.process_radboud_reports(reports)
reports, un_reports, mis_reports = processed

# run entity linking over the processed Radboud reports
concepts = bio_proc.radboud_entity_linking(
    reports,
    exa_proc,
    exa_labels,
    use_case,
    exa_use_case,
)

100%|██████████| 2510/2510 [00:00<00:00, 4109.76it/s]
100%|██████████| 686/686 [02:00<00:00,  5.69it/s]


In [13]:
## convert report concepts into pre-defined labels used for classification 

# get report labels 
# (input is the `concepts` produced by the entity-linking cell)
labels = utils.radboud_colon_concepts2labels(concepts)
# convert report labels to binary
# (derived from `labels`; both variables are written out by the storing cell)
binary = utils.radboud_colon_labels2binary(labels)

In [15]:
## write concepts and both label variants to their output locations

concepts_out = './data/concepts/colon/radboud/concepts_2ndDS'
labels_out = './data/labels/colon/radboud/labels_2ndDS'
binary_out = './data/labels/colon/radboud/binary_2ndDS'

utils.store_concepts(concepts, concepts_out)
utils.store_labels(labels, labels_out)
# kept as the final expression so the cell still displays this call's return value
utils.store_labels(binary, binary_out)

True

In [18]:
## build one rdf graph per report and serialize them in turtle, n3, and trig formats

# one graph per report id, built from the processed report and its linked concepts
graphs = [
    rdf_proc.radboud_create_graph(rid, reports[rid], concepts[rid], exa_proc, use_case)
    for rid in reports.keys()
]
# write the same list of graphs once per rdf format
# (the last call stays the final expression so the cell still displays its return value)
rdf_proc.searialize_report_graphs(graphs, output='./data/graphs/colon/radboud/graph_2ndDS.ttl', rdf_format='turtle')
rdf_proc.searialize_report_graphs(graphs, output='./data/graphs/colon/radboud/graph_2ndDS.n3', rdf_format='n3')
rdf_proc.searialize_report_graphs(graphs, output='./data/graphs/colon/radboud/graph_2ndDS.trig', rdf_format='trig')

decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention(s)
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention(s)
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention(s)
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention
decoupled dysplasia mention(s)
decoupled dysplasia mention
decoupled dysplasia mention
decouple

True