In [1]:
import pymysql
import pandas as pd
import getpass
from PyRuSH.RuSH import RuSH
from pyConTextNLP import pyConTextGraph
from DocumentClassifier import DocumentClassifier 
from pyConTextNLP.utils import get_document_markups

from DocumentClassifier import FeatureInferencer
from DocumentClassifier import DocumentInferencer
from nlp_pneumonia_utils import markup_sentence
from itemData import get_item_data
from visual import convertMarkups2DF

from visual import view_pycontext_output
from visual import view_pycontext_outputs
from visual import Vis

In [2]:
# begin to define MyPipe class
class MyPipe:
    """Rule-driven pipeline that turns a document's text into a document class.

    The pipeline chains a RuSH sentence segmenter, per-sentence markup via
    ``markup_sentence``, a ``FeatureInferencer`` and a ``DocumentInferencer``.
    """

    def __init__(self, sentence_rules, target_rules, context_rules, feature_inference_rule, document_inference_rule):
        """Build every pipeline component from its rule definition.

        Args:
            sentence_rules: handed to ``RuSH`` to build the sentence segmenter.
            target_rules: handed to ``get_item_data`` to build the target items.
            context_rules: handed to ``get_item_data`` to build the modifier items.
            feature_inference_rule: handed to ``FeatureInferencer``.
            document_inference_rule: handed to ``DocumentInferencer``.
        """
        self.sentence_segmenter = RuSH(sentence_rules)
        self.targets = get_item_data(target_rules)
        self.modifiers = get_item_data(context_rules)
        self.feature_inferencer = FeatureInferencer(feature_inference_rule)
        self.document_inferencer = DocumentInferencer(document_inference_rule)

    def process(self, doc_text):
        """Run the whole pipeline on one document.

        Args:
            doc_text: the raw document text.

        Returns:
            A 4-tuple ``(doc_class, context_doc, annotations, relations)``:
            the document-level conclusion, the ``ConTextDocument`` holding
            every sentence markup, and the two values taken from
            ``convertMarkups2DF``.
        """
        sentence_spans = self.sentence_segmenter.segToSentenceSpans(doc_text)
        context_doc = pyConTextGraph.ConTextDocument()

        for span in sentence_spans:
            # Each sentence is lower-cased before it is matched against the rules.
            lowered = doc_text[span.begin:span.end].lower()
            context_doc.addMarkup(
                markup_sentence(lowered, modifiers=self.modifiers, targets=self.targets)
            )

        # The third value returned by convertMarkups2DF is not needed here.
        annotations, relations, _ = convertMarkups2DF(get_document_markups(context_doc))

        # Mention-level inference feeds the document-level inference.
        doc_class = self.document_inferencer.process(
            self.feature_inferencer.process(annotations, relations)
        )

        return doc_class, context_doc, annotations, relations

In [14]:
# Rule file for the sentence segmenter.
sentence_rules='KB/rush_rules.tsv'
# you can point target_rules to a file path instead, if there are many rules
# NOTE: this is a raw Python string so the regex backslashes (\s, \w) reach the
# rule parser untouched instead of relying on invalid string escapes.
# The Regex value is a quoted scalar WITHOUT a Python-style r prefix: assuming
# the rules are parsed as YAML (TODO confirm in get_item_data), writing
# r'...' would make the characters r' and ' part of the pattern itself, so
# "peripheral vascular disease" could never match.
target_rules=r'''
Comments: ''
Direction: ''
Lex: peripheral vascular disease
Regex: '(peripheral\s*(arter\w*|vasc\w*)\s*disease)'
Type: PAD
---
Comments: ''
Direction: ''
Lex: PVD
Regex: 'pvd'
Type: PAD'''
# context rules are often lengthy, you can point context_rules to an external rule file instead
context_rules='''Comments: ''
Direction: forward
Lex: 'no'
Regex: ''
Type: DEFINITE_NEGATED_EXISTENCE
---
Comments: ''
Direction: forward
Lex: 'denies'
Regex: ''
Type: DEFINITE_NEGATED_EXISTENCE
'''
# define the feature inference rule
feature_inference_rule='''
#Conclusion type, Evidence type, Modifier values associated with the evidence
NEGATED_CONCEPT,PAD,DEFINITE_NEGATED_EXISTENCE
'''
# define the document inference rule
document_inference_rule='''
#Conclusion Type at document level, Evidence type at mention level
PAD_DOC,PAD

#Default document type
NO_PAD
'''

In [15]:
# Build the pipeline instance from the rule definitions above.
myPipe = MyPipe(
    sentence_rules=sentence_rules,
    target_rules=target_rules,
    context_rules=context_rules,
    feature_inference_rule=feature_inference_rule,
    document_inference_rule=document_inference_rule,
)

In [5]:
# Open a connection to the `mimic2` database on the "mysql" host.
# The password is prompted for interactively so it never appears in the notebook.
conn = pymysql.connect(
    host="mysql",
    port=3306,
    user="jovyan",
    # `password` / `database` are the documented argument names;
    # `passwd` / `db` are deprecated aliases in PyMySQL.
    password=getpass.getpass("Enter MySQL passwd for jovyan"),
    database='mimic2',
)

cursor = conn.cursor()

Enter MySQL passwd for jovyan········


In [6]:
# Load into a DataFrame every (subject_id, code, text) row where the ICD-9
# code starts with 443 and the note text matches '%ankle%brachial index%'.
pad_data = pd.read_sql(
    sql="""select c.subject_id, code, text
                    from icd9 c, noteevents n
                    where c.subject_id = n.subject_id and code like '443%' and `text` like
                    '%ankle%brachial index%'""",
    con=conn,
)

In [8]:
# Number of rows returned by the query.
pad_data.shape[0]

6

In [16]:
# Run the pipeline on every note (newlines flattened to spaces), keyed by the
# DataFrame index of the row the note came from.
results = {
    row_index: myPipe.process(note_text.replace('\n', ' '))
    for row_index, note_text in pad_data['text'].items()
}

In [17]:
# Dump the raw results: one (doc_class, context_doc, annotations, relations)
# tuple per DataFrame index, as returned by MyPipe.process.
print(results)

{0: ('pad_doc', __________________________________________
,   markup_id vis_category  start   end  txt type
0        T0       Target   28.0  31.0  pvd  pad
1        T1       Target   80.0  83.0  pvd  pad, Empty DataFrame
Columns: [relation_id, type, arg1_cate, arg1_id, arg2_cate, arg2_id]
Index: []), 1: ('pad_doc', __________________________________________
,   markup_id vis_category  start   end  txt type
0        T0       Target   28.0  31.0  pvd  pad
1        T1       Target   80.0  83.0  pvd  pad, Empty DataFrame
Columns: [relation_id, type, arg1_cate, arg1_id, arg2_cate, arg2_id]
Index: []), 2: ('pad_doc', __________________________________________
,   markup_id vis_category  start    end  txt type
0        T0       Target  171.0  174.0  pvd  pad, Empty DataFrame
Columns: [relation_id, type, arg1_cate, arg1_id, arg2_cate, arg2_id]
Index: []), 3: ('no_pad', __________________________________________
, Empty DataFrame
Columns: [markup_id, vis_category, start, end, txt, type]
Index:

In [11]:
# Collect each document's ConTextDocument (element 1 of the tuple returned by
# MyPipe.process) under sequential names "doc0", "doc1", ... in the order the
# documents appear in `results`. Only the values are needed, so the dict keys
# are not iterated, and enumerate replaces the hand-maintained counter.
context_docs = {
    "doc" + str(position): outputs[1]
    for position, outputs in enumerate(results.values())
}

In [12]:
# Visualize a single document: "doc1" is the second processed document
# (names start at "doc0"). The Vis object is constructed with
# file_name="test.html" — presumably the output file of the rendering; confirm in visual.Vis.
view_pycontext_output(context_docs['doc1'], Vis(file_name="test.html"))

In [13]:
# Visualize all processed documents at once, passing the whole
# name -> ConTextDocument mapping.
view_pycontext_outputs(context_docs)