In [1]:
import pymysql
import pandas as pd
import getpass

# Open a connection to the `mimic2` MySQL database. The password is prompted
# for at run time so it never appears in the notebook source.
conn = pymysql.connect(
    host="mysql",
    port=3306,
    user="jovyan",
    # `password` / `database` are the canonical keyword names; `passwd` / `db`
    # are deprecated aliases in PyMySQL.
    password=getpass.getpass("Enter MySQL passwd for jovyan"),
    database='mimic2',
)

# Cursor used by the raw SQL cell that follows.
cursor = conn.cursor()

Enter MySQL passwd for jovyan········


In [2]:
# Filter mimic2: rows whose ICD-9 code starts with 443 and whose note text
# mentions "peripheral vascular disease".
# PAD holds whatever cursor.execute() returns for this query.
PAD = cursor.execute("""select c.subject_id, code, text
                    from icd9 c, noteevents n
                    where c.subject_id = n.subject_id and code like '443%' and `text` like '%peripheral vascular disease%'""")

# Keep every row of the SELECT result as its string representation.
PAD_notes = [str(note) for note in cursor]
    

In [3]:
# Display the value returned by cursor.execute() for the PAD query.
PAD

125

In [4]:
# Peek at the first ten stringified result rows.
print(PAD_notes[:10])

['(21, \'443.9\', \'\\n \\n \\n \\nAdmission Date:  [**3138-10-29**]              Discharge Date:   [**3138-11-11**]\\n \\nDate of Birth:  [**3051-5-22**]             Sex:   M\\n \\nService: MEDICINE\\n \\nAllergies: \\nPatient recorded as having No Known Allergies to Drugs\\n \\nAttending:[**First Name3 (LF) 1919**] \\nChief Complaint:\\nchest pain, dyspnea\\n \\nMajor Surgical or Invasive Procedure:\\ncardiac catheterization with placement of three stents and IABP. \\n Swan catheter placement.\\n\\n \\nHistory of Present Illness:\\nHISTORY OF PRESENT ILLNESS: 87 yo M with chronic kidney disease \\ns/p AV Graft placement [**9-19**], stroke, hypertension, diabetes, and \\nperipheral vascular disease presents with chest pain and \\nshortness of breath. He reports that his chest pain began \\napproximately one week ago. During the week it has gotten worse. \\nIt is substernal, radiating to left shoulder, especially with \\ninspiration. It is associated with shortness of breath. He came \

In [5]:
from PyRuSH.RuSH import RuSH
from pyConTextNLP import pyConTextGraph
from DocumentClassifier import DocumentClassifier 
from pyConTextNLP.utils import get_document_markups

from DocumentClassifier import FeatureInferencer
from DocumentClassifier import DocumentInferencer
from nlp_pneumonia_utils import markup_sentence
from itemData import get_item_data
from visual import convertMarkups2DF


# begin to define MyPipe class
class MyPipe:
    """Rule-based document classification pipeline.

    Chains sentence segmentation (RuSH), target/modifier markup
    (pyConTextNLP), mention-level feature inference and document-level
    inference.
    """

    def __init__(self, sentence_rules, target_rules, context_rules, feature_inference_rule, document_inference_rule):
        """Create each pipeline component from its rule definition.

        Args:
            sentence_rules: passed to ``RuSH`` to build the sentence segmenter.
            target_rules: passed to ``get_item_data`` to build the targets.
            context_rules: passed to ``get_item_data`` to build the modifiers.
            feature_inference_rule: passed to ``FeatureInferencer``.
            document_inference_rule: passed to ``DocumentInferencer``.
        """
        self.sentence_segmenter = RuSH(sentence_rules)
        self.targets = get_item_data(target_rules)
        self.modifiers = get_item_data(context_rules)
        self.feature_inferencer = FeatureInferencer(feature_inference_rule)
        self.document_inferencer = DocumentInferencer(document_inference_rule)

    def process(self, doc_text):
        """Run the full pipeline on one document.

        Args:
            doc_text: the document text to classify.

        Returns:
            A 4-tuple ``(doc_class, context_doc, annotations, relations)``:
            the document-level conclusion, the populated ``ConTextDocument``,
            and the annotations / relations produced by ``convertMarkups2DF``
            (presumably DataFrames -- confirm against ``visual``).
        """
        spans = self.sentence_segmenter.segToSentenceSpans(doc_text)
        context_doc = pyConTextGraph.ConTextDocument()

        for span in spans:
            # Sentences are lower-cased before being matched against the rules.
            lowered = doc_text[span.begin:span.end].lower()
            markup = markup_sentence(lowered, modifiers=self.modifiers, targets=self.targets)
            context_doc.addMarkup(markup)

        # The third element returned by convertMarkups2DF is not needed here.
        annotations, relations, _ = convertMarkups2DF(get_document_markups(context_doc))

        # Mention-level inference feeds the document-level decision.
        mention_types = self.feature_inferencer.process(annotations, relations)
        doc_class = self.document_inferencer.process(mention_types)

        return doc_class, context_doc, annotations, relations

In [6]:
# Rule configuration for the pipeline.

# Path to the sentence segmentation rules.
sentence_rules='KB/rush_rules.tsv'

# Target rules; you can point target_rules to a file path instead, if there are many rules.
# Raw string: the regex below contains backslashes (\d, \.) that are invalid
# escape sequences in a normal string literal and trigger a warning on newer
# Python versions. The resulting string value is unchanged.
# NOTE(review): the "high temperature" rule is typed PAD, so a matching
# temperature reading would count as PAD evidence. It looks like a leftover
# from another example -- confirm whether it belongs here.
target_rules=r'''
Comments: ''
Direction: ''
Lex: peripheral vascular disease
Regex: ''
Type: PAD
---
Comments: ''
Direction: ''
Lex: high temperature
Regex: '1\d\d\.\d F'
Type: PAD'''

# Context rules are often lengthy; you can point context_rules to an external rule file instead.
context_rules='''Comments: ''
Direction: forward
Lex: 'no'
Regex: ''
Type: DEFINITE_NEGATED_EXISTENCE
---
Comments: ''
Direction: forward
Lex: 'denies'
Regex: ''
Type: DEFINITE_NEGATED_EXISTENCE
'''

# Feature (mention-level) inference rule.
feature_inference_rule='''
#Conclusion type, Evidence type, Modifier values associated with the evidence
NEGATED_CONCEPT,PAD,DEFINITE_NEGATED_EXISTENCE
'''

# Document-level inference rule, including the default document type.
document_inference_rule='''
#Conclusion Type at document level, Evidence type at mention level
PAD_DOC,PAD

#Default document type
NO_PAD
'''

In [7]:
# Build the pipeline instance from the rule definitions configured above.
myPipe = MyPipe(
    sentence_rules=sentence_rules,
    target_rules=target_rules,
    context_rules=context_rules,
    feature_inference_rule=feature_inference_rule,
    document_inference_rule=document_inference_rule,
)

In [8]:
# Load the PAD query result (subject_id, code, text) into a DataFrame.
pad_data = pd.read_sql(
    sql="""select c.subject_id, code, text
                    from icd9 c, noteevents n
                    where c.subject_id = n.subject_id and code like '443%' and `text` like
                    '%peripheral vascular disease%'""",
    con=conn,
)

In [9]:
# Loop-based variant, kept for reference only: the code below is wrapped in a
# string literal, so it is not executed.

# Timing magic, left disabled:
#%%timeit -n 10

"""results=dict()  # this dictionary will contain document names as keys and result as values.
for index, row in pad_data.iterrows():
    t = row.text.replace('\n',' ')
    result = myPipe.process(t)
    results[index] = result"""

In [10]:
# Dict-comprehension variant: run every note through the pipeline, keyed by
# the DataFrame row index. Newlines in the note are replaced with spaces first.

# Timing magic, left disabled:
#%%timeit -n 10

results = {
    row_idx: myPipe.process(record.text.replace('\n', ' '))
    for row_idx, record in pad_data.iterrows()
}

In [11]:
# Dump the full results mapping (row index -> pipeline output tuple).
print(results)

{0: ('pad_doc', __________________________________________
,   markup_id vis_category  start    end                          txt type
0        T0       Target  163.0  190.0  peripheral vascular disease  pad, Empty DataFrame
Columns: [relation_id, type, arg1_cate, arg1_id, arg2_cate, arg2_id]
Index: []), 1: ('no_pad', __________________________________________
,   markup_id vis_category  start    end                          txt  \
0        T0       Target  180.0  207.0  peripheral vascular disease   
1        T1     Modifier   39.0   41.0                           no   

                         type  
0                         pad  
1  definite_negated_existence  ,   relation_id                        type arg1_cate arg1_id arg2_cate arg2_id
0          R0  definite_negated_existence  Modifier      T1    Target      T0), 2: ('no_pad', __________________________________________
,   markup_id vis_category  start    end                          txt  \
0        T0       Target  180.0  207.

In [12]:
from visual import view_pycontext_output
from visual import view_pycontext_outputs
from visual import Vis

In [13]:

# Map "doc0", "doc1", ... (in results iteration order) to the ConTextDocument
# of each result, i.e. element 1 of the tuple returned by MyPipe.process.
context_docs = {
    "doc" + str(position): outcome[1]
    for position, outcome in enumerate(results.values())
}

In [14]:
# Visualise the markup of a single document ('doc1'), passing a Vis
# configured with file_name="test.html".
view_pycontext_output(
    context_docs['doc1'],
    Vis(file_name="test.html"),
)

In [15]:
# Visualise the markup of all documents collected in context_docs.
view_pycontext_outputs(context_docs)

In [16]:
#machine learning

