In [1]:
import pymysql
import pandas as pd
import getpass
from PyRuSH.RuSH import RuSH
from pyConTextNLP import pyConTextGraph
from DocumentClassifier import DocumentClassifier 
from pyConTextNLP.utils import get_document_markups

from DocumentClassifier import FeatureInferencer
from DocumentClassifier import DocumentInferencer
from nlp_pneumonia_utils import markup_sentence
from itemData import get_item_data
from visual import convertMarkups2DF

from visual import view_pycontext_output
from visual import view_pycontext_outputs
from visual import Vis

from pipeUtils import Document

In [2]:
help(view_pycontext_output)

Help on function view_pycontext_output in module visual:

view_pycontext_output(input, vis=<visual.Vis object at 0x7f14f5130cc0>)



In [3]:
# begin to define MyPipe class
class MyPipe:
    def __init__(self, sentence_rules, target_rules, context_rules, feature_inference_rule, document_inference_rule):
        # initiate necessary components here        
        self.sentence_segmenter = RuSH(sentence_rules)
        self.targets=get_item_data(target_rules)
        self.modifiers=get_item_data(context_rules)
        self.feature_inferencer=FeatureInferencer(feature_inference_rule)
        self.document_inferencer = DocumentInferencer(document_inference_rule)
       
    
    def process(self, doc_text):        
        #process your input doc_text, return the required results
        sentences=self.sentence_segmenter.segToSentenceSpans(doc_text)
        #sentences=doc_text.split('\n')
        context_doc = pyConTextGraph.ConTextDocument()
        #document_id = doc_text.document_id

        for sentence in sentences:
            sentence_text=doc_text[sentence.begin:sentence.end].lower()
            # Process every sentence by adding markup
            m = markup_sentence(sentence_text, modifiers=self.modifiers, targets=self.targets)
            context_doc.addMarkup(m)
            #context_doc.getSectionMarkups()
        
        markups = get_document_markups(context_doc)
        annotations, relations, doc_txt = convertMarkups2DF(markups) 
        
        inferenced_types = self.feature_inferencer.process(annotations, relations)
        doc_class = self.document_inferencer.process(inferenced_types)
        
        return doc_class, context_doc, annotations, relations

In [8]:
sentence_rules='KB/rush_rules.tsv'
# you can point target_rules to a file path instead, if there are many rules
target_rules='''
Comments: ''
Direction: ''
Lex: peripheral vascular disease
Regex: r'p(eri\w+)?\s*(a(rt\w+)?|v(as\w+)?)\s*d(ise\w+)?'
Type: PAD
---
Comments: ''
Direction: ''
Lex: PVD
Regex: r'a(nk\w+)?(\s*\-|\\|\/)?\s*b(ra\w+)?\s*i(nd\w+)?(\W*)?((?:\w+\W+){1,5}?((0*(\.\d{1,2}))|(1*(\.[4-9])))?)?'
Type: PAD'''
# context rules are often lengthy, you can point context_rules to an external rule files instead
context_rules='''Comments: ''
Direction: forward
Lex: 'no'
Regex: ''
Type: DEFINITE_NEGATED_EXISTENCE
---
Comments: ''
Direction: forward
Lex: 'denies'
Regex: ''
Type: DEFINITE_NEGATED_EXISTENCE
'''
# define the feature inference rule
feature_inference_rule='''
#Conclusion type, Evidence type, Modifier values associated with the evidence
NEGATED_CONCEPT,PAD,DEFINITE_NEGATED_EXISTENCE
'''
# define the document inference rule
document_inference_rule='''
#Conclusion Type at document level, Evidence type at mention level
PAD_DOC,PAD

#Default document type
NO_PAD
'''

In [17]:
#initiate an instance of MyPipe
myPipe=MyPipe(sentence_rules, target_rules, context_rules, feature_inference_rule, document_inference_rule)

In [10]:
from pipeUtils import Annotation
from pipeUtils import Document
 
import os
import glob 

In [11]:
# Read all test documents
unid="u1166466"
project_1 = "PAD_TRAIN"
project_2 = "PAD_ABI"
path_1 = "/home/"+str(unid)+"/BRAT/"+str(unid)+"/"+project_1
path_2 = "/home/"+"u0420717"+"/BRAT/"+str(unid)+"/"+project_2

In [12]:
test_docs=dict()
test_doc_paths = glob.glob(str(path_2+'/*.txt')) 
for d in test_doc_paths:
    doc = Document()
    #print(d)
    doc.load_document_from_file(d)
    #print(str(d[:-3])+'ann')
    doc.load_annotations_from_brat(str(d[:-3])+'ann')
    #print(os.path.basename(d))
    test_docs[os.path.basename(d)]=doc


test_docs

{'10083_27.txt': <pipeUtils.Document at 0x7f14f5011d68>,
 '10594_28.txt': <pipeUtils.Document at 0x7f14f5016f98>,
 '10594_29.txt': <pipeUtils.Document at 0x7f14f4c11e80>,
 '12272_30.txt': <pipeUtils.Document at 0x7f14f50ef240>,
 '12403_31.txt': <pipeUtils.Document at 0x7f14f501fdd8>,
 '12403_32.txt': <pipeUtils.Document at 0x7f14f500ebe0>,
 '12573_33.txt': <pipeUtils.Document at 0x7f14f5011fd0>,
 '1266_4.txt': <pipeUtils.Document at 0x7f14f50119b0>,
 '1266_5.txt': <pipeUtils.Document at 0x7f14f50283c8>,
 '1266_6.txt': <pipeUtils.Document at 0x7f14f50786a0>,
 '13260_34.txt': <pipeUtils.Document at 0x7f14f5073da0>,
 '13625_35.txt': <pipeUtils.Document at 0x7f14f50ef358>,
 '1369_8.txt': <pipeUtils.Document at 0x7f14f50ca940>,
 '1371_7.txt': <pipeUtils.Document at 0x7f14f5013be0>,
 '14566_36.txt': <pipeUtils.Document at 0x7f14f5016748>,
 '15011_37.txt': <pipeUtils.Document at 0x7f14f5013b38>,
 '1604_10.txt': <pipeUtils.Document at 0x7f14f50ef278>,
 '1604_9.txt': <pipeUtils.Document at 0x7f

In [5]:
conn = pymysql.connect(host="mysql",
                       port=3306,user="jovyan",
                       passwd=getpass.getpass("Enter MySQL passwd for jovyan"),db='mimic2')

cursor = conn.cursor()

Enter MySQL passwd for jovyan········


In [6]:
pad_data = pd.read_sql("""select c.subject_id, text, code
                    from icd9 c, noteevents n
                    where c.subject_id = n.subject_id and code like '443%' and `text` like
                    '%peripheral vascular disease%'""",conn)

In [14]:
for index, row in test_docs.iterrows():
    print(index, row[0], row[1])

AttributeError: 'dict' object has no attribute 'iterrows'

In [None]:
("""select c.subject_id, code, text
                    from icd9 c, noteevents n
                    where c.subject_id = n.subject_id and code like '443%' and `text` like
                    '%ankle%brachial index%'""",conn)

In [8]:
len(pad_data)

125

In [19]:
# Processing the all notes
nlp_system = myPipe()

for doc_id in  test_docs.keys():
    myPipe.process(test_docs.get(doc_id))

TypeError: 'MyPipe' object is not callable

In [13]:
results = {index: (myPipe.process(row.text.replace('\n',' ')))for doc_id in test_docs.keys()}

AttributeError: 'dict' object has no attribute 'iterrows'

In [None]:
print(results)

In [None]:
context_docs=dict()
i=0
for item, row in results.items(): 
    doc_name = "doc" + str(i)
    context_docs[doc_name]=row[1]
    i=i+1

In [None]:
test = 'pt has history of PAD, The ankle-brachial index is 0.60, patient has no family history of peripheral arterial disease,pt mother was diagnosted with peripheral arterial disease,no ability was found'

In [None]:
#!pip install pipeUtils
#import pipeUtils

doc=Document(text=test, document_id='Doc1')

In [None]:
view_pycontext_output(test, Vis(file_name="test.html"))

In [None]:
view_pycontext_output(context_docs['doc1'], Vis(file_name="test.html"))

In [None]:
view_pycontext_outputs(context_docs)

In [12]:
unid = 'u0420717'

In [10]:
%%bash  -s "$u0420717"
echo ~/BRAT/$1/*
ls   ~/BRAT/$1/ 

/home/u0420717/BRAT/$u0420717/*


ls: cannot access '/home/u0420717/BRAT/$u0420717/': No such file or directory


In [14]:
%%bash
ls -l ~/BRAT/

total 76
drwxrwsrwx+  6 www-data www-data 4096 Jan 27 02:42 Example_projects
drwxrwsrwx+  6 www-data www-data 4096 Mar 31 21:36 u0073711
drwxrwsrwx+  5 www-data www-data 4096 Apr 18 02:58 u01190277
drwxrwsrwx+  9 www-data www-data 4096 Apr  9 03:37 u0384041
drwxrwsrwx+ 11 www-data www-data 4096 Apr 19 20:09 u0410167
drwxrwsrwx+  7 www-data www-data 4096 Apr 20 23:15 u0420717
drwxrwsrwx+  4 www-data www-data 4096 Feb  9 05:05 u0426888
drwxrwsrwx+  9 www-data www-data 4096 Mar 27 03:44 u0496358
drwxrwsrwx+  3 www-data www-data 4096 Feb 15 11:03 u0499179
drwxrwsrwx+  4 www-data www-data 4096 Feb 16 02:23 u0585043
drwxrwsrwx+  4 www-data www-data 4096 Feb 15 23:53 u0608777
drwxrwsrwx+  4 www-data www-data 4096 Feb 12 00:24 u0883495
drwxrwsrwx+ 11 www-data www-data 4096 Apr 15 07:47 u0927327
drwxrwsrwx+  5 www-data www-data 4096 Mar 28 21:37 u0941142
drwxrwsrwx+  6 www-data www-data 4096 Apr 20 22:37 u1166466
drwxrwsrwx+  5 www-data www-data 4096 Apr 19 01:30 u1188401
drwxrwsrwx+  7 www-dat

In [15]:
%%bash  -s "$unid"
echo ~/BRAT/$1/
ls ~/BRAT/$1 

/home/u0420717/BRAT/u0420717/
Example
PAD_Project_Test
PAD_Project_Training
Project_1


In [17]:
docs_text = pd.read_sql("""select c.subject_id, text, code
                    from icd9 c, noteevents n
                    where c.subject_id = n.subject_id and code like '443%' and `text` like
                    '%peripheral vascular disease%'""",conn)

In [18]:
docs_text

Unnamed: 0,subject_id,text,code
0,21,\n \n \n \nAdmission Date: [**3138-10-29**] ...,443.9
1,21,\n\n\n DATE: [**3139-3-21**] 9:23 AM\n ...,443.9
2,21,\n\n\n DATE: [**3139-3-21**] 9:23 AM\n ...,443.9
3,792,\n\nAdmission Date: [**2883-5-27**] Dis...,443.9
4,781,\n \n \n \nAdmission Date: [**2554-7-2**] ...,443.9
5,998,\n\nAdmission Date: [**3475-5-3**] Disc...,443.9
6,998,\n\n\n DATE: [**3475-5-6**] 3:10 PM\n ...,443.9
7,998,\n\nAdmission Date: [**3475-5-3**] Disc...,443.9
8,998,\n\n\n DATE: [**3475-5-6**] 3:10 PM\n ...,443.9
9,2477,\n\nAdmission Date: [**3364-5-31**] Dis...,443.9


In [19]:
for index, row in docs_text.iterrows():
    print(index, row[0], row[1])

0 21 
 
 
 
Admission Date:  [**3138-10-29**]              Discharge Date:   [**3138-11-11**]
 
Date of Birth:  [**3051-5-22**]             Sex:   M
 
Service: MEDICINE
 
Allergies: 
Patient recorded as having No Known Allergies to Drugs
 
Attending:[**First Name3 (LF) 1919**] 
Chief Complaint:
chest pain, dyspnea
 
Major Surgical or Invasive Procedure:
cardiac catheterization with placement of three stents and IABP. 
 Swan catheter placement.

 
History of Present Illness:
HISTORY OF PRESENT ILLNESS: 87 yo M with chronic kidney disease 
s/p AV Graft placement [**9-19**], stroke, hypertension, diabetes, and 
peripheral vascular disease presents with chest pain and 
shortness of breath. He reports that his chest pain began 
approximately one week ago. During the week it has gotten worse. 
It is substernal, radiating to left shoulder, especially with 
inspiration. It is associated with shortness of breath. He came 
to the hospital today because the pain was much worse, [**10-24**]. In 
a

In [None]:
%%bash  -s "$unid"
mkdir   ~/BRAT/$1/PAD_Project_Test
echo ~/BRAT/$1/*
ls   ~/BRAT/$1/ 

In [20]:
path = "/home/"+str(unid)+"/BRAT/"+str(unid)+"/PAD_Project_Training"
for index, row in docs_text.iterrows():
    new_file_path_txt = path+"/"+str(row[0]) + "_" + str(index) + ".txt" 
    new_file_path_ann = path+"/"+str(row[0]) + "_" + str(index) + ".ann" 
    print(new_file_path_txt)
    print(new_file_path_ann)   
    

/home/u0420717/BRAT/u0420717/PAD_Project_Training/21_0.txt
/home/u0420717/BRAT/u0420717/PAD_Project_Training/21_0.ann
/home/u0420717/BRAT/u0420717/PAD_Project_Training/21_1.txt
/home/u0420717/BRAT/u0420717/PAD_Project_Training/21_1.ann
/home/u0420717/BRAT/u0420717/PAD_Project_Training/21_2.txt
/home/u0420717/BRAT/u0420717/PAD_Project_Training/21_2.ann
/home/u0420717/BRAT/u0420717/PAD_Project_Training/792_3.txt
/home/u0420717/BRAT/u0420717/PAD_Project_Training/792_3.ann
/home/u0420717/BRAT/u0420717/PAD_Project_Training/781_4.txt
/home/u0420717/BRAT/u0420717/PAD_Project_Training/781_4.ann
/home/u0420717/BRAT/u0420717/PAD_Project_Training/998_5.txt
/home/u0420717/BRAT/u0420717/PAD_Project_Training/998_5.ann
/home/u0420717/BRAT/u0420717/PAD_Project_Training/998_6.txt
/home/u0420717/BRAT/u0420717/PAD_Project_Training/998_6.ann
/home/u0420717/BRAT/u0420717/PAD_Project_Training/998_7.txt
/home/u0420717/BRAT/u0420717/PAD_Project_Training/998_7.ann
/home/u0420717/BRAT/u0420717/PAD_Project_Train

In [21]:
for index, row[0:75] in docs_text.iterrows():
    new_file_path_txt = path+"/"+str(row[0]) + "_" + str(index) + ".txt" 
    new_file_path_ann = path+"/"+str(row[0]) + "_" + str(index) + ".ann" 
    f=open(new_file_path_txt, "w")
    f.write(row[1])
    f.close()
    f=open(new_file_path_ann, "w")
    f.write("")
    f.close()

In [None]:
%%bash  -s "$unid"  
ls   ~/BRAT/$1/PAD_Project_Training/*