In [1]:
from PyRuSH.RuSH import RuSH
from pyConTextNLP import pyConTextGraph
from DocumentClassifier import DocumentClassifier 
from pyConTextNLP.utils import get_document_markups

from DocumentClassifier import FeatureInferencer
from DocumentClassifier import DocumentInferencer
from nlp_pneumonia_utils import markup_sentence
from itemData import get_item_data
from visual import convertMarkups2DF


# begin to define MyPipe class
class MyPipe:
    """Rule-based document classification pipeline.

    Chains a RuSH sentence segmenter, per-sentence markup through
    ``markup_sentence``, a ``FeatureInferencer`` and a ``DocumentInferencer``.
    """

    def __init__(self, sentence_rules, target_rules, context_rules, feature_inference_rule, document_inference_rule):
        """Build each pipeline component from its rule definition.

        Every argument is handed unchanged to the matching constructor or
        loader: ``sentence_rules`` to ``RuSH``, ``target_rules`` and
        ``context_rules`` to ``get_item_data``, ``feature_inference_rule`` to
        ``FeatureInferencer`` and ``document_inference_rule`` to
        ``DocumentInferencer``.
        """
        self.sentence_segmenter = RuSH(sentence_rules)
        self.targets = get_item_data(target_rules)
        self.modifiers = get_item_data(context_rules)
        self.feature_inferencer = FeatureInferencer(feature_inference_rule)
        self.document_inferencer = DocumentInferencer(document_inference_rule)

    def process(self, doc_text):
        """Run the full pipeline on one document.

        Args:
            doc_text: Raw text of the document.

        Returns:
            A 4-tuple ``(doc_class, context_doc, annotations, relations)``:
            the document-level inference result, the ``ConTextDocument``
            holding one markup per sentence, and the annotations and
            relations produced by ``convertMarkups2DF``.
        """
        spans = self.sentence_segmenter.segToSentenceSpans(doc_text)
        context_doc = pyConTextGraph.ConTextDocument()

        for span in spans:
            # Each sentence is lower-cased before it is handed to markup_sentence.
            lowered = doc_text[span.begin:span.end].lower()
            context_doc.addMarkup(
                markup_sentence(lowered, modifiers=self.modifiers, targets=self.targets)
            )

        # The third element returned by convertMarkups2DF is not used here.
        annotations, relations, _ = convertMarkups2DF(get_document_markups(context_doc))

        mention_types = self.feature_inferencer.process(annotations, relations)
        doc_class = self.document_inferencer.process(mention_types)

        return doc_class, context_doc, annotations, relations

In [2]:
# Configure the rules consumed by MyPipe.
#
# The target and context rule texts contain regular expressions, so they are
# written as raw string literals: backslashes reach the rule loader exactly as
# typed. In a regular literal `\\` collapses to a single backslash (changing the
# meaning of the pattern) and sequences such as `\w` or `\s` are invalid string
# escapes that trigger warnings.

sentence_rules='KB/rush_rules.tsv'
# you can point target_rules to a file path instead, if there are many rules
target_rules=r'''
Comments: ''
Direction: ''
Lex: peripheral arterial/vascular disease
Regex: 'p(eri\w+)?\s*(a(rt\w+)?|v(as\w+)?)\s*d(ise\w+)?'
Type: PAD

'''
# context rules are often lengthy, you can point context_rules to an external rule files instead
#
# In the "ankle brachial index" rule the two numeric alternatives are wrapped in
# one group so that both require the preceding ABI mention; with a top-level `|`
# the second alternative matched any bare ".4" - ".9" anywhere in the text.
context_rules=r'''
Comments: ''
Direction: forward
Lex: 'history'
Regex: ''
Type: DEFINITE_AFFIRMED_EXISTENCE
---
Comments: ''
Direction: forward
Lex: 'h/o'
Regex: ''
Type: DEFINITE_AFFIRMED_EXISTENCE
---
Comments: ''
Direction: forward
Lex: ankle brachial index
Regex: 'a(nk\w+)?(\s*\-|\\|\/)?\s*b(ra\w+)?\s*i(nd\w+)?(\W*)?(?:\w+\W+){1,5}?(0*(\.\d{1,2})|1*(\.[4-9]))'
Type: DEFINITE_AFFIRMED_EXISTENCE
---
Comments: ''
Direction: forward
Lex: 'past'
Regex: ''
Type: DEFINITE_AFFIRMED_EXISTENCE
---
Comments: ''
Direction: forward
Lex: 'diagnosis'
Regex: ''
Type: DEFINITE_AFFIRMED_EXISTENCE
---
Comments: ''
Direction: forward
Lex: 'condition'
Regex: ''
Type: DEFINITE_AFFIRMED_EXISTENCE
'''
# define the feature inference rule
feature_inference_rule='''
#Conclusion type, Evidence type, Modifier values associated with the evidence
AFFIRMED_CONCEPT,PAD,DEFINITE_AFFIRMED_EXISTENCE
'''
# define the document inference rule
document_inference_rule='''
#Conclusion Type at document level, Evidence type at mention level
PAD_DOC,PAD

#Default document type
NO_PAD
'''

In [3]:
# Build the pipeline instance from the rule definitions configured earlier.
myPipe = MyPipe(
    sentence_rules=sentence_rules,
    target_rules=target_rules,
    context_rules=context_rules,
    feature_inference_rule=feature_inference_rule,
    document_inference_rule=document_inference_rule,
)

In [4]:
import pymysql
import pandas as pd
import getpass

# Open the MySQL connection. The password is prompted for at run time via
# getpass, so it is not stored in the notebook source.
conn = pymysql.connect(
    host="mysql",
    port=3306,
    user="jovyan",
    # `password` / `database` are the documented parameter names; `passwd` and
    # `db` are deprecated aliases in PyMySQL.
    password=getpass.getpass("Enter MySQL passwd for jovyan"),
    database='mimic2',
)

Enter MySQL passwd for jovyan········


In [5]:
# Fetch at most five note texts matching the LIKE pattern below.
pad_data = pd.read_sql(
    sql="SELECT text FROM noteevents where text like '%peripheral%vascular disease%' limit 5",
    con=conn,
)

In [6]:
# Write your code here: run every retrieved note through MyPipe.
# Keys are the DataFrame index values; newlines in the note text are replaced
# with spaces before processing.
results = {
    note_index: myPipe.process(note.text.replace('\n', ' '))
    for note_index, note in pad_data.iterrows()
}

In [7]:
from visual import view_pycontext_output
from visual import view_pycontext_outputs
from visual import Vis

In [8]:
# Collect the context document of every processed note under the keys
# "doc0", "doc1", ... following the iteration order of `results`.
# Each value of `results` is the tuple returned by MyPipe.process; element 1 of
# that tuple is the context document.
context_docs = {
    "doc" + str(position): outcome[1]
    for position, outcome in enumerate(results.values())
}

In [113]:
# Visualise the context document stored under 'doc1', passing a Vis configured
# with file_name="test.html" (presumably the file the rendering is written to;
# TODO confirm in visual.Vis).
view_pycontext_output(context_docs['doc1'], Vis(file_name="test.html"))

In [114]:
# Hand the whole dict of context documents to the multi-document viewer.
view_pycontext_outputs(context_docs)

In [116]:
from pyConTextNLP.utils import get_document_markups
from visual import convertMarkups2DF
import csv

In [115]:
# Write every value of `concepts` as one row of output.csv.
# NOTE(review): `concepts` is not defined in any cell visible here; confirm
# where it is supposed to come from before relying on this cell.
with open("output.csv", "w", newline="") as out_file:
    csv.writer(out_file).writerows(list(concepts.values()))

NameError: name 'csv' is not defined

In [10]:
# Visualise the context document stored under 'doc1' with the viewer's default
# arguments (no explicit Vis instance is passed).
view_pycontext_output(context_docs['doc1'])