In [1]:
import sys

In [2]:
# Put the parent directory first on the import path.
# NOTE(review): presumably so the repository's own medspacy package is picked
# up ahead of any installed copy — confirm.
sys.path.insert(0, "..")

In [3]:
import spacy
from spacy.tokens import Span

import medspacy
from medspacy.preprocess import PreprocessingRule, Preprocessor
from medspacy.ner import TargetRule
from medspacy.context import ConTextRule
from medspacy.section_detection import Sectionizer
from medspacy.postprocess import PostprocessingRule, PostprocessingPattern, Postprocessor
from medspacy.postprocess import postprocessing_functions
from medspacy.visualization import visualize_ent, visualize_dep


import re

# Overview
In the previous notebook, we instantiated each component and added it to our pipeline separately. This time, we'll use `medspacy.load()` to create a model, access some of its components to add rules, and then process our text with the complete pipeline.

In [4]:
# Read the sample discharge summary into memory.
# The encoding is pinned so the file is decoded the same way on every
# platform instead of depending on the machine's default locale encoding.
with open("./discharge_summary.txt", encoding="utf-8") as f:
    text = f.read()

In [5]:
# Build the medspaCy pipeline with its default components; later cells fetch
# those components from `nlp` and add rules to them.
nlp = medspacy.load()

In [6]:
# Show which components the freshly loaded pipeline contains.
nlp.pipe_names

['medspacy_pyrush', 'medspacy_target_matcher', 'medspacy_context']

### Preprocessing

In [7]:
# Wrap the pipeline's current tokenizer in a Preprocessor.
preprocessor = Preprocessor(nlp.tokenizer)

In [8]:
# Install the preprocessor in the tokenizer slot, so calling `nlp(...)` goes
# through it (it still holds the original tokenizer it was built with).
nlp.tokenizer = preprocessor

In [9]:
# Text clean-up rules for MIMIC-style notes.
# The specific date/year rules are listed ahead of the catch-all bracket rule.
# NOTE(review): presumably rules are applied in list order — confirm.
preprocess_rules = [
    # Bracketed numeric placeholders with one or two dashes,
    # e.g. "[**2010-1-1**]" or "[**1-15**]".
    PreprocessingRule(
        re.compile(r"\[\*\*[\d]{1,4}-[\d]{1,2}(-[\d]{1,2})?\*\*\]"),
        repl="01-01-2010",
        desc="Replace MIMIC date brackets with a generic date.",
    ),
    # Bracketed four-digit placeholders, e.g. "[**2554**]".
    PreprocessingRule(
        re.compile(r"\[\*\*[\d]{4}\*\*\]"),
        repl="2010",
        desc="Replace MIMIC year brackets with a generic year.",
    ),
    # Clinical shorthand, matched case-sensitively as written.
    PreprocessingRule(
        re.compile("dx'd"),
        repl="Diagnosed",
        desc="Replace abbreviation",
    ),
    PreprocessingRule(
        re.compile("tx'd"),
        repl="Treated",
        desc="Replace abbreviation",
    ),
    # Catch-all: "[**" followed by anything up to the next "]".
    # No `repl` is passed, so the rule uses PreprocessingRule's default
    # replacement (presumably an empty string, matching the "Remove" intent).
    PreprocessingRule(
        re.compile(r"\[\*\*[^\]]+\]"),
        desc="Remove all other bracketed placeholder text from MIMIC",
    ),
]

  re.compile("\[\*\*[\d]{1,4}-[\d]{1,2}(-[\d]{1,2})?\*\*\]"),
  re.compile("\[\*\*[\d]{4}\*\*\]"),
  re.compile("\[\*\*[^\]]+\]"),


In [10]:
# Register the clean-up rules with the preprocessor.
preprocessor.add(preprocess_rules)

### Target matching

In [11]:
# Register the custom "icd10" attribute on Span (read as `span._.icd10`); the
# target rules below fill it through their `attributes` argument.
# Extensions are global to the Python process, so registering the same name a
# second time raises an error. The guard keeps this cell safe to re-run.
if not Span.has_extension("icd10"):
    Span.set_extension("icd10", default="")

In [12]:
# Fetch the target matcher component from the loaded pipeline so rules can be
# added to it.
target_matcher = nlp.get_pipe("medspacy_target_matcher")

In [13]:
# Concepts to extract, each given as a literal, a category label, and
# optionally a token pattern and custom Span attributes.
target_rules = [
    # Plain literal rules.
    TargetRule(literal="abdominal pain", category="PROBLEM"),
    TargetRule("stroke", "PROBLEM"),
    TargetRule("hemicolectomy", "TREATMENT"),
    TargetRule("Hydrochlorothiazide", "TREATMENT"),
    TargetRule("colon cancer", "PROBLEM"),
    # "xrt" is shorthand for radiotherapy, which is a therapy rather than a
    # diagnosis, so it is labelled TREATMENT like the other therapies above.
    TargetRule(
        "radiotherapy",
        "TREATMENT",
        pattern=[{"LOWER": "xrt"}],
    ),
    TargetRule("metastasis", "PROBLEM"),
    # Pattern rules that also set the custom `icd10` Span attribute.
    TargetRule(
        "Type II Diabetes Mellitus",
        "PROBLEM",
        pattern=[
            {"LOWER": "type"},
            {"LOWER": {"IN": ["2", "ii", "two"]}},
            {"LOWER": {"IN": ["dm", "diabetes"]}},
            {"LOWER": "mellitus", "OP": "?"},  # trailing "mellitus" is optional
        ],
        attributes={"icd10": "E11.9"},
    ),
    TargetRule(
        "Hypertension",
        "PROBLEM",
        pattern=[{"LOWER": {"IN": ["htn", "hypertension"]}}],
        attributes={"icd10": "I10"},
    ),
]

In [14]:
# Register the target rules with the matcher component.
target_matcher.add(target_rules)

### Context

In [15]:
# Fetch the ConText component from the loaded pipeline so rules can be added
# to it.
context = nlp.get_pipe("medspacy_context")

In [16]:
# Custom ConText modifier with category HISTORICAL: the tokens "diagnosed",
# "in", then a token made of exactly four digits (all compared in lower case).
context_rules = [
    ConTextRule(
        "diagnosed in <YEAR>",
        "HISTORICAL",
        pattern=[
            {"LOWER": "diagnosed"},
            {"LOWER": "in"},
            {"LOWER": {"REGEX": r"^[\d]{4}$"}},
        ],
    ),
]

  {"LOWER": {"REGEX": "^[\d]{4}$"}}


In [17]:
# Register the custom modifier rule with the ConText component.
context.add(context_rules)

### Section detection

In [18]:
from medspacy.section_detection import SectionRule

In [19]:
# Add a sectionizer component to the pipeline, configured with
# "rules": "default", and keep the returned component so more rules can be
# added to it.
sectionizer = nlp.add_pipe("medspacy_sectionizer", config={"rules": "default"})

In [20]:
# One extra section rule: the header text "Brief Hospital Course:" is given
# the category "hospital_course".
section_patterns = [SectionRule("Brief Hospital Course:", "hospital_course")]

In [21]:
# Register the extra section rule with the sectionizer.
sectionizer.add(section_patterns)

### Postprocessing

In [22]:
# Add a postprocessor component to the pipeline and keep the returned
# component so rules can be added to it.
postprocessor = nlp.add_pipe("medspacy_postprocessor")

In [23]:
def _in_patient_instructions(ent):
    """Return True when the entity's section category is "patient_instructions"."""
    return ent._.section_category == "patient_instructions"


# Single postprocessing rule: when the condition above holds for an entity,
# apply `remove_ent` to it.
postprocess_rules = [
    PostprocessingRule(
        patterns=[PostprocessingPattern(condition=_in_patient_instructions)],
        action=postprocessing_functions.remove_ent,
        description="Remove any entities from the instructions section.",
    ),
]

In [24]:
# Register the postprocessing rule with the postprocessor component.
postprocessor.add(postprocess_rules)

# Process our document

In [25]:
# Show the pipeline again: the sectionizer and postprocessor added above
# should now be listed after the default components.
nlp.pipe_names

['medspacy_pyrush',
 'medspacy_target_matcher',
 'medspacy_context',
 'medspacy_sectionizer',
 'medspacy_postprocessor']

In [26]:
# Run the whole pipeline over the discharge summary read earlier.
doc = nlp(text)

In [27]:
# Display the processed document with medspaCy's entity visualizer.
visualize_ent(doc)

In [28]:
# Short example that hits several rules defined above: the "dx'd" and "tx'd"
# abbreviations, a bracketed four-digit MIMIC year, and the "colon cancer" and
# "hemicolectomy" targets.
short_text = "Colon cancer dx'd in [**2554**], tx'd with hemicolectomy"
short_doc = nlp(short_text)

In [29]:
# Display the short example with the entity visualizer.
visualize_ent(short_doc)

In [30]:
# Display the same short example with medspaCy's dependency-style visualizer.
# NOTE(review): presumably this draws arcs between targets and their ConText
# modifiers — confirm.
visualize_dep(short_doc)

In [31]:
# Sample text under a "Discharge Instructions:" header. It mentions
# "abdominal pain", which has a target rule, so it exercises the
# postprocessing rule that removes entities whose section category is
# "patient_instructions".
# NOTE(review): assumes the default section rules map this header to
# "patient_instructions" — confirm.
discharge_instructions = """
Discharge Instructions:
Patient may shower. Please call your surgeon or return to the
emergency room if [**Doctor First Name **] experience fever >101.5, nausea, vomiting,
abdominal pain, shortness of breath, abdominal pain or any
significant  change in your medical condition.
"""
discharge_doc = nlp(discharge_instructions)

In [32]:
# Display the instructions example with the entity visualizer, to see which
# entities remain after postprocessing.
visualize_ent(discharge_doc)