In [39]:
import spacy
from spacy.tokens import Span

import medspacy
from medspacy.preprocess import PreprocessingRule
from medspacy.ner import TargetRule
from medspacy.context import ConTextItem
from medspacy.postprocess import PostprocessingRule, PostprocessingPattern
from medspacy.postprocess import postprocessing_functions
from medspacy.visualization import visualize_ent, visualize_dep


import re

# Overview
In this notebook, we'll put all of the pieces of our pipeline together.

In [2]:
# Read the whole discharge summary into memory. The encoding is given explicitly
# so the result does not depend on the platform's default locale encoding.
with open("./discharge_summary.txt", encoding="utf-8") as f:
    text = f.read()

In [3]:
# Load the medspaCy pipeline with the "pyrush" component disabled.
nlp = medspacy.load(disable=["pyrush"])

In [4]:
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tagger',
 'parser',
 'target_matcher',
 'sectionizer',
 'context',
 'postprocessor']

### Preprocessing

In [8]:
# Grab the pipeline's tokenizer object; the preprocessing rules defined below
# are registered on it through its `.add()` method.
preprocessor = nlp.tokenizer

In [11]:
# Text clean-up rules that get registered on the preprocessor.
#
# Order matters: the text is lowercased first, the specific MIMIC date/year
# placeholders are rewritten next, and the catch-all placeholder rule comes
# last so it only touches whatever bracketed text is still left.
#
# All patterns are raw strings so the backslashes reach `re` untouched; in a
# regular string literal "\[" , "\*" and "\d" are invalid escape sequences and
# trigger warnings on current Python versions.
preprocess_rules = [
    # A plain callable is used here: lowercase the entire text.
    lambda x: x.lower(),

    # e.g. "[**2554-3-12**]" or "[**3-12**]" -> "01-01-2010"
    PreprocessingRule(
        re.compile(r"\[\*\*[\d]{1,4}-[\d]{1,2}(-[\d]{1,2})?\*\*\]"),
        repl="01-01-2010",
        desc="Replace MIMIC date brackets with a generic date.",
    ),

    # e.g. "[**2554**]" -> "2010"
    PreprocessingRule(
        re.compile(r"\[\*\*[\d]{4}\*\*\]"),
        repl="2010",
        desc="Replace MIMIC year brackets with a generic year.",
    ),

    # The replacements below are capitalized even though the text was
    # lowercased by the first rule.
    PreprocessingRule(
        re.compile("dx'd"),
        repl="Diagnosed",
        desc="Replace abbreviation",
    ),

    PreprocessingRule(
        re.compile("tx'd"),
        repl="Treated",
        desc="Replace abbreviation",
    ),

    # Catch-all for any remaining "[** ... ]" placeholder. No `repl` is given,
    # so this relies on PreprocessingRule's default replacement (the notebook
    # output shows the placeholder text simply disappearing).
    PreprocessingRule(
        re.compile(r"\[\*\*[^\]]+\]"),
        desc="Remove all other bracketed placeholder text from MIMIC",
    ),
]

In [12]:
# Register the preprocessing rules on the preprocessor.
preprocessor.add(preprocess_rules)

### Target matching

In [14]:
# Register a custom `icd10` attribute on spaCy spans (read as `span._.icd10`),
# defaulting to an empty string. The guard keeps this cell idempotent:
# registering an extension that already exists raises an error, which would
# otherwise happen every time the cell is re-run in the same kernel.
if not Span.has_extension("icd10"):
    Span.set_extension("icd10", default="")

In [15]:
# Fetch the "target_matcher" pipeline component so rules can be added to it.
target_matcher = nlp.get_pipe("target_matcher")

In [16]:
# Rules telling the target matcher which concepts to extract.
# Each rule is TargetRule(literal, category, ...):
#   - with no `pattern`, only the literal and category are supplied;
#   - with a `pattern`, a spaCy token pattern is supplied alongside the literal;
#   - `attributes` supplies values for custom span extensions (here `icd10`).
target_rules = [
    # Literal-only rules.
    TargetRule("abdominal pain", "PROBLEM"),
    TargetRule("stroke", "PROBLEM"),
    TargetRule("hemicolectomy", "TREATMENT"),
    TargetRule("Hydrochlorothiazide", "TREATMENT"),
    TargetRule("colon cancer", "PROBLEM"),
    # "xrt" is shorthand for radiotherapy, which is a therapy rather than a
    # diagnosis, so it is labelled TREATMENT like the other therapies above.
    TargetRule(
        "radiotherapy",
        "TREATMENT",
        pattern=[{"LOWER": "xrt"}],
    ),
    TargetRule("metastasis", "PROBLEM"),

    # "type 2 dm", "type ii diabetes", "type two diabetes mellitus", ...
    TargetRule(
        "Type II Diabetes Mellitus",
        "PROBLEM",
        pattern=[
            {"LOWER": "type"},
            {"LOWER": {"IN": ["2", "ii", "two"]}},
            {"LOWER": {"IN": ["dm", "diabetes"]}},
            {"LOWER": "mellitus", "OP": "?"},
        ],
        attributes={"icd10": "E11.9"},
    ),
    # "htn" or "hypertension"
    TargetRule(
        "Hypertension",
        "PROBLEM",
        pattern=[{"LOWER": {"IN": ["htn", "hypertension"]}}],
        attributes={"icd10": "I10"},
    ),
]

In [17]:
# Register the target rules with the target matcher.
target_matcher.add(target_rules)

### Context

In [18]:
# Fetch the "context" pipeline component so custom items can be added to it.
context = nlp.get_pipe("context")

In [19]:
# Custom ConText item in the HISTORICAL category that matches the three-token
# phrase "diagnosed in <4-digit year>". Matching is on the lowercase token
# form, so casing in the text does not matter.
item_data = [
    ConTextItem(
        "diagnosed in <YEAR>",
        "HISTORICAL",
        pattern=[
            {"LOWER": "diagnosed"},
            {"LOWER": "in"},
            # Raw string: "\d" in a regular string literal is an invalid
            # escape sequence and warns on current Python versions.
            {"LOWER": {"REGEX": r"^[\d]{4}$"}},
        ],
    )
]

In [20]:
# Register the custom ConText item with the context component.
context.add(item_data)

### Section detection

In [21]:
# Fetch the "sectionizer" pipeline component so patterns can be added to it.
sectionizer = nlp.get_pipe("sectionizer")

In [22]:
# A single extra sectionizer pattern: section title "hospital_course",
# keyed on the header text "Brief Hospital Course:".
hospital_course_pattern = dict(
    section_title="hospital_course",
    pattern="Brief Hospital Course:",
)
section_patterns = [hospital_course_pattern]

In [23]:
# Register the extra section pattern with the sectionizer.
sectionizer.add(section_patterns)

### Postprocessing

In [24]:
# Fetch the "postprocessor" pipeline component so rules can be added to it.
postprocessor = nlp.get_pipe("postprocessor")

In [27]:
# Condition: the entity's section title is "patient_instructions".
in_patient_instructions = PostprocessingPattern(
    condition=lambda ent: ent._.section_title == "patient_instructions"
)

# Rule: when the condition holds, apply `remove_ent` to the entity.
remove_instruction_ents = PostprocessingRule(
    patterns=[in_patient_instructions],
    action=postprocessing_functions.remove_ent,
    description="Remove any entities from the instructions section.",
)

postprocess_rules = [remove_instruction_ents]

In [28]:
# Register the postprocessing rule with the postprocessor.
postprocessor.add(postprocess_rules)

# Process our document

In [29]:
# Run the configured pipeline on the discharge summary text read from disk.
doc = nlp(text)

abdominal pain
Passed: PostprocessingRule: None - Remove any entities from the instructions section. on ent: abdominal pain please call your surgeon or return to the
emergency room if  experience fever >101.5, nausea, vomiting,
abdominal pain, shortness of breath, abdominal pain or any
significant  change in your medical condition.

abdominal pain
Passed: PostprocessingRule: None - Remove any entities from the instructions section. on ent: abdominal pain please call your surgeon or return to the
emergency room if  experience fever >101.5, nausea, vomiting,
abdominal pain, shortness of breath, abdominal pain or any
significant  change in your medical condition.

htn

type 2 dm

stroke

hypertension

type ii diabetes mellitus

xrt

hemicolectomy

colon cancer

metastasis

abdominal pain

stroke

type 2 dm

abdominal pain

hydrochlorothiazide



In [30]:
# Render the entity visualization for the full discharge summary.
visualize_ent(doc)

In [35]:
# Short example that exercises the preprocessing rules: it contains the
# abbreviations "dx'd" and "tx'd" and a MIMIC year placeholder "[**2554**]".
short_text = "Colon cancer dx'd in [**2554**], tx'd with hemicolectomy"
short_doc = nlp(short_text)

hemicolectomy

colon cancer



In [34]:
# Render the entity visualization for the short example.
visualize_ent(short_doc)

In [40]:
# Render the `visualize_dep` view of the short example
# (presumably a dependency-style diagram; confirm against the medspaCy docs).
visualize_dep(short_doc)

In [36]:
# Example "Discharge Instructions" section. It mentions "abdominal pain" twice
# and contains a MIMIC placeholder ("[**Doctor First Name **]"); the captured
# output shows the placeholder removed and the instructions-section
# postprocessing rule firing on both "abdominal pain" entities.
discharge_instructions = """
Discharge Instructions:
Patient may shower. Please call your surgeon or return to the
emergency room if [**Doctor First Name **] experience fever >101.5, nausea, vomiting,
abdominal pain, shortness of breath, abdominal pain or any
significant  change in your medical condition.
"""
discharge_doc = nlp(discharge_instructions)

abdominal pain
Passed: PostprocessingRule: None - Remove any entities from the instructions section. on ent: abdominal pain please call your surgeon or return to the
emergency room if  experience fever >101.5, nausea, vomiting,
abdominal pain, shortness of breath, abdominal pain or any
significant  change in your medical condition.


abdominal pain
Passed: PostprocessingRule: None - Remove any entities from the instructions section. on ent: abdominal pain please call your surgeon or return to the
emergency room if  experience fever >101.5, nausea, vomiting,
abdominal pain, shortness of breath, abdominal pain or any
significant  change in your medical condition.




In [38]:
# Render the entity visualization for the discharge-instructions example.
visualize_ent(discharge_doc)