# Overview


In this notebook, we'll look at a few examples of how cycontext can be used to extract information from clinical text.

In [1]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel (mode 2: reload all modules before
# each cell is executed).
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys

In [3]:
# Put the parent directory on the import path; presumably this lets the
# notebook import the local cycontext checkout — confirm against the repo layout.
sys.path.append("..")

In [4]:
import spacy
from spacy.pipeline import EntityRuler

from cycontext import ConTextItem, ConTextComponent

In [5]:
# Load spaCy's small English model.
# NOTE(review): `nlp` is reassigned by a fresh spacy.load() at the start of the
# first example below before this instance is used, so this load looks redundant.
nlp = spacy.load("en_core_web_sm")

# 1. Identifying positive and negative mentions of pneumonia

In [123]:
# Start this example from a fresh English pipeline and drop its "ner"
# component; presumably so that doc.ents comes only from the EntityRuler
# added further down — confirm.
nlp = spacy.load("en_core_web_sm")
# Bind the return value to `_` so the notebook does not echo it as cell output.
_ = nlp.remove_pipe("ner")

In [124]:
# Example report snippets for the pneumonia demo. Order matters: a later cell
# picks one processed document by position.
texts = [
    "interval opacification within the left lower lobe consistent with consolidation.",
    "IMPRESSION:  Left lower lobe pneumonia.",
    "INDICATION: Pneumonia.",
    "basilar lung opacities with residual opacity",
    "Reason: eval for CHF, infiltrate",
    "Worsening consolidation in the left lower lobe.",
    "Bilateral pulmonary opacities",
    "No radiographic evidence of pneumonia.",
]

In [125]:
# Target concepts for the EntityRuler: every pattern below labels its match as
# EVIDENCE_OF_PNEUMONIA.
#
# spaCy evaluates a REGEX predicate with a *search* over the token text, so an
# unanchored expression also matches tokens that merely contain it.
# NOTE(review): the stem patterns below are still unanchored and therefore also
# match longer tokens that contain them (e.g. "bronchopneumonia"); left as-is
# because that may be intended — confirm.
targets = [
    {'label': 'EVIDENCE_OF_PNEUMONIA',
     'pattern': [{'LOWER': {'REGEX': 'pneumonias?'}}]},
    # Exact, case-insensitive match on the abbreviation. A plain string is used
    # instead of REGEX because an unanchored 'pna' regex would also hit any
    # token containing those letters (e.g. "hypnagogic").
    {'label': 'EVIDENCE_OF_PNEUMONIA',
     'pattern': [{'LOWER': 'pna'}]},
    # Zero or more leading adjectives/nouns are absorbed into the span, so
    # modifiers directly in front of the head word become part of the entity.
    {'label': 'EVIDENCE_OF_PNEUMONIA',
     'pattern': [{"POS": {"IN": ["ADJ", "NOUN"]}, "OP": "*"},
                 {'LOWER': {'REGEX': 'consolidations?'}}]},
    {'label': 'EVIDENCE_OF_PNEUMONIA',
     'pattern': [{'LOWER': {'REGEX': 'infiltrat(e|es|ion)'}}]},
    # Same leading-modifier handling as for consolidation.
    {'label': 'EVIDENCE_OF_PNEUMONIA',
     'pattern': [{"POS": {"IN": ["ADJ", "NOUN"]}, "OP": "*"},
                 {'LOWER': {'REGEX': 'opacit(y|ies)'}}]},
]

In [126]:
# Rule-based entity recogniser for the target patterns. overwrite_ents=True
# lets its matches replace entities already present on the doc.
ruler = EntityRuler(nlp, overwrite_ents=True)

In [127]:
# Register the pneumonia target patterns with the ruler.
ruler.add_patterns(targets)

In [128]:
# Add the ruler to the pipeline.
# NOTE(review): passing the component object itself is the spaCy v2 calling
# convention; spaCy v3 expects a registered component name — confirm the
# spaCy version this notebook is pinned to.
nlp.add_pipe(ruler)

In [129]:
# ConText modifiers for the pneumonia example. Every item carries a literal, a
# category and a rule direction; only the negation item also supplies an
# explicit token pattern.
item_data = [
    ConTextItem(
        literal="indication",
        category="INDICATION",
        pattern=None,
        rule="BIDIRECTIONAL",
    ),
    ConTextItem(
        literal="no evidence of",
        category="DEFINITE_NEGATED_EXISTENCE",
        # "no"/"without", then an optional qualifier, an optional imaging
        # modality, "evidence", and finally "of"/"for".
        pattern=[
            {"LOWER": {"IN": ["no", "without"]}},
            {"LOWER": {"IN": ["definite", "other", "definitive", "secondary", "indirect"]}, "OP": "?"},
            {"LOWER": {"IN": ["radiographic", "sonographic", "ct"]}, "OP": "?"},
            {"LOWER": "evidence"},
            {"LOWER": {"IN": ["of", "for"]}},
        ],
        rule="FORWARD",
    ),
    ConTextItem(
        literal="reason",
        category="INDICATION",
        pattern=None,
        rule="FORWARD",
    ),
    ConTextItem(
        literal="eval for",
        category="INDICATION",
        pattern=None,
        rule="FORWARD",
    ),
]

In [130]:
# Alternative to the inline list above (left disabled): presumably loads
# modifier definitions from a JSON knowledge-base file instead — confirm.
# item_data = ConTextItem.from_json("../kb/pneumonia_modifiers.json")

In [131]:
item_data

[ConTextItem(literal='indication', category='INDICATION', pattern=None, rule='BIDIRECTIONAL'),
 ConTextItem(literal='no evidence of', category='DEFINITE_NEGATED_EXISTENCE', pattern=[{'LOWER': {'IN': ['no', 'without']}}, {'LOWER': {'IN': ['definite', 'other', 'definitive', 'secondary', 'indirect']}, 'OP': '?'}, {'LOWER': {'IN': ['radiographic', 'sonographic', 'ct']}, 'OP': '?'}, {'LOWER': 'evidence'}, {'LOWER': {'IN': ['of', 'for']}}], rule='FORWARD'),
 ConTextItem(literal='reason', category='INDICATION', pattern=None, rule='FORWARD'),
 ConTextItem(literal='eval for', category='INDICATION', pattern=None, rule='FORWARD')]

In [132]:
# ConText pipeline component.
# NOTE(review): add_attrs=True presumably makes the component register and set
# its default span attributes (a later cell reads ent._.is_experienced) —
# confirm against the cycontext documentation.
context = ConTextComponent(nlp, add_attrs=True)

In [133]:
# Hand the modifier definitions to the ConText component.
context.add(item_data)

In [134]:
# Add ConText to the pipeline. It is added after the EntityRuler, whose
# entities it is meant to qualify — assumes add_pipe appends by default,
# TODO confirm.
nlp.add_pipe(context)

In [135]:
# Run the pipeline over every example text and materialise the results as a
# list so that individual documents can be indexed and re-iterated.
docs = list(nlp.pipe(texts))

In [136]:
from cycontext import viz

In [138]:
# Pick the fifth processed document for the visualisations below; assuming
# nlp.pipe preserves input order this is "Reason: eval for CHF, infiltrate".
doc = docs[4]

In [139]:
viz.visualize_ent(doc)

In [140]:
viz.visualize_dep(doc)

In [141]:
from spacy.tokens import Doc

In [143]:
# Register a document-level flag that is False until explicitly set.
# force=True overwrites an existing registration, so re-running this cell
# does not raise.
Doc.set_extension("pneumonia_positive", default=False, force=True)

In [144]:
# Mark a document as pneumonia-positive when at least one of its entities is
# flagged as experienced; documents without such an entity are left untouched.
for doc in docs:
    if any(ent._.is_experienced for ent in doc.ents):
        doc._.pneumonia_positive = True

In [145]:
# Partition the processed documents by their pneumonia flag in one pass.
# Identity checks are kept so that only genuine True / False values are sorted
# into either list.
pos_docs = []
neg_docs = []
for candidate in docs:
    flag = candidate._.pneumonia_positive
    if flag is True:
        pos_docs.append(candidate)
    elif flag is False:
        neg_docs.append(candidate)

In [146]:
pos_docs

[interval opacification within the left lower lobe consistent with consolidation.,
 IMPRESSION:  Left lower lobe pneumonia.,
 basilar lung opacities with residual opacity,
 Worsening consolidation in the left lower lobe.,
 Bilateral pulmonary opacities]

In [147]:
neg_docs

[INDICATION: Pneumonia.,
 Reason: eval for CHF, infiltrate,
 No radiographic evidence of pneumonia.]

# 2. Extracting anatomical sites of surgical site infections

In [196]:
# Start this example from a fresh English pipeline so nothing added to the
# previous `nlp` object carries over, and drop its "ner" component; presumably
# so that doc.ents comes only from the EntityRuler added below — confirm.
nlp = spacy.load("en_core_web_sm")
# Bind the return value to `_` so the notebook does not echo it as cell output.
_ = nlp.remove_pipe("ner")

In [197]:
# Example sentences for the surgical-site-infection demo: each mentions one
# finding together with an anatomical location.
texts = [
    "There is a 3.3 cm abscess in the abdomen.",
    "There is a collection of fluid in the jejunum.",
    "Hematomas are seen around the right lower quadrant",
]

In [198]:
# Target concepts for the surgical-site-infection example; every match is
# labelled EVIDENCE_OF_SSI. Singular and plural surface forms are listed
# explicitly so that e.g. "hematoma" and "abscesses" are matched as well as
# the forms that happen to occur in the example texts.
targets = [
    {"label": "EVIDENCE_OF_SSI",
     "pattern": [{"LOWER": {"IN": ["abscess", "abscesses"]}}]
    },

    {"label": "EVIDENCE_OF_SSI",
     "pattern": [{"LOWER": {"IN": ["hematoma", "hematomas"]}}]
    },

    # Three-token phrase; the head noun is matched on its lemma, presumably so
    # that both "collection" and "collections" are accepted.
    {"label": "EVIDENCE_OF_SSI",
     "pattern": [{"LEMMA": "collection"}, {"LOWER": "of"}, {"LOWER": "fluid"}]
    },
]

In [199]:
# Rule-based entity recogniser for the SSI target patterns. overwrite_ents=True
# lets its matches replace entities already present on the doc.
ruler = EntityRuler(nlp, overwrite_ents=True)

In [200]:
# Register the SSI target patterns with the ruler.
ruler.add_patterns(targets)

In [201]:
# Add the ruler to this example's pipeline so its matches end up in doc.ents.
nlp.add_pipe(ruler)

In [202]:
# ConText component for this example.
# NOTE(review): add_attrs=False presumably skips the component's default span
# attributes; this example only reads ent._.modifiers further down — confirm
# against the cycontext documentation.
context = ConTextComponent(nlp, add_attrs=False)

In [203]:
# One bidirectional ANATOMICAL_SITE modifier per body-site literal, in the
# order the sites are listed.
item_data = [
    ConTextItem(literal=site, category='ANATOMICAL_SITE', rule='BIDIRECTIONAL')
    for site in ('abdomen', 'jejunum', 'right lower quadrant')
]

In [204]:
# Hand the anatomical-site modifier definitions to the ConText component.
context.add(item_data)

In [205]:
# Add ConText to the pipeline; it is added after the EntityRuler, whose
# entities it is meant to link modifiers to.
nlp.add_pipe(context)

In [206]:
# Run the pipeline over the SSI example texts and keep the results as a list
# so they can be indexed and iterated more than once.
docs = list(nlp.pipe(texts))

In [207]:
viz.visualize_ent(docs[0])

In [208]:
viz.visualize_dep(docs[0])

In [209]:
from spacy.tokens import Span

In [210]:
# Register a span-level attribute for the linked body site; it stays None
# until a site is assigned. force=True overwrites an existing registration,
# so re-running this cell does not raise.
Span.set_extension("anatomical_site", default=None, force=True)

In [212]:
# For every entity, collect the text of each modifier categorised as an
# anatomical site and store the last one on the entity (later modifiers win,
# as with repeated assignment). Each entity is printed with its site, which
# remains at its prior value when no site modifier is attached.
for doc in docs:
    for ent in doc.ents:
        site_texts = [
            mod.span.text
            for mod in ent._.modifiers
            if mod.category == 'ANATOMICAL_SITE'
        ]
        if site_texts:
            ent._.anatomical_site = site_texts[-1]
        print("{0} --> {1}".format(ent, ent._.anatomical_site))

abscess --> abdomen
collection of fluid --> jejunum
Hematomas --> right lower quadrant


# 3. Family History of Breast Cancer