In [18]:
import spacy

from spacy.matcher import Matcher, PhraseMatcher
from spacy.pipeline import EntityRuler

In [73]:
nlp = spacy.load("en_core_web_sm")
nlp.remove_pipe("ner")

('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x170ca702e88>)

In [396]:
example_text = "There is no evidence of pneumonia or asthma but there is angina. There is CHF. PE is ruled out. There is pneumonia but pe is ruled out."

In [60]:
from spacy.tokens import Doc, Span

In [5]:
Doc.set_extension("context_graph", default=(), force=True)

In [6]:
Span.set_extension("is_negated", default=False, force=True)

In [61]:
doc = nlp(example_text)

In [62]:
sents = list(doc.sents)

In [63]:
sents

[There is no evidence of pneumonia or  but there is angina.,
 There is CHF.,
 PE is ruled out.]

In [295]:
class ConTextComponent:
    name = "context"
    
    def __init__(self, modifier_rules, nlp, doc_attr="ents", text_attr="ORTH"):
        self.modifier_rules = modifier_rules
        self.nlp = nlp
        self.doc_attr = doc_attr
        self._modifier_rule_hash = dict() # A mapping of unique identifiers to ModifierRules
        self.phrase_matcher = PhraseMatcher(nlp.vocab, attr=text_attr)
        self._i = 0
        
        self.set_phrase_matching_rules()
        
    def set_phrase_matching_rules(self):
        for rule in self.modifier_rules:
            uid = self.nlp.vocab.strings[str(self._i)]
            self.phrase_matcher.add(str(self._i), 
                           None, 
                           self.nlp(rule.literal))
            self._modifier_rule_hash[uid] = rule
            self._i += 1
            
    def __call__(self, doc):
        if self.doc_attr == "ents":
            targets = doc.ents
        else:
            targets = getattr(doc._, self.doc_attr)
        print("asdf")
        context_graph = ConTextGraph(targets=targets)
        doc._.context_graph = context_graph
        # Call the phrase matcher to get the modifiers in the sentence
        # TODO: will need to prune here
        matches = self.phrase_matcher(doc)
        for (match_id, start, end) in matches:
            modifier = Modifier(doc[start:end], self._modifier_rule_hash[match_id])
            context_graph.add_modifier(modifier)
        context_graph.build_graph()
        
        

In [296]:
class ModifierRule:
    def __init__(self, literal, category, regex=None, directionality="bidirectional"):
        self.literal = literal
        self.category = category
        self.regex = regex
        self.directionality = directionality

In [315]:
class ConTextGraph:
    def __init__(self, targets=(), modifiers=()):    
        self.targets = targets
        self.modifiers = ()
        self.edges = []
        self.add_modifiers(modifiers)
        
    def add_modifiers(self, modifiers):
        for modifier in modifiers:
            print(modifier)
            self.add_modifier(modifier)
        for modifier in modifiers:
            modifier.update_scope()
        
    def add_modifier(self, modifier):
        """Add a TagObject modifier and create any edges between them.
        # TODO: move edge-building to its own method 
        """
        self.modifiers += (modifier,)
        modifier.set_scope()
        
            
    def build_graph(self):
        # Update scopes of modifiers
        self.update_scopes()
        # Build edges
        for modifier in self.modifiers:
            for target in self.targets:
                if modifier.modifies(target):
                    self.edges.append((target, modifier))
    
    def update_scopes(self):
        print("Here")
        for i in range(len(self.modifiers) - 1):
            print(i)
            modifier = self.modifiers[i]
            for j in range(i+1, len(self.modifiers)):
                modifier2 = self.modifiers[j]
                if modifier.limit_scope(modifier2):
                    print(True)
#     def build_edges(self):
        
                
    def __repr__(self):
        return f"ConTextGraph with {len(self.targets)} targets and {len(self.modifiers)} modifiers"

In [397]:
class Modifier:
    def __init__(self, span, modifier):
        self.span = span
        self.modifier = modifier
        self._scope_start = None
        self._scope_end = None
        
        self.set_scope()
    
    @property
    def category(self):
        return self.modifier.category
    
    @property
    def directionality(self):
        return self.modifier.directionality
    
    def set_scope(self):
        """Based on the directionality of `modifier`, 
        set the scope of this tag object within the sentence.
        
        """
        
        sent = self.span.sent
        
        if self.modifier.directionality in ("bidirectional", "backward"):
            self._scope_start = self.span.start
            self._scope_end = self.span.start
            
            while True:    
                if self._scope_start == sent.start:
                    break
                self._scope_start -= 1
            
        
        elif self.modifier.directionality in ("bidirectional", "forward"):
            self._scope_start = self.span.end
            self._scope_end = self.span.end
            
            while True:
                if self._scope_end == sent.end:
                    break
                self._scope_end += 1
        elif self.modifier.directionality == "terminate":
            self._scope_start = self.span.end
            self._scope_end = self.span.end
        else:
            raise ValueError(f"Modifier direction {self.modifier.directionality} is not known")
            
    def update_scope(self):
         # Set the span to be the entire sentence either forward or backward
        if self.modifier.directionality in ("bidirectional", "backward"):
            pass
        
        elif self.modifier.directionality in ("bidirectional", "forward"):
            pass
                
        else:
            raise ValueError(f"Modifier direction {self.modifier.directionality} is not known")
            
    def limit_scope(self, other):
        """If self and obj have the same category or if obj has a directionality of 'terminate',
        use the span of obj to update the scope of self.
        Returns True if obj modfified the scope of self
        """
        if self.span.sent != other.span.sent:
            return False
        # TODO: Start here
        if self.directionality.lower() == "terminate":
            return False
        if other.directionality.lower() not in ("terminate", self.directionality.lower()):
            return False
        
        orig_scope = self.scope
        
        print(self, other)
        if (self.directionality.lower() in ("forward", "bidirectional")):
            if other > self:
                self._scope_end = min(self._scope_end, other._scope_start)
        elif (self.directionality.lower() in ("backward", "bidirectional")):
            if other < self:
                self._scope_start = max(self._scope_start, other._scope_end)
        print(orig_scope, self.scope)
        if orig_scope != self.scope:
            return True
        else:
            return False

    @property
    def scope(self):
        return (self._scope_start, self._scope_end)
        
    def get_scope(self):
        return self.scope
        
    def modifies(self, other):
        """Return True if self.span overlaps with another span."""
        other_start, other_end = other.start, other.end
        if self.scope[0] <= other_start < self.scope[1]:
            return True
        return False
    
    def __gt__(self, other): return self._scope_start > other._scope_start
    def __ge__(self, other): return self._scope_start >= other._scope_start
    def __lt(self, other): return self._scope_start < other._scope_start
    def __lt(self, other): return self._scope_start <= other._scope_start
    
    def __repr__(self):
        return f"[{self.span} [{self.category}]]"

In [373]:
ruler = EntityRuler(nlp, overwrite_ents=True)

In [374]:
patterns = [
    [{"lower": "pneumonia"}],
    [{"lower": "asthma"}],
    [{"lower": "angina"}],
    [{"lower": "chf"}],
    [{"lower": "pe"}],
    
]

In [375]:
patterns = [{"label": "CONDITION", "pattern": pattern} for pattern in patterns]

In [376]:
ruler.add_patterns(patterns)

In [255]:
nlp.add_pipe(ruler)

ValueError: [E007] 'entity_ruler' already exists in pipeline. Existing names: ['tagger', 'parser', 'entity_ruler']

In [388]:
doc = nlp(example_text)

In [389]:
doc.ents

(pneumonia, asthma, angina, CHF, PE)

In [390]:
modifier_rules = [ModifierRule("no evidence of", category="DEFINITE_NEGATED_EXISTENCE", directionality="forward"),
                  ModifierRule("but", category="DEFINITE_NEGATED_EXISTENCE", directionality="terminate"),
            ModifierRule("is ruled out", category="DEFINITE_NEGATED_EXISTENCE", directionality="backward")]

In [391]:
context = ConTextComponent(modifier_rules, nlp, "ents")
context(doc)

asdf
Here
0
[no evidence of [DEFINITE_NEGATED_EXISTENCE]] [but [DEFINITE_NEGATED_EXISTENCE]]
(5, 13) (5, 9)
True
1


In [392]:
doc[5:13]

pneumonia or asthma but there is angina.

In [393]:
doc[5:9]

pneumonia or asthma but

In [394]:
doc._.context_graph

ConTextGraph with 5 targets and 3 modifiers

In [395]:
doc._.context_graph

ConTextGraph with 5 targets and 3 modifiers

In [385]:
for target, tag_object in doc._.context_graph.edges:
    if tag_object.category in ("DEFINITE_NEGATED_EXISTENCE",):
        target._.is_negated = True

In [386]:
for target in doc.ents:
    if not target._.is_negated:
        print(target)

angina
CHF


In [345]:
doc.ents

(pneumonia, asthma, angina, CHF, PE)

In [313]:
mods = doc._.context_graph.modifiers
mod = mods[1]

In [314]:
mods

([no evidence of [DEFINITE_NEGATED_EXISTENCE]],
 [no evidence of [DEFINITE_NEGATED_EXISTENCE]],
 [is ruled out [DEFINITE_NEGATED_EXISTENCE]])

In [185]:
mod

[is ruled out [DEFINITE_NEGATED_EXISTENCE]]

In [186]:
start, end = mod.get_scope()
doc[start:end]

PE

In [187]:
mod.get_scope()

(17, 18)