<a href="https://colab.research.google.com/github/kullawattana/thesis_2020_spacy_colab/blob/master/22_master_matcher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Rangarajan Krishnamoorthy, July 10, 2019
# Code to explore spaCy's Matcher
# (spaCy 2.0.13)
# https://www.rangakrish.com/index.php/2019/07/11/information-extraction-using-spacys-pattern-matcher/

import spacy
from spacy.matcher import Matcher

# Class to extract key features from case text.
class HomeoProcessor:
    """Collect labelled text matches from case notes via a spaCy Matcher.

    The processor wraps a language pipeline together with a ``Matcher``
    built on that pipeline's vocabulary. Patterns are registered under a
    category label with ``addPattern`` and looked up in text with
    ``process``.
    """

    # Class-level defaults; every instance overrides both in __init__.
    matcher = None
    nlp = None

    def __init__(self, nlp):
        """Store the pipeline *nlp* and build a Matcher on ``nlp.vocab``."""
        self.nlp = nlp
        self.matcher = Matcher(nlp.vocab)

    def addPattern(self, cat, *patterns):
        """Register *patterns* under the category label *cat*.

        Returns the processor itself so that calls can be chained.
        """
        # NOTE(review): positional form (key, on_match, *patterns) with no
        # callback; this matches the spaCy 2.x signature named in the file
        # header -- confirm before running against a newer spaCy release.
        self.matcher.add(cat, None, *patterns)
        return self

    def process(self, text):
        """Run the pipeline and matcher over *text*.

        Returns a dictionary mapping each matched category label to the
        list of matched span texts, in the order the matcher yields them.
        """
        parsed = self.nlp(text)
        found = {}
        for key_hash, first, last in self.matcher(parsed):
            # The matcher yields an id; the vocab's string store maps it
            # back to the label that was passed to addPattern.
            label = self.nlp.vocab.strings[key_hash]
            found.setdefault(label, []).append(parsed[first:last].text)
        return found

# ------------ Let us define some patterns ---------

# Each table below is a list of token patterns; every pattern is a list of
# per-token attribute dictionaries, one dictionary per consecutive token.

# Symptoms of the common cold: two lemma-based single tokens, one literal
# lowercase token, and the two-token phrase "runny nose".
cold_patterns = [[{"LEMMA": lemma}] for lemma in ("sneeze", "cough")]
cold_patterns += [
    [{"LOWER": "rhinitis"}],
    [{"LOWER": "runny"}, {"LOWER": "nose"}],
]

# Diarrhea: both spellings of the word, plus "loose" followed by a token
# whose lemma is "stool" or "motion".
diarrhea_patterns = [[{"LOWER": word}] for word in ("diarrhoea", "diarrhea")]
diarrhea_patterns += [
    [{"LOWER": "loose"}, {"LEMMA": noun}] for noun in ("stool", "motion")
]

# Patient name: one NNP-tagged token, optionally followed by a second one.
name_patterns = [
    [
        {"TAG": "NNP"},
        {"TAG": "NNP", "OP": "?"},
    ],
]

# Patient age: lemma "age" followed by a number-like token, or a
# number-like token followed by the lemmas "year" and "old".
age_patterns = [
    [{"LEMMA": "age"}, {"LIKE_NUM": True}],
    [{"LIKE_NUM": True}, {"LEMMA": "year"}, {"LEMMA": "old"}],
]

# Patient gender: any one of these lowercase single tokens.
gender_patterns = [
    [{"LOWER": word}] for word in ("male", "female", "lady", "boy", "girl")
]

# Two short sample case descriptions to run the matcher against.
case1 = u"Mary, a 50 year old lady is sneezing continuously. There is runny nose as well."

case2 = "Peter is a young boy, aged 10 years. He is suffering from rhinitis for the past 2 days. He has occasional loose motion."

# Load the small English pipeline and wrap it in a processor.
nlp = spacy.load('en_core_web_sm')
homeo = HomeoProcessor(nlp)

# Register each pattern table under its category label, in the same order
# as before: Cold, Diarrhea, Name, Age, Gender.
for label, pattern_table in (
    ("Cold", cold_patterns),
    ("Diarrhea", diarrhea_patterns),
    ("Name", name_patterns),
    ("Age", age_patterns),
    ("Gender", gender_patterns),
):
    homeo.addPattern(label, *pattern_table)

# Process both cases, printing one "category matches" line per category,
# with a separator line between the two reports.
for position, case_text in enumerate((case1, case2)):
    if position:
        print('********************')
    for category, matching_text in homeo.process(case_text).items():
        print(category, matching_text)

Name ['Mary']
Age ['50 year old']
Gender ['lady']
Cold ['sneezing', 'runny nose']
********************
Name ['Peter']
Gender ['boy']
Age ['aged 10']
Cold ['rhinitis']
Diarrhea ['loose motion']
