# SpaCy

## Named entity recognition

In [1]:
dialog_text = """Doctor: How are you Miss G? 
Patient: I am good doctor, thank you for asking. 
Doctor: So, tell me what is going on?
Patient: I have this ear pain and headache for some time. It's better than before but I still want to get it checked. 
Doctor: Okay, when exactly did it start?
Patient: Um, almost three weeks ago. I am having difficulty hearing. I also feel this pressure on the left side of my sinus causing tooth pain. I went to my dentist yesterday, but my teeth are fine. 
Doctor: Okay, do you have headache now?
Patient: No, just ear pain and this jaw pain on the left side. 
Doctor: Any fever, cough, sore throat, or any cold like symptoms? 
Patient: No, but I have a sinus problem and I suffer from chronic left sided headache.
Doctor: How old are you?
Patient: Oh, I am forty nine.
Doctor: Hm, so are you taking any medications for your pain?
Patient: No, currently I am just using Cutivate for my eczema. It has helped me a lot, I do need a refill for it. 
Doctor: Okay I will send a prescription for it to your pharmacy.
"""

In [2]:
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp(dialog_text)

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [3]:
for ent in doc.ents:
    print(ent.text, ent.label_)

Miss G PERSON
almost three weeks ago DATE
yesterday DATE
Cutivate PRODUCT


In [18]:
text2 = """Patient: I just had few questions. Can you tell me about my diagnosis?
Doctor: Sure. It's called Serotonin syndrome, ma'am. After careful evaluation of your labs, we found out that your white count and C P K was high, and those abnormalities lined up with serotonin syndrome. What are you experiencing right now?
Patient: I have been very restless and easily agitated, I have diarrhea. But no fever or shakiness.
Doctor: These can match serotonin syndrome as well. You deny any fever, tremor or hypperflexia so we will give you some IV fluids and I will check on you in an hour or so.
Patient: Okay. 
Doctor: Looks like your C P K counts improved with I V fluids and after discontinuing Prozac.
Patient: How are the counts now? Are they normal? Because I feel normal.
Doctor: Yes, your C P K and white blood cell counts have come back down. Almost normal now.
Patient: My husband left me two weeks ago. My panic attacks are increasing day by day.
Doctor: Okay, I see that you have a history of panic attacks and you do have depression and anxiety, is that correct? Last Friday, I talked to psychiatrist about your issues, and he recommended Cymbalta as an alternative to Prozac. 
Patient: Yes, I stopped taking Prozac, and I am going to see him on Monday or Tuesday. I have a counselor too.
Patient: I do think it will be difficult to go home alone but my daughter is coming to visit me in two weeks.
Doctor: Oh wow.
Patient: Yeah.
Doctor: That's nice. Do you have someone who can drop you home and help you?
Patient: Yes, I have a friend who does that, I am staying with her for next three days.
Doctor: Okay that sounds good. Just continue with your medications for high blood pressure and diabetes as well. So, we treated your imbalance issues and gave you IV fluids, you do not have any more diarrhea, right?
Patient: Yes, that's right."""

## default model identified 0 entities

In [26]:
import medspacy
nlp = medspacy.load( )
doc2 = nlp(text2)
for ent in doc2.ents:
    print(ent.text, ent.label_)

## medical model

In [27]:
import en_core_sci_scibert
import en_core_med7_trf


In [28]:
# import scispacy
import spacy

nlp2 = spacy.load("en_core_med7_trf")
doc2 = nlp2(text2)
for ent in doc2.ents:
    print(ent.text, ent.label_)

IV ROUTE
fluids DRUG
fluids DRUG
Prozac DRUG
Cymbalta DRUG
Prozac DRUG
Prozac DRUG
IV ROUTE
fluids DRUG


In [29]:
doc2.ents

(IV, fluids, fluids, Prozac, Cymbalta, Prozac, Prozac, IV, fluids)

## Intent recognitioin / classification

In [7]:
import spacy

# Load the English model
nlp = spacy.load('en_core_web_sm')

# Function to recognize intent
def recognize_intent(text):
    doc = nlp(text)
    # Here you can define your intents based on the entities or patterns
    intents = {'greeting': ['hello', 'hi', 'hey'], 'goodbye': ['bye', 'goodbye']}
    for token in doc:
        for intent, keywords in intents.items():
            if token.text.lower() in keywords:
                return intent
    return 'unknown'

# Example usage
user_input = 'Hello, how are you?'
intent = recognize_intent(user_input)
print(f'Intent recognized: {intent}')  # Output: Intent recognized: greeting

Intent recognized: greeting


## Training a Custom Model

In [8]:
from spacy.util import minibatch, compounding
import random
from spacy.training.example import Example

In [9]:
# training_data = [
#     ('Hello, I need help', {'entities': [(0, 5, 'greeting')]}),
#     ('Goodbye, see you later', {'entities': [(0, 7, 'goodbye')]}),
# ]
training_data = [
    ('Hello, I need help', {'cats': {'greeting': 1.0, 'goodbye': 0.0}}),
    ('How is it going', {'cats': {'greeting': 1.0, 'goodbye': 0.0}}),
    ('Goodbye, see you later', {'cats': {'greeting': 0.0, 'goodbye': 1.0}}),
    ('Byebye, see you', {'cats': {'greeting': 0.0, 'goodbye': 1.0}}),
    ('see you', {'cats': {'greeting': 0.0, 'goodbye': 1.0}}),
    ('Hi there', {'cats': {'greeting': 1.0, 'goodbye': 0.0}}),
    ('See you soon', {'cats': {'greeting': 0.0, 'goodbye': 1.0}}),
]

In [10]:
nlp = spacy.blank('en')
# text_cat = nlp.create_pipe('textcat')
text_cat = nlp.add_pipe('textcat', last=True)
text_cat.add_label('greeting')
text_cat.add_label('goodbye')

1

In [11]:


examples = []
for text, annots in training_data:
    examples.append(Example.from_dict(nlp.make_doc(text), annots))
nlp.initialize(lambda: examples)

n_iter = 20
for epoch in range(n_iter):
    random.shuffle(examples)
    losses = {}
    # Create the minibatch generator
    for batch in minibatch(examples, size=8):
        nlp.update(batch, drop=0.3, losses=losses)
    print(losses)


{'textcat': 0.25}
{'textcat': 0.2427244335412979}
{'textcat': 0.23560336232185364}
{'textcat': 0.2227870374917984}
{'textcat': 0.21068832278251648}
{'textcat': 0.18657274544239044}
{'textcat': 0.18335475027561188}
{'textcat': 0.18716123700141907}
{'textcat': 0.1483396738767624}
{'textcat': 0.11363311111927032}
{'textcat': 0.11102795600891113}
{'textcat': 0.09083671122789383}
{'textcat': 0.08976726979017258}
{'textcat': 0.04922223836183548}
{'textcat': 0.062485165894031525}
{'textcat': 0.030504679307341576}
{'textcat': 0.024628501385450363}
{'textcat': 0.01169653795659542}
{'textcat': 0.01180785708129406}
{'textcat': 0.008775681257247925}


In [12]:
# Assuming trained_nlp is your trained model
def predict_text_category(nlp, text):
    doc = nlp(text)
    print("Prediction scores:")
    for label, score in doc.cats.items():
        print(f"{label}: {score}")
    
    # Get the category with the highest score
    predicted_category = max(doc.cats, key=doc.cats.get)
    print(f"Predicted category: {predicted_category}")
    return predicted_category

# Example usage
test_texts = [
    "Hello there",
    "Goodbye",
    "See you later"
]

for text in test_texts:
    print(f"\nAnalyzing: '{text}'")
    predict_text_category(nlp, text)


Analyzing: 'Hello there'
Prediction scores:
greeting: 0.9404430389404297
goodbye: 0.059556975960731506
Predicted category: greeting

Analyzing: 'Goodbye'
Prediction scores:
greeting: 0.07562782615423203
goodbye: 0.924372136592865
Predicted category: goodbye

Analyzing: 'See you later'
Prediction scores:
greeting: 0.013240814208984375
goodbye: 0.9867592453956604
Predicted category: goodbye


## Rule based solution

In [1]:
import spacy
from spacy.matcher import Matcher

In [2]:
nlp = spacy.load("en_core_web_sm")

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [3]:
matcher = Matcher(nlp.vocab)

In [22]:
pattern = [
    {"LOWER": {"IN": ["print", "generate", "create"]}},  # Action keywords
    {"IS_ALPHA": True, "OP": "*"},  # Allow intermediate words
    {"LOWER": "map"},
    {"IS_ALPHA": True, "OP": "*"},  # Allow intermediate words
    {"LOWER": {"IN": ["hospital", "clinic", "station"]}}  # Target keywords
]
matcher.add("PRINT_MAP", [pattern])

In [23]:
text = """
can you print a map for the Toronto hospital I was wondering if we could generate one for the Guelph clinic what about printing directions to the nearest gas station
"""

In [24]:
doc = nlp(text)

In [25]:
matches = matcher(doc)

In [26]:
for matchid, start, end in matches:
    print(matchid, start, end)
    span = doc[start:end]
    print(f"Matched Intent: {span.text}")
    for token in span:
        if token.text.lower() in ["hospital", "clinic", "station"]:
            # Check for location descriptors
            descriptor = " ".join(child.text for child in token.lefts if child.dep in ["compound", "amod"])
            print(f"Descriptor: {descriptor} {token.text}")

9095104806068616893 3 10
Matched Intent: print a map for the Toronto hospital
Descriptor:  hospital
9095104806068616893 3 22
Matched Intent: print a map for the Toronto hospital I was wondering if we could generate one for the Guelph clinic
Descriptor:  hospital
Descriptor:  clinic
9095104806068616893 3 31
Matched Intent: print a map for the Toronto hospital I was wondering if we could generate one for the Guelph clinic what about printing directions to the nearest gas station
Descriptor:  hospital
Descriptor:  clinic
Descriptor:  station
