In [1]:
import json
import spacy
from pprint import pprint
from spacy.matcher import Matcher

### Loading JSON data from file

In [2]:
with open('data/services.json', 'r') as file:
    data = json.load(file)

### Extracting doctor names using Spacy Matcher

In [3]:
nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab)

doctor_name_pattern = [
    [{"LOWER": {"IN": ["dr.", "dr", "doctor.", "doctor"]}}, {"POS": "PROPN"}, {"POS": "PROPN", "OP": "*"}],
]

matcher.add("DOCTOR_NAME", doctor_name_pattern)

def extract_doctor_names(text):
    doc = nlp(text)
    matches = matcher(doc)
    doctor_names = [doc[start:end].text for match_id, start, end in matches]
    return doctor_names

### Display all matches found in services.json

In [4]:
doctor_names = [
    name
    for dialogue in data
    for turn in dialogue['turns']
    if turn['speaker'] == 'SYSTEM'
    for frame in turn['frames']
    if 'service_results' in frame
    for result in frame['service_results']
    if 'dentist_name' in result
    for name in extract_doctor_names(result['dentist_name'])
]

pprint(set(doctor_names), compact=True)

{'Dr Oscar', 'Dr Oscar Ventanilla', 'Dr Pucan', 'Dr Pucan Dental',
 'Dr Pucan Dental Office', 'Dr. Andrei', 'Dr. Andrei Simel', 'Dr. Azadeh',
 'Dr. Azadeh Jafarnia', 'Dr. Brian', 'Dr. Brian W.', 'Dr. Brian W. Chun',
 'Dr. Burk', 'Dr. Christopher', 'Dr. Christopher J.',
 'Dr. Christopher J. Bennett', 'Dr. David', 'Dr. David I.',
 'Dr. David I. Thompson', 'Dr. Diaz', 'Dr. Diaz F', 'Dr. Diaz F Luisa',
 'Dr. Eugene', 'Dr. Eugene H.', 'Dr. Eugene H. Burton',
 'Dr. Eugene H. Burton Iii', 'Dr. James', 'Dr. James A.', 'Dr. James A. Abbott',
 'Dr. John', 'Dr. John Ichiuji', 'Dr. John Y.', 'Dr. John Y. Park',
 'Dr. Keller', 'Dr. Kwang', 'Dr. Kwang H.', 'Dr. Kwang H. Kim', 'Dr. Lawrence',
 'Dr. Lawrence J.', 'Dr. Lawrence J. Mcdonald', 'Dr. Maria',
 'Dr. Maria Theresa', 'Dr. Maria Theresa V.', 'Dr. Maria Theresa V. Chua',
 'Dr. Michael', 'Dr. Michael K.', 'Dr. Michael K. Singh', 'Dr. Pascuala',
 'Dr. Pascuala Geraldine', 'Dr. Pascuala Geraldine T.',
 'Dr. Pascuala Geraldine T. Ocampo', 'Dr. Pedra

### Extracting user confirmations with Spacy Matcher

In [5]:
matcher = Matcher(nlp.vocab)

confirmation_patterns = [
    [{"LOWER": {"IN": ["yes", "yeah", "yep", "sure", "ok", "okay", "alright", "great", "absolutely", "definitely", "affirmative", "indeed"]}}],
    [{"LOWER": "yes"}, {"IS_PUNCT": True, "OP": "?"}, {"LOWER": "that"}, {"LOWER": "is"}, {"LOWER": {"IN": ["correct", "right"]}}],
    [{"LOWER": "sounds"}, {"LOWER": "good"}],
]

matcher.add("CONFIRMATION", confirmation_patterns)

def extract_confirmations(text):
    doc = nlp(text)
    matches = matcher(doc)
    confirmations = [doc[start:end].text for match_id, start, end in matches]
    return confirmations

### Display all matches found in services.json

In [6]:
confirmations = [
    (turn['utterance'], confirmation)
    for dialogue in data[:4]
    for turn in dialogue['turns']
    if turn['speaker'] == 'USER'
    for confirmation in extract_confirmations(turn['utterance'])
]

for utterance, confirmation in confirmations:
    print(f"Sentence: {utterance}\nConfirmation: {confirmation}\n")

Sentence: Yes that sounds good I have to make an appointment, is there anything available for 5:30 in the evening?
Confirmation: Yes

Sentence: Yes that sounds good I have to make an appointment, is there anything available for 5:30 in the evening?
Confirmation: sounds good

Sentence: Yes, that is correct.
Confirmation: Yes

Sentence: Yes, that is correct.
Confirmation: Yes, that is correct

Sentence: Great, thank you for your help.
Confirmation: Great

Sentence: Yeah I'd like to see a dentist at that one.
Confirmation: Yeah

Sentence: Yeah
Confirmation: Yeah

Sentence: Alright thank you, that's all I need.
Confirmation: Alright

Sentence: Yes, I think I'll go to them.
Confirmation: Yes

Sentence: Yes please. 1:45 PM would be perfect if there's a slot then.
Confirmation: Yes

Sentence: Yes that is right.
Confirmation: Yes

Sentence: Yes that is right.
Confirmation: Yes that is right



### Extracting user intent with direct objects

In [7]:
utterances = [
    turn['utterance']
    for dialogue in data[:3]
    for turn in dialogue['turns']
    if turn['speaker'] == 'USER'
    for frame in turn['frames']
    if 'active_intent' in frame['state']
]

for text in utterances:
    text = nlp(text)
    for token in text:
        if token.dep_ == "dobj":
            verb, dobj = token.head, token.text
            print(f"Text: {text}\nIntent: {verb.lemma_.lower()} {dobj}\n")

Text: Yes that sounds good I have to make an appointment, is there anything available for 5:30 in the evening?
Intent: make appointment

Text: Great, thank you for your help.
Intent: thank you

Text: My tooth has been hurting lately, where can I find a dentist?
Intent: find dentist

Text: Yeah I'd like to see a dentist at that one.
Intent: see dentist

Text: Book it on the 10th of March around 12:30 in the afternoon.
Intent: book it

Text: Actually, no. Book it on the 7th.
Intent: book it

Text: Do they do cosmetic services too?
Intent: do services

Text: Alright thank you, that's all I need.
Intent: thank you

Text: I've been wanting to fix my crooked teeth for a while. Can you find me a good dentist that practices cosmetic dentistry.
Intent: fix teeth

Text: I've been wanting to fix my crooked teeth for a while. Can you find me a good dentist that practices cosmetic dentistry.
Intent: practice dentistry

Text: No, I've changed my mind. Is there a slot available at quarter to 12 in th