Highlight departure points using the Matcher class.
Highlight user statements in which the user asks for something to be changed
(e.g., "Find me something else."), using templates. Use the flights.json file.

In [1]:
import json
import spacy
from spacy.matcher import Matcher
import re

In [6]:
# Load spaCy's small English pipeline; later cells rely on its tokenization and
# on the named entities it assigns (doc.ents / the ENT_TYPE token attribute).
nlp = spacy.load("en_core_web_sm")

In [7]:
# Read the dialogue dataset. The encoding is given explicitly because JSON is
# UTF-8 by specification, while open() would otherwise fall back to the
# platform's locale encoding and could fail or mis-decode non-ASCII text.
with open("flights.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [8]:
# Token-pattern matcher built on the pipeline's vocabulary; the pattern sets
# are registered on it in later cells via matcher.add(...).
matcher = Matcher(nlp.vocab)

Highlight departure points using the Matcher class.

In [9]:
# Each pattern is a trigger phrase ("from", "out of", "departing", "leaving")
# followed by one GPE-typed token and up to two further optional GPE tokens.
# The trigger phrase is split on whitespace so that every word becomes its own
# case-insensitive LOWER token spec.
departure_patterns = [
    [{"LOWER": word} for word in trigger.split()]
    + [{"ENT_TYPE": "GPE"}, {"ENT_TYPE": "GPE", "OP": "?"}, {"ENT_TYPE": "GPE", "OP": "?"}]
    for trigger in ("from", "out of", "departing", "leaving")
]

In [10]:
# Register the departure patterns. The patterns end in optional ("?") GPE
# tokens, for which the Matcher reports every overlapping candidate (e.g. both
# "from A" and "from A B"). greedy="LONGEST" keeps only the longest of the
# overlapping matches so each departure point is reported once.
matcher.add("PotentialDeparturePoint", departure_patterns, greedy="LONGEST")

# (matched span text, full utterance) pairs, filled in by the matching loop.
potential_departure_points_with_utterances = []

In [11]:
# Scan every turn of every dialogue and record each span that matches one of
# the "PotentialDeparturePoint" patterns together with the utterance it came from.
for dialogue in data:
    for turn in dialogue.get('turns', []):
        utterance = turn.get('utterance', '')
        doc = nlp(utterance)

        # Merge each GPE entity into a single token so that a multi-word place
        # name is matched by one {"ENT_TYPE": "GPE"} token spec.
        gpe_entities = [ent for ent in doc.ents if ent.label_ == "GPE"]
        with doc.retokenize() as retokenizer:
            for gpe in gpe_entities:
                retokenizer.merge(gpe)

        for match_id, start, end in matcher(doc):
            # The matcher may hold other pattern sets too; keep only departure matches.
            if nlp.vocab.strings[match_id] != "PotentialDeparturePoint":
                continue
            matched_text = doc[start:end].text
            potential_departure_points_with_utterances.append((matched_text, utterance))


In [12]:
# Print every distinct (departure point, utterance) pair.
#
# The trigger phrase is stripped *before* de-duplicating, so two matches that
# differ only in their trigger word collapse into one line, and dict.fromkeys
# is used instead of set() so the output keeps first-seen order and is
# identical from run to run (set iteration order of strings is not stable
# across interpreter sessions).
if potential_departure_points_with_utterances:
    # Compiled once; removes the leading trigger phrase from a matched span.
    trigger_prefix = re.compile(r'^(from|out of|departing|leaving)\s+', flags=re.IGNORECASE)
    unique_matches = dict.fromkeys(
        (trigger_prefix.sub('', dp), utt)
        for dp, utt in potential_departure_points_with_utterances
    )
    for cleaned_dp, utt in unique_matches:
        print(f"- Point: \"{cleaned_dp}\" (from utterance: \"{utt}\")")
else:
    print("No potential departure points identified.")


- Point: "San Francisco" (from utterance: "Please confirm that you want to leave next Tuesday on American Airlines from San Francisco and flying to Los Angeles. You need 1 ticket in Economy.")
- Point: "Seattle" (from utterance: "There are 3 people in my group. I am leaving from Seattle, WA. I prefer Southwest Airlines.")
- Point: "Mexico City" (from utterance: "Please confirm: Flight from Mexico City to Paris, American Airlines, Economy. Departure date is next Thursday. 1 passenger.")
- Point: "San Francisco" (from utterance: "I would like to fly out of San Francisco.")
- Point: "Los Angeles" (from utterance: "Look for a flight from Los Angeles to Portland, OR")
- Point: "San Diego" (from utterance: "Okay, I have a flight from San Diego, going to Chicago today with Delta Airlines. It is for 1 passenger with Economy seating. Does that work for you?")
- Point: "Seattle" (from utterance: "I prefer taking Alaska Airlines in Economy, and I am leaving from Seattle and am traveling to NYC.")

Highlight a user's statement where they ask for something to be changed

In [13]:
# One pattern per phrase; each whitespace-separated word becomes a
# case-insensitive LOWER token spec. Note that the single-word phrases
# ("another", "different") match those words anywhere in an utterance.
change_patterns = [
    [{"LOWER": word} for word in phrase.split()]
    for phrase in (
        "find me something else",
        "find me something different",
        "another",
        "different",
        "show me other flights",
    )
]

In [14]:
# Distinct user utterances that contain a change request; a set so that an
# utterance matching several patterns is stored only once.
change_requests = set()

# Register the change-request templates on the shared matcher under their own key.
matcher.add("ChangeRequest", change_patterns)

In [15]:
# Collect every USER utterance in which at least one "ChangeRequest" pattern matches.
for dialogue in data:
    for turn in dialogue.get('turns', []):
        # Only turns whose speaker is 'USER' are considered.
        if turn.get('speaker') != 'USER':
            continue

        utterance = turn.get('utterance', '')
        doc = nlp(utterance)

        # The matcher may also hold other pattern sets, so filter by key.
        has_change_request = any(
            nlp.vocab.strings[match_id] == "ChangeRequest"
            for match_id, _start, _end in matcher(doc)
        )
        if has_change_request:
            change_requests.add(utterance)

In [16]:
# Print the collected change requests. change_requests is a set, whose
# iteration order for strings varies between interpreter sessions, so the
# utterances are sorted to make the printed list reproducible.
print("\nIdentified Change Requests:")
if change_requests:
    for cr in sorted(change_requests):
        print(f"- {cr}")
else:
    print("No change requests identified from utterance text.")


Identified Change Requests:
- Find me something else. I want to fly with Southwest Airlines. Look for them from Atlanta.
- No, please find another flight.
- A different airline please, in Premium Economy.
- Can you find me something else?
- Please look for another flight.
- Find another ticket.
- Please show me other flights.
- Try to find me something different. I would like 1 ticket with Southwest Airlines.
- Find me something else, Delta Airlines instead, economy tickets.
- Find me something else with United Airlines.
- Find me something else.
