# ЛР7 Кривохата Марія ІМ-21

In [None]:
import spacy
from spacy.matcher import Matcher
import json
import re

In [None]:
nlp = spacy.load("en_core_web_sm")

with open("/content/sample_data/events.json", "r", encoding="utf8") as f:
    data = json.load(f)

1. Виділити назви спортивних команд за допомогою класу Matcher.

In [None]:
utterances = []
for dialogue in data:
    for turn in dialogue.get("turns", []):
        text = turn.get("utterance")
        if text:
            utterances.append(text)

In [None]:
print(utterances)

["Okay, I'm looking for a list of dates for special events in San Francisco. Would you help pull up a list for me, please?", "Sure, I'll be happy to assist you. Do you prefer information on a particular even?", "Well, I'm thinking the Giants Vs Dodgers game. I've heard it's really entertaining.", 'All right. The Giants Vs Dodgers game takes place on 5 days at Oracle Park. The event is coming up next Monday.', 'Are there other dates listed for the events?', 'I show the event taking place next Friday. It is the Giants Vs Dodgers game at Oracle Park.', 'Okay, that would be fine. Would you arrange for me to purchase 3 tickets for that event, please?', 'Yes, I am reserving 3 tickets for you to attend the Giants Vs Dodgers game next Friday. The event will take place in San Francisco. Please confirm', 'Wonderful. Yes, please confirm. That is exactly what I want. Now, what is the address of the venue? And, at what time will the game start?', 'You now have a reservation for the event. The addre

In [None]:
matcher = Matcher(nlp.vocab)

vs_pattern = [
    {"ENT_TYPE": {"IN": ["ORG", "GPE"]}, "POS": "PROPN", "OP": "+"},  # 1 team
    {"LOWER": {"IN": ["vs", "versus", "vs.", "against"]}},
    {"ENT_TYPE": {"IN": ["ORG", "GPE"]}, "POS": "PROPN", "OP": "+"}   # 2 team
]

matcher.add("SPORTS_TEAMS", [vs_pattern])

In [None]:
unique_teams = set()
team_occurrences = {}

print("Detected sports teams:\n" + "-" * 30)
for text in utterances:
    doc = nlp(text)
    matches = matcher(doc)

    for match_id, start, end in matches:
        vs_index = next((i for i in range(start, end)
                         if doc[i].lower_ in {"vs", "versus", "vs.", "against"}), None)
        if vs_index is None or vs_index <= start or vs_index >= end - 1:
            continue

        team1_span = doc[start:vs_index]
        team2_span = doc[vs_index + 1:end]

        team1 = team1_span.text.strip()
        team2 = team2_span.text.strip()

        for team in [team1, team2]:
            if team and team not in unique_teams:
                unique_teams.add(team)
                print(f"Found team: {team}")
                print(f"In context: {text}")
                print("-" * 30)
            team_occurrences[team] = team_occurrences.get(team, 0) + 1

Detected sports teams:
------------------------------
Found team: Giants
In context: Well, I'm thinking the Giants Vs Dodgers game. I've heard it's really entertaining.
------------------------------
Found team: Dodgers
In context: Well, I'm thinking the Giants Vs Dodgers game. I've heard it's really entertaining.
------------------------------
Found team: Jackets
In context: I am looking for events in ATL...I heard that Yellow Jackets Vs Bulls is supposed to be really exciting.
------------------------------
Found team: Bulls
In context: I am looking for events in ATL...I heard that Yellow Jackets Vs Bulls is supposed to be really exciting.
------------------------------
Found team: Yellow Jackets
In context: I am looking for events in ATL...I heard that Yellow Jackets Vs Bulls is supposed to be really exciting.
------------------------------
Found team: Washington
In context: I want to watch Washington Vs Ny Liberty in Washington D.C. When does it start?
-----------------------------

In [None]:
filtered_teams = set()
for team in sorted(unique_teams, key=len, reverse=True):
    if not any(team in longer and team != longer for longer in filtered_teams):
        filtered_teams.add(team)

print("\nFiltered team names:\n" + "-" * 30)
for team in filtered_teams:
    print(team)


Filtered team names:
------------------------------
Sun
Whitecaps
Ny Liberty
Blue Jays
Phillies
Nationals
Tigers
Pirates
Earthquakes
Panthers
Raiders
White Sox
Diamondbacks
Padres
Giants
Bengals
Cardinal
Mariners
Marlins
Mystics
Red Sox
Yankees
Bulls
Brewers
Toros
Republic
Dodgers
Jaguars
Angels
Mountaineers
Washington
Yellow Jackets
Ducks


1.1 Виділити висловлювання користувача, що є підтвердженнями (наприклад, Awesome, sounds great!), за допомогою шаблонів.

In [None]:
act_types = set()

for dialogue in data:
    turns = dialogue.get("turns", [])
    for turn in turns:
        actions = turn.get("frames", [])
        for frame in actions:
            for action in frame.get("actions", []):
                act = action.get("act")
                if act:
                    act_types.add(act)

print("Unique Acts found in the dataset:")
print("-" * 30)
for act in sorted(act_types):
    print(act)


Unique Acts found in the dataset:
------------------------------
AFFIRM
AFFIRM_INTENT
CONFIRM
GOODBYE
INFORM
INFORM_COUNT
INFORM_INTENT
NEGATE
NOTIFY_SUCCESS
OFFER
OFFER_INTENT
REQUEST
REQUEST_ALTS
REQ_MORE
SELECT
THANK_YOU


In [None]:
matcher = Matcher(nlp.vocab)

# adjective + optional punctuation + exclamation
exclamation_pattern = [
    {"POS": "ADJ"},
    {"IS_PUNCT": True, "OP": "?"},
    {"TEXT": {"REGEX": "!+"}}
]

# "sounds/seems/looks/feels" + adjective
expression_pattern = [
    {"LEMMA": {"IN": ["sound", "seem", "look", "feel"]}},
    {"POS": "ADJ"}
]

# yes-like
simple_pattern = [
    {"LOWER": {"REGEX": "^(yes|yeah|yep|sure|absolutely|definitely|certainly)$"}}
]

# "that’s/this is" + adjective
thats_pattern = [
    {"LOWER": {"IN": ["that", "this"]}},
    {"LOWER": {"IN": ["is", "'s"]}, "OP": "?"},
    {"POS": "ADJ"}
]

matcher.add("EXCLAMATION_AFFIRMATION", [exclamation_pattern])
matcher.add("EXPRESSION_AFFIRMATION", [expression_pattern])
matcher.add("SIMPLE_AFFIRMATION", [simple_pattern])
matcher.add("THATS_AFFIRMATION", [thats_pattern])

In [None]:
print("Detected affirmations:\n" + "-" * 40)
affirmations_found = set()

for text in utterances:
    doc = nlp(text)
    matches = matcher(doc)

    for match_id, start, end in matches:
        span = doc[start:end]

        if any(tok.tag_ in ["WRB", "WP", "WP$", "WDT"] for tok in span.sent) or "?" in span.sent.text:
          continue

        pattern_name = nlp.vocab.strings[match_id]

        sentence = next((sent.text for sent in doc.sents if span.start >= sent.start and span.end <= sent.end), text)

        affirmation_key = (span.text, pattern_name)
        if affirmation_key not in affirmations_found:
            affirmations_found.add(affirmation_key)
            print(f"Affirmation: {span.text}")
            print(f"Pattern: {pattern_name}")
            print(f"Full utterance: {text}")
            print("-" * 40)

pattern_counts = {}
for affirmation, pattern in affirmations_found:
    pattern_counts[pattern] = pattern_counts.get(pattern, 0) + 1

print("\nSummary of affirmation patterns found:")
for pattern, count in pattern_counts.items():
    print(f"{pattern}: {count} matches")

Detected affirmations:
----------------------------------------
Affirmation: Sure
Pattern: SIMPLE_AFFIRMATION
Full utterance: Sure, I'll be happy to assist you. Do you prefer information on a particular even?
----------------------------------------
Affirmation: Yes
Pattern: SIMPLE_AFFIRMATION
Full utterance: Yes, I am reserving 3 tickets for you to attend the Giants Vs Dodgers game next Friday. The event will take place in San Francisco. Please confirm
----------------------------------------
Affirmation: sounds good
Pattern: EXPRESSION_AFFIRMATION
Full utterance: That sounds good.
----------------------------------------
Affirmation: sounds great
Pattern: EXPRESSION_AFFIRMATION
Full utterance: Awesome, sounds great!
----------------------------------------
Affirmation: great!
Pattern: EXCLAMATION_AFFIRMATION
Full utterance: Awesome, sounds great!
----------------------------------------
Affirmation: Sounds great
Pattern: EXPRESSION_AFFIRMATION
Full utterance: Sounds great! I want to 

2. Застосувати синтаксичні залежності для визначення намірів.

In [None]:
import random

In [None]:
utterances_with_intent = []

for dialogue in data:
    for turn in dialogue["turns"]:
        if turn["speaker"] != "USER":
            continue

        for frame in turn.get("frames", []):
            for action in frame.get("actions", []):
                if action.get("slot") == "intent":
                    utterances_with_intent.append(turn["utterance"])
                    break

In [None]:
def extract_intents(text):
    doc = nlp(text)
    intents = []

    for token in doc:
        if token.dep_ == "dobj":  # dobj = прямий додаток
            verb = token.head.lemma_
            obj = token.lemma_
            intent = verb + obj.capitalize()
            intents.append(intent)

            for conjunct in token.conjuncts:
                conj_intent = verb + conjunct.lemma_.capitalize()
                intents.append(conj_intent)

    return intents

print("Detected intents:\n" + "-" * 30)
for utterance in random.sample(utterances_with_intent, 10):
    found_intents = extract_intents(utterance)
    print(f"Utterance: {utterance}")
    print(f"Intents: {found_intents}\n")

Detected intents:
------------------------------
Utterance: That sounds great. I want to buy three tickets.
Intents: ['buyTicket']

Utterance: I would like to search for the event White Sox Vs Angels in Chi-town.
Intents: []

Utterance: I want to find dates for events.
Intents: ['findDate']

Utterance: I'm interested in finding dates for events.
Intents: ['findDate']

Utterance: Okay, that would be fine. Would you arrange for me to purchase 3 tickets for that event, please?
Intents: ['purchaseTicket']

Utterance: I would like to find local event dates.
Intents: ['findDate']

Utterance: Okay, I'm looking for a list of dates for special events in San Francisco. Would you help pull up a list for me, please?
Intents: ['pullList']

Utterance: Can you help find dates for some upcoming events?
Intents: ['findDate']

Utterance: I need the dates for some events.
Intents: ['needDate']

Utterance: I'm looking for an event.
Intents: []



In [None]:
def extract_intents(text):
    doc = nlp(text)
    intents = []

    # 'want to find', 'would like to book' (xcomp patterns)
    for token in doc:
        if token.dep_ == "xcomp" and token.pos_ == "VERB":  # xcomp = доповнення з дієсловом. "I want to find..." want — головне дієслово find — xcomp
            verb = token.head.lemma_
            next_verb = token.lemma_
            intents.append(verb + next_verb.capitalize())

    # direct object - "buy tickets"
    for token in doc:
        if token.dep_ == "dobj":
            verb = token.head.lemma_
            obj = token.lemma_
            intents.append(verb + obj.capitalize())

    # object of preposition - "search for events"
    for token in doc:
        if token.dep_ == "pobj" and token.head.dep_ == "prep":  # дієслово + прийменник + іменник: search → for → events
            verb = token.head.head.lemma_
            obj = token.lemma_
            intents.append(verb + obj.capitalize())

    return intents


print("Detected intents:\n" + "-" * 30)
for utterance in random.sample(utterances_with_intent, 10):
    found_intents = extract_intents(utterance)
    print(f"Utterance: {utterance}")
    print(f"Intents: {found_intents}\n")

Detected intents:
------------------------------
Utterance: I'm looking for dates for events.
Intents: ['lookDate', 'dateEvent']

Utterance: Hi, I am trying to find some dates for events in the city.
Intents: ['tryFind', 'findDate', 'dateEvent', 'eventCity']

Utterance: That sounds great! Could I reserve 2 tickets to that event?
Intents: ['reserveTicket', 'reserveEvent']

Utterance: Can you help find dates for some upcoming events?
Intents: ['helpFind', 'findDate', 'findEvent']

Utterance: Could you help me find some dates for events?
Intents: ['findDate', 'dateEvent']

Utterance: I'm interested in finding dates for events.
Intents: ['findDate', 'dateEvent']

Utterance: Ok. I would like to buy tickets.
Intents: ['likeBuy', 'buyTicket']

Utterance: I need the dates for some events.
Intents: ['needDate', 'dateEvent']

Utterance: I'm looking for some popular events to go to. I hear that the Raiders Vs Jaguars is popular.
Intents: ['lookEvent']

Utterance: I would like to search for the ev