In [2]:
%run -i "../util/file_utils.ipynb"
%run -i "../util/lang_utils.ipynb"

c:\Users\ravik\Documents\nlp_cookbook\Python-Natural-Language-Processing-Cookbook-Second-Edition\Chapter02


# Finding triplets using spaCy

In [3]:
# Example sentences to extract (subject, verb, object) triplets from.
sentences = [
    "The big black cat stared at the small dog.", 
    "Jane watched her brother in the evenings.", 
    "Nick was driving to Madrid."
]
# Token-level patterns describing a verb phrase, keyed on part-of-speech tags:
#   1. a single verb
#   2. a verb followed by an adposition
#   3. an optional auxiliary, a verb, and an optional adposition
#      ("OP": "?" marks a token as optional)
verb_patterns = [
    [{"POS": "VERB"}],
    [{"POS": "VERB"}, {"POS": "ADP"}],
    [{"POS": "AUX", "OP": "?"}, {"POS": "VERB"}, {"POS": "ADP", "OP": "?"}]
]

In [4]:
from spacy.matcher import Matcher
# NOTE(review): `small_model` is not defined in the visible cells; presumably it is
# provided by one of the utility notebooks loaded with %run -- confirm.
matcher = Matcher(small_model.vocab)
# Register all verb-phrase patterns under the single key "VP".
matcher.add("VP", verb_patterns)

In [5]:
def find_verb_phrase(doc, matcher):
    """Return the longest "VP" match in ``doc`` together with its root token.

    Args:
        doc: The parsed document. It is sliced with the (start, end) offsets of
            each match, and ``doc.vocab.strings`` is used to turn match ids
            back into their string keys.
        matcher: A callable that, given ``doc``, returns
            ``(match_id, start, end)`` tuples.

    Returns:
        A ``(verb_phrase, root)`` tuple. ``verb_phrase`` is the longest span
        matched under the "VP" key (the earliest such match wins a tie).
        ``root`` is the token in that span whose ``dep_`` is "ROOT", or the
        span's first token when there is none. When nothing matches "VP",
        ``(None, None)`` is returned instead of raising ``IndexError``.
    """
    # Resolve match ids through the document's own vocabulary rather than a
    # notebook-level model, so the function works for any doc/matcher pair.
    strings = doc.vocab.strings
    spans = [
        doc[start:end]
        for match_id, start, end in matcher(doc)
        if strings[match_id] == "VP"
    ]
    if not spans:
        return None, None

    # max() keeps the first of several equally long spans, which is the same
    # choice a stable descending sort followed by [0] would make.
    verb_phrase = max(spans, key=len)

    # Default to the first token; a token tagged ROOT takes precedence.
    root = verb_phrase[0]
    for token in verb_phrase:
        if token.dep_ == "ROOT":
            root = token
    return verb_phrase, root

In [6]:
# Print one tab-separated (subject, verb phrase, object) line per sentence.
for sentence in sentences:
    doc = small_model(sentence)
    verb_phrase, root = find_verb_phrase(doc, matcher)
    subject_phrase = get_subject_phrase(doc)
    object_phrase = get_object_phrase(doc)
    prep_phrases = get_prepositional_phrase_objs(doc)
    if object_phrase is None:
        # No direct object: fall back to the first prepositional object, if
        # there is one. A sentence with neither leaves the object as None
        # instead of failing on prep_phrases[0].
        object_phrase = prep_phrases[0] if prep_phrases else None
    print(subject_phrase, "\t", verb_phrase, "\t", object_phrase)

The big black cat 	 stared at 	 the small dog
Jane 	 watched 	 her brother
Nick 	 was driving to 	 Madrid


# Finding triplets using GPT

In [8]:
import os
from dotenv import load_dotenv

# Load variables from .env into the process environment.
load_dotenv()

# Read the key from the environment so it is never written into the notebook.
OPEN_AI_KEY = os.environ.get("OPENAI_API_KEY")

# Fail fast with an actionable message when the key is missing or empty.
if OPEN_AI_KEY is None or OPEN_AI_KEY == "":
    raise ValueError("OPENAI_API_KEY not found. Did you set it in .env?")

In [11]:
from openai import OpenAI
# Create the API client, passing the key held in OPEN_AI_KEY explicitly.
client = OpenAI(api_key=OPEN_AI_KEY)


In [13]:
# User instruction: ask for a single subject/verb/object dictionary for one
# example sentence, with no surrounding text.
prompt = """Find subject, verb, object triplets in the following sentence.
Create a python dictionary structure of the form: {"subject": Subject, "verb": Verb, "object": Object}
Sentence: Nick was driving to Madrid.
Return only the dictionary, no extra text.
"""

# Conversation sent to the model: a system role description, then the request.
chat_messages = [
    {"role": "system", "content": "You are a precise information extraction system."},
    {"role": "user", "content": prompt},
]

# Request one completion with temperature 0 and a 256-token cap on the reply.
response = client.chat.completions.create(
    messages=chat_messages,
    model="gpt-4o-mini",
    max_tokens=256,
    temperature=0,
)

# Show the full response object, including metadata and token usage.
print(response)


ChatCompletion(id='chatcmpl-CyxbixZSObcrGdtjEG9MxCdpIPDvN', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{"subject": "Nick", "verb": "was driving", "object": "to Madrid"}', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1768644846, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_c4585b5b9c', usage=CompletionUsage(completion_tokens=20, prompt_tokens=73, total_tokens=93, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))
