In [1]:
!pip install --upgrade ipython jupyter



In [2]:
#What is dependency Parsing?
"""
It's a technique in NLP to determine the grammatical structure of a sentence.
It identifies the relationships between “head” words and words which modify those heads.
"""

"\nIt's a technique in NLP to determine the grammatical structure of a sentence.\nIt identifies the relationships between “head” words and words which modify those heads.\n"

In [3]:
import spacy

In [4]:
nlp = spacy.load("en_core_web_sm")

In [5]:
doc = nlp("l love learning NLP")

In [6]:
#iterate through each token in the parsed doc
for token in doc:
    #each token, print:
    #token.text : the actual word
    #token.dep_ : the syntactic dependency label (e.g. dobj, nsubj)
    #token.head.text: the head word that this token is syntactically
    #:<10 is used to align the columns neatly
    print(f"{token.text:<10} -> {token.dep_:<10} (head: {token.head.text})")

l          -> compound   (head: love)
love       -> ROOT       (head: love)
learning   -> acl        (head: love)
NLP        -> dobj       (head: learning)


In [7]:
# Iterate through tokens and print grammatical roles

In [8]:
doc = nlp("I study, write, and update my GitHub every night.")

In [9]:
for token in doc:
    print(f"{token.text:<10} → {token.dep_:<10} (head: {token.head.text})")

I          → nsubj      (head: study)
study      → ROOT       (head: study)
,          → punct      (head: study)
write      → conj       (head: study)
,          → punct      (head: write)
and        → cc         (head: write)
update     → conj       (head: write)
my         → poss       (head: GitHub)
GitHub     → dobj       (head: update)
every      → det        (head: night)
night      → npadvmod   (head: update)
.          → punct      (head: study)


In [10]:
for token in doc:
    if token.dep_ == "nsubj":
        print("Subject:", token.text)
    elif token.dep_ in ("dobj", "pobj"):
        print("Object:", token.text)
    elif token.dep_ in ("ROOT", "conj") and token.pos_ == "VERB":
        print("Verb:", token.text)


Subject: I
Verb: study
Verb: write
Verb: update
Object: GitHub


In [11]:
doc = nlp("The book that John bought yesterday was expensive.")

In [12]:
for token in doc:
    print(f"{token.text:<10} → {token.dep_:<10} (head: {token.head.text})")

The        → det        (head: book)
book       → nsubj      (head: was)
that       → dobj       (head: bought)
John       → nsubj      (head: bought)
bought     → relcl      (head: book)
yesterday  → npadvmod   (head: bought)
was        → ROOT       (head: was)
expensive  → acomp      (head: was)
.          → punct      (head: was)


In [13]:
for token in doc:
    if token.dep_ == "nsubj":
        print("Subject:", token.text)
    elif token.dep_ == "dobj":
        print("Object:", token.text)
    elif token.dep_ in ("relcl", "ROOT"):
        print("Verb:", token.text)
    elif token.dep_=="punct":
        print("Punct :", token.text)

Subject: book
Object: that
Subject: John
Verb: bought
Verb: was
Punct : .


In [14]:
triples = []  # Initialize the list to store subject-verb-object triples

In [15]:
doc = nlp("Alice gave Bob a book and sent him an email.")

In [19]:
for token in doc:
    # For every token, check if its dependency label (dep_) is "nsubj"
    if token.dep_ == "nsubj":
        # If yes, assign that token as the subject
        subject = token
        # Find the verb related to this subject by accessing the token's head,
        # which usually is the main verb governing the subject
        verb = token.head
        # For the found verb, look at its children tokens
        for child in verb.children:
            # Among children, check if any have dependency labels "dobj" or "pobj"
            if child.dep_ in ("dobj", "pobj"):
                # If yes, this child token is the object of the verb
                # Append the tuple (subject, verb, object) to the triples list
                triples.append((subject.text, verb.text, child.text))

print(triples)

[('Alice', 'gave', 'book'), ('Alice', 'gave', 'book'), ('Alice', 'gave', 'book')]
