# spaCy Dependency Parsing

In [1]:
# Imports and set up
import spacy   
from spacy import displacy
from spacy.symbols import nsubj, nsubjpass, VERB

# Echo the installed spaCy version so the outputs below can be tied to a release.
print(f'Spacy version: {spacy.__version__}')

# Pipeline shared by every cell below. The transformer model is the active
# choice; the commented line is the alternative pipeline.
model_name = 'en_core_web_trf'
nlp = spacy.load(model_name)
# nlp = spacy.load('en_core_web_lg')

Spacy version: 3.5.0


In [6]:
# Alternative sentences kept for quick experimentation:
# text = "No House seat, no office in this land is more important than the principles we swore to protect."
# text = "Jane is an attorney and is unable to stomach lies or to handle stress."
text = "I am doing the dishes."
nlp_sentence = nlp(text)

# Draw the dependency arcs, then list one line per token:
# morphology, coarse POS tag, dependency label, lemma.
displacy.render(nlp_sentence, style="dep")
for token in nlp_sentence:
    token_fields = (token.morph, token.pos_, token.dep_, token.lemma_)
    print(*token_fields)

Case=Nom|Number=Sing|Person=1|PronType=Prs PRON nsubj I
Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin AUX aux be
Aspect=Prog|Tense=Pres|VerbForm=Part VERB ROOT do
Definite=Def|PronType=Art DET det the
Number=Plur NOUN dobj dish
PunctType=Peri PUNCT punct .


In [6]:
# Adjectival complement ("unable") followed by coordinated infinitives.
text = "John is unable to stomach lies or to witness violence."
nlp_sentence = nlp(text)

# Visualize the parse, then print morphology / POS / dependency / lemma per token.
displacy.render(nlp_sentence, style="dep")
for token in nlp_sentence:
    columns = [token.morph, token.pos_, token.dep_, token.lemma_]
    print(' '.join(str(column) for column in columns))

Number=Sing PROPN nsubj John
Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin AUX ROOT be
Degree=Pos ADJ acomp unable
 PART aux to
VerbForm=Inf VERB xcomp stomach
Number=Plur NOUN dobj lie
ConjType=Cmp CCONJ cc or
 PART aux to
VerbForm=Inf VERB conj witness
Number=Sing NOUN dobj violence
PunctType=Peri PUNCT punct .


In [26]:
# Short transitive sentence; also inspects the named entities that were found.
text = "Trump urged Republican voters."
nlp_sentence = nlp(text)
displacy.render(nlp_sentence, style="dep")

# One line per token: morphology, POS tag, dependency label, lemma.
for token in nlp_sentence:
    token_fields = (token.morph, token.pos_, token.dep_, token.lemma_)
    print(*token_fields)

# One line per recognized entity span (surface text only).
for ent in nlp_sentence.ents:
    print(ent.text)

Number=Sing PROPN nsubj Trump
Tense=Past|VerbForm=Fin VERB ROOT urge
Degree=Pos ADJ amod republican
Number=Plur NOUN dobj voter
PunctType=Peri PUNCT punct .
Trump
Republican


In [22]:
# Adjective with a prepositional complement ("averse to broccoli").
text = "Jane is averse to broccoli."
nlp_sentence = nlp(text)

# Render the dependency tree and dump morphology / POS / dependency / lemma.
displacy.render(nlp_sentence, style="dep")
for token in nlp_sentence:
    columns = [token.morph, token.pos_, token.dep_, token.lemma_]
    print(' '.join(map(str, columns)))

Number=Sing PROPN nsubj Jane
Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin AUX ROOT be
Degree=Pos ADJ acomp averse
 ADP prep to
Number=Sing NOUN pobj broccoli
PunctType=Peri PUNCT punct .


In [3]:
# Longer alternative sentence, kept for experimentation. It is commented out
# because it was previously assigned and then immediately overwritten by the
# next line, so it never reached the parser.
# text = "Cheney said her opposition to former President Donald Trump was rooted in the principles that members of congress are sworn to protect and that she well understood the potential political consequences of opposing Trump."
text = "U.S. Rep. Liz Cheney conceded defeat Tuesday in the Republican primary in Wyoming"
nlp_sentence = nlp(text)

# Render the dependency tree, then print one line per token:
# morphology, coarse POS tag, dependency label, lemma.
displacy.render(nlp_sentence, style="dep")
for token in nlp_sentence:
    print(token.morph, token.pos_, token.dep_, token.lemma_)

Number=Sing PROPN compound U.S.
Number=Sing PROPN compound Rep.
Number=Sing PROPN compound Liz
Number=Sing PROPN nsubj Cheney
Tense=Past|VerbForm=Fin VERB ROOT concede
Number=Sing NOUN dobj defeat
Number=Sing PROPN npadvmod Tuesday
 ADP prep in
Definite=Def|PronType=Art DET det the
Degree=Pos ADJ amod republican
Number=Sing NOUN pobj primary
 ADP prep in
Number=Sing PROPN pobj Wyoming


In [4]:
# Several short sentences pairing family-member nouns with personal names
# (appositions, copular "X was Y", and coordinated names).
text = "She loved to play with her sister, Beatrice. She danced with her brothers, George and Paul. Her father was John Smith. Her mother was former mayor Mary Smith. Bob Jones is her brother. Her brothers are George Smith, and Paul and Bob Jones."
nlp_sentence = nlp(text)

# Visualize the parse, then list morphology / POS / dependency / lemma per token.
displacy.render(nlp_sentence, style="dep")
for token in nlp_sentence:
    token_fields = (token.morph, token.pos_, token.dep_, token.lemma_)
    print(*token_fields)

Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs PRON nsubj she
Tense=Past|VerbForm=Fin VERB ROOT love
 PART aux to
VerbForm=Inf VERB xcomp play
 ADP prep with
Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs PRON poss her
Number=Sing NOUN pobj sister
PunctType=Comm PUNCT punct ,
Number=Sing PROPN appos Beatrice
PunctType=Peri PUNCT punct .
Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs PRON nsubj she
Tense=Past|VerbForm=Fin VERB ROOT dance
 ADP prep with
Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs PRON poss her
Number=Plur NOUN pobj brother
PunctType=Comm PUNCT punct ,
Number=Sing PROPN appos George
ConjType=Cmp CCONJ cc and
Number=Sing PROPN conj Paul
PunctType=Peri PUNCT punct .
Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs PRON poss her
Number=Sing NOUN nsubj father
Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin AUX ROOT be
Number=Sing PROPN compound John
Number=Sing PROPN attr Smith
PunctType=Peri PUNCT punct .
Gender=Fem|Number=Sing|Perso

In [5]:
from spacy.matcher import DependencyMatcher

# Family-member nouns mapped to a gender label; the empty string marks roles
# that carry no gender.
empty_string = ''
family_members = {'mother': 'FEMALE', 'father': 'MALE', 'sister': 'FEMALE', 'brother': 'MALE',
                  'aunt': 'FEMALE', 'uncle': 'MALE', 'grandmother': 'FEMALE', 'grandfather': 'MALE',
                  'parent': empty_string, 'sibling': empty_string, 'cousin': empty_string,
                  'grandparent': empty_string, 'relative': empty_string}
plural_family_members = ['mothers', 'fathers', 'sisters', 'brothers', 'aunts', 'uncles',
                         'grandmothers', 'grandfathers', 'grandparents', 'parents', 'siblings',
                         'cousins', 'relatives']
singular_family_members = list(family_members)

# Pattern-dictionary keys, named once so the patterns below stay typo-free.
DEP = 'DEP'
ENT_TYPE = 'ENT_TYPE'
LEFT_ID = 'LEFT_ID'
LEMMA = 'LEMMA'
LOWER = 'LOWER'
ORTH = 'ORTH'  # no longer used by the patterns below; kept so the name stays defined
POS = 'POS'
RIGHT_ID = 'RIGHT_ID'
RIGHT_ATTRS = 'RIGHT_ATTRS'
REL_OP = 'REL_OP'

# Family-member words are matched on LOWER (the lower-cased token text) rather
# than ORTH, so a capitalized, sentence-initial "Mother" or "Brothers" is found
# as well as the lower-case form.

# For example, match 'her sister, Beatrice'
member_name_pattern = [
    {RIGHT_ID: 'family_member', RIGHT_ATTRS: {LOWER: {'IN': singular_family_members}}},
    {LEFT_ID: 'family_member', REL_OP: '>',
     RIGHT_ID: 'proper_name', RIGHT_ATTRS: {DEP: 'appos', POS: 'PROPN'}}]
# For example, match 'her sisters, Beatrice and Susan'
members_names_pattern = [
    {RIGHT_ID: 'family_members', RIGHT_ATTRS: {LOWER: {'IN': plural_family_members}}},
    {LEFT_ID: 'family_members', REL_OP: '>',
     RIGHT_ID: 'proper_name', RIGHT_ATTRS: {DEP: 'appos', POS: 'PROPN'}}]
# For example, match 'her brother is Bob Smith.'
member_verb_name_pattern = [
    {RIGHT_ID: 'verb_be', RIGHT_ATTRS: {DEP: 'ROOT', LEMMA: 'be'}},
    {LEFT_ID: 'verb_be', REL_OP: '>',
     RIGHT_ID: 'family_member', RIGHT_ATTRS: {DEP: 'nsubj', LOWER: {'IN': singular_family_members}}},
    {LEFT_ID: 'verb_be', REL_OP: '>',
     RIGHT_ID: 'proper_name', RIGHT_ATTRS: {DEP: 'attr', POS: 'PROPN'}}]
# For example, match 'her brothers are Bob, George and Paul Smith.'
members_verb_names_pattern = [
    {RIGHT_ID: 'verb_be', RIGHT_ATTRS: {DEP: 'ROOT', LEMMA: 'be'}},
    {LEFT_ID: 'verb_be', REL_OP: '>',
     RIGHT_ID: 'family_members', RIGHT_ATTRS: {DEP: 'nsubj', LOWER: {'IN': plural_family_members}}},
    {LEFT_ID: 'verb_be', REL_OP: '>',
     RIGHT_ID: 'proper_name', RIGHT_ATTRS: {DEP: 'attr', POS: 'PROPN'}}]
# For example, match 'Bob Jones is her brother'
name_verb_member_pattern = [
    {RIGHT_ID: 'verb_be', RIGHT_ATTRS: {DEP: 'ROOT', LEMMA: 'be'}},
    {LEFT_ID: 'verb_be', REL_OP: '>',
     RIGHT_ID: 'family_member', RIGHT_ATTRS: {DEP: 'attr', LOWER: {'IN': singular_family_members}}},
    {LEFT_ID: 'verb_be', REL_OP: '>',
     RIGHT_ID: 'proper_name', RIGHT_ATTRS: {DEP: 'nsubj', POS: 'PROPN'}}]
# For example, match 'Bob and George Smith are her brothers'
names_verb_members_pattern = [
    {RIGHT_ID: 'verb_be', RIGHT_ATTRS: {DEP: 'ROOT', LEMMA: 'be'}},
    {LEFT_ID: 'verb_be', REL_OP: '>',
     RIGHT_ID: 'family_members', RIGHT_ATTRS: {DEP: 'attr', LOWER: {'IN': plural_family_members}}},
    {LEFT_ID: 'verb_be', REL_OP: '>',
     RIGHT_ID: 'proper_name', RIGHT_ATTRS: {DEP: 'nsubj', POS: 'PROPN'}}]

# Registration order is kept as-is: singular patterns first, then plural ones.
matcher = DependencyMatcher(nlp.vocab)
matcher.add("member_name", [member_name_pattern])
matcher.add("member_verb_name", [member_verb_name_pattern])
matcher.add("name_verb_member", [name_verb_member_pattern])
matcher.add("members_names", [members_names_pattern])
matcher.add("members_verb_names", [members_verb_names_pattern])
matcher.add("names_verb_members", [names_verb_members_pattern])

# token_ids follow the order of the nodes in the matched pattern, so each
# pattern name maps to:
#   (index of the family-member token, index of the name token, is the role plural?)
match_layout = {
    'member_name': (0, 1, False),
    'members_names': (0, 1, True),
    'member_verb_name': (1, 2, False),
    'name_verb_member': (1, 2, False),
    'members_verb_names': (1, 2, True),
    'names_verb_members': (1, 2, True),
}

doc = nlp("She loved to play with her sister, Beatrice Mary. She danced with her brothers, George and Paul Frank Jones. Her father was John Smith. Her mother was former mayor Mary Smith. Bob Jones is her brother. Bob and Paul Jones are her brothers. Her brothers are George Smith, and Paul and Bob Jones.")
matches = matcher(doc)
for match_id, token_ids in matches:          # Indicates which pattern is matched and the specific tokens
    string_id = nlp.vocab.strings[match_id]  # Get string representation
    role_index, name_index, is_plural = match_layout[string_id]

    # Lower-case the role so it lines up with the keys of family_members even
    # when the matched token was capitalized.
    role = doc[token_ids[role_index]].lower_
    if is_plural:
        role = role[:-1]  # every plural in plural_family_members is singular + 's'

    # The subtree of the matched proper noun holds the full (possibly
    # coordinated) name, e.g. 'George Smith, and Paul and Bob Jones'.
    name_tokens = doc[token_ids[name_index]].subtree

    names = []
    preceding_punct = True  # True while the next proper noun starts a new person's name
    for name_token in name_tokens:
        if name_token.text in (',', 'and'):
            preceding_punct = True
        print(name_token.text, name_token.dep_, name_token.pos_)
        if name_token.pos_ == 'PROPN':
            if preceding_punct:
                preceding_punct = False
                # The 'new' prefix flags the first token of each person's name.
                names.append(f'new{name_token.text}')
            else:
                names.append(name_token.text)
    print(string_id, token_ids, role, names)
    print()

Beatrice compound PROPN
Mary appos PROPN
member_name [6, 9] sister ['newBeatrice', 'Mary']

John compound PROPN
Smith attr PROPN
member_verb_name [25, 24, 27] father ['newJohn', 'Smith']

former amod ADJ
mayor compound NOUN
Mary compound PROPN
Smith attr PROPN
member_verb_name [31, 30, 35] mother ['newMary', 'Smith']

Bob compound PROPN
Jones nsubj PROPN
name_verb_member [39, 41, 38] brother ['newBob', 'Jones']

George nmod PROPN
and cc CCONJ
Paul conj PROPN
Frank compound PROPN
Jones appos PROPN
members_names [15, 21] brother ['newGeorge', 'newPaul', 'Frank', 'Jones']

George compound PROPN
Smith attr PROPN
, punct PUNCT
and cc CCONJ
Paul nmod PROPN
and cc CCONJ
Bob conj PROPN
Jones conj PROPN
members_verb_names [53, 52, 55] brother ['newGeorge', 'Smith', 'newPaul', 'newBob', 'Jones']

Bob nmod PROPN
and cc CCONJ
Paul conj PROPN
Jones nsubj PROPN
names_verb_members [47, 49, 46] brother ['newBob', 'newPaul', 'Jones']



In [6]:
# Long news-style sentence with titles, a party/state tag and a hyphenated modifier.
text = "Rep. Liz Cheney R-WY compared herself to former President Abraham Lincoln during her concession speech shortly after her loss to Trump-backed Republican challenger Harriet Hageman."
nlp_sentence = nlp(text)

# Render the dependency tree and dump morphology / POS / dependency / lemma.
displacy.render(nlp_sentence, style="dep")
for token in nlp_sentence:
    columns = [token.morph, token.pos_, token.dep_, token.lemma_]
    print(' '.join(str(column) for column in columns))

Number=Sing PROPN compound Rep.
Number=Sing PROPN compound Liz
Number=Sing PROPN nsubj Cheney
Number=Sing PROPN appos R
Number=Sing PROPN punct -
Number=Sing PROPN npadvmod WY
Tense=Past|VerbForm=Fin VERB ROOT compare
Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs|Reflex=Yes PRON dobj herself
 ADP prep to
Degree=Pos ADJ amod former
Number=Sing PROPN compound President
Number=Sing PROPN compound Abraham
Number=Sing PROPN pobj Lincoln
 ADP prep during
Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs PRON poss her
Number=Sing NOUN compound concession
Number=Sing NOUN pobj speech
 ADV advmod shortly
 ADP prep after
Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs PRON poss her
Number=Sing NOUN pobj loss
 ADP prep to
Number=Sing PROPN npadvmod Trump
PunctType=Dash PUNCT punct -
Aspect=Perf|Tense=Past|VerbForm=Part VERB amod back
Degree=Pos ADJ amod republican
Number=Sing NOUN compound challenger
Number=Sing PROPN compound Harriet
Number=Sing PROPN pobj Hageman
PunctType=Per

In [7]:
# Possessive proper noun modifying a place name; entities are listed with labels.
text = "I traveled to Billie Holiday's NYC."
nlp_sentence = nlp(text)
displacy.render(nlp_sentence, style="dep")

# One line per token: morphology, POS tag, dependency label, lemma.
for token in nlp_sentence:
    token_fields = (token.morph, token.pos_, token.dep_, token.lemma_)
    print(*token_fields)

# One line per recognized entity: surface text followed by its label.
for ent in nlp_sentence.ents:
    entity_fields = (ent.text, ent.label_)
    print(*entity_fields)

Case=Nom|Number=Sing|Person=1|PronType=Prs PRON nsubj I
Tense=Past|VerbForm=Fin VERB ROOT travel
 ADP prep to
Number=Sing PROPN compound Billie
Number=Sing PROPN poss Holiday
 PART case 's
Number=Sing PROPN pobj NYC
PunctType=Peri PUNCT punct .
Billie Holiday's PERSON
NYC GPE


In [4]:
# Alternative sentence, kept for experimentation. It is commented out because
# it was previously assigned and then immediately overwritten by the next
# line, so it never reached the parser.
# text = "She loved playing with her sister."
text = "She enjoyed being with her sister."
nlp_sentence = nlp(text)

# Render the dependency tree, then print one line per token:
# morphology, coarse POS tag, dependency label, lemma.
displacy.render(nlp_sentence, style="dep")
for token in nlp_sentence:
    print(token.morph, token.pos_, token.dep_, token.lemma_)

# One line per recognized entity: surface text followed by its label.
for ent in nlp_sentence.ents:
    print(ent.text, ent.label_)

Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs PRON nsubj she
Tense=Past|VerbForm=Fin VERB ROOT enjoy
VerbForm=Ger AUX xcomp be
 ADP prep with
Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs PRON poss her
Number=Sing NOUN pobj sister
PunctType=Peri PUNCT punct .
