In [1]:
import spacy


In [2]:
nlp = spacy.load("en_core_web_lg")

In [3]:
trial_text = "I would like to apologize to the Nix family for taking Mark away from you. I hope this brings you closure. I want to say to all my family and friends around the world 'thank you' for supporting me. To my kids, stand tall and continue to make me proud. Don¡¯t worry about me, I'm ready to fly. Alright Warden, I¡¯m ready to ride."
doc = nlp(trial_text)

In [4]:
# 识别语法角色及句子结构关系
for token in doc:
      print(f"{token.text}: {token.pos_}, {token.dep_}")

I: PRON, nsubj
would: AUX, aux
like: VERB, ROOT
to: PART, aux
apologize: VERB, xcomp
to: ADP, prep
the: DET, det
Nix: PROPN, compound
family: NOUN, pobj
for: ADP, prep
taking: VERB, pcomp
Mark: PROPN, dobj
away: ADV, advmod
from: ADP, prep
you: PRON, pobj
.: PUNCT, punct
I: PRON, nsubj
hope: VERB, ROOT
this: PRON, nsubj
brings: VERB, ccomp
you: PRON, dative
closure: NOUN, dobj
.: PUNCT, punct
I: PRON, nsubj
want: VERB, ROOT
to: PART, aux
say: VERB, xcomp
to: ADP, prep
all: DET, det
my: PRON, poss
family: NOUN, pobj
and: CCONJ, cc
friends: NOUN, conj
around: ADP, prep
the: DET, det
world: NOUN, pobj
': PUNCT, punct
thank: VERB, ccomp
you: PRON, dobj
': PUNCT, punct
for: ADP, prep
supporting: VERB, pcomp
me: PRON, dobj
.: PUNCT, punct
To: ADP, prep
my: PRON, poss
kids: NOUN, pobj
,: PUNCT, punct
stand: VERB, ROOT
tall: ADJ, advmod
and: CCONJ, cc
continue: VERB, conj
to: PART, aux
make: VERB, xcomp
me: PRON, nsubj
proud: ADJ, ccomp
.: PUNCT, punct
Don¡¯t: NOUN, nsubj
worry: VERB, ccomp
ab

In [5]:
# 识别命名实体
for ent in doc.ents:
      print(f"{ent.text} - {ent.label_}")

Nix - PERSON
Mark - PERSON
I¡¯m - GPE


In [6]:
# 识别句子
assert doc.has_annotation("SENT_START")
for sent in doc.sents:
    print(sent.text)

I would like to apologize to the Nix family for taking Mark away from you.
I hope this brings you closure.
I want to say to all my family and friends around the world 'thank you' for supporting me.
To my kids, stand tall and continue to make me proud.
Don¡¯t worry about me, I'm ready to fly.
Alright Warden, I¡¯m ready to ride.


In [7]:
# 分析句法树
for token in doc:
    print(f"{token.text} ← 头词: {token.head.text}")

I ← 头词: like
would ← 头词: like
like ← 头词: like
to ← 头词: apologize
apologize ← 头词: like
to ← 头词: apologize
the ← 头词: family
Nix ← 头词: family
family ← 头词: to
for ← 头词: apologize
taking ← 头词: for
Mark ← 头词: taking
away ← 头词: taking
from ← 头词: away
you ← 头词: from
. ← 头词: like
I ← 头词: hope
hope ← 头词: hope
this ← 头词: brings
brings ← 头词: hope
you ← 头词: brings
closure ← 头词: brings
. ← 头词: hope
I ← 头词: want
want ← 头词: want
to ← 头词: say
say ← 头词: want
to ← 头词: say
all ← 头词: family
my ← 头词: family
family ← 头词: to
and ← 头词: family
friends ← 头词: family
around ← 头词: family
the ← 头词: world
world ← 头词: around
' ← 头词: say
thank ← 头词: say
you ← 头词: thank
' ← 头词: thank
for ← 头词: thank
supporting ← 头词: for
me ← 头词: supporting
. ← 头词: want
To ← 头词: stand
my ← 头词: kids
kids ← 头词: To
, ← 头词: stand
stand ← 头词: stand
tall ← 头词: stand
and ← 头词: stand
continue ← 头词: stand
to ← 头词: make
make ← 头词: continue
me ← 头词: proud
proud ← 头词: make
. ← 头词: stand
Don¡¯t ← 头词: worry
worry ← 头词: 'm
about ← 头词: worry
me ← 头词: ab

In [12]:
appologize_tokens = [
    'sorry',
    'apologize',
    'forgive',
    'regret',
    'remorse',
]

In [16]:
# 添加自定义属性
from spacy.tokens import Token
#Token.set_extension("is_apology", default=False)
# 为含有道歉词汇的token设置标记
for token in doc:
    if token.text.lower() in ["apologize", "sorry"]:
        token._.is_apology = True
    print(f"{token.text}: {token._.is_apology}")

I: False
would: False
like: False
to: False
apologize: True
to: False
the: False
Nix: False
family: False
for: False
taking: False
Mark: False
away: False
from: False
you: False
.: False
I: False
hope: False
this: False
brings: False
you: False
closure: False
.: False
I: False
want: False
to: False
say: False
to: False
all: False
my: False
family: False
and: False
friends: False
around: False
the: False
world: False
': False
thank: False
you: False
': False
for: False
supporting: False
me: False
.: False
To: False
my: False
kids: False
,: False
stand: False
tall: False
and: False
continue: False
to: False
make: False
me: False
proud: False
.: False
Don¡¯t: False
worry: False
about: False
me: False
,: False
I: False
'm: False
ready: False
to: False
fly: False
.: False
Alright: False
Warden: False
,: False
I¡¯m: False
ready: False
to: False
ride: False
.: False
