In [1]:
import spacy

In [6]:
nlp = spacy.load('en_core_web_sm')
text = "Sometimes i got scary. But inside me motivate me. You came till this point and you have the ability to go further!"
word = nlp(text)
for i in word:
    print(i, "|", i.pos_, "|", spacy.explain(i.pos_))

Sometimes | ADV | adverb
i | PRON | pronoun
got | VERB | verb
scary | ADJ | adjective
. | PUNCT | punctuation
But | CCONJ | coordinating conjunction
inside | ADP | adposition
me | PRON | pronoun
motivate | VERB | verb
me | PRON | pronoun
. | PUNCT | punctuation
You | PRON | pronoun
came | VERB | verb
till | SCONJ | subordinating conjunction
this | DET | determiner
point | NOUN | noun
and | CCONJ | coordinating conjunction
you | PRON | pronoun
have | VERB | verb
the | DET | determiner
ability | NOUN | noun
to | PART | particle
go | VERB | verb
further | ADV | adverb
! | PUNCT | punctuation


In [8]:
for i in word:
    print(i, "==>", i.pos_, "==>", i.tag_, "==>", spacy.explain(i.tag_))

Sometimes ==> ADV ==> RB ==> adverb
i ==> PRON ==> PRP ==> pronoun, personal
got ==> VERB ==> VBD ==> verb, past tense
scary ==> ADJ ==> JJ ==> adjective (English), other noun-modifier (Chinese)
. ==> PUNCT ==> . ==> punctuation mark, sentence closer
But ==> CCONJ ==> CC ==> conjunction, coordinating
inside ==> ADP ==> IN ==> conjunction, subordinating or preposition
me ==> PRON ==> PRP ==> pronoun, personal
motivate ==> VERB ==> VBP ==> verb, non-3rd person singular present
me ==> PRON ==> PRP ==> pronoun, personal
. ==> PUNCT ==> . ==> punctuation mark, sentence closer
You ==> PRON ==> PRP ==> pronoun, personal
came ==> VERB ==> VBD ==> verb, past tense
till ==> SCONJ ==> IN ==> conjunction, subordinating or preposition
this ==> DET ==> DT ==> determiner
point ==> NOUN ==> NN ==> noun, singular or mass
and ==> CCONJ ==> CC ==> conjunction, coordinating
you ==> PRON ==> PRP ==> pronoun, personal
have ==> VERB ==> VBP ==> verb, non-3rd person singular present
the ==> DET ==> DT ==> det

In [9]:
for i in word:
    if i.pos_ not in ['PUNCT', "X"]:
        print(i, "==>", i.pos_)

Sometimes ==> ADV
i ==> PRON
got ==> VERB
scary ==> ADJ
But ==> CCONJ
inside ==> ADP
me ==> PRON
motivate ==> VERB
me ==> PRON
You ==> PRON
came ==> VERB
till ==> SCONJ
this ==> DET
point ==> NOUN
and ==> CCONJ
you ==> PRON
have ==> VERB
the ==> DET
ability ==> NOUN
to ==> PART
go ==> VERB
further ==> ADV


In [19]:
words = word.count_by(spacy.attrs.POS)
words

{86: 2, 95: 5, 100: 5, 84: 1, 97: 3, 89: 2, 85: 1, 98: 1, 90: 2, 92: 2, 94: 1}

In [11]:
word.count_by(spacy.attrs.IS_PUNCT)

{0: 22, 1: 3}

In [12]:
word.count_by(spacy.attrs.IS_ALPHA)

{1: 22, 0: 3}

In [17]:
word.vocab[95].text

'PRON'

In [25]:
for i, v in words.items():
    vocab = word.vocab[i]
    print(vocab.text, "==>", v)

ADV ==> 2
PRON ==> 5
VERB ==> 5
ADJ ==> 1
PUNCT ==> 3
CCONJ ==> 2
ADP ==> 1
SCONJ ==> 1
DET ==> 2
NOUN ==> 2
PART ==> 1


# Exercise

In [26]:
import spacy

In [28]:
with open("./nlp-assests/news_story.txt", 'r') as f:
    text = f.readlines()
text

['Inflation rose again in April, continuing a climb that has pushed consumers to the brink and is threatening the economic expansion, the Bureau of Labor Statistics reported Wednesday.\n',
 '\n',
 'The consumer price index, a broad-based measure of prices for goods and services, increased 8.3% from a year ago, higher than the Dow Jones estimate for an 8.1% gain. That represented a slight ease from Marchâ€™s peak but was still close to the highest level since the summer of 1982.\n',
 '\n',
 'Removing volatile food and energy prices, so-called core CPI still rose 6.2%, against expectations for a 6% gain, clouding hopes that inflation had peaked in March.\n',
 '\n',
 'The month-over-month gains also were higher than expectations â€” 0.3% on headline CPI versus the 0.2% estimate and a 0.6% increase for core, against the outlook for a 0.4% gain.\n',
 '\n',
 'The price gains also meant that workers continued to lose ground. Real wages adjusted for inflation decreased 0.1% on the month despit

In [32]:
sentence = " ".join(text)
nlp = spacy.load('en_core_web_sm')
sentences = nlp(sentence)

In [35]:
noun = []
for sent in sentences:
    if sent.pos_ == "NOUN":
        noun.append(sent)
noun

[Inflation,
 climb,
 consumers,
 brink,
 expansion,
 consumer,
 price,
 index,
 measure,
 prices,
 goods,
 services,
 %,
 year,
 estimate,
 %,
 gain,
 ease,
 peak,
 level,
 summer,
 food,
 energy,
 prices,
 core,
 %,
 expectations,
 %,
 gain,
 hopes,
 inflation,
 month,
 month,
 gains,
 expectations,
 %,
 headline,
 %,
 estimate,
 %,
 increase,
 core,
 outlook,
 %,
 gain,
 price,
 gains,
 workers,
 ground,
 wages,
 inflation,
 %,
 month,
 increase,
 %,
 earnings,
 year,
 earnings,
 %,
 earnings,
 %,
 threat,
 recovery,
 pandemic,
 economy,
 stage,
 year,
 growth,
 level,
 prices,
 pump,
 grocery,
 stores,
 problem,
 inflation,
 areas,
 housing,
 auto,
 sales,
 host,
 areas,
 officials,
 problem,
 interest,
 rate,
 hikes,
 year,
 pledges,
 inflation,
 %,
 goal,
 ™,
 data,
 job,
 Credits]

In [38]:
numbers = []
for num in sentences:
    if num.pos_ == "NUM":
        numbers.append(num)

numbers

[8.3,
 8.1,
 1982,
 6.2,
 6,
 0.3,
 0.2,
 0.6,
 0.4,
 0.1,
 0.3,
 2.6,
 5.5,
 2021,
 1984,
 one,
 two,
 two,
 2]

In [40]:
counts = sentences.count_by(spacy.attrs.POS)

In [42]:
pos_count = {}
for noun, count in counts.items():
    vocab = sentences.vocab[noun]
    pos_count[vocab.text] = count
pos_count

{'NOUN': 95,
 'VERB': 27,
 'ADV': 15,
 'ADP': 39,
 'PROPN': 21,
 'PUNCT': 32,
 'DET': 34,
 'PRON': 4,
 'AUX': 13,
 'CCONJ': 10,
 'ADJ': 23,
 'SPACE': 7,
 'NUM': 19,
 'PART': 4,
 'SCONJ': 8,
 'X': 1}