In [2]:
import spacy

In [3]:
# Load the `en_core_web_sm` pipeline once; every later cell reuses this `nlp`
# object to turn raw strings into `doc` objects.
nlp = spacy.load("en_core_web_sm")

In [4]:
# Show the names of the components in the loaded pipeline (rendered as the
# cell output).
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [5]:
# Tag a sample sentence and print one row per token:
# token text, POS tag, and spaCy's human-readable explanation of that tag.
doc = nlp("Elon flew to mars yesterday, he carried biryani masala with him")

for token in doc:
    pos_tag = token.pos_
    # sep reproduces the "  |  " column divider between the printed fields.
    print(token, pos_tag, spacy.explain(pos_tag), sep="  |  ")

Elon  |  PROPN  |  proper noun
flew  |  VERB  |  verb
to  |  ADP  |  adposition
mars  |  NOUN  |  noun
yesterday  |  NOUN  |  noun
,  |  PUNCT  |  punctuation
he  |  PRON  |  pronoun
carried  |  VERB  |  verb
biryani  |  ADJ  |  adjective
masala  |  NOUN  |  noun
with  |  ADP  |  adposition
him  |  PRON  |  pronoun


In [6]:
# Tag a second sentence and show, per token, both `pos_` and `tag_`
# together with spaCy's explanation of each.
doc = nlp("Wow! Dr. Strange made, 265 million $ on the very first day")

for token in doc:
    columns = (
        token,
        token.pos_,
        spacy.explain(token.pos_),
        token.tag_,
        spacy.explain(token.tag_),
    )
    # sep reproduces the "  |  " column divider between the printed fields.
    print(*columns, sep="  |  ")

Wow  |  INTJ  |  interjection  |  UH  |  interjection
!  |  PUNCT  |  punctuation  |  .  |  punctuation mark, sentence closer
Dr.  |  PROPN  |  proper noun  |  NNP  |  noun, proper singular
Strange  |  PROPN  |  proper noun  |  NNP  |  noun, proper singular
made  |  VERB  |  verb  |  VBD  |  verb, past tense
,  |  PUNCT  |  punctuation  |  ,  |  punctuation mark, comma
265  |  NUM  |  numeral  |  CD  |  cardinal number
million  |  NUM  |  numeral  |  CD  |  cardinal number
$  |  NUM  |  numeral  |  CD  |  cardinal number
on  |  ADP  |  adposition  |  IN  |  conjunction, subordinating or preposition
the  |  DET  |  determiner  |  DT  |  determiner
very  |  ADV  |  adverb  |  RB  |  adverb
first  |  ADJ  |  adjective  |  JJ  |  adjective (English), other noun-modifier (Chinese)
day  |  NOUN  |  noun  |  NN  |  noun, singular or mass


In [9]:
# Inspect the second token of a short sentence: its text, `pos_`, `tag_`,
# and spaCy's explanation of the `tag_` value.
doc = nlp("He quits the job")

# Bind the token once instead of indexing `doc[1]` repeatedly. The bare
# `doc[1]` expression that used to sit here was a no-op: only the last
# expression of a cell is displayed.
verb = doc[1]
print(verb.text, verb.pos_, verb.tag_, spacy.explain(verb.tag_), sep="  |  ")

quits  |  VERB  |  VBZ  |  verb, 3rd person singular present


In [10]:
# Same inspection as for "He quits the job", but with "quit" in place of
# "quits", so the two printed rows can be compared.
doc = nlp("He quit the job")

# Bind the token once instead of indexing `doc[1]` repeatedly. The bare
# `doc[1]` expression that used to sit here was a no-op: only the last
# expression of a cell is displayed.
verb = doc[1]
print(verb.text, verb.pos_, verb.tag_, spacy.explain(verb.tag_), sep="  |  ")

quit  |  VERB  |  VBD  |  verb, past tense


In [None]:
# Sample input for the filtering and counting cells below: a multi-line
# earnings announcement. The blank lines, bullet characters and runs of spaces
# are part of the string, so they reach the pipeline as well.
earning_text = """Microsoft Corp. today announced the following results for the quarter ended September 30, 2025, as compared to the corresponding period of last fiscal year:

·        Revenue was $77.7 billion and increased 18% (up 17% in constant currency)

·        Operating income was $38.0 billion and increased 24% (up 22% in constant currency)

·        Net income, on a GAAP basis, was $27.7 billion and increased 12%, and on a non-GAAP basis was $30.8 billion and increased 22% (up 21% in constant currency)

·        Diluted earnings per share, on a GAAP basis, was $3.72 and increased 13%, and on a non-GAAP basis was $4.13 and increased 23% (up 21% in constant currency)

·        Non-GAAP results exclude the impact from investments in OpenAI etc., explained in the Non-GAAP Definition section below"""

In [12]:
# Run the pipeline over the earnings text. This rebinds `doc`, which the
# filtering and counting cells below read.
doc = nlp(earning_text)

In [17]:
# Keep every token whose POS tag is not one of SPACE, PUNCT or X.
filter_tokens = [
    tok
    for tok in doc
    if tok.pos_ not in ("SPACE", "PUNCT", "X")
]

# Preview the first 20 tokens that survived the filter.
filter_tokens[:20]

[Microsoft,
 Corp.,
 today,
 announced,
 the,
 following,
 results,
 for,
 the,
 quarter,
 ended,
 September,
 30,
 2025,
 as,
 compared,
 to,
 the,
 corresponding,
 period]

In [20]:
# Tally tokens by POS attribute. The result is a dict keyed by integer IDs
# rather than tag names (see the cell output), so the keys have to be looked
# up in `doc.vocab` to become readable.
count = doc.count_by(spacy.attrs.POS)
count

{96: 7,
 92: 37,
 100: 14,
 90: 9,
 85: 16,
 93: 22,
 97: 23,
 98: 1,
 84: 15,
 103: 10,
 87: 6,
 99: 6,
 89: 8,
 86: 4}

In [19]:
# 96 is one of the integer keys shown in the `count` output; looking it up in
# the vocab yields its string form ('PROPN').
doc.vocab[96].text

'PROPN'

In [22]:
# Print the POS tally with readable tag names: each integer key of `count`
# is resolved to its string through the vocab.
for pos_id, n_tokens in count.items():
    pos_name = doc.vocab[pos_id].text
    # sep reproduces the "  |  " divider between name and count.
    print(pos_name, n_tokens, sep="  |  ")

PROPN  |  7
NOUN  |  37
VERB  |  14
DET  |  9
ADP  |  16
NUM  |  22
PUNCT  |  23
SCONJ  |  1
ADJ  |  15
SPACE  |  10
AUX  |  6
SYM  |  6
CCONJ  |  8
ADV  |  4
