In [1]:
import spacy

In [2]:
nlp = spacy.load("en_core_web_sm")

In [7]:
doc = nlp("Elon flew to mars yesterday. He carried biryani masala with him")

for token in doc:
    print(token,"|", token.pos_, "|", spacy.explain(token.pos_)) #part of speech

Elon | PROPN | proper noun
flew | VERB | verb
to | ADP | adposition
mars | NOUN | noun
yesterday | NOUN | noun
. | PUNCT | punctuation
He | PRON | pronoun
carried | VERB | verb
biryani | ADJ | adjective
masala | NOUN | noun
with | ADP | adposition
him | PRON | pronoun


In [9]:
nlp.pipe_names #the pipeline gives the pos

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [10]:
doc = nlp("Wow! Dr. Strange made 265 million $ on the very first day")

for token in doc:
    print(token,"|", token.pos_, "|", spacy.explain(token.pos_)) #part of speech

Wow | INTJ | interjection
! | PUNCT | punctuation
Dr. | PROPN | proper noun
Strange | PROPN | proper noun
made | VERB | verb
265 | NUM | numeral
million | NUM | numeral
$ | NUM | numeral
on | ADP | adposition
the | DET | determiner
very | ADV | adverb
first | ADJ | adjective
day | NOUN | noun


In [12]:
for token in doc:
    print(token, "|", token.tag_, "|", spacy.explain(token.tag_))

Wow | UH | interjection
! | . | punctuation mark, sentence closer
Dr. | NNP | noun, proper singular
Strange | NNP | noun, proper singular
made | VBD | verb, past tense
265 | CD | cardinal number
million | CD | cardinal number
$ | CD | cardinal number
on | IN | conjunction, subordinating or preposition
the | DT | determiner
very | RB | adverb
first | JJ | adjective (English), other noun-modifier (Chinese)
day | NN | noun, singular or mass


In [13]:
#spacy is good with tenses as well

In [17]:
doc = nlp("He quit the job")
doc1 = nlp("He quits the job")
print(doc[1].text, "|", doc[1].tag_, "|", spacy.explain(doc[1].tag_))
print(doc1[1].text, "|", doc1[1].tag_, "|", spacy.explain(doc1[1].tag_))

quit | VBD | verb, past tense
quits | VBZ | verb, 3rd person singular present


In [19]:
#we can remove extra text(punc, etc, space) from a doc by:

for token in doc:
    if token.pos_ not in ["SPACE", "X", "PUNCT"]:
        print(token, "|", token.pos_, "|", spacy.explain(token.pos))

He | PRON | None
quit | VERB | None
the | DET | None
job | NOUN | None




In [20]:
count = doc.count_by(spacy.attrs.POS)
count

{95: 1, 100: 1, 90: 1, 92: 1}

In [23]:
doc.vocab[100].text

'VERB'