In [1]:
import spacy
from spacy import displacy

In [2]:
# Load the 'en_core_web_sm' pipeline once; `nlp` is the callable used below
# to process text.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Sample sentence used throughout the notebook as pipeline input.
text = 'Jim bought 300 shares of Acme Corp. in 2006.'

In [4]:
# Run the pipeline on the sample sentence. The result is iterated token by
# token and queried for `.ents` in the cells below.
tokens = nlp(text)

In [5]:
# One row per token: the token itself followed by its coarse POS, fine-grained
# tag, dependency label and entity type, with a tab placed between columns.
for tok in tokens:
    row = [tok]
    for annotation in (tok.pos_, tok.tag_, tok.dep_, tok.ent_type_):
        row += ['\t', annotation]
    print(*row)

Jim 	 PROPN 	 NNP 	 nsubj 	 PERSON
bought 	 VERB 	 VBD 	 ROOT 	 
300 	 NUM 	 CD 	 nummod 	 CARDINAL
shares 	 NOUN 	 NNS 	 dobj 	 
of 	 ADP 	 IN 	 prep 	 
Acme 	 PROPN 	 NNP 	 compound 	 ORG
Corp. 	 PROPN 	 NNP 	 pobj 	 ORG
in 	 ADP 	 IN 	 prep 	 
2006 	 NUM 	 CD 	 pobj 	 DATE
. 	 PUNCT 	 . 	 punct 	 


In [6]:
# Look up the human-readable description of a single tag.
spacy.explain('NNP')

'noun, proper singular'

In [7]:
# Pair each token's fine-grained tag with the description spaCy gives for it.
for tok in tokens:
    tag = tok.tag_
    description = spacy.explain(tag)
    print(tag, ':', description)

NNP : noun, proper singular
VBD : verb, past tense
CD : cardinal number
NNS : noun, plural
IN : conjunction, subordinating or preposition
NNP : noun, proper singular
NNP : noun, proper singular
IN : conjunction, subordinating or preposition
CD : cardinal number
. : punctuation mark, sentence closer


In [8]:
# Show the named-entity spans found in the processed sentence.
print(tokens.ents)

(Jim, 300, Acme Corp., 2006)


In [9]:
# For every entity span, report its start/end character offsets in the
# sentence together with its entity label.
for span in tokens.ents:
    begin, end, label = span.start_char, span.end_char, span.label_
    print(begin, end, label)

0 3 PERSON
11 14 CARDINAL
25 35 ORG
39 43 DATE


In [11]:
# Visualise the named entities with displaCy's 'ent' style, rendered for
# Jupyter (jupyter=True).
displacy.render(tokens, style='ent', jupyter=True)

In [12]:
# Dump the words/arcs dictionary that displaCy builds for the dependency view.
# Note: in the printed result the final period is merged into the preceding
# word ('2006.'), so it lists 9 words rather than the 10 tokens printed earlier.
print(displacy.parse_deps(tokens))

{'words': [{'text': 'Jim', 'tag': 'PROPN', 'lemma': None}, {'text': 'bought', 'tag': 'VERB', 'lemma': None}, {'text': '300', 'tag': 'NUM', 'lemma': None}, {'text': 'shares', 'tag': 'NOUN', 'lemma': None}, {'text': 'of', 'tag': 'ADP', 'lemma': None}, {'text': 'Acme', 'tag': 'PROPN', 'lemma': None}, {'text': 'Corp.', 'tag': 'PROPN', 'lemma': None}, {'text': 'in', 'tag': 'ADP', 'lemma': None}, {'text': '2006.', 'tag': 'NUM', 'lemma': None}], 'arcs': [{'start': 0, 'end': 1, 'label': 'nsubj', 'dir': 'left'}, {'start': 2, 'end': 3, 'label': 'nummod', 'dir': 'left'}, {'start': 1, 'end': 3, 'label': 'dobj', 'dir': 'right'}, {'start': 3, 'end': 4, 'label': 'prep', 'dir': 'right'}, {'start': 5, 'end': 6, 'label': 'compound', 'dir': 'left'}, {'start': 4, 'end': 6, 'label': 'pobj', 'dir': 'right'}, {'start': 1, 'end': 7, 'label': 'prep', 'dir': 'right'}, {'start': 7, 'end': 8, 'label': 'pobj', 'dir': 'right'}], 'settings': {'lang': 'en', 'direction': 'ltr'}}


In [13]:
# Draw the dependency parse with displaCy's 'dep' style; the 'distance' option
# is set to 120 to control the spacing between words in the diagram.
displacy.render(tokens, style='dep', jupyter=True, options={'distance':120})

In [14]:
# Alternative loading path: import the installed model package directly and
# call its load() function, instead of going through spacy.load() by name.
import en_core_web_sm
model = en_core_web_sm.load()

In [15]:
# Process the sample sentence with `model`, the pipeline loaded through the
# en_core_web_sm package import. The original cell called `nlp` here, which
# left `model` unused and only repeated the earlier spacy.load() result
# instead of exercising the alternative loading path.
tokens1 = model(text)

# Same per-token table as before: token, coarse POS, fine-grained tag,
# dependency label and entity type.
for token in tokens1:
    print(token, '\t', token.pos_, '\t', token.tag_, '\t', token.dep_, '\t', token.ent_type_)

Jim 	 PROPN 	 NNP 	 nsubj 	 PERSON
bought 	 VERB 	 VBD 	 ROOT 	 
300 	 NUM 	 CD 	 nummod 	 CARDINAL
shares 	 NOUN 	 NNS 	 dobj 	 
of 	 ADP 	 IN 	 prep 	 
Acme 	 PROPN 	 NNP 	 compound 	 ORG
Corp. 	 PROPN 	 NNP 	 pobj 	 ORG
in 	 ADP 	 IN 	 prep 	 
2006 	 NUM 	 CD 	 pobj 	 DATE
. 	 PUNCT 	 . 	 punct 	 
