In [None]:
import spacy
# Load the 'en_core_web_sm' pipeline once; later cells call `sp(...)` on raw
# text to obtain a parsed document.
sp = spacy.load('en_core_web_sm')

In [None]:
# Parse a two-sentence sample; the following cells inspect this document's tokens.
sen = sp("I like to play football. I hated it in my childhood though")

In [None]:
# Show the text of the parsed document.
print(sen.text)

I like to play football. I hated it in my childhood though


In [None]:
# Part-of-speech label of the token at index 7 ("hated" in this sentence;
# the full stop after "football" counts as a token of its own).
print(sen[7].pos_)

VERB


In [None]:
# Fine-grained tag of the token at index 7 ("hated"); more specific than pos_.
print(sen[7].tag_)

VBD


In [None]:
# spacy.explain turns the tag abbreviation into a human-readable description.
print(spacy.explain(sen[7].tag_))

verb, past tense


In [None]:
# One row per token: text, POS label, fine-grained tag, and the tag's description.
# The numbers after ':' are fixed column widths so the rows line up.
for token in sen:
    tag_meaning = spacy.explain(token.tag_)
    print(f'{token.text:12} {token.pos_:10} {token.tag_:8} {tag_meaning}')

I            PRON       PRP      pronoun, personal
like         VERB       VBP      verb, non-3rd person singular present
to           PART       TO       infinitival "to"
play         VERB       VB       verb, base form
football     NOUN       NN       noun, singular or mass
.            PUNCT      .        punctuation mark, sentence closer
I            PRON       PRP      pronoun, personal
hated        VERB       VBD      verb, past tense
it           PRON       PRP      pronoun, personal
in           ADP        IN       conjunction, subordinating or preposition
my           PRON       PRP$     pronoun, possessive
childhood    NOUN       NN       noun, singular or mass
though       ADV        RB       adverb


In [None]:
# Count tokens per part-of-speech tag using Doc.count_by with the POS attribute.
# The resulting dict is keyed by integer POS IDs, not by label strings.
sen = sp("I like to play football. I hated it in my childhood though")
num_pos = sen.count_by(spacy.attrs.POS)

num_pos

{95: 4, 100: 3, 94: 1, 92: 2, 97: 1, 85: 1, 86: 1}

In [None]:
# Frequency of each POS tag, in ascending order of POS ID; the vocab lookup
# translates an integer ID back into its label text.
for pos_id, count in sorted(num_pos.items()):
    label = sen.vocab[pos_id].text
    print(f'{pos_id}. {label:8}: {count}')

85. ADP     : 1
86. ADV     : 1
92. NOUN    : 2
94. PART    : 1
95. PRON    : 4
97. PUNCT   : 1
100. VERB    : 3


In [None]:
# Draw the dependency parse of the sentence inline in the notebook.
from spacy import displacy

sen = sp("I like to play football. I hated it in my childhood though")
displacy.render(
    sen,
    style='dep',
    jupyter=True,
    options={'distance': 85},  # layout option passed through to the 'dep' visualizer
)

In [None]:
# Serve the dependency visualisation from displaCy's built-in web server.
# NOTE(review): the recorded output shows the server listening on port 5000 until
# it was shut down, so this cell presumably blocks until interrupted - confirm
# before relying on Restart & Run All.
displacy.serve(
    sen,
    style='dep',
    options={'distance': 120},
)


Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [None]:
# Named Entity Recognition
import spacy

# Reuse the pipeline when an earlier cell has already loaded it, so the same
# model is not loaded a second time; load it only when this section is run on
# its own in a fresh kernel.
if 'sp' not in globals():
    sp = spacy.load('en_core_web_sm')

# Sample sentence containing an organisation, a person and a monetary amount.
sen = sp('Manchester United is looking to sign Harry Kane for $90 million')

In [None]:
# sen.ents holds the entity spans found in the document (printed as a tuple).
print(sen.ents)

(Manchester United, Harry Kane, $90 million)


In [None]:
# List each named entity with its label and a plain-English gloss of that label.
for ent in sen.ents:
    label_meaning = spacy.explain(ent.label_)
    print(f'{ent.text} - {ent.label_} - {label_meaning}')

Manchester United - ORG - Companies, agencies, institutions, etc.
Harry Kane - PERSON - People, including fictional
$90 million - MONEY - Monetary values, including unit


In [None]:
# Another example sentence: list its entities with label and label description.
sen = sp('Nesfruita is setting up a new company in India')
for ent in sen.ents:
    label_meaning = spacy.explain(ent.label_)
    print(f'{ent.text} - {ent.label_} - {label_meaning}')

Nesfruita - ORG - Companies, agencies, institutions, etc.
India - GPE - Countries, cities, states


In [None]:
# Two-sentence example with two monetary amounts; list every entity found.
# (The MONEY entities are counted in the next cell.)
sen = sp('Manchester United is looking to sign Harry Kane for $90 million. David demand 100 Million Dollars')
for ent in sen.ents:
    label_meaning = spacy.explain(ent.label_)
    print(f'{ent.text} - {ent.label_} - {label_meaning}')

Manchester United - ORG - Companies, agencies, institutions, etc.
Harry Kane - PERSON - People, including fictional
$90 million - MONEY - Monetary values, including unit
100 Million Dollars - MONEY - Monetary values, including unit


In [None]:
# Number of entities labelled MONEY in the current document.
sum(1 for ent in sen.ents if ent.label_ == 'MONEY')

2

In [None]:
# Highlight the recognised entities inline in the notebook.
from spacy import displacy

sen = sp('Manchester United is looking to sign Harry Kane for $90 million. David demand 100 Million Dollars')
displacy.render(
    sen,
    style='ent',
    jupyter=True,
)

In [None]:
# Restrict the entity visualisation to MONEY spans only.
# The options dict is named `money_only_options` rather than `filter` so that
# Python's built-in filter() is not shadowed for the rest of the kernel session.
money_only_options = {'ents': ['MONEY']}
displacy.render(sen, style='ent', jupyter=True, options=money_only_options)

In [None]:
# Serve the entity visualisation from displaCy's built-in web server.
# NOTE(review): the recorded output shows the server listening on port 5000 until
# it was shut down, so this cell presumably blocks until interrupted - confirm
# before relying on Restart & Run All.
displacy.serve(
    sen,
    style='ent',
)


Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.
