In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import random


In [2]:
import spacy
from spacy.cli.download import download
# Fetch the model only when it is not already installed, so that a full
# "Restart & Run All" does not reinstall the package on every run.
# After a fresh download the kernel may need a restart before spacy.load works.
if not spacy.util.is_package("en_core_web_sm"):
    download(model="en_core_web_sm")

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [3]:
# Load the small English pipeline and parse a two-sentence example.
sp = spacy.load('en_core_web_sm')
sen = sp("I like to drink coffee. I hated it in my childhood though")
print("text:", sen.text)

# Inspect the token at index 7: the token itself, its POS, its tag,
# and a human-readable description of that tag.
hated_tok = sen[7]
for caption, value in (
    ("sen[7]\t\t", hated_tok),
    ("sen[7].pos_\t", hated_tok.pos_),
    ("sen[7].tag_\t", hated_tok.tag_),
    ("Explanation\t", spacy.explain(hated_tok.tag_)),
):
    print(caption, value)

text: I like to drink coffee. I hated it in my childhood though
sen[7]		 hated
sen[7].pos_	 VERB
sen[7].tag_	 VBD
Explanation	 verb, past tense


In [4]:
# One aligned row per token: text, POS, tag, and the tag's description.
for tok in sen:
    tag_meaning = spacy.explain(tok.tag_)
    print(f'{tok.text:12} {tok.pos_:10} {tok.tag_:8} {tag_meaning}')

I            PRON       PRP      pronoun, personal
like         VERB       VBP      verb, non-3rd person singular present
to           PART       TO       infinitival "to"
drink        VERB       VB       verb, base form
coffee       NOUN       NN       noun, singular or mass
.            PUNCT      .        punctuation mark, sentence closer
I            PRON       PRP      pronoun, personal
hated        VERB       VBD      verb, past tense
it           PRON       PRP      pronoun, personal
in           ADP        IN       conjunction, subordinating or preposition
my           PRON       PRP$     pronoun, possessive
childhood    NOUN       NN       noun, singular or mass
though       ADV        RB       adverb


In [5]:
# The same surface word can receive different tags depending on context.
sen1 = sp('Can you google it?')
sen2 = sp('Can you search it on google?')
word1 = sen1[2]
word2 = sen2[5]

# Print a header line and an aligned tag row for each (sentence, word) pair.
for doc, tok in ((sen1, word1), (sen2, word2)):
    header = f'The word "{tok}" in "{doc}"'
    row = f'{tok.text:12} {tok.pos_:10} {tok.tag_:8} {spacy.explain(tok.tag_)}'
    print(header, row, sep='\n')

The word "google" in "Can you google it?"
google       VERB       VB       verb, base form
The word "google" in "Can you search it on google?"
google       PROPN      NNP      noun, proper singular


In [6]:
# Parse a new example and count its tokens grouped by the POS attribute.
sen = sp("I like to play football. I hated it in my childhood though")
pos_attr = spacy.attrs.POS
# count_by maps each attribute ID to how many tokens carry it.
num_pos = sen.count_by(pos_attr)
num_pos

{95: 4, 100: 3, 94: 1, 92: 2, 97: 1, 85: 1, 86: 1}

In [7]:
# Walk the POS IDs in ascending order, resolving each ID to its name via the vocab.
for pos_id in sorted(num_pos):
    pos_name = sen.vocab[pos_id].text
    print(f'{pos_id:3}. {pos_name:8}: {num_pos[pos_id]}')

 85. ADP     : 1
 86. ADV     : 1
 92. NOUN    : 2
 94. PART    : 1
 95. PRON    : 4
 97. PUNCT   : 1
100. VERB    : 3


In [8]:
from spacy import displacy
# Parse the example and draw its dependency tree inline in the notebook.
sen = sp("I like to play football. I hated it in my childhood though")
dep_options = {'distance': 85}
displacy.render(sen, style='dep', jupyter=True, options=dep_options)

In [9]:
# Render the same dependency parse again with a larger 'distance' option (120).
# jupyter=True is passed explicitly, matching the other displacy.render cells.
displacy.render(sen, style='dep', jupyter=True, options={'distance': 120})

In [10]:
# Run NER on a sentence and show the detected entities.
sen = sp('Manchester United is looking to sign Harry Kane for $90 million')
print(f'sen.ents\t {sen.ents}\n')

# One aligned row per entity: text, label, and the label's description.
for ent in sen.ents:
    label = ent.label_
    print(f'{ent.text:20}{label:10}{spacy.explain(label)}')

sen.ents	 (Manchester United, Harry Kane, $90 million)

Manchester United   GPE       Countries, cities, states
Harry Kane          PERSON    People, including fictional
$90 million         MONEY     Monetary values, including unit


Imagine that a word is not recognized as an entity and you want to tag it yourself.  
For example, here I want the word "aaaabbbbcccc" tagged as ORG.  
Therefore, I will use `Span(sen, 2, 3, label=ORG)` to add it.
- First, we need to import the `Span` class from the `spacy.tokens` module. 
- Next, we need to get the hash value of the `ORG` entity type from our document. 
- After that, we need to assign the hash value of `ORG` to the span. 
  - Since "aaaabbbbcccc" is the third token in the document (index 2) and the end index is exclusive, the span is 2-3. 
- Finally, we need to add the new entity span to the list of entities.  

If you run the cells below, the first one shows that "aaaabbbbcccc" is not recognized by default; after the span is added, it appears in the list of entities.

In [11]:
# Baseline: see which entities the model finds before any manual additions.
sen = sp('Nesfruita and aaaabbbbcccc is setting up a new company in India')
print(f'sen.ents\t {sen.ents}\n')

for found in sen.ents:
    description = spacy.explain(found.label_)
    print(f'{found.text:20}{found.label_:10}{description}')

sen.ents	 (Nesfruita, India)

Nesfruita           ORG       Companies, agencies, institutions, etc.
India               GPE       Countries, cities, states


In [12]:
from spacy.tokens import Span
# Look up the ID that the vocabulary's string store holds for the "ORG" label.
ORG = sen.vocab.strings['ORG']

# Span end is exclusive, so [2, 3) covers only the third token ("aaaabbbbcccc").
new_entity = Span(sen, 2, 3, label=ORG)

# Doc.ents rejects overlapping spans, so drop any existing entity that overlaps
# the new one first. This keeps the cell safe to re-run and still works if the
# model has already tagged that token.
non_overlapping = [
    ent for ent in sen.ents
    if ent.end <= new_entity.start or ent.start >= new_entity.end
]
sen.ents = non_overlapping + [new_entity]

In [13]:
# Show every entity as "text - label - description".
for ent in sen.ents:
    fields = (ent.text, ent.label_, str(spacy.explain(ent.label_)))
    print(' - '.join(fields))

Nesfruita - ORG - Companies, agencies, institutions, etc.
aaaabbbbcccc - ORG - Companies, agencies, institutions, etc.
India - GPE - Countries, cities, states


In [14]:
# A longer example with several entities of the same type.
sen = sp('Manchester United is looking to sign Harry Kane for $90 million. David de mand 100 Million Dollars')
for ent in sen.ents:
    meaning = spacy.explain(ent.label_)
    print(f'{ent.text:20}{ent.label_:10}{meaning}')

Manchester United   GPE       Countries, cities, states
Harry Kane          PERSON    People, including fictional
$90 million         MONEY     Monetary values, including unit
David de mand       PERSON    People, including fictional
100 Million Dollars MONEY     Monetary values, including unit


In [15]:
# Number of entities labelled PERSON in the current document.
sum(1 for ent in sen.ents if ent.label_ == 'PERSON')

2

In [16]:
from spacy import displacy
# Re-parse the two-sentence example and highlight its entities inline.
sen = sp('Manchester United is looking to sign Harry Kane for $90 million. David de mand 100 Million Dollars')
displacy.render(sen, jupyter=True, style='ent')

In [17]:
# Restrict the entity visualisation to PERSON entities only.
# The options dict is not named `filter`, which would shadow Python's
# builtin filter() for the rest of the kernel session.
person_only_options = {'ents': ['PERSON']}
displacy.render(sen, style='ent', jupyter=True, options=person_only_options)

### Reference
[https://stackabuse.com/python-for-nlp-parts-of-speech-tagging-and-named-entity-recognition](https://stackabuse.com/python-for-nlp-parts-of-speech-tagging-and-named-entity-recognition)   
Author: Usman Malik Blog name: Python for NLP: Parts of Speech Tagging and Named Entity Recognition