## <font color = blue> Named Entity Recognition </font>

## <font color = blue> Named Entity Recognition with spaCy </font>

In [2]:
# Import spaCy and load the 'en_core_web_sm' English pipeline. The loaded
# pipeline is bound to `ner` because this notebook uses it for named entity
# recognition (NER); the same object also supplies the part-of-speech tags
# used in the cells below.
import spacy

ner = spacy.load('en_core_web_sm')

In [3]:
# Run the pipeline on a sample sentence; the result is reused by the cells below.
doc = ner("Pravin, Radhika and Rutvik went to Akola")

### <font color = blue> The original text can be retrieved from the object created above </font>

In [4]:
# `.text` gives back the processed sentence as a plain string.
doc.text

'Pravin, Radhika and Rutvik went to Akola'

### <font color = blue> Words can be retrieved from the sentence by index, just like list or string indexing </font>

In [5]:
# Index 0 is the first word of the sentence.
doc[0]

Pravin

In [6]:
# Negative indices count from the end, so -1 is the last word.
doc[-1]

Akola

### <font color = blue> Fine and Coarse POS tagging </font>

In [7]:
# Coarse and fine part-of-speech tags of the first word.
# NOTE(review): a notebook cell only echoes its LAST expression, so the coarse
# tag on the first line is evaluated but never displayed; the output recorded
# for this cell is the fine tag. The two cells that follow show each tag alone.

doc[0].pos_     # coarse
doc[0].tag_     # fine

'NNP'

In [8]:
# Fine-grained part-of-speech tag of the first word.
doc[0].tag_     # fine

'NNP'

In [9]:
# Coarse-grained part-of-speech tag of the first word.
doc[0].pos_

'PROPN'

### <font color = blue> Get the meaning of tags

In [10]:
# Look up the human-readable description of the coarse tag seen above.
spacy.explain('PROPN')

'proper noun'

In [11]:
# Look up the human-readable description of the fine tag seen above.
spacy.explain('NNP')

'noun, proper singular'

### <font color = blue> Tag all the words

In [12]:
# Walk the sentence token by token and print, in order: the text, the coarse
# tag, the fine tag, and the description of the fine tag.
for token in doc:
    description = spacy.explain(token.tag_)
    print(token.text, "------>", token.pos_, token.tag_, description)

Pravin ------> PROPN NNP noun, proper singular
, ------> PUNCT , punctuation mark, comma
Radhika ------> PROPN NNP noun, proper singular
and ------> CCONJ CC conjunction, coordinating
Rutvik ------> PROPN NNP noun, proper singular
went ------> VERB VBD verb, past tense
to ------> ADP IN conjunction, subordinating or preposition
Akola ------> PROPN NNP noun, proper singular


### <font color = blue> Write a program to extract only the names (proper nouns) </font>

In [17]:
# Parse the sample sentence again so `doc` holds it for this cell.
doc = ner("Pravin, Radhika and Rutvik went to Akola")

# Show the coarse tag, fine tag and fine-tag description of every token.
for word in doc:
    print(word.text, "------>", word.pos_, word.tag_, spacy.explain(word.tag_))

# Keep the tokens whose coarse tag is PROPN (proper noun). This is a
# part-of-speech filter, so the place name "Akola" is kept alongside the
# people's names. The list holds the token objects themselves, not strings.
name = [tok for tok in doc if tok.pos_ == 'PROPN']

name

Pravin ------> PROPN NNP noun, proper singular
, ------> PUNCT , punctuation mark, comma
Radhika ------> PROPN NNP noun, proper singular
and ------> CCONJ CC conjunction, coordinating
Rutvik ------> PROPN NNP noun, proper singular
went ------> VERB VBD verb, past tense
to ------> ADP IN conjunction, subordinating or preposition
Akola ------> PROPN NNP noun, proper singular


[Pravin, Radhika, Rutvik, Akola]

### Example 2

In [19]:
# Sample paragraph that the next cell passes through the pipeline for entity extraction.
raw_text='''The Indian Space Research Organisation or is the national space agency of India, headquartered in Bengaluru. It operates under Department of Space which is directly overseen by the Prime Minister of India while Chairman of ISRO acts as executive of DOS as well.'''

In [20]:
# Run the pipeline on the paragraph, then list every detected entity
# together with its label.
doc = ner(raw_text)

for entity in doc.ents:
    print(entity.text, entity.label_)

The Indian Space Research Organisation ORG
the national space agency ORG
India GPE
Bengaluru GPE
Department of Space ORG
India GPE
ISRO ORG
DOS ORG


### Example 3: Extract Contact Numbers only

In [32]:
text = '''I am Pravin Jawarkar. My contact details are 9156691740 and 9860258196. Flat no is Rohini J 204'''

doc2 = ner(text)

# Show every token next to its coarse part-of-speech tag.
for word in doc2:
    print(word, word.pos_)

# Keep the tokens tagged NUM. This picks up every number in the text, so the
# flat number is collected together with the two contact numbers.
numbers = [tok for tok in doc2 if tok.pos_ == 'NUM']

numbers

I PRON
am AUX
Pravin PROPN
Jawarkar PROPN
. PUNCT
My PRON
contact NOUN
details NOUN
are AUX
9156691740 NUM
and CCONJ
9860258196 NUM
. PUNCT
Flat ADJ
no PRON
is AUX
Rohini PROPN
J PROPN
204 NUM


[9156691740, 9860258196, 204]

In [34]:
# `numbers` comes from the previous cell and holds [9156691740, 9860258196, 204].
# Only the entries of length 10 are contact numbers, so everything else is skipped.
contact_details = []
for num in numbers:
    if len(num) != 10:
        continue
    print(num)
    contact_details.append(num)

contact_details

9156691740
9860258196


[9156691740, 9860258196]