## Named Entity Recognition (NER)

In [1]:
#Import spaCy
import spacy

#Load the "en_core_web_sm" pipeline and call it "nlp"
nlp=spacy.load("en_core_web_sm")

In [2]:
# Create a doc object by running the "nlp" pipeline on a sample sentence
doc_object=nlp("Samsung in Ireland are pleased with their new folding screen that they released after a $9 million investment.")

In [3]:
#Collect the text of every token in doc_object, then print each one followed by " | "
token_texts=[tok.text for tok in doc_object]
for text in token_texts:
    print(text, end=" | ")

Samsung | in | Ireland | are | pleased | with | their | new | folding | screen | that | they | released | after | a | $ | 9 | million | investment | . | 

In [4]:
#Print every named entity found in the doc object, one per line

for ent in doc_object.ents:
    print(ent)

Samsung
Ireland
$9 million


In [5]:
#Print every named entity of the doc object together with its label

for ent in doc_object.ents:
    print(f"{ent} {ent.label_}")

Samsung ORG
Ireland GPE
$9 million MONEY


In [6]:
#Print every named entity of the doc object with its label and
#the explanation spaCy gives for that label

for ent in doc_object.ents:
    label=ent.label_
    print(ent,label,spacy.explain(label))

Samsung ORG Companies, agencies, institutions, etc.
Ireland GPE Countries, cities, states
$9 million MONEY Monetary values, including unit


In [7]:
# "car" is not recognized as an entity; only "Nissan" and "U.K." are
doc_object=nlp("I like to drive my Nissan car in the U.K.")

for ent in doc_object.ents:
    print(f"{ent} {ent.label_}")

Nissan ORG
U.K. GPE


In [8]:
#Create a function to display entity info from a doc object.
#Show entity text,label and label explanation
#Show message if no NER exists

def showentities(doc_object):
    """Print each entity of doc_object with its label and the label's explanation.

    Prints "There are no information in doc object" when doc_object is falsy,
    and "No NER exists" when doc_object has no entities.
    """
    #Guard: nothing to inspect at all
    if not doc_object:
        print("There are no information in doc object")
        return

    found=doc_object.ents
    #Guard: a doc object was given but it holds no entities
    if len(found)==0:
        print("No NER exists")
        return

    for ent in found:
        print(ent,ent.label_,spacy.explain(ent.label_))
        

#Test the function with the text "I like my text", which contains no entities
doc_obj=nlp("I like my text")

showentities(doc_obj)

No NER exists


In [9]:
#Another Method 

#Create a function to display entity info from a doc object.
#Show entity text,label and label explanation
#Show message if no NER exists
#Add spaces between the texts printed

def showentities(doc_object):
    """Print one column-aligned line (text, label, explanation) per entity.

    Columns are left-aligned and padded to 20, 10 and 20 characters.

    Prints "There are no NER information in doc object" when doc_object is
    falsy, and "No NER exists" when doc_object has no entities.
    """
    if not doc_object:
        print("There are no NER information in doc object")
        return

    if not doc_object.ents:
        #Say so explicitly instead of silently printing nothing
        print("No NER exists")
        return

    for entity in doc_object.ents:
        #spacy.explain returns None for a label it has no description for;
        #None rejects a width format spec (TypeError), so fall back to ""
        explanation=spacy.explain(entity.label_) or ""
        print(f"{entity.text:20} {entity.label_:10} {explanation:20}")
        
#Test the function with the text "I like my LYIT" (a text that contains an entity)
doc_obj=nlp("I like my LYIT")

showentities(doc_obj)

LYIT                 ORG        Companies, agencies, institutions, etc.


## Noun Chunks

In [10]:
#The noun chunks in this sentence are: autonomous cars, insurance liability, manufacturers
doc_object=nlp("Autonomous cars shift insurance liability toward manufacturers")


In [11]:
#Print the text of every noun chunk in the doc object
for noun_chunk in doc_object.noun_chunks:
    print(noun_chunk.text)

Autonomous cars
insurance liability
manufacturers


In [12]:
#Print each noun chunk, its root word and the explanation of the root's dependency label
for noun_chunk in doc_object.noun_chunks:
    root=noun_chunk.root
    print(noun_chunk.text,root.text,spacy.explain(root.dep_))

Autonomous cars cars nominal subject
insurance liability liability direct object
manufacturers manufacturers object of preposition


In [13]:
#Print each noun chunk, its root word, the explanation of the root's dependency label
#and the text of the root's head (the word which links the noun chunks)
for noun_chunk in doc_object.noun_chunks:
    root=noun_chunk.root
    print(noun_chunk.text,root.text,spacy.explain(root.dep_),root.head.text)

Autonomous cars cars nominal subject shift
insurance liability liability direct object shift
manufacturers manufacturers object of preposition toward


In [14]:
#Use displacy to show NER, noun chunks etc.
from spacy import displacy

In [26]:
#Render the sentence with the "dep" style in Jupyter mode, using custom display options
dep_options={
    "distance":120,
    "color":"Blue",
    "arrow_stroke":4,
    "arrow_spacing":20,
    "word_spacing":50,
    "compact":True,
}
displacy.render(doc_object,style="dep",jupyter=True,options=dep_options)

In [42]:
#Load text file "noun_chunks.txt"
#Show entities and labels
#display entities using "style=ent"

def displayEntitiesInfo(doc_object):
    """Print every entity of doc_object, then render the entities with displacy.

    Each printed line holds the entity, its label and spaCy's explanation of
    that label. The rendering uses style "ent" in Jupyter mode.
    """
    for ent in doc_object.ents:
        label=ent.label_
        print(ent,label,spacy.explain(label))
    displacy.render(doc_object,style="ent",jupyter=True)
    
#Read the whole file; the "with" block closes it even if read() raises
with open("noun_chunks.txt","r") as file:
    file_contents=file.read()

#Run the pipeline on the file text
doc_object=nlp(file_contents)

#Print the entities and render them
displayEntitiesInfo(doc_object)

Sebastian Thrun PERSON People, including fictional
Google ORG Companies, agencies, institutions, etc.
2007 DATE Absolute or relative dates or periods

 GPE Countries, cities, states
American NORP Nationalities or religious or political groups
Thrun PERSON People, including fictional
Recode ORG Companies, agencies, institutions, etc.
earlier this week DATE Absolute or relative dates or periods

 GPE Countries, cities, states
less than a decade later DATE Absolute or relative dates or periods
dozens CARDINAL Numerals that do not fall under another type
