In [5]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [7]:
#Write a Function to displacy basic entity info
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text+" - "+ent.label_+" - "+str(spacy.explain(ent.label_)))
    else:
        print("No named entities found.")

In [9]:
doc = nlp(u"May I go to Washington, DC next May to see the Washington Monument?")
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [11]:
doc = nlp(u"Can I please borrow 500 dollor from you to buy some Microsoft stock")

for ent in doc.ents:
    print(ent.text, ent.start, ent.end, ent.end, ent.start_char, ent.end_char, ent.label_)

500 4 5 5 20 23 CARDINAL
Microsoft 11 12 12 52 61 ORG


In [13]:
for token in doc:
    print(token.text, token.pos_, token.dep_)

Can AUX aux
I PRON nsubj
please INTJ intj
borrow VERB ROOT
500 NUM nummod
dollor NOUN dobj
from ADP prep
you PRON pobj
to PART aux
buy VERB advcl
some DET det
Microsoft PROPN compound
stock NOUN dobj


In [15]:
doc = nlp(u"Tesla to build a U.K. factory for $6 million")   #tesla vocab  me nahi hai isliye print nahi ho rah hai uske vocab add akrna padega 
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [23]:
from spacy.tokens import Span   #vocab kuch bhi add karna hai Tesla
#Get the hash value of ORG entity label
ORG = doc.vocab.strings[u"ORG"]

#Create a span for the new entity
new_ent = Span(doc,0,1, label=ORG)

#Add the entity to the existing Doc object
doc.ents = list(doc.ents) + [new_ent]

In [25]:
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [41]:
#Adding Named Entitie to All Matching Span          #Oridnal matlab number fisrt second 
doc = nlp(u"Our Company plans to introduce a new vaccum cleaner."
         u"If sucessful, the vacuum-cleaner will be our first product.")

show_ents(doc)

first - ORDINAL - "first", "second", etc.


In [43]:
#Import PhraseMatcher phrase pattern
from spacy.matcher import PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)

In [45]:
#create the desired phraser pattern 
phrase_list = ["vaccum cleaner", "vacuum-cleaner"]  #ye doc me find karenge hai ki hai output dega
phrase_pattern = [nlp(text) for text in phrase_list]

#Apply the pattern to ur matcher object
matcher.add("newproduct", None, *phrase_pattern)

#Apply the matcher to our Doc object
matche = matcher(doc)
#See what matche occur
matche

[(2689272359382549672, 7, 9), (2689272359382549672, 14, 17)]

In [49]:
#Here we create span from each match and create named entitie from when  # add entit PRODUCT  jo ki isse pahle nahi le rah tha product ko
from spacy.tokens import span

PROD = doc.vocab.strings[u"PRODUCT"]

new_ent = [Span(doc, match[1],match[2],label=PROD) for match in matche]

doc.ents = list(doc.ents) + new_ent


In [53]:
show_ents(doc)

vaccum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
first - ORDINAL - "first", "second", etc.


In [67]:
#Count Entite koi paarticular entitie kitni baar aa rah a hai 
doc = nlp(u"Originally priced at $29.50, the sweater was marked down to five dollor")
show_ents(doc)

29.50 - MONEY - Monetary values, including unit
five - CARDINAL - Numerals that do not fall under another type


In [69]:
len([ent for ent in doc.ents if ent.label_== "MONEY"])

1

In [75]:
#Line break ho toh kaise kare 
doc = nlp(u"Originally priced at $29.50,\nthe sweater was marked down to five dollors.")
show_ents(doc)

29.50 - MONEY - Monetary values, including unit
five - CARDINAL - Numerals that do not fall under another type


In [83]:
#Noun chunk  
# text
# root.text
# root.dep_
# root.head.text
noun_chunk = nlp(u"Autonomouse cars shift insurence liability toward manufactures.")

for chunk in doc.noun_chunks:
    print(chunk.text+' - '+chunk.root.text+' - '+chunk.root.dep_+' - '+chunk.root.head.text)

the sweater - sweater - nsubjpass - marked
five dollors - dollors - pobj - to


In [85]:
#noun_chunk len aishe nahi niklega
len(doc.noun_chunks)

TypeError: object of type 'generator' has no len()

In [87]:
len(list(doc.noun_chunks))  #noun_chunk ki len nikalne ke liye list chahiye

2

In [89]:
#Visulization Name Entitie doc
#Perform standard import
nlp = spacy.load("en_core_web_sm")

#Import the displacy library
from spacy import displacy


In [95]:
doc = nlp(u"Over the last quater Apple sold nearly 20 thousand ipods for a profit of $6 million."
        u"By contrast, Sony sold only 7 thousand Walkman music players.")
displacy.render(doc, style="ent", jupyter=True)

In [97]:
#View sentence line by line 
for sent in doc.sents:
    displacy.render(nlp(sent.text), style="ent", jupyter=True)

In [99]:
doc2 = nlp(u"Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million."
           u"By contrast, my kind sold a lot of lemonade.")  # ye diagram isliye nahi ban ki ushe kuch nahi mila ish line me

for sent in doc2.sents:
    displacy.render(nlp(sent.text), style="ent", jupyter=True)

