In [None]:
# Expanding named entity
# Mr.Krisna and Dr.Srikanth

In [None]:
# spaCy is trained on millon of samples 

In [40]:
import spacy
from spacy.matcher import Matcher
from spacy.tokens import Span
from spacy import displacy


In [41]:
nlp = spacy.load('en_core_web_sm')

In [42]:
doc = nlp('Dr. krishna Kumar is coming to address this issues')

In [43]:
doc

Dr. krishna Kumar is coming to address this issues

In [44]:
print([(ent.text, ent.label_) for ent in doc.ents])

[('krishna Kumar', 'PERSON')]


In [45]:
# custom adding title to the sentences
def add_title(doc):
    new_ents = []
    for ent in doc.ents:
        if ent.label_ == 'PERSON' and ent.start!=0:
            prev_token = doc[ent.start-1]
            if prev_token.text in ('Dr.','Dr','Mr','Mr.','Mrs','Mrs.'):
                new_ent = Span(doc,ent.start-1,ent.end,label=ent.label)
                new_ents.append(new_ent)
            else:
                new_ents.append(ent)
    doc.ents=new_ents
    return doc

In [46]:
nlp = spacy.load('en_core_web_sm')

In [47]:
nlp.add_pipe(add_title, after='ner')

In [48]:
doc = nlp('Dr. krishna Kumar is coming to address this issues')

In [49]:
print([(ent.text, ent.label_) for ent in doc.ents])

[('Dr. krishna Kumar', 'PERSON')]


## Use of POS and Dependency Parsing

In [52]:
nlp=spacy.load('en_core_web_sm')

In [53]:
doc = nlp('Krishna Kumar was working at Google')

In [56]:
displacy.render(doc,style='dep',options={'compact':True,'distance':100})

In [63]:
def get_person_orgs(doc):
    person_entity=[ent for ent in doc.ents if ent.label_ =="PERSON"]
    for ent in person_entity:
        head=ent.root.head
        if head.lemma_=='work':
            preps=[token for token in head.children if token.dep_=='prep']
            for prep in preps:
                orgs = [token for token in prep.children if token.ent_type == 'ORG']
                print({'person':ent,'orgs':orgs,'past':head.tag_=="VBD"})
                
    return doc

In [64]:
from spacy.pipeline import merge_entities

In [65]:
nlp =spacy.load('en_core_web_sm')

In [66]:
nlp.add_pipe(merge_entities)

In [67]:
nlp.add_pipe(get_person_orgs)

In [68]:
doc = nlp('Krishna Mohan was working at Google')

{'person': Krishna Mohan, 'orgs': [], 'past': False}


# Modified Model

In [94]:
def get_person_orgs(doc):
    person_entity=[ent for ent in doc.ents if ent.label_ =="PERSON"]
    for ent in person_entity:
        head=ent.root.head
        if head.lemma_=='work':
            preps=[token for token in head.children if token.dep_=='prep']
            for prep in preps:
                orgs = [token for token in prep.children if token.ent_type == 'ORG' or token.ent_type == 'pobj']
                print({'person':ent,'orgs':orgs,'past':head.tag_=="VBD" or head.tag_=='VBG'})
                aux = [token for token in head.children if token.dep_=='aux']
                past_aux = any(t.tag =='VBD' for t in aux)
                past = head.tag_=='VBD' or head.tag_=='VBG' and past_aux
            #print({'person':ent,'orgs':orgs,'past':past})
    return doc

In [95]:
from spacy.pipeline import merge_entities

In [96]:
nlp =spacy.load('en_core_web_sm')

In [97]:
nlp.add_pipe(merge_entities)

In [98]:
nlp.add_pipe(get_person_orgs)

In [99]:
doc = nlp('Krishna Mohan worked at Google')

{'person': Krishna Mohan, 'orgs': [], 'past': True}
