In [19]:
import spacy
from spacy.matcher import Matcher
from spacy.tokens import Span
from spacy import displacy
nlp = spacy.load('en_core_web_sm')
from spacy import displacy
from spacy.pipeline import merge_entities

### **Expanding names entities**

In [3]:
doc = nlp('Dr. Alex Smith chaird first board meeting at Google')

In [4]:
doc

Dr. Alex Smith chaird first board meeting at Google

In [7]:
print([(ent.text, ent.label_) for ent in doc.ents])

[('Alex Smith', 'PERSON'), ('Google', 'ORG')]


In [8]:
def add_title(doc):
  new_ents = []
  for ent in doc.ents:
    if ent.label_ == 'PERSON' and ent.start!= 0:
      prev_token = doc[ent.start-1]
      if prev_token.text in ('Dr', 'Dr.', 'Mr', 'Mr.'):
        new_ent = Span(doc, ent.start-1, ent.end, label=ent.label)
        new_ents.append(new_ent)
      else:
        new_ents.append(ent)
  doc.ents = new_ents
  return doc

In [9]:
nlp = spacy.load('en_core_web_sm')

In [10]:
nlp.add_pipe(add_title, after='ner')

In [12]:
doc = nlp('Dr. Alex Smith chaird first board meeting at Google')

In [13]:
print([(ent.text, ent.label_) for ent in doc.ents])

[('Dr. Alex Smith', 'PERSON')]


### **Use of POS and dependancy parsin**

In [14]:
nlp = spacy.load('en_core_web_sm')

In [15]:
doc = nlp('Alex Smith was working at Google')

In [17]:
displacy.render(doc, style='dep', options={'compact': True, 'distance': 100})

'<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="5c2e28d5b00547eb8b682b1c836729af-0" class="displacy" width="650" height="237.0" direction="ltr" style="max-width: none; height: 237.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="147.0">\n    <tspan class="displacy-word" fill="currentColor" x="50">Alex</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">PROPN</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="147.0">\n    <tspan class="displacy-word" fill="currentColor" x="150">Smith</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="150">PROPN</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="147.0">\n    <tspan class="displacy-word" fill="currentColor" x="250">was</tspan>\n    <tspan class="displacy-

In [18]:
def get_person_orgs(doc):
    person_entities = [ent for ent in doc.ents if ent.label_=="PERSON"]
    for ent in person_entities:
        head = ent.root.head
        if head.lemma_ == 'work':
            preps = [token for token in head.children if token.dep_ == 'prep']
            for prep in preps:
                orgs = [token for token in prep.children if token.ent_type_ == 'ORG']
                print({'person': ent, 'orgs': orgs, 'past': head.tag_ == "VBD"})
    return doc

In [20]:
nlp = spacy.load('en_core_web_sm')

In [21]:
nlp.add_pipe(merge_entities)

In [22]:
nlp.add_pipe(get_person_orgs)

In [24]:
doc = nlp('Alex Smith worked at Google')

{'person': Alex Smith, 'orgs': [Google], 'past': True}


### **Modify Model**

In [29]:
def get_person_orgs(doc):
    person_entities = [ent for ent in doc.ents if ent.label_=="PERSON"]
    for ent in person_entities:
        head = ent.root.head
        if head.lemma_ == 'work':
            preps = [token for token in head.children if token.dep_ == 'prep']
            for prep in preps:
                orgs = [token for token in prep.children if token.ent_type_ == 'ORG']
                aux = [token for token in head.children if token.dep_ == 'aux']
                past_aux = any(t.tag_ == 'VBD' for t in aux)
                past = head.tag_ == 'VBD' or head.tag_ == 'VBG' and past_aux
             
            print({'person': ent, 'orgs': orgs, 'past': past})
    return doc

In [30]:
nlp = spacy.load('en_core_web_sm')

In [31]:
nlp.add_pipe(merge_entities)

In [32]:
nlp.add_pipe(get_person_orgs)

In [33]:
doc = nlp('Alex Smith was working at Google')

{'person': Alex Smith, 'orgs': [Google], 'past': True}


In [34]:
doc = nlp('Alex Smith worked at Google')

{'person': Alex Smith, 'orgs': [Google], 'past': True}
