In [1]:
import spacy

In [2]:
#Build upon the spaCy Small Model
nlp = spacy.load("en_core_web_sm")

In [3]:
#Sample text
text = "The village of Treblinka is in Poland. Treblinka was also an extermination camp."

In [4]:
#Create the Doc object
doc = nlp(text)

In [5]:
#extract entities
for ent in doc.ents:
    print (ent.text, ent.label_)

Treblinka GPE
Poland GPE


# EntityRuler

In [6]:
#Create the EntityRuler
ruler = nlp.add_pipe("entity_ruler", after="ner")

In [7]:
patterns = [
                {"label": "GPE", "pattern": "Treblinka"}
            ]

ruler.add_patterns(patterns)

In [8]:
doc = nlp(text)

In [9]:
#extract entities
for ent in doc.ents:
    print (ent.text, ent.label_)

Treblinka GPE
Poland GPE
Treblinka GPE


In [10]:
nlp.analyze_pipes()

{'summary': {'tok2vec': {'assigns': ['doc.tensor'],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'tagger': {'assigns': ['token.tag'],
   'requires': [],
   'scores': ['tag_acc'],
   'retokenizes': False},
  'parser': {'assigns': ['token.dep',
    'token.head',
    'token.is_sent_start',
    'doc.sents'],
   'requires': [],
   'scores': ['dep_uas',
    'dep_las',
    'dep_las_per_type',
    'sents_p',
    'sents_r',
    'sents_f'],
   'retokenizes': False},
  'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'],
   'requires': [],
   'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'],
   'retokenizes': False},
  'entity_ruler': {'assigns': ['doc.ents', 'token.ent_type', 'token.ent_iob'],
   'requires': [],
   'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'],
   'retokenizes': False},
  'attribute_ruler': {'assigns': [],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'lemmatizer': {'assigns': ['token.lemma'],
   'requires'

# Introducing Complex Rules and Variance to the EntityRuler (Advanced)

In [21]:
#Sample text
text = "This is a sample number (539) 505-7649."

In [22]:
#Build upon the spaCy Small Model
nlp = spacy.blank("en")

In [23]:
#Create the Ruler and Add it
ruler = nlp.add_pipe("entity_ruler")

In [24]:
#List of Entities and Patterns (source: https://spacy.io/usage/rule-based-matching)
patterns = [
                {"label": "PHONE_NUMBER", "pattern": [{"ORTH": "("}, {"SHAPE": "ddd"}, {"ORTH": ")"}, {"SHAPE": "ddd"},
                {"ORTH": "-", "OP": "?"}, {"SHAPE": "dddd"}]}
            ]
#add patterns to ruler
ruler.add_patterns(patterns)

In [25]:
#create the doc
doc = nlp(text)

In [26]:
#extract entities
for ent in doc.ents:
    print (ent.text, ent.label_)

(539) 505-7649 PHONE_NUMBER


In [17]:
nlp = spacy.load("en_core_web_lg")

In [18]:
text = '''Harry James[59] Potter (b. 31 July 1980[1]) was an English half-blood[2] wizard, and one of the most famous wizards 
of modern times. He was the only child and son of James and Lily Potter (née Evans), both members of the original Order of 
the Phoenix. Harry's birth was overshadowed by a prophecy, naming either himself or Neville Longbottom as the one with the 
power to vanquish Lord Voldemort. After half of the prophecy was reported to Voldemort, courtesy of Severus Snape, Harry was 
chosen as the target due to his many similarities with the Dark Lord. In turn, this caused the Potter family to go into hiding.
Voldemort made his first vain attempt to circumvent the prophecy when Harry was a year and three months old. During this 
attempt, he murdered Harry's parents as they tried to protect him, but this unsuccessful attempt to kill Harry led to 
Voldemort's first downfall. This downfall marked the end of the First Wizarding War, and to Harry henceforth being known as 
the 'Boy Who Lived',[5] as he was the only known survivor of the Killing Curse.
One consequence of Lily's loving sacrifice was that her orphaned son had to be raised by her only remaining blood relative,
his Muggle aunt, Petunia Dursley. While in her care he would be protected from Lord Voldemort, due to the Bond of Blood charm
Albus Dumbledore placed upon him.[60] This powerful charm would protect him until he became of age, or no longer called his 
aunt's house home. Due to Petunia's resentment of her sister and her magic gifts, Harry grew up abused and neglected.
On his eleventh birthday, Harry learned that he was a wizard, from Rubeus Hagrid.[61] He began attending Hogwarts School of 
Witchcraft and Wizardry in 1991. The Sorting Hat was initially going to Sort Harry into Slytherin House, but Harry pleaded
'not Slytherin' and the Hat heeded this plea, instead sorting the young wizard into Gryffindor House.[62] At school, Harry 
became best friends with Ron Weasley and Hermione Granger. He later became the youngest Quidditch Seeker in over a century 
and eventually the captain of the Gryffindor House Quidditch Team in his sixth year, winning two Quidditch Cups.[63] He became
even better known in his early years for protecting the Philosopher's Stone from Voldemort, saving Ron's sister Ginny Weasley, 
solving the mystery of the Chamber of Secrets, slaying Salazar Slytherin's basilisk, and learning how to conjure a corporeal 
stag Patronus at the age of thirteen. In his fourth year, Harry won the Triwizard Tournament, although the competition ended 
with the tragic death of Cedric Diggory and the return of Lord Voldemort. During the next school year, Harry reluctantly 
taught and led Dumbledore's Army. He also fought in the Battle of the Department of Mysteries, during which he lost his 
godfather, Sirius Black.'''

In [19]:
doc = nlp(text)

In [20]:
for ent in doc.ents:
    if ent.label_ == "PERSON":
        print (ent.text, "***", ent.label_)

Harry James[59] Potter *** PERSON
Evans *** PERSON
Harry *** PERSON
Neville Longbottom *** PERSON
Voldemort *** PERSON
Severus Snape *** PERSON
Harry *** PERSON
Harry *** PERSON
Harry *** PERSON
Harry *** PERSON
Harry *** PERSON
Lily *** PERSON
Petunia Dursley *** PERSON
Voldemort *** PERSON
Harry *** PERSON
Harry *** PERSON
Harry *** PERSON
Harry 
 *** PERSON
Ron Weasley *** PERSON
Hermione Granger *** PERSON
Quidditch Seeker *** PERSON
Voldemort *** PERSON
Ron *** PERSON
Ginny Weasley *** PERSON
Salazar Slytherin's *** PERSON
Harry *** PERSON
Cedric Diggory *** PERSON
Voldemort *** PERSON
Harry *** PERSON
