In [1]:
import random
import spacy

from spacy import displacy
from spacy.util import minibatch, compounding

In [2]:
nlp = spacy.load("en_core_web_sm")

In [3]:
nlp.pipe_names

['tagger', 'parser', 'ner']

In [4]:
doc = nlp("Australia wants to force Facebook and Google to pay media companies for news")

In [5]:
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

Australia 0 9 GPE
Facebook 25 33 ORG
Google 38 44 ORG


In [6]:
displacy.render(nlp(doc.text), style="ent", jupyter=True)

In [7]:
doc = nlp("A little less than a decade later, dozens of self-driving startups have cropped up while automakers around the world clamor")
displacy.render(nlp(doc.text), style="ent", jupyter=True)

In [8]:
doc = nlp("I am working at Microsoft from 27/07/2017")
displacy.render(nlp(doc.text), style="ent", jupyter=True)

In [9]:
doc = nlp("I do not have money to pay my credit card account")

In [10]:
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

In [11]:
doc = nlp("what is the process to open a new saving account")

In [12]:
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

In [13]:
train = [
    ("Money transfer from my checking account is not working", {"entities": [(6, 13, "ACTIVITY"), (23, 39, "SERVICE")]}),
    ("I want to check balance in my savings account", {"entities": [(16, 23, "ACTIVITY"), (30, 45, "SERVICE")]}),
    ("I suspect a fraud in my credit card account", {"entities": [(12, 17, "ACTIVITY"), (24, 35, "SERVICE")]}),
    ("I am here for opening a new savings account", {"entities": [(14, 21, "ACTIVITY"), (28, 43, "SERVICE")]}),
    ("Your mortage is in delinquent status", {"entities": [(20, 30, "ACTIVITY"), (5, 13, "SERVICE")]}),
    ("Your credit card is in past due status", {"entities": [(23, 31, "ACTIVITY"), (5, 16, "SERVICE")]}),
    ("My loan account is still not approved and funded", {"entities": [(25, 37, "ACTIVITY"), (3, 15, "SERVICE"), (42, 48, "ACTIVITY")]}),
    ("How do I open a new loan account", {"entities": [(9, 13, "ACTIVITY"), (20, 32, "SERVICE")]}),
    ("what are the charges on Investment account", {"entities": [(13, 20, "ACTIVITY"), (24, 42, "SERVICE")]}),
    ("Can you explain late charges on my credit card", {"entities": [(21, 28, "ACTIVITY"), (35, 46, "SERVICE")]}),
    ("I want to open a new loan account", {"entities": [(10, 14, "ACTIVITY"), (21, 33, "SERVICE")]}),
    ("Can you help updating payment on my credit card", {"entities": [(22, 29, "ACTIVITY"), (36, 47, "SERVICE")]}),
    ("When is the payment due date on my card", {"entities": [(12, 19, "ACTIVITY"), (35, 39, "SERVICE")]})
]

In [14]:
# get the ner pipeline
ner = nlp.get_pipe("ner")

In [15]:
# add the labels to ner pipeline
for _, annotations in train:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

In [16]:
# disable other pipelines, since we are only training NER
disable_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']

In [17]:
with nlp.disable_pipes(*disable_pipes):
    # resumes from the previous learning
    optimizer = nlp.resume_training()
    
    # run for 100 iterations
    for iteration in range(100):
        # randomly shuffle the data
        random.shuffle(train)
        losses = {}

        # create minibatches for training
        batches = minibatch(train, size=compounding(1.0, 4.0, 1.001))
        for batch in batches:
            text, annotation = zip(*batch)
            nlp.update(
                text,
                annotation,
                drop=0.5,
                losses=losses,
                sgd=optimizer
            )
        print(f"Losses: {losses}")

  gold = GoldParse(doc, **gold)
  gold = GoldParse(doc, **gold)


Losses: {'ner': 100.71093436330844}
Losses: {'ner': 122.41120285630247}
Losses: {'ner': 103.8089513879051}
Losses: {'ner': 105.45337385661696}
Losses: {'ner': 97.87823971917987}
Losses: {'ner': 83.07547397495364}
Losses: {'ner': 102.02442360681016}
Losses: {'ner': 98.99996892976924}
Losses: {'ner': 102.9022340408992}
Losses: {'ner': 80.93175452461583}
Losses: {'ner': 91.59616170730442}
Losses: {'ner': 91.4254884137772}
Losses: {'ner': 88.89890713547356}
Losses: {'ner': 90.1629876664374}
Losses: {'ner': 91.32173850946128}
Losses: {'ner': 89.65925266314298}
Losses: {'ner': 87.64519623224624}
Losses: {'ner': 82.24141556274844}
Losses: {'ner': 77.15951113209303}
Losses: {'ner': 88.71789869840723}
Losses: {'ner': 75.59164459252497}
Losses: {'ner': 86.82372440415202}
Losses: {'ner': 84.85971103869088}
Losses: {'ner': 87.21562957260176}
Losses: {'ner': 86.03731005222653}
Losses: {'ner': 82.20659529053955}
Losses: {'ner': 98.84115327009931}
Losses: {'ner': 94.94889551450615}
Losses: {'ner': 80

In [18]:
for text, entities in train:
    doc = nlp(text)
    print(f"Text: {text} | entites: {entities}")
    print(f"\tActual: {[(text[ent[0]: ent[1]], ent[2]) for ent in entities['entities']]}")
    print(f"\tPredicted: {[(ent.text, ent.label_) for ent in doc.ents]}")

Text: My loan account is still not approved and funded | entites: {'entities': [(25, 37, 'ACTIVITY'), (3, 15, 'SERVICE'), (42, 48, 'ACTIVITY')]}
	Actual: [('not approved', 'ACTIVITY'), ('loan account', 'SERVICE'), ('funded', 'ACTIVITY')]
	Predicted: [('loan account', 'SERVICE')]
Text: I am here for opening a new savings account | entites: {'entities': [(14, 21, 'ACTIVITY'), (28, 43, 'SERVICE')]}
	Actual: [('opening', 'ACTIVITY'), ('savings account', 'SERVICE')]
	Predicted: [('opening', 'ACTIVITY'), ('savings account', 'SERVICE')]
Text: Money transfer from my checking account is not working | entites: {'entities': [(6, 13, 'ACTIVITY'), (23, 39, 'SERVICE')]}
	Actual: [('transfe', 'ACTIVITY'), ('checking account', 'SERVICE')]
	Predicted: [('checking account', 'SERVICE')]
Text: Your credit card is in past due status | entites: {'entities': [(23, 31, 'ACTIVITY'), (5, 16, 'SERVICE')]}
	Actual: [('past due', 'ACTIVITY'), ('credit card', 'SERVICE')]
	Predicted: [('credit card', 'SERVICE')]
Tex

In [19]:
# visualize using displacy
for text, _ in train:
    doc = nlp(text)
    displacy.render(nlp(doc.text), style="ent", jupyter=True)



In [20]:
doc = nlp("My credit card payment will be delayed")
displacy.render(nlp(doc.text), style="ent", jupyter=True)

In [21]:
doc = nlp("what are the charges on credit card late payment in Bank of America")
displacy.render(nlp(doc.text), style="ent", jupyter=True)

In [22]:
doc = nlp("Australia wants to force Facebook and Google to pay media companies for news")
displacy.render(nlp(doc.text), style="ent", jupyter=True)