<a href="https://colab.research.google.com/github/srivatsan88/Natural-Language-Processing/blob/master/Custom_Named_Entity_Recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import spacy

In [None]:
# Load the small English pipeline, then show the names of its components.
nlp = spacy.load("en_core_web_sm")
nlp.pipe_names

['tagger', 'parser', 'ner']

In [None]:
# Parse a general news headline with the loaded pipeline.
doc = nlp("Australia wants to force Facebook and Google to pay media companies for news")

In [None]:
# One line per entity found in `doc`: text, start/end character offsets, label.
for entity in doc.ents:
    entity_fields = (entity.text, entity.start_char, entity.end_char, entity.label_)
    print(*entity_fields)

Australia 0 9 GPE
Facebook and Google 25 44 ORG


In [None]:
# Parse a banking-domain sentence with the same pipeline.
doc = nlp("I do not have money to pay my credit card account")

In [None]:
# One line per entity found in `doc`: text, start/end character offsets, label.
for entity in doc.ents:
    entity_fields = (entity.text, entity.start_char, entity.end_char, entity.label_)
    print(*entity_fields)

In [None]:
# Parse a second banking-domain sentence with the same pipeline.
doc = nlp("what is the process to open a new savings account")

In [None]:
# One line per entity found in `doc`: text, start/end character offsets, label.
for entity in doc.ents:
    entity_fields = (entity.text, entity.start_char, entity.end_char, entity.label_)
    print(*entity_fields)

In [None]:
# Training examples for the two custom labels, ACTIVITY and PRODUCT.
# Each item is (text, {"entities": [(start_char, end_char, label), ...]}).
# Offsets are character positions with an exclusive end, so text[start:end]
# must be exactly the annotated word(s); a span that cuts a word in half does
# not line up with the tokenizer's boundaries.
train = [
    # "transfer" is text[6:14]; the previous (6, 13) sliced to "transfe".
    ("Money transfer from my checking account is not working", {"entities": [(6, 14, "ACTIVITY"), (23, 39, "PRODUCT")]}),
    ("I want to check balance in my savings account", {"entities": [(16, 23, "ACTIVITY"), (30, 45, "PRODUCT")]}),
    ("I suspect a fraud in my credit card account", {"entities": [(12, 17, "ACTIVITY"), (24, 35, "PRODUCT")]}),
    ("I am here for opening a new savings account", {"entities": [(14, 21, "ACTIVITY"), (28, 43, "PRODUCT")]}),
    ("Your mortgage is in delinquent status", {"entities": [(20, 30, "ACTIVITY"), (5, 13, "PRODUCT")]}),
    ("Your credit card is in past due status", {"entities": [(23, 31, "ACTIVITY"), (5, 16, "PRODUCT")]}),
    ("My loan account is still not approved and funded", {"entities": [(25, 37, "ACTIVITY"), (3, 15, "PRODUCT"), (42, 48, "ACTIVITY")]}),
    ("How do I open a new loan account", {"entities": [(9, 13, "ACTIVITY"), (20, 32, "PRODUCT")]}),
    ("What are the charges on Investment account", {"entities": [(13, 20, "ACTIVITY"), (24, 42, "PRODUCT")]}),
    ("Can you explain late charges on my credit card", {"entities": [(21, 28, "ACTIVITY"), (35, 46, "PRODUCT")]}),
    ("I want to open a new loan account", {"entities": [(10, 14, "ACTIVITY"), (21, 33, "PRODUCT")]}),
    ("Can you help updating payment on my credit card", {"entities": [(22, 29, "ACTIVITY"), (36, 47, "PRODUCT")]}),
    ("When is the payment due date on my card", {"entities": [(12, 19, "ACTIVITY"), (35, 39, "PRODUCT")]}),
]

In [None]:
# Show the pipeline's component names again; the "ner" entry is fetched next.
nlp.pipe_names

['tagger', 'parser', 'ner']

In [None]:
# Keep a handle on the pipeline's "ner" component.
ner=nlp.get_pipe("ner")

In [None]:
# Register the label of every annotated span in `train` with the NER component.
for _, spec in train:
    for _start, _end, label in spec.get("entities"):
        ner.add_label(label)

In [None]:
# Every pipeline component except "ner"; these are switched off during training.
disable_pipes = [name for name in nlp.pipe_names if name != "ner"]

In [None]:
import random
from spacy.util import minibatch, compounding
from pathlib import Path

N_ITERATIONS = 100  # number of passes over the training examples
DROPOUT = 0.5       # dropout rate handed to nlp.update

# Seed the stdlib RNG so the shuffle order is the same on every run.
# NOTE(review): this only pins `random`; any randomness inside the model
# itself (e.g. dropout) is not controlled here.
random.seed(0)

# Update only the NER component: every other pipe is disabled inside this block.
with nlp.disable_pipes(*disable_pipes):
    optimizer = nlp.resume_training()

    for iteration in range(N_ITERATIONS):
        # Shuffle a copy so `train` keeps its authored order for later cells
        # and re-running this cell starts from the same list.
        examples = random.sample(train, len(train))
        losses = {}

        # The batch-size schedule is rebuilt on every pass, so it restarts at 1.0.
        batches = minibatch(examples, size=compounding(1.0, 4.0, 1.001))
        for batch in batches:
            text, annotation = zip(*batch)
            nlp.update(
                text,
                annotation,
                drop=DROPOUT,
                losses=losses,
                sgd=optimizer,
            )

        # `losses` accumulates across the batches of a pass, so report it once
        # per pass instead of printing every partial sum.
        print("Losses", losses)

Losses {'ner': 6.854678056978502}
Losses {'ner': 14.679185251066825}
Losses {'ner': 23.950488703485256}
Losses {'ner': 32.36337262751686}
Losses {'ner': 42.090825372785396}
Losses {'ner': 50.74776712642291}
Losses {'ner': 59.078528280037375}
Losses {'ner': 67.89365277086264}
Losses {'ner': 78.11245668663716}
Losses {'ner': 88.82007650024909}
Losses {'ner': 98.59731430207623}
Losses {'ner': 108.66014529320736}
Losses {'ner': 116.38581796682075}
Losses {'ner': 6.447028030822807}
Losses {'ner': 13.636424764846865}
Losses {'ner': 20.294305473189283}
Losses {'ner': 29.512459657462145}
Losses {'ner': 38.08894005723536}
Losses {'ner': 44.182104308243694}
Losses {'ner': 49.5827408020687}
Losses {'ner': 56.5009826144121}
Losses {'ner': 64.20241453156788}
Losses {'ner': 72.5024181514352}
Losses {'ner': 82.18058728643948}
Losses {'ner': 94.28702119211513}
Losses {'ner': 103.80432979672761}
Losses {'ner': 10.187788695795462}
Losses {'ner': 18.081422520428532}
Losses {'ner': 28.794723854692165}
Los

In [None]:
# Run the updated pipeline over each training sentence and list what it finds.
for text, _ in train:
    doc = nlp(text)
    found = []
    for span in doc.ents:
        found.append((span.text, span.label_))
    print('Entities', found)

Entities [('loan account', 'PRODUCT')]
Entities [('loan account', 'PRODUCT')]
Entities [('credit card', 'PRODUCT'), ('past', 'ACTIVITY')]
Entities [('payment', 'ACTIVITY')]
Entities [('loan account', 'PRODUCT'), ('funded', 'ACTIVITY')]
Entities [('mortgage', 'PRODUCT'), ('delinquent', 'ACTIVITY')]
Entities [('savings account', 'PRODUCT')]
Entities [('Investment account', 'PRODUCT')]
Entities [('payment', 'ACTIVITY'), ('credit card', 'PRODUCT')]
Entities [('checking account', 'PRODUCT')]
Entities [('credit card', 'PRODUCT')]
Entities [('savings account', 'PRODUCT')]
Entities [('credit card', 'PRODUCT')]


In [None]:
from spacy import displacy

# Same query that was parsed before training; list the entities found now.
doc = nlp("what is the process to open a new savings account")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
# Render the Doc that was just parsed; calling nlp() on doc.text again would
# only repeat the same pipeline run.
displacy.render(doc, style='ent', jupyter=True)

savings account 34 49 PRODUCT


In [None]:
# Try a sentence that is not in the training list.
doc = nlp("My credit card payment will be delayed")
for entity in doc.ents:
    entity_fields = (entity.text, entity.start_char, entity.end_char, entity.label_)
    print(*entity_fields)

credit card 3 14 PRODUCT
payment 15 22 ACTIVITY


In [None]:
# A sentence mixing the custom labels with an organisation name.
doc = nlp("what are the charges on credit card late payment in Bank of America")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
# Render the Doc that was just parsed; calling nlp() on doc.text again would
# only repeat the same pipeline run.
displacy.render(doc, style='ent', jupyter=True)

credit card 24 35 PRODUCT
payment 41 48 ACTIVITY


In [None]:
# A longer sentence that mentions "account" twice.
doc = nlp("I lost my investment account password and cannot open my account now")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
# Render the Doc that was just parsed; calling nlp() on doc.text again would
# only repeat the same pipeline run.
displacy.render(doc, style='ent', jupyter=True)

investment account 10 28 PRODUCT
account now 57 68 PRODUCT


In [None]:
# Another sentence that is not in the training list.
doc = nlp("what is the status of my loan account")
for entity in doc.ents:
    entity_fields = (entity.text, entity.start_char, entity.end_char, entity.label_)
    print(*entity_fields)

loan account 25 37 PRODUCT


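Fine-tuning on only the new labels can cause the model to forget entities it recognized before training (catastrophic forgetting); see https://explosion.ai/blog/pseudo-rehearsal-catastrophic-forgetting. The next cell re-runs the first example to check for this.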

In [None]:
# Re-run the headline from the start of the notebook on the updated pipeline.
doc = nlp("Australia wants to force Facebook and Google to pay media companies for news")
for entity in doc.ents:
    entity_fields = (entity.text, entity.start_char, entity.end_char, entity.label_)
    print(*entity_fields)