In [1]:
#pip install spacy

In [2]:
import spacy
nlp = spacy.load('en_core_web_sm')
nlp.pipe_names

['tagger', 'parser', 'ner']

In [3]:
ner = nlp.get_pipe('ner')

In [4]:
TRAIN_DATA = [
              ("Walmart is a leading e-commerce company", {"entities": [(0, 7, "ORG")]}),
              ("I reached Chennai yesterday.", {"entities": [(19, 28, "GPE")]}),
              ("I recently ordered a book from Amazon", {"entities": [(24,32, "ORG")]}),
              ("I was driving a BMW", {"entities": [(16,19, "PRODUCT")]}),
              ("I ordered this from ShopClues", {"entities": [(20,29, "ORG")]}),
              ("Fridge can be ordered in Amazon ", {"entities": [(0,6, "PRODUCT")]}),
              ("I bought a new Washer", {"entities": [(16,22, "PRODUCT")]}),
              ("I bought a old table", {"entities": [(16,21, "PRODUCT")]}),
              ("I bought a fancy dress", {"entities": [(18,23, "PRODUCT")]}),
              ("I rented a camera", {"entities": [(12,18, "PRODUCT")]}),
              ("I rented a tent for our trip", {"entities": [(12,16, "PRODUCT")]}),
              ("I rented a screwdriver from our neighbour", {"entities": [(12,22, "PRODUCT")]}),
              ("I repaired my computer", {"entities": [(15,23, "PRODUCT")]}),
              ("I got my clock fixed", {"entities": [(16,21, "PRODUCT")]}),
              ("I got my truck fixed", {"entities": [(16,21, "PRODUCT")]}),
              ("Flipkart started it's journey from zero", {"entities": [(0,8, "ORG")]}),
              ("I recently ordered from Max", {"entities": [(24,27, "ORG")]}),
              ("Flipkart is recognized as leader in market",{"entities": [(0,8, "ORG")]}),
              ("I recently ordered from Swiggy", {"entities": [(24,29, "ORG")]})
              ]

In [5]:
OurData = [("")]

In [6]:
for _, annotations in TRAIN_DATA:
  for ent in annotations.get("entities"):
    ner.add_label(ent[2])

In [7]:
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
unaffected_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]

In [8]:
import random
from spacy.util import minibatch, compounding
from pathlib import Path

# TRAINING THE MODEL
with nlp.disable_pipes(*unaffected_pipes):

  # Training for 30 iterations
  for iteration in range(30):

    # shuufling examples  before every iteration
    random.shuffle(TRAIN_DATA)
    losses = {}
    # batch up the examples using spaCy's minibatch
    batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
    for batch in batches:
        texts, annotations = zip(*batch)
        nlp.update(
                    texts,  # batch of texts
                    annotations,  # batch of annotations
                    drop=0.5,  # dropout - make it harder to memorise data
                    losses=losses,
                )
        print("Losses", losses)

Losses {'ner': 3.7628068023223022}
Losses {'ner': 4.97137270591702}
Losses {'ner': 6.837301886987916}
Losses {'ner': 8.134859696872809}
Losses {'ner': 9.90261323606137}
Losses {'ner': 2.372989619805594}
Losses {'ner': 5.688270458265833}
Losses {'ner': 10.10990089984989}
Losses {'ner': 11.382781526073813}
Losses {'ner': 14.857234546687323}
Losses {'ner': 2.830343837267719}
Losses {'ner': 4.878000189513841}
Losses {'ner': 11.042186311875412}
Losses {'ner': 11.062276639888296}
Losses {'ner': 13.297297573613832}
Losses {'ner': 5.080971277940421}
Losses {'ner': 5.171745412032919}
Losses {'ner': 8.19052974863314}
Losses {'ner': 11.455174883779705}
Losses {'ner': 11.676680631852264}
Losses {'ner': 3.2333805411763024}
Losses {'ner': 5.768409270678603}
Losses {'ner': 5.792136508084695}
Losses {'ner': 8.12762487870441}
Losses {'ner': 11.886919942776537}
Losses {'ner': 1.9303018717619125}
Losses {'ner': 3.897699504803313}
Losses {'ner': 3.9580082692154974}
Losses {'ner': 8.524871211284335}
Losses

In [9]:
doc = nlp("I was driving a Alto")
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

Entities [('Alto', 'PRODUCT')]


In [10]:

output_dir = Path('/dev/NER')
nlp.to_disk(output_dir)
print("Saved model to", output_dir)

# Load the saved model and predict
print("Loading from", output_dir)
nlp_updated = spacy.load(output_dir)
doc = nlp_updated("Fridge can be ordered in FlipKart" )
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

Saved model to /dev/NER
Loading from /dev/NER
Entities [('Fridge', 'PRODUCT'), ('FlipKart', 'ORG')]
