In [1]:
import json
import spacy


In [2]:
# Read the annotated training set from disk and parse it from JSON.
with open('pvr_training_data.json', encoding='utf8') as training_file:
    training_data = json.loads(training_file.read())

In [3]:
# Display the parsed training data to inspect its structure.
training_data

{'classes': ['NAME',
  'COMPANY',
  'ROLE',
  'PERCENTAGE',
  'WEEKDAY',
  'MONEY',
  'TIME PERIOD',
  'STATES'],
 'annotations': [['The share price of PVR rose over 7 percent on Wednesday after the multiplex chain said that it has reduced losses in Q2 despite nil revenue from the core movie exhibition business.',
   {'entities': [[19, 22, 'COMPANY'],
     [33, 42, 'PERCENTAGE'],
     [46, 55, 'WEEKDAY']]}],
  ['The company managed to get rent waivers from most landlords, CFO Nitin Sood said in an interview to CNBC-TV18. “The big focus for us right now as revenues have been nil is to really reduce our fixed cost and we have managed to do that, ” he added.',
   {'entities': [[61, 64, 'ROLE'], [65, 75, 'NAME']]}],
  ['Sood further said that they have brought down the fixed cost down by almost 75-80 percent.',
   {'entities': [[0, 4, 'NAME'], [76, 89, 'PERCENTAGE']]}],
  ["The stock rose as much as 7.6 percent to the day's high of Rs 1,186.85 per share on the BSE.",
   {'entities': [[26, 

In [4]:
# Build a blank English pipeline and attach a fresh NER component to it.
nlp = spacy.blank('en')
nlp.vocab.vectors.name = 'demo'
# add_pipe() creates the component, registers it on the pipeline, and returns
# that registered instance. Binding `ner` to this return value (instead of to a
# separate object from create_pipe()) ensures that later `ner.add_label(...)`
# calls configure the component that is actually part of `nlp`.
ner = nlp.add_pipe('ner', last=True)
ner

<spacy.pipeline.ner.EntityRecognizer at 0x2beec98f1c0>

In [5]:
# Add the custom NER tags as entity labels on the `ner` component,
# echoing each tag as it is registered.
entity_labels = training_data["classes"]
for label in entity_labels:
    print(label)
    ner.add_label(label)

NAME
COMPANY
ROLE
PERCENTAGE
WEEKDAY
MONEY
TIME PERIOD
STATES


In [6]:
# Initialize the pipeline and keep the optimizer it returns; the actual
# training happens in a later cell that passes this optimizer to nlp.update().
# `initialize()` is the spaCy v3 name for the deprecated `begin_training()`.
optimizer = nlp.initialize()


In [7]:
# Display the annotated examples stored under the "annotations" key.
training_data["annotations"]

[['The share price of PVR rose over 7 percent on Wednesday after the multiplex chain said that it has reduced losses in Q2 despite nil revenue from the core movie exhibition business.',
  {'entities': [[19, 22, 'COMPANY'],
    [33, 42, 'PERCENTAGE'],
    [46, 55, 'WEEKDAY']]}],
 ['The company managed to get rent waivers from most landlords, CFO Nitin Sood said in an interview to CNBC-TV18. “The big focus for us right now as revenues have been nil is to really reduce our fixed cost and we have managed to do that, ” he added.',
  {'entities': [[61, 64, 'ROLE'], [65, 75, 'NAME']]}],
 ['Sood further said that they have brought down the fixed cost down by almost 75-80 percent.',
  {'entities': [[0, 4, 'NAME'], [76, 89, 'PERCENTAGE']]}],
 ["The stock rose as much as 7.6 percent to the day's high of Rs 1,186.85 per share on the BSE.",
  {'entities': [[26, 37, 'PERCENTAGE'],
    [59, 70, 'PERCENTAGE'],
    [88, 91, 'COMPANY']]}],
 ['Meanwhile, for the September quarter, the company reported a 

In [8]:

from spacy.tokens import Doc, Span
from spacy.training import Example

In [9]:
# Train the pipeline on the annotated sentences, one example per update step.
N_EPOCHS = 50  # number of full passes over the training data

# The loop variable is named `epoch`, not `iter`, so the builtin iter() is not
# shadowed in the kernel namespace for subsequent cells.
for epoch in range(N_EPOCHS):
    for text, annotations in training_data["annotations"]:
        if not text:
            # Nothing to learn from an empty sentence.
            continue
        # Pair the tokenized (unannotated) doc with its reference annotations.
        example = Example.from_dict(nlp.make_doc(text), annotations)
        nlp.update([example], sgd=optimizer)



In [16]:
# Sample passage used to try out the pipeline.
text = (
    "PVR share price rose over 7% today despite the firm reporting a "
    "consolidated net loss of Rs 184.06 crore in Q2 as the film exhibition "
    "business remained affected by coronavirus-related restrictions. "
    "The movie exhibitor had posted a net profit of Rs 47.67 crore in the "
    "July-September quarter a year ago."
)

In [17]:
# Echo the sample passage to confirm its contents.
text

'PVR share price rose over 7% today despite the firm reporting a consolidated net loss of Rs 184.06 crore in Q2 as the film exhibition business remained affected by coronavirus-related restrictions. The movie exhibitor had posted a net profit of Rs 47.67 crore in the July-September quarter a year ago.'

In [18]:
# Run the pipeline over the sample passage; the result is kept in `doc`.
doc=nlp(text)

In [19]:
# List each entity found in `doc` together with its label.
detected_entities = doc.ents
for ent in detected_entities:
    print(ent.text, ent.label_)

7% PERCENTAGE
Rs 184.06 crore MONEY
Rs 47.67 crore MONEY
July-September quarter a year ago TIME PERIOD
