Installing needed libraries

In [1]:
!pip install spacy 



In [1]:
import spacy

In [3]:
print(spacy.__version__)

2.3.5


In [3]:
!python -m spacy download en_core_web_sm 

[+] Download and installation successful
You can now load the model via spacy.load('en_core_web_sm')


In [4]:
nlp = spacy.load('en_core_web_sm')

In [5]:
doc = nlp("He went to play football")

In [6]:
nlp.pipe_names

['tagger', 'parser', 'ner']

### **POS Tagging**

In [7]:
for token in doc:
    print(token.text," ", token.pos_)

He   PRON
went   VERB
to   PART
play   VERB
football   NOUN


In [8]:
spacy.explain("PART")

'particle'

### **Dependency Parsing**

In [9]:
for token in doc:
    print(token.text, " ", token.dep_)

He   nsubj
went   ROOT
to   aux
play   advcl
football   dobj


In [10]:
spacy.explain("nsubj"), spacy.explain("aux")

('nominal subject', 'auxiliary')

## **Named Entity Recognition**

In [11]:
text = "Sherlock Holmes is a famous detective"

In [12]:
doc = nlp(text)
 
for ent in doc.ents:
    print(ent.text, ent.label_)
    print(ent.text, ent.start_char, ent.end_char,
    ent.label_, spacy.explain(ent.label_))

Sherlock Holmes PERSON
Sherlock Holmes 0 15 PERSON People, including fictional


**Visualising results using displacy**

In [13]:
from spacy import displacy

In [14]:
displacy.render(doc, style='dep', jupyter=True, options={'distance': 90})

### **Custom Named Entity Recognition**

In [15]:
DATA = [
  ("Search Analytics: Business Value & BigData NoSQL Backend, Otis Gospodnetic ", {'entities': [ (58,75,'PERSON') ] }),
  ("Introduction to Elasticsearch by Radu ", {'entities': [ (16,29,'TECH'), (32, 36, 'PERSON') ] }),
]

In [16]:
nlp.entity.add_label('PERSON')
nlp.entity.add_label('TECH')


optimizer = nlp.begin_training()

for i in range(200):
    #random.shuffle(DATA)
    for text, annotations in DATA:
        nlp.update([text], [annotations], sgd=optimizer)




  gold = GoldParse(doc, **gold)
  gold = GoldParse(doc, **gold)


In [21]:
doc = nlp("Running High Performance And Fault Tolerant Elasticsearch by Radu")
for entity in doc.ents:
      print(entity.label_, ' | ', entity.text)

In [None]:
import json
with open('/content/NER/stock_market_training.json',encoding="utf8") as file:
  TRAIN_DATA = json.load(file)

In [None]:
TRAIN_DATA

{'annotations': [['The share price of PVR rose over 7 percent on Wednesday after the multiplex chain said that it has reduced losses in Q2 despite nil revenue from the core movie exhibition business.',
   {'entities': [[19, 22, 'COMPANY'],
     [33, 42, 'PERCENTAGE'],
     [46, 55, 'WEEKDAY']]}],
  ['The company managed to get rent waivers from most landlords, CFO Nitin Sood said in an interview to CNBC-TV18. “The big focus for us right now as revenues have been nil is to really reduce our fixed cost and we have managed to do that, ” he added.',
   {'entities': [[61, 64, 'ROLE'], [65, 75, 'NAME']]}],
  ['Sood further said that they have brought down the fixed cost down by almost 75-80 percent.',
   {'entities': [[0, 4, 'NAME'], [76, 89, 'PERCENTAGE']]}],
  ["The stock rose as much as 7.6 percent to the day's high of Rs 1,186.85 per share on the BSE.",
   {'entities': [[26, 37, 'PERCENTAGE'],
     [59, 70, 'PERCENTAGE'],
     [88, 91, 'COMPANY']]}],
  ['Meanwhile, for the September quar

**Prepare an empty model to train**

In [None]:
nlp = spacy.blank('en')
ner = nlp.create_pipe('ner')
nlp.add_pipe(ner)

**Add the custom NER Tags as entities into the model**

In [None]:
for label in TRAIN_DATA["classes"]:
  nlp.entity.add_label(label)

**Training the model**

In [None]:
optimizer = nlp.begin_training()

In [None]:
for itn in range(40):
    for text, annotations in TRAIN_DATA["annotations"]:
        loss = {}
        if len(text) > 0:
            nlp.update([text], [annotations],drop= 0.3, sgd=optimizer, losses= loss)
            print("Current loss",loss)

Current loss {'ner': 1.0543702592388791e-05}
Current loss {'ner': 0.0007745897731913514}
Current loss {'ner': 0.0044470114939623735}
Current loss {'ner': 1.634943158654324e-05}
Current loss {'ner': 5.002866962152886e-05}
Current loss {'ner': 0.2562790723688976}
Current loss {'ner': 3.7534676041222357e-06}
Current loss {'ner': 1.1076708293067133e-07}
Current loss {'ner': 0.0013167336306275509}
Current loss {'ner': 0.0007900404783257226}
Current loss {'ner': 1.4560951283629219e-05}
Current loss {'ner': 9.314342240692522e-07}
Current loss {'ner': 0.0011773366178242492}
Current loss {'ner': 5.6043441575788066e-05}
Current loss {'ner': 0.00015759252101732417}
Current loss {'ner': 0.2172696527948575}
Current loss {'ner': 2.8459472719723857e-08}
Current loss {'ner': 3.1220374595278924e-11}
Current loss {'ner': 5.241847919561377e-05}
Current loss {'ner': 7.323120826862261e-05}
Current loss {'ner': 5.855586548650994e-05}
Current loss {'ner': 0.0033469863621008993}
Current loss {'ner': 0.0006287

**Testing the model**

In [None]:
test_text = "The company managed to get rent waivers from most landlords, CEO Sonu Sood said in an interview to CNBC-TV18. “The big focus for us right now as revenues have been nil is to really reduce our fixed cost and we have managed to do that, ”"
doc = nlp(test_text)
print("Entities in '%s'" % test_text)
for ent in doc.ents:
    print(ent.label_, ent.text)

Entities in 'The company managed to get rent waivers from most landlords, CEO Sonu Sood said in an interview to CNBC-TV18. “The big focus for us right now as revenues have been nil is to really reduce our fixed cost and we have managed to do that, ”'
ROLE CEO
NAME Sonu Sood


**Saving the model**

In [None]:
nlp.to_disk('/content/NER/')
print("Model Saved")


Model Saved
