In [1]:
import numpy as np
from scipy import stats
import pandas as pd
import time

from allennlp.predictors import Predictor
from allennlp_models.pretrained import load_predictor

# Load Models

In [2]:
# Pretrained AllenNLP NER archives to compare. Every entry starts with a
# display name and a download URL; the loading cell adds a 'model' key.
nlp_models = [
    dict(
        name='ner-model',
        url='https://storage.googleapis.com/allennlp-public-models/ner-model-2020.02.10.tar.gz',
    ),
    dict(
        name='ner-elmo',
        url='https://storage.googleapis.com/allennlp-public-models/ner-elmo.2021-02-12.tar.gz',
    ),
]

In [3]:
## load models
# Build a predictor from each configured archive URL, store it on the config
# entry under 'model', and report how long each load took.
print ("Loading models...")
for nlp_model in nlp_models:
    model_name = nlp_model['name']
    print ("Loading model :", model_name)
    load_started = time.perf_counter()
    nlp_model['model'] = Predictor.from_path(nlp_model['url'])
    load_ms = (time.perf_counter() - load_started) * 1e3
    print ("Loaded model '{}' in {:,.1f} milli seconds".format (model_name, load_ms))
print ()

Loading models...
Loading model : ner-model


error loading _jsonnet (this is expected on Windows), treating C:\Users\user\AppData\Local\Temp\tmp549me0mu\config.json as plain json


Loaded model 'ner-model' in 64,565.5 milli seconds
Loading model : ner-elmo


error loading _jsonnet (this is expected on Windows), treating C:\Users\user\AppData\Local\Temp\tmp83_0q9lb\config.json as plain json


Loaded model 'ner-elmo' in 72,015.0 milli seconds



In [8]:
def entity_recognition (sentence):
    """Run every configured NER model on ``sentence`` and print tagged tokens.

    For each entry of the module-level ``nlp_models`` list, the entry's
    ``'model'`` predictor is called with ``predict(sentence=sentence)``, the
    elapsed prediction time is printed, and every token whose tag is not
    ``'O'`` is printed as ``word<TAB>tag``. A blank line follows each model's
    output and a separator line is printed once all models have run.

    Parameters
    ----------
    sentence
        Text handed unchanged to each model's ``predict``.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    for nlp_model in nlp_models:
        t1 = time.perf_counter()
        results = nlp_model['model'].predict(sentence=sentence)
        t2 = time.perf_counter()
        print ("   model {} predicted in {:,.1f} milli seconds".format (nlp_model['name'], (t2-t1)*1e3))
        # Tokens tagged 'O' are skipped (presumably the "outside any entity"
        # label of the tagging scheme -- confirm against the model docs).
        for word, tag in zip(results["words"], results["tags"]):
            if tag != 'O':
                print(f"{word}\t{tag}")
        print()

    print('-------------------')

# Load Data

In [9]:
# Read the publications CSV (path relative to the working directory) into a DataFrame.
sentences = pd.read_csv(filepath_or_buffer='guardian_publications.csv')

In [10]:
def get_entities(df):
    """Run ``entity_recognition`` on the text(s) contained in ``df``.

    The argument itself is processed, rather than the global ``sentences``
    frame, so passing a subset limits the work to that subset. Each text is
    passed to the models as-is, not wrapped in an ``(index, text)`` tuple
    repr.

    Parameters
    ----------
    df
        One of:
        * a ``pandas.DataFrame`` -- its ``"text"`` column is processed;
        * a ``pandas.Series`` -- its values are processed;
        * any other single value (e.g. one string, as supplied by
          ``Series.apply``) -- processed as one text.

    Returns
    -------
    None
        ``entity_recognition`` prints its findings; nothing is collected.
    """
    if isinstance(df, pd.DataFrame):
        texts = df["text"].dropna()
    elif isinstance(df, pd.Series):
        texts = df.dropna()
    else:
        texts = [df]

    # Missing values were dropped above so the models never see the string 'nan'.
    for text in texts:
        entity_recognition(str(text))

In [None]:
# Call get_entities once, on the first value of the "text" column; `final`
# is the Series of whatever get_entities returned for that value.
final = sentences["text"].iloc[:1].apply(get_entities)
final

   model ner-model predicted in 32,470.1 milli seconds
Shell	U-ORG
Ben	B-PER
van	I-PER
Beurden	L-PER
Reuters	U-ORG
London	U-LOC
Andrew	B-PER
Mackenzie	L-PER
Shell	U-ORG
Van	B-PER
Beurden	L-PER
Shell	U-ORG
Europe	U-LOC
Van	B-PER
Beurden	L-PER
Canadian	U-MISC
Shell	U-ORG
Wael	B-PER
Sawan	L-PER
Shell	U-ORG
Reuters	U-ORG
Shell	U-ORG
Sinead	B-PER
Gorman	L-PER
Zoe	B-PER
Yujnovich	L-PER
Huibert	B-PER
Vigeveno	L-PER
Shell	U-ORG
Van	B-PER
Beurden	L-PER
Shell	U-ORG
BG	B-ORG
Group	L-ORG
Netherlands	U-LOC
London	U-LOC
Van	B-PER
Beurden	L-PER
Shell	U-ORG
UK	U-LOC
Van	B-PER
Beurden	L-PER
Russia	U-LOC
Ukraine	U-LOC
Norway	U-LOC
Van	B-PER
Beurden	L-PER
Business	B-ORG
Today	I-ORG
Free	L-ORG
Newsletters	U-ORG
Privacy	B-MISC
Policy	L-MISC
Google	B-MISC
reCaptcha	L-MISC
Google	B-MISC
Privacy	I-MISC
Policy	L-MISC
Terms	B-MISC
of	I-MISC
Service	L-MISC
Shell	U-ORG

   model ner-elmo predicted in 38,981.6 milli seconds
Shell	U-ORG
Ben	B-PER
van	I-PER
Beurden	L-PER
Reuters	U-ORG
London	U-LOC
Andrew	B-PER
Macke