In [3]:
from __future__ import unicode_literals, print_function
import pickle
import plac
import random
from pathlib import Path
import spacy
from spacy.util import minibatch, compounding

print(spacy.__version__)

ModuleNotFoundError: No module named 'spacy'

In [2]:
#Entity labels

LABEL = ['I-geo', 'B-geo', 'I-art', 'B-art', 'B-tim', 'B-nat', 'B-eve', 'O', 'I-per', 'I-tim', 'I-nat', 'I-eve', 'B-per', 'I-org', 'B-gpe', 'B-org', 'I-gpe']

"""
geo = Geographical Entity
org = Organization
per = Person
gpe = Geopolitical Entity
tim = Time indicator
art = Artifact
eve = Event
nat = Natural Phenomenon
"""

'\ngeo = Geographical Entity\norg = Organization\nper = Person\ngpe = Geopolitical Entity\ntim = Time indicator\nart = Artifact\neve = Event\nnat = Natural Phenomenon\n'

In [3]:
# Loading training data 
with open ('spacy_dataset', 'rb') as fp:
    TRAIN_DATA = pickle.load(fp)


In [4]:
def train_model(model, new_model_name, output_dir, n_iter):
    if model is not None:
        nlp = spacy.load(model)  # load existing spacy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank('en')  # create blank Language class
        print("Created blank 'en' model")
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner)
    else:
        ner = nlp.get_pipe('ner')

    for i in LABEL:
        ner.add_label(i)   # Add new entity labels to entity recognizer

    if model is None:
        optimizer = nlp.begin_training()
    else:
        optimizer = nlp.entity.create_optimizer()

    # Get names of other pipes to disable them during training to train only NER
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):  # only train NER
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
            batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, drop=0.35,
                           losses=losses)
            print('Losses', losses)

    # Test the trained model
    test_text = 'Gianni Infantino is the president of FIFA.'
    doc = nlp(test_text)
    print("Entities in '%s'" % test_text)
    for ent in doc.ents:
        print(ent.label_, ent.text)

    # Save model 
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            output_dir.mkdir()
        nlp.meta['name'] = new_model_name  # rename model
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)





In [5]:
train_model(None, 'new_model', 'final/', 100)

Created blank 'en' model
Losses {'ner': 83980.74906165757}
Losses {'ner': 66598.87410086502}
Losses {'ner': 62025.46787136757}
Losses {'ner': 58416.93177854887}
Losses {'ner': 56304.94258025313}
Losses {'ner': 54666.00781541086}
Losses {'ner': 53012.76570588539}
Losses {'ner': 51832.13984271931}
Losses {'ner': 50809.4559556132}
Losses {'ner': 49790.181594244066}
Losses {'ner': 49012.7357218915}
Losses {'ner': 47999.03951101698}
Losses {'ner': 47681.424960762786}
Losses {'ner': 46899.778345566316}
Losses {'ner': 46190.84527791395}
Losses {'ner': 45738.620149338836}
Losses {'ner': 45339.688975220015}
Losses {'ner': 44865.25835511218}
Losses {'ner': 44434.82847181812}
Losses {'ner': 44009.28517202612}
Losses {'ner': 43380.192136654296}
Losses {'ner': 43247.37860892044}
Losses {'ner': 43077.104026936504}
Losses {'ner': 42698.42480985176}
Losses {'ner': 42571.76772104968}
Losses {'ner': 42365.066316204975}
Losses {'ner': 41697.193848642375}
Losses {'ner': 41630.68995797503}
Losses {'ner': 4

In [1]:
# Test the saved model
test_text = 'Lakshitha is the president of FIFA.'
output_dir = 'final/'
print("Loading from", output_dir)
nlp2 = spacy.load(output_dir)
doc2 = nlp2(test_text)
for ent in doc2.ents:
    print(ent.label_, ent.text)



Loading from final/


NameError: name 'spacy' is not defined