In [None]:
import spacy

In [None]:
# Load the English pipeline through the 'en' shortcut name.
# NOTE(review): shortcut names such as 'en' are a spaCy 2.x convention —
# confirm the installed version supports them.
nlp = spacy.load('en')

# **Training the Entity Recognizer**

## **Deciding Whether You Need to Train the Entity Recognizer**

In [None]:
# Probe the stock model with the same sentence that is used to evaluate the
# retrained recognizer, so the "before" and "after" outputs are comparable.
doc = nlp(u'Could you pick me up at Solnce?')

In [None]:
# Print every entity span found in the document together with its label.
for ent in doc.ents:
  print(ent.text, ent.label_)

Solnce ORG


In [None]:
# Show spaCy's human-readable description of the 'ORG' label.
print(spacy.explain('ORG'))

Companies, agencies, institutions, etc.


## **Creating Training Examples**

In [None]:
# Each example is (text, {'entities': [(start_char, end_char, label), ...]}).
# end_char is exclusive and must stop before the trailing '?'; otherwise the
# punctuation mark is annotated as part of the entity.
train_exams = [
    ('Could you send a taxi to Solnce?', {'entities': [(25, 31, 'GPE')]}),
    ('Is there a flat rate to the airport from Solnce?', {'entities': [(41, 47, 'GPE')]}),
    # A sentence with no entities at all.
    ('How long is the wait for a taxi right now?', {'entities': []})
]

In [None]:
# First training sentence as a plain string, used to check character offsets.
First = 'Could you send a taxi to Solnce?'

In [None]:
# Total length of the sentence, including the final '?'.
len(First)

32

In [None]:
# Check what the span (25, 32) covers. The displayed result includes the
# trailing '?', so the entity name on its own is First[25:31].
First[25:32]

'Solnce?'

In [None]:
# Second training sentence as a plain string, used to check character offsets.
Second = 'Is there a flat rate to the airport from Solnce?'

In [None]:
# Total length of the sentence, including the final '?'.
len(Second)

48

In [None]:
# Check what the span (41, 48) covers. The displayed result includes the
# trailing '?', so the entity name on its own is Second[41:47].
Second[41:48]

'Solnce?'

## **Automating the example creation process**

In [None]:
# Multi-sentence sample text; training examples are generated from it one
# sentence at a time.
doc = nlp(u'Could you send a taxi to Solnce? I need to get to Google. Could you send a taxi an hour later?')

**If we pick up the content from a file**

In [None]:
# with open("test.txt", "rb") as f:
#     contents = f.read()
# doc = nlp(contents.decode('utf8'))

In [None]:
# Build one (text, {'entities': [...]}) training tuple per sentence of `doc`.
train_exams = []
# Names that should be labelled GPE regardless of what the model predicted.
districts = ['Solnce', 'Greenwal', 'Downtown']
for sent in doc.sents:
  entities = []
  # Walk entity spans rather than individual tokens, so a multi-token entity
  # produces a single annotation covering the whole span instead of one
  # fragment per token.
  for ent in sent.ents:
    # Offsets must be relative to the sentence text, not to the whole doc.
    start = ent.start_char - sent.start_char
    end = ent.end_char - sent.start_char
    label = 'GPE' if ent.text in districts else ent.label_
    entities.append((start, end, label))

  train_exams.append((sent.text, {'entities': entities}))


In [None]:
# Display the generated training examples.
train_exams

[('Could you send a taxi to Solnce?', {'entities': [(25, 31, 'GPE')]}),
 ('I need to get to Google.', {'entities': [(17, 23, 'ORG')]}),
 ('Could you send a taxi an hour later?', {'entities': []})]

## **Disabling the Other Pipeline Components**

In [None]:
# Collect every pipeline component name except 'ner' and disable those
# components, leaving only the entity recognizer active for training.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe !='ner']
nlp.disable_pipes(*other_pipes)

[('tagger', <spacy.pipeline.pipes.Tagger at 0x7fb031748fd0>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x7fb0316226a8>)]

## **The Training Process**

In [None]:
import random
from spacy.util import minibatch, compounding

In [None]:
# Optimizer created by the pipeline's entity recognizer; it is passed to
# nlp.update() as `sgd` in the training loop.
optimizer = nlp.entity.create_optimizer()

In [None]:
# Upper bound for the compounding batch-size schedule (constant across epochs).
max_batch_size = 3

# 25 passes over the training examples.
for i in range(25):
  # Reshuffle before every pass so the batches differ between epochs.
  random.shuffle(train_exams)
  # Fresh batch-size schedule per epoch: starts at 2.0, compounds by a factor
  # of 1.001, and is capped at max_batch_size.
  batch_size = compounding(2.0, max_batch_size, 1.001)
  batches = minibatch(train_exams, size=batch_size)
  for batch in batches:
    # Split [(text, annotations), ...] into parallel tuples.
    texts, annotations = zip(*batch)
    nlp.update(texts, annotations, sgd=optimizer)

# Serialise the recognizer once, after training has finished, instead of
# rewriting the same directory at the end of every epoch.
ner = nlp.get_pipe('ner')
ner.to_disk('usr/to/ner')

## **Evaluating the Updated Recognizer**

In [None]:
from spacy.pipeline import EntityRecognizer

In [None]:
# Reload the pipeline with its built-in 'ner' component disabled; the
# recognizer saved to disk is attached in a later cell instead.
nlp = spacy.load('en', disable=['ner'])

In [None]:
# New recognizer instance sharing the pipeline's vocab; its saved state is
# read from disk in the next cell.
ner = EntityRecognizer(nlp.vocab)

In [None]:
# Read the recognizer back from the same relative directory the training
# cell wrote to, rather than an absolute path that only resolves when the
# working directory is /content.
ner.from_disk('usr/to/ner')

<spacy.pipeline.pipes.EntityRecognizer at 0x7fb031a7d588>

In [None]:
# Attach the loaded recognizer to the pipeline under the name 'custom_ner'.
nlp.add_pipe(ner, "custom_ner")

In [None]:
# Print the component names recorded under 'pipeline' in the model metadata.
print(nlp.meta['pipeline'])

['tagger', 'parser', 'custom_ner']


In [None]:
# Evaluation sentence containing the district name from the training examples.
doc = nlp(u'Could you pick me up at Solnce?')

In [None]:
# Print every entity span found in the document together with its label.
for ent in doc.ents:
  print(ent.text, ent.label_)

Solnce GPE


# **Creating a New Dependency Parser**

## **Custom Syntactic Parsing to Understand User Input**

In [None]:
# Sample user request, processed to inspect its dependency parse.
doc = nlp(u'Find a high paid job with no experience.')

In [None]:
# One (token text, dependency label, head text) triple per token.
print([(t.text, t.dep_, t.head.text) for t in doc])

[('Find', 'ROOT', 'Find'), ('a', 'det', 'job'), ('high', 'advmod', 'paid'), ('paid', 'amod', 'job'), ('job', 'dobj', 'Find'), ('with', 'prep', 'Find'), ('no', 'det', 'experience'), ('experience', 'pobj', 'with'), ('.', 'punct', 'Find')]


## **Deciding on Types of Semantic Relations to Use**

## **Creating Training Examples**

In [None]:
# Each example pairs an utterance with per-token annotations:
#   heads[i] - index of the token that token i attaches to (the root points
#              to itself);
#   deps[i]  - custom relation label for that attachment.
# '-' is used for function words that carry no relation of interest.
TRAINING_DATA = [
    # tokens: find a high paying job with no experience
    ('find a high paying job with no experience', {
        'heads': [0, 4, 4, 4, 0, 7, 7, 4],
        'deps' : ['ROOT', '-', 'QUALITY', 'QUALITY', 'ACTIVITY', '-', 'QUALITY', 'ATTRIBUTE']
    }),

    # tokens: find good workout classes near home
    # Annotated with the same convention as the example above: modifiers
    # attach to the activity noun ('classes'), the activity attaches to the
    # root verb, the preposition attaches to its object, and that object
    # attaches to the activity as an ATTRIBUTE.
    ('find good workout classes near home', {
        'heads': [0, 3, 3, 0, 5, 3],
        'deps' : ['ROOT', 'QUALITY', 'QUALITY', 'ACTIVITY', '-', 'ATTRIBUTE']
    })


]

In [None]:
# Parse the first training sentence with the current pipeline and collect,
# for each token, the index of the token it is attached to.
doc = nlp(u'find a high paying job with no experience')
heads = [token.head.i for token in doc]

print(heads)

[0, 4, 4, 4, 0, 0, 7, 5]


## **Training the Parser**

In [None]:
# Collect every pipe name other than 'ner' and disable those components.
# NOTE(review): the recognizer added earlier in this notebook is registered
# as 'custom_ner', so this filter does not exclude it — confirm which
# components are meant to stay enabled while the parser is trained.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe !='ner']
nlp.disable_pipes(*other_pipes)

[('parser', <spacy.pipeline.pipes.DependencyParser at 0x7fb02f8a9048>)]

In [None]:
import random

# Create a new parser component and place it at the front of the pipeline.
parser = nlp.create_pipe('parser')
nlp.add_pipe(parser, first=True)

# Register every dependency label that appears in the annotations.
for text, annotations in TRAINING_DATA:
    labels = annotations.get('deps', [])
    for d in labels:
        parser.add_label(d)

optimizer = nlp.begin_training()

# 25 passes over the data, reshuffled before each pass; the model is updated
# with one example at a time.
for i in range(25):
    random.shuffle(TRAINING_DATA)
    for text, annotations in TRAINING_DATA:
        nlp.update([text], [annotations], sgd=optimizer)

# Write the trained parser to disk.
parser.to_disk('/content/oracle/to/parser')

In [None]:
# Dump each training example, followed by its dependency labels one per line.
test = []
for text, annotations in TRAINING_DATA:
  print(text, annotations)
  dep_labels = annotations.get('deps', [])
  for d in dep_labels:
    print(d)


find a high paying job with no experience {'heads': [0, 4, 4, 4, 0, 7, 7, 4], 'deps': ['ROOT', '-', 'QUALITY', 'QUALITY', 'ACTIVITY', '-', 'QUALITY', 'ATTRIBUTE']}
ROOT
-
QUALITY
QUALITY
ACTIVITY
-
QUALITY
ATTRIBUTE
find good workout classes near home {'heads': [0, 4, 4, 4, 0, 4], 'deps': ['ROOT', '-', 'QUALITY', 'ACTIVITY', 'QUALITY', 'ATTRIBUTE']}
ROOT
-
QUALITY
ACTIVITY
QUALITY
ATTRIBUTE
