https://github.com/explosion/spaCy/blob/master/LICENSE

Example of training spaCy's dependency parser, starting from either an
existing model or a blank model. For more details, see the documentation:
* Training: https://spacy.io/usage/training#section-tagger-parser
* Dependency Parse: https://spacy.io/usage/linguistic-features#dependency-parse

In [1]:
import random
from pathlib import Path
import spacy

In [2]:
# Training data: a list of (text, annotations) pairs. In each annotations
# dict, 'heads' and 'deps' are parallel lists with one entry per token:
# 'heads' gives the index of the token's syntactic head and 'deps' gives the
# dependency label of the token.
TRAIN_DATA = [
    (
        "They trade mortgage-backed securities.",
        {
            'heads': [1, 1, 4, 4, 5, 1, 1],
            'deps': [
                'nsubj', 'ROOT', 'compound', 'punct', 'nmod', 'dobj', 'punct',
            ],
        },
    ),
    (
        "I like London and Berlin.",
        {
            'heads': [1, 1, 1, 2, 2, 1],
            'deps': ['nsubj', 'ROOT', 'dobj', 'cc', 'conj', 'punct'],
        },
    ),
]

In [3]:
# create a blank 'en' pipeline; the parser is added and trained in the
# cells below
nlp = spacy.blank('en')

In [4]:
# Make sure `parser` refers to the pipeline's dependency parser.
# nlp.create_pipe works for built-ins that are registered with spaCy.
if 'parser' not in nlp.pipe_names:
    # no parser in the pipeline yet (e.g. a blank model): create one and
    # put it first in the pipeline
    parser = nlp.create_pipe('parser')
    nlp.add_pipe(parser, first=True)
else:
    # the pipeline already has a parser (e.g. an existing model): fetch it,
    # so that the name `parser` is bound either way for the following cells
    parser = nlp.get_pipe('parser')

In [5]:
# register every dependency label found in the training data with the parser
for _text, gold in TRAIN_DATA:
    dep_labels = gold.get('deps', [])  # examples without 'deps' add nothing
    for label in dep_labels:
        parser.add_label(label)

In [6]:
# Train only the parser: every other component in the pipeline is disabled
# for the duration of the `with` block.
pipes_to_disable = [name for name in nlp.pipe_names if name != 'parser']
with nlp.disable_pipes(*pipes_to_disable):
    optimizer = nlp.begin_training()
    for itn in range(10):
        # visit the examples in a new random order on each pass
        # (note: this reorders TRAIN_DATA in place)
        random.shuffle(TRAIN_DATA)
        # fresh dict per pass; it is handed to every update call below and
        # printed once the pass is finished
        losses = {}
        for text, annotations in TRAIN_DATA:
            # update the model on one example at a time
            nlp.update([text], [annotations], sgd=optimizer, losses=losses)
        print(losses)

{'parser': 14.141747042648603}
{'parser': 8.855948711589104}
{'parser': 6.2255389432232455}
{'parser': 4.949621295755755}
{'parser': 3.007197869062118}
{'parser': 1.257717654555111}
{'parser': 3.988629501106004}
{'parser': 0.3553556270657353}
{'parser': 0.00010969843792011904}
{'parser': 2.3639529078560907e-08}


In [7]:
# run the trained pipeline on a new sentence and show, for every token,
# its text, its dependency label and the text of its head
test_text = "I like securities."
doc = nlp(test_text)
parsed_tokens = [(token.text, token.dep_, token.head.text) for token in doc]
print('Dependencies', parsed_tokens)


Dependencies [('I', 'nsubj', 'like'), ('like', 'ROOT', 'like'), ('securities', 'dobj', 'like'), ('.', 'punct', 'like')]
