# Chapter 3: Processing Pipelines

## Processing pipelines

In [2]:
import spacy

# Load the en_core_web_sm model and keep the loaded object in `nlp`
nlp = spacy.load('en_core_web_sm')

# Print the names of the pipeline components
print(nlp.pipe_names)

['tagger', 'parser', 'ner']


In [3]:
# Print the full pipeline of (name, component) tuples.
# Relies on `nlp` having been loaded by an earlier cell.
print(nlp.pipeline)

[('tagger', <spacy.pipeline.pipes.Tagger object at 0x7f7bacf2f400>), ('parser', <spacy.pipeline.pipes.DependencyParser object at 0x7f7bacf2c528>), ('ner', <spacy.pipeline.pipes.EntityRecognizer object at 0x7f7bacf2c588>)]


## Inspecting the pipeline

In [5]:
import spacy

# Load the en_core_web_sm model
nlp = spacy.load('en_core_web_sm')

# Print the names of the pipeline components
print(nlp.pipe_names)

# Print the full pipeline of (name, component) tuples
print(nlp.pipeline)

['tagger', 'parser', 'ner']
[('tagger', <spacy.pipeline.pipes.Tagger object at 0x7f7b71d7d748>), ('parser', <spacy.pipeline.pipes.DependencyParser object at 0x7f7b77d576a8>), ('ner', <spacy.pipeline.pipes.EntityRecognizer object at 0x7f7b71fee9a8>)]


## Simple components


In [14]:
import spacy

# Custom pipeline component: report the token count, then pass the doc along
def length_component(doc):
    """Print how many tokens ``doc`` contains and return it unchanged.

    The count is ``len(doc)``. The very same ``doc`` object is returned so
    that whatever calls this component can keep working with it.
    """
    message = "This document is {} tokens long.".format(len(doc))
    print(message)
    return doc

# Load the small English model
nlp = spacy.load('en_core_web_sm')

# Add the component first in the pipeline and print the pipe names.
# No explicit name is passed; the printed list shows it as 'length_component'.
# NOTE(review): passing the function object directly matches the spaCy release
# this notebook was run with; other releases may expect a different call style
# for add_pipe -- confirm before upgrading.
nlp.add_pipe(length_component, first=True)
print(nlp.pipe_names)

# Process a text; running the pipeline invokes length_component,
# which prints the token count of the document
doc = nlp("This is some text")

['length_component', 'tagger', 'parser', 'ner']
This document is 4 tokens long.
