# PIPELINE: inspecting the built-in components

In [3]:
import spacy

# Load the en_core_web_sm model; the returned object is bound to `nlp`
# and exposes the processing pipeline inspected below.
nlp = spacy.load("en_core_web_sm")

# Print the names of the pipeline components (a list of strings; for this
# model the recorded output is ['tagger', 'parser', 'ner']).
print(nlp.pipe_names)

# Print the full pipeline of (name, component) tuples, i.e. each name from
# pipe_names paired with the component object registered under it.
print(nlp.pipeline)

['tagger', 'parser', 'ner']
[('tagger', <spacy.pipeline.pipes.Tagger object at 0x110581410>), ('parser', <spacy.pipeline.pipes.DependencyParser object at 0x118427670>), ('ner', <spacy.pipeline.pipes.EntityRecognizer object at 0x118427910>)]


### CUSTOM PIPELINE: adding your own component

In [5]:
import spacy

# Load the en_core_web_sm model and rebind `nlp` to it, so the add_pipe
# call further down in this cell operates on this newly loaded pipeline.
nlp = spacy.load("en_core_web_sm")

def custom_component(doc):
    """Print the length of ``doc`` and pass the doc along unchanged.

    Args:
        doc: The object being processed. Anything supporting ``len()`` works;
            when run inside the pipeline this is the document being analysed.

    Returns:
        The very same ``doc`` object that was passed in.
    """
    # Measure first, then report, so the printed value is easy to inspect.
    size = len(doc)
    print(size)
    # Hand the doc back untouched for whatever runs next.
    return doc

# Register the function as a pipeline component; first=True places it
# ahead of the components that shipped with the model.
# NOTE(review): passing a bare function to add_pipe matches the spaCy v2
# API that the recorded outputs suggest; spaCy v3 appears to require
# registering with @Language.component and passing the string name —
# confirm the target spaCy version before re-running.
nlp.add_pipe(custom_component, first=True)
# Show the updated component names; per the recorded output,
# 'custom_component' is now listed first.
print(nlp.pipe_names)

['custom_component', 'tagger', 'parser', 'ner']


# Simple component: printing the document length



In [1]:
import spacy

# Define the custom component
def length_component(doc):
    """Report how many tokens ``doc`` contains, then return it unchanged.

    Args:
        doc: The object being processed. Only ``len(doc)`` is used, so any
            sized object is accepted; in the pipeline this is the document.

    Returns:
        The same ``doc`` object, unmodified.
    """
    doc_length = len(doc)
    # Build the message separately from printing it.
    message = f"This document is {doc_length} tokens long."
    print(message)
    # The doc itself is passed through untouched.
    return doc


# Load the small English model and bind it to `nlp`.
nlp = spacy.load("en_core_web_sm")

# Add the component first in the pipeline and print the pipe names;
# per the recorded output, 'length_component' precedes 'tagger',
# 'parser' and 'ner'.
# NOTE(review): passing the function object directly is the spaCy v2
# calling convention — confirm the installed spaCy version.
nlp.add_pipe(length_component, first=True)
print(nlp.pipe_names)

# Process a text. Calling nlp on the string runs the pipeline, which is
# what triggers length_component's message (the recorded output reports
# 5 tokens for this sentence).
doc = nlp("This is a sentence.")

['length_component', 'tagger', 'parser', 'ner']
This document is 5 tokens long.
