## Pipeline in Spacy

![Spacy Loaded Pipeline](spacy_loaded_pipeline.jpg)


In [1]:
import spacy

In [2]:
nlp = spacy.load('en_core_web_sm') # loading a spacy Model; it is a pre trained pipeline, check above image

nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x164489b50>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x164489d30>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x16445f140>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x164777a10>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x1647609d0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x16445f1b0>)]

In [3]:
doc = nlp("Captain America are 100$ of Samosa. Then he said I want to eat 6 plates of Pani Puri at Sai GupChup.")

# pos - part of speech, lemma
for token in doc:
    print(token, token.pos_, token.lemma_, sep=" | ")

Captain | PROPN | Captain
America | PROPN | America
are | AUX | be
100 | NUM | 100
$ | NUM | $
of | ADP | of
Samosa | PROPN | Samosa
. | PUNCT | .
Then | ADV | then
he | PRON | he
said | VERB | say
I | PRON | I
want | VERB | want
to | PART | to
eat | VERB | eat
6 | NUM | 6
plates | NOUN | plate
of | ADP | of
Pani | PROPN | Pani
Puri | PROPN | Puri
at | ADP | at
Sai | PROPN | Sai
GupChup | PROPN | GupChup
. | PUNCT | .


### Named entity recognition

In [4]:
doc = nlp('Tesla Inc is going to acquire twitter for $45 billion')

doc = nlp('Apple is going to acquire twitter for $45 billion')

# doc = nlp("Apple is a very nutritious fruit.")


for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep=" || ")

Apple || ORG || Companies, agencies, institutions, etc.
$45 billion || MONEY || Monetary values, including unit


In [12]:
# REVIEW: this code is not working 

In [11]:
from spacy import displacy

displacy.render(doc, style="ent", jupyter=False)

'<div class="entities" style="line-height: 2.5; direction: ltr">\n<mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    Apple\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORG</span>\n</mark>\n is going to acquire twitter for \n<mark class="entity" style="background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    $45 billion\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">MONEY</span>\n</mark>\n</div>'

## Adding components to a blank pipline

In [13]:
# with full components in pipeline
source_nlp = spacy.load('en_core_web_sm')

# with only tokenizer in pipeline
nlp = spacy.blank('en')

print(nlp.pipeline)

[]


In [None]:
nlp.add_pipe("ner", source=source_nlp) # is ner a reserved keyword? 
nlp.pipe_names

In [15]:
doc = nlp('Apple is going to acquire twitter for $45 billion')

for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep=" || ")

Apple || ORG || Companies, agencies, institutions, etc.
$45 billion || MONEY || Monetary values, including unit
