In [1]:
import spacy

In [2]:
# Load the trained `en_core_web_sm` pipeline; `nlp` is reused by the cells below.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Inspect the loaded pipeline as a list of (name, component object) pairs.
nlp.pipeline


[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x16b4abb6390>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x16b4abb6870>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x16b4aac67a0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x16b4ad5b3d0>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x16b4ad7f450>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x16b4aac6ab0>)]

In [4]:
# Same pipeline, but only the component names (no component objects).
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

## Using the tagger and lemmatizer of the trained NLP pipeline

tagger -> determines the part of speech (POS) of each token,
lemmatizer -> determines the base form (lemma) of each word

In [6]:
# Run the trained pipeline on a sample text, then print one row per token:
# token text | coarse part-of-speech tag | lemma.
doc = nlp("Captain America ate 100$ of samosa. Then he said I can do this all day")

for token in doc:
    print(token.text, token.pos_, token.lemma_, sep="  |  ")

Captain  |  PROPN  |  Captain
America  |  PROPN  |  America
ate  |  VERB  |  eat
100  |  NUM  |  100
$  |  NUM  |  $
of  |  ADP  |  of
samosa  |  PROPN  |  samosa
.  |  PUNCT  |  .
Then  |  ADV  |  then
he  |  PRON  |  he
said  |  VERB  |  say
I  |  PRON  |  I
can  |  AUX  |  can
do  |  VERB  |  do
this  |  PRON  |  this
all  |  DET  |  all
day  |  NOUN  |  day


## Using Named Entity Recognition (NER)

In [9]:
# Run the pipeline on a sentence and print one row per detected entity:
# entity text | entity label | spaCy's explanation of that label.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


In [10]:
# A fancier way to show the entities found by NER: use displaCy's
# entity visualizer on the most recent `doc` instead of printing rows.
from spacy import displacy

displacy.render(docs=doc, style="ent")

##### Applying the same concepts to a blank language object

In [11]:
# A blank English pipeline contains only the tokenizer; the rest of the
# language processing pipeline is empty.
# Note: this rebinds `nlp`, replacing the trained pipeline loaded earlier.
nlp = spacy.blank("en")

doc = nlp("Captain America ate 100$ of samosa. Then he said I can do this all day")

for token in doc:
    print(token.text, token.pos_, token.lemma_, sep="  |  ")

# The POS and lemma columns in the output below are empty because this
# pipeline has no trained / built-in processing components.

Captain  |    |  
America  |    |  
ate  |    |  
100  |    |  
$  |    |  
of  |    |  
samosa  |    |  
.  |    |  
Then  |    |  
he  |    |  
said  |    |  
I  |    |  
can  |    |  
do  |    |  
this  |    |  
all  |    |  
day  |    |  


In [12]:
# With the blank pipeline no entities are produced, so `doc.ents` is empty
# and this loop prints nothing.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")

## Adding a custom component to our language processing pipeline from a trained pipeline