In [1]:
import spacy
from spacy import displacy

In [2]:
# Create a blank English pipeline; it starts with no components (see pipe_names in the next cell).
nlp = spacy.blank("en")

In [3]:
nlp.pipe_names  # empty at this point: the cell above created a blank pipeline with no components

[]

In [4]:
# Alternatively, load a pre-trained pipeline instead of starting from a blank one.
pipe = "en_core_web_sm"  # "en" = English, "sm" = small; the model package must be downloaded before it can be loaded
nlp_pipe = spacy.load(pipe)

In [5]:
# List the names of the components that make up the pre-trained pipeline.
nlp_pipe.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [6]:
# Same components, shown as (name, component object) pairs.
nlp_pipe.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x17dcf4bdfa0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x17dcf4bde80>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x17dcf36f890>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x17dcf203b40>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x17dcf2073c0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x17dcf36f900>)]

In [7]:
string = "Tesla Inc. is going to aquire twitter for $45 billion"

# Print one row per token: text | part of speech (noun, verb, ...) | lemma (base form of the word).
# NOTE(review): "aquire" in the sentence is a misspelling of "acquire"; its lemma comes back
# unchanged in the recorded output, presumably because the model does not recognise the word.
for token in nlp_pipe(string):
    row = (token, " | ", token.pos_, " | ", token.lemma_)
    print(*row)

Tesla  |  PROPN  |  Tesla
Inc.  |  PROPN  |  Inc.
is  |  AUX  |  be
going  |  VERB  |  go
to  |  PART  |  to
aquire  |  VERB  |  aquire
twitter  |  NOUN  |  twitter
for  |  ADP  |  for
$  |  SYM  |  $
45  |  NUM  |  45
billion  |  NUM  |  billion


In [8]:
# Named entity recognition: print each detected entity as
# text | label | human-readable description of the label (via spacy.explain).
for ent in nlp_pipe(string).ents:
    label = ent.label_
    print(ent.text, " | ", label, " | ", spacy.explain(label))

Tesla Inc.  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


In [9]:
# Visualise the named entities found in the sentence using displaCy's "ent" style.
# (displacy is imported in the notebook's first cell together with spacy.)
displacy.render(nlp_pipe(string), style="ent")

In [10]:
# Goal of the following cells: take only one of the components of a pre-trained pipeline -- here, just "ner".
# NOTE(review): this reloads the same model already loaded in cell In [4]; the reload is
# redundant if that cell has run, but keeps this section runnable on its own.
pipe = "en_core_web_sm"
nlp_pipe = spacy.load(pipe)
nlp_pipe.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [11]:
# The blank pipeline created in In [2] is still empty before any component is added.
nlp.pipe_names

[]

In [13]:
# Copy the trained "ner" component from the pre-trained pipeline into the blank one.
# add_pipe raises if a component with that name is already present, so guard the call:
# this keeps the cell safe to re-run without restarting the kernel.
if "ner" not in nlp.pipe_names:
    nlp.add_pipe("ner", source=nlp_pipe)

# Display the component, whether it was added just now or on an earlier run.
nlp.get_pipe("ner")

<spacy.pipeline.ner.EntityRecognizer at 0x17dd27a2ba0>

In [14]:
# Confirm the previously blank pipeline now contains only the sourced "ner" component.
nlp.pipe_names

['ner']