In [6]:
import spacy

# Blank Pipeline

In [8]:
# Start from an empty English pipeline and run the sample text through it.
nlp = spacy.blank("en")
doc = nlp("Captain America ate 100$ of samosa. Then he said I can do this all day.")

# Show the tokenisation result, one token per line.
for token in doc:
    print(token.text)

Captain
America
ate
100
$
of
samosa
.
Then
he
said
I
can
do
this
all
day
.


In [3]:
# List the names of the components registered on the current pipeline.
nlp.pipe_names

[]

# Load Pre-Built Pipeline and Perform Named Entity Recognition

In [36]:
# Replace the blank pipeline with the pre-built `en_core_web_sm` pipeline.
nlp = spacy.load("en_core_web_sm")
# Display the pipeline as (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x1f76d2f2e80>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x1f76d2f2700>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x1f768e85970>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x1f76d9182c0>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x1f7689d5440>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x1f768e859e0>)]

In [17]:
# Same pipeline as above, but showing only the component names.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

## Part of Speech and Lemma

In [16]:
# Re-process the sample text, this time with the loaded pipeline held in `nlp`.
doc = nlp("Captain America ate 100$ of samosa. Then he said I can do this all day.")

# One row per token: text, part-of-speech tag, lemma.
for token in doc:
    print(f"{token.text}  |  {token.pos_}  |  {token.lemma_}")

Captain  |  PROPN  |  Captain
America  |  PROPN  |  America
ate  |  VERB  |  eat
100  |  NUM  |  100
$  |  NUM  |  $
of  |  ADP  |  of
samosa  |  PROPN  |  samosa
.  |  PUNCT  |  .
Then  |  ADV  |  then
he  |  PRON  |  he
said  |  VERB  |  say
I  |  PRON  |  I
can  |  AUX  |  can
do  |  VERB  |  do
this  |  PRON  |  this
all  |  DET  |  all
day  |  NOUN  |  day
.  |  PUNCT  |  .


In [23]:
doc = nlp("Tesla. Inc is going to acquire for $45 billion")

# One row per recognised entity: text, label, human-readable label description.
for ent in doc.ents:
    print(f"{ent.text}  |  {ent.label_}  |  {spacy.explain(ent.label_)}")

Tesla  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


## Render with style

In [25]:
from spacy import displacy

# Visualise `doc` with displaCy using the entity ("ent") style.
displacy.render(doc, style="ent")

In [26]:
# The same word appears twice in this sentence; print the label given to each entity.
doc = nlp("Bloomberg founded data company called Bloomberg")

for ent in doc.ents:
    print(f"{ent.text}  |  {ent.label_}  |  {spacy.explain(ent.label_)}")

Bloomberg  |  PERSON  |  People, including fictional
Bloomberg  |  ORG  |  Companies, agencies, institutions, etc.


In [27]:
# `displacy` is already imported by the earlier rendering cell, so the import
# is not repeated here. Visualise the current `doc` with the entity style.
displacy.render(doc, style="ent")

# Manually add a specific pipeline component

In [29]:
# Back to a blank English pipeline for the sample text.
nlp = spacy.blank("en")
doc = nlp("Captain America ate 100$ of samosa. Then he said I can do this all day.")

# This loop prints nothing here: the blank pipeline yields no entities.
for ent in doc.ents:
    print(f"{ent.text}  |  {ent.label_}  |  {spacy.explain(ent.label_)}")

In [33]:
# Pipeline that supplies the component to be reused.
source_nlp = spacy.load("en_core_web_sm")

# Fresh blank English pipeline that will receive the component.
nlp = spacy.blank("en")

# Add an "ner" component to the blank pipeline, sourcing it from `source_nlp`.
nlp.add_pipe("ner", source=source_nlp)
# Display the component names now registered on `nlp`.
nlp.pipe_names

['ner']

In [35]:
# Run the pipeline currently held in `nlp` on the acquisition sentence.
doc = nlp("Tesla. Inc is going to acquire for $45 billion")

# One row per recognised entity: text, label, human-readable label description.
for ent in doc.ents:
    print(f"{ent.text}  |  {ent.label_}  |  {spacy.explain(ent.label_)}")

Tesla  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit
