In [1]:
import spacy

In [4]:
# Blank pipeline
# spacy.blank('en') builds an English pipeline with no components added,
# so calling it on text only splits that text into tokens.
nlp = spacy.blank('en')

text = "Captain america ate 100$ of samosa. Then he said I can do this all day."
doc = nlp(text)

# Print one token per line.
for token in doc:
    print(token)

Captain
america
ate
100
$
of
samosa
.
Then
he
said
I
can
do
this
all
day
.


In [5]:
# Names of the components in the current pipeline; for the blank pipeline
# created above this is an empty list.
nlp.pipe_names

[]

In [6]:
# Load the trained English pipeline 'en_core_web_sm'. This rebinds `nlp`,
# so the blank pipeline created earlier is no longer reachable by that name.
nlp = spacy.load('en_core_web_sm')

In [7]:
# Names of the components that the loaded pipeline contains.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [8]:
# The same components as (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x1ac1b24e648>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x1ac1b52ba08>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x1ac1b5807b8>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x1ac1b85cd08>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x1ac1b8125c8>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x1ac1b580828>)]

In [11]:
# Process the sentence with the current `nlp` pipeline (expected to be the
# loaded en_core_web_sm pipeline at this point in the notebook).
text = "Captain america ate 100$ of samosa. Then he said I can do this all day."
doc = nlp(text)

# For every token show: the token, a readable description of its
# part-of-speech tag (via spacy.explain), and its lemma.
for token in doc:
    pos_description = spacy.explain(token.pos_)
    print(token, " == ", pos_description, "==", token.lemma_)

Captain  ==  proper noun == Captain
america  ==  proper noun == america
ate  ==  verb == eat
100  ==  numeral == 100
$  ==  numeral == $
of  ==  adposition == of
samosa  ==  noun == samosa
.  ==  punctuation == .
Then  ==  adverb == then
he  ==  pronoun == he
said  ==  verb == say
I  ==  pronoun == I
can  ==  auxiliary == can
do  ==  verb == do
this  ==  pronoun == this
all  ==  determiner == all
day  ==  noun == day
.  ==  punctuation == .


In [12]:
# Named Entity Recognition (NER)
text = "Tesla Inc is going to acquire twitter for $45 billion"
doc = nlp(text)

# doc.ents holds the entity spans found in the text; print each span's
# text next to its label.
for ent in doc.ents:
    print(ent.text, ent.label_)

Tesla Inc ORG
$45 billion MONEY


In [13]:
# displaCy is spaCy's built-in visualizer.
from spacy import displacy

# Render the entities of the most recently processed `doc`;
# style='ent' selects the entity visualizer.
displacy.render(doc, style='ent')

In [14]:
# Adding components to a blank pipeline

# Trained pipeline that supplies the component to reuse.
source_nlp = spacy.load('en_core_web_sm')
# Start again from an English pipeline with no components (rebinds `nlp`).
nlp = spacy.blank('en')

# source=... takes the 'ner' component from source_nlp rather than
# adding a newly created one.
nlp.add_pipe('ner', source=source_nlp)
# Displayed result: the pipeline now lists only 'ner'.
nlp.pipe_names

['ner']

In [15]:
# Run the same sentence through the current `nlp` pipeline (at this point
# expected to be the blank pipeline holding only the sourced 'ner').
text = "Tesla Inc is going to acquire twitter for $45 billion"
doc = nlp(text)

# Print each recognized entity's text next to its label.
for ent in doc.ents:
    print(ent.text, ent.label_)

Tesla Inc ORG
$45 billion MONEY
