In [None]:
import sys

import spacy
from spacy import displacy

# Blank NLP Pipeline

Here we create a blank spaCy pipeline for the English language. It comes with a tokenizer by default, but its processing pipeline is otherwise empty.

In [None]:
# A blank English pipeline: no trained components are loaded, but text can
# still be split into tokens.
nlp = spacy.blank('en')

doc = nlp(
    'Captain America almost killed Iron Man in the Captain America: Civil War movie. '
    'Then he said, I can do this all day.'
)

# Emit one token per line.
for token in doc:
    print(token.text, end='\n')

Captain
America
almost
killed
Iron
Man
in
the
Captain
America
:
Civil
War
movie
.
Then
he
said
,
I
can
do
this
all
day
.


In [None]:
# List the names of the components in the pipeline (empty for a blank pipeline).
nlp.pipe_names

[]

# Pre-trained Model Pipeline in English

If we load a pre-trained spaCy model instead, it comes with a ready-made processing pipeline.

In [None]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m47.0 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Load the pre-trained small English pipeline installed by the download step.
nlp = spacy.load('en_core_web_sm')



In [None]:
# List the names of the processing components that the loaded pipeline contains.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [None]:
doc = nlp(
    'Captain America almost killed Iron Man in the Captain America: Civil War movie. '
    'Then he said, I can do this all day.'
)

# One row per token: surface text, part-of-speech tag and lemma.
for token in doc:
    print(token.text, token.pos_, token.lemma_, sep='  |  ')

Captain  |  PROPN  |  Captain
America  |  PROPN  |  America
almost  |  ADV  |  almost
killed  |  VERB  |  kill
Iron  |  PROPN  |  Iron
Man  |  PROPN  |  Man
in  |  ADP  |  in
the  |  DET  |  the
Captain  |  PROPN  |  Captain
America  |  PROPN  |  America
:  |  PUNCT  |  :
Civil  |  PROPN  |  Civil
War  |  PROPN  |  War
movie  |  NOUN  |  movie
.  |  PUNCT  |  .
Then  |  ADV  |  then
he  |  PRON  |  he
said  |  VERB  |  say
,  |  PUNCT  |  ,
I  |  PRON  |  I
can  |  AUX  |  can
do  |  VERB  |  do
this  |  PRON  |  this
all  |  DET  |  all
day  |  NOUN  |  day
.  |  PUNCT  |  .


# Named Entity Recognition (NER)

In [None]:
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

# One row per recognised entity: its text, its label and spaCy's
# human-readable explanation of that label.
for entity in doc.ents:
    print(
        entity.text,
        entity.label_,
        spacy.explain(entity.label_),
        sep='  |  ',
    )

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


In [None]:
# Render the entities found in `doc` as highlighted spans in the cell output.
# `displacy` is imported in the notebook's import cell at the top, so all
# dependencies are declared in one place.
displacy.render(doc, style='ent')

# Pre-trained Model Pipeline in French

In [None]:
!python -m spacy download fr_core_news_sm

Collecting fr-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.7.0/fr_core_news_sm-3.7.0-py3-none-any.whl (16.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.3/16.3 MB[0m [31m40.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: fr-core-news-sm
Successfully installed fr-core-news-sm-3.7.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('fr_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Switch to the small French pipeline and run the same kind of sentence through it.
nlp = spacy.load('fr_core_news_sm')

doc = nlp("Tesla Inc va racheter Twitter pour $45 milliards de dollars")

# One row per recognised entity: text, label and the label's explanation.
for entity in doc.ents:
    print(
        entity.text,
        entity.label_,
        spacy.explain(entity.label_),
        sep='  |  ',
    )



Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  MISC  |  Miscellaneous entities, e.g. events, nationalities, products or works of art


# Customize a blank pipeline by adding a pre-trained model component

In [None]:
# Start from an empty English pipeline...
nlp = spacy.blank('en')

# ...and copy the trained 'ner' component out of the small English model.
source_nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('ner', source=source_nlp)

# Show which components the customised pipeline now holds.
nlp.pipe_names



['ner']

In [None]:
# Run the customised pipeline and print each entity's text and label.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")
for ent in doc.ents:
    print(ent.text, ent.label_, sep='  |  ')

Tesla Inc  |  ORG
$45 billion  |  MONEY
