In [1]:
import spacy

n = spacy.blank("en")

doc = n("Hi my name is dhruv taneja .I love ml,nlp,computer vision,dl.")

for token in doc:
    print(token)


Hi
my
name
is
dhruv
taneja
.I
love
ml
,
nlp
,
computer
vision
,
dl
.


In [4]:
#This being empty indicates there is nothing in the pipeline like tokeniser,stemming etc.
n.pipe_names

[]

In [8]:
#To download the pipeline we need to install by loading the particular pipeline for all the process
n = spacy.load("en_core_web_sm")
n.pipe_names
#We can see the parts of pipeline that are given below

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [10]:
#This is done in order to initate pipeline for the particular sentence
n.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x1e8ba3ec820>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x1e8ba3ec8e0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x1e8b9ba42e0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x1e8b613bc40>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x1e8ba2dcb80>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x1e8b9ba4890>)]

In [16]:
d = n("Hi my name is dhruv taneja .I love ml,nlp,computer vision,dl.")

for token in d:
    print(token," | ", spacy.explain(token.pos_) ," | ", token.lemma_)

Hi  |  interjection  |  hi
my  |  pronoun  |  my
name  |  noun  |  name
is  |  auxiliary  |  be
dhruv  |  proper noun  |  dhruv
taneja  |  proper noun  |  taneja
.I  |  proper noun  |  .I
love  |  proper noun  |  love
ml  |  proper noun  |  ml
,  |  punctuation  |  ,
nlp  |  proper noun  |  nlp
,  |  punctuation  |  ,
computer  |  noun  |  computer
vision  |  noun  |  vision
,  |  punctuation  |  ,
dl  |  proper noun  |  dl
.  |  punctuation  |  .


# Named Entity Recognition

In [19]:
#This is done in order to get the name and label of the paprticular word this can be done using ner of pipeline
#Try doing all these steps without pipeline and see the output
dhruv = n("Tesla Inc is going to acquire twitter for $45 billion")

for ent in dhruv.ents:
    print(ent.text,ent.label_)

Tesla Inc ORG
$45 billion MONEY


In [21]:
#This is done in order to show the entities a little fancier
from spacy import displacy

displacy.render(dhruv, style="ent")

# For France Language

In [26]:
#for this we need to download the python -m spacy download fr_core_news_sm in command prompt or download this lib in anyone env u are using
nlp = spacy.load("fr_core_news_sm")

In [27]:
doc = nlp("Tesla Inc va racheter Twitter pour $45 milliards de dollars")
for token in doc:
    print(token)

Tesla
Inc
va
racheter
Twitter
pour
$
45
milliards
de
dollars


In [30]:
#For the part of speech and lemmatisation word of the particular sentence
for token in doc:
    print(token," | ",token.pos_," | ",token.lemma_)

Tesla  |  PROPN  |  Tesla
Inc  |  PROPN  |  Inc
va  |  VERB  |  aller
racheter  |  VERB  |  racheter
Twitter  |  VERB  |  twitter
pour  |  ADP  |  pour
$  |  NOUN  |  dollar
45  |  NUM  |  45
milliards  |  NOUN  |  milliard
de  |  ADP  |  de
dollars  |  NOUN  |  dollar


In [32]:
for ent in doc.ents:
    print(token," | ",ent.label_," | ",ent.text)

dollars  |  ORG  |  Tesla Inc
dollars  |  MISC  |  Twitter


# Adding a component to a blank pipeline

In [34]:
#This is done in order to add some component to a pipeline instead of adding the whole thing to the pipeline
source_nlp = spacy.load("en_core_web_sm")

nlp = spacy.blank("en")
nlp.add_pipe("ner", source=source_nlp)
nlp.pipe_names

['ner']

In [36]:
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")
for ent in doc.ents:
    print(ent.text, ent.label_)

Tesla Inc ORG
$45 billion MONEY
