# Spacy pipeline

![image.png](attachment:image.png)

In [None]:
##Scpacy Obejt oriented others string processing library

In [137]:
import spacy

# Tokenization

In [139]:
nlp = spacy.blank("en")
doc = nlp ("Captain america ate 100$ of samosa. Then he said I can do this all day.")

for token in doc:
    print(token.text)

Captain
america
ate
100
$
of
samosa
.
Then
he
said
I
can
do
this
all
day
.


In [140]:
nlp.pipe_names

[]

# Loading trained  pipeline

In [141]:
nlp = spacy.load("en_core_web_sm")

In [142]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

![image.png](attachment:image.png)

In [143]:
doc = nlp ("Captain america  is ate 100$ of samosa. Then he said I can do this all day.")

for token in doc:
    print(token, "|",token.pos_, "|", token.lemma_)

Captain | PROPN | Captain
america | PROPN | america
  | SPACE |  
is | AUX | be
ate | VERB | eat
100 | NUM | 100
$ | NUM | $
of | ADP | of
samosa | PROPN | samosa
. | PUNCT | .
Then | ADV | then
he | PRON | he
said | VERB | say
I | PRON | I
can | AUX | can
do | VERB | do
this | PRON | this
all | DET | all
day | NOUN | day
. | PUNCT | .


# Entity Recognition

In [144]:
doc = nlp("Tesla Inc and Apple are going to acquire Twitter for $45 billion")
for ent in doc.ents:
    print(ent.text, "|",ent.label_,"|", spacy.explain(ent.label_))

Tesla Inc | ORG | Companies, agencies, institutions, etc.
Apple | ORG | Companies, agencies, institutions, etc.
Twitter | PRODUCT | Objects, vehicles, foods, etc. (not services)
$45 billion | MONEY | Monetary values, including unit


In [145]:
from spacy import displacy

In [146]:
displacy.render(doc, style="ent")

In [148]:
for entity in doc.ents:
    if entity.label_=="ORG":
        print(entity.text)
    

Tesla Inc
Apple


In [149]:
displacy.render(doc, style="dep")

In [150]:
nlp = spacy.load("es_core_news_sm")

In [152]:
doc = nlp ("El grupo de la maestria en ciencia de datos come pastel en el campus cuando festeja un cumpleaños")

for token in doc:
    print(token, "|",token.pos_, "|", token.lemma_)

El | DET | el
grupo | NOUN | grupo
de | ADP | de
la | DET | el
maestria | NOUN | maestria
en | ADP | en
ciencia | NOUN | ciencia
de | ADP | de
datos | NOUN | dato
come | VERB | come
pastel | NOUN | pastel
en | ADP | en
el | DET | el
campus | NOUN | campus
cuando | SCONJ | cuando
festeja | VERB | festejar
un | DET | uno
cumpleaños | NOUN | cumpleaños


In [169]:
nlp = spacy.load("en_core_web_sm")

In [170]:
doc = nlp("Dr. Strange loves pav bhaji of mumbai. Hulk loves chaat of delhi")

In [171]:
for sentence in doc.sents:
    print(sentence)
    

Dr. Strange loves pav bhaji of mumbai.
Hulk loves chaat of delhi


In [172]:
for token in doc:
    print(token)


Dr.
Strange
loves
pav
bhaji
of
mumbai
.
Hulk
loves
chaat
of
delhi


#### The goal of lemmatization is to reduce different forms of a word to a common base form. 

In [173]:
doc = nlp ("I am recording videos")

In [174]:
for token in doc:
    print(token.lemma_)

I
be
record
video


In [175]:
for sentnce in doc.sents:
    print(sentence.lemma_)

Hulk love chaat of delhi


# Similarity

In [176]:
g1 = nlp("hi")
print(g1)
g2 = nlp("Hello")
print(g2)

hi
Hello


In [177]:
g1.similarity(g2)

  g1.similarity(g2)


0.7025752355239727

In [178]:
g2.similarity(g1)

  g2.similarity(g1)


0.7025752355239727

In [179]:
g3 = nlp("data")

In [180]:
g3.similarity(g1)

  g3.similarity(g1)


0.21056045768425835

In [181]:
g3.similarity(g2)

  g3.similarity(g2)


0.05532433460094752

In [182]:
s1 = nlp("nlp is useful for dealing with text")

In [183]:
s2 =nlp("spacy is part of nlp which is used for text analisis")

In [184]:
s3 = nlp("I like movies")

In [185]:
s1.similarity(s2)

  s1.similarity(s2)


0.561283140962804

In [188]:
s2.similarity(s1)

  s2.similarity(s1)


0.561283140962804

# Sentiment Analysis

In [189]:
from spacytextblob.spacytextblob import SpacyTextBlob

### Adding to the pipeline

In [191]:
nlp.add_pipe("spacytextblob")

ValueError: [E007] 'spacytextblob' already exists in pipeline. Existing names: ['tok2vec', 'tagger', 'parser', 'senter', 'attribute_ruler', 'lemmatizer', 'ner', 'spacytextblob']

In [193]:
nlp.pipe_names

['tok2vec',
 'tagger',
 'parser',
 'attribute_ruler',
 'lemmatizer',
 'ner',
 'spacytextblob']

In [194]:
mytext = "John loves eating apples when he works at Apple"

In [195]:
docx = nlp(mytext)

In [196]:
docx._.polarity

0.0

#### Check Sentiment Polatity

In [200]:
print(doc._.polarity)

None


In [198]:
doc._.subjectivity