In [2]:
# Load spaCy's small English pipeline (en_core_web_sm).
# `nlp` is the shared pipeline object used by every cell below.
import spacy
nlp = spacy.load('en_core_web_sm')

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Installed spaCy version, recorded so the results below can be reproduced
spacy.__version__

'3.7.2'

In [4]:
# List every component shipped with the model as (name, component) pairs.
# Per the spaCy docs this listing includes disabled components, which is why
# 'senter' shows up here but not in the nlp.pipeline output of the next cell.
nlp.components

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x16e08b6a0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x16e08bf40>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x16ce10270>),
 ('senter', <spacy.pipeline.senter.SentenceRecognizer at 0x16e110280>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x16e0f8100>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x16e153680>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x16ce10510>)]

In [5]:
# Active components only, in the order they are applied; note 'senter' is absent here
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x16e08b6a0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x16e08bf40>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x16ce10270>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x16e0f8100>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x16e153680>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x16ce10510>)]

In [6]:
# Names of all components (same set as nlp.components, so 'senter' is included)
nlp.component_names

['tok2vec',
 'tagger',
 'parser',
 'senter',
 'attribute_ruler',
 'lemmatizer',
 'ner']

In [7]:
# Sentiment analysis via spacytextblob (TextBlob wrapped as a spaCy component).
# NOTE(review): the class is never referenced directly below; the import is
# presumably what makes the "spacytextblob" name available to nlp.add_pipe -- confirm.
from spacytextblob.spacytextblob import SpacyTextBlob

In [8]:
# Add the TextBlob sentiment component to the end of the pipeline.
# Language.add_pipe raises ValueError if a component with the same name is
# already registered, so guard the call to keep this cell safe to re-run
# without restarting the kernel.
if "spacytextblob" not in nlp.component_names:
    nlp.add_pipe("spacytextblob")
# Display the component, matching what a bare add_pipe call would have shown.
nlp.get_pipe("spacytextblob")

<spacytextblob.spacytextblob.SpacyTextBlob at 0x17b7469d0>

In [9]:
# Re-list the components to confirm 'spacytextblob' now appears as the last entry
nlp.components

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x16e08b6a0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x16e08bf40>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x16ce10270>),
 ('senter', <spacy.pipeline.senter.SentenceRecognizer at 0x16e110280>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x16e0f8100>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x16e153680>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x16ce10510>),
 ('spacytextblob', <spacytextblob.spacytextblob.SpacyTextBlob at 0x17b7469d0>)]

In [10]:
# Multi-sentence sample that mixes strongly negative and strongly positive statements
sample_text = "I had a really horrible day. It was the worst day ever! But every now and then I have a really good day that makes me happy."

In [11]:
# Run the pipeline (now including spacytextblob) over the sample text
doc = nlp(sample_text)

In [12]:
# Printing a Doc shows its original text unchanged
print(doc)

I had a really horrible day. It was the worst day ever! But every now and then I have a really good day that makes me happy.


In [13]:
# Document-level polarity; TextBlob reports it in [-1.0, 1.0]
# (negative = negative sentiment, positive = positive sentiment)
print(doc._.polarity)

-0.125


In [14]:
# Document-level subjectivity; TextBlob reports it in [0.0, 1.0]
# (0.0 = very objective, 1.0 = very subjective)
doc._.subjectivity

0.9

In [15]:
# Assessments: one entry per matched word/phrase (not one per token), each
# shaped (words, polarity, subjectivity, label) as in TextBlob's sentiment_assessments
doc._.assessments

[(['really', 'horrible'], -1.0, 1.0, None),
 (['worst', '!'], -1.0, 1.0, None),
 (['really', 'good'], 0.7, 0.6000000000000001, None),
 (['happy'], 0.8, 1.0, None)]

In [16]:
# Negation probe: five variants of one sentence, ordered from the most
# positive wording to the most negative, with a neutral one in the middle.
best_text = "This was the best song in the album!"
not_the_best_text = "This was not the best song in the album."
text = "This was a song in the album."
not_the_worst_text = "This was not the worst song in the album."
worst_text = "This was the worst song in the album!"

# Run each sentence through the pipeline, in the same order as listed above.
best, not_the_best, neutral, not_the_worst, worst = (
    nlp(sentence)
    for sentence in (
        best_text,
        not_the_best_text,
        text,
        not_the_worst_text,
        worst_text,
    )
)

In [17]:
# Polarity of each variant, one per line, from "best" down to "worst"
for parsed in (best, not_the_best, neutral, not_the_worst, worst):
    print(parsed._.polarity)

1.0
1.0
0.0
-1.0
-1.0


In [18]:
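# The TextBlob-based sentiment component (spacytextblob), rather than spaCy itself, produces these scores,
# and it does not capture the negation here: "not the best" scores the same as "the best" (1.0) and
# "not the worst" the same as "the worst" (-1.0).
# Note to self: make a pipe to remove stop words before scoring -- but first check whether "not" is
# itself treated as a stop word, since dropping it would discard the negation entirely.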

In [19]:
# Subjectivity of each variant, one per line, in the same order as the polarity cell
for parsed in (best, not_the_best, neutral, not_the_worst, worst):
    print(parsed._.subjectivity)

0.3
0.3
0.0
1.0
1.0


In [24]:
# Sentence for which no sentiment is detected (polarity 0.0, subjectivity 0.0, no assessments below).
# NOTE: this rebinds `text`, which earlier held the neutral sentence of the negation test.
# TODO: "loves" and "love" reportedly behave differently -- consider lemmatizing before scoring.
text = "John loves eating apples when he works at Apple"
docx = nlp(text)

In [25]:
# Polarity of the "John loves ..." sentence
docx._.polarity

0.0

In [26]:
# Subjectivity of the "John loves ..." sentence
docx._.subjectivity

0.0

In [27]:
# Matched sentiment words/phrases for the "John loves ..." sentence (none are found)
docx._.assessments

[]