In [1]:
# All imports up front: the tokenizer, the stemmer, and the SentiWordNet reader.
# NLTK corpus readers load lazily, so importing `swn` before the download is fine.
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize

# Fetch the NLTK data used below: 'punkt' backs word_tokenize, while
# 'wordnet' and 'sentiwordnet' back the sentiment lookups.
for resource in ('punkt', 'wordnet', 'sentiwordnet'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\solharsh\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\solharsh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package sentiwordnet to
[nltk_data]     C:\Users\solharsh\AppData\Roaming\nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!


# SentiWordNet word scores:

We will go through some sample sentences and look at each word's sentiment. The steps are:

    Tokenize each sentence
    Reduce each token to its stem (the code uses a Snowball stemmer as a lightweight stand-in for lemmatization) and look up its sentiment

In [2]:
# English Snowball stemmer; the sentence-scoring cell further down reuses `ss`.
ss = SnowballStemmer('english')

# A handful of short sentences to probe word-level sentiment.
sentences = [
    "Let's go home.",
    "My car is expensive and fast",
    "This movie is exciting.",
    "You failed this class",
    "The weather is warm today!",
]

for sentence in sentences:
    for token in word_tokenize(sentence):
        # Look up the stemmed token and keep only the first SentiWordNet match;
        # tokens with no entry yield None and are skipped.
        first_match = next(iter(swn.senti_synsets(ss.stem(token))), None)
        if first_match is not None:
            print(first_match)

<lashkar-e-taiba.n.01: PosScore=0.0 NegScore=0.0>
<go.n.01: PosScore=0.0 NegScore=0.0>
<home.n.01: PosScore=0.0 NegScore=0.0>
<car.n.01: PosScore=0.0 NegScore=0.0>
<be.v.01: PosScore=0.25 NegScore=0.125>
<fast.n.01: PosScore=0.0 NegScore=0.0>
<be.v.01: PosScore=0.25 NegScore=0.125>
<fail.v.01: PosScore=0.0 NegScore=0.125>
<class.n.01: PosScore=0.0 NegScore=0.0>
<weather.n.01: PosScore=0.0 NegScore=0.0>
<be.v.01: PosScore=0.25 NegScore=0.125>
<warm.v.01: PosScore=0.25 NegScore=0.0>
<today.n.01: PosScore=0.125 NegScore=0.0>


# SentiWordNet sentiments applied to sentences:

It is nice to have sentiment scores for individual words, but what about whole sentences? How can we evaluate them?

    Let's implement something very simple. We can take the difference between the positive and negative scores of each token in the sentence and sum them.
    The result will be the overall score for our sentence.
    We will update the previous code slightly.

In [3]:
# Per-sentence totals, collected in the same order as `sentences` so they can
# be reused after this cell instead of only being printed.
sentence_sentiments = []

for sentence in sentences:
    # Start from a float so a sentence with no scored tokens still reports 0.0.
    sentence_sentiment = 0.0
    tokens = word_tokenize(sentence)
    for word in tokens:
        # `ss` is a stemmer, so this is a stem rather than a true lemma.
        stem = ss.stem(word)
        synsets = list(swn.senti_synsets(stem))
        # Tokens whose stem has no SentiWordNet entry contribute nothing.
        if not synsets:
            continue
        # Pick the first result
        synset = synsets[0]
        # Net polarity of the token: positive score minus negative score.
        sentence_sentiment += synset.pos_score() - synset.neg_score()
    # Record the total; previously the list was created but never filled.
    sentence_sentiments.append(sentence_sentiment)
    print("Sentence:", sentence, "Sentiment Score:", sentence_sentiment)

Sentence: Let's go home. Sentiment Score: 0.0
Sentence: My car is expensive and fast Sentiment Score: 0.125
Sentence: This movie is exciting. Sentiment Score: 0.125
Sentence: You failed this class Sentiment Score: -0.125
Sentence: The weather is warm today! Sentiment Score: 0.5


Limitations: many words carry a different sentiment depending on their local context.
This simple approach disregards the relationships between words (for example, the negation in "not good" is ignored).