In [1]:
import numpy as np
import pandas as pd
import nltk
from sklearn.model_selection import train_test_split
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk.classify import NaiveBayesClassifier



### Build a classifier to distinguish subjective from objective sentences

In [2]:
# Corpus reader for the labelled sentences; read later via subjectivity.sents(...).
from nltk.corpus import subjectivity
# Fetch the NLTK data packages this notebook asks for. Each call logs its
# progress; the last call is a bare expression, so its return value is echoed
# as the cell output.
nltk.download('subjectivity')
# presumably required by tokenize.sent_tokenize further down — TODO confirm
nltk.download('punkt')
# presumably the lexicon behind SentimentIntensityAnalyzer — TODO confirm
nltk.download('vader_lexicon')
# NOTE(review): no use of the stopwords corpus is visible in this part of the
# notebook — confirm it is still needed.
nltk.download('stopwords')

[nltk_data] Downloading package subjectivity to
[nltk_data]     /home/kishore/nltk_data...
[nltk_data]   Package subjectivity is already up-to-date!
[nltk_data] Downloading package punkt to /home/kishore/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/kishore/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/kishore/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [97]:
# Pair every sentence of each corpus category with that category's name,
# giving two lists of (sentence, label) tuples: one for 'subj', one for 'obj'.
subj_docs, obj_docs = (
    [(sent, label) for sent in subjectivity.sents(categories=label)]
    for label in ('subj', 'obj')
)

In [98]:
# Hold out 20% of each class for testing. The split is seeded so that it, and
# every metric reported from it, is the same on each Restart & Run All.
SPLIT_SEED = 42
train_subj_docs, test_subj_docs = train_test_split(
    subj_docs, test_size=.2, random_state=SPLIT_SEED
)
train_obj_docs, test_obj_docs = train_test_split(
    obj_docs, test_size=.2, random_state=SPLIT_SEED
)
# Each class is split separately and then concatenated, so both classes keep
# the same train/test proportion.
training_docs = train_subj_docs + train_obj_docs
testing_docs = test_subj_docs + test_obj_docs

In [99]:
# Fresh analyzer; feature extractors and the trained classifier are attached to
# it in the cells that follow.
sentiment_analyzer = SentimentAnalyzer()
# Collect the vocabulary of the training documents after passing each one
# through mark_negation.
# NOTE(review): apply_features is later called on the un-marked training_docs,
# so vocabulary entries that only exist in negation-marked form may never
# match — confirm this is intended.
all_words_neg = sentiment_analyzer.all_words(
    list(map(mark_negation, training_docs))
)

In [100]:
# Choose the unigram features from the collected vocabulary.
# min_freq=0 presumably applies no frequency cut-off, i.e. every word becomes a
# feature — TODO confirm against the NLTK documentation.
unigram_feats = sentiment_analyzer.unigram_word_feats(
    all_words_neg,
    min_freq=0,
)

In [101]:
# Register the unigram extractor on the analyzer, bound to the selected
# vocabulary.
sentiment_analyzer.add_feat_extractor(
    extract_unigram_feats,
    unigrams=unigram_feats,
)

In [102]:
# Run the registered feature extractors over both splits, training first and
# test second.
training_set, test_set = (
    sentiment_analyzer.apply_features(docs)
    for docs in (training_docs, testing_docs)
)

In [103]:
# Fit a Naive Bayes model on the training features. The analyzer is handed the
# training routine and the data; the fitted model is kept in `classifier`.
trainer = NaiveBayesClassifier.train
classifier = sentiment_analyzer.train(
    trainer,
    training_set,
)

Training classifier


In [104]:
# Evaluate on the held-out set and print every returned metric, one per line,
# in alphabetical key order. The pasted REPL continuation prompt ("...") that
# used to start the loop body is removed: it only ran because IPython strips
# prompts and is a syntax error as plain Python.
for key, value in sorted(sentiment_analyzer.evaluate(test_set).items()):
    print('{0}: {1}'.format(key, value))

Evaluating NaiveBayesClassifier results...
Accuracy: 0.9235
F-measure [obj]: 0.9214175654853622
F-measure [subj]: 0.9254749147588895
Precision [obj]: 0.9472016895459345
Precision [subj]: 0.9021842355175689
Recall [obj]: 0.897
Recall [subj]: 0.95


### Tag new documents and get sentiment intensity

In [105]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import tokenize

In [106]:
# Sample texts to score: short service / workplace remarks plus a few
# demonstration sentences. Kept as a list because a later cell appends to it.
sentences = [
    "The service at MCD was surprisingly pleasant. Would recommend.",
    "The employee was rude and manager didn't help",
    "I worked here for 3 years and my manager never helped me",
    "Just started here and impressed with the management team",
    "Most automated sentiment analysis tools are shit",
    "VADER sentiment analisys is the shit.",
    "VADER is smart, handsome, and funny.",
    "Poor direction",
]
# Free text spanning two lines; the line break and the leading spaces of the
# second line are part of the string value.
paragraph = """Very bad exprnce
               here at the restaurant"""

In [107]:
# Split the free-text paragraph into sentences and queue them for scoring.
lines_list = tokenize.sent_tokenize(paragraph)
# Append only sentences that are not queued yet, so re-running this cell does
# not add the paragraph to `sentences` a second time.
for line in lines_list:
    if line not in sentences:
        sentences.append(line)

In [113]:
# Score every queued sentence and print, per sentence: the text, a coarse
# label, the individual scores, and a blank separator line.
sid = SentimentIntensityAnalyzer()
for sentence in sentences:
    print(sentence)
    scores = sid.polarity_scores(sentence)
    compound = scores['compound']
    # The label follows the sign of the compound score; exactly zero is neutral.
    # NOTE(review): the VADER README suggests +/-0.05 cut-offs for the compound
    # score rather than a strict sign test — confirm which is intended.
    label = 'Positive' if compound > 0 else 'Negative' if compound < 0 else 'Neutral'
    print(label)
    # All scores on one line as "key : value, " pairs in alphabetical key order.
    print("".join("{0} : {1}, ".format(name, scores[name]) for name in sorted(scores)))
    print("")

The service at MCD was surprisingly pleasant. Would recommend.
Positive
compound : 0.7906, neg : 0.0, neu : 0.429, pos : 0.571, 

The employee was rude and manager didn't help
Negative
compound : -0.6437, neg : 0.467, neu : 0.533, pos : 0.0, 

I worked here for 3 years and my manager never helped me
Neutral
compound : 0.0, neg : 0.0, neu : 1.0, pos : 0.0, 

Just started here and impressed with the management team
Positive
compound : 0.4767, neg : 0.0, neu : 0.721, pos : 0.279, 

Most automated sentiment analysis tools are shit
Negative
compound : -0.5574, neg : 0.375, neu : 0.625, pos : 0.0, 

VADER sentiment analisys is the shit.
Positive
compound : 0.6124, neg : 0.0, neu : 0.556, pos : 0.444, 

VADER is smart, handsome, and funny.
Positive
compound : 0.8316, neg : 0.0, neu : 0.254, pos : 0.746, 

Poor direction
Negative
compound : -0.4767, neg : 0.756, neu : 0.244, pos : 0.0, 

Very bad exprnce
               here at the restaurant
Negative
compound : -0.5849, neg : 0.387, neu : 0.61