## Sentiment Analysis using NLTK

In [7]:
!pip install textblob

import nltk
nltk.download('punkt')
from textblob import TextBlob
from textblob import Blobber
from textblob.sentiments import NaiveBayesAnalyzer



You should consider upgrading via the 'c:\users\prasa\anaconda3\python.exe -m pip install --upgrade pip' command.
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\prasa\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [8]:
# Wrap a mixed-opinion sentence in a TextBlob and display its sentiment;
# the result is a Sentiment(polarity, subjectivity) value.
blob = TextBlob("This restaurant was great, but I'm not sure if I'll go there again.")
blob.sentiment

Sentiment(polarity=0.275, subjectivity=0.8194444444444444)

In [16]:
def form_sent(sent):
    """Turn a sentence into a bag-of-words feature dict.

    Every token produced by ``nltk.word_tokenize`` becomes a key mapped to
    ``True``; a token that occurs more than once still yields a single key.
    """
    tokens = nltk.word_tokenize(sent)
    return dict.fromkeys(tokens, True)

# Four toy sentences: the first two are labelled 'pos', the last two 'neg'.
s1, s2 = "This is a good book\n", "This is a awesome book\n"
s3, s4 = "This is a bad book\n", "This is a terrible book\n"

# Each training example is a [feature_dict, label] pair.
training_data = [
    [form_sent(text), label]
    for text, label in ((s1, 'pos'), (s2, 'pos'), (s3, 'neg'), (s4, 'neg'))
]
for t in training_data:
    print(t)


from nltk.classify import NaiveBayesClassifier
# Fit a Naive Bayes model on the toy data, then print the predicted label
# for two sentences that were not part of the training data.
model = NaiveBayesClassifier.train(training_data)
for unseen in ('This is a good article', 'This is a bad article'):
    print(model.classify(form_sent(unseen)))


[{'This': True, 'is': True, 'a': True, 'good': True, 'book': True}, 'pos']
[{'This': True, 'is': True, 'a': True, 'awesome': True, 'book': True}, 'pos']
[{'This': True, 'is': True, 'a': True, 'bad': True, 'book': True}, 'neg']
[{'This': True, 'is': True, 'a': True, 'terrible': True, 'book': True}, 'neg']
pos
neg


In [17]:
from nltk.corpus import names
nltk.download('names')
import random

# Tag every name with the gender of the corpus file it was read from.
labeled_names = (
    [(name, 'male') for name in names.words('male.txt')]
    + [(name, 'female') for name in names.words('female.txt')]
)

# Shuffle with a dedicated fixed-seed generator so the order -- and therefore
# any later slicing of this list into train/test parts -- is identical on
# every run, without touching the global random state.
random.Random(42).shuffle(labeled_names)


[nltk_data] Downloading package names to
[nltk_data]     C:\Users\prasa\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\names.zip.


In [19]:
def gender_features(word):
    """Return the feature dict for a name: its final letter.

    Surrounding whitespace is ignored and the letter is lower-cased, so
    ``"ANIL "`` produces the same feature as ``"Anil"``.  An empty or
    all-whitespace name yields ``{'last_letter': ''}`` instead of raising
    ``IndexError``.

    Args:
        word: The name to extract the feature from.

    Returns:
        A dict with the single key ``'last_letter'``.
    """
    cleaned = word.strip()
    # Slice instead of index so an empty string gives '' rather than an error.
    return {'last_letter': cleaned[-1:].lower()}

In [20]:
# Convert every (name, label) pair into a (feature_dict, label) pair.
featuresets = [(gender_features(name), label) for name, label in labeled_names]

# The first 500 examples are held out for testing; the rest are used to train.
test_set = featuresets[:500]
train_set = featuresets[500:]
classifier = nltk.NaiveBayesClassifier.train(train_set)

In [22]:
# Predict a label for a single name from its last-letter feature.
classifier.classify(gender_features("Pratik"))

'male'

In [25]:
# Predict a label for a second name, again from its last-letter feature.
classifier.classify(gender_features("Anil"))

'male'

In [27]:
# Score the classifier against the examples in test_set.
print(nltk.classify.accuracy(classifier, test_set))

0.78


In [28]:
# Print the 5 most informative features with their label likelihood ratios.
classifier.show_most_informative_features(5)

Most Informative Features
             last_letter = 'a'            female : male   =     35.7 : 1.0
             last_letter = 'k'              male : female =     31.5 : 1.0
             last_letter = 'f'              male : female =     15.8 : 1.0
             last_letter = 'p'              male : female =     11.8 : 1.0
             last_letter = 'v'              male : female =     11.1 : 1.0


In [29]:
import nltk
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
nltk.download('subjectivity')
from nltk.sentiment import SentimentAnalyzer
# Import only the helpers this notebook uses instead of `import *`, so it is
# clear where each name comes from and nothing else leaks into the namespace.
from nltk.sentiment.util import extract_unigram_feats, mark_negation

# Take the first n_instances sentences of each category and tag each one
# with its category label.
n_instances = 100
subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
len(subj_docs), len(obj_docs)


[nltk_data] Downloading package subjectivity to
[nltk_data]     C:\Users\prasa\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\subjectivity.zip.


(100, 100)

In [30]:
# Per category: the first 80 documents train the model, documents 80-99 test it.
train_subj_docs, test_subj_docs = subj_docs[:80], subj_docs[80:100]
train_obj_docs, test_obj_docs = obj_docs[:80], obj_docs[80:100]

# Subjective documents come first in both combined lists.
training_docs = train_subj_docs + train_obj_docs
testing_docs = test_subj_docs + test_obj_docs

# Collect the words of the training documents after negation marking.
sentim_analyzer = SentimentAnalyzer()
negation_marked_docs = [mark_negation(doc) for doc in training_docs]
all_words_neg = sentim_analyzer.all_words(negation_marked_docs)


In [35]:
# Select unigram features from the negation-marked training words, filtering
# out rare words via min_freq=4, and register them with the analyzer.
unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

# Kept as the cell's last expression so the feature count is actually shown;
# an expression in the middle of a cell is evaluated but never displayed.
len(unigram_feats)


In [36]:
# Training function handed to the analyzer.
trainer = NaiveBayesClassifier.train

# Run the registered feature extractors over both document lists.
training_set, test_set = (
    sentim_analyzer.apply_features(docs) for docs in (training_docs, testing_docs)
)
classifier = sentim_analyzer.train(trainer, training_set)


Training classifier


In [37]:
# Evaluate on test_set and print one "name: value" line per metric,
# sorted by metric name.
metrics = sentim_analyzer.evaluate(test_set)
for key, value in sorted(metrics.items()):
    print(key, value, sep=': ')


Evaluating NaiveBayesClassifier results...
Accuracy: 0.8
F-measure [obj]: 0.8
F-measure [subj]: 0.8
Precision [obj]: 0.8
Precision [subj]: 0.8
Recall [obj]: 0.8
Recall [subj]: 0.8


In [46]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')

# Sample sentences kept for experimentation; they are not scored in this cell.
sentences = [
    "VADER is good and funny.",
    "VADER is smart, handsome, and funny!",
    "VADER is very smart, handsome, and funny.",
    "The plot was good, but the characters are uncompelling and the dialog is not great.",
    "A really bad, horrible book.",
]
sid = SentimentIntensityAnalyzer()

# Score a single sentence and print its neg/neu/pos/compound scores.
sentence = "VADER is worst"
ss1 = sid.polarity_scores(sentence)
print(ss1)


{'neg': 0.672, 'neu': 0.328, 'pos': 0.0, 'compound': -0.6249}


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\prasa\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [47]:
# Score another sentence with the same analyzer; `sentence` and `ss1` are
# overwritten with the new text and its scores.
sentence = "VADER is too good sometimes"
ss1 = sid.polarity_scores(sentence)
print(ss1)

{'neg': 0.0, 'neu': 0.58, 'pos': 0.42, 'compound': 0.4404}
