### Sentiment Analysis

In [4]:
!pip install textblob --user

Collecting textblob
  Using cached https://files.pythonhosted.org/packages/60/f0/1d9bfcc8ee6b83472ec571406bd0dd51c0e6330ff1a51b2d29861d389e85/textblob-0.15.3-py2.py3-none-any.whl
Installing collected packages: textblob
Successfully installed textblob-0.15.3


In [5]:
import nltk
nltk.download('punkt')
from textblob import TextBlob
from textblob import Blobber
from textblob.sentiments import NaiveBayesAnalyzer
blob = TextBlob("This restaurant was great, but I'm not sure if I'll go there again")
blob.sentiment

[nltk_data] Downloading package punkt to /home/s_r/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Sentiment(polarity=0.275, subjectivity=0.8194444444444444)

In [6]:
def from_sent(sent):
    return {word: True for word in nltk.word_tokenize(sent)}

s1 = "This is a good book\n"
s2 = "This is an awesome book\n"
s3 = "This is a bad book\n"
s4 = "This is a terrible book\n"

training_data = [[from_sent(s1),'pos'],[from_sent(s2),'pos'],[from_sent(s3),'neg'],[from_sent(s4),'neg']]
for t in training_data:
    print(t)
    
from nltk.classify import NaiveBayesClassifier
model = NaiveBayesClassifier.train(training_data)
model.classify(from_sent("This is a good article"))

model.classify(from_sent("This is a bad article"))

[{'This': True, 'is': True, 'a': True, 'good': True, 'book': True}, 'pos']
[{'This': True, 'is': True, 'an': True, 'awesome': True, 'book': True}, 'pos']
[{'This': True, 'is': True, 'a': True, 'bad': True, 'book': True}, 'neg']
[{'This': True, 'is': True, 'a': True, 'terrible': True, 'book': True}, 'neg']


'neg'

### Gender Identification

In [7]:
import nltk
from nltk.corpus import names
nltk.download('names')
labeled_names = ([(name,'male') for name in names.words('male.txt')] + [(name,'female') for name in names.words('female.txt')])
import random
random.shuffle(labeled_names)

[nltk_data] Downloading package names to /home/s_r/nltk_data...
[nltk_data]   Unzipping corpora/names.zip.


In [8]:
def gender_features(word):
    return {'last_letter':word[-1]}

In [9]:
featuresets = [(gender_features(n),gender) for (n,gender) in labeled_names]
train_set, test_set = featuresets[500:],featuresets[:500]
classifier = nltk.NaiveBayesClassifier.train(train_set)

In [10]:
classifier.classify(gender_features('Pranav'))

'male'

In [11]:
print(nltk.classify.accuracy(classifier,test_set))

0.734


In [12]:
classifier.show_most_informative_features(5)

Most Informative Features
             last_letter = 'k'              male : female =     45.2 : 1.0
             last_letter = 'a'            female : male   =     34.4 : 1.0
             last_letter = 'f'              male : female =     24.3 : 1.0
             last_letter = 'v'              male : female =     11.2 : 1.0
             last_letter = 'd'              male : female =     10.2 : 1.0


In [13]:
import nltk
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
nltk.download('subjectivity')
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
n_instances = 100
subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
len(subj_docs), len(obj_docs)

[nltk_data] Downloading package subjectivity to /home/s_r/nltk_data...
[nltk_data]   Unzipping corpora/subjectivity.zip.


(100, 100)

In [14]:
train_subj_docs = subj_docs[:80]
test_subj_docs = subj_docs[80:100]
train_obj_docs = obj_docs[:80]
test_obj_docs = obj_docs[80:100]
training_docs = train_subj_docs+train_obj_docs
testing_docs = test_subj_docs+test_obj_docs
sentim_analyzer = SentimentAnalyzer()
all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])

In [15]:
unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
len(unigram_feats)                   
sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

In [16]:
training_set = sentim_analyzer.apply_features(training_docs)
test_set = sentim_analyzer.apply_features(testing_docs)

trainer = NaiveBayesClassifier.train
classifier = sentim_analyzer.train(trainer, training_set)

for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
    print('{0}: {1}'.format(key, value))

Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.8
F-measure [obj]: 0.8
F-measure [subj]: 0.8
Precision [obj]: 0.8
Precision [subj]: 0.8
Recall [obj]: 0.8
Recall [subj]: 0.8


### Vader

In [17]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sentences = ["Vader is smart and funny"]
sentences = ["Vader is good and funny","Vader is smart and funny","Vader is very smart and funny"]
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

sentence = "Vader is worst"
ss1 = sid.polarity_scores(sentence)
print(ss1)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/s_r/nltk_data...


{'neg': 0.672, 'neu': 0.328, 'pos': 0.0, 'compound': -0.6249}


In [18]:
sentence = "Vader is too good sometimes"
ss3 = sid.polarity_scores(sentence)
print(ss3)

{'neg': 0.0, 'neu': 0.58, 'pos': 0.42, 'compound': 0.4404}


### Sentiment Analysis using IBM Watson Tone Analyzer API

In [24]:
pip install --upgrade "ibm-watson>=4.6.0" --user

Processing /home/s_r/.cache/pip/wheels/49/6d/cf/1d91261b96363da78bf9b02699fd2262e6b5dad179500690c1/ibm_watson-5.1.0-cp38-none-any.whl
Processing /home/s_r/.cache/pip/wheels/49/1a/93/9c99ecd9fcfcdc862e4f4e61fc596db58f579fbb4c89da47b3/ibm_cloud_sdk_core-3.3.6-cp38-none-any.whl
Collecting websocket-client==0.48.0
  Using cached https://files.pythonhosted.org/packages/8a/a1/72ef9aa26cfe1a75cee09fc1957e4723add9de098c15719416a1ee89386b/websocket_client-0.48.0-py2.py3-none-any.whl
Collecting PyJWT<3.0.0,>=2.0.0a1
  Using cached https://files.pythonhosted.org/packages/b4/9b/8850f99027ed029af6828199cc87179eaccbbf1f9e6e373e7f0177d32dad/PyJWT-2.0.1-py3-none-any.whl
Installing collected packages: PyJWT, ibm-cloud-sdk-core, websocket-client, ibm-watson
Successfully installed PyJWT-2.0.1 ibm-cloud-sdk-core-3.3.6 ibm-watson-5.1.0 websocket-client-0.48.0
Note: you may need to restart the kernel to use updated packages.


In [25]:
#Understand emotions and communication style in text
#Analyze emotions and tones in what people write online, like tweets or reviews. 
#Predict whether they are happy, sad, confident, and more
# Enhance customer service-See if customers are satisfied or frustrated, and if agents are polite and sympathetic.
#The IBM Watson™ Tone Analyzer service uses linguistic analysis to detect emotional and language tones in written text
# Use the service to understand how your written communications are perceived and then to improve the tone of communications. 
#Businesses can use the service to learn the tone of their customers' communications 
#respond to each customer appropriately, or to understand and improve their customer conversations.

In [26]:
#Analyze general tone
#Use the general-purpose endpoint to analyze the tone of your input content. 
#The service analyzes the content for emotional and language tones. 
#The method always analyzes the tone of the full document; 
#by default, it also analyzes the tone of each individual sentence of the content.
#You can submit no more than 128 KB of total input content and 
#no more than 1000 individual sentences in JSON, plain text, or HTML format. 
#The service analyzes the first 1000 sentences for document-level analysis 
#and only the first 100 sentences for sentence-level analysis.

#Per the JSON specification, the default character encoding for JSON content is effectively always UTF-8; 
#per the HTTP specification, the default encoding for plain text and HTML is ISO-8859-1 (effectively, the ASCII character set). 
#When specifying a content type of plain text or HTML, include the charset parameter to indicate the character encoding of the input text;
#for example: Content-Type: text/plain;charset=utf-8. 
#For text/html, the service removes HTML tags and analyzes only the textual content.

#tone(self, tone_input, content_type=None, sentences=None, tones=None, content_language=None, accept_language=None, **kwargs)
#ToneInput- Input for the general-purpose endpoint.
#content_type - The type of the input.
# sentences -Indicates whether the service is to return an analysis of each individual sentence in addition to its analysis of the full document. 
#tones-list of tones for which the service is to return its analysis of the input; the indicated tones apply both to the full document and to individual sentences of the document. 
#content_language-The language of the input text for the request
#accept_language -The desired language of the response

#ToneAnalysis -The tone analysis results for the input from the general-purpose endpoint.
#document_tone - The results of the analysis for the full input content.
#sentences_tone- An array of SentenceAnalysis objects that provides the results of the analysis for the individual sentences of the input content

In [27]:
import json
from ibm_watson import ToneAnalyzerV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator


In [28]:
authenticator = IAMAuthenticator('MDa1gpEjiFn63BJsMSJGvzFjjOVJqBkrHwY3_y41EcfW')
tone_analyzer = ToneAnalyzerV3(
    version='2017-09-21',
    authenticator=authenticator
)

In [29]:
tone_analyzer.set_service_url('https://api.eu-gb.tone-analyzer.watson.cloud.ibm.com/instances/96f955fa-df0a-45cf-9d40-cef4b3995d3b')

In [30]:
text = 'Team, I know that times are tough! Product '\
    'sales have been disappointing for the past three '\
    'quarters. We have a competitive product, but we '\
    'need to do a better job of selling it!'

tone_analysis = tone_analyzer.tone(
    {'text': text},
    content_type='application/json'
).get_result()
print(json.dumps(tone_analysis, indent=2))

{
  "document_tone": {
    "tones": [
      {
        "score": 0.6165,
        "tone_id": "sadness",
        "tone_name": "Sadness"
      },
      {
        "score": 0.829888,
        "tone_id": "analytical",
        "tone_name": "Analytical"
      }
    ]
  },
  "sentences_tone": [
    {
      "sentence_id": 0,
      "text": "Team, I know that times are tough!",
      "tones": [
        {
          "score": 0.801827,
          "tone_id": "analytical",
          "tone_name": "Analytical"
        }
      ]
    },
    {
      "sentence_id": 1,
      "text": "Product sales have been disappointing for the past three quarters.",
      "tones": [
        {
          "score": 0.771241,
          "tone_id": "sadness",
          "tone_name": "Sadness"
        },
        {
          "score": 0.687768,
          "tone_id": "analytical",
          "tone_name": "Analytical"
        }
      ]
    },
    {
      "sentence_id": 2,
      "text": "We have a competitive product, but we need to do a bette

In [31]:
#Analyze customer-engagement tone
#Use the customer-engagement endpoint to analyze the tone of customer service and customer support conversations. 
#For each utterance of a conversation, the method reports the most prevalent subset of the following seven tones: 
#sad, frustrated, satisfied, excited, polite, impolite, and sympathetic.

#If you submit more than 50 utterances, the service returns a warning for the overall content and 
#analyzes only the first 50 utterances. 
#If you submit a single utterance that contains more than 500 characters, 
#the service returns an error for that utterance and does not analyze the utterance. 
#The request fails if all utterances have more than 500 characters. 
#Per the JSON specification, the default character encoding for JSON content is effectively always UTF-8.

#tone_chat(self, utterances, content_language=None, accept_language=None, **kwargs)
#utterances- An array of Utterance objects that provides the input content that the service is to analyze.
#content_language-The language of the input text for the request
#accept_language -The desired language of the response

#UtteranceAnalyses- The results of the analysis for the utterances of the input content.
#utterances_tone- An array of UtteranceAnalysis objects that provides the results for each utterance of the input.

In [32]:
utterances = [
    {
        "text": "Hello, I'm having a problem with your product.",
        "user": "customer"
    },
    {
        "text": "OK, let me know what's going on, please.",
        "user": "agent"
    },
    {
        "text": "Well, nothing is working :(",
        "user": "customer"
    },
    {
        "text": "Sorry to hear that.",
        "user": "agent"
    }
]

utterance_analyses = tone_analyzer.tone_chat(utterances).get_result()
print(json.dumps(utterance_analyses, indent=2))

{
  "utterances_tone": [
    {
      "utterance_id": 0,
      "utterance_text": "Hello, I'm having a problem with your product.",
      "tones": [
        {
          "score": 0.686361,
          "tone_id": "polite",
          "tone_name": "Polite"
        }
      ]
    },
    {
      "utterance_id": 1,
      "utterance_text": "OK, let me know what's going on, please.",
      "tones": [
        {
          "score": 0.92724,
          "tone_id": "polite",
          "tone_name": "Polite"
        }
      ]
    },
    {
      "utterance_id": 2,
      "utterance_text": "Well, nothing is working :(",
      "tones": [
        {
          "score": 0.997795,
          "tone_id": "sad",
          "tone_name": "Sad"
        }
      ]
    },
    {
      "utterance_id": 3,
      "utterance_text": "Sorry to hear that.",
      "tones": [
        {
          "score": 0.730982,
          "tone_id": "polite",
          "tone_name": "Polite"
        },
        {
          "score": 0.672499,
          "to

### Twitter Sentiment Analysis with NLTK

In [33]:
!pip install tweepy



In [6]:
import re
import tweepy
from textblob import TextBlob
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener

In [7]:
class TwitterClient(object): 
    ''' 
    Generic Twitter Class for sentiment analysis. 
    '''
    def __init__(self): 
        ''' 
        Class constructor or initialization method. 
        '''
        # keys and tokens from the Twitter Dev Console 
        consumer_key = 'ohI8ofhxSh84sGU9pWvhOfEE8'
        consumer_secret = 'dIskwpQ9K74GgvOmRNQkVdlSLKx8RaBnaQVvg5dqStMr0gqfBx'
        access_token = '1304458089119100929-89douCsK85ZbD4dzH4KM5KTTBwtpsH'
        access_token_secret = 'ogXgQnu4dWqVdJEJqyROtc2lENC6HTc29fudV8w8vLr8i'
  
        # attempt authentication 
        try: 
            # create OAuthHandler object 
            self.auth = OAuthHandler(consumer_key, consumer_secret) 
            # set access token and secret 
            self.auth.set_access_token(access_token, access_token_secret) 
            # create tweepy API object to fetch tweets 
            self.api = tweepy.API(self.auth) 
        except: 
            print("Error: Authentication Failed") 
  
    def clean_tweet(self, tweet): 
        ''' 
        Utility function to clean tweet text by removing links, special characters 
        using simple regex statements. 
        '''
        return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split()) 
  
    def get_tweet_sentiment(self, tweet): 
        ''' 
        Utility function to classify sentiment of passed tweet 
        using textblob's sentiment method 
        '''
        # create TextBlob object of passed tweet text 
        analysis = TextBlob(self.clean_tweet(tweet)) 
        # set sentiment 
        if analysis.sentiment.polarity > 0: 
            return 'positive'
        elif analysis.sentiment.polarity == 0: 
            return 'neutral'
        else: 
            return 'negative'
  
    def get_tweets(self, query, count = 10): 
        ''' 
        Main function to fetch tweets and parse them. 
        '''
        # empty list to store parsed tweets 
        tweets = [] 
  
        try: 
            # call twitter api to fetch tweets 
            fetched_tweets = self.api.search(q = query, count = count) 
  
            # parsing tweets one by one 
            for tweet in fetched_tweets: 
                # empty dictionary to store required params of a tweet 
                parsed_tweet = {} 
  
                # saving text of tweet 
                parsed_tweet['text'] = tweet.text 
                # saving sentiment of tweet 
                parsed_tweet['sentiment'] = self.get_tweet_sentiment(tweet.text) 
  
                # appending parsed tweet to tweets list 
                if tweet.retweet_count > 0: 
                    # if tweet has retweets, ensure that it is appended only once 
                    if parsed_tweet not in tweets: 
                        tweets.append(parsed_tweet) 
                else: 
                    tweets.append(parsed_tweet) 
  
            # return parsed tweets 
            return tweets 
  
        except tweepy.TweepError as e: 
            # print error (if any) 
            print("Error : " + str(e)) 
  
def main(): 
    # creating object of TwitterClient Class 
    api = TwitterClient() 
    # calling function to get tweets 
    tweets = api.get_tweets(query = 'Narendra Modi', count = 200) 
  
    # picking positive tweets from tweets 
    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive'] 
    # percentage of positive tweets 
    print("Positive tweets percentage: {} %".format(100*len(ptweets)/len(tweets))) 
    # picking negative tweets from tweets 
    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative'] 
    # percentage of negative tweets 
    print("Negative tweets percentage: {} %".format(100*len(ntweets)/len(tweets))) 
    # percentage of neutral tweets 
    print("Neutral tweets percentage: {} %".format(100*(len(tweets) -(len( ntweets )+len( ptweets)))/len(tweets))) 
  
    # printing first 5 positive tweets 
    print("\n\nPositive tweets:") 
    for tweet in ptweets[:10]: 
        print(tweet['text']) 
  
    # printing first 5 negative tweets 
    print("\n\nNegative tweets:") 
    for tweet in ntweets[:10]: 
        print(tweet['text']) 
  
if __name__ == "__main__": 
    # calling main function 
    main() 

Positive tweets percentage: 34.0 %
Negative tweets percentage: 18.0 %
Neutral tweets percentage: 48.0 %


Positive tweets:
RT @zoo_bear: Todays petrol prices proves Narendra Modi is successful in making this country a Vishwaguru. @ashokepandit
RT @schaheid: For the first time the people of India know that China is occupying Indian territory and Modi can't do anything about it: Ind…
Republic Day Parade is tribute to India's great socio-cultural heritage: PM Narendra Modi
https://t.co/P7zqgpdpQs… https://t.co/nPVOy8Bogg
Psn

Republic Day Parade is tribute to India's great socio-cultural heritage: PM Narendra Modi
https://t.co/gulIQMw9Ev via NaMo App
.@BJP4Bengal party offices distributed as many as 400 invitation cards to @victoriamemkol, a programme organised by… https://t.co/AMfAi3qQ9T
I am here to help you elect a govt of farmers, labourers, small &amp; medium business people, a govt that looks after i… https://t.co/zhMJlTfXPY
RT @INCIndia: That's the first attack on foundation of thi