# Rule Based Sentiment Analysis

### Imports

In [2]:
import nltk
from nltk.sentiment import vader
nltk.download('vader_lexicon')



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/sumansigdel/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

### Paths of the file name 

In [18]:
pos_reviews_path = "rt-polaritydata/rt-polaritydata/rt-polarity.pos"
neg_reviews_path = "rt-polaritydata/rt-polaritydata/rt-polarity.neg"

In [14]:
# Initializing the sentiment intensity analyzer
int_analyser = vader.SentimentIntensityAnalyzer()

### Tests

In [16]:
# Test with emoticons
int_analyser.polarity_scores("I am feeling good :D")

{'neg': 0.0, 'neu': 0.106, 'pos': 0.894, 'compound': 0.8143}

In [17]:
# Test without emoticons
int_analyser.polarity_scores("I am feeling good")

{'neg': 0.0, 'neu': 0.185, 'pos': 0.815, 'compound': 0.5267}

## Opening the files and reading them

In [29]:
with open(pos_reviews_path, 'r', encoding='latin-1') as pos_file :
    pos_reviews = pos_file.readlines()

In [31]:
len(pos_reviews)

5331

In [32]:
with open(neg_reviews_path, 'r', encoding='latin-1') as neg_file :
    neg_reviews = neg_file.readlines()

In [33]:
len(neg_reviews)

5331

### Function to use the Vader Sentiment Intensity Analyser

In [34]:
def vader_sentiment(reviews):
    return int_analyser.polarity_scores(reviews)['compound']
    

In [37]:
vader_sentiment("This is the best retaurant in the city")

0.6369

In [38]:
def get_review_sentiment(senti_calculator):
    neg_reviews_result = [senti_calculator(review) for review in neg_reviews]
    pos_reviews_result = [senti_calculator(review) for review in pos_reviews]
    return {'results-on-positive' : pos_reviews_result, 'results-on-negative' : neg_reviews_result}
    

In [39]:
vader_results = get_review_sentiment(vader_sentiment)

In [47]:
len(vader_results['results-on-positive'])

5331

### Testing the accuracy of Vader

In [50]:
pos_reviews_results = vader_results['results-on-positive']

pct_true_positives = sum(x>0 for x in pos_reviews_results)/len(pos_reviews_results)

In [51]:
pct_true_positives

0.6946163946726693

In [52]:
neg_reviews_results = vader_results['results-on-negative']

pct_true_negatives = sum(x>0 for x in neg_reviews_results)/len(neg_reviews_results)

In [53]:
pct_true_negatives

0.42224723316450946

### Calculating the overall accuracy

In [55]:
total_accuracy = sum(x>0 for x in neg_reviews_results) + sum(x>0 for x in pos_reviews_results)
total_number_of_reviews = len(pos_reviews_results) + len(neg_reviews_results)

In [60]:
print("The total accuracy of Vader is " + str((total_accuracy/total_number_of_reviews)))

The total accuracy of Vader is 0.5584318139185894
