# Name: Nidhi Bangera 
# Project : Sentiment Analysis using NLP Libraries
# Unsupervised Lexicons

# Sentiment Analysis using the VADER lexicon

**Importing necessary library**

In [1]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

**Loading dataset**

In [2]:
import pandas as pd
# Location of the movie-review CSV. This is an absolute path on the author's
# machine; point it at your own copy of the file before running.
DATA_PATH = r'C:\Users\HP\Downloads\movie_review.csv'

# Read the reviews and preview the first rows.
dataset = pd.read_csv(DATA_PATH)
dataset.head()

Unnamed: 0,sentiment,review
0,1,one reviewer ha mentioned watching 1 oz episod...
1,1,wonderful little production filming technique ...
2,1,thought wa wonderful way spend time hot summer...
3,0,basically family little boy jake think zombie ...
4,1,petter matteis love time money visually stunni...


In [3]:
# Recode the numeric labels (1 / 0) as 'positive' / 'negative'.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on dataset['sentiment']: that form operates on a
# temporary selection, triggers a chained-assignment warning on recent pandas
# and leaves `dataset` unchanged when Copy-on-Write is enabled.
dataset['sentiment'] = dataset['sentiment'].replace({1: 'positive', 0: 'negative'})

In [5]:
def analyze_sentiment_vader_lexicon(review, threshold=0.1, verbose=False):
    """Label a review 'positive' or 'negative' from its VADER compound score.

    Parameters
    ----------
    review : str
        Text to score.
    threshold : float, default 0.1
        A compound score greater than or equal to this value yields
        'positive'; anything lower yields 'negative'.
    verbose : bool, default False
        When True, print a one-row table holding the predicted label, the
        compound score rounded to 2 decimals, and the positive / negative /
        neutral proportions as percentages.

    Returns
    -------
    str
        Either 'positive' or 'negative'.
    """
    # Constructing the analyzer loads the lexicon, so build it once on the
    # first call and reuse it; scoring itself does not depend on prior calls.
    analyzer = getattr(analyze_sentiment_vader_lexicon, '_analyzer', None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        analyze_sentiment_vader_lexicon._analyzer = analyzer

    scores = analyzer.polarity_scores(review)

    # The compound score is the single aggregate used for the decision.
    agg_score = scores['compound']
    final_sentiment = 'positive' if agg_score >= threshold else 'negative'

    if verbose:
        def as_percent(proportion):
            # Format after multiplying so binary floating-point noise
            # (e.g. 0.29 * 100 == 28.999999999999996) never reaches the table.
            return '{:.1f}%'.format(round(proportion, 2) * 100)

        final = round(agg_score, 2)
        positive = as_percent(scores['pos'])
        negative = as_percent(scores['neg'])
        neutral = as_percent(scores['neu'])

        # Two-level column header: a single banner over the five statistics.
        stat_columns = pd.MultiIndex(
            levels=[['SENTIMENT STATS:'],
                    ['Predicted Sentiment', 'Polarity Score',
                     'Positive', 'Negative', 'Neutral']],
            codes=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]])
        sentiment_frame = pd.DataFrame(
            [[final_sentiment, final, positive, negative, neutral]],
            columns=stat_columns)
        print(sentiment_frame)

    return final_sentiment

In [7]:
import numpy as np

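**Prepare evaluation data** (extract the review and sentiment columns as arrays and hold out the rows from index 35000 onward as the test set)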

In [8]:
# Row index at which the evaluation slice starts; everything from here to the
# end of the dataset is used as test data.
TEST_START = 35000

# Pull the text and label columns out of the frame as NumPy arrays.
reviews = np.array(dataset['review'])
sentiments = np.array(dataset['sentiment'])

# Hold-out slice used for model evaluation.
test_reviews, test_sentiments = reviews[TEST_START:], sentiments[TEST_START:]

# Positions (relative to the test slice) of the reviews inspected by hand.
sample_review_ids = [7626, 3533, 13010]

In [10]:
import nltk

In [11]:
# Fetch the 'vader_lexicon' resource into the local nltk_data directory.
# It must be available before analyze_sentiment_vader_lexicon is first called,
# because that function instantiates SentimentIntensityAnalyzer.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...


True

**Predict sentiment for sample test reviews**

In [12]:
# Pair each hand-picked test review with its true label.
sample_pairs = zip(test_reviews[sample_review_ids],
                   test_sentiments[sample_review_ids])

# Show the review, its actual label and the verbose VADER breakdown.
for review, sentiment in sample_pairs:
    print('REVIEW:', review)
    print('Actual Sentiment:', sentiment)
    pred = analyze_sentiment_vader_lexicon(review, threshold=0.4, verbose=True)
    print('-' * 60)

REVIEW: comment stupid movie acting average worse screenplay sense skip
Actual Sentiment: negative
     SENTIMENT STATS:                                         
  Predicted Sentiment Polarity Score Positive Negative Neutral
0            negative          -0.76     0.0%    48.0%   52.0%
------------------------------------------------------------
REVIEW: dont care people voted movie bad want truth good movie ha every thing movie really get one
Actual Sentiment: positive
     SENTIMENT STATS:                                         
  Predicted Sentiment Polarity Score Positive Negative Neutral
0            negative            0.2    34.0%    23.0%   42.0%
------------------------------------------------------------
REVIEW: worst horror film ever funniest film ever rolled one got see film cheap unbeliaveble see really p watch carrot
Actual Sentiment: positive
     SENTIMENT STATS:                                      \
  Predicted Sentiment Polarity Score             Positive   
0      

**Evaluate model performance**

In [13]:
import model_evaluation_utils as meu

In [14]:
# Label every review in the test slice, using the same 0.4 threshold as the
# sample predictions and without printing per-review statistics.
predicted_sentiments = [
    analyze_sentiment_vader_lexicon(text, threshold=0.4, verbose=False)
    for text in test_reviews
]

In [15]:
# Compare the predicted labels with the true test labels and print the
# metrics, classification report and confusion matrix.
meu.display_model_performance_metrics(
    true_labels=test_sentiments,
    predicted_labels=predicted_sentiments,
    classes=['positive', 'negative'],
)

Model Performance metrics:
------------------------------
Accuracy: 0.6868
Precision: 0.7078
Recall: 0.6868
F1 Score: 0.6786

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.64      0.85      0.73      7510
    negative       0.77      0.53      0.63      7490

    accuracy                           0.69     15000
   macro avg       0.71      0.69      0.68     15000
weighted avg       0.71      0.69      0.68     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       6357     1153
        negative       3545     3945
