# VADER Vs. TextBlob - Which One Is Better For Social Media Sentiment Analysis?

@Author: [Zoumana KEITA](zoumanakeita.medium.com/)

## VADER

In [2]:
##### VADER PREREQUISITES

# Install and import nltk
!pip install nltk
import nltk

# Download the lexicon
nltk.download("vader_lexicon")

# Import the lexicon 
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Create an instance of SentimentIntensityAnalyzer
sent_analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...




In [4]:
sentence = "VADER is pretty good at identifying the underlying sentiment of a text!"
print(sent_analyzer.polarity_scores(sentence))

{'neg': 0.0, 'neu': 0.585, 'pos': 0.415, 'compound': 0.75}


## TextBlob

In [3]:
##### TextBlob PREREQUISITES
!pip install textblob
from textblob import TextBlob



In [5]:
# Create a textblob object
sentence = TextBlob("Humans bad actions are destroying our only a one planet")

# Get the prediction/sentiment
print(sentence.sentiment)

Sentiment(polarity=-0.3, subjectivity=0.5555555555555555)


## Benchmark Analysis 

In [6]:
import pandas as pd

# Read the data set
data_url = "https://raw.githubusercontent.com/keitazoumana/VADER_sentiment-Analysis/main/data/testdata.manual.2009.06.14.csv"
sentiment_data = pd.read_csv(data_url)

# Show 3 random rows
sentiment_data.sample(3)

Unnamed: 0,4,3,Mon May 11 03:17:40 UTC 2009,kindle2,tpryan,"@stellargirl I loooooooovvvvvveee my Kindle2. Not that the DX is cool, but the 2 is fantastic in its own right."
355,4,2223,Wed May 27 23:56:46 UTC 2009,googleio,daynah,HTML 5 Demos! Lots of great stuff to come! Yes...
132,4,397,Tue Jun 02 02:55:49 UTC 2009,"""night at the museum""",jeremyempire,saw night at the museum 2 last night.. pretty ...
103,0,227,Mon May 25 17:46:06 UTC 2009,twitter api,fwhamm,is Twitter's connections API broken? Some twee...


### Data Preprocessing

In [7]:
def format_data(data):

  last_col = str(data.columns[-1])
  first_col = str(data.columns[0])

  data.rename(columns = {last_col: 'tweet_text', first_col: 'polarity'}, inplace=True) 

  # Change 0, 2, 4 to negative, neutral and positive
  labels = {0: 'negative', 2: 'neutral', 4: 'positive'}
  data['polarity'] = data['polarity'].map(labels)

  # Get only the two columns
  return data[['tweet_text', 'polarity']]

In [8]:
# Apply the transformation
data = format_data(sentiment_data)
data.sample(3)

Unnamed: 0,tweet_text,polarity
278,RT @justindavey: RT @tweetmeme GM OnStar now i...,neutral
325,LAKERS tonight let's go!!!!,positive
26,Booz Allen Hamilton has a bad ass homegrown so...,positive


## Comparative Analysis

### Helper Functions

In [9]:
def format_VADER_output(output_dict):
  
  polarity = "neutral"

  if(output_dict['compound']>= 0.05):
    polarity = "positive"

  elif(output_dict['compound']<= -0.05):
    polarity = "negative"

  return polarity

def predict_VADER_sentiment(text):
  
  output_dict =  sent_analyzer.polarity_scores(text)
  return format_VADER_output(output_dict)

In [10]:
def format_TextBlob_output(prediction):
  
  polarity = "neutral"

  if(prediction.polarity > 0):
    polarity = "positive"

  elif(prediction.polarity < 0):
    polarity = "negative"

  return polarity

def predict_Textblob_sentiment(text):
  
  text_blob_prediction =  TextBlob(text).sentiment
  return format_TextBlob_output(text_blob_prediction)

### Predictions Result

In [11]:
from sklearn.metrics import accuracy_score, classification_report

## VADER SECTION
data["vader_prediction"] = data["tweet_text"].apply(predict_VADER_sentiment)
vader_accuracy = accuracy_score(data['polarity'], data['vader_prediction'])

## TextBlob SECTIOB
data["textblob_prediction"] = data["tweet_text"].apply(predict_Textblob_sentiment)
textblob_accuracy = accuracy_score(data['polarity'], data['textblob_prediction'])


## Print the results
print("Accuracy Scores")
print("- VADER: {}".format(vader_accuracy))
print("- Textblob: {}".format(textblob_accuracy))
print("-"*50)
print("Classification report")
print("- VADER")
print(classification_report(data['polarity'], data['vader_prediction']))
print("\n")

print("- TextBlob")
print(classification_report(data['polarity'], data['textblob_prediction']))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Accuracy Scores
- VADER: 0.716297786720322
- Textblob: 0.6498993963782697
--------------------------------------------------
Classification report
- VADER
              precision    recall  f1-score   support

    negative       0.84      0.64      0.72       177
     neutral       0.66      0.70      0.68       139
    positive       0.68      0.81      0.74       181

    accuracy                           0.72       497
   macro avg       0.73      0.71      0.71       497
weighted avg       0.73      0.72      0.72       497



- TextBlob
              precision    recall  f1-score   support

    negative       0.80      0.49      0.60       177
     neutral       0.61      0.68      0.64       139
    positive       0.61      0.79      0.69       181

    accuracy                           0.65       497
   macro avg       0.67      0.65      0.64       497
weighted avg       0.68      0.65      0.64       497



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
