# Sentiment Analysis Project

VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool that is specifically attuned to sentiments expressed in social media.

- How this library works
- Sentiment analysis on Robinhood app reviews

Documentation: https://github.com/cjhutto/vaderSentiment

In [1]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

## How VADER works

In [2]:
# Build the analyzer once; every later cell, including sentimentScores(), reuses this instance.
analyzer = SentimentIntensityAnalyzer()

In [3]:
# Same sentence twice; only the capitalisation of the sentiment word differs.
for label, sentence in (
    ('Baseline', "I'm happy"),
    ('Capitalization', "I'm HAPPY"),
):
    print(label)
    print(analyzer.polarity_scores(sentence))

Baseline
{'neg': 0.0, 'neu': 0.213, 'pos': 0.787, 'compound': 0.5719}
Capitalization
{'neg': 0.0, 'neu': 0.184, 'pos': 0.816, 'compound': 0.6633}


In [4]:
# Compare the plain sentence against versions where a dampener ("a little")
# or an intensifier ("extremely") precedes the sentiment word.
for label, sentences in (
    ('Baseline', ["I'm happy"]),
    ('Degree modifier', ["I'm a little happy", "I'm a extremely happy"]),
):
    print(label)
    for sentence in sentences:
        print(analyzer.polarity_scores(sentence))

Baseline
{'neg': 0.0, 'neu': 0.213, 'pos': 0.787, 'compound': 0.5719}
Degree modifier
{'neg': 0.0, 'neu': 0.468, 'pos': 0.532, 'compound': 0.5279}
{'neg': 0.0, 'neu': 0.429, 'pos': 0.571, 'compound': 0.6115}


In [5]:
# Compare the plain sentence against the same sentence followed by one and
# then three exclamation marks.
for label, sentences in (
    ('Baseline', ["I'm happy"]),
    ('Punctuation', ["I'm happy !", "I'm happy !!!"]),
):
    print(label)
    for sentence in sentences:
        print(analyzer.polarity_scores(sentence))

Baseline
{'neg': 0.0, 'neu': 0.213, 'pos': 0.787, 'compound': 0.5719}
Punctuation
{'neg': 0.0, 'neu': 0.334, 'pos': 0.666, 'compound': 0.6114}
{'neg': 0.0, 'neu': 0.304, 'pos': 0.696, 'compound': 0.6784}


In [6]:
# Compare a negative sentence on its own against the same sentence followed
# by a contrasting "but ..." clause.
for label, sentence in (
    ('Baseline', "I'm sad"),
    ('Conjunctions', "I'm sad, but i like it"),
):
    print(label)
    print(analyzer.polarity_scores(sentence))

Baseline
{'neg': 0.756, 'neu': 0.244, 'pos': 0.0, 'compound': -0.4767}
Conjunctions
{'neg': 0.22, 'neu': 0.43, 'pos': 0.349, 'compound': 0.296}


In [7]:
# Emotions expressed with emojis and text emoticons: the first three sentences
# differ only in the trailing emoji, the last two only in the emoticon.
for sentence in (
    "I'm happy !! 😥",
    "I'm happy !! 😄",
    "I'm happy !! 😊",
    "This is bad ... :(",
    "This is bad ... :'(",
):
    print(analyzer.polarity_scores(sentence))

{'neg': 0.166, 'neu': 0.323, 'pos': 0.511, 'compound': 0.6467}
{'neg': 0.0, 'neu': 0.338, 'pos': 0.662, 'compound': 0.8684}
{'neg': 0.0, 'neu': 0.327, 'pos': 0.673, 'compound': 0.8829}
{'neg': 0.681, 'neu': 0.319, 'pos': 0.0, 'compound': -0.7506}
{'neg': 0.691, 'neu': 0.309, 'pos': 0.0, 'compound': -0.7717}


In [8]:
# Keep the full score dict so a single field can be read from it afterwards.
# NOTE: a later cell defines a function with this same name and rebinds it,
# so run the cells in order.
sentimentScores = analyzer.polarity_scores('This is bad ... :(')
sentimentScores

{'neg': 0.681, 'neu': 0.319, 'pos': 0.0, 'compound': -0.7506}

In [11]:
# 'compound' is the single field the classification function below compares against its cut-offs.
sentimentScores['compound']

-0.7506

## Sentiment Analysis on Robinhood App Reviews

In [12]:
def sentimentScores(text, threshold=0.05):
    """Classify a piece of text as positive, negative or neutral using VADER.

    The text is scored with the notebook-level ``analyzer`` and the resulting
    ``compound`` value is compared against a symmetric cut-off.

    Parameters
    ----------
    text : Any
        The text to classify. Non-string values are converted with ``str()``.
        ``None`` and float ``NaN`` are treated as missing and classified as
        neutral rather than being scored as the literal words "None" / "nan".
    threshold : float, default 0.05
        Cut-off applied to the compound score. The default of 0.05 is the
        value suggested in the VADER README.

    Returns
    -------
    int
        ``1`` when ``compound >= threshold``, ``-1`` when
        ``compound <= -threshold``, otherwise ``0``.
    """
    # A missing review carries no sentiment. NaN is the only float that is
    # not equal to itself, which avoids needing an extra import here.
    if text is None or (isinstance(text, float) and text != text):
        return 0

    compound = analyzer.polarity_scores(str(text))['compound']

    if compound >= threshold:
        return 1
    if compound <= -threshold:
        return -1
    return 0

In [13]:
# Load the Robinhood app reviews. The path is relative, so the CSV must be in
# the notebook's working directory.
df = pd.read_csv('ios_app_clean.csv')

In [14]:
# Preview the first rows to check the columns that were read (Date, Rating, Review).
df.head()

Unnamed: 0,Date,Rating,Review
0,2021-03-18,1,"Knowing my shares aren’t real, means RH IS MAN..."
1,2021-03-18,1,This company is currently under investigation ...
2,2021-03-18,1,"They sell your data to MM, halt trading when i..."
3,2021-03-18,1,Easy and simple to use but for the love of god...
4,2021-03-18,5,Easy to learn & Use


In [15]:
# Label every review with -1 / 0 / 1 by running the classifier over the Review column.
df['Sentiment'] = df['Review'].map(sentimentScores)

In [16]:
# Summary statistics for the numeric columns: the star Rating and the new Sentiment label.
df.describe()

Unnamed: 0,Rating,Sentiment
count,27137.0,27137.0
mean,2.553967,0.291042
std,1.8346,0.880151
min,1.0,-1.0
25%,1.0,-1.0
50%,1.0,1.0
75%,5.0,1.0
max,5.0,1.0


In [17]:
# How many reviews fall into each sentiment class (1, -1, 0).
df['Sentiment'].value_counts()

 1    15609
-1     7711
 0     3817
Name: Sentiment, dtype: int64

In [18]:
# Spot-check a few rows to compare each star Rating with the Sentiment label assigned to it.
df.head(5)

Unnamed: 0,Date,Rating,Review,Sentiment
0,2021-03-18,1,"Knowing my shares aren’t real, means RH IS MAN...",-1
1,2021-03-18,1,This company is currently under investigation ...,-1
2,2021-03-18,1,"They sell your data to MM, halt trading when i...",-1
3,2021-03-18,1,Easy and simple to use but for the love of god...,1
4,2021-03-18,5,Easy to learn & Use,1


In [19]:
# Pearson correlation between the star rating and the VADER sentiment label.
# Select the two numeric columns explicitly: from pandas 2.0 on, DataFrame.corr
# no longer drops non-numeric columns (Date, Review) silently and raises a
# ValueError instead. The explicit selection works on every pandas version.
corr = df[['Rating', 'Sentiment']].corr(method='pearson')
corr

Unnamed: 0,Rating,Sentiment
Rating,1.0,0.509658
Sentiment,0.509658,1.0


In [20]:
# Save the reviews together with their Sentiment column; index=False keeps the
# row index out of the written file.
df.to_csv("ios_app_final.csv", index=False)