## SENTIMENT ANALYSIS - PANDAS + NLTK

In [1]:
## IMPORTS
from google_play_scraper import app
from google_play_scraper import Sort, reviews
import pandas as pd
import numpy as np
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [2]:
## SCRAPE REVIEWS
result, continuation_token = reviews(
    'posteitaliane.posteapp.apppostepay',
    lang='en',
    country='it',
    sort=Sort.NEWEST,
    count=1000)

In [4]:
## AGGIUNGO RISULTATO SCRAPING REVIEWS A DF
df = pd.DataFrame(np.array(result),columns=['review'])
df = df.join(pd.DataFrame(df.pop('review').tolist()))

In [5]:
## PER IL NOSTRO USE CASE SELEZIONO SOLO COLONNE 'content' e 'score'
df = df[['content','score']]
df.head()

Unnamed: 0,content,score
0,It's a very good app. Instant. I like it. Only...,4
1,"Poor service, my money was stock in this bank ...",1
2,Coustmar care services is very bad 👎 they answ...,1
3,"As much as I liked this card,I can say it's th...",2
4,I'm having problem with login.... Since 15 Nov...,1


In [14]:
## NLTK SAMPLE
sia = SentimentIntensityAnalyzer()
test = 'I love this application'
test2 = 'This application is so bad'
test3 = 'I can use this application'
print(sia.polarity_scores(test))
print(sia.polarity_scores(test2))
print(sia.polarity_scores(test3))

{'neg': 0.0, 'neu': 0.323, 'pos': 0.677, 'compound': 0.6369}
{'neg': 0.529, 'neu': 0.471, 'pos': 0.0, 'compound': -0.6696}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


In [15]:
## NLTK SENTIMENT ANALYSIS
sia = SentimentIntensityAnalyzer()

compounds = []
negatives = []
positives = []
neutrals = []

for review in df['content']:
    pol = sia.polarity_scores(review)
    compounds.append(pol['compound'])
    negatives.append(pol['neg'])
    positives.append(pol['pos'])
    neutrals.append(pol['neu'])
    
df['compounds'] = compounds
df['positive'] = positives
df['negative'] = negatives
df['neutral'] = neutrals

df.head()

Unnamed: 0,content,score,compounds,positive,negative,neutral
0,It's a very good app. Instant. I like it. Only...,4,0.9448,0.206,0.0,0.794
1,"Poor service, my money was stock in this bank ...",1,-0.63,0.0,0.138,0.862
2,Coustmar care services is very bad 👎 they answ...,1,-0.9648,0.117,0.338,0.545
3,"As much as I liked this card,I can say it's th...",2,-0.7124,0.025,0.102,0.873
4,I'm having problem with login.... Since 15 Nov...,1,-0.8002,0.088,0.305,0.607


In [16]:
## AGGIUNGO COLONNA SENTIMENT CON VALORE CHE VARIA IN BASE A 'compounds'
df['sentiment'] = df['compounds'].apply(lambda x: 'positive' if x > 0 else 'negative' if x < 0 else 'neutral')
df.head()

Unnamed: 0,content,score,compounds,positive,negative,neutral,sentiment
0,It's a very good app. Instant. I like it. Only...,4,0.9448,0.206,0.0,0.794,positive
1,"Poor service, my money was stock in this bank ...",1,-0.63,0.0,0.138,0.862,negative
2,Coustmar care services is very bad 👎 they answ...,1,-0.9648,0.117,0.338,0.545,negative
3,"As much as I liked this card,I can say it's th...",2,-0.7124,0.025,0.102,0.873,negative
4,I'm having problem with login.... Since 15 Nov...,1,-0.8002,0.088,0.305,0.607,negative


In [17]:
## CONTO REVIEWS IN BASE A SCORE (STELLE)
fivestars = len(df.loc[df['score'] == 5])
fourstars = len(df.loc[df['score'] == 4])
threestars = len(df.loc[df['score'] == 3])
twostars = len(df.loc[df['score'] == 2])
onestar = len(df.loc[df['score'] == 1])
print("Recensioni totali: "+str(fivestars+fourstars+threestars+twostars+onestar))
print("Ci sono "+str(fivestars)+" recensioni con 5 stelle")
print("Ci sono "+str(fourstars)+" recensioni con 4 stelle")
print("Ci sono "+str(threestars)+" recensioni con 3 stelle")
print("Ci sono "+str(twostars)+" recensioni con 2 stelle")
print("Ci sono "+str(onestar)+" recensioni con 1 stella")
## CON COUNT PANDAS
df.groupby(df['score']).count()

Recensioni totali: 1000
Ci sono 630 recensioni con 5 stelle
Ci sono 115 recensioni con 4 stelle
Ci sono 53 recensioni con 3 stelle
Ci sono 40 recensioni con 2 stelle
Ci sono 162 recensioni con 1 stella


Unnamed: 0_level_0,content,compounds,positive,negative,neutral,sentiment
score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,162,162,162,162,162,162
2,40,40,40,40,40,40
3,53,53,53,53,53,53
4,115,115,115,115,115,115
5,630,630,630,630,630,630


In [18]:
## CONTO REVIEWS IN BASE A SENTIMENT
numpositive = len(df.loc[df['sentiment'] == 'positive'])
numnegative = len(df.loc[df['sentiment'] == 'negative'])
numneutral = len(df.loc[df['sentiment'] == 'neutral'])
total = numpositive + numnegative + numneutral
print("Recensioni totali: "+str(total))
print("Ci sono "+str(numpositive)+" recensioni con sentiment positivo - Percentuale: "+str(numpositive/total)+"%")
print("Ci sono "+str(numnegative)+" recensioni con sentiment negativo - Percentuale: "+str(numnegative/total)+"%")
print("Ci sono "+str(numneutral)+" recensioni con sentiment neutrale - Percentuale: "+str(numneutral/total)+"%")
## CON COUNT PANDAS
df.groupby(df['sentiment']).count()

Recensioni totali: 1000
Ci sono 732 recensioni con sentiment positivo - Percentuale: 0.732%
Ci sono 155 recensioni con sentiment negativo - Percentuale: 0.155%
Ci sono 113 recensioni con sentiment neutrale - Percentuale: 0.113%


Unnamed: 0_level_0,content,score,compounds,positive,negative,neutral
sentiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
negative,155,155,155,155,155,155
neutral,113,113,113,113,113,113
positive,732,732,732,732,732,732


In [19]:
## SALVO FILE EXCEL (SENZA COLONNA INDEX)
df.to_excel('result.xlsx',index=False)