## SENTIMENT ANALYSIS - PANDAS + TEXTBLOB

In [3]:
## IMPORTS
from google_play_scraper import app
from google_play_scraper import Sort, reviews
import pandas as pd
import numpy as np
from textblob import TextBlob

In [6]:
## SCRAPE REVIEWS
# Request the 1000 newest reviews (lang 'en', country 'it') for the app id
# below. The second return value is stored in continuation_token but is not
# used by the cells shown here.
scrape_options = {
    'lang': 'en',
    'country': 'it',
    'sort': Sort.NEWEST,
    'count': 1000,
}
result, continuation_token = reviews(
    'posteitaliane.posteapp.apppostepay',
    **scrape_options
)

In [29]:
## LOAD THE SCRAPED REVIEWS INTO A DATAFRAME
# `result` is a list of per-review dicts; the DataFrame constructor expands
# each dict into one row, with the dict keys as columns.
df = pd.DataFrame(result)

In [32]:
## FOR OUR USE CASE KEEP ONLY THE 'content' AND 'score' COLUMNS
wanted_columns = ['content', 'score']
df = df.loc[:, wanted_columns]
df.head()

Unnamed: 0,content,score
0,It's a very good app. Instant. I like it. Only...,4
1,"Poor service, my money was stock in this bank ...",1
2,Coustmar care services is very bad 👎 they answ...,1
3,"As much as I liked this card,I can say it's th...",2
4,I'm having problem with login.... Since 15 Nov...,1


In [6]:
## TEXTBLOB SAMPLE
# Three hand-written sentences used to eyeball TextBlob's sentiment output
# (polarity and subjectivity) before running it on the real reviews.
test = 'I love this application'
test2 = 'This application is so bad'
test3 = 'I can use this application'
for sample in (test, test2, test3):
    print(TextBlob(sample).sentiment)

Sentiment(polarity=0.5, subjectivity=0.6)
Sentiment(polarity=-0.6999999999999998, subjectivity=0.6666666666666666)
Sentiment(polarity=0.0, subjectivity=0.0)


In [37]:
## TEXTBLOB SENTIMENT ANALYSIS
# Score every review text with TextBlob. str() coerces each cell to text
# before it is handed to TextBlob.
polarities = [
    TextBlob(str(text)).sentiment.polarity
    for text in df['content']
]
df['polarity'] = polarities

df.head()

Unnamed: 0,content,score,polarity
0,It's a very good app. Instant. I like it. Only...,4,0.177692
1,"Poor service, my money was stock in this bank ...",1,-0.2
2,Coustmar care services is very bad 👎 they answ...,1,-0.658727
3,"As much as I liked this card,I can say it's th...",2,0.15
4,I'm having problem with login.... Since 15 Nov...,1,-0.875


In [39]:
## ADD A 'sentiment' COLUMN WHOSE VALUE DEPENDS ON 'polarity'
def _polarity_label(value):
    """Return 'positive' for value > 0, 'negative' for value < 0, else 'neutral'."""
    if value > 0:
        return 'positive'
    if value < 0:
        return 'negative'
    return 'neutral'


df['sentiment'] = df['polarity'].apply(_polarity_label)
df.head()

Unnamed: 0,content,score,polarity,sentiment
0,It's a very good app. Instant. I like it. Only...,4,0.177692,positive
1,"Poor service, my money was stock in this bank ...",1,-0.2,negative
2,Coustmar care services is very bad 👎 they answ...,1,-0.658727,negative
3,"As much as I liked this card,I can say it's th...",2,0.15,positive
4,I'm having problem with login.... Since 15 Nov...,1,-0.875,negative


In [64]:
## COUNT REVIEWS BY SCORE (STARS)
# One boolean-mask sum per star rating, unpacked from 1 star up to 5 stars.
scores = df['score']
onestar, twostars, threestars, fourstars, fivestars = (
    int((scores == stars).sum()) for stars in range(1, 6)
)
rated_total = sum((fivestars, fourstars, threestars, twostars, onestar))
print(f"Recensioni totali: {rated_total}")
print(f"Ci sono {fivestars} recensioni con 5 stelle")
print(f"Ci sono {fourstars} recensioni con 4 stelle")
print(f"Ci sono {threestars} recensioni con 3 stelle")
print(f"Ci sono {twostars} recensioni con 2 stelle")
print(f"Ci sono {onestar} recensioni con 1 stella")
## SAME COUNTS USING PANDAS GROUPBY + COUNT
df.groupby(df['score']).count()

Recensioni totali: 1000
Ci sono 630 recensioni con 5 stelle
Ci sono 115 recensioni con 4 stelle
Ci sono 53 recensioni con 3 stelle
Ci sono 40 recensioni con 2 stelle
Ci sono 162 recensioni con 1 stella


Unnamed: 0_level_0,content,polarity,sentiment
score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,162,162,162
2,40,40,40
3,53,53,53
4,115,115,115
5,630,630,630


In [63]:
## COUNT REVIEWS BY SENTIMENT
def _percent(part, whole):
    """Format part/whole as a percentage such as '75.9%'.

    Returns 'n/a' when whole is 0 so an empty DataFrame does not raise
    ZeroDivisionError.
    """
    if whole == 0:
        return "n/a"
    # The '%' presentation type multiplies by 100 and appends the percent
    # sign, so the printed figure is a real percentage rather than a raw
    # ratio followed by '%'.
    return "{:.1%}".format(part / whole)


labels = df['sentiment']
numpositive = int((labels == 'positive').sum())
numnegative = int((labels == 'negative').sum())
numneutral = int((labels == 'neutral').sum())
total = numpositive + numnegative + numneutral
print("Recensioni totali: "+str(total))
print("Ci sono "+str(numpositive)+" recensioni con sentiment positivo - Percentuale: "+_percent(numpositive, total))
print("Ci sono "+str(numnegative)+" recensioni con sentiment negativo - Percentuale: "+_percent(numnegative, total))
print("Ci sono "+str(numneutral)+" recensioni con sentiment neutrale - Percentuale: "+_percent(numneutral, total))
## SAME COUNTS USING PANDAS GROUPBY + COUNT
df.groupby(df['sentiment']).count()

Recensioni totali: 1000
Ci sono 759 recensioni con sentiment positivo - Percentuale: 0.759%
Ci sono 110 recensioni con sentiment negativo - Percentuale: 0.11%
Ci sono 131 recensioni con sentiment neutrale - Percentuale: 0.131%


Unnamed: 0_level_0,content,score,polarity
sentiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
negative,110,110,110
neutral,131,131,131
positive,759,759,759


In [65]:
## SAVE THE RESULT AS AN EXCEL FILE (WITHOUT THE INDEX COLUMN)
df.to_excel(excel_writer='result.xlsx', index=False)