In [None]:
# Install Google Play Scraper to create dataset (https://github.com/JoMingyu/google-play-scraper)
!pip install google_play_scraper

Collecting google_play_scraper
  Downloading google-play-scraper-1.0.2.tar.gz (52 kB)
[?25l[K     |██████▏                         | 10 kB 23.6 MB/s eta 0:00:01[K     |████████████▍                   | 20 kB 25.6 MB/s eta 0:00:01[K     |██████████████████▋             | 30 kB 11.3 MB/s eta 0:00:01[K     |████████████████████████▉       | 40 kB 4.6 MB/s eta 0:00:01[K     |███████████████████████████████ | 51 kB 5.3 MB/s eta 0:00:01[K     |████████████████████████████████| 52 kB 1.1 MB/s 
[?25hBuilding wheels for collected packages: google-play-scraper
  Building wheel for google-play-scraper (setup.py) ... [?25l[?25hdone
  Created wheel for google-play-scraper: filename=google_play_scraper-1.0.2-py3-none-any.whl size=24393 sha256=ae87725c092899daf21042353503b86abfd714f547b17dda69751d9d7da10ee8
  Stored in directory: /root/.cache/pip/wheels/98/99/eb/bbb9d24a5c526980647efc10336eaaeffcf07749f581111128
Successfully built google-play-scraper
Installing collected packages: goo

In [None]:
# Import some required packages
import json
import pandas as pd
from tqdm import tqdm

import seaborn as sns
import matplotlib.pyplot as plt

from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import TerminalFormatter

from google_play_scraper import Sort, reviews, app

# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Ask the inline backend for high-resolution ("retina") figures
%config InlineBackend.figure_format='retina'

# Global seaborn look: white-grid background, muted palette, fonts scaled by 1.2
sns.set(style='whitegrid', palette='muted', font_scale=1.2)

### 20 popular Brazilian apps in the Food and Drinks, Sports, Finance, and Shopping categories (as of 2021-12-07)

Name - Reviews: App ID
* iFood - 8.15m: br.com.brainweb.ifood
* Banco do Brasil - 4.40m: br.com.bb.android
* Uber Eats - 4.24m: com.ubercab.eats
* Rappi - 1.62m: com.grability.rappi
* PicPay - 1.29m: com.picpay
* Nubank - 1.07m: com.nu.production
* Cartola FC - 1.05m: br.com.mobits.cartolafc
* Magalu - 800k: com.luizalabs.mlapp
* Americanas - 667k: com.b2w.americanas
* SofaScore - 633k: com.sofascore.results
* Globoplay - 529k: com.globo.globotv
* McDonald’s - 511k: com.mcdo.mcdonalds
* Zé Delivery - 424k: com.cerveceriamodelo.modelonow
* iti: banco digital do Itaú - 326k: com.itau.iti
* aiqfome - 216k: com.vanuatu.aiqfome
* Habib’s - 109k: habibs.alphacode.com.br
* 99 Food - 103k: com.xiaojukeji.didi.brazil.customer
* Premiere - 90.1k: br.tv.horizonte.android.premierefc
* Burger King - 69.6k: burgerking.com.br.appandroid
* ge - 43.6k: com.globo.ge.app

In [None]:
# Google Play package IDs of the apps to scrape (order is preserved in the outputs)
apps_ids = [
    'br.com.brainweb.ifood',                # iFood
    'com.cerveceriamodelo.modelonow',       # Zé Delivery
    'com.mcdo.mcdonalds',                   # McDonald's
    'com.grability.rappi',                  # Rappi
    'burgerking.com.br.appandroid',         # Burger King
    'com.ubercab.eats',                     # Uber Eats
    'com.vanuatu.aiqfome',                  # aiqfome
    'com.xiaojukeji.didi.brazil.customer',  # 99 Food
    'habibs.alphacode.com.br',              # Habib's
    'com.b2w.americanas',                   # Americanas
    'com.luizalabs.mlapp',                  # Magalu
    'com.nu.production',                    # Nubank
    'com.itau.iti',                         # iti (Itaú)
    'com.globo.globotv',                    # Globoplay
    'com.picpay',                           # PicPay
    'br.com.bb.android',                    # Banco do Brasil
    'com.globo.ge.app',                     # ge
    'br.tv.horizonte.android.premierefc',   # Premiere
    'com.sofascore.results',                # SofaScore
    'br.com.mobits.cartolafc',              # Cartola FC
]

In [None]:
# Scrape the Play Store listing metadata for every app in `apps_ids`.
# NOTE(review): metadata is requested for lang='en' / country='us', while the
# review-scraping cell uses lang='pt' / country='br' — confirm this is intentional.
app_infos = []

for ap in tqdm(apps_ids):
    info = app(ap, lang='en', country='us')
    # Drop the sample comments from the listing. pop() with a default is used
    # instead of `del` so a result without a 'comments' key does not raise
    # KeyError and abort the whole scraping loop.
    info.pop('comments', None)
    app_infos.append(info)

100%|██████████| 20/20 [00:05<00:00,  3.98it/s]


In [None]:
# Tabulate the scraped app metadata (one row per app) and preview the first rows
app_infos_df = pd.DataFrame(data=app_infos)
app_infos_df.head(n=5)

Unnamed: 0,title,description,descriptionHTML,summary,summaryHTML,installs,minInstalls,score,ratings,reviews,histogram,price,free,currency,sale,saleTime,originalPrice,saleText,offersIAP,inAppProductPrice,size,androidVersion,androidVersionText,developer,developerId,developerEmail,developerWebsite,developerAddress,privacyPolicy,developerInternalID,genre,genreId,icon,headerImage,screenshots,video,videoImage,contentRating,contentRatingDescription,adSupported,containsAds,released,updated,version,recentChanges,recentChangesHTML,editorsChoice,similarApps,moreByDeveloper,appId,url
0,iFood Delivery de Comida,Baixe o app e peça sua comida no iFood. Aqui v...,Baixe o app e peça sua comida no iFood. Aqui v...,Delivery of grocery orders & restaurant food i...,Delivery of grocery orders &amp; restaurant fo...,"50,000,000+",50000000,4.673333,8155151,2309,"[346270, 26688, 162870, 869782, 6749541]",0,True,USD,False,,,,False,,46M,5.0,5.0 and up,iFood Delivery de Comida e Mercado,iFood+Delivery+de+Comida+e+Mercado,ifood@ifood.com.br,http://www.ifood.com.br,,https://institucional.ifood.com.br/abrindo-a-c...,7021917525281657200,Food & Drink,FOOD_AND_DRINK,https://play-lh.googleusercontent.com/1Y_VGOwY...,https://play-lh.googleusercontent.com/Faula1rT...,[https://play-lh.googleusercontent.com/9_EI-ej...,,,Everyone,,,False,"Apr 25, 2012",1638818632,9.131.1,"Olá, fã de lanches e de muita economia! Como v...","Olá, fã de lanches e de muita economia! Como v...",False,"[com.mercadolibre, com.wabi.customer, com.merc...",[global.maplink.and_pru],br.com.brainweb.ifood,https://play.google.com/store/apps/details?id=...
1,Zé Delivery de Bebidas,Zé Delivery: o maior app de bebidas do país! \...,Zé Delivery: o maior app de bebidas do país! <...,"Delivery of drinks at a low price, with fast d...","Delivery of drinks at a low price, with fast d...","10,000,000+",10000000,4.77,424621,186,"[16214, 4053, 0, 16214, 388140]",0,True,USD,False,,,,False,,62M,5.0,5.0 and up,Zé Delivery,Z%C3%A9+Delivery,atendimento@ze.delivery,https://ze.delivery/,"R. Dr. Renato Paes de Barros, 1017 - Itaim Bib...",https://ze.delivery/terms?isApp=true,8661611515991206213,Food & Drink,FOOD_AND_DRINK,https://play-lh.googleusercontent.com/dP71wEWW...,https://play-lh.googleusercontent.com/Q5vqyqVe...,[https://play-lh.googleusercontent.com/vGHi8Mc...,https://www.youtube.com/embed/UWejHPyYi0Y?ps=p...,https://play-lh.googleusercontent.com/Q5vqyqVe...,Everyone,,,False,"Aug 19, 2016",1635183695,21.42.1,"Alô, alô, tá chegando uma atualização bem quen...","Alô, alô, tá chegando uma atualização bem quen...",False,"[com.raizen.acelera, com.wabi.customer, com.co...","[com.zedelivery.deliveryman, br.com.ambev.vizi]",com.cerveceriamodelo.modelonow,https://play.google.com/store/apps/details?id=...
2,McDonald's App - Latinoamérica,Enter the new McDonald's App and get exclusive...,Enter the new McDonald&#39;s App and get exclu...,"Enjoy esclusive discounts, promotions and coup...","Enjoy esclusive discounts, promotions and coup...","10,000,000+",10000000,4.389147,511664,2215,"[45235, 8665, 23770, 57876, 376118]",0,True,USD,False,,,,False,,31M,6.0,6.0 and up,Arcos Dorados,Arcos+Dorados,apps@br.mcd.com,http://www.mcdonalds.com.ar,,https://api-discover-mcd.gigigoapps.com/app/te...,7424281334030416541,Food & Drink,FOOD_AND_DRINK,https://play-lh.googleusercontent.com/TCx8i7sv...,https://play-lh.googleusercontent.com/GEr93a5r...,[https://play-lh.googleusercontent.com/gLTfmrF...,,,Everyone,,,False,"Mar 27, 2017",1638365947,3.2.0,Improvements and correction of errors,Improvements and correction of errors,False,"[com.wabi.customer, com.pedidosya, com.littlec...",[br.com.app.gpuu1676426.gpuuddceac8a3bd1d549b5...,com.mcdo.mcdonalds,https://play.google.com/store/apps/details?id=...
3,Rappi,"<b><h2>Live better, live Rappi</h2></b>\r\n\r\...","<b><h2>Live better, live Rappi</h2></b><br><br...",Anything you want delivered in minutes 24/7,Anything you want delivered in minutes 24/7,"50,000,000+",50000000,4.125737,1624566,5540,"[257511, 32910, 55242, 180792, 1098111]",0,True,USD,False,,,,False,,Varies with device,Varies,Varies with device,"Rappi, Inc - Delivery","Rappi,+Inc+-+Delivery",simon@grability.com,http://www.rappi.com,,https://legal.rappi.com/colombia/politica-de-p...,5624915113975274778,Food & Drink,FOOD_AND_DRINK,https://play-lh.googleusercontent.com/zzlIRBWz...,https://play-lh.googleusercontent.com/slA-AxLx...,[https://play-lh.googleusercontent.com/XFVyfC3...,,,Mature 17+,,True,True,"Jul 15, 2015",1638878717,Varies with device,Thanks for using Rappi! To improve your experi...,Thanks for using Rappi! To improve your experi...,False,"[com.wabi.customer, com.mcdo.mcdonalds, com.pe...","[com.rappi.restaurants, com.rappi.partners, co...",com.grability.rappi,https://play.google.com/store/apps/details?id=...
4,Burger King Brasil,Já conhece o programa de recompensas do BK®? O...,Já conhece o programa de recompensas do BK®? O...,Get exclusive coupons with the official applic...,Get exclusive coupons with the official applic...,"10,000,000+",10000000,4.515151,70217,3,"[6383, 0, 2127, 4255, 57452]",0,True,USD,False,,,,False,,65M,5.1,5.1 and up,Burger King Brasil,Burger+King+Brasil,bkbburgerking@gmail.com,http://www.burgerking.com.br/,,https://www.burgerking.com.br/politicas-de-pri...,8498777208384440579,Food & Drink,FOOD_AND_DRINK,https://play-lh.googleusercontent.com/ZrVintO0...,https://play-lh.googleusercontent.com/VdDzoIpT...,[https://play-lh.googleusercontent.com/CNmJEcD...,,,Everyone,,,False,,1637201352,3.7.6,Tem boas novidades para vocês!\r\n\r\nEstamos ...,Tem boas novidades para vocês!<br><br>Estamos ...,False,"[com.raizen.acelera, com.wabi.customer, com.co...",,burgerking.com.br.appandroid,https://play.google.com/store/apps/details?id=...


In [None]:
# Collect up to 600 reviews per app: for every star rating (1-5) and for both
# sort orders (most relevant, newest), request 50 reviews — or 100 for the
# neutral 3-star rating. Per app this gives up to 200 neutral (3-star) reviews
# and up to 100 for each other rating, i.e. presumably 200 negative (1-2 stars)
# and 200 positive (4-5 stars).
# NOTE(review): the same review may be returned under both sort orders —
# consider de-duplicating on 'reviewId' downstream.
app_reviews = []

# Each sort order paired with the label stored in the 'sortOrder' field
sort_orders = [(Sort.MOST_RELEVANT, 'most_relevant'), (Sort.NEWEST, 'newest')]

for ap in tqdm(apps_ids):
    for score in range(1, 6):
        # 3-star reviews are requested at twice the count of the other ratings
        review_count = 100 if score == 3 else 50
        for sort_order, sort_label in sort_orders:
            # reviews() returns (results, continuation token); only the first
            # page is needed, so the token is ignored. It is not bound to `_`
            # to avoid shadowing IPython's last-output variable.
            rvs, _continuation_token = reviews(
                ap,
                lang='pt',
                country='br',
                sort=sort_order,
                count=review_count,
                filter_score_with=score
            )
            # Tag each review with how it was fetched and which app it belongs to
            for r in rvs:
                r['sortOrder'] = sort_label
                r['appId'] = ap
            app_reviews.extend(rvs)

100%|██████████| 20/20 [00:45<00:00,  2.26s/it]


In [None]:
# Total number of review records collected across all apps, scores and sort orders
len(app_reviews)

12000

In [None]:
# Load the collected review records into a DataFrame (one row per review)
app_reviews_df = pd.DataFrame(data=app_reviews)

# Preview the first rows
app_reviews_df.head(n=5)

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
0,gp:AOqpTOGVdzyNbozP3fs6JHzYf9MjW_2TubJzG1tJb6t...,Lauro Caversan Junior,https://play-lh.googleusercontent.com/a-/AOh14...,App ruim. Difícil. Interface para escolher os ...,1,82,9.131.1,2021-12-06 00:23:20,,NaT,most_relevant,br.com.brainweb.ifood
1,gp:AOqpTOFsHPYK09sitKqIGMD6Q9UX31ZXwzNVmnz0zdh...,Otávio Gagliardi,https://play-lh.googleusercontent.com/a-/AOh14...,A pior coisa é que os filtros de pagamento não...,1,282,9.131.1,2021-12-01 22:23:01,,NaT,most_relevant,br.com.brainweb.ifood
2,gp:AOqpTOEXyQsIKTL9vQ0kxmi1MbcL5gBQXmiq_Q_Xe9I...,Douglas Alves da Silva,https://play-lh.googleusercontent.com/a-/AOh14...,"Esse aplicativo merece ser zero estrelas, pois...",1,33,9.131.1,2021-12-04 00:13:31,,NaT,most_relevant,br.com.brainweb.ifood
3,gp:AOqpTOFRZZxAe-YMBcrP2riGBETog4FVOy9t6DFB6ve...,Angelica Agapito,https://play-lh.googleusercontent.com/a-/AOh14...,"Sempre usei o aplicativo, porém, nas últimas c...",1,167,9.131.1,2021-12-03 21:52:04,,NaT,most_relevant,br.com.brainweb.ifood
4,gp:AOqpTOHdDawdKNu9-dwhdI1LA-cLmlHo3u7BA4zB-vi...,Rogerio D Felix,https://play-lh.googleusercontent.com/a-/AOh14...,"Layout ruim, difícil de usar e burocrático. Fu...",1,1025,9.128.2,2021-11-14 20:51:19,,NaT,most_relevant,br.com.brainweb.ifood


In [None]:
# Save the reviews to a CSV file with a header row and without the DataFrame index.
# `index` is documented as a bool, so pass False explicitly instead of relying on
# None being falsy.
app_reviews_df.to_csv('reviews.csv', index=False, header=True)