In [1]:
!pip install -qq google-play-scraper

In [2]:
!pip install -qq -U watermark

[?25l[K     |▏                               | 10 kB 7.9 MB/s eta 0:00:01[K     |▍                               | 20 kB 4.5 MB/s eta 0:00:01[K     |▋                               | 30 kB 6.4 MB/s eta 0:00:01[K     |▉                               | 40 kB 5.6 MB/s eta 0:00:01[K     |█                               | 51 kB 5.4 MB/s eta 0:00:01[K     |█▎                              | 61 kB 6.4 MB/s eta 0:00:01[K     |█▌                              | 71 kB 6.7 MB/s eta 0:00:01[K     |█▊                              | 81 kB 7.5 MB/s eta 0:00:01[K     |█▉                              | 92 kB 7.3 MB/s eta 0:00:01[K     |██                              | 102 kB 7.0 MB/s eta 0:00:01[K     |██▎                             | 112 kB 7.0 MB/s eta 0:00:01[K     |██▌                             | 122 kB 7.0 MB/s eta 0:00:01[K     |██▊                             | 133 kB 7.0 MB/s eta 0:00:01[K     |███                             | 143 kB 7.0 MB/s eta 0:00:01[K     

In [3]:
%reload_ext watermark
%watermark -v -p pandas,matplotlib,seaborn,google_play_scraper

Python implementation: CPython
Python version       : 3.7.15
IPython version      : 7.9.0

pandas             : 1.3.5
matplotlib         : 3.2.2
seaborn            : 0.11.2
google_play_scraper: 1.2.2



In [4]:
import json
import pandas as pd
from tqdm import tqdm

import seaborn as sns
import matplotlib.pyplot as plt

from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import TerminalFormatter

from google_play_scraper import Sort, reviews, app

%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', palette='muted', font_scale=1.2)

In [5]:
app_packages = [
  'jp.konami.pesam'
]

## Scraping App Information


In [6]:
app_infos = []

for ap in tqdm(app_packages):
  info = app(ap, lang='en', country='us') # Specify the language and region
  del info['comments']
  app_infos.append(info)

100%|██████████| 1/1 [00:00<00:00,  4.21it/s]


In [7]:
def print_json(json_object):
  json_str = json.dumps(
    json_object, 
    indent=2, 
    sort_keys=True, 
    default=str
  )
  print(highlight(json_str, JsonLexer(), TerminalFormatter()))

In [8]:
print_json(app_infos[0])

{
  [94m"adSupported"[39;49;00m: [34mtrue[39;49;00m,
  [94m"appId"[39;49;00m: [33m"jp.konami.pesam"[39;49;00m,
  [94m"containsAds"[39;49;00m: [34mtrue[39;49;00m,
  [94m"contentRating"[39;49;00m: [33m"Everyone"[39;49;00m,
  [94m"contentRatingDescription"[39;49;00m: [34mnull[39;49;00m,
  [94m"currency"[39;49;00m: [33m"USD"[39;49;00m,
  [94m"description"[39;49;00m: [33m"Approximately 3.4 GB of free space will be required to install this update, so please confirm that you have enough room on your device before commencing the download.\r\n*Given the size of the file, we highly recommend that you connect to a Wi-Fi network to download it.\r\n\r\nWe have received reports from some users indicating that Google Play does not always display the correct download size.\r\n\r\nTherefore we urge you to wait a while even after the download progress bar has reached 100% to ensure that your device has finished downloading the complete file.\r\nAlso, you will not be able to co

In [9]:
def format_title(title):
  sep_index = title.find(':') if title.find(':') != -1 else title.find('-')
  if sep_index != -1:
    title = title[:sep_index]
  return title[:10]

fig, axs = plt.subplots(2, len(app_infos) // 2, figsize=(14, 5))

for i, ax in enumerate(axs.flat):
  ai = app_infos[i]
  img = plt.imread(ai['icon'])
  ax.imshow(img)
  ax.set_title(format_title(ai['title']))
  ax.axis('off')

<Figure size 1008x360 with 0 Axes>

In [10]:
app_infos_df = pd.DataFrame(app_infos)
app_infos_df.to_csv('apps.csv', index=None, header=True)

## Scraping App Reviews


In [11]:
app_reviews = []

for ap in tqdm(app_packages):
  for score in list(range(1, 6)):
    for sort_order in [Sort.MOST_RELEVANT, Sort.NEWEST]:
      rvs, _ = reviews(
        ap,
        lang='en',
        country='us',
        sort=sort_order,
        count= 200 if score == 3 else 100,
        filter_score_with=score
      )
      for r in rvs:
        r['sortOrder'] = 'most_relevant' if sort_order == Sort.MOST_RELEVANT else 'newest'
        r['appId'] = ap
      app_reviews.extend(rvs)

100%|██████████| 1/1 [00:03<00:00,  3.22s/it]


In [12]:
print_json(app_reviews[0])

{
  [94m"appId"[39;49;00m: [33m"jp.konami.pesam"[39;49;00m,
  [94m"at"[39;49;00m: [33m"2022-10-20 10:17:14"[39;49;00m,
  [94m"content"[39;49;00m: [33m"The style of play has become very slow now, the defense performs as the spectator is, there is no automatic switch between the players, the contracts of the legendary players are few and very expensive and illogical for me, in addition to the problem of changing the places of the control buttons and this made me unable to control because of changing the places of the buttons and I became a spectator of the matches due to my inability to control, i deleted it cuz i don't longer like it."[39;49;00m,
  [94m"repliedAt"[39;49;00m: [34mnull[39;49;00m,
  [94m"replyContent"[39;49;00m: [34mnull[39;49;00m,
  [94m"reviewCreatedVersion"[39;49;00m: [33m"7.1.1"[39;49;00m,
  [94m"reviewId"[39;49;00m: [33m"ffc34bcd-e701-4efd-821c-301433866f58"[39;49;00m,
  [94m"score"[39;49;00m: [34m1[39;49;00m,
  [94m"sortOrder"[39;49;

In [13]:
len(app_reviews)

1200

In [14]:
app_reviews_df = pd.DataFrame(app_reviews)
app_reviews_df.to_csv('reviews.csv', index=None, header=True)