# imports & installs

In [None]:
!pip install google-play-scraper tqdm

import pandas as pd
import time
import signal
from google.colab import files
from google_play_scraper import reviews
from tqdm import tqdm




In [None]:
# Games to scrape: each entry pairs a display name with its Google Play
# store link (the package id is read from the link's "id=" parameter).
app_data = [
    {"game_name": title, "google_play_link": store_link}
    for title, store_link in (
        ("Hero Wars: Alliance RPG", "https://play.google.com/store/apps/details?id=com.nexters.herowars"),
        ("Hustle Castle・Medieval Kingdom", "https://play.google.com/store/apps/details?id=com.my.hc.rpg.kingdom.simulator"),
        ("Project Makeover", "https://play.google.com/store/apps/details?id=com.bgg.jump"),
        ("last fortress", "https://play.google.com/store/apps/details?id=com.more.lastfortress.gp"),
        ("Puzzles and survival", "https://play.google.com/store/apps/details?id=com.global.ztmslg"),
    )
]

In [None]:
class TimeoutException(Exception):
    """Signals that a scraping run exceeded its allotted time."""

def timeout_handler(signum, frame):
    """Signal handler that aborts the running code with a TimeoutException.

    Both arguments are supplied by the signal machinery and are ignored.
    """
    timed_out = TimeoutException("Scraping took too long")
    raise timed_out

# Google Play Scraping Function

In [None]:
def scrape_google_play(app_id, limit=10000, timeout=300):
    """Collect up to ``limit`` unique English/US reviews for a Google Play app.

    Reviews are fetched in pages of 100 and de-duplicated on ``reviewId``.
    Fetching stops when ``limit`` reviews have been gathered, when the store
    has no more reviews to return, or when ``timeout`` seconds have elapsed
    (whatever was collected up to that point is still returned).

    The time limit is enforced with ``SIGALRM``, so this only works on Unix
    and when called from the main thread.

    Args:
        app_id: Google Play package id, e.g. ``"com.nexters.herowars"``.
        limit: Maximum number of reviews to return.
        timeout: Whole number of seconds after which scraping is abandoned.

    Returns:
        A ``pandas.DataFrame`` with one row per review (at most ``limit``).
    """
    unique_reviews = set()
    all_reviews = []
    continuation_token = None

    # Remember whatever handler was installed before so it can be put back.
    previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(timeout)

    try:
        with tqdm(total=limit, desc="Google Play reviews") as pbar:
            while len(all_reviews) < limit:
                batch, continuation_token = reviews(
                    app_id,
                    lang='en',
                    country='us',
                    count=100,
                    continuation_token=continuation_token
                )

                # The library can hand back a (truthy) token object even after
                # the last page, so the token check below may never fire. An
                # empty batch is the reliable "no more reviews" signal; without
                # this the loop would poll until the alarm goes off.
                if not batch:
                    break

                for review in batch:
                    # Stop at the limit so the progress bar never overshoots
                    # its total and no surplus rows are accumulated.
                    if len(all_reviews) >= limit:
                        break
                    if review['reviewId'] not in unique_reviews:
                        unique_reviews.add(review['reviewId'])
                        all_reviews.append(review)
                        pbar.update(1)

                if not continuation_token:
                    break

                # Be polite to the store between page requests.
                time.sleep(1)
    except TimeoutException:
        print(f"Google Play scraping timed out after {timeout} seconds")
    finally:
        # Cancel any pending alarm, then restore the caller's handler.
        signal.alarm(0)
        # signal.signal() returns None when the old handler was not installed
        # from Python; it cannot be re-installed in that case.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)

    return pd.DataFrame(all_reviews[:limit])


# Main Scraping Loop

In [None]:
# For every configured game: scrape its reviews, write them to a CSV named
# after the game, and trigger a browser download of that file.
for app in app_data:
    game_name = app["game_name"]
    output_file = game_name.replace(' ', '_') + "_reviews.csv"

    # The package id is the text after the last "id=" in the store link,
    # cut off at the next "&" if further query parameters follow.
    google_play_id = app["google_play_link"].rsplit('id=', 1)[-1].partition('&')[0]

    print(f"\nScraping reviews for {game_name}")
    print(f"Google Play ID: {google_play_id}")

    # Start with an empty frame so a failed scrape still produces a CSV.
    gp_reviews = pd.DataFrame()

    try:
        gp_reviews = scrape_google_play(google_play_id, timeout=300)
        print(f"Google Play reviews scraped: {len(gp_reviews)}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next game.
        print(f"Error scraping Google Play reviews: {e!s}")

    gp_reviews.to_csv(output_file, index=False)
    print(f"Total unique reviews scraped for {game_name}: {len(gp_reviews)}")

    files.download(output_file)


Scraping reviews for Hero Wars: Alliance RPG
Google Play ID: com.nexters.herowars


Google Play reviews: 100%|██████████| 10000/10000 [01:59<00:00, 83.47it/s]


Google Play reviews scraped: 10000
Total unique reviews scraped for Hero Wars: Alliance RPG: 10000


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Scraping reviews for Hustle Castle・Medieval Kingdom
Google Play ID: com.my.hc.rpg.kingdom.simulator


Google Play reviews: 100%|██████████| 10000/10000 [01:59<00:00, 83.74it/s]


Google Play reviews scraped: 10000
Total unique reviews scraped for Hustle Castle・Medieval Kingdom: 10000


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Scraping reviews for Project Makeover
Google Play ID: com.bgg.jump


Google Play reviews: 100%|██████████| 10000/10000 [01:59<00:00, 83.56it/s]


Google Play reviews scraped: 10000
Total unique reviews scraped for Project Makeover: 10000


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Scraping reviews for last fortress
Google Play ID: com.more.lastfortress.gp


Google Play reviews: 100%|██████████| 10000/10000 [01:59<00:00, 83.57it/s]


Google Play reviews scraped: 10000
Total unique reviews scraped for last fortress: 10000


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Scraping reviews for Puzzles and survival
Google Play ID: com.global.ztmslg


Google Play reviews: 100%|██████████| 10000/10000 [02:00<00:00, 83.31it/s]


Google Play reviews scraped: 10000
Total unique reviews scraped for Puzzles and survival: 10000


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>