### SCRAPING CALL OF DUTY MOBILE - GARENA REVIEWS


Collecting google-play-scraper
  Downloading google_play_scraper-1.2.7-py3-none-any.whl.metadata (50 kB)
Downloading google_play_scraper-1.2.7-py3-none-any.whl (28 kB)
Installing collected packages: google-play-scraper
Successfully installed google-play-scraper-1.2.7


In [2]:
from google_play_scraper import Sort, reviews
import pandas as pd
import time
import numpy as np
from datetime import datetime

# Function to scrape reviews with robust error handling and rate limiting
def _add_derived_columns(df):
    """Add the 'sentiment', 'review_length' and 'has_reply' columns to df in place."""
    # Sentiment is derived purely from the star rating: >3 positive, 3 neutral, <3 negative
    df['sentiment'] = df['score'].apply(
        lambda x: 'positive' if x > 3 else 'neutral' if x == 3 else 'negative'
    )

    # A review's content may be missing (None/NaN); count it as length 0
    # instead of letting len(None) raise.
    df['review_length'] = df['content'].fillna('').astype(str).str.len()
    df['has_reply'] = df['replyContent'].notna()


def scrape_app_reviews(app_id, count=100, lang='id', country='id', max_retries=3):
    """
    Scrape reviews for a specific app from Google Play Store

    Reviews are requested newest-first in batches of at most 200, with a
    random 1-3 second pause between batches. Progress and errors are
    reported with print(); errors are not raised to the caller.

    Parameters:
    - app_id (str): The app ID in Google Play Store
    - count (int): Number of reviews to scrape
    - lang (str): Language code
    - country (str): Country code
    - max_retries (int): Maximum consecutive retry attempts for a failing batch

    Returns:
    - DataFrame containing at most `count` reviews plus the derived columns
      'sentiment', 'review_length' and 'has_reply'. An empty DataFrame is
      returned when count <= 0, when the first request fails, or when no
      reviews were collected.
    """
    # Nothing to fetch: skip the network round trip entirely
    if count <= 0:
        print("No reviews were collected.")
        return pd.DataFrame()

    print(f"Starting to scrape {count} reviews for {app_id}...")
    print(f"Language: {lang}, Country: {country}")
    print(f"This may take a while. Started at {datetime.now().strftime('%H:%M:%S')}")

    all_reviews = []
    retry_count = 0

    # Only the first request is treated as "initialization"; later failures
    # must not discard reviews that were already collected.
    try:
        result, continuation_token = reviews(
            app_id=app_id,
            lang=lang,
            country=country,
            sort=Sort.NEWEST,
            count=min(count, 200)
        )
    except Exception as e:
        print(f"Error initializing scraper: {e}")
        return pd.DataFrame()

    all_reviews.extend(result)
    remaining = count - len(all_reviews)

    print(f"Fetched initial batch: {len(all_reviews)} reviews")

    # Continue fetching with rate limiting and progress tracking
    last_progress_report = time.time()

    while remaining > 0 and continuation_token:
        # Sleep to avoid being rate-limited (random interval between 1-3 seconds)
        time.sleep(np.random.uniform(1, 3))

        try:
            result, continuation_token = reviews(
                app_id=app_id,
                continuation_token=continuation_token,
                count=min(remaining, 200)
            )
        except Exception as batch_error:
            retry_count += 1
            if retry_count > max_retries:
                print(f"Maximum retries reached. Stopping with {len(all_reviews)} reviews collected.")
                break
            print(f"Error during batch fetch: {batch_error}. Retry {retry_count}/{max_retries}...")
            time.sleep(5)  # Wait longer before retry
            # continuation_token is unchanged here, so the same page is retried
            continue

        # A successful batch ends the current failure streak
        retry_count = 0

        # An empty batch means there is nothing left to fetch. The returned
        # token may still be a truthy object at that point (presumably the
        # library always returns a token wrapper -- verify), so testing the
        # token alone could loop forever.
        if not result:
            print("No more reviews available to fetch")
            break

        all_reviews.extend(result)
        remaining = count - len(all_reviews)

        # Report progress every 30 seconds
        if time.time() - last_progress_report > 30:
            print(f"Progress: {len(all_reviews)}/{count} reviews ({(len(all_reviews)/count*100):.1f}%)")
            last_progress_report = time.time()

        # Break if no more continuation token
        if not continuation_token:
            print("No more reviews available to fetch")
            break

    if not all_reviews:
        print("No reviews were collected.")
        return pd.DataFrame()

    # A batch may contain more items than were still needed; never return
    # more than the caller asked for.
    all_reviews = all_reviews[:count]

    # Convert to DataFrame
    df = pd.DataFrame(all_reviews)
    try:
        _add_derived_columns(df)
    except Exception as e:
        # Keep the scraped data even if a derived column cannot be computed
        print(f"Warning: could not compute derived columns: {e}")

    print(f"\nCompleted! Total reviews scraped: {len(df)}")
    return df

# Target app: Call of Duty Mobile (Garena), Indonesian Play Store listing
cod_app_id = 'com.garena.game.codm'  # App ID as it appears in the store link
target_count = 10000

# Run the scraper with Indonesian language and country codes
cod_reviews_df = scrape_app_reviews(
    cod_app_id,
    count=target_count,
    lang='id',
    country='id',
)

# Summarise and persist the result, handling the "nothing collected" case first
if cod_reviews_df.empty:
    print("Failed to collect reviews. Please check the app ID and try again.")
else:
    # Preview the first rows
    print("\nSample of collected reviews:")
    display(cod_reviews_df.head())

    # Headline numbers: row count and mean star rating
    print("\nBasic statistics:")
    print(f"Total reviews scraped: {cod_reviews_df.shape[0]}")
    print(f"Average rating: {cod_reviews_df['score'].mean():.2f}/5")

    # Number of reviews per star rating, ordered by rating
    rating_counts = cod_reviews_df['score'].value_counts()
    rating_counts = rating_counts.sort_index()
    print("\nReviews by rating:")
    print(rating_counts)

    # Write everything to a single CSV file, without the index column
    cod_reviews_df.to_csv('cod_mobile_reviews_10k.csv', index=False)
    print(f"\nAll {cod_reviews_df.shape[0]} reviews saved to 'cod_mobile_reviews_10k.csv'")

Starting to scrape 10000 reviews for com.garena.game.codm...
Language: id, Country: id
This may take a while. Started at 18:41:35
Fetched initial batch: 200 reviews
Progress: 2600/10000 reviews (26.0%)
Progress: 5200/10000 reviews (52.0%)
Progress: 7400/10000 reviews (74.0%)
Progress: 9600/10000 reviews (96.0%)

Completed! Total reviews scraped: 10000

Sample of collected reviews:


Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion,sentiment,review_length,has_reply
0,44f0d4f8-d73c-4b14-b68c-01e7a788d337,Azka Alfawaz,https://play-lh.googleusercontent.com/a/ACg8oc...,Developer kikir tapi mantap,5,0,,2025-03-28 18:30:28,,NaT,,positive,27,False
1,a889332c-3a4e-4e4c-9711-ec96e5fd32cd,Mr Potato,https://play-lh.googleusercontent.com/a-/ALV-U...,Untuk diriku di masa depan. Jangan instal game...,4,0,,2025-03-28 18:29:07,,NaT,,positive,64,False
2,3caf4287-ce99-4b83-8dc2-5ff2f2bbf07e,FANGIR,https://play-lh.googleusercontent.com/a-/ALV-U...,"makin banyak bug gak, diley banget najis padah...",2,0,1.6.50,2025-03-28 18:25:11,,NaT,1.6.50,negative,59,False
3,aa7959d6-cda3-4618-8f91-9dfee0702932,Dana Maula,https://play-lh.googleusercontent.com/a-/ALV-U...,tinggal bagusin Diki lagi aja,5,0,,2025-03-28 18:15:12,,NaT,,positive,29,False
4,46810fb0-5a76-4598-a34b-b5533d25b58d,Abdul Rohman,https://play-lh.googleusercontent.com/a-/ALV-U...,paling parah sistem sbmmnya,5,1,1.6.44,2025-03-28 17:39:28,"Hi Kak, semua hadiah yang kamu dapatkan itu be...",2022-08-23 09:49:37,1.6.44,positive,27,True



Basic statistics:
Total reviews scraped: 10000
Average rating: 3.19/5

Reviews by rating:
score
1    3307
2     739
3     916
4     784
5    4254
Name: count, dtype: int64

All 10000 reviews saved to 'cod_mobile_reviews_10k.csv'
