In [None]:
from pathlib import Path

import pandas as pd
import requests

In [None]:
from google_play_scraper import reviews as play_reviews, Sort


In [None]:
# 1. Scrape Google Play Store Reviews (multi-country)
# ---------------------------
def get_playstore_reviews(app_id, countries, max_reviews=10000):
    """Collect Google Play reviews for one app across several storefronts.

    Parameters
    ----------
    app_id : str
        Play Store package name passed straight to ``play_reviews``.
    countries : iterable of str
        Country codes. Each is lower-cased for the request and stored
        upper-cased under the ``"country"`` key of every review.
    max_reviews : int, default 10000
        Upper bound on the number of reviews kept *per country*.

    Returns
    -------
    list of dict
        The review dicts returned by ``play_reviews`` (newest first within a
        country), each with an added ``"country"`` key, concatenated in the
        order of ``countries``.
    """
    all_reviews = []
    for country in countries:
        print(f"\nScraping Play Store reviews ({country})...")
        token = None
        country_reviews = []
        while True:
            batch, token = play_reviews(
                app_id,
                lang='en',
                country=country.lower(),
                sort=Sort.NEWEST,
                count=200,
                continuation_token=token
            )
            for review in batch:
                review["country"] = country.upper()
            country_reviews.extend(batch)
            print(f"Collected {len(country_reviews)} Play Store reviews for {country} so far...")

            # An empty batch means there is nothing more to fetch. Checking the
            # token alone is not enough: the scraper may hand back a
            # continuation-token object that is still truthy after the last
            # page, which would otherwise keep this loop spinning forever.
            if not batch or not token or len(country_reviews) >= max_reviews:
                break
        # The final batch can overshoot the cap, so trim to the promised maximum.
        all_reviews.extend(country_reviews[:max_reviews])
    return all_reviews

In [None]:
# 2. Scrape Apple App Store Reviews (multi-country)
# ---------------------------
def get_appstore_reviews(app_id, country="us", pages=10, max_reviews=500):
    """Fetch App Store customer reviews for one storefront from the iTunes RSS feed.

    Each page holds up to ~50 reviews, so ~10 pages are needed for 500 reviews.

    Parameters
    ----------
    app_id : int or str
        Numeric App Store ID interpolated into the feed URL.
    country : str, default "us"
        Storefront code used in the URL; stored upper-cased in each record.
    pages : int, default 10
        Highest page number to request (pages ``1..pages``).
    max_reviews : int, default 500
        Stop paging once this many reviews are collected; the result is
        trimmed to this length.

    Returns
    -------
    list of dict
        Records with keys ``username``, ``review``, ``rating`` (int),
        ``date`` and ``country``. Paging stops early (returning what was
        collected so far) on a request error, a non-200 response, an
        unparsable body, or a page without entries.
    """
    all_reviews = []
    for page in range(1, pages + 1):
        url = f"https://itunes.apple.com/{country}/rss/customerreviews/id={app_id}/page={page}/json"
        try:
            # A timeout keeps one stalled connection from hanging the notebook.
            r = requests.get(url, timeout=30)
        except requests.RequestException as exc:
            print(f"Error fetching page {page} for {country}: {exc}")
            break
        if r.status_code != 200:
            print(f"Error fetching page {page} for {country}: {r.status_code}")
            break

        try:
            data = r.json()
        except ValueError:
            print(f"Error fetching page {page} for {country}: response was not valid JSON")
            break
        if "feed" not in data or "entry" not in data["feed"]:
            print(f"No more reviews found on page {page} for {country}")
            break

        entries = data["feed"]["entry"]
        # A page with exactly one entry may arrive as a bare object rather than a list.
        if isinstance(entries, dict):
            entries = [entries]

        for review in entries:
            # The feed has been seen to lead with an entry describing the app
            # itself. Skip anything without a rating instead of blindly
            # dropping the first entry, which may be a genuine review.
            if "im:rating" not in review:
                continue
            all_reviews.append({
                "username": review["author"]["name"]["label"],
                "review": review["content"]["label"],
                "rating": int(review["im:rating"]["label"]),
                "date": review["updated"]["label"],
                "country": country.upper()
            })

        print(f"Fetched {len(all_reviews)} reviews so far for {country}...")

        if len(all_reviews) >= max_reviews:
            break

    return all_reviews[:max_reviews]



In [None]:
# 3. Run Scrapers
# ---------------------------
# Storefront codes: upper-case for the Play Store run, lower-case for the App Store run.
play_countries = ["NG", "CA", "US", "GB"]
app_countries = [code.lower() for code in play_countries]
app_id = 363_590_051  # Netflix App Store ID


In [None]:
# 4. Play Store
play_reviews_all = get_playstore_reviews("com.netflix.mediaclient", play_countries, max_reviews=10000)
# Passing `columns=` picks out the fields of interest and still produces a frame
# with the expected schema when the scrape returned nothing (indexing an empty
# frame by these names would raise KeyError). Chaining rename/assign builds a
# fresh frame instead of mutating a column slice in place.
play_df = (
    pd.DataFrame(play_reviews_all, columns=['userName', 'content', 'score', 'at', 'country'])
    .rename(columns={'userName': 'username', 'content': 'review',
                     'score': 'rating', 'at': 'date'})
    .assign(source='Play Store')
)

In [None]:
# 5. App Store
# Pull each storefront in turn and pool the rows before building the frame.
all_appstore_reviews = []
for c in app_countries:
    print(f"\nScraping App Store reviews ({c.upper()})...")
    reviews = get_appstore_reviews(app_id, country=c, pages=10)
    all_appstore_reviews += reviews

# Tag every row with its origin so it stays identifiable after merging.
app_df = pd.DataFrame(all_appstore_reviews).assign(source='App Store')

In [None]:
# Stack the Play Store and App Store frames into one dataset with a fresh index
combined_df = pd.concat((play_df, app_df), ignore_index=True)
combined_df

In [None]:
# To CSV
# Resolve the destination from the current user's home directory instead of a
# hard-coded absolute Windows path, so the export also works on other machines.
output_csv = Path.home() / "Documents" / "netflix_reviews.csv"
# Create the target folder if this machine does not have one yet.
output_csv.parent.mkdir(parents=True, exist_ok=True)
combined_df.to_csv(output_csv, index=False)

In [None]:
# Combined Dataset Summary
# The confirmation names the file the export cell actually writes (netflix_reviews.csv).
print("\n✅ Saved combined reviews to 'netflix_reviews.csv'")
print(f"Play Store reviews: {len(play_df)}")
print(f"App Store reviews: {len(app_df)}")
print(f"Total combined: {len(combined_df)}")