In [7]:
!pip -q install -U --force-reinstall \
  "requests>=2.31.0" "urllib3>=2.2.0" \
  certifi charset-normalizer idna \
  pandas langdetect tenacity

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 3.0.0 which is incompatible.
google-colab 1.0.0 requires requests==2.32.4, but you have requests 2.32.5 which is incompatible.
bqplot 0.12.45 requires pandas<3.0.0,>=1.0.0, but you have pandas 3.0.0 which is incompatible.
db-dtypes 1.5.0 requires pandas<3.0.0,>=1.5.3, but you have pandas 3.0.0 which is incompatible.
tensorflow 2.19.0 requires numpy<2.2.0,>=1.26.0, but you have numpy 2.4.2 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.4.2 which is incompatible.
gradio 5.50.0 requires pandas<3.0,>=1.0, but you have pandas 3.0.0 which is incompatible.[0m[31m
[0m

In [8]:
from __future__ import annotations

import os
import re
import csv
import time
import hashlib
import random
from dataclasses import dataclass
from typing import Optional, Any

import requests
import pandas as pd
from langdetect import detect as lang_detect
from langdetect.lang_detect_exception import LangDetectException
from tenacity import retry, stop_after_attempt, wait_exponential_jitter, retry_if_exception_type

# Fallback –±–∏–±–ª–∏–æ—Ç–µ–∫–∞ (–ø—É–±–ª–∏—á–Ω—ã–µ endpoints, –Ω–µ HTML)
try:
    from app_store_scraper import AppStore
except Exception:
    AppStore = None


COLUMNS_ORDER = [
    "review_id",
    "date",
    "rating",
    "title",
    "text",
    "author",
    "country",
    "language",
    "link",
]


def extract_app_id(app_url: str) -> str:
    """
    –ò–∑–≤–ª–µ–∫–∞–µ—Ç —á–∏—Å–ª–æ –ø–æ—Å–ª–µ 'id' –≤ URL App Store.
    –ü—Ä–∏–º–µ—Ä: https://apps.apple.com/ru/app/–∫—Ö–ª/id455938766 -> 455938766
    """
    m = re.search(r"id(\d+)", app_url)
    if not m:
        raise ValueError("–ù–µ —É–¥–∞–ª–æ—Å—å –∏–∑–≤–ª–µ—á—å app_id: –≤ —Å—Å—ã–ª–∫–µ –Ω–µ –Ω–∞–π–¥–µ–Ω–æ 'id<—á–∏—Å–ª–æ>'.")
    return m.group(1)


def make_app_page_link(country: str, app_id: str) -> str:
    """
    –ú–∞–∫—Å–∏–º–∞–ª—å–Ω–æ –±–ª–∏–∑–∫–∞—è —Ä–µ–≥–∏–æ–Ω–∞–ª—å–Ω–∞—è —Å—Å—ã–ª–∫–∞ –Ω–∞ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–µ.
    (Slug –ø—Ä–∏–ª–æ–∂–µ–Ω–∏—è –º–æ–∂–µ—Ç –±—ã—Ç—å –Ω–µ–∏–∑–≤–µ—Å—Ç–µ–Ω, –ø–æ—ç—Ç–æ–º—É –∏—Å–ø–æ–ª—å–∑—É–µ–º —Å—Ç–∞–±–∏–ª—å–Ω—ã–π –≤–∞—Ä–∏–∞–Ω—Ç –ø–æ id.)
    """
    return f"https://apps.apple.com/{country}/app/id{app_id}"


def safe_detect_language(text: str) -> str:
    """
    –ê–≤—Ç–æ–æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ —è–∑—ã–∫–∞ –ø–æ —Ç–µ–∫—Å—Ç—É (ISO 639-1), –ª–∏–±–æ 'unknown'.
    """
    text = (text or "").strip()
    if len(text) < 10:
        return "unknown"
    try:
        return lang_detect(text)
    except LangDetectException:
        return "unknown"


def stable_hash_id(country: str, author: str, date: str, rating: str, title: str, text: str) -> str:
    """
    –°—Ç–∞–±–∏–ª—å–Ω—ã–π sha256 id, –µ—Å–ª–∏ –∏—Å—Ç–æ—á–Ω–∏–∫ –Ω–µ –≤–µ—Ä–Ω—É–ª review_id.
    """
    raw = f"{country}|{author}|{date}|{rating}|{title}|{text}".encode("utf-8", errors="ignore")
    return hashlib.sha256(raw).hexdigest()


def iso8601(dt_value: Any) -> str:
    """
    –ü—Ä–∏–≤–æ–¥–∏—Ç –¥–∞—Ç—É –∫ ISO 8601 —Å—Ç—Ä–æ–∫–µ.
    """
    if dt_value is None:
        return ""
    try:
        ts = pd.to_datetime(dt_value, utc=True, errors="coerce")
        if pd.isna(ts):
            return str(dt_value)
        return ts.isoformat()
    except Exception:
        return str(dt_value)


@retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential_jitter(initial=1, max=12),
    retry=retry_if_exception_type((requests.RequestException,)),
    reraise=True,
)
def http_get_json(session: requests.Session, url: str, timeout: int = 20) -> dict:
    """
    GET JSON —Å —Ä–µ—Ç—Ä–∞—è–º–∏ (—ç–∫—Å–ø–æ–Ω–µ–Ω—Ü–∏–∞–ª—å–Ω–∞—è –ø–∞—É–∑–∞ + jitter) –Ω–∞ —Å–µ—Ç–µ–≤—ã–µ –æ—à–∏–±–∫–∏.
    """
    resp = session.get(url, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


In [9]:
def fetch_reviews_rss(
    session: requests.Session,
    app_id: str,
    country: str,
    max_per_country: Optional[int] = None,
    max_pages: int = 10,
) -> list[dict]:
    """
    –°–±–æ—Ä –æ—Ç–∑—ã–≤–æ–≤ –∏–∑ –æ—Ñ–∏—Ü–∏–∞–ª—å–Ω–æ–≥–æ iTunes RSS Customer Reviews (JSON).
    –í–∞–∂–Ω–æ: Apple –º–æ–∂–µ—Ç –æ–≥—Ä–∞–Ω–∏—á–∏–≤–∞—Ç—å –≥–ª—É–±–∏–Ω—É –∏—Å—Ç–æ—Ä–∏–∏ (–æ–±—ã—á–Ω–æ –ø–æ—Å–ª–µ–¥–Ω–∏–µ N —Å—Ç—Ä–∞–Ω–∏—Ü/—Å–æ—Ç–Ω–∏ –æ—Ç–∑—ã–≤–æ–≤).
    """
    reviews: list[dict] = []
    processed = 0

    for page in range(1, max_pages + 1):
        if page == 1:
            url = f"https://itunes.apple.com/{country}/rss/customerreviews/id={app_id}/sortBy=mostRecent/json"
        else:
            url = f"https://itunes.apple.com/{country}/rss/customerreviews/page={page}/id={app_id}/sortBy=mostRecent/json"

        data = http_get_json(session, url)

        feed = data.get("feed", {})
        entries = feed.get("entry", [])

        # –ß–∞—Å—Ç–æ first entry ‚Äî –º–µ—Ç–∞-–∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è –ø—Ä–∏–ª–æ–∂–µ–Ω–∏—è, –∞ –¥–∞–ª—å—à–µ –æ—Ç–∑—ã–≤—ã.
        if not entries or not isinstance(entries, list):
            break

        # –ï—Å–ª–∏ —Ç–æ–ª—å–∫–æ 1 entry, –≤–µ—Ä–æ—è—Ç–Ω–æ —ç—Ç–æ —Ç–æ–ª—å–∫–æ –º–µ—Ç–∞ (–æ—Ç–∑—ã–≤–æ–≤ –Ω–µ—Ç)
        if len(entries) <= 1:
            break

        for entry in entries[1:]:
            processed += 1
            if max_per_country is not None and processed > max_per_country:
                return reviews

            review_id = ""
            link = ""

            try:
                review_id = (entry.get("id", {}) or {}).get("label", "") or ""
            except Exception:
                review_id = ""

            try:
                link = ((entry.get("link", {}) or {}).get("attributes", {}) or {}).get("href", "") or ""
            except Exception:
                link = ""

            title = (entry.get("title", {}) or {}).get("label", "") or ""
            text = (entry.get("content", {}) or {}).get("label", "") or ""
            author = ((entry.get("author", {}) or {}).get("name", {}) or {}).get("label", "") or ""

            rating = (entry.get("im:rating", {}) or {}).get("label", "") or ""
            date_val = (entry.get("updated", {}) or {}).get("label", "") or ""

            date_str = iso8601(date_val)

            if not review_id:
                review_id = stable_hash_id(
                    country=country,
                    author=author,
                    date=date_str,
                    rating=str(rating),
                    title=title,
                    text=text,
                )

            if not link:
                link = make_app_page_link(country, app_id)

            language = safe_detect_language(f"{title}\n{text}")

            reviews.append(
                {
                    "review_id": str(review_id),
                    "date": date_str,
                    "rating": int(rating) if str(rating).isdigit() else None,
                    "title": title,
                    "text": text,
                    "author": author,
                    "country": country,
                    "language": language,
                    "link": link,
                }
            )

    return reviews


In [10]:
def extract_app_slug(app_url: str) -> str:
    """
    –ü—ã—Ç–∞–µ–º—Å—è –∏–∑–≤–ª–µ—á—å slug/–Ω–∞–∑–≤–∞–Ω–∏–µ –∏–∑ URL:
    https://apps.apple.com/ru/app/–∫—Ö–ª/id455938766 -> "–∫—Ö–ª"
    –ï—Å–ª–∏ –Ω–µ –ø–æ–ª—É—á–∏–ª–æ—Å—å, –≤–µ—Ä–Ω–µ–º "app".
    """
    m = re.search(r"apps\.apple\.com/[a-z]{2}/app/([^/]+)/id\d+", app_url)
    if not m:
        return "app"
    return m.group(1) or "app"


def fetch_reviews_scraper(
    app_url: str,
    app_id: str,
    country: str,
    max_per_country: Optional[int] = None,
) -> list[dict]:
    """
    Fallback —Å–±–æ—Ä —á–µ—Ä–µ–∑ app-store-scraper (–ø—É–±–ª–∏—á–Ω—ã–µ endpoints, –±–µ–∑ HTML).
    –í–∞–∂–Ω–æ: —Å—Ç—Ä—É–∫—Ç—É—Ä–∞ –ø–æ–ª–µ–π –º–æ–∂–µ—Ç –æ—Ç–ª–∏—á–∞—Ç—å—Å—è –≤ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–∏ –æ—Ç –≤–µ—Ä—Å–∏–∏ –±–∏–±–ª–∏–æ—Ç–µ–∫–∏.
    """
    if AppStore is None:
        raise RuntimeError("app-store-scraper –Ω–µ –¥–æ—Å—Ç—É–ø–µ–Ω (–Ω–µ —É–¥–∞–ª–æ—Å—å –∏–º–ø–æ—Ä—Ç–∏—Ä–æ–≤–∞—Ç—å).")

    app_name = extract_app_slug(app_url)

    # how_many: –µ—Å–ª–∏ None ‚Äî –±–µ—Ä–µ–º "–º–Ω–æ–≥–æ", –Ω–æ –±–∏–±–ª–∏–æ—Ç–µ–∫–∞/endpoint –≤—Å–µ —Ä–∞–≤–Ω–æ –º–æ–∂–µ—Ç –æ–≥—Ä–∞–Ω–∏—á–∏–≤–∞—Ç—å.
    how_many = max_per_country if max_per_country is not None else 10000

    app = AppStore(country=country, app_name=app_name, app_id=int(app_id))
    app.review(how_many=how_many)

    out: list[dict] = []

    for r in getattr(app, "reviews", []) or []:
        title = r.get("title", "") or ""
        text = r.get("review", "") or r.get("text", "") or ""
        author = r.get("userName", "") or r.get("author", "") or ""
        rating = r.get("rating", None)
        date_val = r.get("date", "") or r.get("updated", "") or ""

        date_str = iso8601(date_val)

        # –í –Ω–µ–∫–æ—Ç–æ—Ä—ã—Ö –≤–µ—Ä—Å–∏—è—Ö –µ—Å—Ç—å id; –µ—Å–ª–∏ –Ω–µ—Ç ‚Äî –¥–µ–ª–∞–µ–º —Å—Ç–∞–±–∏–ª—å–Ω—ã–π —Ö—ç—à
        review_id = r.get("id", "") or r.get("reviewId", "") or ""
        if not review_id:
            review_id = stable_hash_id(
                country=country,
                author=author,
                date=date_str,
                rating=str(rating),
                title=title,
                text=text,
            )

        link = r.get("url", "") or make_app_page_link(country, app_id)
        language = safe_detect_language(f"{title}\n{text}")

        out.append(
            {
                "review_id": str(review_id),
                "date": date_str,
                "rating": int(rating) if isinstance(rating, (int, float, str)) and str(rating).isdigit() else None,
                "title": title,
                "text": text,
                "author": author,
                "country": country,
                "language": language,
                "link": link,
            }
        )

    return out


In [11]:
def update_reviews(app_url: str, country_list: list[str], max_per_country: int | None = None) -> pd.DataFrame:
    """
    –û–±–Ω–æ–≤–ª—è–µ—Ç CSV /content/appstore_reviews_{app_id}.csv –∏–Ω–∫—Ä–µ–º–µ–Ω—Ç–∞–ª—å–Ω–æ:
    - –µ—Å–ª–∏ —Ñ–∞–π–ª —Å—É—â–µ—Å—Ç–≤—É–µ—Ç: —á–∏—Ç–∞–µ—Ç, —Å–æ–±–∏—Ä–∞–µ—Ç set review_id, –¥–æ–±–∞–≤–ª—è–µ—Ç —Ç–æ–ª—å–∫–æ –Ω–æ–≤—ã–µ
    - –µ—Å–ª–∏ –Ω–µ —Å—É—â–µ—Å—Ç–≤—É–µ—Ç: —Å–æ–∑–¥–∞–µ—Ç –Ω–æ–≤—ã–π
    –í–æ–∑–≤—Ä–∞—â–∞–µ—Ç –∏—Ç–æ–≥–æ–≤—ã–π DataFrame.
    """
    if not app_url or not isinstance(app_url, str):
        raise ValueError("app_url –æ–±—è–∑–∞—Ç–µ–ª–µ–Ω –∏ –¥–æ–ª–∂–µ–Ω –±—ã—Ç—å —Å—Ç—Ä–æ–∫–æ–π.")
    if not country_list or not isinstance(country_list, list):
        raise ValueError("country_list –æ–±—è–∑–∞—Ç–µ–ª–µ–Ω –∏ –¥–æ–ª–∂–µ–Ω –±—ã—Ç—å list[str].")

    app_id = extract_app_id(app_url)
    csv_path = f"/content/appstore_reviews_{app_id}.csv"

    # –ó–∞–≥—Ä—É–∂–∞–µ–º —Å—É—â–µ—Å—Ç–≤—É—é—â–∏–µ –¥–∞–Ω–Ω—ã–µ
    if os.path.exists(csv_path):
        existing_df = pd.read_csv(csv_path, dtype={"review_id": str}, keep_default_na=False)
        # –ù–∞ —Å–ª—É—á–∞–π, –µ—Å–ª–∏ —Ñ–∞–π–ª —Å—Ç–∞—Ä—ã–π/—Å –¥—Ä—É–≥–∏–º–∏ –∫–æ–ª–æ–Ω–∫–∞–º–∏:
        for col in COLUMNS_ORDER:
            if col not in existing_df.columns:
                existing_df[col] = ""
        existing_df = existing_df[COLUMNS_ORDER]
        existing_ids = set(existing_df["review_id"].astype(str).tolist())
        print(f"[INIT] –ù–∞–π–¥–µ–Ω —Å—É—â–µ—Å—Ç–≤—É—é—â–∏–π CSV: {csv_path} | —Å—Ç—Ä–æ–∫: {len(existing_df)}")
    else:
        existing_df = pd.DataFrame(columns=COLUMNS_ORDER)
        existing_ids = set()
        print(f"[INIT] CSV –Ω–µ –Ω–∞–π–¥–µ–Ω, –±—É–¥–µ—Ç —Å–æ–∑–¥–∞–Ω: {csv_path}")

    new_rows: list[dict] = []
    new_ids_in_run: set[str] = set()

    # –í–∞–∂–Ω–æ: –Ω–µ –æ–±–µ—â–∞–µ–º –ø–æ–ª–Ω—É—é –∏—Å—Ç–æ—Ä–∏—é
    print("\n[WARNING] Public-—Ä–µ–∂–∏–º App Store –º–æ–∂–µ—Ç –æ–≥—Ä–∞–Ω–∏—á–∏–≤–∞—Ç—å –≥–ª—É–±–∏–Ω—É –∏—Å—Ç–æ—Ä–∏–∏ –æ—Ç–∑—ã–≤–æ–≤.")
    print("          Apple RSS –∏ –ø—É–±–ª–∏—á–Ω—ã–µ endpoints –æ–±—ã—á–Ω–æ –æ—Ç–¥–∞—é—Ç —Ç–æ–ª—å–∫–æ –ø–æ—Å–ª–µ–¥–Ω–∏–µ —Å—Ç—Ä–∞–Ω–∏—Ü—ã/—Å–æ—Ç–Ω–∏ –æ—Ç–∑—ã–≤–æ–≤,")
    print("          –∞ —Ç–∞–∫–∂–µ –∑–∞–≤–∏—Å—è—Ç –æ—Ç –Ω–∞–ª–∏—á–∏—è –æ—Ç–∑—ã–≤–æ–≤ –≤ –∫–æ–Ω–∫—Ä–µ—Ç–Ω–æ–π —Å—Ç—Ä–∞–Ω–µ.\n")

    with requests.Session() as session:
        session.headers.update({
            "User-Agent": "Mozilla/5.0 (compatible; reviews-collector/1.0; +https://apps.apple.com/)"
        })

        for country in country_list:
            country = str(country).lower().strip()
            if not country:
                continue

            print(f"[COUNTRY] {country} ...")
            t0 = time.time()

            try:
                # 1) –ü—ã—Ç–∞–µ–º—Å—è —á–µ—Ä–µ–∑ RSS
                reviews = fetch_reviews_rss(
                    session=session,
                    app_id=app_id,
                    country=country,
                    max_per_country=max_per_country,
                    max_pages=10,  # —Ç–∏–ø–∏—á–Ω—ã–π –ª–∏–º–∏—Ç —Å—Ç—Ä–∞–Ω–∏—Ü —É RSS
                )
                source = "rss"
                # –ï—Å–ª–∏ RSS –≤–µ—Ä–Ω—É–ª–æ 0, –ø—Ä–æ–±—É–µ–º fallback
                if len(reviews) == 0:
                    raise RuntimeError("RSS –≤–µ—Ä–Ω—É–ª 0 –æ—Ç–∑—ã–≤–æ–≤ (–≤–æ–∑–º–æ–∂–µ–Ω 404/–ª–∏–º–∏—Ç—ã/–Ω–µ—Ç –æ—Ç–∑—ã–≤–æ–≤).")

            except Exception as e_rss:
                print(f"  [INFO] RSS –Ω–µ —Å—Ä–∞–±–æ—Ç–∞–ª –¥–ª—è {country}: {type(e_rss).__name__}: {e_rss}")
                print("  [INFO] –ü—Ä–æ–±—É—é fallback: app-store-scraper ...")

                try:
                    reviews = fetch_reviews_scraper(
                        app_url=app_url,
                        app_id=app_id,
                        country=country,
                        max_per_country=max_per_country,
                    )
                    source = "scraper"
                except Exception as e_scr:
                    print(f"  [ERROR] –ù–µ —É–¥–∞–ª–æ—Å—å —Å–æ–±—Ä–∞—Ç—å –ø–æ —Å—Ç—Ä–∞–Ω–µ {country}: {type(e_scr).__name__}: {e_scr}")
                    print("  [SKIP] –ü–µ—Ä–µ—Ö–æ–∂—É –∫ —Å–ª–µ–¥—É—é—â–µ–π —Å—Ç—Ä–∞–Ω–µ.\n")
                    continue

            fetched = len(reviews)

            added = 0
            for r in reviews:
                rid = str(r.get("review_id", "") or "")
                if not rid:
                    # –ù–∞ –≤—Å—è–∫–∏–π —Å–ª—É—á–∞–π –¥–æ–±—å–µ–º —Å—Ç–∞–±–∏–ª—å–Ω—ã–º id
                    rid = stable_hash_id(
                        country=country,
                        author=str(r.get("author", "")),
                        date=str(r.get("date", "")),
                        rating=str(r.get("rating", "")),
                        title=str(r.get("title", "")),
                        text=str(r.get("text", "")),
                    )
                    r["review_id"] = rid

                if rid in existing_ids or rid in new_ids_in_run:
                    continue

                # –≥–∞—Ä–∞–Ω—Ç–∏—Ä—É–µ–º –ø–æ—Ä—è–¥–æ–∫ –∫–æ–ª–æ–Ω–æ–∫
                row = {c: r.get(c, "") for c in COLUMNS_ORDER}
                # –µ—Å–ª–∏ –≤–¥—Ä—É–≥ —Ä–µ–π—Ç–∏–Ω–≥ None, –æ—Å—Ç–∞–≤–∏–º –ø—É—Å—Ç—ã–º
                if row["rating"] is None:
                    row["rating"] = ""

                new_rows.append(row)
                new_ids_in_run.add(rid)
                added += 1

            dt = time.time() - t0
            print(f"  [DONE] –∏—Å—Ç–æ—á–Ω–∏–∫={source} | –ø–æ–ª—É—á–µ–Ω–æ={fetched} | –Ω–æ–≤—ã—Ö –¥–æ–±–∞–≤–ª–µ–Ω–æ={added} | {dt:.1f}s\n")

    # –û–±—ä–µ–¥–∏–Ω—è–µ–º –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º
    if new_rows:
        new_df = pd.DataFrame(new_rows, columns=COLUMNS_ORDER)
        combined = pd.concat([existing_df, new_df], ignore_index=True)
    else:
        new_df = pd.DataFrame(columns=COLUMNS_ORDER)
        combined = existing_df.copy()

    # –ù–æ—Ä–º–∞–ª–∏–∑—É–µ–º —Ç–∏–ø—ã/–ø–æ—Ä—è–¥–æ–∫
    combined["review_id"] = combined["review_id"].astype(str)
    for col in COLUMNS_ORDER:
        if col not in combined.columns:
            combined[col] = ""
    combined = combined[COLUMNS_ORDER]

    # –ù–∞ –≤—Å—è–∫–∏–π —Å–ª—É—á–∞–π —É–±–∏—Ä–∞–µ–º –¥—É–±–ª–∏ –ø–æ review_id
    combined = combined.drop_duplicates(subset=["review_id"], keep="first")

    # –°–æ—Ä—Ç–∏—Ä–æ–≤–∫–∞ –ø–æ –¥–∞—Ç–µ (–µ—Å–ª–∏ –ø–∞—Ä—Å–∏—Ç—Å—è)
    try:
        combined["_dt"] = pd.to_datetime(combined["date"], utc=True, errors="coerce")
        combined = combined.sort_values(by="_dt", ascending=True).drop(columns=["_dt"])
    except Exception:
        pass

    # CSV: UTF-8, –∑–∞–ø—è—Ç–∞—è, –±–µ–∑–æ–ø–∞—Å–Ω—ã–µ –∫–∞–≤—ã—á–∫–∏, —á—Ç–æ–±—ã –ø–µ—Ä–µ–≤–æ–¥—ã —Å—Ç—Ä–æ–∫ –Ω–µ –ª–æ–º–∞–ª–∏ —Ñ–∞–π–ª
    combined.to_csv(
    csv_path,
    index=False,
    encoding="utf-8",
    sep=",",
    quoting=csv.QUOTE_ALL,
    escapechar="\\",
    lineterminator="\n",
)

    print(f"[SAVE] –ò—Ç–æ–≥–æ–≤—ã—Ö —Å—Ç—Ä–æ–∫: {len(combined)}")
    print(f"[SAVE] –°–æ—Ö—Ä–∞–Ω–µ–Ω–æ –≤: {csv_path}")

    # –í–µ—Ä–Ω–µ–º combined; —Ç–∞–∫–∂–µ —É–¥–æ–±–Ω–æ –≤–µ—Ä–Ω—É—Ç—å new_df –æ—Ç–¥–µ–ª—å–Ω–æ, –Ω–æ –ø–æ –¢–ó –≤–æ–∑–≤—Ä–∞—â–∞–µ–º –æ–±—â–∏–π DF
    return combined


In [12]:
# –ü—Ä–∏–º–µ—Ä –≤—Ö–æ–¥–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö
APP_URL = "https://apps.apple.com/ru/app/–∫—Ö–ª/id455938766"

COUNTRY_LIST = ["ru"]  # –∑–∞–¥–∞–π —Å–≤–æ–π —Å–ø–∏—Å–æ–∫
MAX_PER_COUNTRY = 200  # None = –º–∞–∫—Å–∏–º—É–º –¥–æ—Å—Ç—É–ø–Ω–æ–≥–æ (–Ω–æ –ø—É–±–ª–∏—á–Ω—ã–µ –∏—Å—Ç–æ—á–Ω–∏–∫–∏ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω—ã)

df = update_reviews(APP_URL, COUNTRY_LIST, max_per_country=MAX_PER_COUNTRY)

print("\n[RESULT] –í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫ –≤ –¥–∞—Ç–∞—Å–µ—Ç–µ:", len(df))
print("\n[RESULT] –ü–æ—Å–ª–µ–¥–Ω–∏–µ 10 —Å—Ç—Ä–æ–∫ (tail):")
display(df.tail(10))

app_id = extract_app_id(APP_URL)
csv_path = f"/content/appstore_reviews_{app_id}.csv"
print("\n[RESULT] CSV –ø—É—Ç—å:", csv_path)


[INIT] CSV –Ω–µ –Ω–∞–π–¥–µ–Ω, –±—É–¥–µ—Ç —Å–æ–∑–¥–∞–Ω: /content/appstore_reviews_455938766.csv

          Apple RSS –∏ –ø—É–±–ª–∏—á–Ω—ã–µ endpoints –æ–±—ã—á–Ω–æ –æ—Ç–¥–∞—é—Ç —Ç–æ–ª—å–∫–æ –ø–æ—Å–ª–µ–¥–Ω–∏–µ —Å—Ç—Ä–∞–Ω–∏—Ü—ã/—Å–æ—Ç–Ω–∏ –æ—Ç–∑—ã–≤–æ–≤,
          –∞ —Ç–∞–∫–∂–µ –∑–∞–≤–∏—Å—è—Ç –æ—Ç –Ω–∞–ª–∏—á–∏—è –æ—Ç–∑—ã–≤–æ–≤ –≤ –∫–æ–Ω–∫—Ä–µ—Ç–Ω–æ–π —Å—Ç—Ä–∞–Ω–µ.

[COUNTRY] ru ...
  [DONE] –∏—Å—Ç–æ—á–Ω–∏–∫=rss | –ø–æ–ª—É—á–µ–Ω–æ=200 | –Ω–æ–≤—ã—Ö –¥–æ–±–∞–≤–ª–µ–Ω–æ=200 | 2.3s

[SAVE] –ò—Ç–æ–≥–æ–≤—ã—Ö —Å—Ç—Ä–æ–∫: 200
[SAVE] –°–æ—Ö—Ä–∞–Ω–µ–Ω–æ –≤: /content/appstore_reviews_455938766.csv

[RESULT] –í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫ –≤ –¥–∞—Ç–∞—Å–µ—Ç–µ: 200

[RESULT] –ü–æ—Å–ª–µ–¥–Ω–∏–µ 10 —Å—Ç—Ä–æ–∫ (tail):


Unnamed: 0,review_id,date,rating,title,text,author,country,language,link
9,13503751524,2025-12-12T05:44:02+00:00,3,–í–∏–∑—É–∞–ª –ø—Ä–∏–ª–æ–∂–µ–Ω–∏—è,–ü—Ä–∏–ª–æ–∂–µ–Ω–∏–µ —É–¥–æ–±–Ω–æ–µ –¥–ª—è –±—ã—Å—Ç—Ä–æ–≥–æ –ø—Ä–æ—Å–º–æ—Ç—Ä–∞ —Ä–µ–∑—É...,bigaelchik,ru,ru,https://itunes.apple.com/ru/review?id=45593876...
8,13546061746,2025-12-23T12:39:17+00:00,5,–•–æ—Ä–æ—à–æ —á—Ç–æ –µ—Å—Ç—å,–ò–º–±–∞ —á—Ç–æ –µ—Å—Ç—å –º–æ–∂–Ω–æ –ø–æ—Å–º–æ—Ç—Ä–µ—Ç—å –∏–≥—Ä—ã,–û–∫–æ–∫ –æ–∫–∞–∫,ru,ru,https://itunes.apple.com/ru/review?id=45593876...
7,13553759863,2025-12-25T15:13:21+00:00,1,–≠—Ç–æ —É–∂–∞—Å!!!,"–í—Å–µ —É–≤–µ–¥–æ–º–ª–µ–Ω–∏—è –æ—Ç—Å—Ç–∞—é—Ç, 3–π –ø–µ—Ä–∏–æ–¥ –ø—Ä–æ–ø—É—Å—Ç–∏–ª–∞ ...",ri_kii,ru,ru,https://itunes.apple.com/ru/review?id=45593876...
6,13572223674,2025-12-30T08:44:29+00:00,3,–î–ª—è–∫—Ö–ª,–ü–ª–æ—Ö–æ –∑–∞–≥—Ä—É–∂–∞–µ—Ç—Å—è,–û—Ç –¥–∞–Ω–∏–∏–∏–ª–∞,ru,ru,https://itunes.apple.com/ru/review?id=45593876...
5,13609779753,2026-01-08T15:18:36+00:00,5,!!!–°–∫–∞—á–∏–≤–∞–π—Ç–µ!!!,–ò–º–±–∞ —Å–∫–∞—á–∏–≤–∞–π—Ç–µ,Lololo·óØkaüëª,ru,bg,https://itunes.apple.com/ru/review?id=45593876...
4,13613005751,2026-01-09T12:20:12+00:00,5,–ü—Ä–∏–ª–æ–∂–µ–Ω–∏–µ,–í—Å—ë —Å—É–ø–µ—Ä !–û—á–µ–Ω—å —É–¥–æ–±–Ω–æ–µ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–µ ü§ù,–ê–ª–µ–∫—Å–∞–Ω–¥—Ä –í–æ–ª–æ—Ö–æ–≤–∏—á,ru,ru,https://itunes.apple.com/ru/review?id=45593876...
3,13613201780,2026-01-09T13:17:57+00:00,5,–•–æ–∫–∫–µ–π,"–û—á–µ–Ω—å —É–¥–æ–±–Ω–æ–µ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–µ, –ø–æ–ª—å–∑—É—é—Å—å –∏–º —É–∂–µ —Ç—Ä–µ...",–ö–æ–º–±–∞—Ç74,ru,ru,https://itunes.apple.com/ru/review?id=45593876...
2,13633274789,2026-01-14T17:04:04+00:00,1,–ù–µ –∏–Ω—Ñ–æ—Ä–º–∞—Ç–∏–≤–Ω–æ–µ,–ü–æ—á–µ–º—É –±—ã –Ω–µ —Å–¥–µ–ª–∞—Ç—å –≤ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–∏ –∫–∞–∫ –≤ –ø—Ä–∏–ª–æ–∂...,Amgn174,ru,ru,https://itunes.apple.com/ru/review?id=45593876...
1,13636686694,2026-01-15T15:21:28+00:00,5,–≠—Ñ–∏—Ä –∞–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏,–î–æ–±–∞–≤—å—Ç–µ –ø–æ–∂–∞–ª—É–π—Å—Ç–∞ –≤ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–µ —ç—Ñ–∏—Ä –∞–∫—Ç–∏–≤–Ω–æ—Å...,-=MEGAMONSTER=-,ru,ru,https://itunes.apple.com/ru/review?id=45593876...
0,13665607478,2026-01-23T07:45:22+00:00,5,‚ù§Ô∏è,–õ—É—á—à–µ–µ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–µ üî•,–í–∏–∫–∞ –µ–∂–µ–≤–∏–∫–∞ üíù,ru,ru,https://itunes.apple.com/ru/review?id=45593876...



[RESULT] CSV –ø—É—Ç—å: /content/appstore_reviews_455938766.csv
