# In [1]:  (notebook cell marker left over from the export)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Steam reviews CSV  ↔  SteamSpy API merger
- 네 리뷰 CSV(반드시 'appid' 포함)와 SteamSpy API를 조합해
  게임별 owners/price/ccu 등을 가져오고, 추정 매출(owners×price)까지 계산해서 병합합니다.

출력:
  1) steamspy_appdetails.csv            (앱별 1행 요약)
  2) merged_reviews_with_steamspy.csv   (리뷰에 조인)
  3) summary_by_game.csv                (게임별 요약 테이블)

실행 예시:
  python steamspy_merge.py --reviews weighted_score_aboverealreallast.csv --outdir ./out --sleep 1.2
옵션:
  --appid-limit N   (앞에서 N개만 테스트)
  --timeout 15      (HTTP 타임아웃)
  --retries 3       (앱당 재시도 횟수)
  --sleep 1.2       (요청 간 대기; 1.0~1.5 권장)
주의:
- SteamSpy owners/price는 추정치이며 price는 USD 센트 단위.
- F2P(가격 0)일 경우 매출은 0으로 계산됩니다.
"""
import argparse
import os
import sys
import time
from typing import Dict, Any, List, Tuple, Optional

import pandas as pd

try:
    import requests
except Exception:
    print("requests가 필요합니다:  pip install requests", file=sys.stderr)
    raise

# Endpoint that fetch_appdetails() sends its GET requests to.
API_URL = "https://steamspy.com/api.php"

# Keys copied from each API response into the per-app row built in main();
# a key that is absent from the response is stored as None.
FIELDS_KEEP = [
    "appid","name","developer","publisher","genre","score_rank","positive","negative",
    "owners","average_forever","average_2weeks","median_forever","median_2weeks",
    "price","ccu","initialprice","discount"
]

def parse_owners_range(owners_str: str) -> Tuple[Optional[int], Optional[int]]:
    """Parse an owners range such as '20,000 .. 50,000' into (20000, 50000).

    Thousands separators (commas) are removed first. The range is split on
    '..'; if that does not yield exactly two pieces, an en dash ('–') is tried
    as the separator instead. The first and last pieces are converted to int.

    Returns:
        ``(low, high)`` on success. A string holding a single integer yields
        that integer for both bounds. ``(None, None)`` is returned when the
        input is not a string or a bound cannot be converted.
    """
    unparsed = (None, None)
    if not isinstance(owners_str, str):
        return unparsed

    cleaned = owners_str.replace(",", "")
    pieces = cleaned.split("..")
    if len(pieces) != 2:
        # Fall back to the en dash variant of the separator.
        pieces = cleaned.split("–")

    first, last = pieces[0], pieces[-1]
    try:
        return (int(first.strip()), int(last.strip()))
    except ValueError:
        return unparsed

def safe_get(d: Dict[str, Any], k: str, default=None):
    """Return ``d[k]`` if the key exists, otherwise ``default``.

    A key that is present with a ``None`` value returns ``None``, not
    ``default``.
    """
    return d.get(k, default)

def fetch_appdetails(appid: int, timeout: int = 15) -> Optional[Dict[str, Any]]:
    """Request the ``appdetails`` record for one app from ``API_URL``.

    Args:
        appid: Application id, sent as the ``appid`` query parameter.
        timeout: Timeout passed to ``requests.get``.

    Returns:
        The decoded JSON body when the response status is 200. ``None`` for
        any other status, or when the request or the JSON decoding raises.
    """
    query = {"request": "appdetails", "appid": str(appid)}
    try:
        response = requests.get(API_URL, params=query, timeout=timeout)
        return response.json() if response.status_code == 200 else None
    except Exception:
        # Best effort: the caller treats None as a failed attempt and retries.
        return None

def polite_sleep(seconds: float):
    """Pause for ``seconds`` seconds; a KeyboardInterrupt propagates to the caller."""
    time.sleep(seconds)

def compute_revenue_bounds(row: Dict[str, Any]) -> Tuple[Optional[float], Optional[float]]:
    """Estimate lower/upper revenue in USD as owners_low/high x price.

    Args:
        row: Mapping with an ``owners`` range string (for example
            ``'20,000 .. 50,000'``) and a ``price`` in cents (number or
            numeric string).

    Returns:
        ``(low, high)`` in USD.

        * ``(None, None)`` when the owners range cannot be parsed, so an
          unknown owner count is not reported as zero revenue.
        * ``(0.0, 0.0)`` when the price is missing, unparseable, NaN or not
          positive (free-to-play titles).
    """
    owners_low, owners_high = parse_owners_range(safe_get(row, "owners"))
    if owners_low is None or owners_high is None:
        return (None, None)

    # `or 0` also maps None / empty string to a zero price.
    price_cents = safe_get(row, "price", 0) or 0
    try:
        price = float(price_cents) / 100.0  # cents -> USD
    except (TypeError, ValueError, OverflowError):
        price = 0.0

    # Written as `not (price > 0)` so that a NaN price is rejected as well.
    if not (price > 0):
        return (0.0, 0.0)
    return (owners_low * price, owners_high * price)

# Columns computed locally from each API response, appended after FIELDS_KEEP.
_DERIVED_FIELDS = [
    "owners_low", "owners_high", "est_revenue_low_usd", "est_revenue_high_usd",
]

# Columns of the merged frame that the per-game summary reads (besides appid).
_SUMMARY_SOURCE_COLUMNS = ["game", "recommendationid", "voted_up"] + _DERIVED_FIELDS


def _fetch_with_retries(appid: int, retries: int, timeout: int) -> Optional[Dict[str, Any]]:
    """Return the API payload for ``appid``, or None when every attempt fails."""
    attempts = max(1, retries)  # always try at least once, even with --retries 0
    for attempt in range(attempts):
        data = fetch_appdetails(appid, timeout=timeout)
        # Accept only a dict that echoes the requested appid back.
        if isinstance(data, dict) and str(data.get("appid", "")) == str(appid):
            return data
        if attempt < attempts - 1:
            polite_sleep(0.5)  # short backoff, only between attempts
    return None


def _build_row(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Project an API payload onto FIELDS_KEEP and add the derived columns."""
    row = {k: payload.get(k) for k in FIELDS_KEEP}
    row["owners_low"], row["owners_high"] = parse_owners_range(row.get("owners"))
    rev_low, rev_high = compute_revenue_bounds(row)
    row["est_revenue_low_usd"] = round(rev_low, 2) if rev_low is not None else None
    row["est_revenue_high_usd"] = round(rev_high, 2) if rev_high is not None else None
    return row


def _rows_to_frame(rows: List[Dict[str, Any]], key_dtype) -> pd.DataFrame:
    """Build the per-app frame with a fixed column set, even when ``rows`` is empty."""
    frame = pd.DataFrame(rows, columns=FIELDS_KEEP + _DERIVED_FIELDS)
    if frame.empty:
        # Keep the join key dtype aligned with the reviews frame.
        frame["appid"] = frame["appid"].astype(key_dtype)
    for col in _DERIVED_FIELDS:
        # Empty or all-None columns come out as object; make them numeric so
        # the later "max" aggregation works.
        if frame[col].dtype == object:
            frame[col] = frame[col].astype("float64")
    return frame


def main():
    """CLI entry point: fetch per-app data, merge it into the reviews, write CSVs.

    Reads the reviews CSV given by ``--reviews`` (it must contain ``appid``),
    fetches one record per distinct appid, and writes into ``--outdir``:

    * ``steamspy_appdetails.csv``: one row per successfully fetched app
    * ``merged_reviews_with_steamspy.csv``: reviews left-joined with the above
    * ``summary_by_game.csv``: per-game aggregate; skipped with a warning when
      the merged frame lacks a column the aggregation needs

    Exits with status 1 when the reviews CSV has no ``appid`` column.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--reviews", required=True, help="리뷰 CSV 경로 (appid 포함 필수)")
    ap.add_argument("--outdir", required=True, help="출력 디렉토리")
    ap.add_argument("--appid-limit", type=int, default=None, help="앞에서 N개 appid만 처리(테스트용)")
    ap.add_argument("--timeout", type=int, default=15, help="HTTP 타임아웃(초)")
    ap.add_argument("--retries", type=int, default=3, help="앱당 재시도 횟수")
    ap.add_argument("--sleep", type=float, default=1.2, help="요청 사이 대기(초)")
    args = ap.parse_args()

    os.makedirs(args.outdir, exist_ok=True)

    # Load the reviews.
    df = pd.read_csv(args.reviews)
    if "appid" not in df.columns:
        print("ERROR: 리뷰 CSV에 'appid' 컬럼이 없습니다.", file=sys.stderr)
        sys.exit(1)

    appids = sorted(int(a) for a in df["appid"].dropna().astype(int).unique())
    if args.appid_limit is not None:
        appids = appids[:args.appid_limit]

    total = len(appids)
    print(f"[INFO] 요청 대상 appid 수: {total}")

    rows: List[Dict[str, Any]] = []
    for i, appid in enumerate(appids, 1):
        payload = _fetch_with_retries(appid, args.retries, args.timeout)
        if payload is None:
            print(f"[WARN] 실패: appid={appid} (retries={args.retries})")
        else:
            rows.append(_build_row(payload))

        # Progress and throttling apply to failed apps too.
        if i % 25 == 0 or i == total:
            print(f"[INFO] 진행상황: {i}/{total}")
        if i < total:
            polite_sleep(args.sleep)

    # Save the per-app summary.
    df_spy = _rows_to_frame(rows, df["appid"].dtype)
    spy_path = os.path.join(args.outdir, "steamspy_appdetails.csv")
    df_spy.to_csv(spy_path, index=False, encoding="utf-8-sig")
    print(f"[OK] 저장: {spy_path} (rows={len(df_spy)})")

    # Join onto the reviews.
    merged = df.merge(df_spy, on="appid", how="left")
    merged_path = os.path.join(args.outdir, "merged_reviews_with_steamspy.csv")
    merged.to_csv(merged_path, index=False, encoding="utf-8-sig")
    print(f"[OK] 저장: {merged_path} (rows={len(merged)})")

    # Per-game summary. Columns can be missing when the reviews CSV lacks them,
    # or when it already had a same-named column and merge() added suffixes.
    missing = [c for c in _SUMMARY_SOURCE_COLUMNS if c not in merged.columns]
    if missing:
        print(f"[WARN] 게임별 요약 생략: 필요한 컬럼 없음 {missing}")
        return

    agg = merged.groupby(["appid","game"]).agg(
        review_count=("recommendationid","count"),
        recommend_rate=("voted_up","mean"),
        owners_low=("owners_low","max"),
        owners_high=("owners_high","max"),
        est_revenue_low_usd=("est_revenue_low_usd","max"),
        est_revenue_high_usd=("est_revenue_high_usd","max"),
    ).reset_index().sort_values("review_count", ascending=False)

    agg_path = os.path.join(args.outdir, "summary_by_game.csv")
    agg.to_csv(agg_path, index=False, encoding="utf-8-sig")
    print(f"[OK] 저장: {agg_path}")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


# Notebook cell output captured when the cell was run (stderr):
#   requests가 필요합니다:  pip install requests
#
#   ModuleNotFoundError: No module named 'requests'