In [None]:
import pandas as pd
import requests, csv, time
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.adapters import HTTPAdapter, Retry
from datetime import datetime

# ================ 0. Settings ================
INPUT_REVIEW_FILE = "weighted_score_above_08_modified.csv"  # raw review data (input)
INPUT_APPID_FILE = "unique_appid.csv"  # where the unique appid list is written
OUTPUT_FILE = "appid_genres.csv"  # rows for completed lookups
FAILED_FILE = "failed_appid.csv"  # rows for lookups that raised
MODE = "medium"  # one of "safe" | "medium" | "fast"

# Per-mode presets: thread-pool size and pause length in seconds.
CONF = {
    "safe": {"MAX_WORKERS": 1, "SLEEP": 1.0},
    "medium": {"MAX_WORKERS": 2, "SLEEP": 0.5},
    "fast": {"MAX_WORKERS": 5, "SLEEP": 0.2},
}

# Reject unknown modes up front instead of failing later with a KeyError.
_preset = CONF.get(MODE)
if _preset is None:
    raise ValueError("MODE 는 safe/medium/fast 중 하나여야 합니다.")
MAX_WORKERS, SLEEP = _preset["MAX_WORKERS"], _preset["SLEEP"]

# ================ 1. Extract unique appids ================
print("[단계1] 리뷰 데이터에서 appid 추출")
# Only the "appid" column is loaded from the review file.
df = pd.read_csv(INPUT_REVIEW_FILE, usecols=["appid"])
appid_series = df["appid"].dropna().drop_duplicates()
# Casting to int before str drops any float formatting (e.g. 10.0 -> "10").
unique_appids = appid_series.astype(int).astype(str).tolist()
# Persist the de-duplicated ids as a one-column CSV.
pd.DataFrame({"appid": unique_appids}).to_csv(INPUT_APPID_FILE, index=False)
print(f"  >> unique_appid.csv 저장 완료 (appid 개수: {len(unique_appids)})")

# ================ 2. Session setup ================
def build_session():
    """Create a requests session with a custom User-Agent and HTTPS retries.

    Returns:
        requests.Session: a session whose "https://" adapter retries up to
        5 times (backoff factor 1) on status codes 429, 500, 502, 503, 504.
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    session = requests.Session()
    session.mount("https://", HTTPAdapter(max_retries=retry_policy))
    session.headers.update({"User-Agent": "Mozilla/5.0 (steam-app-collector)"})
    return session


# One module-level session, used by the API call function below.
SESSION = build_session()

# ================ 3. API call function ================
def get_genres(appid, retries=3, delay=2):
    """Fetch the genre names of one app from the Steam store appdetails API.

    Args:
        appid: App id. It is interpolated into the request URL and, as
            ``str(appid)``, used as the key into the JSON response.
        retries: Maximum number of attempts.
        delay: Base back-off in seconds. After a failed attempt ``n``
            (0-based) the function waits ``delay * (n + 1)`` seconds, or
            ``5 + n * 2`` seconds when the response status was 429.

    Returns:
        str: genre descriptions joined with ";". An empty string is returned
        both when the app has no genres and when every attempt failed.

    NOTE(review): callers cannot tell "no genres" from "lookup failed"
    because both yield ""; this function never raises on a failed lookup.
    """
    url = f"https://store.steampowered.com/api/appdetails?appids={appid}&l=en&cc=US"
    for attempt in range(retries):
        try:
            r = SESSION.get(url, timeout=15)
            if r.status_code == 429:
                # Rate limited: apply only the dedicated 429 wait. Previously
                # this fell through to raise_for_status() and slept twice.
                backoff = 5 + attempt * 2
            else:
                r.raise_for_status()
                node = r.json().get(str(appid), {})
                if node and node.get("success"):
                    # "data" / "genres" may be missing or null; treat as empty.
                    genres_list = (node.get("data") or {}).get("genres") or []
                    return ";".join([g.get("description", "") for g in genres_list])
                backoff = delay * (attempt + 1)
        except Exception:
            # Best effort by design: any network, HTTP or payload error just
            # counts as a failed attempt and is retried after the back-off.
            backoff = delay * (attempt + 1)
        # Sleeping after the last attempt would only delay returning "".
        if attempt < retries - 1:
            time.sleep(backoff)
    return ""

# ================ 4. Prepare result files ================
def _fetch_throttled(appid):
    """Fetch genres for one appid, then pause SLEEP seconds in the worker.

    The pause runs inside the worker thread so that it actually spaces out
    the HTTP requests. Sleeping in the result-consuming loop only delayed
    writing rows while the workers kept requesting without any pause.
    """
    try:
        return get_genres(appid)
    finally:
        time.sleep(SLEEP)


# The context managers close both CSV files even if collection is interrupted.
with open(OUTPUT_FILE, "w", newline="", encoding="utf-8") as out, \
        open(FAILED_FILE, "w", newline="", encoding="utf-8") as fail:
    fout = csv.writer(out)
    fout.writerow(["appid", "genres"])

    ff = csv.writer(fail)
    ff.writerow(["appid", "reason"])

    # ================ 5. Run ================
    print(f"[단계2] API 수집 시작 (mode={MODE}, 총 {len(unique_appids)}개)")
    processed = 0
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
        # Map each future back to its appid so results can be labelled.
        futures = {ex.submit(_fetch_throttled, appid): appid for appid in unique_appids}
        for future in as_completed(futures):
            appid = futures[future]
            try:
                genres = future.result()
                fout.writerow([appid, genres])
            except Exception as e:
                # Only reached when the task raised; the reason is the message.
                ff.writerow([appid, str(e)])
            processed += 1
            # Flush per row so partial progress survives an aborted run.
            out.flush()
            fail.flush()
            if processed % 500 == 0:
                print(f"  >> {processed}/{len(unique_appids)} 완료")

print(f"[완료] 성공: {OUTPUT_FILE}, 실패: {FAILED_FILE}")


[단계1] 리뷰 데이터에서 appid 추출
  >> unique_appid.csv 저장 완료 (appid 개수: 18439)
[단계2] API 수집 시작 (mode=medium, 총 18439개)
  >> 500/18439 완료
  >> 1000/18439 완료
  >> 1500/18439 완료
  >> 2000/18439 완료
  >> 2500/18439 완료
  >> 3000/18439 완료
  >> 3500/18439 완료
  >> 4000/18439 완료
  >> 4500/18439 완료
  >> 5000/18439 완료
  >> 5500/18439 완료
  >> 6000/18439 완료
  >> 6500/18439 완료
  >> 7000/18439 완료
  >> 7500/18439 완료
  >> 8000/18439 완료
  >> 8500/18439 완료
  >> 9000/18439 완료
  >> 9500/18439 완료
  >> 10000/18439 완료
  >> 10500/18439 완료
  >> 11000/18439 완료
  >> 11500/18439 완료
  >> 12000/18439 완료
  >> 12500/18439 완료
  >> 13000/18439 완료
  >> 13500/18439 완료
  >> 14000/18439 완료
  >> 14500/18439 완료
  >> 15000/18439 완료
  >> 15500/18439 완료
  >> 16000/18439 완료
  >> 16500/18439 완료
  >> 17000/18439 완료
  >> 17500/18439 완료
  >> 18000/18439 완료
[완료] 성공: appid_genres.csv, 실패: failed_appid.csv


: 