# 스팀 오버워치2 전체 리뷰 및 유저 정보 수집

## 조건
- 500건마다 중간저장
- 컬럼 : 스팀아이디, 작성일, 본문, 플레이 시간, 추천여부(0/1), 계정 레벨, 갖고있는 게임 목록

## 절차
1. 리뷰 크롤링
2. 정제
3. 스팀아이디(16자리 숫자) 맵핑
4. 유저 메타 수집
5. 리뷰+유저정보 통합
6. 리뷰 영문으로 번역

### 리뷰 크롤링

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import pandas as pd

# 셀레니움 설정
options = Options()
options.add_argument("--start-maximized")
options.add_experimental_option("detach", True)

driver = webdriver.Chrome(options=options)

# URL 진입
url = "https://steamcommunity.com/app/2357570/reviews/?browsefilter=mostrecent&snr=1_5_100010_&filterLanguage=all&p=1"
driver.get(url)
time.sleep(3)

SCROLL_PAUSE = 2
results = []
last_height = driver.execute_script("return document.body.scrollHeight")
last_seen_content = None
seen_repeat_count = 0

save_interval = 500
save_count = 1

while True:
    # 리뷰 카드 수집
    cards = driver.find_elements(By.CLASS_NAME, "apphub_Card")

    for card in cards[len(results):]:
        try:
            nickname_link = card.find_element(By.CSS_SELECTOR, ".apphub_friend_block_container a").get_attribute("href")
        except:
            nickname_link = None

        try:
            date = card.find_element(By.CLASS_NAME, "date_posted").text.replace("게시 일시: ", "").strip()
        except:
            date = None

        try:
            content = card.find_element(By.CLASS_NAME, "apphub_CardTextContent").text.strip()
        except:
            content = None

        try:
            recommendation = card.find_element(By.CLASS_NAME, "title").text.strip()
        except:
            recommendation = None

        try:
            playtime = card.find_element(By.CLASS_NAME, "hours").text.strip()
        except:
            playtime = None

        # 종료 조건: 동일 댓글 반복 감지
        if content == last_seen_content:
            seen_repeat_count += 1
            if seen_repeat_count >= 3:
                print("\n동일 댓글 3회 반복 감지. 크롤링 종료.")
                break
        else:
            seen_repeat_count = 0
            last_seen_content = content

        row = {
            "닉네임": nickname_link,
            "작성일": date,
            "본문": content,
            "추천 여부": recommendation,
            "플레이 시간": playtime
        }
        results.append(row)

        # 실시간 출력
        print(f"\n닉네임: {nickname_link}")
        print(f"작성일: {date}")
        print(f"추천여부: {recommendation}")
        print(f"플레이 시간: {playtime}")
        print(f"본문: {content[:100]}{'...' if content and len(content) > 100 else ''}")

        # 중간 저장
        if len(results) % save_interval == 0:
            temp_df = pd.DataFrame(results)
            temp_df.to_csv(f"overwatch2_reviews_temp_{save_count}.csv", index=False, encoding="utf-8-sig")
            print(f"\n🔸 중간 저장 완료: overwatch2_reviews_temp_{save_count}.csv ({len(results)}개)")
            save_count += 1

    # 중복 종료 처리
    if seen_repeat_count >= 3:
        break

    # 스크롤 다운
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(SCROLL_PAUSE)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        print("\n더 이상 로드할 내용 없음. 종료합니다.")
        break
    last_height = new_height

# 최종 저장
df = pd.DataFrame(results)
df.to_csv("overwatch2_reviews.csv", index=False, encoding="utf-8-sig")
print(f"\n✅ 크롤링 완료! 총 리뷰 수: {len(df)}개")
print("저장 완료: overwatch2_reviews.csv")

In [4]:
import pandas as pd

# CSV 파일 읽기
df1 = pd.read_csv("overwatch2_reviews_temp_25.csv", encoding="utf-8-sig")
df2 = pd.read_csv("overwatch2_reviews.csv", encoding="utf-8-sig")

# 병합
merged_df = pd.concat([df1, df2], ignore_index=True)

# 중복 제거
merged_df.drop_duplicates(inplace=True)

# 결과 저장
merged_df.to_csv("14-1. overwatch2_reviews_최종.csv", index=False, encoding="utf-8-sig")
print(f"✅ 병합 완료: 총 {len(merged_df)}개 리뷰 저장됨")


✅ 병합 완료: 총 12535개 리뷰 저장됨


In [5]:
import pandas as pd

user_df = pd.read_csv('14-1. overwatch2_reviews_최종.csv',encoding='utf-8-sig')
user_df.head()

Unnamed: 0,닉네임,작성일,본문,추천 여부,플레이 시간
0,https://steamcommunity.com/id/Magnire/,Posted: 4 August,Posted: 4 August\nreally bad,Not Recommended,4.6 hrs on record
1,https://steamcommunity.com/profiles/7656119884...,Posted: 4 August,Posted: 4 August\nwen juego,Recommended,34.2 hrs on record
2,https://steamcommunity.com/profiles/7656119975...,Posted: 4 August,Posted: 4 August\nww,Recommended,14.3 hrs on record
3,https://steamcommunity.com/profiles/7656119830...,Posted: 4 August,"Posted: 4 August\nReally annoying sometimes, r...",Recommended,0.2 hrs on record
4,https://steamcommunity.com/profiles/7656119912...,Posted: 4 August,Posted: 4 August\nAmo esse jogo balanceado e o...,Not Recommended,667.5 hrs on record


In [None]:
user_df["닉네임"] = user_df["닉네임"].str.replace("https://steamcommunity.com/", "", regex=False)

In [10]:
user_df["작성일"] = user_df["작성일"].str.replace("Posted: ", "", regex=False)

In [12]:
import re

user_df["닉네임"] = user_df["닉네임"].str.extract(r'(?:id/|profiles/)([^/]+)')

In [18]:
user_df["본문"] = user_df["본문"].str.replace(r'^Posted:.*?\n', '', regex=True)

In [20]:
user_df.to_csv("14-1. overwatch2_reviews_최종.csv", index=False, encoding="utf-8-sig")

### 유저 프로필 정보 가져오기

In [None]:
alpha_nick = user_df.copy()

In [None]:
alpha_nick = alpha_nick["닉네임"].tolist()

In [None]:
alpha_nick = [n for n in alpha_nick if not n.isdigit()]

In [29]:
alpha_nick

['Magnire',
 'cowboybaby',
 'corbinmaehr',
 'R2XD3PO',
 'liminalani',
 'starsdontfall',
 'M3p0',
 'Jesuswithanawp',
 'tangerinedragon',
 'PiniaXpress',
 'tadehack',
 'Googlevvv',
 'Instantout',
 'DodoRex23',
 'ZNiiL',
 'looriee',
 'H4rDinMyLove',
 'iuwata',
 'ha1eyjane',
 'BloodDragoonXIV',
 'WINCHEZZER',
 'GoboVR',
 'OndareGue',
 'Not_Frost',
 'noideaijustwantit',
 'alakazamers',
 'Adamfromky',
 'mirhats',
 'whwjdrb1009',
 'friday17live',
 'abodeksa802',
 'elcojetrabas12',
 'ThatManZoro1',
 'nwnier',
 'pandyta',
 'dianinhaxx',
 'dhunderi',
 'XanderTheManHandler',
 'Guttenkleiton2',
 'officialaustria',
 'dolliheart',
 'nihogen',
 'amysplanet',
 'Doki_Dokers',
 'worms01',
 'Verleu',
 'AlexHanter',
 'MaieiaM',
 'Odd247',
 'tashaaaaaaaa',
 'ladygoose',
 'kreemasaleet',
 'masamasa',
 'raityu4789',
 'goleirodovasco',
 'tkv97',
 'Musash1M1amoto',
 'scarletfamettv',
 'MikuMiyuki',
 'KamiGaDaisuki',
 'marulkiscute',
 'Clementiqe',
 'lovchen',
 'mish3al',
 'Altair125',
 'kev12345678',
 'dahmens

### API로 커스텀닉네임에서 스팀고유아이디 추출하기

In [30]:
import requests
import pandas as pd

API_KEY = "8255BBEC6046811618B87143BB30C25E"
alpha_nick

usernames = []
steam_ids = []

for user in alpha_nick:
    url = f"http://api.steampowered.com/ISteamUser/ResolveVanityURL/v1/?key={API_KEY}&vanityurl={user}"
    try:
        response = requests.get(url).json()
        if response['response']['success'] == 1:
            steam_id = response['response']['steamid']
            usernames.append(user)
            steam_ids.append(steam_id)
            print(f"✅ {user} → {steam_id}")
        else:
            usernames.append(user)
            steam_ids.append(None)
            print(f"❌ {user} → SteamID 찾을 수 없음")
    except Exception as e:
        usernames.append(user)
        steam_ids.append(None)
        print(f"⚠️ {user} → 오류 발생: {e}")

# 결과 DataFrame 생성
steam_df = pd.DataFrame({
    "작성자": usernames,
    "SteamID64": steam_ids
})

# 출력
print("\n🎯 최종 결과:")
print(steam_df.head())


✅ Magnire → 76561198089026294
✅ cowboybaby → 76561198120487119
✅ corbinmaehr → 76561199000519759
✅ R2XD3PO → 76561198043774581
✅ liminalani → 76561199675245793
✅ starsdontfall → 76561198354175076
✅ M3p0 → 76561198019349671
✅ Jesuswithanawp → 76561198041569692
✅ tangerinedragon → 76561199069266233
✅ PiniaXpress → 76561198230205940
✅ tadehack → 76561198145723355
✅ Googlevvv → 76561199258942573
✅ Instantout → 76561198130552245
✅ DodoRex23 → 76561198351838906
✅ ZNiiL → 76561198872525455
✅ looriee → 76561199500198501
✅ H4rDinMyLove → 76561198222457676
✅ iuwata → 76561199020766586
✅ ha1eyjane → 76561199443512836
✅ BloodDragoonXIV → 76561198196886815
✅ WINCHEZZER → 76561198249783102
✅ GoboVR → 76561199202143220
✅ OndareGue → 76561198243922706
✅ Not_Frost → 76561198397685298
✅ noideaijustwantit → 76561199819880010
✅ alakazamers → 76561198217170747
✅ Adamfromky → 76561198025444815
✅ mirhats → 76561198968119285
✅ whwjdrb1009 → 76561198119938802
✅ friday17live → 76561198026073503
✅ abodeksa802 → 

In [35]:
steam_df

Unnamed: 0,작성자,SteamID64
0,Magnire,76561198089026294
1,cowboybaby,76561198120487119
2,corbinmaehr,76561199000519759
3,R2XD3PO,76561198043774581
4,liminalani,76561199675245793
...,...,...
3456,abodeksa802,76561199088836402
3457,MikuMiyuki,76561198954594017
3458,mish3al,76561198271314605
3459,zerohux37,76561198401068937


### 맵핑

In [32]:
steam_df.to_csv("14-2. 유저id-고유번호.csv", index=False, encoding="utf-8-sig")

In [69]:
user_review = pd.read_csv("14-1. overwatch2_reviews_최종.csv", encoding="utf-8-sig")

In [70]:
user_mapping = pd.merge(user_review, steam_df, left_on='닉네임', right_on='작성자', how='left')
user_mapping.head()

Unnamed: 0,닉네임,작성일,본문,추천 여부,플레이 시간,작성자,SteamID64
0,Magnire,4 August,really bad,Not Recommended,4.6 hrs on record,Magnire,7.656119808902629e+16
1,76561198840751916,4 August,wen juego,Recommended,34.2 hrs on record,,
2,76561199758331866,4 August,ww,Recommended,14.3 hrs on record,,
3,76561198309660819,4 August,"Really annoying sometimes, really fun sometimes",Recommended,0.2 hrs on record,,
4,76561199125555677,4 August,Amo esse jogo balanceado e o matchmaking é inc...,Not Recommended,667.5 hrs on record,,


In [71]:
for i in range(len(user_mapping)):
    if pd.isna(user_mapping.loc[i, 'SteamID64']):
        user_mapping.loc[i, 'SteamID64'] = user_mapping.loc[i, '닉네임']

In [72]:
user_mapping.drop(columns=['작성자', '닉네임'], inplace=True)

In [73]:
user_mapping = user_mapping[['SteamID64', '작성일', '본문', '플레이 시간', '추천 여부']]

In [74]:
user_mapping.to_csv("14-3. overwatch2_reviews_진짜최종.csv", index=False, encoding="utf-8-sig")

### 작성일 통일

In [75]:
from datetime import datetime
import pandas as pd

def parse_date(text):
    try:
        # 현재 연도 추가
        text = f"{text} {datetime.now().year}"
        # day first 또는 month first 모두 시도
        for fmt in ['%d %B %Y', '%B %d %Y']:
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
        return pd.NaT
    except:
        return pd.NaT

user_mapping['작성일'] = user_mapping['작성일'].apply(parse_date)
user_mapping['작성일'] = pd.to_datetime(user_mapping['작성일'], errors='coerce')  # 혹시 몰라 한 번 더 안전하게
user_mapping['작성일'] = user_mapping['작성일'].dt.strftime('%Y-%m-%d')  # 통일된 형식으로 출력

In [77]:
user_mapping.to_csv("14-3. overwatch2_reviews_진짜최종.csv", index=False, encoding="utf-8-sig")

In [78]:
overwatch_user = pd.read_csv("14-3. overwatch2_reviews_진짜최종.csv", encoding="utf-8-sig")

In [79]:
overwatch_user

Unnamed: 0,SteamID64,작성일,본문,플레이 시간,추천 여부
0,76561198089026294,2025-08-04,really bad,4.6 hrs on record,Not Recommended
1,76561198840751916,2025-08-04,wen juego,34.2 hrs on record,Recommended
2,76561199758331866,2025-08-04,ww,14.3 hrs on record,Recommended
3,76561198309660819,2025-08-04,"Really annoying sometimes, really fun sometimes",0.2 hrs on record,Recommended
4,76561199125555677,2025-08-04,Amo esse jogo balanceado e o matchmaking é inc...,667.5 hrs on record,Not Recommended
...,...,...,...,...,...
12548,76561198401068937,2025-07-31,your server down by itself and i got suspended...,304.6 hrs on record,Not Recommended
12549,76561199006479136,2025-07-31,讚,301.3 hrs on record,Recommended
12550,76561199719259546,2025-07-30,cool,13.8 hrs on record,Recommended
12551,76561199719259546,2025-07-30,cool,13.8 hrs on record,Recommended


### 스팀고유번호로 메타정보 추출하기

In [None]:
import requests
import pandas as pd
import time

API_KEY = "8255BBEC6046811618B87143BB30C25E"
steamids = overwatch_user["SteamID64"].dropna().astype(str).tolist()

def chunk_list(lst, size):
    for i in range(0, len(lst), size):
        yield lst[i:i + size]

results = []
save_interval = 500
save_count = 1
processed = 0

for chunk in chunk_list(steamids, 100):
    for steamid in chunk:
        steamid_str = str(steamid)

        # 계정 레벨
        try:
            level_url = "http://api.steampowered.com/IPlayerService/GetSteamLevel/v1/"
            level_params = {"key": API_KEY, "steamid": steamid_str}
            res = requests.get(level_url, params=level_params)
            level = res.json().get("response", {}).get("player_level", None)
        except Exception as e:
            print(f"[레벨 실패] {steamid_str}: {e}")
            level = None

        # 게임 목록
        try:
            game_url = "http://api.steampowered.com/IPlayerService/GetOwnedGames/v1/"
            game_params = {
                "key": API_KEY,
                "steamid": steamid_str,
                "include_appinfo": True,
                "include_played_free_games": True
            }
            res = requests.get(game_url, params=game_params)
            games = res.json().get("response", {}).get("games", [])
            game_names = [g["name"] for g in games if "name" in g]
            game_list_str = ", ".join(game_names) if game_names else None
        except Exception as e:
            print(f"[게임 실패] {steamid_str}: {e}")
            game_list_str = None

        # 결과 저장
        results.append({
            "SteamID64": steamid_str,
            "계정 레벨": level,
            "게임 목록": game_list_str
        })
        processed += 1

        # 🔎 실시간 출력
        print(f"\n[{processed}] SteamID: {steamid_str}")
        print(f"  계정 레벨: {level}")
        if game_list_str:
            print(f"  게임 목록 예시: {game_list_str[:100]}{'...' if len(game_list_str) > 100 else ''}")
        else:
            print("  게임 목록: 없음 또는 실패")

        # 💾 중간 저장
        if processed % save_interval == 0:
            temp_df = pd.DataFrame(results)
            temp_df.to_csv(f"유저정보_temp_{save_count}.csv", index=False, encoding="utf-8-sig")
            print(f"\n🔸 중간 저장 완료: 유저정보_info_temp_{save_count}.csv ({processed}명)")
            save_count += 1

    time.sleep(1)

# 📦 최종 저장
final_df = pd.DataFrame(results)
final_df.to_csv("유저정보.csv", index=False, encoding="utf-8-sig")
print(f"\n✅ 전체 저장 완료! 총 {len(final_df)}명: 유저정보.csv")

In [2]:
import pandas as pd

In [4]:
overwatch_realuser = pd.read_csv("14-3. overwatch2_reviews_진짜최종.csv", encoding="utf-8-sig")
user_piece = pd.read_csv("14-4. 유저정보.csv", encoding="utf-8-sig")

In [5]:
overwatch_realuser.head()

Unnamed: 0,SteamID64,작성일,본문,플레이 시간,추천 여부
0,76561198089026294,2025-08-04,really bad,4.6 hrs on record,Not Recommended
1,76561198840751916,2025-08-04,wen juego,34.2 hrs on record,Recommended
2,76561199758331866,2025-08-04,ww,14.3 hrs on record,Recommended
3,76561198309660819,2025-08-04,"Really annoying sometimes, really fun sometimes",0.2 hrs on record,Recommended
4,76561199125555677,2025-08-04,Amo esse jogo balanceado e o matchmaking é inc...,667.5 hrs on record,Not Recommended


In [8]:
user_piece["계정 레벨"] = user_piece["계정 레벨"].astype("Int64")

In [9]:
user_piece.head()

Unnamed: 0,SteamID64,계정 레벨,게임 목록
0,76561198089026294,109,"Counter-Strike, Counter-Strike: Condition Zero..."
1,76561198840751916,8,"Left 4 Dead 2, Worms Ultimate Mayhem, The Walk..."
2,76561199758331866,1,
3,76561198309660819,13,"Left 4 Dead 2, Terraria, Neverwinter, PlanetSi..."
4,76561199125555677,6,"Terraria, The Forest, Trove, DRAGON BALL XENOV..."


In [None]:
merged_user = pd.merge(overwatch_realuser, user_piece, left_on='SteamID64', right_on='SteamID64', how='left')

In [14]:
merged_user['추천 여부'] = merged_user['추천 여부'].replace({'Not Recommended': 0, 'Recommended': 1})

  merged_user['추천 여부'] = merged_user['추천 여부'].replace({'Not Recommended': 0, 'Recommended': 1})


In [30]:
merged_user = merged_user[~merged_user.duplicated()]


In [31]:
merged_user.to_csv("14-5. overwatch2_user&reviews.csv", index=False, encoding="utf-8-sig")

### 리뷰 영어로 번역

In [1]:
import pandas as pd
from googletrans import Translator
import time

# 1. 데이터 로드
df = pd.read_csv("14-5. overwatch2_user&reviews.csv", encoding="utf-8-sig")

# 2. 번역기 초기화
translator = Translator()

# 3. 번역 결과 저장 리스트
translated_results = []

# 4. 실시간 번역 수행 (동기 방식)
for idx, row in df.iterrows():
    text = row["본문"]
    steam_id = row["SteamID64"]
    
    if pd.isna(text) or not str(text).strip():
        translated_text = ""
    else:
        try:
            result = translator.translate(text, src='auto', dest='en')
            translated_text = result.text.strip()
        except Exception as e:
            print(f"[에러] {idx}번 번역 실패: {e}")
            translated_text = "[번역 실패]"
    
    print(f"{idx+1}/{len(df)} | {steam_id} → {translated_text}")
    translated_results.append(translated_text)

    # 너무 빠르게 보내면 차단될 수 있어서 대기 (선택)
    time.sleep(0.5)

# 5. 결과 저장
df["본문_번역"] = translated_results
translated_df = df[["SteamID64", "본문_번역"]]
translated_df.to_csv("유저데이터_번역결과.csv", index=False, encoding='utf-8-sig')

print("✅ 번역 완료 및 저장됨 → 유저데이터_번역결과.csv")


1/12571 | 76561198089026294 → really bad
2/12571 | 76561198840751916 → wen game
3/12571 | 76561199758331866 → ww
4/12571 | 76561198309660819 → Really annoying sometimes, really fun sometimes
5/12571 | 76561199125555677 → I love this balanced game and matchmaking is amazing
6/12571 | 76561199068248962 → Reminds me of my father. got called slurs
7/12571 | 76561198120487119 → I hate this game
8/12571 | 76561199081058790 → One article c o x l d w h a w a w a ㅔ ㅈ H A A A A Young.
9/12571 | 76561198918229890 → lotta fat people
10/12571 | 76561199822570858 → Fuck the producers of this game battle.net i kurani fuck which game you have a telephone number verification of your mother or a game you can not play a game how many years I can not play like a man to set up the login system, what do you get from the phone number of the nation's number of the number of stupid obese ♥♥♥♥.
11/12571 | 76561199810697458 → Product received for free
12/12571 | 76561199810697458 → Product received for free
13/1

In [None]:
df.rename(columns={"본문_번역": "본문"}, inplace=True)

In [None]:
df.drop(columns=['본문'], inplace=True)

In [31]:
df = df[["SteamID64", "작성일", "본문", "플레이 시간", "추천 여부", "계정 레벨", "게임 목록"]]

In [32]:
df.to_csv('14-5. overwatch2_user&reviews.csv', index=False, encoding='utf-8-sig')

In [33]:
df.to_csv("최종_유저데이터.csv", index=False, encoding="utf-8-sig")