In [9]:
import requests
import pandas as pd
import json
import re
from collections import Counter
from konlpy.tag import Okt
from nltk.tokenize import TreebankWordTokenizer
from nltk.corpus import stopwords
import nltk
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import os

# 워드클라우드 폰트 경로 설정 (한글 지원용)
FONT_PATH = "C:/Windows/Fonts/malgun.ttf"  # 윈도우 기준

# 1. 리뷰 가져오기
def fetch_steam_reviews(appid, language='all', count=100):
    url = f'https://store.steampowered.com/appreviews/{appid}?json=1'
    params = {
        'filter': 'all',
        'language': language,
        'review_type': 'all',
        'num_per_page': count
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        return response.json()
    return None

# 2. 리뷰 CSV 저장
def save_reviews_to_csv(appid):
    languages = {'korean': 100, 'english': 100}
    all_reviews = []

    for lang, count in languages.items():
        data = fetch_steam_reviews(appid, language=lang, count=count)
        if data and 'reviews' in data:
            for review in data['reviews']:
                all_reviews.append({
                    'recommendationid': review.get('recommendationid', ''),
                    'author_steamid': review.get('author', {}).get('steamid', 'Anonymous'),
                    'author_num_games_owned': review.get('author', {}).get('num_games_owned', 0),
                    'author_num_reviews': review.get('author', {}).get('num_reviews', 0),
                    'author_playtime_forever': review.get('author', {}).get('playtime_forever', 0),
                    'author_playtime_last_two_weeks': review.get('author', {}).get('playtime_last_two_weeks', 0),
                    'author_playtime_at_review': review.get('author', {}).get('playtime_at_review', 0),
                    'language': review.get('language', ''),
                    'review': review.get('review', ''),
                    'timestamp_created': review.get('timestamp_created', ''),
                    'voted_up': review.get('voted_up', False),
                    'votes_up': review.get('votes_up', 0),
                    'votes_funny': review.get('votes_funny', 0),
                    'weighted_vote_score': review.get('weighted_vote_score', ''),
                    'comment_count': review.get('comment_count', 0),
                    'steam_purchase': review.get('steam_purchase', False),
                })
    filename = f"../Output/steam_reviews_{appid}.csv"
    df = pd.DataFrame(all_reviews)
    df.to_csv(filename, index=False, encoding='utf-8-sig')
    print(f"Saved {len(df)} reviews to {filename}")
    return filename

# 3. load_stopwords() 함수
def load_stopwords(filepath):
    if not os.path.exists(filepath):
        print(f"[경고] 불용어 파일이 존재하지 않습니다: {filepath}")
        return set()
    
    with open(filepath, 'r', encoding='utf-8') as f:
        stopwords = set([line.strip() for line in f if line.strip()])
    return stopwords


# 3. 키워드 추출 함수 (갯수 조절 가능)
def extract_keywords(text, is_korean=False, custom_stopwords=None, top_n=20, extra_stopwords=None):
    if is_korean:
        okt = Okt()
        words = okt.nouns(text)

        if custom_stopwords is None:
            custom_stopwords = load_stopwords("../Data/stopwords-ko.txt")

        if extra_stopwords:
            print(f"Extra Korean stopwords: {extra_stopwords}")  # 추가된 불용어 확인
            custom_stopwords.update(extra_stopwords)

        words = [word for word in words if word not in custom_stopwords]

    else:
        tokenizer = TreebankWordTokenizer()
        words = tokenizer.tokenize(text)
        words = [word.lower() for word in words if word.isalnum()]

        if custom_stopwords is None:
            custom_stopwords = load_stopwords("../Data/stopwords-en.txt")

        if extra_stopwords:
            print(f"Extra English stopwords: {extra_stopwords}")  # 추가된 불용어 확인
            custom_stopwords.update(extra_stopwords)

        words = [word for word in words if word not in custom_stopwords]

    print(f"Number of words after stopword filtering: {len(words)}")
    return Counter(words).most_common(top_n)


# 4. 리뷰에서 언어별 키워드 추출
def process_reviews_for_keywords(csv_file, top_n=20,
                                  custom_stopwords=None,
                                  extra_korean_stopwords=None,
                                  extra_english_stopwords=None):
    
    df = pd.read_csv(csv_file)
    # 'koreana'를 'korean'으로 변환
    df['language'] = df['language'].replace('koreana', 'korean')
    keyword_results = []

    for lang in ['korean', 'english']:
        is_korean = (lang == 'korean')
        texts = ' '.join(df[df['language'] == lang]['review'].dropna())

        # extra_stopwords는 언어별로 다르게 전달
        extra = extra_korean_stopwords if is_korean else extra_english_stopwords

        keywords = extract_keywords(
            texts,
            is_korean=is_korean,
            custom_stopwords=custom_stopwords,
            top_n=top_n,
            extra_stopwords=extra
        )

        keyword_results.append({
            'language': lang,
            'keywords': keywords
        })

    return keyword_results


# 5. 워드클라우드 생성 및 저장
def generate_wordcloud(keywords, language):
    word_dict = dict(keywords)
    wc = WordCloud(
        font_path=FONT_PATH,
        background_color=None, # 'white'
        colormap='Set2',
        mode='RGBA',
        width=800,
        height=400
    ).generate_from_frequencies(word_dict)

    csv_output_path = f"../Output/keyword_freq_{language}.csv"
    pd.DataFrame(keywords, columns=['word', 'frequency']).to_csv(csv_output_path, index=False, encoding='utf-8-sig')
    print(f"{language} 키워드 빈도 CSV 저장 완료 → {csv_output_path}")

    plt.figure(figsize=(10, 5))
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.title(f"{language.capitalize()} 리뷰에 대한 워드 클라우드")
    
    output_path = f"../Output/wordcloud_{language}.png"
    wc.to_file(output_path)
    print(f"{language} 워드클라우드 저장 완료 → {output_path}")
    plt.close()

# 6. 전체 흐름 실행 함수
def run_analysis(appid, top_n=20, extra_korean_stopwords=None, extra_english_stopwords=None):
    csv_path = save_reviews_to_csv(appid)
    results = process_reviews_for_keywords(
        csv_file=csv_path,
        top_n=top_n,
        extra_korean_stopwords=extra_korean_stopwords,
        extra_english_stopwords=extra_english_stopwords
    )

    for result in results:
        generate_wordcloud(result['keywords'], result['language'])


In [11]:
my_korean_stopwords = {'게임', '진짜', '완전'}
my_english_stopwords = {'game', 'really', 'lol'}

run_analysis(appid=2680010, 
             top_n=1000,
             extra_korean_stopwords=my_korean_stopwords,
             extra_english_stopwords=my_english_stopwords)

Saved 200 reviews to ../Output/steam_reviews_2680010.csv
Extra Korean stopwords: {'완전', '진짜', '게임'}
Number of words after stopword filtering: 1560
Extra English stopwords: {'game', 'really', 'lol'}
Number of words after stopword filtering: 2511
korean 키워드 빈도 CSV 저장 완료 → ../Output/keyword_freq_korean.csv
korean 워드클라우드 저장 완료 → ../Output/wordcloud_korean.png
english 키워드 빈도 CSV 저장 완료 → ../Output/keyword_freq_english.csv
english 워드클라우드 저장 완료 → ../Output/wordcloud_english.png


In [18]:
json_data  = fetch_steam_reviews(2680010, language='all', count=100)
json_data
    

{'success': 1,
 'query_summary': {'num_reviews': 100,
  'review_score': 8,
  'review_score_desc': 'Very Positive',
  'total_positive': 755,
  'total_negative': 47,
  'total_reviews': 802},
 'reviews': [{'recommendationid': '191098142',
   'author': {'steamid': '76561198120512205',
    'num_games_owned': 0,
    'num_reviews': 32,
    'playtime_forever': 515,
    'playtime_last_two_weeks': 515,
    'playtime_at_review': 260,
    'last_played': 1742920294},
   'language': 'english',
   'review': 'This game is absolutely awesome. I enjoy games like Elden Ring, Dark Souls or Lies of P etc.\n\nIt takes literally all the good parts from those game and take away the annoying parts (like long runbacks etc.).\n\nWhat I really enjoy about it:\n\n- it is not open world. It is kind of "Mission based" starting from a gathering hub.\n- Not a whole lot of trash mobs or traps. Everything is carefully considered (not too much to be annoying, but enough for you to keep attention on the screen)\n- There i

In [23]:
a= save_reviews_to_csv(2680010)
a

Saved 200 reviews to ../Output/steam_reviews_2680010.csv


'../Output/steam_reviews_2680010.csv'

In [25]:
results = process_reviews_for_keywords(csv_file=a,
        top_n=1000
        )

In [28]:
for result in results:
    print(result)

{'language': 'korean', 'keywords': []}


In [27]:
for result in results:
    generate_wordcloud(result['keywords'], result['language'])

ValueError: We need at least 1 word to plot a word cloud, got 0.