In [1]:
from googleapiclient.discovery import build
from datetime import datetime, timedelta
from googleapiclient.errors import HttpError
from oauth2client.tools import argparser
import numpy as np
import pandas as pd
import isodate
import requests
import time
from selenium import webdriver
from IPython.display import JSON

from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

In [2]:
# Identify which Google API (and which version of it) the client should target.
api_service_name = "youtube"
api_version = "v3"

# Read the API key from a local text file, trimming surrounding whitespace.
with open('api.txt', 'r') as key_file:
    api_key = key_file.read().strip()

# Create the API client, authenticated with the developer key read above.
youtube = build(
    api_service_name,
    api_version,
    developerKey=api_key,
)

In [3]:
# URL building blocks for the Steam store.
base_url = "https://store.steampowered.com/"

url_stats = 'stats/'
url_search = 'search/'
url_category = 'category/'

# Query string selecting the "new and trending" tab of a category hub.
# (A plain '?filter=' prefix was used previously and is kept here for reference.)
url_filter = '?flavor=contenthub_newandtrending'

# Pagination offset; the numeric offset is appended to this prefix.
url_offset = '&offset='

# Steam category URL segments, grouped by genre.
url_action = 'action/'
url_action_fps = 'action_fps/'
url_action_tps = 'action_tps/'
# NOTE(review): the slug was spelled 'fighting_marial_arts/'; corrected to the
# 'martial' spelling — confirm against the live Steam category URL.
url_fighting_martial_arts = 'fighting_martial_arts/'
url_fighting_marial_arts = url_fighting_martial_arts  # legacy misspelled name, kept for compatibility
# NOTE(review): the slug was spelled 'shump/'; corrected to 'shmup/' — confirm
# against the live Steam category URL.
url_shmup = 'shmup/'
url_shump = url_shmup  # legacy misspelled name, kept for compatibility
url_arcade_rhythm = 'arcade_rhythm/'
url_action_run_jump = 'action_run_jump/'
url_hack_and_slash = 'hack_and_slash/'

url_rpg = 'rpg/'
url_jrpg = 'rpg_jrpg/'
url_rogue_like_rogue_lite = 'rogue_like_rogue_lite/'
url_rpg_action = 'rpg_action/'
url_adventure_rpg = 'adventure_rpg/'
url_rpg_strategy = 'rpg_strategy_tactics/'
url_rpg_turn_based = 'rpg_turn_based/'
url_rpg_party_based = 'rpg_party_based/'

url_strategy = 'strategy/'
url_strategy_military = 'strategy_military/'
url_strategy_grand_4x = 'strategy_grand_4x/'
url_strategy_cities_settlements = 'strategy_cities_settlements/'
url_strategy_real_time = 'strategy_real_time/'
url_strategy_card_board = 'strategy_card_board/'
url_tower_defense = 'tower_defense/'
url_strategy_turn_based = 'strategy_turn_based/'

url_adventure = 'adventure/'
url_metroidvania = 'metroidvania/'
url_visual_novel = 'visual_novel/'
# url_adventure_rpg also belongs to this group; it is already defined with the
# RPG segments above, so it is not assigned a second time here.
url_casual = 'casual/'
url_puzzle_matching = 'puzzle_matching/'
url_story_rich = 'story_rich/'
url_hidden_object = 'hidden_object/'

# Simulation segments. These were originally named with a 'rul_' prefix (typo);
# the 'url_' names match the rest of the file.
url_simulation = 'simulation/'
url_sim_building_automation = 'sim_building_automation/'
url_sim_farming_crafting = 'sim_farming_crafting/'
url_sim_physics_sandbox = 'sim_physics_sandbox/'
url_sim_life = 'sim_life/'
url_sim_dating = 'sim_dating/'
url_sim_space_flight = 'sim_space_flight/'
url_sim_hobby_sim = 'sim_hobby_sim/'

# Legacy 'rul_' aliases, kept so existing references keep working.
rul_simulation = url_simulation
rul_sim_building_automation = url_sim_building_automation
rul_sim_farming_crafting = url_sim_farming_crafting
rul_sim_physics_sandbox = url_sim_physics_sandbox
rul_sim_life = url_sim_life
rul_sim_dating = url_sim_dating
rul_sim_space_flight = url_sim_space_flight
rul_sim_hobby_sim = url_sim_hobby_sim

url_sports_and_racing = 'sports_and_racing/'
url_sports_individual = 'sports_individual/'
url_sports_fishing_hunting = 'sports_fishing_hunting/'
url_racing = 'racing/'
url_racing_sim = 'racing_sim/'
url_sports = 'sports/'
url_sports_sim = 'sports_sim/'
url_sports_team = 'sports_team/'

## URL segments by theme
url_science_fiction = 'science_fiction/'
url_horror = 'horror/'
url_mystery_detective = 'mystery_detective/'
url_survival = 'survival/'
url_adultonly = 'adultonly/'
url_anime = 'anime/'
url_exploration_open_world = 'exploration_open_world/'
url_space = 'space/'

## URL segments by player support
url_multiplayer_lan = 'multiplayer_lan/'
url_multiplayer_mmo = 'multiplayer_mmo/'
url_multiplayer_local_party = 'multiplayer_local_party/'
url_multiplayer = 'multiplayer/'
url_singleplayer = 'singleplayer/'
url_multiplayer_online_competitive = 'multiplayer_online_competitive/'
url_multiplayer_coop = 'multiplayer_coop/'


## URL 합치기
- 태그 바꿀 때는 url_multiplayer만 바꿔서 사용
- 게임 목록들이 12개씩 끊겨서 나와서 url_offset 사용
    - 아래 반복문에서 사용됨
    - 상위 60개만 보게끔 했지만, 더 보고 싶다면 range 인자값을 늘리면 가능.

In [4]:
## Assemble the category page URL
# To scrape a different tag, swap url_multiplayer for another segment constant.
use_url = "".join((base_url, url_category, url_multiplayer, url_filter))

In [5]:
def _first_text(parent, class_name):
    """Return the stripped text of the first descendant with ``class_name``, or '' if there is none."""
    matches = parent.find_elements(By.CLASS_NAME, class_name)
    return matches[0].text.strip() if matches else ''


def new_popular_game_scan(use_URL, page_load_wait=20, per_game_wait=5):
    """Scrape one Steam category page and return its game cards as a DataFrame.

    Parameters
    ----------
    use_URL : str
        Full URL of the page to open.
    page_load_wait : float, default 20
        Seconds to sleep after opening the page before looking for game cards.
    per_game_wait : float, default 5
        Seconds to sleep while processing each game card (after reading its name).

    Returns
    -------
    pandas.DataFrame
        One row per game card with the columns '게임 이름', '가격', '출시일' and
        'Tag 1' .. 'Tag 5'. Fields that are absent from a card, and unused tag
        slots, are empty strings. The frame has no rows when no cards are found.
    """
    # Run Chrome without a visible window. The ``options.headless`` attribute is
    # deprecated/removed in recent Selenium 4 releases, so pass the flag directly.
    options = Options()
    options.add_argument("--headless=new")

    service = Service(executable_path=ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)

    tag_slots = 5  # number of 'Tag N' columns in the output
    game_data = []
    try:
        driver.get(use_URL)

        # Fixed pause before querying the page for game cards.
        time.sleep(page_load_wait)
        games = driver.find_elements(By.CLASS_NAME, "_1lRFu670LVk6Gmeb12h7Hr")

        for game in games:
            name = _first_text(game, "_3jI467XYJLy1CQ5YZhp2q_")
            # Pause kept from the original implementation; presumably it gives the
            # rest of the card time to render — TODO confirm it is still needed.
            time.sleep(per_game_wait)
            price = _first_text(game, "Wh0L8EnwsPV_8VAu8TOYr")
            release_date = _first_text(game, "_3eOdkTDYdWyo_U5-JPeer1")

            # Keep at most `tag_slots` tags and pad with '' so every row has the same width.
            tag_list = [tag.text for tag in game.find_elements(By.CLASS_NAME, "WidgetTag")][:tag_slots]
            tag_list += [''] * (tag_slots - len(tag_list))

            game_data.append([name, price, release_date] + tag_list)
    finally:
        # Always shut the browser down, even when scraping fails part-way.
        driver.quit()

    new_games = pd.DataFrame(
        game_data,
        columns=['게임 이름', '가격', '출시일', 'Tag 1', 'Tag 2', 'Tag 3', 'Tag 4', 'Tag 5'],
    )
    return new_games

In [6]:
# Scrape five consecutive result pages and stack them into one frame.
df_list = []

for page_index in range(5):
    # The listing is paginated 12 games at a time, so the offset advances in steps of 12.
    use_url = "".join(
        [base_url, url_category, url_multiplayer, url_filter, url_offset, str(12 * page_index)]
    )
    df_list.append(new_popular_game_scan(use_url))

df_combined = pd.concat(df_list, ignore_index=True)
df_combined

KeyboardInterrupt: 

## 비어 있는 값('')을 결측치(np.nan)로 바꾼 후 제거

In [8]:
# Work on a copy so the scraped frame (df_combined) is left untouched and this
# cell gives the same result every time it is re-run.
df3 = df_combined.copy()

# Treat empty-string game names as missing values.
df3['게임 이름'] = df3['게임 이름'].replace('', np.nan)

# Drop every row containing a missing value, then renumber the index from 0.
data = df3.dropna().reset_index(drop=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   게임 이름   36 non-null     object
 1   가격      36 non-null     object
 2   출시일     36 non-null     object
 3   Tag 1   36 non-null     object
 4   Tag 2   36 non-null     object
 5   Tag 3   36 non-null     object
 6   Tag 4   36 non-null     object
 7   Tag 5   36 non-null     object
dtypes: object(8)
memory usage: 2.4+ KB


## csv로 저장
- 이름만 바꿔서 저장하기

In [9]:
# Write the cleaned table to disk without the index column.
# The "utf-8-sig" codec prepends a byte-order mark to the file.
data.to_csv(
    'multiplayer_new_popular_games.csv',
    index=False,
    encoding="utf-8-sig",
)

- 여기까지 진행했을 때 데이터의 수는 12 * n의 값이어야 한다. (n은 range안의 값)
- 만약 값이 다르다면 `df_list[i]`(i번째 페이지의 결과)로 확인해보자.
- 네트워크 환경에 예민하게 반응한다. (웹 페이지 로딩 속도가 느리다면 time.sleep()을 길게 가지자)
- time.sleep()을 많이 사용하다보니 좀 느리다. 기다려보자.
- 복붙해서 잘 사용해보자