In [1]:
# !pip install -q selenium==4.3.0
# !pip install lxml
# !pip install -q beautifulsoup4==4.11.1
# !pip install -q backoff==2.1.2
# import backoff
# backoff.__version__
# !pip install -q pandas
# !pip install python-dotenv

In [2]:
from time import sleep
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys # 鍵盤事件
import pandas as pd
import backoff
from datetime import datetime

In [3]:
def set_ua():
    """Return the hard-coded User-Agent string (Chrome 103 on 64-bit Windows 10)."""
    return 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'

In [4]:
@backoff.on_exception(backoff.expo, Exception, max_time=10)
def chrome_init():
    """Open a Chrome session on the remote Selenium hub and return its driver.

    The decorator retries the whole function with exponential backoff on any
    ``Exception``, bounded by ``max_time=10``.

    Returns:
        The ``webdriver.Remote`` instance created against
        ``http://selenium-hub:4444/wd/hub``.
    """
    options = webdriver.ChromeOptions()

    # Flags passed to Chrome. '--headless' and '--incognito' are intentionally
    # not enabled; add them here if a headless or incognito session is wanted.
    launch_flags = (
        '--no-sandbox',
        '--ignore-ssl-errors=yes',
        '--ignore-certificate-errors',
        f'user-agent={set_ua()}',
    )
    for flag in launch_flags:
        options.add_argument(flag)

    return webdriver.Remote(
        command_executor='http://selenium-hub:4444/wd/hub',
        options=options,
    )

In [5]:
# Start a browser session and load the Steam "most played" chart page.
url = 'https://store.steampowered.com/charts/mostplayed' # target URL
driver = chrome_init()
driver.get(url) # navigate to the target URL
driver.implicitly_wait(5) # implicit wait of 5 (seconds, per Selenium docs): applies to later element lookups, which return as soon as the element is found

In [6]:
# Send the END key to the root <html> element (the root is normally enough
# as a target), then pause for 3 seconds before reading the page source --
# presumably to let the rest of the chart render; confirm if timing matters.
page_root = driver.find_element(By.CSS_SELECTOR, 'html')
page_root.send_keys(Keys.END)
sleep(3)

In [7]:
# Grab the page source as currently rendered by the browser.
html_source = driver.page_source

# Parse the HTML with BeautifulSoup (lxml parser).
soup = BeautifulSoup(html_source, 'lxml')

# One <tr> per chart row. NOTE(review): this and the class names below look
# auto-generated; if the site markup changes they will stop matching and the
# resulting table will be empty or contain missing values -- verify on failure.
data = soup.find_all('tr', class_ = "_2-RN6nWOY56sNmcDHu069P")

# CSV header: online rank, game title, current players, today's peak.
columns = ['在線排名', '遊戲名稱', '目前玩家人數', '本日高峰']


def _cell_text(row, tag_name, css_class):
    """Return the text of the first ``tag_name`` with ``css_class`` inside ``row``.

    Returns None when no such element exists, so a single malformed row does
    not abort the whole scrape with an AttributeError on ``None.text``.
    """
    cell = row.find(tag_name, class_ = css_class)
    return cell.text if cell is not None else None


save_lst = []
for stream in data:
    rank = _cell_text(stream, "td", "_34h48M_x9S-9Q2FFPX_CcU")
    title = _cell_text(stream, "div", "_1n_4-zvf0n4aqGEksbgW9N")
    current = _cell_text(stream, "td", "_3L0CDDIUaOKTGfqdpqmjcy")
    day_peak = _cell_text(stream, "td", "yJB7DYKsuTG2AYhJdWTIk")

    # Values are in the same order as ``columns``.
    save_lst.append([rank, title, current, day_peak])


df = pd.DataFrame(
    data = save_lst,
    columns = columns
)



In [8]:
# The page has already been parsed into `df`, so the browser session is no longer needed.
driver.quit()

In [9]:
# Write the scraped table to steam_mostplayed.csv as UTF-8, without the
# DataFrame's index column.
df.to_csv('steam_mostplayed.csv', index=False, encoding="utf-8")