In [1]:
# import libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import re

In [2]:
# function to grab all HTML content from a page, returns a BeautifulSoup object
def get_page_soup(driver, waittime, url): 

    # Navigate to the webpage
    driver.get(url)

    if "mostplayed" in url:
        # Wait up to 2 seconds for an element to be present on the page (using regular expression pattern)
        wait = WebDriverWait(driver, waittime)
        _    = wait.until(lambda driver: driver.find_element(By.XPATH, "//tr[contains(@class, '_2-')]"))

    try:

        # wait for an age gate to potentially appear
        wait = WebDriverWait(driver, waittime)
        _    = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "age_gate")))

        # click the option for "1950" for "Year" dropdown menu
        driver.find_element(By.XPATH, "//select[contains(@id, 'ageYear')]/option[text()='1950']").click()

        # click the "View Page" button
        driver.find_element(By.XPATH, "//a[contains(@id, 'view_product_page_btn')]").click()

        # wait for the system requirements section to appear
        wait = WebDriverWait(driver, waittime)
        _    = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "game_area_sys_req")))

        print(f'Age Gate detected, try successfully bypassed gate for URL: {url}')

    except Exception as e: 
        
        print(f'No Age Gate, try returned exception: {type(e)} for URL: {url}')

    # Parse the HTML with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # return the soup
    return soup

In [3]:
# Initialize the Chrome web driver
driver = webdriver.Chrome()

# adjust window size
driver.set_window_size(1920, 1080)

# Get top 100 games from Steam
steam_soup = get_page_soup(driver=driver, waittime=5, url='https://store.steampowered.com/charts/mostplayed')

# close the browser window
driver.quit()

No Age Gate, try returned exception: <class 'selenium.common.exceptions.TimeoutException'> for URL: https://store.steampowered.com/charts/mostplayed


In [4]:
# get all the games listed
games = steam_soup.find_all('tr', class_=lambda value: value and value.startswith('_2-'))

# initialize game_data dictionary
game_data = {}

# Initialize the Chrome web driver
driver = webdriver.Chrome()

# adjust window size
driver.set_window_size(1920, 1080)

# loop over all games
for game in games:

    # get the game's rank number (store as padded string)
    game_rank = f'{int(game.find_all("td")[1].text):03d}'
    print(f'Processing game: {game_rank}', end=' | ')

    # get the game's name element (with hyperlink)
    game_name = game.find('a')

    # Store data in game_data dictionary
    game_data[game_rank] = {'rank': game_rank,
                            'name': game_name.text                                                 ,
                            'link': game_name['href']                                              ,
                            'soup': get_page_soup(driver=driver, waittime=1, url=game_name['href']),}

# close the browser window
driver.quit()

Processing game: 001 | No Age Gate, try returned exception: <class 'selenium.common.exceptions.TimeoutException'> for URL: https://store.steampowered.com/app/730/CounterStrike_2?snr=1_7001_7005__7003
Processing game: 002 | No Age Gate, try returned exception: <class 'selenium.common.exceptions.TimeoutException'> for URL: https://store.steampowered.com/app/570/Dota_2?snr=1_7001_7005__7003
Processing game: 003 | No Age Gate, try returned exception: <class 'selenium.common.exceptions.TimeoutException'> for URL: https://store.steampowered.com/app/578080/PUBG_BATTLEGROUNDS?snr=1_7001_7005__7003
Processing game: 004 | No Age Gate, try returned exception: <class 'selenium.common.exceptions.TimeoutException'> for URL: https://store.steampowered.com/app/1172470/Apex_Legends?snr=1_7001_7005__7003
Processing game: 005 | Age Gate detected, try successfully bypassed gate for URL: https://store.steampowered.com/app/553850/HELLDIVERS_2?snr=1_7001_7005__7003
Processing game: 006 | No Age Gate, try ret

In [6]:
# Loop over all games in game_data
for key, data in game_data.items():

    # extract the system requirements from the game's soup
    sys_reqs = data['soup'].find_all('div', class_=re.compile('game_area_sys_req_'))

    # filter for the minimum requirements on Windows platform (will be empty if none are found)
    min_reqs = [req for req in sys_reqs if     'minimum' in req.text.lower() and not 'recommended' in req.text.lower() and 'windows' in req.text.lower()]

    # filter for the recommended requirements on Windows platform (will be empty if none are found)
    rec_reqs = [req for req in sys_reqs if not 'minimum' in req.text.lower() and     'recommended' in req.text.lower() and 'windows' in req.text.lower()]

    # if a minimum requirement exists
    if min_reqs:

        # Add data to game_data dictionary
        try:    data['min_req_os'   ] = min_reqs[0].select_one('li:contains("OS")'       ).text.replace('OS:'       , '').strip()
        except: data['min_req_os'   ] = None
        try:    data['min_req_proc' ] = min_reqs[0].select_one('li:contains("Processor")').text.replace('Processor:', '').strip()
        except: data['min_req_proc' ] = None
        try:    data['min_req_mem'  ] = min_reqs[0].select_one('li:contains("Memory")'   ).text.replace('Memory:'   , '').strip()
        except: data['min_req_proc' ] = None
        try:    data['min_req_graph'] = min_reqs[0].select_one('li:contains("Graphics")' ).text.replace('Graphics:' , '').strip()
        except: data['min_req_proc' ] = None
        try:    data['min_req_dirX' ] = min_reqs[0].select_one('li:contains("DirectX")'  ).text.replace('DirectX:'  , '').strip()
        except: data['min_req_proc' ] = None
        try:    data['min_req_stor' ] = min_reqs[0].select_one('li:contains("Storage")'  ).text.replace('Storage:'  , '').strip()
        except: data['min_req_proc' ] = None

    # if a recommended requirement exists
    if rec_reqs:

        # Add data to game_data dictionary
        try:    data['rec_req_os'   ] = rec_reqs[0].select_one('li:contains("OS")'       ).text.replace('OS:'       , '').strip()
        except: data['rec_req_os'   ] = None
        try:    data['rec_req_proc' ] = rec_reqs[0].select_one('li:contains("Processor")').text.replace('Processor:', '').strip()
        except: data['rec_req_os'   ] = None
        try:    data['rec_req_mem'  ] = rec_reqs[0].select_one('li:contains("Memory")'   ).text.replace('Memory:'   , '').strip()
        except: data['rec_req_os'   ] = None
        try:    data['rec_req_graph'] = rec_reqs[0].select_one('li:contains("Graphics")' ).text.replace('Graphics:' , '').strip()
        except: data['rec_req_os'   ] = None
        try:    data['rec_req_dirX' ] = rec_reqs[0].select_one('li:contains("DirectX")'  ).text.replace('DirectX:'  , '').strip()
        except: data['rec_req_os'   ] = None
        try:    data['rec_req_stor' ] = rec_reqs[0].select_one('li:contains("Storage")'  ).text.replace('Storage:'  , '').strip()
        except: data['rec_req_os'   ] = None

# convert the dictionary to a dataframe, with the rank as the index
game_data_df = pd.DataFrame.from_dict(game_data, orient='index')

# reset the index to be a normal index column
game_data_df.reset_index(drop=True, inplace=True)

# remove the soup and link column
game_data_df.drop(columns=['soup', 'link'], inplace=True)

# save the dataframe to a CSV file
game_data_df.to_csv('./steam_games_reqs/steam_games_req_data.csv', index=False)

game_data_df

Unnamed: 0,rank,name,min_req_os,min_req_proc,min_req_mem,min_req_graph,min_req_dirX,min_req_stor,rec_req_os,rec_req_proc,rec_req_mem,rec_req_graph,rec_req_dirX,rec_req_stor
0,001,Counter-Strike 2,Windows® 10,4 hardware CPU threads - Intel® Core™ i5 750 o...,8 GB RAM,Video card must be 1 GB or more and should be ...,Graphics: Video card must be 1 GB or more and ...,85 GB available space,,,,,,
1,002,Dota 2,OS *: Windows 7 or newer,Dual core from Intel or AMD at 2.8 GHz,4 GB RAM,"NVIDIA GeForce 8600/9600GT, ATI/AMD Radeon HD2...",Version 11,60 GB available space,,,,,,
2,003,PUBG: BATTLEGROUNDS,64-bit Windows 10,Intel Core i5-4430 / AMD FX-6300,8 GB RAM,NVIDIA GeForce GTX 960 2GB / AMD Radeon R7 370...,Version 11,40 GB available space,64-bit Windows 10,Intel Core i5-6600K / AMD Ryzen 5 1600,16 GB RAM,NVIDIA GeForce GTX 1060 3GB / AMD Radeon RX 58...,Version 11,50 GB available space
3,004,Apex Legends™,64-bit Windows 10,"AMD FX 4350 or Equivalent, Intel Core i3 6300 ...",6 GB RAM,"AMD Radeon™ HD 7730, NVIDIA GeForce® GT 640",Version 11,75 GB available space,64-bit Windows 10,Ryzen 5 CPU or Equivalent,8 GB RAM,"AMD Radeon™ R9 290, NVIDIA GeForce® GTX 970",Version 11,75 GB available space
4,005,HELLDIVERS™ 2,Windows 10,,8 GB RAM,NVIDIA GeForce GTX 1050 Ti or AMD Radeon RX 470,,100 GB available space,,Intel Core i7-9700K or AMD Ryzen 7 3700X,16 GB RAM,NVIDIA GeForce RTX 2060 or AMD Radeon RX 6600XT,,100 GB available space
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,096,Factorio,"OS *: Windows 10, 8, 7, Vista (64 Bit)",Dual core 3Ghz+,4 GB RAM,DirectX 10.1 capable GPU with 512 MB VRAM - Ge...,Graphics: DirectX 10.1 capable GPU with 512 MB...,3 GB available space,"OS *: Windows 10, 8, 7 (64 Bit)",Quad core 3Ghz+,8 GB RAM,DirectX 11 capable GPU with 2 GB VRAM - GeForc...,Graphics: DirectX 11 capable GPU with 2 GB VRA...,3 GB available space
96,097,Russian Fishing 4,OS *: Windows 7/8/10 64-bit,Intel® i5 3Ghz,8 GB RAM,GTX 760,Version 11,40 GB available space,OS *: Windows 7/8/10 64-bit,Intel® i5 3.5Ghz,8 GB RAM,GTX 1060,Version 11,40 GB available space
97,098,Squad,Windows 10 (x64),Intel Core i or AMD Ryzen with 4 physical cores,8 GB RAM,Geforce GTX 960 or AMD Radeon HD 7970 with at ...,Version 11,80 GB available space,Windows 10 (x64),Intel Core i or AMD Ryzen with 6 physical cores,16 GB RAM,Nvidia GTX 1060 or AMD Radeon 570 with at leas...,Version 12,80 GB available space
98,099,It Takes Two,OS *: Windows 8.1 64-bit or Windows 10 64-bit,Intel Core i3-2100T or AMD FX 6100,8 GB RAM,Nvidia GeForce GTX 660 or AMD R7 260x,Version 11,50 GB available space,OS *: Windows 8.1 64-bit or Windows 10 64-bit,Intel Core i5 3570K or AMD Ryzen 3 1300x,16 GB RAM,Nvidia GeForce GTX 980 or AMD R9 290X,Version 11,50 GB available space
