# Libraries

In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
import re

# NOTE(review): a `global` statement at module (cell) scope has no effect, and
# `original_window` is not assigned or read in any of the visible cells —
# presumably a leftover; confirm before removing.
global original_window

# Selenium

In [2]:
# Chrome options object, left at its defaults (nothing is customised here)
options = webdriver.ChromeOptions()

# Driver service created without arguments (no explicit chromedriver path)
service = Service()

# Launch the Chrome session that the following cells drive
driver = webdriver.Chrome(options=options, service=service)

In [3]:
# URL of the games listing page on gamesystemrequirements.com
url = 'https://gamesystemrequirements.com/games'

# Open the webpage
driver.get(url)

# Set the driver's implicit wait to 3 (seconds, per Selenium's implicitly_wait).
# It is set after the page is opened, so it governs the element lookups made
# in the following cells.
driver.implicitly_wait(3)

# Click Button Function

In [4]:
# Function to click a button given its XPath
def click_button(driver, xpath, pause=1):
    """Click the first element matching ``xpath``, then pause.

    Args:
        driver: Selenium WebDriver (any object exposing ``find_element``).
        xpath: XPath expression locating the element to click.
        pause: Seconds to sleep after the click so the page can react.
            Defaults to 1, the delay that was previously hard-coded.

    Raises:
        Any exception raised by ``driver.find_element`` or ``click`` is
        propagated unchanged (e.g. when no element matches ``xpath``).
    """
    element = driver.find_element(By.XPATH, xpath)
    element.click()
    # Fixed pause rather than an explicit wait: callers only need the UI to settle.
    time.sleep(pause)

# Close Privacy window

In [5]:
# Fixed 2-second pause before looking for the consent dialog
time.sleep(2)

# Dismiss the dialog by clicking the <span> whose text is exactly 'AGREE'.
# NOTE(review): click_button looks the element up with find_element, which is
# expected to raise if the dialog is not shown — confirm it always appears.
click_button(driver, "//span[text()='AGREE']")

# Create a list of all games

In [6]:
# Set the driver's implicit wait to 2.
# NOTE(review): per Selenium, this configures the timeout used by element
# lookups rather than pausing here — confirm that is the intent.
driver.implicitly_wait(2)

# Collect every <img> element on the page; the scraping loop below indexes into
# this list (starting at index 2) and follows each image's enclosing <a> link.
# NOTE(review): this grabs all images on the page, presumably including
# non-game ones — verify against the page markup.
games_list = driver.find_elements(By.TAG_NAME, "img")


In [7]:
# Game list to store game data (one row per successfully scraped game)
games_data = []


def _detail_value(label, details_text):
    """Return the rest of the line that follows ``label`` in ``details_text``, or None.

    Unlike a pattern that requires a trailing newline, this also matches when
    the labelled field is the last line of the block.
    """
    match = re.search(rf"{re.escape(label)}\s*(.+)", details_text)
    return match.group(1) if match else None


def _requirement_xpath(section, label):
    """Build the XPath of one requirement value, e.g. section 'Minimum', label 'CPU:'."""
    return (f"//h2[contains(text(),'{section}')]/following-sibling::div"
            f"//div[contains(text(),'{label}')]/following-sibling::div")


# Handle of the listing tab; every game page is opened in a separate tab
main_tab = driver.current_window_handle

# Iterate through images from index 2 up to (not including) index 50, clamped to
# the number of images actually found so a short page does not raise IndexError
last_index = min(50, len(games_list))

for i in range(2, last_index):
    try:
        # Get game URL from the link inside the <a> tag containing the image
        game_url = games_list[i].find_element(By.XPATH, "./ancestor::a").get_attribute("href")

        # Open the game in a new tab
        driver.execute_script("window.open(arguments[0]);", game_url)
        time.sleep(2)

        # Switch to the new tab
        driver.switch_to.window(driver.window_handles[-1])

        # Wait for the game page to load
        time.sleep(3)

        # Extract game name
        game_name = driver.find_element(By.CLASS_NAME, "game_head_title").text

        # Extract text block containing date, genre, developer, and publisher
        details_text = driver.find_element(By.CLASS_NAME, "game_head_details").text

        # Capture each labelled value, or None when the label is not present
        release_date, genre, developer, publisher = (
            _detail_value(label, details_text)
            for label in ("Release date:", "Genre:", "Developer:", "Publisher:")
        )

        # Minimum CPU/RAM/GPU followed by recommended CPU/RAM/GPU
        requirements = [
            driver.find_element(By.XPATH, _requirement_xpath(section, label)).text
            for section in ("Minimum", "Recommended")
            for label in ("CPU:", "RAM:", "GPU:")
        ]

        # Add data to the list (column order matches the DataFrame built later)
        games_data.append([game_name, release_date, genre, developer, publisher,
                           *requirements])

    except Exception as e:
        # Best-effort scraping: report the game and the kind of failure, then move on
        print(f"Error processing game {i}: Fail to capture game requirements to run! ({type(e).__name__})")

    finally:
        # Close every tab except the listing tab, on success and on failure alike,
        # so failed games do not leave orphaned tabs behind
        for handle in driver.window_handles:
            if handle != main_tab:
                driver.switch_to.window(handle)
                driver.close()

        # Return to main tab
        driver.switch_to.window(main_tab)

# Close WebDriver after finishing extraction
driver.quit()

Error processing game 9: Fail to capture game requirements to run!
Error processing game 13: Fail to capture game requirements to run!
Error processing game 23: Fail to capture game requirements to run!


In [8]:
# Build the result table: one row per scraped game, columns in the same order
# as the values appended to games_data
system_requirement_website_data = pd.DataFrame(
    data=games_data,
    columns=[
        "game_name",
        "release_date",
        "genre",
        "developer",
        "publisher",
        "cpu_minimum",
        "ram_minimum",
        "gpu_minimum",
        "cpu_recommended",
        "ram_recommended",
        "gpu_recommended",
    ],
)


In [9]:
# Persist the scraped table as CSV, keeping the row index as a column
system_requirement_website_data.to_csv(
    path_or_buf='../tables/system_requirement_website_data.csv',
    index=True,
)