In [59]:
# Logic to get all pages
import csv
import json
import re
from time import sleep

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import (
    StaleElementReferenceException,
    WebDriverException,
)
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


# Landing page that is opened first; the pagination cell below reads table
# cells and a "Next" link from whatever page the driver is currently showing.
url = 'https://www.mobygames.com/game/'

# Set up Selenium WebDriver
# `driver` is a notebook-wide handle reused by the later scraping cells.
driver = webdriver.Chrome()
driver.get(url)
# Fixed 10-second pause after navigation.
# NOTE(review): presumably gives the page time to finish rendering (or leaves
# room for manual interaction) before scraping starts - confirm.
sleep(10)

In [61]:
# Collect game URLs by paging through the listing.
# The "Next" button will eventually disappear because MobyGames imposes
# restrictions on bots, guest users, non-premium users, etc.; that is what
# normally ends the loop.
NEXT_BUTTON_LOCATOR = (By.XPATH, "//a[text()='Next']")
MAX_STALE_RETRIES = 3  # consecutive re-scans allowed when the page re-renders mid-click

game_urls = set()
stale_retries = 0
while True:
    # Harvest every game link on the page currently shown by the driver.
    for col in driver.find_elements(By.CSS_SELECTOR, "td"):
        try:
            # find_elements returns [] (instead of raising) for cells without a link.
            matches = col.find_elements(By.CLASS_NAME, "me-1")
            link = matches[0].get_attribute("href") if matches else None
        except StaleElementReferenceException:
            # The cell was replaced while we were reading it; it is picked up
            # again on the next pass over the page.
            continue
        if link:  # elements without an href report None - never store that
            game_urls.add(link)

    try:
        # Click the element returned by the wait itself so the reference is
        # guaranteed to be the one that was just confirmed clickable.
        next_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(NEXT_BUTTON_LOCATOR)
        )
        ActionChains(driver).move_to_element(next_button).click().perform()
        stale_retries = 0
    except StaleElementReferenceException:
        # The page changed between lookup and click: re-scan rather than stopping
        # the whole crawl, but give up if it keeps happening.
        stale_retries += 1
        if stale_retries > MAX_STALE_RETRIES:
            print("Stopping pagination: 'Next' button kept going stale.")
            break
    except WebDriverException as exc:
        # Includes the timeout raised when no clickable "Next" button exists.
        print(f"Stopping pagination: {type(exc).__name__}")
        break

# Sorted so the scrape order is reproducible between runs (set order is not).
game_urls = sorted(game_urls)
print(len(game_urls))

996


In [63]:
# Details: scraped from each individual game's page
def get_details(soup):
    """Extract the title and the labelled detail fields from a game page.

    Args:
        soup: Parsed game page; must support ``find(name, class_=...)`` and
            ``find(name, string=...)`` as BeautifulSoup does.

    Returns:
        dict: Always contains ``"Title"``. Each of the labels listed below is
        included only when a ``<dt>`` with exactly that text exists; its value
        is the text of the next ``<dd>`` with whitespace runs collapsed to a
        single space and the ends trimmed.

    Raises:
        AttributeError: If the ``<h1 class="mb-0">`` heading is missing, or a
            matching ``<dt>`` has no following ``<dd>`` (the lookup result is
            ``None`` in both cases).
    """
    detail_labels = (
        "Publishers", "Developers", "Released", "Genre", "Perspective",
        "Visual", "Art", "Gameplay", "Interface",
    )

    # The title is read first, so a page without the heading fails before any
    # detail lookup is attempted.
    heading = soup.find("h1", class_="mb-0")
    details = {"Title": heading.text}

    for label in detail_labels:
        term = soup.find("dt", string=label)
        if term is None:
            continue  # this page simply does not list the field
        raw_value = term.find_next("dd").text
        # Collapse newlines and repeated spaces into single spaces.
        details[label] = re.sub(r'\s+', ' ', raw_value).strip()

    return details


In [64]:
# Player reviews: scraped from each individual game's reviews page
def _parse_inline_style(style):
    """Split an inline CSS ``style`` string into a ``{property: value}`` dict.

    Only the first ``:`` of each declaration separates name from value, so
    values that themselves contain colons (e.g. ``url(http://...)``) are kept
    intact. Names and values are stripped; empty or malformed declarations are
    ignored. A later duplicate property overrides an earlier one.
    """
    declarations = {}
    for item in style.split(";"):
        name, separator, value = item.partition(":")
        name = name.strip()
        if separator and name:
            declarations[name] = value.strip()
    return declarations


def get_player_reviews(soup):
    """Collect the numeric star ratings of all player reviews on a page.

    Args:
        soup: Parsed reviews page; must support ``find``/``find_all`` as
            BeautifulSoup does.

    Returns:
        list[float]: One entry per star widget whose inline style carries a
        parseable ``--rating`` custom property, in document order. Widgets
        without a usable rating are skipped.

    Raises:
        AttributeError: If the page has no ``<div id="players">`` container
            (the lookup result is ``None``). The scraping loop in this
            notebook catches this to skip the game.
    """
    player_reviews_div = soup.find("div", id="players")
    player_reviews = player_reviews_div.find_all("div", class_="border mb")

    final_reviews = []
    for review in player_reviews:
        for star in review.find_all("span", class_="stars stars-sm", style=True):
            rating = _parse_inline_style(star["style"]).get("--rating")
            try:
                final_reviews.append(float(rating))
            except (TypeError, ValueError):
                # TypeError: no --rating property at all (rating is None).
                # ValueError: property present but not a number.
                continue
    return final_reviews

In [65]:
# Critic reviews: scraped from the game's main page (not the /reviews/ page).
def get_critic_reviews(soup):
    """Extract rounded critic scores from the page's ``<critic-reviews>`` tag.

    The tag's ``:reviews`` attribute is parsed as JSON. Every object in the
    resulting list contributes its ``normalized_score`` rounded to the nearest
    integer.

    Args:
        soup: Parsed game page; must support ``find(name)`` as BeautifulSoup
            does, returning an object with ``get(attribute_name)``.

    Returns:
        list[int]: Rounded scores in document order. Empty when the tag or the
        attribute is missing, the JSON is invalid or not a list, or no entry
        has a usable score. Entries that are not objects, lack the key, or
        hold a non-numeric / non-finite score are skipped.
    """
    critics_reviews_tag = soup.find("critic-reviews")
    if critics_reviews_tag is None:
        print("No <critic-reviews> tag found.")
        return []

    reviews_attr = critics_reviews_tag.get(":reviews")
    if not reviews_attr:
        return []

    try:
        reviews = json.loads(reviews_attr)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return []

    if not isinstance(reviews, list):
        return []

    final_critics = []
    for review in reviews:
        if not isinstance(review, dict):
            continue
        # .get() so a review without the key is skipped instead of raising KeyError.
        score = review.get('normalized_score')
        try:
            final_critics.append(int(round(score)))
        except (TypeError, ValueError, OverflowError):
            # TypeError: None / non-numeric score.
            # ValueError / OverflowError: NaN / Infinity, which json.loads accepts.
            continue
    return final_critics

In [66]:
# Scrape every collected game page and append one CSV row per game.
PLATFORM = 'SNES'
OUTPUT_CSV = 'game_reviews_23.csv'
CSV_HEADER = ['platform', 'title', 'url', 'published_by', 'developed_by',
              'released', 'genre', 'perspective', 'visual', 'art', 'gameplay', 'interface',
              'user_reviews', 'user_review_count', 'average_user_review',
              'critic_reviews', 'critic_review_count', 'average_critic_review']
# Keys of the get_details() result, in the order of the CSV columns
# 'title' ... 'interface'.
DETAIL_COLUMNS = ['Title', 'Publishers', 'Developers', 'Released', 'Genre',
                  'Perspective', 'Visual', 'Art', 'Gameplay', 'Interface']

try:
    with open(OUTPUT_CSV, mode='a', newline='', encoding='utf-8') as output_file:
        gamereviews_csv = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        # Write header only once: a non-zero position means an earlier run
        # already created the file and its header.
        if output_file.tell() == 0:
            gamereviews_csv.writerow(CSV_HEADER)

        # Loop through game URLs (named game_url so the notebook-level `url`
        # is not overwritten).
        for game_url in game_urls:
            driver.get(game_url)
            try:
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                details = get_details(soup)
                critic_reviews = get_critic_reviews(soup)

                # Go to the reviews URL. Strip any trailing slash first so the
                # result never contains "//reviews/".
                reviews_url = f"{driver.current_url.rstrip('/')}/reviews/"
                driver.get(reviews_url)
                reviews_soup = BeautifulSoup(driver.page_source, 'html.parser')
                player_reviews = get_player_reviews(reviews_soup)
            except AttributeError as exc:
                # Raised by the parsers when an expected element is missing;
                # report it instead of dropping the game silently.
                print(f"Skipping {game_url}: {exc}")
                continue

            # Calculate review counts and averages (0 when there are no reviews).
            user_review_count = len(player_reviews)
            average_review = sum(player_reviews) / user_review_count if user_review_count > 0 else 0

            critic_count = len(critic_reviews)
            average_critic = sum(critic_reviews) / critic_count if critic_count > 0 else 0

            # Write data to CSV. Missing detail fields become empty strings; the
            # review lists are written as their Python list representation.
            detail_values = [details.get(column, '') for column in DETAIL_COLUMNS]
            gamereviews_csv.writerow([PLATFORM, detail_values[0], game_url, *detail_values[1:],
                                      player_reviews, user_review_count, average_review,
                                      critic_reviews, critic_count, average_critic])
            # No driver.back() here: the next iteration navigates with
            # driver.get(), so going back would only trigger an extra page load.
finally:
    # quit() ends the whole WebDriver session (close() only closes the current
    # window), and the finally clause runs it even if the scrape aborts.
    driver.quit()
    


No <critic-reviews> tag found.
No <critic-reviews> tag found.
No <critic-reviews> tag found.
No <critic-reviews> tag found.
No <critic-reviews> tag found.
No <critic-reviews> tag found.
No <critic-reviews> tag found.
No <critic-reviews> tag found.
No <critic-reviews> tag found.
No <critic-reviews> tag found.
No <critic-reviews> tag found.
No <critic-reviews> tag found.
