In [1]:
import requests
from bs4 import BeautifulSoup
import time
import csv

# Accumulates one row per scraped review, in the column order
# [title, abstract, description, author, publish date, url].
game_data = list()

# Review listing page; the crawl appends "?page=N" to paginate through it.
base_url = "https://www.gamespot.com/games/reviews/"

# Absolute URLs that match the "/reviews/.../" link pattern but must not be
# scraped (presumably per-platform index pages rather than single reviews).
excluded_urls = [
    "https://www.gamespot.com/reviews/nintendo-switch/",
    "https://www.gamespot.com/reviews/xbox-one/",
    "https://www.gamespot.com/reviews/ps4/",
]


# Crawl listing pages 1-20, follow every "/reviews/.../" link found on them,
# and append one row per review to ``game_data``.
#
# Robustness notes:
#   * every request carries a timeout so a stalled connection cannot hang
#     the whole crawl;
#   * non-2xx responses and network errors are reported and skipped instead
#     of being parsed into rows full of "No ... found" placeholders or
#     aborting the run before the CSV is written;
#   * each review URL is fetched at most once, even if the listing pages
#     link to it several times.
REQUEST_TIMEOUT = 10  # seconds, applied to both connect and read
seen_urls = set()

# A single session reuses the underlying connection across requests.
with requests.Session() as session:
    for page_num in range(1, 21):
        page_url = f"{base_url}?page={page_num}"
        try:
            response = session.get(page_url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Skipping listing page {page_url}: {exc}")
            continue
        soup = BeautifulSoup(response.content, 'html.parser')

        for link in soup.find_all('a', href=True):
            url = link['href']

            # Only site-relative review links with a trailing slash qualify;
            # anything containing 'reviews/pc/' is skipped as well.
            if not (url.startswith('/reviews/') and url.endswith('/')):
                continue
            if 'reviews/pc/' in url:
                continue

            game_url = "https://www.gamespot.com" + url
            if game_url in excluded_urls or game_url in seen_urls:
                continue
            # Mark before fetching so a failing URL is not retried on
            # every later occurrence of the same link.
            seen_urls.add(game_url)

            try:
                game_response = session.get(game_url, timeout=REQUEST_TIMEOUT)
                game_response.raise_for_status()
            except requests.RequestException as exc:
                print(f"Skipping review {game_url}: {exc}")
                time.sleep(1)  # stay polite to the server on failures too
                continue
            game_soup = BeautifulSoup(game_response.content, 'html.parser')

            # Extract title
            title_tag = game_soup.find('h1', class_='kubrick-info__title')
            title_text = title_tag.get_text(strip=True) if title_tag else "No title found"

            # Extract abstract
            abstract_tag = game_soup.find('p', class_='news-deck type--xlarge')
            abstract_text = abstract_tag.get_text(strip=True) if abstract_tag else "No abstract found"

            # Extract author name
            author_tag = game_soup.select_one('span.byline-author a')
            author_name = author_tag.get_text(strip=True) if author_tag else "No author found"

            # Extract publish date: keep only the first 10 characters of the
            # first <time datetime=...> attribute (YYYY-MM-DD).
            date_tag = game_soup.find('time', datetime=True)
            publish_date = date_tag['datetime'][:10] if date_tag else "No date found"

            # Extract the article body and join its paragraphs with spaces
            description_tags = game_soup.select('section.article-body.typography-format p')
            description_text = ' '.join(tag.get_text(strip=True) for tag in description_tags)

            # Append data in the same column order the CSV header uses
            game_data.append([title_text, abstract_text, description_text, author_name, publish_date, game_url])
            print(f"Scraped: {title_text} - {abstract_text} - {description_text} - {author_name} - {publish_date} - {game_url}")

            # Throttle: pause one second between review requests
            time.sleep(1)

# Dump everything collected in ``game_data`` to games_reviews.csv: a header
# line first, then one line per scraped review. newline='' lets the csv
# module control line endings itself.
header = ["Title", "Abstract", "Description", "Author", "Publish Date", "URL"]
with open('games_reviews.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv.writer(csv_file).writerows([header, *game_data])


Scraped: Redacted Review - Prison Break - With a colorful art style and roguelike hooks, Redacted has little in common with The Callisto Protocol besides a shared setting, yet it's a similarly uneven experience. - You wouldn't know just from looking at it--with its vivid, comic book-esque art style and irreverent punk-rock tone--but Redacted (officially styled as [REDACTED]) actually takes place in the same sci-fi universe as 2022's The Callisto Protocol. While that was a third-person survival-horror game trying to capture the same magic that Dead Space bottled up over a decade and a half ago, Striking Distance Studios has taken a wildly different approach with this spin-off, repurposing various elements from its debut game to create an isometric roguelike dungeon crawler. It's a drastic shift for the young series, ditching the grisly melodrama and Rock 'Em Sock 'Em combat of The Callisto Protocol by pivoting to referential humor and twin-stick shooting. It still feels immediately fami