In [None]:
# Steam Spy data collection for college writing

import requests
import pandas as pd
import random
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

# Base URL of the Steam Spy API; the fetch functions add the `request=...` query parameters to it
base_url = "https://steamspy.com/api.php"


In [3]:
def fetch_indie_games(timeout=30):
    """Fetch the Steam Spy listing for the Indie genre.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The decoded JSON response, or ``None`` if the request failed or the
        server answered with a status code other than 200.
    """
    try:
        # Let requests build the query string instead of formatting it by hand
        response = requests.get(
            base_url,
            params={"request": "genre", "genre": "Indie"},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "print and return None" path as bad statuses
        print(f"Error: Unable to fetch data ({exc})")
        return None

    if response.status_code != 200:
        print(f"Error: Unable to fetch data (status code: {response.status_code})")
        return None
    return response.json()

def fetch_app_details(appid, timeout=30):
    """Fetch the Steam Spy detail record for a single app.

    Args:
        appid: Identifier of the app to look up; sent as the ``appid`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The decoded JSON response, or ``None`` if the request failed or the
        server answered with a status code other than 200.
    """
    try:
        # Passing params lets requests URL-encode appid rather than interpolating it raw
        response = requests.get(
            base_url,
            params={"request": "appdetails", "appid": appid},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "print and return None" path as bad statuses
        print(f"Error: Unable to fetch details for appid {appid} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Error: Unable to fetch details for appid {appid} (status code: {response.status_code})")
        return None
    return response.json()

In [4]:
# Download the Indie genre listing from Steam Spy; the result is None if the request failed
indie_games = fetch_indie_games()

In [5]:
# Build the summary table only when the fetch returned data
if indie_games:
    # The keys of the response become the DataFrame index (used as app ids later in the notebook)
    indie_df = pd.DataFrame.from_dict(indie_games, orient='index')

    # 'owners' arrives as a range string such as "0 .. 20,000"; replace it with the range midpoint.
    # Values that do not split into exactly two parts become None.
    indie_df['owners'] = (
        indie_df['owners']
        .astype(str)  # Ensure the data is a string
        .str.replace(',', '', regex=False)  # Remove thousands separators (literal match)
        .str.split(' .. ')  # Split the range into [low, high]
        .apply(lambda x: (int(x[0]) + int(x[1])) // 2 if isinstance(x, list) and len(x) == 2 else None)
    )

    # Divide by 100 to express the price in USD; missing or non-numeric prices
    # become NaN instead of raising, so one bad record cannot abort the cell.
    indie_df['price_usd'] = pd.to_numeric(indie_df['price'], errors='coerce') / 100

    # Divide by 60 to express the playtime columns in hours
    indie_df['average_forever_hours'] = indie_df['average_forever'] / 60
    indie_df['average_2weeks_hours'] = indie_df['average_2weeks'] / 60

    # Display summary statistics
    print("Summary Statistics for Indie Games:")
    print(f"Total number of games: {len(indie_df)}")
    print(f"Average Price (USD): {indie_df['price_usd'].mean():.2f}")
    print(f"Average Owners: {indie_df['owners'].mean():.0f}")
    print(f"Average Playtime (Forever): {indie_df['average_forever_hours'].mean():.2f} hours")
    print(f"Average Playtime (Last 2 Weeks): {indie_df['average_2weeks_hours'].mean():.2f} hours")
else:
    # Keep indie_df defined so later cells can test indie_df.empty without a NameError
    indie_df = pd.DataFrame()
    print("No data available.")

Summary Statistics for Indie Games:
Total number of games: 55653
Average Price (USD): 6.94
Average Owners: 78697
Average Playtime (Forever): 1.89 hours
Average Playtime (Last 2 Weeks): 0.09 hours


In [5]:
# Write the processed table to disk, but only when the fetch produced data
if indie_games:
    indie_df.to_csv(path_or_buf="./indie_games_summary.csv", index=False)
    print("Data saved to 'indie_games_summary.csv'.")

Data saved to 'indie_games_summary.csv'.


In [6]:
# indie_df only exists when the fetch succeeded, so test indie_games first;
# the short-circuit avoids a NameError on indie_df after a failed fetch.
if indie_games and not indie_df.empty:
    # Pick a random appid from the existing data (the DataFrame index)
    random_appid = random.choice(indie_df.index)
    print(f"Fetching details for AppID: {random_appid}")

    # Fetch the detailed data; fetch_app_details returns None on failure
    app_details = fetch_app_details(random_appid)

    # Display the details, one "key: value" line per field
    if app_details:
        print("App Details:")
        for key, value in app_details.items():
            print(f"{key}: {value}")
else:
    print("The DataFrame is empty. Cannot pick a random app.")

Fetching details for AppID: 1508060
App Details:
appid: 1508060
name: Snowy Love
developer: Blessing Company
publisher: Blessing Company
score_rank: 
positive: 4
negative: 5
userscore: 0
owners: 0 .. 20,000
average_forever: 0
average_2weeks: 0
median_forever: 0
median_2weeks: 0
price: 199
initialprice: 199
discount: 0
ccu: 0
languages: English
genre: Adventure, Casual, Indie, Strategy
tags: {'Casual': 32, 'Indie': 31, 'Adventure': 30, 'Strategy': 21, 'Puzzle': 12, 'Sexual Content': 12, 'Relaxing': 11, '2D': 11, 'Anime': 11, 'Minimalist': 11, 'Atmospheric': 11, 'Singleplayer': 11, 'Colorful': 11, 'Cute': 11, 'LGBTQ+': 10, 'Clicker': 10}


In [None]:
# driver_path = "/chromedriver-win64/chromedriver-win64/chromedriver.exe"  # Replace with your ChromeDriver path
url = "https://www.steamspy.com/genre/Indie"  # Steam Spy Indie genre page that the next cell downloads and parses


# Use BeautifulSoup (bs4) to parse the page

In [8]:
# Download the page; the timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(url, timeout=30)
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Locate the first table on the page (add a class or ID filter if the page has several)
    table = soup.find('table')

    if table:
        # Extract table headers
        headers = [header.text.strip() for header in table.find_all('th')]

        # Extract table rows
        rows = []
        for row in table.find_all('tr')[1:]:  # Skip the header row
            cells = row.find_all('td')
            if not cells:
                # Rows without <td> cells (e.g. extra header/footer rows) would
                # otherwise end up as all-empty records in the CSV
                continue
            rows.append([cell.text.strip() for cell in cells])

        # Create a pandas DataFrame
        data = pd.DataFrame(rows, columns=headers)
        print("Data scraped successfully!")

        # Save to CSV
        data.to_csv("steamspy_table.csv", index=False)
        print("Data saved to 'steamspy_table.csv'.")
    else:
        print("Table not found on the page.")
else:
    print(f"Failed to fetch the webpage (status code: {response.status_code}).")

Data scraped successfully!
Data saved to 'steamspy_table.csv'.
