### Tracking Trending Games

This notebook fetches and saves the top 100 games in each of several Steam store categories: “New & Trending”, “Top Sellers”, “Global Top Sellers”, “Popular Upcoming” and “Specials”. These categories correspond directly to tabs on Steam's store page and can be queried through the following API: https://store.steampowered.com/search/results

The API accepts several parameters that narrow a search; they correspond to the filter options on Steam's search page, such as 'Show Free to Play', 'Tags', 'Categories', etc.

Run this notebook each time you would like to update these categories for use in the app.

In [1]:
# Imports and Helper functions

from datetime import datetime
import time
import requests
import pickle
from pathlib import Path
import re
import os

def print_log(*args):
    """Print ``args`` prefixed with a ``[YYYY-MM-DD HH:MM:SS.mmm] `` timestamp.

    Args:
        *args: Values to print after the timestamp. They are rendered exactly
            as ``print(*args)`` renders them (space-separated, newline at end).
    """
    # isoformat(timespec="milliseconds") always emits three fractional digits.
    # Slicing str(datetime.now())[:-3] would instead cut off the seconds field
    # whenever the microsecond component is exactly 0, because str() omits the
    # fractional part entirely in that case.
    timestamp = datetime.now().isoformat(sep=" ", timespec="milliseconds")
    print(f"[{timestamp}] ", end="")
    print(*args)
    
def get_search_results(params, timeout=30):
    """Query Steam's store search endpoint and return the decoded JSON body.

    Args:
        params: Query-string parameters forwarded to
            ``https://store.steampowered.com/search/results/``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The decoded JSON object on success. On any failure (network error,
        non-200 status, body that is not a JSON object) the fallback
        ``{"items": []}`` is returned so callers can always use ``.get``.
    """
    try:
        req_sr = requests.get(
            "https://store.steampowered.com/search/results/",
            params=params,
            timeout=timeout)
    except requests.RequestException as e:
        # Connection errors and timeouts are reported like any other failed
        # request instead of aborting the whole scraping loop.
        print_log(f"Failed to get search results: {e}")
        return {"items": []}

    if req_sr.status_code != 200:
        print_log(f"Failed to get search results: {req_sr.status_code}")
        return {"items": []}

    try:
        search_results = req_sr.json()
    except Exception as e:
        print_log(f"Failed to parse search results: {e}")
        return {"items": []}

    # Callers read the result with .get("items"), which needs a mapping.
    if not isinstance(search_results, dict):
        print_log(f"Failed to parse search results: unexpected JSON type {type(search_results).__name__}")
        return {"items": []}

    return search_results

def get_app_details(appid, logo_url=None, timeout=30, max_retries=None):
    """Fetch store details for one Steam app or bundle.

    The bundle endpoint is used when ``logo_url`` contains ``steam/bundles/``;
    otherwise the regular app-details endpoint is queried.

    Args:
        appid: App or bundle ID. ``None`` short-circuits to a failure result.
        logo_url: Logo URL from the search result, used only to decide whether
            the ID refers to a bundle.
        timeout: Seconds to wait for each HTTP request.
        max_retries: Maximum number of retries after a 429/403 response.
            ``None`` (the default) keeps retrying indefinitely.

    Returns:
        ``{"success": bool, "data": dict}``. ``data`` is the flat details
        dict: when the response entry has the form
        ``{"success": ..., "data": {...}}`` the inner ``data`` is returned and
        the entry's own ``success`` flag is honoured. On failure ``data`` is
        an empty dict.
    """
    def parse_entry(json_data):
        """Return ``(success, data)`` extracted from the decoded response."""
        # NOTE(review): only dict responses keyed by the stringified ID are
        # understood here. Any other JSON shape (e.g. a list) is reported as a
        # failure — confirm what the bundle endpoint actually returns.
        if not isinstance(json_data, dict):
            return False, {}
        entry = json_data.get(str(appid))
        if not isinstance(entry, dict) or not entry:
            return False, {}
        if "success" in entry:
            inner = entry.get("data")
            if entry["success"] and isinstance(inner, dict):
                # Unwrap so callers store the details themselves rather than
                # the {"success", "data"} envelope.
                return True, inner
            return False, {}
        # Entries without a success flag are taken as-is.
        return True, entry

    if appid is None:
        print_log("App ID is None.")
        return {"success": False, "data": {}}

    is_bundle_app = bool(logo_url) and "steam/bundles/" in logo_url

    if is_bundle_app:
        url = "https://store.steampowered.com/actions/ajaxresolvebundles"
        params = {"bundleids": appid, "cc": "US", "l": "english"}
        log_type = "bundle"
    else:
        url = "https://store.steampowered.com/api/appdetails/"
        params = {"appids": appid, "cc": "us", "l": "english"}
        log_type = "app"

    retries = 0
    while True:
        try:
            resp = requests.get(url, params=params, timeout=timeout)
        except Exception as e:
            print_log(f"Request failed for {log_type} {appid}: {e}")
            return {"success": False, "data": {}}

        if resp.status_code == 200:
            try:
                json_data = resp.json()
            except Exception as e:
                print_log(f"Failed to parse {log_type} JSON for ID {appid}: {e}")
                return {"success": False, "data": {}}
            success, data = parse_entry(json_data)
            print_log(f"{log_type.capitalize()} ID {appid} - Success: {success}")
            return {"success": success, "data": data}

        if resp.status_code == 429:
            print_log(f"429 Too Many Requests for {log_type} {appid}. Sleeping 10s.")
            wait_seconds = 10
        elif resp.status_code == 403:
            print_log(f"403 Forbidden for {log_type} {appid}. Sleeping 5m.")
            wait_seconds = 300
        else:
            print_log(f"Error {resp.status_code} while retrieving {log_type} {appid}")
            return {"success": False, "data": {}}

        # Optional cap so a persistent 429/403 cannot stall the run forever.
        if max_retries is not None and retries >= max_retries:
            print_log(f"Giving up on {log_type} {appid} after {retries} retries.")
            return {"success": False, "data": {}}
        retries += 1
        time.sleep(wait_seconds)

In [2]:
# Code to add games not already present in checkpoint folder

def print_log(*args):
    """Print ``args`` prefixed with a ``[YYYY-MM-DD HH:MM:SS.mmm] `` timestamp.

    This cell re-defines the helper of the same name from the first cell, so
    this is the definition in effect for the cells that follow.

    Args:
        *args: Values to print after the timestamp. They are rendered exactly
            as ``print(*args)`` renders them (space-separated, newline at end).
    """
    # isoformat(timespec="milliseconds") always emits three fractional digits.
    # Slicing str(datetime.now())[:-3] would instead cut off the seconds field
    # whenever the microsecond component is exactly 0, because str() omits the
    # fractional part entirely in that case.
    timestamp = datetime.now().isoformat(sep=" ", timespec="milliseconds")
    print(f"[{timestamp}] ", end="")
    print(*args)

def save_checkpoints(checkpoint_folder, apps_dict_filename_prefix, exc_apps_filename_prefix, error_apps_filename_prefix, apps_dict, excluded_apps_list, error_apps_list):
    """Pickle the three tracking objects into ``checkpoint_folder``.

    Each object is written to ``<prefix>-ckpt-fin.p`` inside the folder, which
    is created first if it does not exist. A log line is printed per file.

    Args:
        checkpoint_folder: ``Path`` of the directory receiving the files.
        apps_dict_filename_prefix: Filename prefix for ``apps_dict``.
        exc_apps_filename_prefix: Filename prefix for ``excluded_apps_list``.
        error_apps_filename_prefix: Filename prefix for ``error_apps_list``.
        apps_dict: Object saved under the apps-dict prefix.
        excluded_apps_list: Object saved under the excluded-apps prefix.
        error_apps_list: Object saved under the error-apps prefix.
    """
    if not checkpoint_folder.exists():
        checkpoint_folder.mkdir(parents=True)

    # Resolve every destination up front, before anything is written.
    suffix = '-ckpt-fin.p'
    prefixes = (apps_dict_filename_prefix, exc_apps_filename_prefix, error_apps_filename_prefix)
    destinations = [checkpoint_folder.joinpath(prefix + suffix).resolve() for prefix in prefixes]

    payloads = (apps_dict, excluded_apps_list, error_apps_list)
    announcements = (
        'Successfully create app_dict checkpoint: ',
        'Successfully create excluded apps checkpoint: ',
        'Successfully create error apps checkpoint: ',
    )

    for destination, payload, announcement in zip(destinations, payloads, announcements):
        save_pickle(destination, payload)
        print_log(f'{announcement}{destination}')

    print()


def load_pickle(path_to_load:Path) -> object:
    """Load and return the object pickled at ``path_to_load``.

    Args:
        path_to_load: Path of the pickle file to read.

    Returns:
        The unpickled object (this notebook stores both dicts and lists).
    """
    # SECURITY: pickle.load can execute arbitrary code; only load checkpoint
    # files that this notebook wrote itself.
    # The context manager closes the file handle, which the previous
    # open()-without-close left to the garbage collector.
    with open(path_to_load, "rb") as handle:
        return pickle.load(handle)

def save_pickle(path_to_save:Path, obj):
    """Serialize ``obj`` to ``path_to_save`` using the highest pickle protocol.

    Args:
        path_to_save: Destination file; it is created or overwritten.
        obj: Any picklable object.
    """
    with open(path_to_save, 'wb') as sink:
        writer = pickle.Pickler(sink, pickle.HIGHEST_PROTOCOL)
        writer.dump(obj)

def check_latest_checkpoints(checkpoint_folder, apps_dict_filename_prefix, exc_apps_filename_prefix, error_apps_filename_prefix):
    """Locate the latest checkpoint file for each of the three tracked objects.

    Only ``.p`` files directly inside ``checkpoint_folder`` are considered
    (sub-folders are not searched). A file counts as a checkpoint for an object
    when its name contains that object's prefix and the text ``ckpt``; among
    the matches, the one that sorts last is treated as the latest.

    Args:
        checkpoint_folder: Directory to scan.
        apps_dict_filename_prefix: Name fragment identifying apps-dict files.
        exc_apps_filename_prefix: Name fragment identifying excluded-apps files.
        error_apps_filename_prefix: Name fragment identifying error-apps files.

    Returns:
        A 3-tuple ``(apps_dict_path, excluded_apps_path, error_apps_path)``;
        each element is a ``Path`` or ``None`` when no matching file exists.
    """
    # The first entry yielded by os.walk describes the top-level folder; a
    # folder that does not exist yields nothing at all.
    top_level = next(os.walk(checkpoint_folder), None)
    if top_level is None:
        pickle_paths = []
    else:
        folder, _subdirs, filenames = top_level
        pickle_paths = [
            candidate
            for candidate in (Path(folder, name) for name in filenames)
            if candidate.suffix == '.p'
        ]

    def latest_for(prefix):
        """Return the last-sorting checkpoint path for ``prefix``, or None."""
        matches = sorted(
            path for path in pickle_paths
            if prefix in path.name and "ckpt" in path.name
        )
        return matches[-1] if matches else None

    return (
        latest_for(apps_dict_filename_prefix),
        latest_for(exc_apps_filename_prefix),
        latest_for(error_apps_filename_prefix),
    )

# Filename prefixes that identify each checkpoint file.
apps_dict_filename_prefix = 'apps_dict'
exc_apps_filename_prefix = 'excluded_apps_list'
error_apps_filename_prefix = 'error_apps_list'

# Start from empty containers; they are replaced below when a checkpoint exists.
apps_dict, excluded_apps_list, error_apps_list = {}, [], []

# path = project directory (i.e. steam_data_scraping)/checkpoints
checkpoint_folder = Path('../checkpoints').resolve()
print_log('Checkpoint folder:', checkpoint_folder)

if not checkpoint_folder.exists():
    print_log(f'Fail to find checkpoint folder: {checkpoint_folder}')
    print_log('Start at blank.')
    checkpoint_folder.mkdir(parents=True)

(
    latest_apps_dict_ckpt_path,
    latest_exc_apps_list_ckpt_path,
    latest_error_apps_list_ckpt_path,
) = check_latest_checkpoints(
    checkpoint_folder,
    apps_dict_filename_prefix,
    exc_apps_filename_prefix,
    error_apps_filename_prefix,
)

# Restore each object whose checkpoint file was found.
if latest_apps_dict_ckpt_path is not None:
    apps_dict = load_pickle(latest_apps_dict_ckpt_path)
    print_log('Successfully load apps_dict checkpoint:', latest_apps_dict_ckpt_path)
    print_log(f'Number of apps in apps_dict: {len(apps_dict)}')

if latest_exc_apps_list_ckpt_path is not None:
    excluded_apps_list = load_pickle(latest_exc_apps_list_ckpt_path)
    print_log('Successfully load excluded_apps_list checkpoint:', latest_exc_apps_list_ckpt_path)
    print_log(f'Number of apps in excluded_apps_list: {len(excluded_apps_list)}')

if latest_error_apps_list_ckpt_path is not None:
    error_apps_list = load_pickle(latest_error_apps_list_ckpt_path)
    print_log('Successfully load error_apps_list checkpoint:', latest_error_apps_list_ckpt_path)
    print_log(f'Number of apps in error_apps_list: {len(error_apps_list)}')

[2025-05-16 08:17:20.910] Checkpoint folder: C:\Users\azure\Documents\CS122\project\checkpoints
[2025-05-16 08:17:20.923] Successfully load apps_dict checkpoint: C:\Users\azure\Documents\CS122\project\checkpoints\apps_dict-ckpt-fin.p
[2025-05-16 08:17:20.923] Number of apps in apps_dict: 500
[2025-05-16 08:17:20.923] Successfully load excluded_apps_list checkpoint: C:\Users\azure\Documents\CS122\project\checkpoints\excluded_apps_list-ckpt-fin.p
[2025-05-16 08:17:20.923] Number of apps in excluded_apps_list: 187
[2025-05-16 08:17:20.923] Successfully load error_apps_list checkpoint: C:\Users\azure\Documents\CS122\project\checkpoints\error_apps_list-ckpt-fin.p
[2025-05-16 08:17:20.923] Number of apps in error_apps_list: 0


In [3]:
# Main code
#
# For every category filter: fetch the ranking pages, look up the details of
# each listed app, cache the day's search results as a pickle file, and update
# the apps / excluded / error checkpoints.

execute_datetime = datetime.now()
date_tag = execute_datetime.strftime('%Y%m%d')

# One folder per run date; each category's results are cached inside it.
search_result_folder_path = Path(f"../checkpoints/searchresults/search_results_{date_tag}")
# parents=True so the cell also works when ../checkpoints/searchresults does
# not exist yet (a plain mkdir() raises FileNotFoundError in that case).
search_result_folder_path.mkdir(parents=True, exist_ok=True)

# a list of filters
params_list = [
    {"filter": "topsellers"},
    {"filter": "globaltopsellers"},
    {"filter": "popularnew"},
    # NOTE(review): Steam's store URLs appear to spell this filter
    # "popularcomingsoon" (single "m") — confirm. Left unchanged here because
    # the value also names the saved .pkl file, which other code may look up.
    {"filter": "popularcommingsoon"},
    {"filter": "", "specials": 1}
]
page_list = list(range(1, 5))

params_sr_default = {
    "filter": "topsellers",
    "hidef2p": 1,
    "page": 1,            # page is used to go through different parts of the ranking. Each page contains 25 results
    "json": 1
}

for update_param in params_list:

    items_all = []
    # The "specials" query has an empty filter, so it gets an explicit name.
    category = update_param["filter"] or "specials"
    filename = f"{category}_{date_tag}.pkl"

    if (search_result_folder_path / filename).exists():
        print_log(f"File {filename} exists. Skip.")
        continue

    for page_no in page_list:
        param = params_sr_default.copy()
        param.update(update_param)
        param["page"] = page_no

        search_results = get_search_results(param)

        if not search_results:
            continue

        items = search_results.get("items") or []
        # Log a summary instead of dumping the whole response into the output.
        print_log(f"{category} page {page_no}: {len(items)} search results")

        # preprocessing search results to retrieve the appid of the game
        for item in items:
            try:
                item["appid"] = re.search(r"steam/\w+/(\d+)", item["logo"]).group(1)      # the URL can be steam/bundles/{appid} or steam/apps/{appid}
            except Exception as e:
                print_log(f"Failed to extract appid: {e}")
                item["appid"] = None

        # request for game information using appid
        for item in items:
            appid = item["appid"]
            if not appid:
                continue

            appid = int(appid)  # Ensure it's an int
            logo_url = item.get("logo")

            try:
                appdetails = get_app_details(appid, logo_url)
            except Exception as e:
                print_log(f"Error retrieving details for app ID {appid}: {e}")
                item["appdetail"] = {"success": False}
                if appid not in error_apps_list:
                    error_apps_list.append(appid)
                continue

            item["appdetail"] = appdetails

            # Add to apps_dict only if successful and not already present
            if appdetails.get("success") and appid not in apps_dict:
                appdetails_data = appdetails['data']
                appdetails_data['appid'] = appid

                apps_dict[appid] = appdetails_data
                print_log(f"Successfully get content of App ID: {appid}")

            elif not appdetails.get("success"):
                if appid not in excluded_apps_list:
                    excluded_apps_list.append(appid)
                    print_log(f"App ID {appid} not successful. Added to excluded_apps_list.")

        items_all.extend(items)

    # An empty result means every page request failed or returned nothing.
    # Writing it to disk would make the "File exists. Skip." check above hide
    # the failure for the rest of the day, so leave no file and retry next run.
    if not items_all:
        print_log(f"No search results retrieved for {filename}. Not saved; it will be retried on the next run.")
        continue

    # save the search results
    with open(search_result_folder_path / filename, "wb") as f:
        pickle.dump(items_all, f)
    print_log(f"Saved {filename}")

    save_checkpoints(checkpoint_folder,apps_dict_filename_prefix,exc_apps_filename_prefix,error_apps_filename_prefix,apps_dict,excluded_apps_list,error_apps_list)


[2025-05-16 08:17:20.941] File topsellers_20250516.pkl exists. Skip.
[2025-05-16 08:17:20.941] File globaltopsellers_20250516.pkl exists. Skip.
[2025-05-16 08:17:20.941] File popularnew_20250516.pkl exists. Skip.
[2025-05-16 08:17:20.941] File popularcommingsoon_20250516.pkl exists. Skip.
[2025-05-16 08:17:20.941] File specials_20250516.pkl exists. Skip.


In [4]:
# --- RETROACTIVE FIX: Flatten any wrapped appdetails entries ---
#
# Entries stored as {"success": ..., "data": {...}} are replaced by their inner
# "data" dict. The loop walks every entry of apps_dict, so entries that are
# already flat are carried over unchanged. Iterating over items_all instead
# would keep only the apps from the last category processed by the main cell
# (none at all when that category was skipped) and the checkpoint saved below
# would lose every other app.
fixed_count = 0
new_apps_dict = {}

for appid, existing in apps_dict.items():
    is_wrapped = isinstance(existing, dict) and "success" in existing and "data" in existing

    if is_wrapped and existing["success"] and isinstance(existing["data"], dict):
        # Copy rather than mutate, so re-running this cell sees the same input.
        new_apps_dict[appid] = {**existing["data"], "appid": appid}
        fixed_count += 1
    else:
        # Not wrapped, or wrapped without usable data: keep the original
        new_apps_dict[appid] = existing

print_log(f"Flattened {fixed_count} incorrectly structured app entries in apps_dict.")

# Resave updated checkpoint with the new dictionary
save_checkpoints(
    checkpoint_folder,
    apps_dict_filename_prefix,
    exc_apps_filename_prefix,
    error_apps_filename_prefix,
    new_apps_dict,
    excluded_apps_list,
    error_apps_list
)

# Spot-check one entry after flattening.
print(new_apps_dict.get(2277560))

[2025-05-16 08:17:20.953] Flattened 0 incorrectly structured app entries in apps_dict.
[2025-05-16 08:17:20.954] Successfully create app_dict checkpoint: C:\Users\azure\Documents\CS122\project\checkpoints\apps_dict-ckpt-fin.p
[2025-05-16 08:17:20.954] Successfully create excluded apps checkpoint: C:\Users\azure\Documents\CS122\project\checkpoints\excluded_apps_list-ckpt-fin.p
[2025-05-16 08:17:20.955] Successfully create error apps checkpoint: C:\Users\azure\Documents\CS122\project\checkpoints\error_apps_list-ckpt-fin.p

None
