In [1]:
import pandas as pd
import numpy as np 
import requests
import time
import csv
import json

**Reference materials:**

Steam API rate limits: 200 calls per 5 minutes, 100,000 calls per day.

https://partner.steamgames.com/doc/webapi_overview

https://steamapi.xpaw.me/

https://nik-davis.github.io/posts/2019/steam-data-collection/

https://steamspy.com/api.php


In [2]:
def get_request(url, parameters=None, headers=None, print_url=True,
                retry_wait=10, max_retries=None):
    """Send a GET request and return the decoded JSON payload.

    Parameters
    ----------
    url : str
        Endpoint to call.
    parameters : dict, optional
        Query-string parameters.
    headers : dict, optional
        Extra HTTP headers; they are re-sent on every retry.
    print_url : bool, default True
        Print the final request URL once a successful response arrives.
    retry_wait : int or float, default 10
        Seconds to sleep before retrying after an unsuccessful response.
    max_retries : int, optional
        Maximum number of retries. ``None`` (the default) retries
        indefinitely.

    Returns
    -------
    object or None
        The decoded JSON body, ``{}`` when the body is empty, or ``None``
        when the request raised, the body was not valid JSON, or
        ``max_retries`` was exhausted.
    """
    attempts = 0
    # Retry in a loop rather than recursively: a long outage can no longer
    # hit the recursion limit, and every attempt uses the same arguments.
    while True:
        try:
            response = requests.get(url=url, params=parameters, headers=headers)
        except Exception as exc:
            # KeyboardInterrupt is not an Exception subclass, so Ctrl-C still
            # propagates with its original traceback.
            print(f"Error. {exc!r}")
            return None

        # A requests.Response is truthy only for status codes below 400.
        if response:
            if print_url:
                print(response.url)

            # requests might return an empty string instead of a json object
            if response.text == '':
                return {}
            try:
                return response.json()
            except ValueError as exc:
                # Body was not JSON; report it instead of crashing the caller.
                print(f"Error. Could not decode JSON response: {exc!r}")
                return None

        if max_retries is not None and attempts >= max_retries:
            print(f"No response after {attempts} retries, giving up.")
            return None

        attempts += 1
        print(f'No response, waiting {retry_wait} seconds...')
        time.sleep(retry_wait)
        print('Retrying.')

## Steam Web API

We will get a list of Steam App IDs from the Steam Web API.

In [3]:
# Read the Steam Web API key from a local file kept outside the notebook.
# The context manager closes the handle, and strip() removes any trailing
# newline so it cannot end up inside the request's query string.
with open("../AUTH/STEAM_WEB_API_KEY.txt") as key_file:
    STEAM_WEB_API_KEY = key_file.read().strip()

def get_steam_web_data(interface, method, version, parameters=None):
    """Call a Steam Web API method and return its decoded JSON response.

    Parameters
    ----------
    interface : str
        API interface name, e.g. ``"ISteamApps"``.
    method : str
        Method name within the interface, e.g. ``"GetAppList"``.
    version : str
        Method version, e.g. ``"v2"``.
    parameters : dict, optional
        Additional query parameters. The API key is added automatically.

    Returns
    -------
    object or None
        Whatever ``get_request`` returns for the call.
    """
    from urllib.parse import urlencode

    url = f"https://api.steampowered.com/{interface}/{method}/{version}/"
    # Copy so the caller's dict (or a shared default) is never touched.
    query = {'key': STEAM_WEB_API_KEY, **(parameters or {})}

    # Log the request with the key masked. Letting get_request print
    # response.url would write the secret into the notebook output.
    shown = dict(query)
    shown['key'] = 'REDACTED'
    print(f"{url}?{urlencode(shown)}")

    return get_request(url, parameters=query, print_url=False)

In [4]:
%%time 

# ISteamNews: GetNewsForApp
# Get data from Steam News Hub
parameters = { 'appid': 570, 'format':'json'}
appnews = get_steam_web_data("ISteamNews", "GetNewsForApp", "v2", parameters)['appnews']['newsitems']
appnews_df = pd.DataFrame.from_dict(appnews)
appnews_df.head()

https://api.steampowered.com/ISteamNews/GetNewsForApp/v2/?key=<REDACTED>&appid=570&format=json
CPU times: total: 15.6 ms
Wall time: 104 ms


Unnamed: 0,gid,title,url,is_external_url,author,contents,feedlabel,date,feedname,feed_type,appid,tags
0,5125698536858982908,Dota 2 Update - 3/17/2023,https://steamstore-a.akamaihd.net/news/externa...,True,NathanKell,[h1]Gameplay Update[/h1]\n\n[b]Muerta:[/b]\n[l...,Community Announcements,1679092843,steam_community_announcements,1,570,[patchnotes]
1,5125698536850978678,Dota 2 Update - 3/15/2023,https://steamstore-a.akamaihd.net/news/externa...,True,flynnch,[list]\n[*] Fixed an issue where some players ...,Community Announcements,1678903122,steam_community_announcements,1,570,[patchnotes]
2,5070528810304321813,Bans for 46 players in cheating case hit Dota ...,https://steamstore-a.akamaihd.net/news/externa...,True,Jonathan Bolding,"<img src=""https://cdn.mos.cms.futurecdn.net/JW...",PC Gamer,1678656643,PC Gamer,0,570,
3,5070528810296818379,DPC Spring Tour Kickoff,https://steamstore-a.akamaihd.net/news/externa...,True,ericl_*****,[img]{STEAM_CLAN_IMAGE}/3703047/d5d396eb58a140...,Community Announcements,1678478729,steam_community_announcements,1,570,
4,5070528810286421665,Dota 2's new ghostly gunslinger hero is live,https://steamstore-a.akamaihd.net/news/externa...,True,Andy Chalk,"<img src=""https://cdn.mos.cms.futurecdn.net/SK...",PC Gamer,1678231724,PC Gamer,0,570,


In [5]:
%%time 

# ISteamApps: GetAppList
parameters = {'format':'json'}

applist = get_steam_web_data("ISteamApps", "GetAppList", "v2", parameters)['applist']['apps']
applist_df = pd.DataFrame.from_dict(applist)
print(applist_df.shape)
applist_df.head()

https://api.steampowered.com/ISteamApps/GetAppList/v2/?key=<REDACTED>&format=json
(159031, 2)
CPU times: total: 109 ms
Wall time: 686 ms


Unnamed: 0,appid,name
0,1383152,
1,1897482,
2,2112761,
3,1829051,
4,1983382,


In [6]:
# Turn blank names into NaN so dropna can discard those rows
applist_df.replace('', np.nan, inplace=True)

# Keep the ids of the surviving rows, converted to strings
appid_list = list(map(str, applist_df.dropna(how="any")['appid']))
len(appid_list)

159001

## Data Parsing Functions

In [7]:
# Get info about provided app id from steam store
def get_steamstore_app_info(appid):
    """Fetch the Steam Store ``appdetails`` record for one app.

    Parameters
    ----------
    appid : str or int
        App id to look up; the response is keyed by ``str(appid)``.

    Returns
    -------
    dict or None
        The ``data`` object of the response when it reports success,
        otherwise ``None`` (request failed, empty or unexpected payload,
        or ``success`` is false).
    """
    url = "http://store.steampowered.com/api/appdetails"
    parameters = {"appids": appid}
    json_data = get_request(url, parameters, print_url=False)

    # Guard every level: a payload with an unexpected shape must not raise
    # KeyError/TypeError and abort the caller's batch.
    if not isinstance(json_data, dict):
        return None
    entry = json_data.get(str(appid))
    if not isinstance(entry, dict) or not entry.get('success'):
        return None
    return entry.get('data')

# Returns list of values in the order specified by the columns
def get_approw(approw, columns):
    """Return the values of ``approw`` in the order given by ``columns``.

    A column that is not present in ``approw`` contributes ``None``.
    """
    return [approw[name] if name in approw else None for name in columns]
    
# Get info about provided app id from steam spy 
def get_steamspy_app_info(appid):
    """Request the Steam Spy ``appdetails`` record for ``appid``.

    Returns whatever ``get_request`` returns for the call; the request URL
    is not printed.
    """
    return get_request(
        "https://steamspy.com/api.php",
        {"request": "appdetails", "appid": appid},
        print_url=False,
    )

def get_remaining_apps(file, ls):
    """Return the app ids from ``ls`` that are not yet recorded in ``file``.

    Parameters
    ----------
    file : text file object
        Open CSV file whose first column holds the ids already processed.
        The stream is rewound and read to the end.
    ls : list of str
        All candidate app ids.

    Returns
    -------
    list of str
        ``ls`` itself when the stream position is 0 (nothing written yet);
        otherwise the ids of ``ls`` missing from the file, de-duplicated and
        in their original order.
    """
    # The notebook opens these files in 'a+' mode; position 0 is treated as
    # "nothing has been written yet".
    if file.tell() == 0:
        return ls

    file.seek(0)
    # Parse with csv.reader instead of splitting raw lines: a quoted field may
    # span several physical lines, and the continuation lines must not be
    # mistaken for new records whose first "field" is a finished app id.
    done_apps = {row[0].strip() for row in csv.reader(file) if row}

    remaining = []
    seen = set()
    for app in ls:
        if app not in done_apps and app not in seen:
            seen.add(app)
            remaining.append(app)
    return remaining

## Steam Store API

In [8]:
# Fields extracted from each Steam Store appdetails record, in CSV column order
steam_store_columns = [
    "steam_appid",
    "type",
    "name",
    "required_age",
    "is_free",
    "dlc",
    "short_description",
    "supported_languages",
    "header_image",
    "website",
    "publishers",
    "developers",
    "platforms",
    "metacritic",
    "categories",
    "release_date",
    "controller_support",
    "price_overview",
    "packages",
    "package_groups",
    "genres",
    "movies",
    "recommendations",
    "achievements",
]

In [1]:
  
# Write batches of apps to a csv file.
# The Steam Store API only allows 200 calls every 5 minutes, so each batch
# holds at most `batch_size` calls and is followed by a 5 minute pause.
batch_size = 200

# NOTE(review): this cell writes to ../Data/, while the "Add headers" cell
# reads from ../Data/Raw/ - confirm which location is intended.
with open("../Data/steam_store_data.csv", 'a+', newline='', encoding="utf-8", errors='ignore') as f:
    writer = csv.writer(f)

    # Skip the apps already recorded in the file so the scrape can resume
    rem_app_list = get_remaining_apps(f, appid_list)
    print("Count of apps not called: ", len(rem_app_list))

    begin = 0
    while begin < len(rem_app_list):
        # End index (exclusive) of the batch: exactly batch_size apps, so a
        # batch never exceeds the rate limit.
        end = min(begin + batch_size, len(rem_app_list))
        batch = []

        for appid in rem_app_list[begin:end]:
            # API Call
            approw = get_steamstore_app_info(appid)
            if approw is None:
                # Record the bare id so the app is not requested again
                batch.append([appid])
            else:
                batch.append(get_approw(approw, steam_store_columns))

        # Persist the batch before sleeping so an interrupted run keeps it
        writer.writerows(batch)
        f.flush()

        begin = end

        # Only wait when there is another batch to fetch
        if begin < len(rem_app_list):
            print("Waiting 5 minutes for next available api call...")
            time.sleep(300)

## Steam Spy API

In [9]:
# Fields extracted from each Steam Spy appdetails record, in CSV column order
steam_spy_columns = [
    "appid",
    "name",
    "developer",
    "publisher",
    "score_rank",
    "positive",
    "negative",
    "userscore",
    "owners",
    "average_forever",
    "average_2weeks",
    "median_forever",
    "median_2weeks",
    "price",
    "initialprice",
    "discount",
    "ccu",
    "languages",
    "genre",
    "tags",
]

In [10]:
batch_size = 200

# Write batches of apps to a csv file.
# NOTE(review): no pause is applied between calls here; check the Steam Spy
# API documentation for its current poll-rate limit before a full run.
# NOTE(review): this cell writes to ../Data/, while the "Add headers" cell
# reads from ../Data/Raw/ - confirm which location is intended.
with open("../Data/steam_spy_data.csv", 'a+', newline='', encoding="utf-8", errors='ignore') as f:
    writer = csv.writer(f)

    # Skip the apps already recorded in the file so the scrape can resume
    rem_app_list = get_remaining_apps(f, appid_list)
    print("Count of apps not called: ", len(rem_app_list))

    begin = 0
    while begin < len(rem_app_list):
        # End index (exclusive) of the batch: exactly batch_size apps
        end = min(begin + batch_size, len(rem_app_list))
        batch = []

        for appid in rem_app_list[begin:end]:
            # API Call
            approw = get_steamspy_app_info(appid)
            if approw is None:
                # Nothing is recorded, so the app is retried on the next run
                continue

            batch.append(get_approw(approw, steam_spy_columns))

        # Persist each batch immediately so an interrupted run keeps it
        writer.writerows(batch)
        f.flush()

        begin = end

Count of apps not called:  5610


##### Add headers

In [17]:
def _read_raw_scrape(path, columns):
    """Load a scrape CSV written without a header row and name its columns.

    Passing ``names`` lets pandas accept short rows (the store scrape writes
    a lone app id when a lookup fails), padding the missing fields with NaN.
    A leading header line left by an earlier run of this cell is dropped, so
    re-running the cell does not turn the header into a data row.
    """
    frame = pd.read_csv(path, header=None, names=columns)
    is_header_row = frame[columns[0]].astype(str) == columns[0]
    return frame.loc[~is_header_row].reset_index(drop=True)


# NOTE(review): the scrape cells write to ../Data/, but the files are read
# from ../Data/Raw/ here - confirm the files are moved between the steps.

# Steam Store data
steam1 = _read_raw_scrape("../Data/Raw/steam_store_data.csv", steam_store_columns)
steam1.to_csv("../Data/Raw/steam_store_data.csv", header=True, index=False)

# Steam Spy data
steam2 = _read_raw_scrape("../Data/Raw/steam_spy_data.csv", steam_spy_columns)
steam2.to_csv("../Data/Raw/steam_spy_data.csv", header=True, index=False)