In [1]:
# standard library imports
import csv
import datetime as dt
import json
import os
import statistics
import time

# third-party imports
import numpy as np
import pandas as pd
import requests
import glob

# customisations - ensure tables show all columns
pd.set_option("display.max_columns", 100)

Next, we define a general, all-purpose function to process get requests from an API, supplied through a URL parameter. A dictionary of parameters can be supplied which is passed into the get request automatically, depending on the requirements of the API.

Rather than simply returning the response, we handle a couple of scenarios to help automation. Occasionally we encounter an SSL Error, in which case we simply wait a few seconds then try again (by recursively calling the function). When this happens, and generally throughout this project, we provide quite verbose feedback to show when these errors are encountered and how they are handled.

Sometimes there is no response when a request is made (returns None). This usually happens when too many requests are made in a short period of time, and the polling limit has been reached. We try to avoid this by pausing briefly between requests, as we'll see later, but in case we breach the polling limit we wait 10 seconds then try again.

Handling these errors in this way ensures that our function almost always returns the desired response, which we return in json format to make processing easier.

In [2]:
def get_request(url, parameters=None):
    """Return json-formatted response of a get request using optional parameters.
    
    Parameters
    ----------
    url : string
    parameters : {'parameter': 'value'}
        parameters to pass as part of get request
    
    Returns
    -------
    json_data
        json-formatted response (dict-like)
    """
    try:
        response = requests.get(url=url, params=parameters)
    except SSLError as s:
        print('SSL Error:', s)
        
        for i in range(5, 0, -1):
            print('\rWaiting... ({})'.format(i), end='')
            time.sleep(1)
        print('\rRetrying.' + ' '*10)
        
        # recusively try again
        return get_request(url, parameters)
    
    if response:
        return response.json()
    else:
        # response is none usually means too many requests. Wait and try again 
        print('No response, waiting 10 seconds...')
        time.sleep(10)
        print('Retrying.')
        return get_request(url, parameters)

# Generate List of App IDs
Every app on the steam store has a unique app ID. Whilst different apps can have the same name, they can't have the same ID. This will be very useful to us for identifying apps and eventually merging our tables of data.

Before we get to that, we need to generate a list of app ids which we can use to build our data sets. It's possible to generate one from the Steam API, however this has over 70,000 entries, many of which are demos and videos with no way to tell them apart. Instead, SteamSpy provides an 'all' request, supplying some information about the apps they track. It doesn't supply all information about each app, so we still need to request this information individually, but it provides a good starting point.

Because many of the return fields are strings containing commas and other punctuation, it is easiest to read the response into a pandas dataframe, and export the required appid and name fields to a csv. We could keep only the appid column as a list or pandas series, but it may be useful to keep the app name at this stage.

# AppList Data Collection
When trying to receive our Data using SteamSpy, we realised that it only returned 1000 entries of games on Steam. Through debugging, we discovered that the SteamAPI from SteamSpy only returned the 1st page of Games on Steam thus the code needed to be modified. We used a for loop, looping through all the pages (0 to 71) using the API. We then converted each DataFrame into its own Excel sheet (CSV) and concatenated them into a single Excel sheet.

In [5]:
# Use steamspy API to retrieve SteamApp ID and Name
url = "https://steamspy.com/api.php"

#Loop through the 71 pages of SteamData using the page parameter
for i in range(0, 72, 1):
    pageNum = i;
    parameters = {"request": "all", "page": pageNum}
    
    # request 'all&page=i' using the parameters from steamspy and parse into dataframe
    json_data = get_request(url, parameters=parameters)
    steam_spy_all = pd.DataFrame.from_dict(json_data, orient='index')
    time.sleep(1) #pause
    
    # generate sorted app_list from steamspy data
    app_list = steam_spy_all[['appid', 'name']].sort_values('appid').reset_index(drop=True)

    # export to csv files based on their page number, formatted-strings
    app_list.to_csv(f"AppList/app_list_{i}.csv", index=False)

In [21]:
# folder name
path = r'AppList' 

# combine into a group
all_files = glob.glob(path + "/*.csv") 
# empty array
li = []
# loop through all files in group and append
for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

In [51]:
#concatenate into 1 excel DF
df = pd.concat(li, axis=0, ignore_index=True)
print(df)

         appid                       name
0           10             Counter-Strike
1           20      Team Fortress Classic
2           30              Day of Defeat
3           40         Deathmatch Classic
4           50  Half-Life: Opposing Force
...        ...                        ...
71188  2726450                 Windowkill
71189  2765100             SHIJIE XIUXIAN
71190  2786680    Knowledge, or know Lady
71191  2792510                    Area 19
71192  2807870   Darkness Ritual: Impasse

[71193 rows x 2 columns]


In [25]:
# sort DF and convert to csv file
df = df[['appid', 'name']].sort_values('appid').reset_index(drop=True)
df.to_csv("MainAppList.csv", index=False)

In [3]:
# read from MainAppList
mainAppList = pd.read_csv('MainAppList.csv') 
mainAppList.head() #confirm that data has been exported properly

Unnamed: 0,appid,name
0,10,Counter-Strike
1,20,Team Fortress Classic
2,30,Day of Defeat
3,40,Deathmatch Classic
4,50,Half-Life: Opposing Force


# Download Steam Data
Now we are ready to start downloading data and writing to file. We define our logic particular to handling the steam API - in fact if no data is returned we return just the name and appid - then begin setting some parameters. We define the files we will write our data and index to, and the columns for the csv file. The API doesn't return every column for every app, so it is best to explicitly set these.

Next we run our functions to set up the files, and make a call to process_batches to begin the process. Some additional parameters have been added for demonstration, to constrain the download to just a few rows and smaller batches. Removing these would allow the entire download process to be repeated.

In [4]:
def get_app_data(start, stop, parser, pause):
    """Return list of app data generated from parser.
    
    parser : function to handle request
    """
    app_data = []
    
    # iterate through each row of app_list, confined by start and stop
    for index, row in mainAppList[start:stop].iterrows():
        print('Current index: {}'.format(index), end='\r')
        
        appid = row['appid']
        name = row['name']

        # retrive app data for a row, handled by supplied parser, and append to list
        data = parser(appid, name)
        app_data.append(data)

        time.sleep(pause) # prevent overloading api with requests
    
    return app_data


def process_batches(parser, mainAppList, download_path, data_filename, index_filename,
                    columns, begin=0, end=-1, batchsize=100, pause=1):
    """Process app data in batches, writing directly to file.
    
    parser : custom function to format request
    app_list : dataframe of appid and name
    download_path : path to store data
    data_filename : filename to save app data
    index_filename : filename to store highest index written
    columns : column names for file
    
    Keyword arguments:
    
    begin : starting index (get from index_filename, default 0)
    end : index to finish (defaults to end of app_list)
    batchsize : number of apps to write in each batch (default 100)
    pause : time to wait after each api request (defualt 1)
    
    returns: none
    """
    print('Starting at index {}:\n'.format(begin))
    
    # by default, process all apps in app_list
    if end == -1:
        end = len(mainAppList) + 1
    
    # generate array of batch begin and end points
    batches = np.arange(begin, end, batchsize)
    batches = np.append(batches, end)
    
    apps_written = 0
    batch_times = []
    
    for i in range(len(batches) - 1):
        start_time = time.time()
        
        start = batches[i]
        stop = batches[i+1]
        
        app_data = get_app_data(start, stop, parser, pause)
        
        rel_path = os.path.join(download_path, data_filename)
        
        # writing app data to file
        with open(rel_path, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=columns, extrasaction='ignore')
            
            for j in range(3,0,-1):
                print("\rAbout to write data, don't stop script! ({})".format(j), end='')
                time.sleep(0.5)
            
            writer.writerows(app_data)
            print('\rExported lines {}-{} to {}.'.format(start, stop-1, data_filename), end=' ')
            
        apps_written += len(app_data)
        
        idx_path = os.path.join(download_path, index_filename)
        
        # writing last index to file
        with open(idx_path, 'w') as f:
            index = stop
            print(index, file=f)
            
        # logging time taken
        end_time = time.time()
        time_taken = end_time - start_time
        
        batch_times.append(time_taken)
        mean_time = statistics.mean(batch_times)
        
        est_remaining = (len(batches) - i - 2) * mean_time
        
        remaining_td = dt.timedelta(seconds=round(est_remaining))
        time_td = dt.timedelta(seconds=round(time_taken))
        mean_td = dt.timedelta(seconds=round(mean_time))
        
        print('Batch {} time: {} (avg: {}, remaining: {})'.format(i, time_td, mean_td, remaining_td))
            
    print('\nProcessing batches complete. {} apps written'.format(apps_written))

In [5]:
def reset_index(download_path, index_filename):
    """Reset index in file to 0."""
    rel_path = os.path.join(download_path, index_filename)
    
    with open(rel_path, 'w') as f:
        print(0, file=f)
        

def get_index(download_path, index_filename):
    """Retrieve index from file, returning 0 if file not found."""
    try:
        rel_path = os.path.join(download_path, index_filename)

        with open(rel_path, 'r') as f:
            index = int(f.readline())
    
    except FileNotFoundError:
        index = 0
        
    return index


def prepare_data_file(download_path, filename, index, columns):
    """Create file and write headers if index is 0."""
    if index == 0:
        rel_path = os.path.join(download_path, filename)

        with open(rel_path, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=columns)
            writer.writeheader()

In [55]:
def parse_steam_request(appid, name):
    """Unique parser to handle data from Steam Store API.
    
    Returns : json formatted data (dict-like)
    """
    url = "http://store.steampowered.com/api/appdetails/"
    parameters = {"appids": appid}
    
    json_data = get_request(url, parameters=parameters)
    json_app_data = json_data[str(appid)]
    
    if json_app_data['success']:
        data = json_app_data['data']
    else:
        data = {'name': name, 'steam_appid': appid}
        
    return data


# Set file parameters
download_path = 'Test'
steam_app_data = 'steam_app_data.csv'
steam_index = 'steam_index.txt'

steam_columns = [
    'type', 'name', 'steam_appid', 'required_age', 'is_free', 'controller_support',
    'dlc', 'fullgame', 'supported_languages','pc_requirements', 'mac_requirements',
    'linux_requirements', 'legal_notice', 'drm_notice', 'ext_user_account_notice',
    'developers', 'publishers', 'demos', 'price_overview', 'packages', 'package_groups',
    'platforms', 'metacritic', 'reviews', 'categories', 'genres', 'recommendations', 
    'achievements', 'release_date', 'support_info', 'ratings'
]

# Overwrites last index for demonstration (would usually store highest index so can continue across sessions)
#reset_index(download_path, steam_index)

# Retrieve last index downloaded from file
index = get_index(download_path, steam_index)

# Wipe or create data file and write headers if index is 0
prepare_data_file(download_path, steam_app_data, index, steam_columns)

# Set end and chunksize for demonstration - remove to run through entire app list
process_batches(
    parser=parse_steam_request,
    mainAppList=mainAppList,
    download_path=download_path,
    data_filename=steam_app_data,
    index_filename=steam_index,
    columns=steam_columns,
    begin=index,
    #change end to indicate stopping index
    #end=40,
    batchsize=10,
    pause=0.3
)

Starting at index 61200:

Exported lines 61200-61209 to steam_app_data.csv. Batch 0 time: 0:00:14 (avg: 0:00:14, remaining: 3:51:53)
Exported lines 61210-61219 to steam_app_data.csv. Batch 1 time: 0:00:14 (avg: 0:00:14, remaining: 3:50:47)
Exported lines 61220-61229 to steam_app_data.csv. Batch 2 time: 0:00:14 (avg: 0:00:14, remaining: 3:51:08)
Exported lines 61230-61239 to steam_app_data.csv. Batch 3 time: 0:00:14 (avg: 0:00:14, remaining: 3:50:58)
Exported lines 61240-61249 to steam_app_data.csv. Batch 4 time: 0:00:14 (avg: 0:00:14, remaining: 3:49:47)
Exported lines 61250-61259 to steam_app_data.csv. Batch 5 time: 0:00:14 (avg: 0:00:14, remaining: 3:49:24)
Exported lines 61260-61269 to steam_app_data.csv. Batch 6 time: 0:00:14 (avg: 0:00:14, remaining: 3:48:27)
Exported lines 61270-61279 to steam_app_data.csv. Batch 7 time: 0:00:14 (avg: 0:00:14, remaining: 3:48:03)
Exported lines 61280-61289 to steam_app_data.csv. Batch 8 time: 0:00:14 (avg: 0:00:14, remaining: 3:47:56)
Exported li

Exported lines 62640-62649 to steam_app_data.csv. Batch 144 time: 0:00:14 (avg: 0:00:15, remaining: 3:34:58)
Exported lines 62650-62659 to steam_app_data.csv. Batch 145 time: 0:00:13 (avg: 0:00:15, remaining: 3:34:32)
Exported lines 62660-62669 to steam_app_data.csv. Batch 146 time: 0:00:13 (avg: 0:00:15, remaining: 3:34:07)
Exported lines 62670-62679 to steam_app_data.csv. Batch 147 time: 0:00:13 (avg: 0:00:15, remaining: 3:33:43)
Exported lines 62680-62689 to steam_app_data.csv. Batch 148 time: 0:00:13 (avg: 0:00:15, remaining: 3:33:18)
Exported lines 62690-62699 to steam_app_data.csv. Batch 149 time: 0:00:14 (avg: 0:00:15, remaining: 3:32:55)
Exported lines 62700-62709 to steam_app_data.csv. Batch 150 time: 0:00:13 (avg: 0:00:15, remaining: 3:32:31)
Exported lines 62710-62719 to steam_app_data.csv. Batch 151 time: 0:00:14 (avg: 0:00:15, remaining: 3:32:09)
Exported lines 62720-62729 to steam_app_data.csv. Batch 152 time: 0:00:13 (avg: 0:00:15, remaining: 3:31:44)
Exported lines 6273

Exported lines 64060-64069 to steam_app_data.csv. Batch 286 time: 0:00:15 (avg: 0:00:15, remaining: 2:59:42)
Exported lines 64070-64079 to steam_app_data.csv. Batch 287 time: 0:00:14 (avg: 0:00:15, remaining: 2:59:24)
Exported lines 64080-64089 to steam_app_data.csv. Batch 288 time: 0:00:14 (avg: 0:00:15, remaining: 2:59:05)
Exported lines 64090-64099 to steam_app_data.csv. Batch 289 time: 0:00:14 (avg: 0:00:15, remaining: 2:58:47)
Exported lines 64100-64109 to steam_app_data.csv. Batch 290 time: 0:00:14 (avg: 0:00:15, remaining: 2:58:29)
Exported lines 64110-64119 to steam_app_data.csv. Batch 291 time: 0:00:14 (avg: 0:00:15, remaining: 2:58:12)
Exported lines 64120-64129 to steam_app_data.csv. Batch 292 time: 0:00:14 (avg: 0:00:15, remaining: 2:57:53)
Exported lines 64130-64139 to steam_app_data.csv. Batch 293 time: 0:00:13 (avg: 0:00:15, remaining: 2:57:34)
No response, waiting 10 seconds...
Retrying.
No response, waiting 10 seconds...
Retrying.
Exported lines 64140-64149 to steam_ap

Exported lines 64780-64789 to steam_app_data.csv. Batch 358 time: 0:00:13 (avg: 0:00:15, remaining: 2:41:53)
Exported lines 64790-64799 to steam_app_data.csv. Batch 359 time: 0:00:13 (avg: 0:00:15, remaining: 2:41:35)
Exported lines 64800-64809 to steam_app_data.csv. Batch 360 time: 0:00:13 (avg: 0:00:15, remaining: 2:41:17)
Exported lines 64810-64819 to steam_app_data.csv. Batch 361 time: 0:00:14 (avg: 0:00:15, remaining: 2:40:59)
Exported lines 64820-64829 to steam_app_data.csv. Batch 362 time: 0:00:15 (avg: 0:00:15, remaining: 2:40:43)
Exported lines 64830-64839 to steam_app_data.csv. Batch 363 time: 0:00:14 (avg: 0:00:15, remaining: 2:40:27)
Exported lines 64840-64849 to steam_app_data.csv. Batch 364 time: 0:00:14 (avg: 0:00:15, remaining: 2:40:09)
Exported lines 64850-64859 to steam_app_data.csv. Batch 365 time: 0:00:14 (avg: 0:00:15, remaining: 2:39:52)
Exported lines 64860-64869 to steam_app_data.csv. Batch 366 time: 0:00:14 (avg: 0:00:15, remaining: 2:39:35)
Exported lines 6487

Exported lines 65510-65519 to steam_app_data.csv. Batch 431 time: 0:00:14 (avg: 0:00:15, remaining: 2:23:14)
Exported lines 65520-65529 to steam_app_data.csv. Batch 432 time: 0:00:14 (avg: 0:00:15, remaining: 2:22:58)
Exported lines 65530-65539 to steam_app_data.csv. Batch 433 time: 0:00:14 (avg: 0:00:15, remaining: 2:22:41)
Exported lines 65540-65549 to steam_app_data.csv. Batch 434 time: 0:00:14 (avg: 0:00:15, remaining: 2:22:24)
No response, waiting 10 seconds...
Retrying.
No response, waiting 10 seconds...
Retrying.
Exported lines 65550-65559 to steam_app_data.csv. Batch 435 time: 0:00:36 (avg: 0:00:15, remaining: 2:22:37)
Exported lines 65560-65569 to steam_app_data.csv. Batch 436 time: 0:00:14 (avg: 0:00:15, remaining: 2:22:20)
Exported lines 65570-65579 to steam_app_data.csv. Batch 437 time: 0:00:14 (avg: 0:00:15, remaining: 2:22:03)
Exported lines 65580-65589 to steam_app_data.csv. Batch 438 time: 0:00:14 (avg: 0:00:15, remaining: 2:21:47)
Exported lines 65590-65599 to steam_ap

Exported lines 66230-66239 to steam_app_data.csv. Batch 503 time: 0:00:14 (avg: 0:00:15, remaining: 2:05:22)
Exported lines 66240-66249 to steam_app_data.csv. Batch 504 time: 0:00:14 (avg: 0:00:15, remaining: 2:05:05)
Exported lines 66250-66259 to steam_app_data.csv. Batch 505 time: 0:00:14 (avg: 0:00:15, remaining: 2:04:49)
Exported lines 66260-66269 to steam_app_data.csv. Batch 506 time: 0:00:14 (avg: 0:00:15, remaining: 2:04:33)
Exported lines 66270-66279 to steam_app_data.csv. Batch 507 time: 0:00:14 (avg: 0:00:15, remaining: 2:04:17)
Exported lines 66280-66289 to steam_app_data.csv. Batch 508 time: 0:00:14 (avg: 0:00:15, remaining: 2:04:01)
Exported lines 66290-66299 to steam_app_data.csv. Batch 509 time: 0:00:14 (avg: 0:00:15, remaining: 2:03:45)
Exported lines 66300-66309 to steam_app_data.csv. Batch 510 time: 0:00:14 (avg: 0:00:15, remaining: 2:03:29)
Exported lines 66310-66319 to steam_app_data.csv. Batch 511 time: 0:00:14 (avg: 0:00:15, remaining: 2:03:13)
Exported lines 6632

Retrying.
Exported lines 66950-66959 to steam_app_data.csv. Batch 575 time: 0:00:37 (avg: 0:00:15, remaining: 1:47:24)
Exported lines 66960-66969 to steam_app_data.csv. Batch 576 time: 0:00:14 (avg: 0:00:15, remaining: 1:47:09)
Exported lines 66970-66979 to steam_app_data.csv. Batch 577 time: 0:00:15 (avg: 0:00:15, remaining: 1:46:53)
Exported lines 66980-66989 to steam_app_data.csv. Batch 578 time: 0:00:14 (avg: 0:00:15, remaining: 1:46:37)
Exported lines 66990-66999 to steam_app_data.csv. Batch 579 time: 0:00:14 (avg: 0:00:15, remaining: 1:46:21)
Exported lines 67000-67009 to steam_app_data.csv. Batch 580 time: 0:00:14 (avg: 0:00:15, remaining: 1:46:05)
Exported lines 67010-67019 to steam_app_data.csv. Batch 581 time: 0:00:14 (avg: 0:00:15, remaining: 1:45:49)
Exported lines 67020-67029 to steam_app_data.csv. Batch 582 time: 0:00:15 (avg: 0:00:15, remaining: 1:45:34)
Exported lines 67030-67039 to steam_app_data.csv. Batch 583 time: 0:00:15 (avg: 0:00:15, remaining: 1:45:18)
Exported 

Exported lines 67680-67689 to steam_app_data.csv. Batch 648 time: 0:00:14 (avg: 0:00:15, remaining: 1:28:56)
Exported lines 67690-67699 to steam_app_data.csv. Batch 649 time: 0:00:14 (avg: 0:00:15, remaining: 1:28:40)
Exported lines 67700-67709 to steam_app_data.csv. Batch 650 time: 0:00:14 (avg: 0:00:15, remaining: 1:28:25)
Exported lines 67710-67719 to steam_app_data.csv. Batch 651 time: 0:00:14 (avg: 0:00:15, remaining: 1:28:09)
Exported lines 67720-67729 to steam_app_data.csv. Batch 652 time: 0:00:14 (avg: 0:00:15, remaining: 1:27:53)
Exported lines 67730-67739 to steam_app_data.csv. Batch 653 time: 0:00:14 (avg: 0:00:15, remaining: 1:27:37)
Exported lines 67740-67749 to steam_app_data.csv. Batch 654 time: 0:00:15 (avg: 0:00:15, remaining: 1:27:22)
No response, waiting 10 seconds...
Retrying.
No response, waiting 10 seconds...
Retrying.
Exported lines 67750-67759 to steam_app_data.csv. Batch 655 time: 0:00:36 (avg: 0:00:15, remaining: 1:27:18)
Exported lines 67760-67769 to steam_ap

Exported lines 69140-69149 to steam_app_data.csv. Batch 794 time: 0:00:14 (avg: 0:00:15, remaining: 0:51:54)
No response, waiting 10 seconds...
Retrying.
No response, waiting 10 seconds...
Retrying.
Exported lines 69150-69159 to steam_app_data.csv. Batch 795 time: 0:00:36 (avg: 0:00:15, remaining: 0:51:44)
Exported lines 69160-69169 to steam_app_data.csv. Batch 796 time: 0:00:14 (avg: 0:00:15, remaining: 0:51:29)
Exported lines 69170-69179 to steam_app_data.csv. Batch 797 time: 0:00:15 (avg: 0:00:15, remaining: 0:51:14)
Exported lines 69180-69189 to steam_app_data.csv. Batch 798 time: 0:00:14 (avg: 0:00:15, remaining: 0:50:58)
Exported lines 69190-69199 to steam_app_data.csv. Batch 799 time: 0:00:14 (avg: 0:00:15, remaining: 0:50:43)
Exported lines 69200-69209 to steam_app_data.csv. Batch 800 time: 0:00:14 (avg: 0:00:15, remaining: 0:50:27)
Exported lines 69210-69219 to steam_app_data.csv. Batch 801 time: 0:00:15 (avg: 0:00:15, remaining: 0:50:12)
Exported lines 69220-69229 to steam_ap

Exported lines 70580-70589 to steam_app_data.csv. Batch 938 time: 0:00:14 (avg: 0:00:15, remaining: 0:15:28)
Exported lines 70590-70599 to steam_app_data.csv. Batch 939 time: 0:00:14 (avg: 0:00:15, remaining: 0:15:13)
Exported lines 70600-70609 to steam_app_data.csv. Batch 940 time: 0:00:14 (avg: 0:00:15, remaining: 0:14:57)
Exported lines 70610-70619 to steam_app_data.csv. Batch 941 time: 0:00:14 (avg: 0:00:15, remaining: 0:14:42)
Exported lines 70620-70629 to steam_app_data.csv. Batch 942 time: 0:00:14 (avg: 0:00:15, remaining: 0:14:27)
Exported lines 70630-70639 to steam_app_data.csv. Batch 943 time: 0:00:14 (avg: 0:00:15, remaining: 0:14:12)
Exported lines 70640-70649 to steam_app_data.csv. Batch 944 time: 0:00:14 (avg: 0:00:15, remaining: 0:13:56)
Exported lines 70650-70659 to steam_app_data.csv. Batch 945 time: 0:00:14 (avg: 0:00:15, remaining: 0:13:41)
Exported lines 70660-70669 to steam_app_data.csv. Batch 946 time: 0:00:14 (avg: 0:00:15, remaining: 0:13:26)
Exported lines 7067

In [56]:
# inspect downloaded data
pd.read_csv('Test/steam_app_data.csv').head()

  pd.read_csv('Test/steam_app_data.csv').head()


Unnamed: 0,type,name,steam_appid,required_age,is_free,controller_support,dlc,fullgame,supported_languages,pc_requirements,mac_requirements,linux_requirements,legal_notice,drm_notice,ext_user_account_notice,developers,publishers,demos,price_overview,packages,package_groups,platforms,metacritic,reviews,categories,genres,recommendations,achievements,release_date,support_info,ratings
0,game,Counter-Strike,10,0.0,False,,,,"English<strong>*</strong>, French<strong>*</st...",{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,{'minimum': 'Minimum: OS X Snow Leopard 10.6....,"{'minimum': 'Minimum: Linux Ubuntu 12.04, Dual...",,,,['Valve'],['Valve'],,"{'currency': 'SGD', 'initial': 1000, 'final': ...","[574941, 7]","[{'name': 'default', 'title': 'Buy Counter-Str...","{'windows': True, 'mac': True, 'linux': True}","{'score': 88, 'url': 'https://www.metacritic.c...",,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]",{'total': 148406},,"{'coming_soon': False, 'date': '1 Nov, 2000'}","{'url': 'http://steamcommunity.com/app/10', 'e...",{'usk': {'rating': '16'}}
1,game,Team Fortress Classic,20,0.0,False,,,,"English, French, German, Italian, Spanish - Sp...",{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,{'minimum': 'Minimum: OS X Snow Leopard 10.6....,"{'minimum': 'Minimum: Linux Ubuntu 12.04, Dual...",,,,['Valve'],['Valve'],,"{'currency': 'SGD', 'initial': 525, 'final': 5...",[29],"[{'name': 'default', 'title': 'Buy Team Fortre...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]",{'total': 5981},,"{'coming_soon': False, 'date': '1 Apr, 1999'}","{'url': '', 'email': ''}",
2,game,Day of Defeat,30,0.0,False,,,,"English, French, German, Italian, Spanish - Spain",{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,{'minimum': 'Minimum: OS X Snow Leopard 10.6....,"{'minimum': 'Minimum: Linux Ubuntu 12.04, Dual...",,,,['Valve'],['Valve'],,"{'currency': 'SGD', 'initial': 525, 'final': 5...","[30, 944613]","[{'name': 'default', 'title': 'Buy Day of Defe...","{'windows': True, 'mac': True, 'linux': True}","{'score': 79, 'url': 'https://www.metacritic.c...",,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]",{'total': 3984},,"{'coming_soon': False, 'date': '1 May, 2003'}","{'url': '', 'email': ''}",{'usk': {'rating': '16'}}
3,game,Deathmatch Classic,40,0.0,False,,,,"English, French, German, Italian, Spanish - Sp...",{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,{'minimum': 'Minimum: OS X Snow Leopard 10.6....,"{'minimum': 'Minimum: Linux Ubuntu 12.04, Dual...",,,,['Valve'],['Valve'],,"{'currency': 'SGD', 'initial': 525, 'final': 5...",[31],"[{'name': 'default', 'title': 'Buy Deathmatch ...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]",{'total': 2093},,"{'coming_soon': False, 'date': '1 Jun, 2001'}","{'url': '', 'email': ''}",
4,game,Half-Life: Opposing Force,50,0.0,False,,,,"English, French, German, Korean",{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,{'minimum': 'Minimum: OS X Snow Leopard 10.6....,"{'minimum': 'Minimum: Linux Ubuntu 12.04, Dual...",,,,['Gearbox Software'],['Valve'],,"{'currency': 'SGD', 'initial': 525, 'final': 5...",[32],"[{'name': 'default', 'title': 'Buy Half-Life: ...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}]",{'total': 18597},,"{'coming_soon': False, 'date': '1 Nov, 1999'}","{'url': 'https://help.steampowered.com', 'emai...",


# Download SteamSpy data
To retrieve data from SteamSpy we perform a very similar process. Our parse function is a little simpler because of the how data is returned, and the maximum polling rate of this API is higher so we can set a lower value for pause in the process_batches function and download more quickly. Apart from that we set the new variables and make a call to the process_batches function once again.

In [8]:
def parse_steamspy_request(appid, name):
    """Parser to handle SteamSpy API data."""
    url = "https://steamspy.com/api.php"
    parameters = {"request": "appdetails", "appid": appid}
    
    json_data = get_request(url, parameters)
    return json_data


# set files and columns
download_path = 'Test'
steamspy_data = 'steamspy_data.csv'
steamspy_index = 'steamspy_index.txt'

steamspy_columns = [
    'appid', 'name', 'developer', 'publisher', 'score_rank', 'positive',
    'negative', 'userscore', 'owners', 'average_forever', 'average_2weeks',
    'median_forever', 'median_2weeks', 'price', 'initialprice', 'discount',
    'languages', 'genre', 'ccu', 'tags'
]

#reset_index(download_path, steamspy_index)
index = get_index(download_path, steamspy_index)

# Wipe data file if index is 0
prepare_data_file(download_path, steamspy_data, index, steamspy_columns)

process_batches(
    parser=parse_steamspy_request,
    mainAppList=mainAppList,
    download_path=download_path, 
    data_filename=steamspy_data,
    index_filename=steamspy_index,
    columns=steamspy_columns,
    begin=index,
    #change end to indicate stopping index
    #end=40,
    batchsize=5,
    pause=0.3
)

Starting at index 25225:

Current index: 25229

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [35]:
# inspect downloaded steamspy data
pd.read_csv('Test/steamspy_data.csv').head()

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount,languages,genre,ccu,tags
0,10,Counter-Strike,Valve,Valve,,229703,5993,0,"10,000,000 .. 20,000,000",8894,525,157,757,999,999,0,"English, French, German, Italian, Spanish - Sp...",Action,10504,"{'Action': 5468, 'FPS': 4894, 'Multiplayer': 3..."
1,20,Team Fortress Classic,Valve,Valve,,7007,1065,0,"5,000,000 .. 10,000,000",129,0,9,0,499,499,0,"English, French, German, Italian, Spanish - Sp...",Action,69,"{'Action': 762, 'FPS': 326, 'Multiplayer': 276..."
2,30,Day of Defeat,Valve,Valve,,6037,664,0,"5,000,000 .. 10,000,000",158,188,15,188,499,499,0,"English, French, German, Italian, Spanish - Spain",Action,111,"{'FPS': 797, 'World War II': 268, 'Multiplayer..."
3,40,Deathmatch Classic,Valve,Valve,,2413,512,0,"5,000,000 .. 10,000,000",13,0,5,0,499,499,0,"English, French, German, Italian, Spanish - Sp...",Action,3,"{'Action': 634, 'FPS': 149, 'Classic': 114, 'M..."
4,50,Half-Life: Opposing Force,Gearbox Software,Valve,,20677,1051,0,"2,000,000 .. 5,000,000",226,0,142,0,499,499,0,"English, French, German, Korean",Action,144,"{'FPS': 915, 'Action': 348, 'Classic': 279, 'S..."
