In [4]:
# standard library imports
import csv
import datetime as dt
import json
import os
import statistics
import time

# third-party imports
import numpy as np
import pandas as pd
import requests

# customisations - ensure tables show all columns
pd.set_option("max_columns", 100)

In [110]:
def query(url,parameter,interval,maxtrial):
    '''
    return json of requesting url with parameters, otherwise retry with an interval
    -----------------
    Parameters:
    -- url: string, API
    -- parameter: {'parameters' : 'value'}, parameters attached
    -- interval: int, time of sleep if request fail
    -- maxtrial: int, maximum number of attempt
    -----------------
    Returns:
    -- json
    '''
    if maxtrial == 0:
        print("request failed, stop trying!")
        return None
    response = requests.get(url=url,params=parameter)
    if response.status_code == 200:
        print('request successful!')
        return response.json()
    else:
        print('An error occurs! Retry after {} seconds'.format(interval))
        time.sleep(interval)
        return query(url,parameter,interval,maxtrial-1)
        
        

In [86]:
test = query(url='https://steamspy.com/api.php',parameter={"request":"all","page": "0"},interval=3,maxtrial=3)
print(len(test))

request successful!
1000


In [87]:
test

{'570': {'appid': 570,
  'name': 'Dota 2',
  'developer': 'Valve',
  'publisher': 'Valve',
  'score_rank': '',
  'positive': 1504979,
  'negative': 309274,
  'userscore': 0,
  'owners': '100,000,000 .. 200,000,000',
  'average_forever': 41871,
  'average_2weeks': 1911,
  'median_forever': 1011,
  'median_2weeks': 1061,
  'price': '0',
  'initialprice': '0',
  'discount': '0',
  'ccu': 727646},
 '730': {'appid': 730,
  'name': 'Counter-Strike: Global Offensive',
  'developer': 'Valve, Hidden Path Entertainment',
  'publisher': 'Valve',
  'score_rank': '',
  'positive': 5870197,
  'negative': 779243,
  'userscore': 0,
  'owners': '50,000,000 .. 100,000,000',
  'average_forever': 30187,
  'average_2weeks': 845,
  'median_forever': 6349,
  'median_2weeks': 263,
  'price': '0',
  'initialprice': '0',
  'discount': '0',
  'ccu': 920569},
 '578080': {'appid': 578080,
  'name': 'PUBG: BATTLEGROUNDS',
  'developer': 'KRAFTON, Inc.',
  'publisher': 'KRAFTON, Inc.',
  'score_rank': '',
  'positiv

In [104]:
class appidExtractor:
    '''
    extract appid from given url
    Parameters:
    --- url: string, API
    --- name: string, data will be stored in name.csv
    --- interval: int, time of sleep if request fail
    '''
    def __init__(self, url, name, interval):
        '''
        -- csvpath: path of csv file
        -- pagepath: path of txt that stores pathidx
        -- pageidx: next page index to request. !!It is a string!!
        -- fieldnames: names of the columns to be written into .csv file
        '''
        csvfile = name+".csv"
        pagefile = name+"_page_idx.txt"
        self.url = url
        self.name = name
        self.interval = interval
        self.pageidx = 0
        self.fieldnames = []
        self.csvpath = os.path.join(os.getcwd(),csvfile)
        self.pagepath = os.path.join(os.getcwd(),pagefile)
        
    def writeheader(self):
        '''
        write header of csvfile based fieldnames
        '''
        with open(self.csvpath,mode='a',newline = '',encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=self.fieldnames,extrasaction='ignore')
            writer.writeheader()
        return
        
    def initialize(self):
        '''
        initialize csv and page file if no previous request made
        otherwise, read page idx from pagefile
        if only one file exists, reset the progress
        '''
        try:
            csvfile = open(self.csvpath,'r')
            try:
                pagefile = open(self.pagepath,'r')
                self.pageidx = int(pagefile.read())
                pagefile.close()
                csvfile.close()
            except FileNotFoundError:
                csvfile.close()
                csvfile = open(self.csvpath,'w')
                csvfile.write("")
                csvfile.close()
                pagefile = open(self.pagepath,'w')
                pagefile.write("0")
                pagefile.close()
            except: 
                print("something else went wrong!")
                exit()
                
        except FileNotFoundError:
            csvfile = open(self.csvpath,'x')
            pagefile = open(self.pagepath,'w')
            pagefile.write("0")
            csvfile.close()
            pagefile.close()
        except:
            print("something else went wrong!")
            exit()
        
        self.writeheader()
        return
            
    def deleteAll(self):
        '''delete both csvfile and pagefile'''
        if os.path.exists(self.csvpath):
            os.remove(self.csvpath)
        if os.path.exists(self.pagepath):
            os.remove(self.pagepath)

        return
    
    def get_page_idx(self):
        '''
        read page idx from pagefile and set self.pageidx
        '''
        with open(self.pagepath,'r') as pagefile:
            self.pageidx = pagefile.read()
        return 
    
    def set_page_idx(self,val):
        '''
        set page idx in both self.pageidx and pagefile
        -- val: int
        '''
        self.pageidx = str(val)
        with open(self.pagepath,'w') as pagefile:
            pagefile.write(self.pageidx)
        return
    
    def set_fieldnames(self,fieldnames):
        '''
        set the names of the columns to be written into .csv file
        !!! must be set before run() !!!
        -- fieldnames: list of string
        '''
        self.fieldnames = fieldnames
        return
    
    
    def process_batch(self):
        '''
        recursion to request data with all page idx
        '''
        self.get_page_idx()
        para = {"request":"all","page": self.pageidx}
        data = query(self.url,para,self.interval,3) # request data
        if data == None: # if request faied, exit
            exit()
        # write dictionary data into csv
        with open(self.csvpath, mode='a',newline = '',encoding='utf-8') as csvfile:
            fieldnames = ['appid', 'name']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames,extrasaction='ignore')
            for key in data:
                writer.writerow(data[key])
        print("successfully write {number} entries, page {pageidx}".format(number=len(data),pageidx=self.pageidx))
        # update pageidx locally
        self.set_page_idx(int(self.pageidx)+1)
        if len(data) < 1000: # check if it is the last page
            print("request finished")
            return
        else:
            return self.process_batch()
        
        
    
    def run(self):
        '''
        request data from API in batches
        '''
        if len(self.fieldnames) == 0:
            raise ValueError("fieldnames cannot be None") # if the fieldnames are not set, raise
        self.initialize()
        self.process_batch()
        
        return
        
        

In [105]:
extractor = appidExtractor("https://steamspy.com/api.php","appid",10)
extractor.set_fieldnames(['appid', 'name'])

In [106]:
extractor.deleteAll()


In [107]:
extractor.run()

request successful!
successfully write 1000 entries, page 0
request successful!
successfully write 1000 entries, page 1
request successful!
successfully write 1000 entries, page 2
request successful!
successfully write 1000 entries, page 3
request successful!
successfully write 1000 entries, page 4
request successful!
successfully write 1000 entries, page 5
request successful!
successfully write 1000 entries, page 6
request successful!
successfully write 1000 entries, page 7
request successful!
successfully write 1000 entries, page 8
request successful!
successfully write 1000 entries, page 9
request successful!
successfully write 1000 entries, page 10
request successful!
successfully write 1000 entries, page 11
request successful!
successfully write 1000 entries, page 12
request successful!
successfully write 1000 entries, page 13
request successful!
successfully write 1000 entries, page 14
request successful!
successfully write 1000 entries, page 15
request successful!
successfully w

In [108]:
appid_df = pd.read_csv("test.csv")
appid_df

Unnamed: 0,appid,name
0,570,Dota 2
1,730,Counter-Strike: Global Offensive
2,578080,PUBG: BATTLEGROUNDS
3,1063730,New World
4,440,Team Fortress 2
...,...,...
995,233290,Murdered: Soul Suspect
996,1030830,Mafia II: Definitive Edition
997,107310,Cthulhu Saves the World
998,575110,Magic Lantern


In [109]:
appid_df.sort_values(by=['appid'])

Unnamed: 0,appid,name
29,10,Counter-Strike
145,20,Team Fortress Classic
115,30,Day of Defeat
96,40,Deathmatch Classic
442,50,Half-Life: Opposing Force
...,...,...
614,1817070,Marvel’s Spider-Man Remastered
211,1818750,MultiVersus
572,1824220,Chivalry 2
889,1832640,Mirror 2: Project X
