In [12]:
#import libraries needed for this process
import requests
from datetime import datetime
import numpy
import time
import json
from pathlib import Path
import pandas as pd
import sys

In [2]:
#function to create an API request based on URL and  parameters
def get_request(url, parameters=None, retry_delay=5, max_retries=None):
    """Send a GET request and return the decoded JSON body.

    A response with an error status (``requests`` treats a response as falsy
    when its status code is 4xx/5xx) is retried after ``retry_delay`` seconds.
    Retrying is done in a loop rather than by recursion so that a long outage
    cannot exhaust Python's recursion limit.

    Parameters
    ----------
    url : str
        Endpoint to query.
    parameters : dict, optional
        Query-string parameters passed to ``requests.get``.
    retry_delay : int or float, optional
        Seconds to wait between attempts (default 5).
    max_retries : int, optional
        Maximum number of retries after the first attempt. ``None`` (default)
        retries indefinitely, matching the previous behaviour.

    Returns
    -------
    The JSON-decoded response body.

    Raises
    ------
    requests.HTTPError
        If ``max_retries`` is set and every attempt returned an error status.
    """
    failed_attempts = 0
    while True:
        print("Your request is being processed.")
        response = requests.get(url=url, params=parameters)
        if response:
            return response.json()

        failed_attempts += 1
        if max_retries is not None and failed_attempts > max_retries:
            # Surface the last HTTP error instead of looping forever.
            response.raise_for_status()

        print(f"Request unsuccessful, retrying in {retry_delay} seconds.")
        time.sleep(retry_delay)

In [3]:
# Use get_request to fetch the full app list; each entry includes an appid and an app name.
url = "https://api.steampowered.com/ISteamApps/GetAppList/v2/"
# The value must be the string "all": the bare name `all` is Python's builtin
# function and would be sent as its repr instead of the intended keyword.
parameters = {"request": "all"}
data_ids = get_request(url, parameters=parameters)
print("Your request has been processed successfully, call upon 'data_ids' to preview the full list of IDs and names of all currently available products on Steam as of ", datetime.now().strftime("%d/%m/%Y %H:%M:%S"), ".")

Your request is being processed.
Your request has been processed successfully, call upon 'data_ids' to preview the full list of IDs and names of all currently available products on Steam as of  20/03/2022 15:48:12 .


In [21]:
# Collect all app IDs from the app list into one list.
print("Creating a list of all IDs currently available on Steam")
app_ids = [item['appid'] for item in data_ids['applist']['apps']]

print("List created sucessfully, the total number of IDs available on Steam is:", len(app_ids),".")
print("Call upon 'app_ids' to preview the full list of extracted IDs.")

# The file is written to the 'gen' folder; create it first so a fresh checkout
# does not fail on a missing directory.
steam_id_list_path = Path("../gen/steam_id_list.csv")
steam_id_list_path.parent.mkdir(parents=True, exist_ok=True)
print("Saving the Steam ID list in the 'gen' folder as 'steam_id_list.csv'.")
# One ID per line; a 1-D input is written as a single column, so no delimiter is needed.
numpy.savetxt(steam_id_list_path, app_ids, fmt="%d")
print("File 'steam_id_list.csv' has been saved successfully!")


Creating a list of all IDs currently available on Steam
List created sucessfully, the total number of IDs available on Steam is: 138483 .
Call upon 'app_ids' to preview the full list of extracted IDs.
Saving the Steam ID list in the 'data' folder as 'steam_id_list.csv'.
File 'steam_id_list.csv' has been saved successfully!


In [None]:
## Collect the data with a for loop that checks whether the response reports success == True, and then takes its "data" attribute.
# Feedback: separate the raw data collection from the parsing.
# Add a timestamp.
# Add logic so that the IDs aren't run multiple times.

# Number of successfully retrieved apps to collect before stopping.
amount_app_ids = 10
url = "http://store.steampowered.com/api/appdetails/"

# Nested list fields of an app's details that get their own table, mapped to the
# key path that leads to the list. 'highlighted' lives inside 'achievements', so
# its path has two elements (a single 'achievements.highlighted' key never matches).
nested_record_paths = {
    "categories": ["categories"],
    "package_groups": ["package_groups"],
    "genres": ["genres"],
    "screenshots": ["screenshots"],
    "movies": ["movies"],
    "achievements": ["achievements", "highlighted"],
}


def _extract_nested_records(app_json, record_path):
    """Return the records found under record_path as a DataFrame, or None if they cannot be extracted."""
    try:
        return pd.json_normalize(app_json, record_path=record_path, meta=["steam_appid"], errors="ignore")
    except (KeyError, TypeError, ValueError):
        # Not every app has every field; a missing or malformed field is skipped.
        return None


def _combine_frames(frames):
    """Concatenate the collected frames once, or return an empty DataFrame if there are none."""
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


# Frames are gathered in lists and concatenated once after the loop, which
# avoids re-copying the growing DataFrame on every iteration.
raw_frames = []
nested_frames = {table_name: [] for table_name in nested_record_paths}
collected = 0

for app_id in app_ids:
    if collected >= amount_app_ids:
        break
    app_key = str(app_id)
    app_data = get_request(url, parameters={"appids": app_id})
    print("Processing data pertaining to Steam ID:" + app_key)
    # Guard against an empty payload or one that lacks an entry for this ID.
    response = (app_data or {}).get(app_key)
    if not response or not response.get("success"):
        continue

    data_json = response["data"]
    raw_frames.append(pd.json_normalize(data_json))
    for table_name, record_path in nested_record_paths.items():
        records = _extract_nested_records(data_json, record_path)
        if records is not None:
            nested_frames[table_name].append(records)
    collected += 1

dataframe_raw = _combine_frames(raw_frames)
dataframe_categories = _combine_frames(nested_frames["categories"])
dataframe_package_groups = _combine_frames(nested_frames["package_groups"])
dataframe_genres = _combine_frames(nested_frames["genres"])
dataframe_screenshots = _combine_frames(nested_frames["screenshots"])
dataframe_movies = _combine_frames(nested_frames["movies"])
dataframe_achievements = _combine_frames(nested_frames["achievements"])

full_data_path = Path("../gen/full_steam_data.json")
full_data_path.parent.mkdir(parents=True, exist_ok=True)
print("Saving all data about the Steam ID's in the 'gen' folder as 'full_steam_data.json'.")
# Write real JSON (a list of records) so the '.json' file can be parsed by JSON readers.
dataframe_raw.to_json(full_data_path, orient="records")
print("File 'full_steam_data.json' has been saved successfully!")




In [13]:
## Collect the data with a for loop that checks whether the response reports success == True, and then takes its "data" attribute.
# Feedback: separate the raw data collection from the parsing.
# Add a timestamp.
# Add logic so that the IDs aren't run multiple times.

import json

def get_data_from_api(amount_app_ids = 100, output_path = 'raw_data.json'):
    """Fetch app details from the Steam store API and append them to a JSON-lines file.

    Iterates over the module-level ``app_ids`` list, requests the details of
    each app through ``get_request`` and, for every response flagged as
    successful, appends the app's "data" object as one JSON line to
    ``output_path``. Each record is enriched with a ``collection_details``
    entry holding the scraper name and the collection time (Unix timestamp).

    Parameters
    ----------
    amount_app_ids : int, optional
        Number of successful app records to collect before stopping (default 100).
    output_path : str or path-like, optional
        File the records are appended to (default 'raw_data.json').

    Returns
    -------
    None
    """
    url = "http://store.steampowered.com/api/appdetails/"
    collected = 0

    # The context manager guarantees the file is closed even if a request fails midway.
    with open(output_path, 'a', encoding='utf-8') as raw_file:
        for app_id in app_ids:
            if collected >= amount_app_ids:
                break
            app_key = str(app_id)
            app_data = get_request(url, parameters={"appids": app_id})
            print("Processing data pertaining to Steam ID:" + app_key)
            # Guard against an empty payload or one that lacks an entry for this ID.
            response = (app_data or {}).get(app_key)
            if not response or not response.get("success"):
                continue

            data_json = response["data"]
            # Enrich the record with provenance: who collected it and when.
            data_json['collection_details'] = {'created_by': 'our_scraper',
                                               'created_at': int(time.time())}
            raw_file.write(json.dumps(data_json) + '\n')
            # Flush per record so an interrupted run keeps everything collected so far.
            raw_file.flush()
            collected += 1

# Trial run: stop after ten apps have been retrieved successfully.
get_data_from_api(10) 
# Disabled preview of the result frame: dataframe_raw


Your request is being processed.
Processing data pertaining to Steam ID:216938
Your request is being processed.
Processing data pertaining to Steam ID:660010
Your request is being processed.
Processing data pertaining to Steam ID:660130
Your request is being processed.
Processing data pertaining to Steam ID:1118314
Your request is being processed.
Processing data pertaining to Steam ID:1275822
Your request is being processed.
Processing data pertaining to Steam ID:1343832
Your request is being processed.
Processing data pertaining to Steam ID:1828741
Your request is being processed.
Processing data pertaining to Steam ID:662172
Your request is being processed.
Processing data pertaining to Steam ID:1360782
Your request is being processed.
Processing data pertaining to Steam ID:1820332
Your request is being processed.
Processing data pertaining to Steam ID:1815690
Your request is being processed.
Processing data pertaining to Steam ID:1815720
Your request is being processed.
Processing 

In [None]:
# TODO: split up into separate functions: get raw data, parse data.

# Preview the flattened app details; swap in one of the commented names below
# to preview another of the collected tables instead.
dataframe_raw
#dataframe_categories
#dataframe_package_groups 
#dataframe_genres 
#dataframe_screenshots
#dataframe_movies 
#dataframe_achievements 

In [37]:
# Write the dataframes to xlsx files.
# NOTE(review): this used to be an absolute path on one developer's D: drive that
# ended in "...\steam-API\data". The relative "../data" assumes the notebook runs
# from a sibling folder of 'data' (as the "../gen" outputs suggest) - confirm.
excel_output_dir = Path("../data")
excel_output_dir.mkdir(parents=True, exist_ok=True)
dataframe_raw.to_excel(excel_output_dir / "dataraw_df.xlsx")
dataframe_categories.to_excel(excel_output_dir / "datacategories_df.xlsx")

In [12]:
### Example (pseudo-code): keep a dictionary of fetched details and only request data for IDs that are not in it yet.

"""
details = {}

for i in app_ids:
  id = str(i)
  if id not in details:
     // get data

     if(success):
       // put data in details
       details[id] = data

"""     

'\ndetails = {}\n\nfor i in app_ids:\n  id = str(i)\n  if id not in details:\n     // get data\n\n     if(success):\n       // put data in details\n       details[id] = data\n\n'