In [1]:
import requests 
import json
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import os

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [25]:
#get the app id from this api https://api.steampowered.com/ISteamApps/GetAppList/v2/ 

#call the steam api to get the list of games
url = 'https://api.steampowered.com/ISteamApps/GetAppList/v2/'
response = requests.get(url)
data = response.json()
games = data['applist']['apps']

In [26]:
#convert the list of games to a dataframe
appid_df = pd.DataFrame(games)
appid_df.head()

Unnamed: 0,appid,name
0,1941401,
1,2170321,
2,1825161,
3,1897482,
4,2112761,


In [27]:
print('Number of games:', len(appid_df))

Number of games: 191065


In [28]:
#drop all games that have no name or name length is 0
appid_df = appid_df.dropna(subset=['name'])
appid_df = appid_df[appid_df['name'].str.len() > 0]

print('Number of games:', len(appid_df))

Number of games: 191032


In [29]:
#split the appid into 1000 chunks to avoid getting blocked
chunks = [appid_df['appid'][x:x+1000] for x in range(0, len(appid_df), 1000)]

In [30]:
print(chunks)

[9        216938
10       660010
11       660130
36      1904630
37      1904640
         ...   
1028    1892040
1029    1892050
1030    1892060
1031    1892080
1032    1892090
Name: appid, Length: 1000, dtype: int64, 1033    1892100
1034    1891130
1035    1891140
1036    1891150
1037    1891170
         ...   
2028    1876670
2029    1876680
2030    1876690
2031    1876700
2032    1876710
Name: appid, Length: 1000, dtype: int64, 2033    1876720
2034    1876730
2035    1876740
2036    1876750
2037    1876760
         ...   
3028    1932050
3029    1932070
3030    1932080
3031    1932090
3032    1932100
Name: appid, Length: 1000, dtype: int64, 3033    1932110
3034    1932120
3035    1932130
3036    1932140
3037    1932150
         ...   
4028    1917330
4029    1917340
4030    1917341
4031    1917350
4032    1917370
Name: appid, Length: 1000, dtype: int64, 4033    1916420
4034    1916450
4035    1916470
4036    1916480
4037    1916490
         ...   
5028    2096160
5029    2096170
503

In [31]:
print('Number of chunks:', len(chunks))

Number of chunks: 192


In [32]:
#convert the chunks to a list of strings
chunks = [','.join(map(str, chunk)) for chunk in chunks]

In [10]:
game_url = 'https://store.steampowered.com/api/appdetails?appids='


for chunk in chunks:
    game_data = []
    for appid in chunk.split(','):
        try:
            response = requests.get(game_url + appid)
            data = response.json()
            
            if data!=None:
                if data[appid]['success'] == True:
                    game_data.append({'appid': appid, 'data': data[appid]['data']})
        
        except Exception as e:
            print('Error:', appid, e)
            continue
    game_df = pd.DataFrame(game_data)
    game_df.to_json('steam_games_' + chunk.split(',')[0] + '.json')
    print('Saved chunk to file:', chunk.split(',')[0])
    time.sleep(1)

Saved chunk to file: 216938
Saved chunk to file: 1854320
Saved chunk to file: 1839390
Saved chunk to file: 1825874
Saved chunk to file: 1812640
Saved chunk to file: 1799540
Saved chunk to file: 1434850
Saved chunk to file: 1421863
Saved chunk to file: 1469580
Saved chunk to file: 1454570
Saved chunk to file: 1565770
Saved chunk to file: 1550830
Saved chunk to file: 1505932
Saved chunk to file: 1491920
Saved chunk to file: 1598150
Saved chunk to file: 1583920
Saved chunk to file: 1537580
Saved chunk to file: 1522650
Saved chunk to file: 1415440
Saved chunk to file: 1400720
Saved chunk to file: 1385760
Saved chunk to file: 1371406
Saved chunk to file: 1357080
Saved chunk to file: 1341701


KeyboardInterrupt: 

In [None]:
#convert the json files to dataframes and save them to csv
for file in os.listdir():
    if file.endswith('.json'):
        try:
            df = pd.read_json(file)
            df = df['data'].apply(pd.Series)
            df.to_csv(file.replace('.json', '.csv'))
            print('Saved', file.replace('.json', '.csv'))
        except Exception as e:
            print('Error:', file, e)
            continue

In [None]:
#combine all csv files in this directory into one csv file, append into one list and then convert to a dataframe, then save to a csv file
df_list = []
for file in os.listdir():
    if file.endswith('.csv'):
        try:
            df = pd.read_csv(file)
            df_list.append(df)
        except Exception as e:
            print('Error:', file, e)
            continue
combined_df = pd.concat(df_list)
combined_df.to_csv('steam_games.csv')

In [None]:
#read steam_games.csv into a dataframe and print the first 5 rows
df = pd.read_csv('steam_games.csv')
df.head()

In [None]:
df.shape

In [None]:
df.info()