In [1]:
import time
import urllib.request, urllib.parse
from urllib.error import HTTPError, URLError
import json
import numpy as np
import pandas as pd
import pprint as pp
import requests
import csv
import datetime as dt
import os
import statistics

# Use the fully-qualified option key: the short form "max_columns" is resolved
# by pattern matching and is ambiguous on pandas versions that also define
# "styler.render.max_columns", where it raises OptionError.
pd.set_option("display.max_columns", 100)

def get_request(url, parameters=None):
    """Fetch ``url`` with ``requests.get`` and return the decoded JSON body.

    The call is retried until a successful response is obtained:

    * on ``requests.exceptions.SSLError`` the error is printed, a 5-second
      countdown is shown, and the request is retried;
    * when the response object is falsy (``requests`` treats responses with
      an error status code as falsy) the function waits 30 seconds and
      retries.

    Parameters
    ----------
    url : str
        Address to request.
    parameters : dict, optional
        Query-string parameters forwarded as ``params`` to ``requests.get``.

    Returns
    -------
    object
        Whatever ``response.json()`` returns for the first truthy response.

    Notes
    -----
    Retries are unbounded, so this call blocks for as long as the endpoint
    keeps failing. Exceptions other than ``SSLError`` (and any error raised
    by ``response.json()``) propagate to the caller.
    """
    # Loop instead of recursing so a long outage cannot exhaust the call stack.
    while True:
        try:
            response = requests.get(url=url, params=parameters)
        except requests.exceptions.SSLError as s:
            # Referenced through the already-imported ``requests`` package;
            # a bare ``SSLError`` name is not defined in this notebook.
            print('SSL Error:', s)

            for i in range(5, 0, -1):
                # '\r' rewrites the same console line to show a countdown.
                print('\rWaiting... ({})'.format(i), end='')
                time.sleep(1)
            print('\rRetrying.' + ' '*10)
            continue

        if response:
            return response.json()

        print('No response, waiting 30 seconds...')
        time.sleep(30)
        print('Retrying.')
    
# Ask the SteamSpy API for its "top100forever" listing and load it into pandas.
url = "https://steamspy.com/api.php"
parameters = dict(request="top100forever")

json_data = get_request(url, parameters)

# orient='index' turns each top-level key of the JSON object into a row label.
steamspy_top100 = pd.DataFrame.from_dict(data=json_data, orient='index')

pp.pprint(steamspy_top100)

         appid                              name  \
570        570                            Dota 2   
730        730  Counter-Strike: Global Offensive   
578080  578080     PLAYERUNKNOWN'S BATTLEGROUNDS   
440        440                   Team Fortress 2   
304930  304930                          Unturned   
...        ...                               ...   
219740  219740                      Don't Starve   
15700    15700           Oddworld: Abe's Oddysee   
242760  242760                        The Forest   
555570  555570            Infestation: The New Z   
273350  273350                    Evolve Stage 2   

                               developer              publisher score_rank  \
570                                Valve                  Valve              
730     Valve, Hidden Path Entertainment                  Valve              
578080                  PUBG Corporation       PUBG Corporation              
440                                Valve                  Valve

In [2]:
# Remove the columns this analysis does not use in a single call rather than
# ten separate drops (each of which copied the whole frame). As before, a
# KeyError is raised if any listed column is missing.
steamspy_top100 = steamspy_top100.drop(columns=[
    'appid',
    'developer',
    'publisher',
    'score_rank',
    'positive',
    'negative',
    'userscore',
    'price',
    'initialprice',
    'discount',
])

pp.pprint(steamspy_top100)
print("\nDescribe Data")
# Default describe(): summary statistics for the numeric columns.
print(steamspy_top100.describe())
print("\nSummarized Data")
# include=['O'] restricts the summary to object-dtype (string) columns.
print(steamspy_top100.describe(include=['O']))

                                    name                      owners  \
570                               Dota 2  100,000,000 .. 200,000,000   
730     Counter-Strike: Global Offensive  100,000,000 .. 200,000,000   
578080     PLAYERUNKNOWN'S BATTLEGROUNDS    20,000,000 .. 50,000,000   
440                      Team Fortress 2    20,000,000 .. 50,000,000   
304930                          Unturned    20,000,000 .. 50,000,000   
...                                  ...                         ...   
219740                      Don't Starve     5,000,000 .. 10,000,000   
15700            Oddworld: Abe's Oddysee     5,000,000 .. 10,000,000   
242760                        The Forest     5,000,000 .. 10,000,000   
555570            Infestation: The New Z     5,000,000 .. 10,000,000   
273350                    Evolve Stage 2     5,000,000 .. 10,000,000   

        average_forever  average_2weeks  median_forever  median_2weeks  
570               32154            1796            1022       

In [3]:
# Show the distinct values present in the 'owners' column.
steamspy_top100['owners'].unique()

array(['100,000,000 .. 200,000,000', '20,000,000 .. 50,000,000',
       '10,000,000 .. 20,000,000', '5,000,000 .. 10,000,000'],
      dtype=object)

In [4]:
# Short labels for each owner-range string found in the 'owners' column.
owner_range_labels = {
    '100,000,000 .. 200,000,000': '100M-200M',
    '20,000,000 .. 50,000,000': '20M-50M',
    '10,000,000 .. 20,000,000': '10M-20M',
    '5,000,000 .. 10,000,000': '5M-10M',
}

# Human-readable headings for the remaining API field names.
column_titles = {
    'name': 'Game',
    'owners': 'Total Steam owners (range)',
    'average_forever': 'Mean Playtime since March 2009 (minutes)',
    'average_2weeks': 'Mean Playtime for Last Two Weeks (minutes)',
    'median_forever': 'Median Playtime since March 2009 (minutes)',
    'median_2weeks': 'Median Playtime for Last Two Weeks (minutes)',
}

# Relabel the owner ranges first (while the column is still called 'owners'),
# then apply the display headings.
steamspy_top100 = (
    steamspy_top100
    .replace({'owners': owner_range_labels})
    .rename(columns=column_titles)
)

pp.pprint(steamspy_top100)
print("\nDescribe Data")
print(steamspy_top100.describe())
print("\nSummarized Data")
print(steamspy_top100.describe(include=['O']))

                                    Game Total Steam owners (range)  \
570                               Dota 2                  100M-200M   
730     Counter-Strike: Global Offensive                  100M-200M   
578080     PLAYERUNKNOWN'S BATTLEGROUNDS                    20M-50M   
440                      Team Fortress 2                    20M-50M   
304930                          Unturned                    20M-50M   
...                                  ...                        ...   
219740                      Don't Starve                     5M-10M   
15700            Oddworld: Abe's Oddysee                     5M-10M   
242760                        The Forest                     5M-10M   
555570            Infestation: The New Z                     5M-10M   
273350                    Evolve Stage 2                     5M-10M   

        Mean Playtime since March 2009 (minutes)  \
570                                        32154   
730                                        

In [5]:
# Persist the cleaned table to the working directory; the row index is
# written too, since to_csv keeps it by default.
steamspy_top100.to_csv(path_or_buf='SteamSpy Top 100 Data (clean).csv')