# SteamDB scraper

## Features
- Loads gameid's from csv file
- Detects a temporary ban and waits till it's able to reconnect
- Scrapes PlayTracker ownership data for each given gameid
- Exports results to csv file


In [85]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import os
import time
from pandas import *

In [None]:

# change options to prevent one of the webscraper mitigations done by SteamDB
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument("--disable-blink-features=AutomationControlled")
driver = webdriver.Chrome(options=options, executable_path=r'C:\Utilities\BrowserDrivers\chromedriver.exe')

In [14]:
# create dataframe with csv as input
data = read_csv('game_id.csv')
# add owners amount column, set all values to -1 to note it hasn't scraped
data['owners'] = -1

In [103]:
# set row index to id 
data.set_index('id', inplace = True)

In [86]:
def isTemporaryBanned():
    return driver.find_elements(By.LINK_TEXT, 'the realtime updates page')

# wait until temporary ban by SteamDB has been lifted 
def tempBanCheck(gameID):
    if isTemporaryBanned():
        print('We have been temporarily banned \n')
        canConnect = False
        while not canConnect:
            for secondsTillRetry in range(300):
                print('{} seconds till retry'.format(str(secondsTillRetry).ljust(3, ' ')), end='\r')
                time.sleep(1)
            # check if page is accessible 
            driver.get("https://steamdb.info/app/{}/".format(gameID))
            if not isTemporaryBanned():
                canConnect = True
                print('ban has been lifted!')
            else:
                print('still banned :(')

In [87]:
# tries to click on charts button. 
# returns false if not found and sets owner count of that gameid to nan. 
def clickChartsButton():
    tabs = driver.find_element_by_class_name('tabnav-tabs')
    chartsButton = tabs.find_elements(By.LINK_TEXT, 'Charts')
    if not chartsButton:
        return False
    else:
        chartsButton[0].click()
        return True

In [70]:
def textToNumber(i):
    switcher={
            'M':1e6,
            'k':1e3
         }
    return switcher.get(i,0)

In [109]:
def scrapeOwnerCount(df, gameID):
    # get all app-charts 
    appCharts = driver.find_element_by_class_name('row-app-charts')
    # get owners chart
    ownerEstimations = appCharts.find_elements_by_class_name('span6')[1]
    # search for playTracker owners estimations 
    playTrackerElement = ownerEstimations.find_elements_by_tag_name('li')[1]
    # make sure it is data from PlayTracker
    if playTrackerElement.find_elements(By.LINK_TEXT, 'by PlayTracker'):
        playTrackerData = playTrackerElement.text.split()
        # convert string to number
        highApproximation = float(playTrackerData[3]) * textToNumber(playTrackerData[4])
        # save owner count to dataframe     
        df['owners'][gameID] = highApproximation
        return True
    else: 
        df['owners'][gameID] = None
        return False

In [121]:
# open webpage
gameIDs = list(data.index.values)
for gameID in gameIDs:
    # check if data hasn't been scraped
    rowVal = data['owners'][gameID]
    if isna(rowVal):
        rowVal = -1
    if rowVal == -1:
        # load page by gameid 
        driver.get("https://steamdb.info/app/{}/".format(gameID))
        driver.implicitly_wait(0.5)
        tempBanCheck(gameID)
        # check if Charts button exists, continue if non existent 
        if not clickChartsButton():
            print("couldn't find charts for game: {}".format(gameID))
            # save owners count as NaN 
            data['owners'][gameID] = None
            continue
        scrapeOwnerCount(data, gameID)

We have been temporarily banned 



In [92]:
data.to_csv(r'results.csv', index = False, header = True)

In [119]:
data

Unnamed: 0_level_0,owners
id,Unnamed: 1_level_1
10,11100000.0
30,6400000.0
70,9300000.0
80,10500000.0
130,7700000.0
...,...
883710,
893180,-1
899440,-1
942970,-1


In [118]:
data.drop(data.columns[1], axis=1, inplace=True)

In [46]:
text = '11100000.0\n6400000.0\n9300000.0\n10500000.0\n7700000.0\n12800000.0\n13000000.0\n6700000.0\n10700000.0\n13400000.0\n10600000.0\n20000000.0\n8600000.0\n20100000.0\n80500000.0\n16600000.000000002\n5500000.0\n68800000.0\n2100000.0\n11700000.0\n2600000.0\n2800000.0\n4300000.0\n2100000.0\n910000.0\n2000000.0\n1700000.0\n5400000.0\n2600000.0\n1600000.0\n680000.0\n950000.0\n1400000.0\n400000.0\n1800000.0\n3900000.0\n2900000.0\n1800000.0\n7400000.0\n2900000.0\n6700000.0\n2400000.0\n4000000.0\n6700000.0\n4400000.0\n2300000.0\n2900000.0\n3200000.0\n590000.0\n4200000.0\n2800000.0\n3600000.0\n3000000.0\n5200000.0\n2200000.0\n1200000.0\n1600000.0\n1100000.0\n1200000.0\n850000.0\n2300000.0\n4099999.9999999995\n4099999.9999999995\n4400000.0\n3400000.0\n4300000.0\n3300000.0\n2500000.0\n3300000.0\n540000.0\n3800000.0\n3700000.0\n1500000.0\n2300000.0\n3400000.0\n2500000.0\n3600000.0\n1300000.0\n9200000.0\n4400000.0\n3900000.0\n2700000.0\n9000000.0\n9700000.0\n1700000.0\n1500000.0\n3100000.0\n2500000.0\n2700000.0\n8400000.0\n1400000.0\n2500000.0\n3600000.0\n1700000.0\n4600000.0\n420000.0\n3100000.0\n2500000.0\n4500000.0\n5000000.0\n5500000.0\n6900000.0\n1600000.0\n2900000.0\n2600000.0\n3200000.0\n7900000.0\n810000.0\n1400000.0\n3400000.0\n2900000.0\n1600000.0\n2000000.0\n1600000.0\n1500000.0\n2200000.0\n1000000.0\n1200000.0\n2100000.0\n1500000.0\n850000.0\n820000.0\n1700000.0\n3300000.0\n4500000.0\n1900000.0\n1900000.0\n2900000.0\n2700000.0\n2200000.0\n3200000.0\n7100000.0\n1200000.0\n2200000.0\n3200000.0\n5400000.0\n1400000.0\n2200000.0\n1800000.0\n2000000.0\n3200000.0\n990000.0\n3600000.0\n1700000.0\n4400000.0\n2400000.0\n1600000.0\n7100000.0\n8500000.0\n1800000.0\n4000000.0\n4900000.0\n1700000.0\n4300000.0\n1100000.0\n3500000.0\n2900000.0\n1800000.0\n2200000.0\n2700000.0\n2200000.0\n1900000.0\n2700000.0\n3900000.0\n2900000.0\n4400000.0\n8000000.0\n2400000.0\n2600000.0\n4500000.0\n860000.0\n8500000.0\n1600000.0\n1600000.0\n5000000.0\n1000000.0\n3300000.0\n1100000.0\n830000.0\n4099999.9999999995\n3800000.0\n2000000.0\n570000.0\n1700000.0\n2000000.0\n3000000.0\n3200000.0\n3200000.0\n1400000.0\n2500000.0\n2800000.0\n2100000.0\n2400000.0\n2800000.0\n820000.0\n1600000.0\n6800000.0\n3600000.0\n2300000.0\n3100000.0\n4000000.0\n3200000.0\n3800000.0\n6500000.0\n8700000.0\n4500000.0\n6200000.0\n7700000.0\n3300000.0\n3000000.0\n2600000.0\n2200000.0\n2200000.0\n3100000.0\n1800000.0\n1800000.0\n1600000.0\n1800000.0\n1700000.0\n1400000.0\n1500000.0\n2900000.0\n3300000.0\n930000.0\n3600000.0\n2700000.0\n2600000.0\n1700000.0\n2300000.0\n930000.0\n3700000.0\n5500000.0\n1700000.0\n2900000.0\n3600000.0\n3000000.0\n820000.0\n3400000.0\n7100000.0\n1600000.0\n8900000.0\n930000.0\n3100000.0\n790000.0\n2200000.0\n4099999.9999999995\n4000000.0\n1900000.0\n610000.0\n4000000.0\n4099999.9999999995\n2100000.0\n2600000.0\n2100000.0\n8700000.0\n210000.0\n2100000.0\n1200000.0\n5200000.0\n3300000.0\n15000000.0\n2700000.0\n5400000.0\n9200000.0\n1500000.0\n7500000.0\n4400000.0\n6800000.0\n9100000.0\n11500000.0\n6800000.0\n950000.0\n2900000.0\n1800000.0\n930000.0\n2300000.0\n1600000.0\n4099999.9999999995\n7600000.0\n1900000.0\n8400000.0\n5800000.0\n3800000.0\n1500000.0\n2700000.0\n3800000.0\n11400000.0\n1900000.0\n1400000.0\n4400000.0\n3800000.0\n1700000.0\n2000000.0\n4200000.0\n2200000.0\n'
firstScrappy = text.split()
scrape = DataFrame()
scrape['numbers'] = firstScrappy
scrape.to_csv(r'firstScrape.csv', index=False, header=True)

        numbers
0    11100000.0
1     6400000.0
2     9300000.0
3    10500000.0
4     7700000.0
..          ...
290   3800000.0
291   1700000.0
292   2000000.0
293   4200000.0
294   2200000.0

[295 rows x 1 columns]


In [54]:
# add first scrape to data
for i in range(len(firstScrappy)):
    data['owners'][i] = firstScrappy[i];

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['owners'][i] = firstScrappy[i];
