# Steam API / Web Scraping Data

In [1]:
import requests
import pandas as pd
import json
from bs4 import BeautifulSoup
from splinter import Browser
import time
import pymongo

## Web Scraping Steam Store

In [2]:
executable_path = {'executable_path':'/usr/local/bin/chromedriver'}

In [3]:
browser = Browser('chrome', **executable_path, headless=True)

In [4]:
# Most popular free to play games
baseURL = 'https://store.steampowered.com/search/?&genre=Free%20to%20Play'
browser.visit(baseURL)

In [13]:
# Scrape Functions
gameData = []
def scrape():
    section = browser.find_by_css('.search_result_row')

    urls = []
    for game in section:
        url = game['href']
        urls.append(url)

    for url in urls:

        # Visit game page
        browser.visit(url)
        time.sleep(1)
        
        if browser.find_by_css('.btnv6_blue_hoverfade').text == 'View Page':
            continue
        else:

            # Scrape game data
            try:
                name = browser.find_by_css('.apphub_AppName').value
                app_id = url.split('/')[4]
                release = browser.find_by_css('.date').value
                reviews = browser.find_by_css('.game_review_summary').value
            except:
                name = 'not found'
                release = 'not found'
                reviews = 'none'
            
            try:
                price = browser.find_by_css('.price').value
            except:
                price = browser.find_by_css('.discount_original_price').value



            # Get Dev / Genre / Clean
            try:
                dev = browser.find_by_css('.dev_row').value
                dev = dev.split('DEVELOPER:\n')
                dev = dev[1].split(',')[0]
            except:
                dev = 'Not Found'

            tags = browser.find_by_css('.app_tag')
            ignore = ['Free to Play','Multiplayer','Singleplayer']
            for t in tags:
                if t.value not in ignore:
                    genre = t.value
                    break

            # Get critic score if any
            try:
                criticScore = browser.find_by_css('.score').value
                criticScore
            except:
                criticScore = 'none'
            criticScore

            gameInfo = {
            'game':name,
            'price':price,
            'released':release,
            'reviews':reviews,
            'developer':dev,
            'criticScore':criticScore,
            'genre':genre,
            'app_id':app_id}
            
            gameData.append(gameInfo)
            print(gameInfo)


            time.sleep(1)

            browser.visit(baseURL)
            

    print(gameData)
        


In [6]:
# Top Free to play games
baseURL = 'https://store.steampowered.com/search/?&genre=Free%20to%20Play'
browser.visit(baseURL)
scrape()

{'game': 'Counter-Strike: Global Offensive', 'price': 'Free to Play', 'released': 'Aug 21, 2012', 'reviews': 'Very Positive', 'developer': 'Valve', 'criticScore': '83', 'genre': 'FPS', 'app_id': '730'}
{'game': 'Destiny 2', 'price': 'Free To Play', 'released': 'Oct 1, 2019', 'reviews': 'Very Positive', 'developer': 'Bungie', 'criticScore': '83', 'genre': 'Looter Shooter', 'app_id': '1085660'}
{'game': 'Warframe', 'price': 'Free to Play', 'released': 'Mar 25, 2013', 'reviews': 'Very Positive', 'developer': 'Digital Extremes', 'criticScore': '69', 'genre': 'Looter Shooter', 'app_id': '230410'}
{'game': 'Path of Exile', 'price': 'Free to Play', 'released': 'Oct 23, 2013', 'reviews': 'Very Positive', 'developer': 'Grinding Gear Games', 'criticScore': '86', 'genre': 'Action RPG', 'app_id': '238960'}
{'game': 'War Thunder', 'price': 'Free to Play', 'released': 'Aug 15, 2013', 'reviews': 'Very Positive', 'developer': 'Gaijin Entertainment', 'criticScore': '81', 'genre': 'World War II', 'app_i

In [None]:
# Connect to MongoDB 
conn = 'mongodb://localhost:27017'
clients = pymongo.MongoClient(conn)


db = client.steamDB
collection = db.scraped_f2p

# Insert data in database
collection.insert_many(gameData)

In [11]:
# Top Selling Games
baseURL = 'https://store.steampowered.com/search/?filter=globaltopsellers&os=win'
browser.visit(baseURL)
scrape()

{'game': 'RimWorld - Royalty', 'price': '$19.99', 'released': 'Feb 24, 2020', 'reviews': 'Very Positive', 'developer': 'Ludeon Studios', 'criticScore': 'none', 'genre': 'Indie', 'app_id': '1149640'}
{'game': 'ARK: Genesis Season Pass', 'price': '$34.99', 'released': 'Aug 8, 2019', 'reviews': 'Mixed', 'developer': 'Studio Wildcard', 'criticScore': 'none', 'genre': 'Adventure', 'app_id': '1113410'}
{'game': 'Wolcen: Lords of Mayhem', 'price': '$39.99', 'released': 'Feb 13, 2020', 'reviews': 'Mixed', 'developer': 'WOLCEN Studio', 'criticScore': '66', 'genre': 'RPG', 'app_id': '424370'}
{'game': 'not found', 'price': '$19.99', 'released': 'not found', 'reviews': 'none', 'developer': 'Paradox Development Studio', 'criticScore': 'none', 'genre': 'Strategy', 'app_id': '1158100'}
{'game': "PLAYERUNKNOWN'S BATTLEGROUNDS", 'price': '$29.99', 'released': 'Dec 21, 2017', 'reviews': 'Mixed', 'developer': 'PUBG Corporation', 'criticScore': '86', 'genre': 'Survival', 'app_id': '578080'}
{'game': 'Dis

In [12]:
# Connect to MongoDB 
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)


db = client.steamDB
collection = db.scraped_data

# Insert data in database
collection.insert_many(gameData)

<pymongo.results.InsertManyResult at 0x1233ca280>

## Steam SPY API (by genre)

In [8]:
# URL and response request for 
base_url = "https://steamspy.com/api.php?request=genre&genre="
genres = ['Action', 'Strategy', 'RPG', 'Indie', 'Adventure', 'Sports', 'Simulation',
         'Early+Access', 'Ex+Early+Access', 'MMO', 'Free']

gen_response = requests.get(base_url + genres[-1])
response_json = gen_response.json()

In [9]:
# Use json.dumps to print the json stored in variable
time.sleep(4)
print(json.dumps(response_json, indent=4, sort_keys=True))

{
    "1000110": {
        "appid": 1000110,
        "average_2weeks": 0,
        "average_forever": 0,
        "developer": "\u91cd\u5e86\u73af\u6e38\u8005\u7f51\u7edc\u79d1\u6280",
        "discount": "0",
        "initialprice": "0",
        "median_2weeks": 0,
        "median_forever": 0,
        "name": "Jumping Master(\u8df3\u8df3\u5927\u5496)",
        "negative": 27,
        "owners": "20,000 .. 50,000",
        "positive": 42,
        "price": "0",
        "publisher": "\u91cd\u5e86\u73af\u6e38\u8005\u7f51\u7edc\u79d1\u6280",
        "score_rank": "",
        "userscore": 0
    },
    "1000380": {
        "appid": 1000380,
        "average_2weeks": 0,
        "average_forever": 0,
        "developer": "Fireroot Studios",
        "discount": "0",
        "initialprice": "0",
        "median_2weeks": 0,
        "median_forever": 0,
        "name": "Rogue Reaper",
        "negative": 84,
        "owners": "50,000 .. 100,000",
        "positive": 277,
        "price": "0",
       

In [10]:
# Store each genres data in a list
free = []

for key in response_json.keys():
    free.append(response_json[key])

In [11]:
# Connect to MongoDB default port
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Define Steam database in Mongo
db = client.steamDB
collection = db.steam_genres

# Insert data in database
collection.insert_many(free)

<pymongo.results.InsertManyResult at 0x118b17b40>

In [12]:
# Verifying the number of free games on Steam
len(free)

2356

In [13]:
# Verifying an arbitary index within the collection of free games
free[2350]

{'appid': 751100,
 'name': 'Nonograms Prophecy',
 'developer': 'No Gravity Games',
 'publisher': 'No Gravity Games',
 'score_rank': '',
 'positive': 0,
 'negative': 0,
 'userscore': 0,
 'owners': '0 .. 20,000',
 'average_forever': 0,
 'average_2weeks': 0,
 'median_forever': 0,
 'median_2weeks': 0,
 'price': '0',
 'initialprice': '0',
 'discount': '0',
 '_id': ObjectId('5e5498ae68fd86484c862a6c')}

## TOP 100 Games

In [2]:
url = 'https://steamspy.com/api.php?request=top100forever'

In [3]:
response = requests.get(url)
print(response)

<Response [200]>


In [4]:
data = response.text

In [5]:
parsed = json.loads(data)

In [6]:
print(json.dumps(parsed, indent=4))

{
    "570": {
        "appid": 570,
        "name": "Dota 2",
        "developer": "Valve",
        "publisher": "Valve",
        "score_rank": "",
        "positive": 1046686,
        "negative": 181761,
        "userscore": 0,
        "owners": "100,000,000 .. 200,000,000",
        "average_forever": 33279,
        "average_2weeks": 1981,
        "median_forever": 1248,
        "median_2weeks": 918,
        "price": "0",
        "initialprice": "0",
        "discount": "0"
    },
    "578080": {
        "appid": 578080,
        "name": "PLAYERUNKNOWN'S BATTLEGROUNDS",
        "developer": "PUBG Corporation",
        "publisher": "PUBG Corporation",
        "score_rank": "",
        "positive": 726369,
        "negative": 620096,
        "userscore": 0,
        "owners": "50,000,000 .. 100,000,000",
        "average_forever": 23203,
        "average_2weeks": 670,
        "median_forever": 11279,
        "median_2weeks": 201,
        "price": "2999",
        "initialprice": "2999",
  

In [7]:
#turning it into a list
games = []

for key in parsed.keys():
    games.append(parsed[key])

In [8]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [9]:
# Define database and collection
db = client.steamDB
collection = db.steamgames

In [10]:
#inserting data into database
collection.insert_many(games) 

<pymongo.results.InsertManyResult at 0x11bd35140>

In [11]:
# Printing the data inserted 
cursor = collection.find() 
for games in cursor: 
    print(games)

{'_id': ObjectId('5e549b14c979fb5b555e2f53'), 'appid': 570, 'name': 'Dota 2', 'developer': 'Valve', 'publisher': 'Valve', 'score_rank': '', 'positive': 1046686, 'negative': 181761, 'userscore': 0, 'owners': '100,000,000 .. 200,000,000', 'average_forever': 33279, 'average_2weeks': 1981, 'median_forever': 1248, 'median_2weeks': 918, 'price': '0', 'initialprice': '0', 'discount': '0'}
{'_id': ObjectId('5e549b14c979fb5b555e2f54'), 'appid': 578080, 'name': "PLAYERUNKNOWN'S BATTLEGROUNDS", 'developer': 'PUBG Corporation', 'publisher': 'PUBG Corporation', 'score_rank': '', 'positive': 726369, 'negative': 620096, 'userscore': 0, 'owners': '50,000,000 .. 100,000,000', 'average_forever': 23203, 'average_2weeks': 670, 'median_forever': 11279, 'median_2weeks': 201, 'price': '2999', 'initialprice': '2999', 'discount': '0'}
{'_id': ObjectId('5e549b14c979fb5b555e2f55'), 'appid': 440, 'name': 'Team Fortress 2', 'developer': 'Valve', 'publisher': 'Valve', 'score_rank': '', 'positive': 600808, 'negative