<center> <h2>Final Project Dataset Creation</h2></center>

In [6]:
import requests
import pandas as pd
import re
import time

In [3]:
df = pd.DataFrame(columns=["name", "steam_review_score", "steam_total_positive", "steam_total_negative",  "price_initial", 
                           "steam_genres", "steam_categories", "release_date", "short_description", "required_age", 
                           "developers"])

* The following are the keys from the JSON objects returned from our Steam get call that we will use and what they represent:
    * 'name' : the name of the game (string)
    * 'steam_review_score' : rating out of 10
    * 'steam_total_positive' : count of total amount of reviews that are positive
    * 'steam_total_negative' : count of total amount of reviews that are negative
    * 'price_initial' : price of the game when it was first released (in cents)
    * 'steam_genres' : array of objects representing descriptive "genres" of this game for classification. These genres can include: Action, Casual, Indie, Strategy, etc.
    * 'steam_categories' : array of objects representing descriptive "categories" of this game for classification. These categories are Steam specific and can include: Single-player, Steam Achievements, Steam Cloud 
    * 'release_date' : the date of release for the game
    * 'short_description': a short sentence describing the game, there is a character cap so the sentence might not be complete
    * 'required_age' : the required player age as a guidline to play this game, default value is 0 and can take value such as: 14, 16, 18, etc.
    * 'developers' : the name of the developer

In [4]:
#pull a JSON object that includes ALL games and their Steam ID from Steam platform for further data pulling
new_requests = requests.get("http://api.steampowered.com/ISteamApps/GetAppList/v0002/?key=3DCA25D37E6882591F5A4B62A3E2A1C3&format=json").json()
new_requests['applist']['apps']

[{'appid': 216938, 'name': 'Pieterw test app76 ( 216938 )'},
 {'appid': 660010, 'name': 'test2'},
 {'appid': 660130, 'name': 'test3'},
 {'appid': 1118314, 'name': ''},
 {'appid': 383660, 'name': 'Deluxe Upgrade for Shadowrun Chronicles'},
 {'appid': 383670, 'name': 'That Old Time Religion'},
 {'appid': 383680, 'name': 'Void & Meddler - Soundtrack'},
 {'appid': 383690, 'name': 'Mu Complex'},
 {'appid': 383700, 'name': 'Boneless Zombie'},
 {'appid': 383710, 'name': 'Princess KAGUYA'},
 {'appid': 383720, 'name': 'Swords & Crossbones: An Epic Pirate Story'},
 {'appid': 383730, 'name': 'RPG Maker 2000'},
 {'appid': 383740, 'name': 'Marble Age: Remastered'},
 {'appid': 383750, 'name': 'Funk of Titans'},
 {'appid': 383760, 'name': 'War Thunder - Defenders Advanced Pack'},
 {'appid': 383780, 'name': 'Judgement'},
 {'appid': 383790, 'name': 'The Ship: Remasted'},
 {'appid': 383800, 'name': 'Close Order'},
 {'appid': 383810, 'name': "Doomed'n Damned"},
 {'appid': 383840, 'name': 'Nimbatus - The 

In [5]:
#A sample request URL that provides a starting point for pulling game metadata
requestURL = 'https://store.steampowered.com/api/appdetails?appids=1121910'

In [13]:
#turn the referece list into a DataFrame object and save it to a csv file
reference_db = pd.DataFrame(new_requests['applist']['apps'])
reference_db.to_csv("reference_app_data.csv")

In [14]:
#add everything that is a success and has a type of 'game' to the database, spit out the error whenever one fail to add
#this took many trail and error to complete as every game sometimes have different key value missing
for item in new_requests['applist']['apps'][0:10000]:
  time.sleep(1.6);
  try:
    steamAppId = str(item['appid'])
    name = str(item['name'])
    requestURL = 'https://store.steampowered.com/api/appdetails?appids=' + steamAppId
    new_request = requests.get(requestURL).json()
    if new_request[steamAppId]['success'] == True:
      if 'type' in new_request[steamAppId]['data'].keys() and new_request[steamAppId]['data']['type'] == 'game':
        details = new_request[steamAppId]['data']
        required_age = details['required_age']            
        if details['is_free'] == True:
          price = '0'
        else:
          if 'price_overview' in details.keys() and 'initial' in details['price_overview'].keys():
            price = details['price_overview']['initial']
          else: 
            if len(details['package_groups']) > 1:
                price = details['package_groups'][0]['subs'][0]['price_in_cents_with_discount']
            else: price = ''

        #recommendations = details['recommendations']['total']

        steam_releasedate = details['release_date']['date']

        plat = [a for a in details['platforms'] if details['platforms'][a] == True]
        platform_available = ', '.join([str(i) for i in plat])

        steam_cat = [details['categories'][i]['description'] for i in range(len(details['categories']))]
        steam_categories = ', '.join([str(i) for i in steam_cat])

        steam_gen = [details['genres'][i]['description'] for i in range(len(details['genres']))]
        steam_genres = ', '.join([str(i) for i in steam_gen])

        devs = details['developers']
        steam_devs = ', '.join([str(i) for i in devs])

        short_description = details['short_description']

        # get review info
        try:
          requestURL = "https://store.steampowered.com/appreviews/" + steamAppId + "?json=1"
          response_reviews = requests.get(requestURL).json()
          summary = response_reviews["query_summary"]
          game_score_num = summary['review_score']
          total_positive = summary['total_positive']
          total_negative = summary['total_negative']
        except Exception as e:
          total_negative = ''
          total_positive = ''
          game_score_num = ''

        info = {"name": name, "steam_id" : steamAppId, "steam_review_score": game_score_num, 
                "steam_total_positive" : total_positive, "steam_total_negative": total_negative, 
                "price_initial": price, "steam_genres": steam_genres, "steam_categories": steam_categories, 
                "release_date": steam_releasedate, "short_description": short_description, "required_age" : required_age,
                "developers": steam_devs}
        df = df.append(info, ignore_index=True);
  except Exception as e2:
    print("Failed to add " + str(item['appid']))
    print(e2);


Failed to add 1081520
'categories'
Failed to add 1081730
'categories'
Failed to add 1080690
'developers'
Failed to add 1079340
'categories'
Failed to add 1079520
'categories'
Failed to add 1077300
'categories'
Failed to add 1075850
'categories'
Failed to add 1075860
'categories'
Failed to add 1074870
'categories'
Failed to add 1074880
'categories'
Failed to add 1074900
'categories'
Failed to add 1074910
'categories'
Failed to add 1074920
'categories'
Failed to add 1074930
'categories'
Failed to add 1073720
'categories'
Failed to add 1074060
'categories'
Failed to add 1073420
'categories'
Failed to add 1071860
'categories'
Failed to add 1071100
'categories'
Failed to add 1070910
'categories'
Failed to add 1068620
'categories'
Failed to add 1066860
'categories'
Failed to add 1065540
'categories'
Failed to add 1063520
'categories'
Failed to add 1062410
'categories'
Failed to add 1061690
'categories'
Failed to add 1058990
'categories'
Failed to add 1136210
'categories'
Failed to add 113364

KeyboardInterrupt: 

In [15]:
#df in this preview has empty fields as the initial idea was to include data from the game review website RAWG
#however due to time limitation and the complexity of RAWG API, we decided against it.
df

Unnamed: 0,name,steam_review_score,steam_total_positive,steam_total_negative,metacritic,rawg_rating,avg_playtime,price_initial,rawg_genres,rawg_tags,steam_genres,steam_categories,total_recommendations,release_date,short_description,required_age,esrb,recommendation_score,developers,steam_id
0,Scallywag's Honor,0,0,0,,,,,,,"Action, Adventure, Indie, RPG, Simulation, Str...","Single-player, Steam Achievements, Steam Cloud",,Coming soon,"The Caribbean Sea, your sea, has been the home...",0,,,Crazy Goat Games,1083150
1,Disc Creatures,8,60,4,,,,1499,,,"Adventure, Casual, Indie, RPG","Single-player, Partial Controller Support",,"Oct 17, 2019","Humans live alongside creatures, each with dif...",0,,,PICORINNE SOFT,1083190
2,符文女孩/Rune Girl,5,41,21,,,,99,,,"Adventure, Casual, Indie, RPG","Single-player, Steam Cloud",,"Jun 28, 2019",This is a &quot;leisure&quot; game with a fine...,0,,,ADOG,1083210
3,PokeyPoke,0,0,0,,,,,,,"Action, Indie","Single-player, Full controller support",,When it's done™,Stab stuff and find shiny things.,0,,,Shaun Spalding,1083230
4,Wingsuit: Gudvangen,0,0,2,,,,799,,,"Action, Simulation, Sports","Single-player, Steam Achievements, Full contro...",,"Jun 25, 2019",Take to the skies in Wingsuit: Gudvangen and p...,0,,,Ninja Whale Studios,1083260
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4380,Actionpaint VR,0,0,0,,,,599,,,"Casual, Indie, Simulation",Single-player,,"Sep 10, 2018",A fast and powerful paint experience.,0,,,DNV,894360
4381,Exoplanet,0,0,0,,,,0,,,"Adventure, Casual, Free to Play, Indie, RPG, S...",Single-player,,"Jul 25, 2018",Create a crew and a ship to navigate a procedu...,0,,,Gregg Brzozowski,894380
4382,Bernie’s Nightmare,5,13,6,,,,99,,,"Action, Adventure, Casual, Indie, Simulation, ...","Single-player, Steam Achievements, Captions av...",,"Jul 28, 2018","Nightmares come true in Bernie's Nightmare, a ...",0,,,Kevin Yang Games,894390
4383,Duke Dashington Remastered,7,17,0,,,,499,,,"Action, Adventure, Indie","Single-player, Steam Achievements, Partial Con...",,"Nov 16, 2018",Hop into the shoes of Duke Dashington and save...,0,,,Adventure Islands,894420


In [16]:
#export the data
df.to_csv("data_finaltest.csv")