#### Import Required Packages

In [32]:
#install requests if not installed using (pip install requests)
import requests
import json
import time
import pandas as pd

#### Constant declarations and utility methods to get games and review data using the SteamSpy API

In [33]:
#Code to show a progress bar
def loadbar(iteration, total, prefix = '', suffix = '', decimals= 2, length = 75, fill ='>'):
    """Draw a one-line text progress bar on standard output.

    Args:
        iteration: number of work items completed so far.
        total: total number of work items; 0 is rendered as a finished bar.
        prefix: text printed before the bar.
        suffix: text printed after the percentage.
        decimals: number of decimal places shown in the percentage.
        length: width of the bar in characters.
        fill: character used for the completed part of the bar.
    """
    if total:
        fraction = iteration / float(total)
        filledLength = int(length * iteration // total)
    else:
        # Nothing to process: show a finished bar instead of dividing by zero
        fraction = 1.0
        filledLength = length

    percent = ('{0:.' + str(decimals) + 'f}').format(100 * fraction)
    bar = fill * filledLength + '-' * (length - filledLength)
    # Leading and trailing '\r' keep the bar on one line so the next call overwrites it
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end='\r')
    if iteration == total:
        # Move to a fresh line once the bar is complete
        print()

In [34]:
#Define the endpoint for the api; the SteamSpy requests in this file are sent to this URL
url = "https://steamspy.com/api.php"
#Interval (in completed API calls) for periodic status messages
#NOTE(review): no active code visible in this file reads this value - confirm before removing
status_count = 100

#get data by page numbers
def loadDataInDF(reqtype, page_num):
    """Fetch one page of the app listing and the details of every app on it.

    Args:
        reqtype: value sent as the ``request`` query parameter.
        page_num: value sent as the ``page`` query parameter.

    Returns:
        A DataFrame with one row per app whose details were retrieved, indexed
        by appid (empty when no details could be retrieved), or ``None`` when
        the page request itself did not succeed.
    """
    #Local import keeps this function self-contained; read_json wants a file-like object
    from io import StringIO

    #Define API parameters and call the API using GET request
    payload = {'request' : reqtype, 'page' : page_num}
    response = requests.get(url, params = payload)

    if response.status_code == 500:
        print('Invalid Page!!!')
        return None
    if response.status_code != 200:
        #Any other failure used to fall through to an unbound return value
        print('Page request failed with HTTP status', response.status_code)
        return None

    #Passing the raw JSON string directly to read_json is deprecated in recent pandas
    df_pages = pd.read_json(StringIO(response.text), orient = 'index')

    total = len(df_pages.index)
    if total == 0:
        return pd.DataFrame()

    #Define Progress
    count = 0
    loadbar(count, total, prefix = 'Progress' , suffix = 'Complete')

    #Collect one single-row frame per app and concatenate once at the end
    frames = []
    for indx in df_pages.index:
        appdetails = loadAppDetails(indx)

        if appdetails is not None:
            appdetails = json.loads(appdetails)
            #Flatten the tag collection into one comma-separated string
            appdetails['tags'] = ', '.join(appdetails['tags'])
            frames.append(pd.DataFrame(appdetails, index = [appdetails['appid']]))

        #Count every processed app, successful or not, so the bar reaches 100%
        count += 1
        loadbar(count, total, prefix = 'Progress' , suffix = 'Complete')

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

In [35]:
#Get details for each app
def loadAppDetails(appid):
    """Request the ``appdetails`` record for one app id.

    Sends a GET request to the module-level ``url`` and then sleeps for one
    second (presumably to throttle consecutive calls - confirm against the
    API's usage limits).

    Returns:
        The response body as text on HTTP 200, otherwise ``None``.
    """
    query = {'request' : 'appdetails', 'appid' : appid}
    reply = requests.get(url, params = query)
    #The pause happens after every call, whether or not it succeeded
    time.sleep(1)
    succeeded = reply.status_code == 200 and reply.ok
    return reply.text if succeeded else None

In [36]:
#Get all game reviews
def loadGameReviews(appIds):
    """Fetch the review summary and the returned reviews for each app id.

    Args:
        appIds: sized iterable of app ids (a list or a pandas Index).

    Returns:
        A tuple ``(df_revsummary, df_reviews)``:
        df_revsummary has one row per successfully queried app, indexed by
        appid, built from the response's ``query_summary`` plus an ``appid``
        column. df_reviews has one row per returned review, indexed by
        ``'<appid>_<n>'`` with n counting from 1 within each app; the
        ``author`` field is replaced by the author's ``steamid`` and an
        ``appid`` column is added. Either frame is empty when there is
        nothing to put in it.
    """
    total = len(appIds)
    if total == 0:
        #Nothing to fetch; avoids drawing a progress bar over zero items
        return pd.DataFrame(), pd.DataFrame()

    #Collect per-app frames and concatenate once at the end
    summary_frames = []
    review_frames = []

    #Used to check the count of API calls completed
    count = 0
    loadbar(count, total, prefix = 'Progress' , suffix = 'Complete')

    for appId in appIds:
        #Callout to the api to get reviews JSON
        response = requests.get('https://store.steampowered.com/appreviews/'+ str(appId) +'?json=1')

        #Increment counter after every callout
        count += 1
        loadbar(count, total, prefix = 'Progress' , suffix = 'Complete')

        # Skip apps whose request did not succeed
        if response.status_code != 200:
            continue

        reviews = json.loads(response.text)

        summary = reviews['query_summary']
        summary['appid'] = appId
        summary_frames.append(pd.DataFrame(summary, index = [appId]))

        if summary['num_reviews'] > 0:
            reviewIds = []
            #Update all authors with steam Id and add appId for each review
            for position, review in enumerate(reviews['reviews'], start = 1):
                review['author'] = review['author']['steamid']
                review['appid'] = appId
                reviewIds.append(str(appId) + '_' + str(position))

            review_frames.append(pd.DataFrame(reviews['reviews'], index = reviewIds))

    #pd.concat rejects an empty list, so fall back to empty frames
    df_revsummary = pd.concat(summary_frames) if summary_frames else pd.DataFrame()
    df_reviews = pd.concat(review_frames) if review_frames else pd.DataFrame()
    return df_revsummary, df_reviews

In [37]:
def mergeGamesDFWithReviewSummary(df_game_stats, df_rev_summary):
    """Inner-join the game statistics with the review summaries on ``appid``.

    Only apps present in both frames are kept in the result.
    """
    merged = pd.merge(
        left = df_game_stats,
        right = df_rev_summary,
        on = 'appid',
        how = 'inner',
    )
    return merged

In [38]:
#Used to export data 
def exportDataToCSV(df_game_stats, df_reviews, page):
    """Write both frames to CSV files in the current working directory.

    The game statistics go to ``GameStats<page>.csv`` and the reviews to
    ``GameReviews<page>.csv``, where ``<page>`` is the text form of *page*.
    """
    outputs = (
        ('GameStats', df_game_stats),
        ('GameReviews', df_reviews),
    )
    for stem, frame in outputs:
        frame.to_csv(f'{stem}{page}.csv')

#### Call utility methods to retrieve games & reviews data. Store all retrieved data in CSV files for future use.

In [None]:
#Change page numbers to get data for specific page
page_num = 14

print('======>>>>>>> Start AppDetails Fetch <<<<<<<==========')
#Get the details of every app listed on the page
df_games = loadDataInDF('all', page_num)
print('======>>>>>>> End AppDetails Fetch <<<<<<<==========')

#loadDataInDF returns None for an invalid page; there is nothing to review or export then
if df_games is None or df_games.empty:
    print('No game data retrieved for page', page_num, '- skipping review fetch and export.')
else:
    print('======>>>>>>> Start Review Fetch <<<<<<<==========')
    # Get all Reviews for all the appid's for the specific page
    df_rev_sumry, df_reviews = loadGameReviews(df_games.index)

    #Merge the review summary into the game stats data
    df_games = mergeGamesDFWithReviewSummary(df_games, df_rev_sumry)

    #Export the merged game stats and the reviews for this page to CSV
    exportDataToCSV(df_games, df_reviews, page_num)
    print('======>>>>>>> End Review Fetch <<<<<<<==========')

In [72]:
import os

#Suffix of the merged output files; files carrying it come from an earlier
#consolidation run and must not be read back in, or every row is duplicated
consolidated_suffix = "_consolidated"

#Per-page frames, collected first and concatenated once
game_frames = []
review_frames = []

#Sort the directory listing so the row order of the output is reproducible
for fileName in sorted(os.listdir()):
    if not fileName.endswith(".csv") or fileName.endswith(consolidated_suffix + ".csv"):
        continue
    if fileName.startswith("GameStats"):
        game_frames.append(pd.read_csv(fileName))
    elif fileName.startswith("GameReviews"):
        review_frames.append(pd.read_csv(fileName))

if game_frames and review_frames:
    df_games = pd.concat(game_frames)
    df_review = pd.concat(review_frames)

    #Export the dataframes to CSV
    exportDataToCSV(df_games, df_review, consolidated_suffix)
else:
    #pd.concat rejects an empty list, and there is nothing to consolidate anyway
    print('No per-page GameStats/GameReviews CSV files found to consolidate.')