# Library imports

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Let pandas display up to 100 columns so wide frames are not elided with "...".
pd.options.display.max_columns = 100

# Collecting data through the Steam API

In [6]:
import csv
import json
import time
import urllib.parse
from datetime import datetime
from pathlib import Path

import requests

In [14]:
# Schema of the review table: one column per field kept from the Steam
# response (every field is kept for now). The order here is the column order
# of the resulting frame.
review_columns = [
    'recommendation_id',
    'review',
    'timestamp_created',
    'timestamp_updated',
    'voted_up',
    'votes_up',
    'votes_funny',
    'weighted_vote_score',
    'comment_count',
    'steam_purchase',
    'received_for_free',
    'written_during_early_access',
    'hidden_in_steam_china',
    'steam_china_location',
    'author_steamid',
    'author_num_games_owned',
    'author_num_reviews',
    'author_playtime_forever',
    'author_playtime_last_two_weeks',
    'author_playtime_at_review',
    'author_last_played',
    'language',
]

# Start from an empty frame that already carries the full schema.
collectedData = pd.DataFrame(columns=review_columns)


In [15]:
# Steam "appreviews" endpoint for app 1658280 and the query options sent with
# every page request (the same values the crawl has always used).
REVIEWS_URL = "https://store.steampowered.com/appreviews/1658280"
REVIEW_QUERY = {
    "filter": "recent",
    "language": "all",
    "purchase_type": "all",
    "num_per_page": 100,
    "filter_offtopic_activity": 0,
    "json": 1,
}
REQUEST_DELAY_SECONDS = 1     # pause between pages to avoid being blocked by the API
REQUEST_TIMEOUT_SECONDS = 30  # never let a single stalled request hang the cell
OUTPUT_CSV = Path("data/reviews.csv")

# Fields copied verbatim from each review object.
REVIEW_FIELDS = [
    'language', 'review', 'timestamp_created', 'timestamp_updated',
    'voted_up', 'votes_up', 'votes_funny', 'weighted_vote_score',
    'comment_count', 'steam_purchase', 'received_for_free',
    'written_during_early_access', 'hidden_in_steam_china',
    'steam_china_location',
]
# Fields copied from the nested 'author' object; stored with an 'author_' prefix.
AUTHOR_FIELDS = [
    'steamid', 'num_games_owned', 'num_reviews', 'playtime_forever',
    'playtime_last_two_weeks', 'playtime_at_review', 'last_played',
]


def flatten_review(review):
    """Flatten one review object from the API response into a flat row dict.

    Parameters
    ----------
    review : dict
        One element of the response's 'reviews' list.

    Returns
    -------
    dict
        'recommendation_id', every name in REVIEW_FIELDS, and every name in
        AUTHOR_FIELDS prefixed with 'author_'. A field that is absent from
        the response is stored as None instead of raising KeyError, so a
        single incomplete review cannot abort the whole crawl.
    """
    author = review.get('author', {})
    row = {'recommendation_id': review.get('recommendationid')}
    row.update({field: review.get(field) for field in REVIEW_FIELDS})
    row.update({'author_' + field: author.get(field) for field in AUTHOR_FIELDS})
    return row


# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end; concatenating a DataFrame per page re-copies all earlier rows each time.
review_records = []
seen_cursors = set()
next_cursor = "*"  # documented cursor value for the first page

# Stop as soon as a cursor repeats, so a cursor cycle cannot loop forever.
while next_cursor not in seen_cursors:
    seen_cursors.add(next_cursor)

    try:
        # `params` URL-encodes the cursor (it can contain '+', '/' and '=').
        http_response = requests.get(
            REVIEWS_URL,
            params={**REVIEW_QUERY, "cursor": next_cursor},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        http_response.raise_for_status()
        response = http_response.json()
    except (requests.RequestException, ValueError) as error:
        # Keep what has been collected so far instead of losing it to a traceback.
        print("Stopping early, the crawl is INCOMPLETE; request failed: " + str(error))
        break

    page_reviews = response.get('reviews', [])
    if response.get('success') != 1 or not page_reviews:
        # An unsuccessful reply or an empty page means there is nothing more to read.
        break

    review_records.extend(flatten_review(review) for review in page_reviews)
    print("Number of reviews collected: " + str(len(review_records)))

    next_cursor = response.get('cursor')
    if not next_cursor:
        break
    print("next cursor: " + next_cursor)

    time.sleep(REQUEST_DELAY_SECONDS)

# Build the frame in one step, reusing the schema (column names and order) of
# the empty `collectedData` frame defined earlier. Re-running this cell
# replaces the frame rather than appending duplicate rows to it.
collectedData = pd.DataFrame(review_records, columns=collectedData.columns)

# save the collected data to a csv file (creating the folder if it is missing)
OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)
collectedData.to_csv(OUTPUT_CSV, index=False)
print("data is saved, with total of " + str(len(collectedData)) + " reviews.")

Number of reviews collected: 100
next cursor: AoJ4sJyw+ZEDfpC9mwU= prev cursor: 
Number of reviews collected: 200
next cursor: AoJwk/TElJEDcqa+jgU= prev cursor: AoJ4sJyw+ZEDfpC9mwU=
Number of reviews collected: 300
next cursor: AoJ4+t2wzZADcoa+hgU= prev cursor: AoJwk/TElJEDcqa+jgU=
Number of reviews collected: 400
next cursor: AoJ415CFqZADfu3YgQU= prev cursor: AoJ4+t2wzZADcoa+hgU=
Number of reviews collected: 500
next cursor: AoJwhKSxi5ADccar/gQ= prev cursor: AoJ415CFqZADfu3YgQU=
Number of reviews collected: 600
next cursor: AoJw27mY8o8Dd5bq+wQ= prev cursor: AoJwhKSxi5ADccar/gQ=
Number of reviews collected: 700
next cursor: AoJwuPHz2I8DcaW9+QQ= prev cursor: AoJw27mY8o8Dd5bq+wQ=
Number of reviews collected: 800
next cursor: AoJw5sbLyI8DeeH/9wQ= prev cursor: AoJwuPHz2I8DcaW9+QQ=
Number of reviews collected: 900
next cursor: AoJwxO/wvY8DeMT59gQ= prev cursor: AoJw5sbLyI8DeeH/9wQ=
Number of reviews collected: 1000
next cursor: AoJwl6j+tI8DdOiM9gQ= prev cursor: AoJwxO/wvY8DeMT59gQ=
Number of

In [16]:
# Preview the first ten collected reviews.
collectedData.iloc[:10]

Unnamed: 0,recommendation_id,review,timestamp_created,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count,steam_purchase,received_for_free,written_during_early_access,hidden_in_steam_china,steam_china_location,author_steamid,author_num_games_owned,author_num_reviews,author_playtime_forever,author_playtime_last_two_weeks,author_playtime_at_review,author_last_played,language
0,176664948,"Con aprox 80 hs de juego, estoy en el tramo fi...",1728432535,1728432535,True,0,0,0.0,0,True,False,False,True,,76561198813838002,0,1,5051,3034,4822,1728445070,spanish
1,176651405,recommended game for suikoden lovers out there,1728417523,1728417523,True,0,0,0.0,0,True,False,False,True,,76561198073574534,176,8,1763,1763,1763,1728417492,english
2,176625244,沒有跳過系統，玩起來很惡心，無論是戰鬥結算時，打造裝備等等。,1728389968,1728389968,False,0,0,0.0,0,True,False,False,True,,76561198406716431,0,3,4046,4046,4046,1728389573,tchinese
3,176622050,Nice!,1728385443,1728385443,True,0,0,0.0,0,True,False,False,True,,76561198065822407,0,11,2160,2160,2160,1728340929,german
4,176577063,"Gostosim demaaais, Recomendo!!",1728321918,1728321918,True,0,0,0.0,0,False,False,False,True,,76561198106957134,0,9,1148,1148,691,1728432562,brazilian
5,176511255,I waited years for this game... I grew up Pla...,1728236678,1728236678,True,0,0,0.0,0,True,False,False,True,,76561199577403523,0,5,4001,0,4001,1716442956,english
6,176506820,love the game its entertaining takes me back t...,1728232827,1728232827,True,1,0,0.5238095521926879,0,True,False,False,True,,76561198105194239,0,24,4380,4380,1841,1728440822,english
7,176484945,one of the best game that I have played. Good ...,1728213333,1728213333,True,0,0,0.0,0,True,False,False,True,,76561199514826337,0,1,4047,44,4006,1728216118,english
8,176470579,Eiyuden Chronicles is traditional turn based r...,1728196261,1728196760,True,0,0,0.0,0,True,False,False,True,,76561198014523450,0,32,4725,408,4725,1728193560,english
9,176464612,I enjoyed this game. Has an interesting story ...,1728187874,1728187874,True,0,0,0.0,0,True,False,False,True,,76561198046147875,0,6,4263,3717,4263,1728187719,english
