In [82]:
import requests
import pandas as pd

In [156]:
# Get reviews from a single page
def get_reviews(appid, params=None, timeout=30):
  """Fetch one page of the Steam store `appreviews` endpoint as parsed JSON.

  Args:
    appid: Steam App-ID (int or str); it is appended to the endpoint URL.
    params: Query parameters to send. Defaults to ``{'json': 1}``.
    timeout: Seconds to wait for the server before giving up, so a stalled
      connection cannot hang the notebook indefinitely.

  Returns:
    The decoded JSON body of the response.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If the server does not answer within ``timeout``.
  """
  # Build the default per call rather than sharing one mutable dict between calls
  if params is None:
    params = {'json': 1}

  url = "https://store.steampowered.com/appreviews/" + str(appid)

  # Get the page source
  response = requests.get(url,
                          params=params,
                          headers={'User-Agent': 'Mozilla/5.0'},
                          timeout=timeout)

  # Fail loudly on HTTP errors instead of trying to JSON-decode an error page
  response.raise_for_status()

  # Return as json format
  return response.json()

# Get a batch of reviews using cursor
def get_batch_reviews(appid, sentiment, filter, n, language='english'):
  """Collect up to ``n`` reviews for one app by following the API cursor.

  Args:
    appid: Steam App-ID (int or str).
    sentiment: Value sent as the ``review_type`` query parameter
      (the notebook uses 'all', 'positive' and 'negative').
    filter: Value sent as the ``filter`` query parameter (the notebook uses
      'recent' and 'all'). The name shadows the builtin but is kept so that
      existing keyword callers keep working.
    n: Maximum number of reviews to return.
    language: Value sent as the ``language`` query parameter.

  Returns:
    A list of at most ``n`` review dicts, in the order the API returned them.
    Fewer than ``n`` are returned when the API runs out of reviews.
  """
  appid = str(appid)
  reviews = []
  cursor = '*'
  # Cursors already requested; seeing one again means pagination is looping
  seen_cursors = {cursor}

  # Parameters for the API
  params = {'json': 1,
            'filter': filter,
            'language': language,
            'day_range': 365,
            'cursor': cursor,
            'review_type': sentiment,
            # Number of reviews requested per page (the original capped this at 100)
            'num_per_page': min(n, 100)}

  # Loop through the "pages" until we have at least n reviews (may get more!)
  while n > len(reviews):
    # Grab the reviews for a single page
    response = get_reviews(appid, params)

    # An empty (or missing) page means there is nothing left to fetch.
    # Without this check the loop would never end for apps with < n reviews.
    page = response.get('reviews') or []
    if not page:
      break

    # Save to a list
    reviews += page

    # Pass the new cursor to scrape the next page/batch; stop if the API
    # gives no cursor or hands back one we already used.
    cursor = response.get('cursor')
    if not cursor or cursor in seen_cursors:
      break
    seen_cursors.add(cursor)
    params['cursor'] = cursor

  # Trim to n reviews (the last page may push the total past n)
  return reviews[0:n]

In [164]:
# Load the game list (the loop below reads its 'id' and 'name' columns)
games = pd.read_csv('game_list_new.csv')

# Features to be extracted from the API
features = ['author', 'review', 'timestamp_created', 'voted_up', 'votes_up', 'votes_funny']

# Get 1000 most recent reviews from each game.
# Per-game frames are collected in a list and concatenated once at the end;
# concatenating onto a growing DataFrame inside the loop re-copies every
# previously fetched row on each iteration.
recent_frames = []
for game_id, game_name in zip(games['id'], games['name']):
  print(f'Fetching reviews of {game_name}')
  batch = get_batch_reviews(str(game_id), 'all', 'recent', 1000)
  if batch:
    # columns=features keeps only the wanted fields
    reviews = pd.DataFrame(batch, columns=features)
    reviews['name'] = game_name
    recent_frames.append(reviews)
  print('Finished. \n')

# DataFrame to store the results (header-only frame if nothing was fetched)
if recent_frames:
  df_recent = pd.concat(recent_frames, axis=0)
else:
  df_recent = pd.DataFrame(columns=features + ['name'])

# Shuffle the dataframe and save the file
df_recent.sample(frac=1, random_state=42).reset_index(drop=True).to_csv('recent_reviews.csv', index=False)

Fetching reviews of Cyberpunk 2077
Finished. 

Fetching reviews of DayZ
Finished. 

Fetching reviews of New World
Finished. 

Fetching reviews of No Man's Sky
Finished. 

Fetching reviews of Battlefield™ 2042
Finished. 

Fetching reviews of NARAKA: BLADEPOINT
Finished. 

Fetching reviews of Battlefield V
Finished. 

Fetching reviews of Grand Theft Auto IV: The Complete Edition
Finished. 

Fetching reviews of Dying Light 2 Stay Human
Finished. 

Fetching reviews of Elite Dangerous
Finished. 

Fetching reviews of SCUM
Finished. 

Fetching reviews of Tom Clancy's Ghost Recon® Wildlands
Finished. 

Fetching reviews of Conan Exiles
Finished. 

Fetching reviews of Microsoft Flight Simulator Game of the Year Edition
Finished. 

Fetching reviews of Total War: WARHAMMER III
Finished. 

Fetching reviews of Fallout 76: The Pitt
Finished. 

Fetching reviews of Resident Evil 3
Finished. 

Fetching reviews of Stranded Deep
Finished. 

Fetching reviews of UNO
Finished. 

Fetching reviews of Back 4 Bl

In [165]:
# Get 1000 most helpful reviews from each game.
# Relies on `games` and `features` defined in an earlier cell.
# Per-game frames are collected in a list and concatenated once at the end;
# concatenating onto a growing DataFrame inside the loop re-copies every
# previously fetched row on each iteration.
helpful_frames = []
for game_id, game_name in zip(games['id'], games['name']):
  print(f'Fetching reviews of {game_name}')
  batch = get_batch_reviews(str(game_id), 'all', 'all', 1000)
  if batch:
    # columns=features keeps only the wanted fields
    reviews = pd.DataFrame(batch, columns=features)
    reviews['name'] = game_name
    helpful_frames.append(reviews)
  print('Finished. \n')

# DataFrame to store the results (header-only frame if nothing was fetched)
if helpful_frames:
  df_helpful = pd.concat(helpful_frames, axis=0)
else:
  df_helpful = pd.DataFrame(columns=features + ['name'])

# Shuffle the dataframe and save the file
df_helpful.sample(frac=1, random_state=42).reset_index(drop=True).to_csv('helpful_reviews.csv', index=False)

Fetching reviews of Cyberpunk 2077
Finished. 

Fetching reviews of DayZ
Finished. 

Fetching reviews of New World
Finished. 

Fetching reviews of No Man's Sky
Finished. 

Fetching reviews of Battlefield™ 2042
Finished. 

Fetching reviews of NARAKA: BLADEPOINT
Finished. 

Fetching reviews of Battlefield V
Finished. 

Fetching reviews of Grand Theft Auto IV: The Complete Edition
Finished. 

Fetching reviews of Dying Light 2 Stay Human
Finished. 

Fetching reviews of Elite Dangerous
Finished. 

Fetching reviews of SCUM
Finished. 

Fetching reviews of Tom Clancy's Ghost Recon® Wildlands
Finished. 

Fetching reviews of Conan Exiles
Finished. 

Fetching reviews of Microsoft Flight Simulator Game of the Year Edition
Finished. 

Fetching reviews of Total War: WARHAMMER III
Finished. 

Fetching reviews of Fallout 76: The Pitt
Finished. 

Fetching reviews of Resident Evil 3
Finished. 

Fetching reviews of Stranded Deep
Finished. 

Fetching reviews of UNO
Finished. 

Fetching reviews of Back 4 Bl

In [166]:
# Get 1000 balanced reviews (500 good, 500 bad) from each game (based on helpfulness).
# Relies on `games` and `features` defined in an earlier cell.
# Per-game frames are collected in a list and concatenated once at the end;
# concatenating onto a growing DataFrame inside the loop re-copies every
# previously fetched row on each iteration.
balanced_frames = []
for game_id, game_name in zip(games['id'], games['name']):
  print(f'Fetching reviews of {game_name}')

  # Up to 500 good reviews first, then up to 500 bad ones (same order as before)
  for sentiment in ('positive', 'negative'):
    batch = get_batch_reviews(str(game_id), sentiment, 'all', 500)
    if batch:
      # columns=features keeps only the wanted fields
      reviews = pd.DataFrame(batch, columns=features)
      reviews['name'] = game_name
      balanced_frames.append(reviews)

  print('Finished. \n')

# DataFrame to store the results (header-only frame if nothing was fetched)
if balanced_frames:
  df = pd.concat(balanced_frames, axis=0)
else:
  df = pd.DataFrame(columns=features + ['name'])

# Shuffle the dataframe and save the file
df.sample(frac=1, random_state=42).reset_index(drop=True).to_csv('balanced_reviews.csv', index=False)

Fetching reviews of Cyberpunk 2077
Finished. 

Fetching reviews of DayZ
Finished. 

Fetching reviews of New World
Finished. 

Fetching reviews of No Man's Sky
Finished. 

Fetching reviews of Battlefield™ 2042
Finished. 

Fetching reviews of NARAKA: BLADEPOINT
Finished. 

Fetching reviews of Battlefield V
Finished. 

Fetching reviews of Grand Theft Auto IV: The Complete Edition
Finished. 

Fetching reviews of Dying Light 2 Stay Human
Finished. 

Fetching reviews of Elite Dangerous
Finished. 

Fetching reviews of SCUM
Finished. 

Fetching reviews of Tom Clancy's Ghost Recon® Wildlands
Finished. 

Fetching reviews of Conan Exiles
Finished. 

Fetching reviews of Microsoft Flight Simulator Game of the Year Edition
Finished. 

Fetching reviews of Total War: WARHAMMER III
Finished. 

Fetching reviews of Fallout 76: The Pitt
Finished. 

Fetching reviews of Resident Evil 3
Finished. 

Fetching reviews of Stranded Deep
Finished. 

Fetching reviews of UNO
Finished. 

Fetching reviews of Back 4 Bl