## Run everything

In [1]:
import pandas as pd # Managing dataframes
from tqdm.auto import tqdm # Progress bars. Will automatically choose between GUI & console bars.

In [2]:
from preprocess import preprocess
from summarise import summarise
from report_features import report_features

In [3]:
# Run configuration.
# Folder holding game_details.csv and the per-game review CSVs. Keep the trailing
# slash: file names are appended to this value by plain string concatenation.
reviews_folder_directory = "../review_data/"

# ID of the game to summarise. Leave as None to be shown the available games
# and prompted for a choice.
app_id = None
ask_for_game = (app_id is None)

In [4]:
# Load the game details table; its first column becomes the index, which the
# later cells use to look games up by ID.
details = pd.read_csv(
    reviews_folder_directory + "game_details.csv",
    sep='|',
    escapechar='@',
    index_col=0,
)

In [5]:
if ask_for_game:
    # Width of the widest ID that is actually printed below (the index labels),
    # so the right-aligned ID column lines up. `default=0` keeps an empty
    # details table from crashing inside max().
    id_len = max((len(str(game_id)) for game_id in details.index), default=0)

    print("Which game would you like to summarise? Found:")
    # A dedicated loop variable is used so that listing the games does not
    # overwrite `app_id` before the user has made a choice.
    for game_id, name in details["name"].items():
        print(f"""{game_id:>{id_len}}: {name}""")
    print("")
    app_id = int(input("App_ID: "))

# Fail with a readable message (same exception type a failed .loc lookup would
# raise) when the configured or typed ID is not one of the known games.
if app_id not in details.index:
    raise KeyError(f"App ID {app_id!r} was not found in the game details table.")

app_name = details.loc[app_id, "name"]

Which game would you like to summarise? Found:
230410: Warframe
359550: Tom Clancy's Rainbow Six® Siege
   570: Dota 2
578080: PLAYERUNKNOWN'S BATTLEGROUNDS
   730: Counter-Strike: Global Offensive

App_ID: 578080


In [6]:
# Load the selected game's reviews (every column read as text) and discard rows
# that have no review text.
reviews_path = reviews_folder_directory + str(app_id) + ".csv"
review_data = pd.read_csv(
    reviews_path,
    sep='|',
    escapechar='@',
    dtype=str,
).dropna(subset=['review'])

In [7]:
# Confirm which game was opened and how many reviews were loaded.
print(
    f'Opening appID {app_id}: "{app_name}".\n'
    f"Reviews found: {review_data.shape[0]}."
)

Opening appID 578080: "PLAYERUNKNOWN'S BATTLEGROUNDS".
Reviews found: 175151.


In [8]:
# Run the project's preprocessing step over the loaded reviews and report how
# many reviews remain afterwards.
# NOTE: the variable name `preprocessed_reivews` (sic) is kept as-is because the
# summarisation cell reads it under that spelling.
print("Preprocessing data...")
preprocessed_reivews = preprocess(
    review_data,
    lower_case=True,
    minimum_length=10,
)
print(f"Preprocessing complete. Reviews after preprocessing: {preprocessed_reivews.shape[0]}")

Preprocessing data...


HBox(children=(IntProgress(value=0, max=175151), HTML(value='')))


Preprocessing complete. Reviews after preprocessing: 108936


In [9]:
# Build the per-feature summary data from the preprocessed reviews.
print("Summarising reviews...")
feature_data = summarise(preprocessed_reivews)
print("Summarisation complete!")

Summarising reviews...


HBox(children=(IntProgress(value=0, max=108936), HTML(value='')))


Summarisation complete!


In [10]:
# Print the feature report for the selected game.
report_features(
    feature_data,
    app_name,
    number=15,
)

Feature report for PLAYERUNKNOWN'S BATTLEGROUNDS:

regionlockchina (28.57% positive) 
positive = 2, negative = 5, total = 10254

access (75.35% positive) 
positive = 1076, negative = 352, total = 10087

time (61.09% positive) 
positive = 1743, negative = 1110, total = 9279

people (53.01% positive) 
positive = 1393, negative = 1235, total = 7874

servers (39.55% positive) 
positive = 1046, negative = 1599, total = 7678

players (56.03% positive) 
positive = 1235, negative = 969, total = 7460

hackers (43.68% positive) 
positive = 442, negative = 570, total = 5371

bugs (50.39% positive) 
positive = 514, negative = 506, total = 5208

map (70.56% positive) 
positive = 1076, negative = 449, total = 5067

battle (85.47% positive) 
positive = 2418, negative = 411, total = 4827

money (52.84% positive) 
positive = 680, negative = 607, total = 4301

hours (66.55% positive) 
positive = 589, negative = 296, total = 3793

gameplay (76.58% positive) 
positive = 1573, negative = 481, total = 3625
