# Feature Summariser
This notebook summarises downloaded game reviews and reports the most frequently mentioned features.

Sections:
* [Load Game Data](#load_data)
* [Preprocess Reviews](#preprocess)
* [Summarise Reviews](#summarise)

In [1]:
import pandas as pd # Managing dataframes
from tqdm.auto import tqdm # Progress bars. Will automatically choose between GUI & console bars.

# Load project code
from preprocess import preprocess
from summarise import summarise
from report_features import report_features

In [2]:
# Run configuration.
# Folder that holds game_details.csv and the per-game "<app_id>.csv" review files.
reviews_folder_directory = "../review_data/"

# Optional: app_id of the game to summarise. Leave as None to be asked later.
app_id = None
# True when no app_id was preset; the load section then prompts for one.
ask_for_game = (app_id is None)

## Load Game Data <a name="load_data"></a>

Load the game metadata and reviews. If no `app_id` was set in the run variables above, this section prompts the user to select a game.

In [3]:
# Read the game metadata table: '|'-separated, '@' as the escape character,
# with the first column used as the row index.
details = pd.read_csv(
    reviews_folder_directory + "game_details.csv",
    sep='|',
    escapechar='@',
    index_col=0,
)

In [4]:
# Resolve which game to summarise: list the known games and prompt when no
# app_id was preset, then look up the game's name.
if ask_for_game:
    # Width of the widest ID that is actually printed below (the index of
    # `details`), so the listing is right-aligned. `default=0` keeps an empty
    # metadata table from raising inside max().
    id_len = max((len(str(known_id)) for known_id in details.index), default=0)

    print("Which game would you like to summarise? Found:")
    # A dedicated loop variable is used so that listing the games does not
    # overwrite `app_id` before the user has answered.
    for known_id, known_name in details["name"].items():
        print(f"""{known_id:>{id_len}}: {known_name}""")
    print("")

    answer = input("App_ID: ")
    try:
        app_id = int(answer)
    except ValueError:
        # Same exception type as a bare int() call, but with a readable message.
        raise ValueError(f"App_ID must be a whole number, got {answer!r}.") from None

# Fail early with a clear message instead of an opaque lookup error from .loc.
# This also covers an app_id preset in the run variables.
if app_id not in details.index:
    raise KeyError(
        f"app_id {app_id!r} was not found in game_details.csv. "
        f"Known app_ids: {list(details.index)}"
    )

app_name = details.loc[app_id, "name"]

Which game would you like to summarise? Found:
230410: Warframe
359550: Tom Clancy's Rainbow Six® Siege
   570: Dota 2
578080: PLAYERUNKNOWN'S BATTLEGROUNDS
   730: Counter-Strike: Global Offensive

App_ID: 730


In [5]:
# Load the selected game's reviews (every column read as a string) and drop
# the rows that have no review text.
print(f'Opening appID {app_id}: "{app_name}".')

review_data = pd.read_csv(
    reviews_folder_directory + str(app_id) + ".csv",
    sep='|',
    escapechar='@',
    dtype=str,
).dropna(subset=['review'])

print(f"Reviews found: {len(review_data)}.")

Opening appID 730: "Counter-Strike: Global Offensive".
Reviews found: 150173.


## Preprocess Reviews <a name="preprocess"></a>

Tokenise the reviews using the Punkt tokenizer. Reviews shorter than the `minimum_length` passed to `preprocess` are filtered out.

In [6]:
# Preprocess and tokenise the loaded reviews.
# The misspelt name `preprocessed_reivews` is kept because the summarise cell reads it.
print("Preprocessing data...")
preprocessed_reivews = preprocess(
    review_data,
    lower_case=True,
    minimum_length=10,
)
print(
    "Preprocessing complete. "
    f"Reviews after preprocessing: {preprocessed_reivews.shape[0]}"
)

Preprocessing data...


  0%|          | 0/150173 [00:00<?, ?it/s]

Preprocessing complete. Reviews after preprocessing: 58648


## Summarise Reviews <a name="summarise"></a>

In [7]:
# Run the summariser over the preprocessed reviews; its result feeds the report cell.
print("Summarising reviews...")
feature_data = summarise(preprocessed_reivews)
print("Summarisation complete!")

Summarising reviews...


  0%|          | 0/58648 [00:00<?, ?it/s]

Summarisation complete!


In [8]:
# Print the feature report for the selected game.
# NOTE(review): `number` presumably caps how many features are listed; confirm in report_features.
report_features(feature_data, app_name, number=10)

Feature report for Counter-Strike: Global Offensive:

people (64.41% positive) 
positive = 1220, negative = 674, total = 4248

hackers (72.32% positive) 
positive = 755, negative = 289, total = 3936

players (69.93% positive) 
positive = 1058, negative = 455, total = 3684

time (73.07% positive) 
positive = 898, negative = 331, total = 3349

cheaters (34.42% positive) 
positive = 455, negative = 867, total = 3340

community (60.23% positive) 
positive = 798, negative = 527, total = 2523

fps (92.73% positive) 
positive = 1250, negative = 98, total = 1935

cs (84.85% positive) 
positive = 846, negative = 151, total = 1851

valve (59.67% positive) 
positive = 549, negative = 371, total = 1679

hours (62.50% positive) 
positive = 255, negative = 153, total = 1576

