# Notebook: Predictions with Dashboard

#### In this notebook, the models developed and fitted in the other analyses notebooks are used to make predictions.
#### Use the dashboard in the last cell to make predictions for game success.

#### Step 1: Imports, functions and other requirements
#### Step 2: Dash UI with sales prediction

## Step 1: Imports, functions and other requirements

In [1]:
## General imports
import pandas as pd
import numpy as np
import datetime as dt
from datetime import datetime
from warnings import simplefilter
from collections import Counter
import pickle 
import os
import signal
from threading import Thread
import time
import gzip

## Imports for NLP
import nltk, re, spacy, string
from spacy.lang.en.examples import sentences
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

## Imports for analyses
import statsmodels.formula.api as smf
import statsmodels.api as sm
import statsmodels
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.compose import ColumnTransformer

## Imports for UI
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
from flask import request

## Imports from analyses_tools (local)
from analyses_tools import oh_encoder

#########################################
## Requirements:
## The NLTK stopword corpus is downloaded here; nltk reports when the package
## is already up-to-date, so re-running this line is harmless:
nltk.download('stopwords')
## Additionally, the spaCy model en_core_web_sm has to be installed manually,
## because spacy.load("en_core_web_sm") further below expects it:
# in terminal: python -m spacy download en_core_web_sm
#########################################

## Function to filter entries in detected_technologies for engines used

def filter_engine_entries(text):
    """Reduce a detected_technologies string to the engines it mentions.

    Entries are separated by "," or "; " (all blanks are removed before
    splitting). Only entries starting with "Engine." are kept, the prefix is
    dropped and the remaining names are joined with "; ".

    Returns "Unknown" if `text` is not a string or names no engine.
    """
    # anything that is not a string (e.g. NaN) carries no engine information
    if not isinstance(text, str):
        return "Unknown"

    normalised = text.replace('; ', ', ').replace(" ", "")
    engine_names = []
    for item in normalised.split(','):
        if item.startswith('Engine.'):
            engine_names.append(item.replace("Engine.", ""))

    # an empty result falls back to "Unknown"
    return '; '.join(engine_names) or "Unknown"


## Function for counting genres

def count_genres(genres):
    """Return how many comma-separated genres `genres` holds (0 if missing or empty)."""
    has_no_genres = pd.isna(genres) or genres == ''
    return 0 if has_no_genres else len(genres.split(','))


## Function for extracting word count and average length

def calculate_description_metrics(description):
    """Return (word count, average word length) of a description.

    Words are the matches of the regex \\b\\w+\\b. A missing description gives
    (0, 0.0); a description without any word gives (0, 0).
    """
    if pd.isna(description):
        return 0, 0.0

    tokens = re.findall(r'\b\w+\b', description)
    n_tokens = len(tokens)
    # no words at all: avoid dividing by zero
    if n_tokens == 0:
        return n_tokens, 0

    total_characters = sum(map(len, tokens))
    return n_tokens, total_characters / n_tokens


## Function for data preparation

## Engines that have their own one-hot column; every other engine is pooled into "Misc"
_KNOWN_ENGINES = ("Source", "Unknown", "MonoGame", "AdventureGameStudio",
                  "Unity", "CryEngine", "Solar2D", "KiriKiri",
                  "XNA", "FNA", "Unreal", "Godot",
                  "Construct", "Cocos", "Adobe_AIR", "TyranoBuilder",
                  "Torque", "GameGuru", "RenPy", "OGRE",
                  "RPGMaker", "Love2D", "GameMaker", "Lime_OR_OpenFL",
                  "BlenderGameEngine")


def _normalise_engines(value):
    """Turn one `engine` cell into a duplicate-free list of engine categories.

    Args:
        value: a string of engine names separated by "," or ";" (the dashboard
            joins with ", ", filter_engine_entries joins with "; "), or a
            list/tuple/set of names. Anything else (None, NaN) counts as missing.

    Returns:
        list of str: names from _KNOWN_ENGINES, "Misc" for every other name,
        in order of first appearance; ["Unknown"] if no name is given.
    """
    if isinstance(value, str):
        ## split into names first - iterating the string itself would yield single characters
        names = re.split(r"[,;]", value)
    elif isinstance(value, (list, tuple, set)):
        names = list(value)
    else:
        names = []

    categories = []
    for name in names:
        name = str(name).strip()
        if not name:
            continue
        category = name if name in _KNOWN_ENGINES else "Misc"
        if category not in categories:
            categories.append(category)
    return categories or ["Unknown"]


def data_preparation_aim(df, is_datetime=False):
    """Prepare a frame with new game data so that it has the columns the models expect.

    Args:
        df (pd.DataFrame): input data; the columns published_store, engine,
            description, genres and platforms are read. genres and platforms
            are comma-separated strings, engine is a separated string or a
            list of names. The frame itself is not modified.
        is_datetime (bool): True if published_store already holds datetimes;
            otherwise the column is parsed with pd.to_datetime.

    Returns:
        pd.DataFrame: a new frame with exactly the columns listed in
        `columns_df` (in that order). Columns that are not present after the
        encoding are filled with 0.

    Notes:
        - oh_encoder comes from analyses_tools; it is presumably adding one
          indicator column per distinct entry ("<column>_<entry>") - verify there.
        - text_cleaner is looked up when the function is called, so it only
          has to be defined before the first call.
    """
    df = df.copy()

    ## publish date as number of days since 1997-01-01
    reference_date = pd.Timestamp(1997, 1, 1)
    if is_datetime:
        df["published_store"] = df["published_store"] - reference_date
    else:
        df["published_store"] = pd.to_datetime(df["published_store"]) - reference_date
    df["published_store"] = df["published_store"].apply(lambda value: value.days)

    ## process engines: every row gets a list of known engine categories
    ## (missing values become ["Unknown"], so no separate fillna is needed)
    df["engine"] = df["engine"].apply(_normalise_engines)

    ## Extract word count and average length from description
    df['description_count'], df['description_length'] = zip(*df['description'].apply(calculate_description_metrics))

    ## Count number of genres (before missing values are replaced, so they count as 0)
    df['genres_count'] = df['genres'].apply(count_genres)

    ## Splitting multiple entries
    ## fill missing values first (splitting NaN would fail), then split the strings
    ## and strip blanks so that "Action, RPG" yields "RPG" and not " RPG"
    for column in ("genres", "platforms"):
        df[column] = df[column].fillna("Unknown").apply(
            lambda cell: [entry.strip() for entry in cell.split(',')]
        )

    ## One-Hot Encoding
    df = oh_encoder(df, "genres")
    df = oh_encoder(df, "platforms")
    df = oh_encoder(df, "engine")

    ## apply text cleaner (before the column selection, so that the result is
    ## written to the working copy and not to a slice of it)
    df["description_clean_nonum"] = df["description"].apply(text_cleaner)

    # Ensure same columns in aim_df as in train_df

    columns_df = ['store_uscore', 'published_store', 'name', 'description', 'full_price',
       'developers', 'publishers', 'languages', 'voiceovers', 'tags',
       'achievements', 'gfq_rating', 'stsp_owners', 'hltb_single',
       'igdb_popularity', 'merge_col', 'peak_players', 'total_reviews',
       'rating', 'description_count', 'description_length', 'genres_count',
       'genres_Massively Multiplayer', 'genres_Movie', 'genres_Violent',
       'genres_RPG', 'genres_Action', 'genres_Gore', 'genres_Sexual Content',
       'genres_Sports', 'genres_Early Access', 'genres_Game Development',
       'genres_Indie', 'genres_Strategy', 'genres_Racing',
       'genres_Free to Play', 'genres_Casual', 'genres_Nudity',
       'genres_Adventure', 'genres_Simulation', 'platforms_WIN',
       'platforms_MAC', 'platforms_LNX', 'engine_GameGuru', 'engine_Unity',
       'engine_XNA', 'engine_RenPy', 'engine_FNA',
       'engine_AdventureGameStudio', 'engine_Cocos', 'engine_RPGMaker',
       'engine_Construct', 'engine_Unreal', 'engine_GameMaker',
       'engine_Lime_OR_OpenFL', 'engine_KiriKiri', 'engine_Godot',
       'engine_Solar2D', 'engine_Unknown', 'engine_CryEngine', 'engine_Source',
       'engine_Love2D', 'engine_OGRE', 'engine_Adobe_AIR', 'engine_Torque',
       'engine_TyranoBuilder', 'engine_MonoGame', 'engine_Misc',
       'engine_BlenderGameEngine', 'Multiplayer', 'description_clean_nonum']

    ## keep exactly these columns in this order; columns that do not exist yet are created with 0
    df = df.reindex(columns=columns_df, fill_value=0)

    return df


def text_cleaner(sentence):
    """Take a string, clean it for use in vectorization and return cleaned string.

    Uses the module-level objects `nlp` (spaCy pipeline), `stopWords` and
    `punctuations`, which have to be defined before the first call.

    Args:
        sentence (string): Original string to be cleaned. None and NaN are
            treated as missing text.

    Returns:
        doc_str (string): Cleaned String ("" for missing text)

    """

    ## missing text: None, or NaN as pandas uses it for empty cells
    if sentence is None or (isinstance(sentence, float) and np.isnan(sentence)):
        return ""

    ## OPTIONAL: delete html tags (tags can be excluded if one wants to limit analyses to ignore this information):
    # sentence = re.sub("<.*?>", "", sentence)

    ## tokenize and delete pronouns, stopwords and punctuation
    ## (the lemma is compared as it is and only lowercased afterwards)
    doc = nlp(sentence)
    clean_doc = [token.lemma_.lower() for token in doc if (token.pos_ !="PRON") and (token.lemma_ not in stopWords) and (token.lemma_ not in punctuations)]
    ## rejoin texts
    doc_str = " ".join(clean_doc)
    ## collapse every run of whitespace (tabs, spaces, line breaks) into one space
    ## raw string: "\s" in a normal string literal is an invalid escape sequence
    doc_str = re.sub(r"[\s]+", " ", doc_str)
    ## deleting numbers
    doc_str = re.sub(r'\d+', '', doc_str)
    return doc_str


def predictor(models, df_aim):
    """Run the first three models on the prepared data.

    Args:
        models: sequence of at least three objects with a predict() method.
        df_aim: prepared data, passed unchanged to every model.

    Returns:
        tuple: (y_owners, y_rating, y_uscore), the predictions of
        models[0], models[1] and models[2].
    """
    ## predictions, in model order
    y_owners, y_rating, y_uscore = (models[position].predict(df_aim) for position in range(3))
    return y_owners, y_rating, y_uscore


def predictor_sales(model_ols, df_aim_sales):
    """Predict sales from the predicted number of owners.

    Args:
        model_ols: fitted model with a predict() method.
        df_aim_sales (pd.DataFrame): frame containing the column "stsp_owners";
            all other columns are ignored.

    Returns:
        [0] if the first predicted value is negative, otherwise the
        prediction multiplied by 1000000.
    """
    ## the model only receives the owners column
    ## (variant with intercept column: df_aim_sales[["const", "stsp_owners"]])
    owners_only = df_aim_sales[["stsp_owners"]]

    ## prediction
    y_sales = model_ols.predict(owners_only)

    ## negative predictions are reported as zero
    return [0] if y_sales[0] < 0 else y_sales*1000000
        
## Adjusting display: more rows, more columns, numbers as decimals
for option_name, option_value in (
    ('display.max_rows', 200),
    ('display.max_columns', 50),
    ('display.float_format', '{:.2f}'.format),
):
    pd.set_option(option_name, option_value)

## Suppress some warnings
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)


def _load_gzip_pickle(path):
    """Return the object pickled in the gzip-compressed file at `path`."""
    with gzip.open(path, "rb") as f:
        return pickle.load(f)


## load models from pickle objects
## NOTE: unpickling can execute arbitrary code - only load model files from a trusted source
models = _load_gzip_pickle("../../models/final_models.pkl.gz")
model_ols = _load_gzip_pickle("../../models/final_model_sales.pkl.gz")
## open vocabulary from pickle
with open("../../models/extracted_word_index.pkl", "rb") as handle:
    word_index_lists = pickle.load(handle)

## load language model and additional requirements (used by text_cleaner)
nlp = spacy.load("en_core_web_sm")
stopWords = stopwords.words("english")
punctuations = string.punctuation

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Tom\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Step 2: Dash UI with sales prediction

In [5]:
from threading import Thread
import time
from datetime import datetime
import dash
from dash import dcc, html
import pandas as pd

# List for saved predictions
saved_predictions = []

# Shared styles. Dash style dictionaries use camelCased CSS property names
# (marginBottom instead of margin-bottom).
FONT_FAMILY = 'Arial'
LABEL_STYLE = {'marginBottom': '1px', 'color': 'white', 'fontFamily': FONT_FAMILY}
HEADING_STYLE = {**LABEL_STYLE, 'marginBottom': '2px'}
DROPDOWN_STYLE = {'marginBottom': '1px', 'backgroundColor': 'white', 'fontFamily': FONT_FAMILY}
INPUT_STYLE = {'marginBottom': '1px', 'backgroundColor': 'white', 'color': 'black', 'fontFamily': FONT_FAMILY}
ACCENT_STYLE = {'marginBottom': '1px', 'backgroundColor': '#4d4dd0', 'color': 'white', 'fontFamily': FONT_FAMILY}
ROW_STYLE = {'display': 'flex', 'marginBottom': '2px'}
CELL_STYLE = {'flex': '1', 'marginRight': '10px'}
LAST_CELL_STYLE = {'flex': '1'}

# Selectable values, in display order
ENGINE_NAMES = [
    'Unreal', 'Unity', 'Source', 'Godot', 'MonoGame', 'GameMaker', 'RPGMaker',
    'CryEngine', 'AdventureGameStudio', 'Solar2D', 'KiriKiri', 'XNA', 'FNA',
    'Construct', 'Cocos', 'Adobe_AIR', 'TyranoBuilder', 'Torque', 'GameGuru',
    'RenPy', 'OGRE', 'Love2D', 'Lime_OR_OpenFL', 'BlenderGameEngine', 'Other',
    'Unknown',
]
# Engines whose label differs from the value that is passed on
ENGINE_LABELS = {'Lime_OR_OpenFL': 'Lime OR OpenFL'}
GENRE_NAMES = [
    'Action', 'Adventure', 'RPG', 'Strategy', 'Simulation', 'Violent', 'Racing',
    'Nudity', 'Free to Play', 'Sports', 'Movie', 'Casual', 'Gore',
    'Massively Multiplayer', 'Early Access', 'Sexual Content',
    'Game Development', 'Indie',
]
PLATFORM_NAMES = ['WIN', 'LNX', 'MAC']


def _options(names, labels=None):
    """Build dropdown options; the label equals the value unless `labels` overrides it."""
    labels = labels or {}
    return [{'label': labels.get(name, name), 'value': name} for name in names]


def _multi_dropdown(component_id, options, default):
    """Build a multi-select dropdown with one preselected value."""
    return dcc.Dropdown(id=component_id, options=options, value=[default], multi=True, style=DROPDOWN_STYLE)


def _field(title, control, cell_style=CELL_STYLE, title_style=LABEL_STYLE):
    """Build one form cell: a heading with the control below it."""
    return html.Div([html.H5(title, style=title_style), control], style=cell_style)


# Dash layout
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Game Success Prediction", style=HEADING_STYLE),

    # Row 1: categorical game properties
    html.Div([
        _field("Engines:", _multi_dropdown('engines', _options(ENGINE_NAMES, ENGINE_LABELS), 'Unreal')),
        _field("Genres:", _multi_dropdown('genres', _options(GENRE_NAMES), 'Action')),
        _field("Platforms:", _multi_dropdown('platforms', _options(PLATFORM_NAMES), 'WIN'), cell_style=LAST_CELL_STYLE),
    ], style=ROW_STYLE),

    # Row 2: numeric game properties
    html.Div([
        _field(
            "Number of supported languages:",
            dcc.Input(id='languages', type='number', placeholder="Number of Languages", value=1, style=INPUT_STYLE),
        ),
        _field(
            "Full Price (in $US cent):",
            # placeholder names the same unit as the heading (the price is entered in cents)
            dcc.Input(id='full_price', type='number', placeholder="Full Price (US cents)", value=99, style=INPUT_STYLE),
            title_style=HEADING_STYLE,
        ),
        _field(
            "Length of single-player campaign (in hours):",
            dcc.Input(id='game_length', type='number', placeholder="Length of main single-player campaign (hours)", value=1.0, style=INPUT_STYLE),
            cell_style=LAST_CELL_STYLE,
        ),
    ], style=ROW_STYLE),

    # Free-text description
    html.Div([
        html.H5("Description:", style=LABEL_STYLE),
        dcc.Textarea(id='description', placeholder="Description", value="An epic adventure game...", style={'width': '100%', **INPUT_STYLE})
    ]),

    # Row 3: multiplayer flag and publication date
    html.Div([
        _field(
            "Multiplayer support:",
            dcc.Dropdown(id='multiplayer', options=[{'label': 'Yes', 'value': 1}, {'label': 'No', 'value': 0}], value=0, style={'width': '200px', **DROPDOWN_STYLE}),
        ),
        _field(
            "Publication Date:",
            dcc.DatePickerSingle(id='publish_date', date=datetime(2024, 7, 1), style=ACCENT_STYLE),
            cell_style=LAST_CELL_STYLE,
        ),
    ], style=ROW_STYLE),

    # Buttons that trigger the prediction callback
    html.Div([
        html.Button('Make new prediction', id='predict-button', n_clicks=0, style={**ACCENT_STYLE, 'marginRight': '10px'}),
        html.Button('Save last three predictions', id='save-button', n_clicks=0, style=ACCENT_STYLE)
    ]),

    html.Hr(),

    # Filled by the prediction callback
    html.Div(id='predictions-output', style={'display': 'flex', 'justifyContent': 'space-between'})
], style={'width': '80%', 'margin': '0 auto', 'backgroundColor': '#3c228b'})

def _current_prediction_panel(owners, rating, uscore, sales):
    """Build the panel that shows the prediction just made."""
    white = {'color': 'white'}
    return html.Div([
        html.H3("Current Prediction", style=white),
        html.P(f"Steam Owners: {owners}", style=white),
        html.P(f"User Rating: {rating}", style=white),
        html.P(f"User Score: {uscore}", style=white),
        html.P(f"Sales: {sales}", style=white)
    ])


def _saved_predictions_panel(recent):
    """Build the panel with one card per saved prediction in `recent`."""
    white = {'color': 'white'}
    cards = []
    for position, pred in enumerate(recent):
        cards.append(html.Div([
            html.H4(f"Saved Prediction {(position+1)}", style=white),
            html.P(f"Owners: {pred['Steam Owners']}", style=white),
            html.P(f"User Rating: {pred['User Rating']}", style=white),
            html.P(f"User Score: {pred['User Score']}", style=white),
            html.P(f"Sales: {pred['Sales']}", style=white)
        ], style={'display': 'inline-block', 'justify-content': 'space-between', 'margin-right': '30px'}))
    return html.Div([
        html.Div(cards, style={'display': 'flex', 'justify-content': 'space-between'})
    ])


@app.callback(
    Output('predictions-output', 'children'),
    Input('predict-button', 'n_clicks'),
    Input('save-button', 'n_clicks'),
    State('engines', 'value'),
    State('genres', 'value'),
    State('platforms', 'value'),
    State('description', 'value'),
    State('languages', 'value'),
    State('full_price', 'value'),
    State('publish_date', 'date'),
    State('multiplayer', 'value'),
    State('game_length', 'value'),
    State('predictions-output', 'children')
)
def predict(n_clicks, n_clicks_save, engines, genres, platforms, description, languages, full_price, publish_date, multiplayer, game_length, existing_predictions):
    """Predict owners, rating, user score and sales for the game entered in the form.

    Runs when either button is clicked. Nothing is returned as long as the
    predict button has never been clicked. Once the save button has been
    clicked at least once, every run appends its prediction (with the form
    values) to the global list `saved_predictions` and the last three saved
    predictions are shown next to the current one.
    NOTE(review): this means that predictions are also saved on later clicks
    of the predict button - confirm that this is intended.

    `existing_predictions` is received from the layout but not used.

    Returns:
        list: Dash components for the 'predictions-output' container.
    """
    global saved_predictions

    # Initialize saved predictions if the module-level list does not exist
    globals().setdefault('saved_predictions', [])

    # Click counters may be None; nothing is predicted before the first click on the predict button
    if (n_clicks or 0) <= 0:
        return []
    save_requested = (n_clicks_save or 0) > 0

    # The multi-select values are passed on as comma-separated strings
    engines = ', '.join(engines)
    genres = ', '.join(genres)
    platforms = ', '.join(platforms)

    df_aim = pd.DataFrame.from_dict({
        'published_store': [publish_date],
        'name': ["New Game"],
        'description': [description],
        'full_price': [full_price],
        'languages': [languages],
        'hltb_single': [game_length],
        'genres': [genres],
        'platforms': [platforms],
        'engine': [engines],
        'Multiplayer': [multiplayer],
    })
    df_aim = data_preparation_aim(df_aim)

    # Make predictions
    y_owners, y_rating, y_uscore = predictor(models, df_aim)

    # The sales model is fed with the predictions of the other models
    df_aim_sales = pd.DataFrame.from_dict({
        'const': [1],
        'rating': [y_rating[0]],
        'store_uscore': [y_uscore[0]],
        'stsp_owners': [y_owners[0]]
    })
    y_sales = predictor_sales(model_ols, df_aim_sales)

    # Values as they are displayed and saved
    owners = int(y_owners[0])
    rating = round(y_rating[0], 2)
    uscore = round(y_uscore[0], 2)
    sales = int(y_sales[0])

    panels = [_current_prediction_panel(owners, rating, uscore, sales)]

    if save_requested:
        saved_predictions.append({
            'Steam Owners': owners,
            'User Rating': rating,
            'User Score': uscore,
            'Sales': sales,
            'Published Date': publish_date,
            'Description': description,
            'Full Price': full_price,
            'Languages': languages,
            'Game Length': game_length,
            'Genres': genres,
            'Platforms': platforms,
            'Engine': engines,
            'Multiplayer': "Yes" if multiplayer == 1 else "No"
        })
        # Show the three most recent saved predictions
        panels.append(_saved_predictions_panel(saved_predictions[-3:]))

    return panels

def run_dash():
    """Serve the Dash app; this call blocks, which is why it runs in its own thread."""
    # `app.run` is the current entry point and `app.run_server` its legacy name.
    # Prefer `app.run` and only fall back for Dash versions that do not have it yet.
    serve = getattr(app, "run", None) or app.run_server
    serve(debug=False, use_reloader=False)

# Start dash in a separate thread
thread = Thread(target=run_dash)
thread.start()

# Give some time to start
time.sleep(3)
print("Dash app is running...")

Dash app is running...


In [3]:
# Function to stop the dashboard

def stop_dash():
    """Stop the dashboard by sending SIGINT to the current process.

    The signal goes to this process itself (os.getpid()), not only to the
    thread that serves the dashboard.
    """
    own_pid = os.getpid()
    os.kill(own_pid, signal.SIGINT)


In [4]:
##########################################################################
## warning: this stops dash, but also kills the notebook kernel!
#
# stop_dash()
#
##########################################################################