## Querying and Organizing Data

In [1]:

import os
import logging

import pandas as pd

from SteamXmlProcessor import SteamXmlProcessor
from MathUtils import MathUtils
from SteamSpyQuery import SteamSpyQuery

In [2]:
# Set up logging (Jupyter sets up its own handlers, so we add ours instead of using basicConfig)
def attach_file_handler(logger, filename='mylog.log'):
    """Attach an appending file handler for ``filename`` to ``logger`` exactly once.

    Re-running this cell would otherwise stack a new FileHandler on the logger
    each time, and every record would then be written to the file repeatedly.

    Args:
        logger: the logger that should write to the file.
        filename: path of the log file (opened in append mode).

    Returns:
        The FileHandler attached to ``logger`` for ``filename`` -- the already
        attached one when it exists, otherwise a newly created one.
    """
    target = os.path.abspath(filename)
    for existing in logger.handlers:
        # FileHandler stores the absolute path of its file in ``baseFilename``.
        if isinstance(existing, logging.FileHandler) and existing.baseFilename == target:
            return existing

    handler = logging.FileHandler(filename=filename, mode='a')
    handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(handler)
    return handler


log = logging.getLogger()
fhandler = attach_file_handler(log)
formatter = fhandler.formatter
# Only CRITICAL records pass the root logger; lower this level to see the info messages.
log.setLevel(logging.CRITICAL)

In [3]:
def query_steam_data_for_user(username=''):
    """Load a user's Steam game list, going through a local XML cache file.

    Note, your 'Game details' must be set to 'Public' for this to work.
    This is done in your profile -> Edit Profile -> Privacy Settings -> Game details
    To find your username, check your profile under General -> Custom URL

    Args:
        username: the Steam user name. When left empty, the name is read from
            the file ``steam_id.dat`` in the current working directory.

    Returns:
        The result of ``get_game_infos()`` on the SteamXmlProcessor built for
        the user (from ``<username>_steam_games.xml`` when that file exists,
        otherwise via ``SteamXmlProcessor.from_username``).

    Raises:
        Exception: if no username was given and none could be read from
            ``steam_id.dat``.
    """
    if username == '':
        if os.path.exists("steam_id.dat"):
            log.info('Reading steam ID from file')
            with open("steam_id.dat", "r", encoding="utf-8") as id_file:
                # Strip surrounding whitespace: a trailing newline in the file
                # would otherwise become part of the cache file name below.
                username = id_file.read().strip()

        if username == '':
            # Log first -- a statement placed after `raise` is never executed.
            log.critical('Need steam user ID')
            raise Exception("Missing steam id")

    cache_file = f"{username}_steam_games.xml"
    if os.path.exists(cache_file):
        xmlProcessor = SteamXmlProcessor.from_file(cache_file)
    else:
        xmlProcessor = SteamXmlProcessor.from_username(
            username, cache_file)

    return xmlProcessor.get_game_infos()

In [4]:
# Main

# Fetch the user's library, then enrich a copy of it: first with the SteamSpy
# data, then with the Bayesian average computed by MathUtils.
game_infos = query_steam_data_for_user()
library_copy = game_infos.copy()
steam_spy_query = SteamSpyQuery()
decorated_game_infos = MathUtils.add_bayesian_average_to_gamespy_dataframe(
    steam_spy_query.get_data_for_games(library_copy)
)

In [6]:
# Do any filtering or re-arranging you want to here
ranked_infos = decorated_game_infos.sort_values(by=['BayesianAverage'], ascending=False)

# DLCs have no ratings (their RatingsRatio is 0), so remove those rows
unrated_rows = ranked_infos.index[ranked_infos["RatingsRatio"] == 0]
decorated_game_infos = ranked_infos.drop(index=unrated_rows)

# Keep a CSV copy on disk for easy access
decorated_game_infos.to_csv("decorated_game_infos.csv")

## Plotting

In [7]:
from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource, HoverTool, LinearColorMapper, Label, LabelSet
from bokeh.palettes import Turbo256 as palette
from bokeh.transform import linear_cmap

In [8]:
# Plot most played games (ignore non played games)
colName = "HoursOnRecord"
label_col = f"{colName}Text"
output_file("MostPlayed.html")

# Start from a private copy and discard titles with zero recorded hours
most_played_games = decorated_game_infos.copy()
never_played = most_played_games.index[most_played_games[colName] == 0]
most_played_games = most_played_games.drop(index=never_played)

# Keep the 50 titles with the most hours, then order them ascending by hours
most_played_games = most_played_games.nlargest(50, colName).sort_values(by=[colName], ascending=True)
hours = most_played_games[colName]

tooltips = [
    ('Game', '@Name'),
    ('Hours Played', '@HoursOnRecord'),
    ('Rating', '@BayesianAverage'),
]

select_tools = ['box_select', 'lasso_select', 'poly_select', 'tap', 'reset']

# Colour every bar by its hours, spanning the palette from the smallest to the largest value
color_mapper = linear_cmap(field_name=colName, palette=palette, low=min(hours), high=max(hours))

# The LabelSet text was found to need string values, so add a stringified copy of the hours
most_played_games[label_col] = hours.map(str)
data_source = ColumnDataSource(most_played_games)

p = figure(
    y_range=most_played_games["Name"],
    plot_width=2000,
    plot_height=1250,
    title="Most Played Games of All Time",
    tools=select_tools,
    x_range=(0, max(hours + 20)),  # 20 extra hours of room to the right of the longest bar
)

p.title.text_font_size = '32pt'
p.yaxis.major_label_text_font_size = "12pt"
p.xaxis.major_label_text_font_size = "12pt"
p.xaxis[0].axis_label = 'Hours Played'

p.hbar(y="Name", left=0, right=colName, height=0.5, source=data_source, color=color_mapper)
p.add_tools(HoverTool(tooltips=tooltips))

# Print the hour count next to the end of each bar
labels = LabelSet(
    x=colName,
    y="Name",
    level='annotation',
    text_color='black',
    x_offset=5,
    y_offset=-6,
    text=label_col,
    source=data_source,
    render_mode='canvas',
)
p.add_layout(labels)

show(p)

In [9]:
# Plot most played games in last 2 weeks
colName = "HoursLast2Weeks"
output_file("MostPlayedLast2Weeks.html")

# Work on a copy that only holds games with recent play time, most played first
most_played_games_2w = pd.DataFrame.copy(decorated_game_infos)
most_played_games_2w.drop(most_played_games_2w[most_played_games_2w[colName] == 0].index, inplace = True)
most_played_games_2w = most_played_games_2w.sort_values(by = [colName], ascending = False)

# If nothing was played in the last two weeks the frame is empty, and max() of an
# empty sequence raises ValueError; fall back to 0 so an empty chart is produced instead.
hours_2w = most_played_games_2w[colName]
y_axis_top = (max(hours_2w) if len(hours_2w) > 0 else 0) + 20

# Weird issue here where the text in LabelSet must be a string or it won't work, so decorate the data with strings
most_played_games_2w["{0}Text".format(colName)] = hours_2w.apply(lambda x: "{:.2f}".format(x))
data_source = ColumnDataSource(most_played_games_2w)

# The y range already starts at 0, so no separate y_range.start assignment is needed
p = figure(x_range = most_played_games_2w["Name"], y_range = (0, y_axis_top),
           width = 2000, title = "Most Played Games in Last 2 Weeks")

p.vbar(x = "Name", top = colName, source = data_source, width = 0.5)
p.xaxis.major_label_orientation = "vertical"
p.xgrid.grid_line_color = None
p.xaxis.major_label_text_font_size = "12pt"
p.yaxis[0].axis_label = 'Hours Played'

# Print the hour count (two decimals) above each bar
labels = LabelSet(x = "Name", y = colName, level = 'annotation', text_color = 'black',
                  x_offset = -15, y_offset = 10, text = "{0}Text".format(colName), source = data_source, render_mode = 'canvas')

p.add_layout(labels)

show(p)

In [331]:
# Plot most played games versus their rating
colName = "HoursOnRecord"
output_file("MostPlayedVsRating.html")

# Games with more hours on record than this get their name printed next to their point
LABEL_MIN_HOURS = 75

most_played_games = pd.DataFrame.copy(decorated_game_infos)
most_played_games = most_played_games.sort_values(by=[colName], ascending=False)

tooltips = [
    ('Game', '@Name'),
    ('Hours Played', '@HoursOnRecord'),
    ('Rating', '@BayesianAverage')
]

# Colour every point by its hours, spanning the palette from the smallest to the largest value
color_mapper = linear_cmap(field_name = colName,
                           palette=palette,
                           low=min(most_played_games[colName]),
                           high=max(most_played_games[colName]))

select_tools = ['tap', 'reset', 'box_zoom']

# Only the most played games are labelled; every other row gets an empty label.
# Series.where is vectorized and, unlike a row-wise DataFrame.apply, still
# yields a Series when the frame has no rows.
most_played_games["DisplayName"] = most_played_games["Name"].where(
    most_played_games[colName] > LABEL_MIN_HOURS, "")
data_source = ColumnDataSource(most_played_games)

labels = LabelSet(x = colName, y = "BayesianAverage", level = 'annotation', text_color = 'black',
                  x_offset = 10, y_offset = -5, text = "DisplayName",
                  source = data_source, render_mode = 'canvas', text_font_size = "8pt")

p = figure(plot_height = 1000,
           plot_width = 2000,
           title = "Most played vs ranking",
           tools = select_tools,
           x_range = (0, max(most_played_games[colName]) + 50))

p.circle(x = colName, y = "BayesianAverage",
         color = color_mapper,
         source = data_source, size = 10)

p.xaxis[0].axis_label = 'Hours Played'
p.yaxis[0].axis_label = 'Positive vs Negative Rating % Adjusted Using a Bayesian average'

p.add_tools(HoverTool(tooltips=tooltips))
p.add_layout(labels)
show(p)

In [332]:
# Plot best ranked unplayed games (by plain ratings ratio)
colName = "RatingsRatio"
label_col = f"{colName}Text"
output_file("UnplayedPlainRating.html")

tooltips = [
    ('Game', '@Name'),
    ('Rating', '@RatingsRatio'),
]

select_tools = ['box_select', 'lasso_select', 'poly_select', 'tap', 'reset']

# Work on a copy that keeps only games with zero hours on record
best_unplayed_games = decorated_game_infos.copy()
already_played = best_unplayed_games.index[best_unplayed_games["HoursOnRecord"] != 0]
best_unplayed_games = best_unplayed_games.drop(index=already_played)

# Remove DLC and tools (0% and 100% rated)
for excluded_rating in (0, 100):
    excluded_rows = best_unplayed_games.index[best_unplayed_games[colName] == excluded_rating]
    best_unplayed_games = best_unplayed_games.drop(index=excluded_rows)

# Keep the 50 best rated titles, then order them ascending by rating
best_unplayed_games = best_unplayed_games.nlargest(50, colName).sort_values(by=[colName], ascending=True)
ratings = best_unplayed_games[colName]

# Colour every bar by its rating, spanning the palette from the smallest to the largest value
color_mapper = linear_cmap(field_name=colName, palette=palette, low=min(ratings), high=max(ratings))

# The LabelSet text was found to need string values, so add the rating formatted with two decimals
best_unplayed_games[label_col] = ratings.apply("{:.2f}".format)

data_source = ColumnDataSource(best_unplayed_games)

# Limit the x axis to the span of the shown ratings, with a small margin on both sides
min_range = int(min(ratings) - 1)
max_range = int(max(ratings) + 2)
p = figure(
    y_range=best_unplayed_games["Name"],
    x_range=(min_range, max_range),
    plot_width=2000,
    plot_height=1250,
    title="Best unplayed games",
    tools=select_tools,
)

p.title.text_font_size = '32pt'
p.yaxis.major_label_text_font_size = "12pt"
p.xaxis.major_label_text_font_size = "12pt"
p.xaxis[0].axis_label = 'Positive vs. Negative Ratings %'

p.hbar(y="Name", left=0, right=colName, height=0.5, source=data_source, color=color_mapper)

# Print the rating next to the end of each bar
labels = LabelSet(
    x=colName,
    y="Name",
    level='annotation',
    text_color='black',
    x_offset=5,
    y_offset=-6,
    text=label_col,
    source=data_source,
    render_mode='canvas',
)
p.add_layout(labels)

p.add_tools(HoverTool(tooltips=tooltips))
show(p)

In [333]:
# Plot best ranked unplayed games, ranked by the Bayesian-adjusted rating
colName = "BayesianAverage"
output_file("UnplayedBayesian.html")

tooltips = [
    ('Game', '@Name'),
    ('Rating', '@BayesianAverage')
]

select_tools = ['box_select', 'lasso_select', 'poly_select', 'tap', 'reset']

# Work on a copy that keeps only games with zero hours on record
best_unplayed_games = pd.DataFrame.copy(decorated_game_infos)
best_unplayed_games.drop(best_unplayed_games[best_unplayed_games["HoursOnRecord"] != 0].index, inplace = True)

# Remove DLC and tools (0% and 100% rated).
# The 0% / 100% test must look at the raw "RatingsRatio" column: testing the
# plotted "BayesianAverage" column for exactly 0 or 100 does not identify the
# rows whose raw rating is 0% or 100%.
best_unplayed_games.drop(best_unplayed_games[best_unplayed_games["RatingsRatio"] == 0].index, inplace = True)
best_unplayed_games.drop(best_unplayed_games[best_unplayed_games["RatingsRatio"] == 100].index, inplace = True)

# Take just top 50
best_unplayed_games = best_unplayed_games.nlargest(50, colName)
best_unplayed_games = best_unplayed_games.sort_values(by=[colName], ascending=True)

# Colour every bar by its rating, spanning the palette from the smallest to the largest value
color_mapper = linear_cmap(field_name = colName,
                           palette=palette,
                           low=min(best_unplayed_games[colName]),
                           high=max(best_unplayed_games[colName]))

# Weird issue here where the text in LabelSet must be a string or it won't work, so decorate the data with strings
best_unplayed_games["{0}Text".format(colName)] = best_unplayed_games[colName].apply(lambda x: "{:.2f}".format(x))

data_source = ColumnDataSource(best_unplayed_games)

# Limit the x axis to the span of the shown ratings, with a small margin on both sides
min_range = int(min(best_unplayed_games[colName]) - 1)
max_range = int(max(best_unplayed_games[colName]) + 2)
p = figure(y_range = best_unplayed_games["Name"], x_range = (min_range, max_range),
           plot_width = 2000, plot_height = 1250, title = "Best unplayed games", tools = select_tools)

p.title.text_font_size = '32pt'
p.yaxis.major_label_text_font_size = "12pt"
p.xaxis.major_label_text_font_size = "12pt"
p.xaxis[0].axis_label = 'Positive vs. Negative Ratings % Adjusted Using a Bayesian Average'

p.hbar(y = "Name", left = 0, right = colName, height = 0.5, source = data_source, color = color_mapper)

# Print the rating next to the end of each bar
labels = LabelSet(x = colName, y = "Name", level = 'annotation', text_color = 'black',
                  x_offset = 5, y_offset = -6, text = "{0}Text".format(colName), source = data_source, render_mode = 'canvas')

p.add_layout(labels)

p.add_tools(HoverTool(tooltips=tooltips))
show(p)