In [1]:
import pandas as pd
import numpy as np
from config import CONFIGS
import os
import os
from datetime import datetime

from utils.processing_functions import explode_columnar_df
from utils.weaviate_client import WeaviateClient

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_distances

# visualization packages
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# turn off jupyter warnings
import warnings
warnings.filterwarnings('ignore')

ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
S3_SCRAPER_BUCKET = CONFIGS["s3_scraper_bucket"]
GAME_CONFIGS = CONFIGS["games"]
RATINGS_CONFIGS = CONFIGS["ratings"]
IS_LOCAL = True if os.environ.get("IS_LOCAL", "False").lower() == "true" else False

read_dir = "data/prod/games/game_dfs_clean/"
sims_dir = "data/prod/games/similarity_files/"

weaviate_client = WeaviateClient()
weaviate_client.connect_weaviate_client_docker()


IS_LOCAL: True

Checking for local config file and evaluating for updates from S3.
Loading config from local


  warn(


<weaviate.client.WeaviateClient at 0x117eca6f0>

# Content Similarity Models

### File Setup for All Models

In [2]:
games_df = pd.read_pickle(f"{read_dir}games_clean.pkl")
games_df = games_df.sort_values("BayesAvgRating", ascending=False)[
            0 : 1000
        ].reset_index(drop=True)

id_game_map = {x:y for x,y in zip(games_df['BGGId'],games_df['Name'])}
game_id_map = {y:x for x,y in zip(games_df['BGGId'],games_df['Name'])}
bgg_ids = games_df['BGGId'].values
relevant_df = games_df[['BGGId']]

rag_cleaned_sentences_df = pd.read_pickle(f'{sims_dir}top_1000_cleaned_rag.pkl')
rag_cleaned_sentences_df = relevant_df.merge(rag_cleaned_sentences_df, on="BGGId", how="left")
rag_cleaned_sentences_df = rag_cleaned_sentences_df.drop(columns=["Positive_Components","Positive_Sentences","Negative_Components","Negative_Sentences"], axis=1)
rag_cleaned_sentences_df.columns = [x.lower() for x in rag_cleaned_sentences_df.columns]

all_rag_attributes_df = pd.read_pickle(f"{sims_dir}top_1000_cleaned_rag_with_ratings_extrap.pkl")
all_rag_attributes_df = all_rag_attributes_df.drop(columns=["Description","About","Positive_Components","Negative_Components","Positive_Sentences","Negative_Sentences"])
all_rag_attributes_df.columns = all_rag_attributes_df.columns.str.replace(" ","_")
all_rag_attributes_df.columns = [x.lower() for x in all_rag_attributes_df.columns]
all_rag_attributes_df = rag_cleaned_sentences_df.merge(all_rag_attributes_df, on="bggid", how="left")

rag_pros_cons_only_df = all_rag_attributes_df.drop(columns=["about","description"],axis=1)
rag_pros_cons_only_df = rag_pros_cons_only_df.fillna(0).set_index("bggid")
rag_pros_cons_only_df.columns = [x.lower() for x in rag_pros_cons_only_df.columns]

positive_columns = [x for x in rag_pros_cons_only_df.columns if x.startswith('positive_')]
negative_columns = [x for x in rag_pros_cons_only_df.columns if x.startswith('negative_')]
all_columns_to_produce = positive_columns + negative_columns

models_df_storage = {}
arrays_df_storage = {}


In [3]:
test_games = ["Dominion", "Gloomhaven", "Too Many Bones", "Pandemic", "Splendor", "Viticulture Essential Edition", "Great Western Trail", "Everdell", "Chess", "Azul", "Codenames", "Pandemic Legacy: Season 1"]

game_name = "Chess"
game_id = int(games_df[games_df['Name']==game_name]['BGGId'].values[0])
print(game_name, game_id)

Chess 171


In [4]:
def get_closet_picks_weaviate(df, game_id, model, n_picks=10):
    uuid = df.loc[df['bggid']==game_id]['UUID'].values[0]

    similars = weaviate_client.find_near_objects(collection_name=model, uuid=uuid, limit=50)

    picks = {}

    for item in similars:
        picks[str(item.uuid)]=item.metadata.distance
        
    picks = pd.DataFrame.from_dict(picks, columns=['distance'], orient='index').sort_values(by='distance', ascending=True).reset_index().rename(columns={'index':'UUID'})

    picks = picks.merge(df, on='UUID', how='inner')[1:]

    return picks[['bggid','distance']].head(n_picks)

In [5]:
def build_results_for_game(game_name):
    game_id = game_id_map[game_name]
    print(f"\nEvaluation: {game_name} {game_id}")

    single_game_results = {}

    for model, model_df in arrays_df_storage.items():
        print(f"Model: {model}")

        mask = model_df[game_id].sort_values(ascending=True) < 1.0
        picks = model_df[game_id].sort_values(ascending=True)[mask]

        # remove the entry that is the same as the game_id, by index. It is not necessarily entry 0
        picks = picks.drop(game_id)

        single_game_results[model] = picks

    for collection_name, model_df in models_df_storage.items():
        print(f"Model: {collection_name}")

        picks = get_closet_picks_weaviate(df=model_df, game_id=game_id, model=collection_name, n_picks=100)

        single_game_results[collection_name] = {x:y for x,y in zip(picks['bggid'], picks['distance'])}

    results_df = pd.DataFrame(single_game_results)

    results_df = results_df.fillna(1).round(3)
    results_df['average_score'] = results_df.mean(axis=1).round(2)

    results_df['name'] = results_df.index.map(id_game_map)

    return results_df.sort_values("average_score", ascending=True).head(20)

In [6]:
def explode_merge_fill(df, reference_df, fill=False, explode=False):
    if explode:
        df = explode_columnar_df(df)
    df = reference_df.merge(df, on='BGGId', how='left').set_index('BGGId')

    if fill==True:
        df = df.astype(float).replace(0,0.01)
        df = df.fillna(0.01)
    else:
        df = df.fillna(0)
    return df

def get_cosine_distances_df(df):
    cosine_sims = cosine_distances(df).round(3)
    return pd.DataFrame(cosine_sims, columns=bgg_ids, index=bgg_ids)

## Array Models - Themes, Categories, Mechanics, Mechanics, Pros-Cons, Game Family

### Weight

In [7]:
model="weight"
weight_df = games_df[['BGGId', 'GameWeight']].set_index("BGGId").round(2)

# Create the matrix of absolute differences
matrix = pd.DataFrame(
    np.abs(weight_df['GameWeight'].values[:, None] - weight_df['GameWeight'].values),
    index=weight_df.index,
    columns=weight_df.index
).round(4)

arrays_df_storage[model] = matrix

{id_game_map[x]:y for x,y in zip(matrix[game_id].sort_values(ascending=True)[:15].index, matrix[game_id].sort_values(ascending=True)[:15])}

{'Chess': 0.0,
 'ISS Vanguard': 0.0,
 'Eclipse: Second Dawn for the Galaxy': 0.0,
 'Millennium Blades': 0.0,
 'Pax Porfiriana': 0.01,
 'Vast: The Crystal Caverns': 0.01,
 'Trajan': 0.01,
 'Civilization': 0.01,
 'Agricola': 0.01,
 'Gloomhaven: Jaws of the Lion': 0.01,
 "Tzolk'in: The Mayan Calendar": 0.02,
 'Labyrinth: The War on Terror, 2001 – ?': 0.02,
 'Oathsworn: Into the Deepwood': 0.02,
 'Escape Plan': 0.02,
 'Bonfire': 0.02}

### Game Family

In [8]:
model="game_family"

family_df = explode_merge_fill(games_df[['BGGId', 'Family']], relevant_df, explode=True, fill=False)
sims_by_id = get_cosine_distances_df(family_df)

arrays_df_storage[model] = sims_by_id

results = [id_game_map[x] for x in (sims_by_id[game_id].sort_values(ascending=True)[:10].index)]

{id_game_map[x]:y for x,y in zip(sims_by_id[game_id].sort_values(ascending=True)[:15].index, sims_by_id[game_id].sort_values(ascending=True)[:15])}

{'Chess': 0.0,
 'Acquire': 0.0,
 'Go': 0.0,
 'Pagan: Fate of Roanoke': 1.0,
 'Draftosaurus': 1.0,
 'Broom Service': 1.0,
 'Carcassonne: Hunters and Gatherers': 1.0,
 'Queendomino': 1.0,
 'Pax Porfiriana': 1.0,
 'Baseball Highlights: 2045': 1.0,
 'Pathfinder Adventure Card Game: Rise of the Runelords – Base Set': 1.0,
 'Nexus Ops': 1.0,
 'Star Trek: Ascendancy': 1.0,
 'Colosseum': 1.0,
 "Warp's Edge": 1.0}

### Themes

In [9]:
model = "themes"

themes_df = pd.read_pickle(f"{read_dir}themes_clean.pkl")
themes_df = explode_merge_fill(themes_df, relevant_df, explode=True, fill=False)
sims_by_id = get_cosine_distances_df(themes_df)

arrays_df_storage[model] = sims_by_id

{id_game_map[x]:y for x,y in zip(sims_by_id[game_id].sort_values(ascending=True)[:15].index, sims_by_id[game_id].sort_values(ascending=True)[:15])}


{'Chess': 0.0,
 'Pagan: Fate of Roanoke': 1.0,
 'Draftosaurus': 1.0,
 'Broom Service': 1.0,
 'Carcassonne: Hunters and Gatherers': 1.0,
 'Queendomino': 1.0,
 'Pax Porfiriana': 1.0,
 'Baseball Highlights: 2045': 1.0,
 'Pathfinder Adventure Card Game: Rise of the Runelords – Base Set': 1.0,
 'Nexus Ops': 1.0,
 'Star Trek: Ascendancy': 1.0,
 'Colosseum': 1.0,
 "Warp's Edge": 1.0,
 'Vinhos': 1.0,
 'Brass: Birmingham': 1.0}

### Categories Only

In [10]:
# TO DO: Add this to the game cleaning script

# category_df = df[['BGGId','Cat:Thematic', 'Cat:Strategy', 'Cat:War', 'Cat:Family', 'Cat:CGS', 'Cat:Abstract', 'Cat:Party', 'Cat:Childrens']]
# subcats_df = pd.read_pickle(f"{read_dir}subcategories_clean.pkl")
# subcats_df = explode_columnar_df(subcats_df).reset_index()
# category_df = category_df.merge(subcats_df, on="BGGId", how="left")
# category_df = category_df.set_index("BGGId")
# category_df.to_pickle(f"{read_dir}categories_clean.pkl")
# category_df.head()

In [11]:
model = "categories"

cat_df = pd.read_pickle(f"{read_dir}categories_clean.pkl")

cat_df = explode_merge_fill(cat_df, relevant_df, fill=True)

sims_by_id = get_cosine_distances_df(cat_df)

arrays_df_storage[model] = sims_by_id

{id_game_map[x]:y for x,y in zip(sims_by_id[game_id].sort_values(ascending=True)[:15].index, sims_by_id[game_id].sort_values(ascending=True)[:15])}

{'Santorini': 0.0,
 'Ingenious': 0.0,
 'TZAAR': 0.0,
 'Go': 0.0,
 'Hive Pocket': 0.0,
 'DVONN': 0.0,
 'Mahjong': 0.0,
 'ZÈRTZ': 0.0,
 'Onitama': 0.0,
 'YINSH': 0.0,
 'The Duke': 0.0,
 'Chess': 0.0,
 'Blokus': 0.0,
 'Hive': 0.0,
 'Reef': 0.286}

### Mechanics Only

In [12]:
model = "mechanics"

mech_df = pd.read_pickle(f"{read_dir}mechanics_clean.pkl")

mech_df = explode_merge_fill(mech_df, relevant_df, explode=True)
sims_by_id = get_cosine_distances_df(mech_df)

arrays_df_storage[model] = sims_by_id

{id_game_map[x]:y for x,y in zip(sims_by_id[game_id].sort_values(ascending=True)[:15].index, sims_by_id[game_id].sort_values(ascending=True)[:15])}

{'Chess': 0.0,
 'Onitama': 0.27,
 'The Duke': 0.388,
 'Scoville': 0.592,
 'Under Falling Skies': 0.613,
 'Specter Ops': 0.613,
 'Quantum': 0.635,
 'Ricochet Robots': 0.635,
 'The Downfall of Pompeii': 0.691,
 'Mexica': 0.691,
 'Mage Wars Arena': 0.691,
 'Hive': 0.691,
 'Santorini': 0.691,
 'Arcadia Quest: Inferno': 0.691,
 'DVONN': 0.711}

### Pros Cons

In [13]:
model = "pros_cons_only"

sims_by_id = get_cosine_distances_df(rag_pros_cons_only_df)

arrays_df_storage[model] = sims_by_id

{id_game_map[x]:y for x,y in zip(sims_by_id[game_id].sort_values(ascending=True)[:15].index, sims_by_id[game_id].sort_values(ascending=True)[:15])}

{'Chess': 0.0,
 'Go': 0.09,
 'Terra Mystica': 0.102,
 'Lords of Vegas': 0.105,
 'Fury of Dracula (Second Edition)': 0.107,
 'Underwater Cities': 0.11,
 'Flamme Rouge': 0.11,
 'Robinson Crusoe: Adventures on the Cursed Island': 0.111,
 'Formula D': 0.111,
 'Downforce': 0.111,
 'Dominion': 0.111,
 'Middara: Unintentional Malum – Act 1': 0.112,
 'Jaipur': 0.112,
 'Shadows over Camelot': 0.113,
 'Terraforming Mars': 0.113}

### Various Numerical Attributes

In [14]:
various_df = games_df[['BGGId', 'Name',
    'BestPlayers',# 'YearPublished',
       'MfgPlaytime',
       'BayesAvgRating',
       'Rank:strategygames', 'Rank:abstracts', 'Rank:partygames',
       'Rank:wargames', 'Rank:thematic', 'Rank:familygames',
       'Rank:childrensgames', 'Rank:cgs', 'GameWeight']].copy()

# Find the mask where 'Rank:strategygames' is NA
mask = various_df['Rank:strategygames'].isna()

# Assign the index positions only to the rows matching the mask
various_df.loc[mask, 'Rank:strategygames'] = various_df.index[mask]

# various_df['GameAgeYears'] = datetime.now().year - various_df['YearPublished']
# various_df = various_df.drop(columns=['Name','YearPublished'], axis=1)

various_df = various_df.drop(columns=['Name'], axis=1)

various_df.columns = [x.lower() for x in various_df.columns]

various_df.head()

Unnamed: 0,bggid,bestplayers,mfgplaytime,bayesavgrating,rank:strategygames,rank:abstracts,rank:partygames,rank:wargames,rank:thematic,rank:familygames,rank:childrensgames,rank:cgs,gameweight
0,224517,3,120,8.41053,1,28017,28017,28017,28017,28017,28017,28017,3.8692
1,161936,4,60,8.37415,2,28017,28017,28017,1,28017,28017,28017,2.8297
2,174430,3,120,8.34152,4,28017,28017,28017,2,28017,28017,28017,3.911
3,342942,2,150,8.3385,3,28017,28017,28017,28017,28017,28017,28017,3.7673
4,363622,2,120,8.25461,4,28017,28017,28017,28017,28017,28017,28017,2.875


In [15]:
# sns.set(font_scale=1)
# games_c = various_df.drop("Name", axis=1).set_index("BGGId").corr()
# # plot a heat map for all correlations in our data set

# # make our figure
# fig, ax = plt.subplots(figsize=(10, 10))

# # we want our heatmap to not show the upper triangle, which is redundant data
# games_c_mask = np.triu(np.ones_like(games_c, dtype=bool))

# # adjust mask and df to hide center diagonal
# games_c_mask = games_c_mask[1:, :-1]
# corr = games_c.iloc[1:, :-1].copy()

# # color map
# cmap = sns.diverging_palette(220, 20, as_cmap=True)

# # plot heatmap
# sns.heatmap(
#     corr,
#     mask=games_c_mask,
#     annot=True,
#     fmt=".2f",
#     cmap=cmap,
#     vmin=-1,
#     vmax=1,
#     cbar_kws={"shrink": 0.8},
#     square=True,
# )

# # yticks rotate
# plt.yticks(rotation=1)

# # title
# title = "CORRELATION MATRIX\nRanking Categories\n"
# plt.title(title, fontsize=14)

# # plt.savefig('images/heatmap.png')

# plt.show()

In [16]:
various_df = various_df.set_index('bggid')

scaler = MinMaxScaler(feature_range=(0,1))

for col in various_df.columns:
    various_df[col] = scaler.fit_transform(various_df[[col]])

various_df = various_df.fillna(0)

various_df.head(2)

Unnamed: 0_level_0,bestplayers,mfgplaytime,bayesavgrating,rank:strategygames,rank:abstracts,rank:partygames,rank:wargames,rank:thematic,rank:familygames,rank:childrensgames,rank:cgs,gameweight
bggid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
224517,0.181818,0.1,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.751198
161936,0.272727,0.05,0.979446,3.6e-05,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.479042


In [17]:
model = "various_attributes"

sims_byid = get_cosine_distances_df(various_df)

arrays_df_storage[model] = sims_byid

{id_game_map[x]:y for x,y in zip(sims_by_id[game_id].sort_values(ascending=True)[:15].index, sims_by_id[game_id].sort_values(ascending=True)[:15])}

{'Chess': 0.0,
 'Go': 0.09,
 'Terra Mystica': 0.102,
 'Lords of Vegas': 0.105,
 'Fury of Dracula (Second Edition)': 0.107,
 'Underwater Cities': 0.11,
 'Flamme Rouge': 0.11,
 'Robinson Crusoe: Adventures on the Cursed Island': 0.111,
 'Formula D': 0.111,
 'Downforce': 0.111,
 'Dominion': 0.111,
 'Middara: Unintentional Malum – Act 1': 0.112,
 'Jaipur': 0.112,
 'Shadows over Camelot': 0.113,
 'Terraforming Mars': 0.113}

## Vector DB Models

### About Only

In [18]:
# collection_name = "about_only"

# about_df = rag_cleaned_sentences_df[['bggid','about']].copy()

# weaviate_client.create_bgg_collection(collection_name=collection_name, reset=True, use_about=True)

# about_df = weaviate_client.add_bgg_collection_batch(df=about_df, collection_name=collection_name, use_about=True)

# about_df.to_pickle(f'{sims_dir}about_df.pkl')

In [19]:
collection_name = "about_only"

about_df = pd.read_pickle(f'{sims_dir}about_df.pkl')
models_df_storage[collection_name] = about_df

picks = get_closet_picks_weaviate(about_df, game_id, model=collection_name, n_picks=10)

picks['name'] = [id_game_map[x] for x in picks['bggid']]

picks

Unnamed: 0,bggid,distance,name
1,2453,0.205593,Blokus
2,2655,0.20962,Hive
3,188,0.221789,Go
4,31999,0.235672,TZAAR
5,244114,0.24636,Yellow & Yangtze
6,294484,0.251611,Unmatched: Cobble & Fog
7,124742,0.253118,Android: Netrunner
8,21763,0.25346,Mr. Jack
9,7854,0.253993,YINSH
10,156566,0.260023,Lords of Xidit


### About and Description

In [20]:
# collection_name = "about_and_desc"

# desc_df = rag_cleaned_sentences_df[['bggid','about', 'description']].copy()

# weaviate_client.create_bgg_collection(collection_name=collection_name, reset=True, use_about=True, use_description=True)

# about_df = weaviate_client.add_bgg_collection_batch(df=desc_df, collection_name=collection_name, use_about=True, use_description=True)

# desc_df.to_pickle(f'{sims_dir}desc_df.pkl')

In [21]:
# collection_name = "about_and_desc"

# desc_df = pd.read_pickle(f'{sims_dir}desc_df.pkl')
# models_df_storage[collection_name] = desc_df

# picks = get_closet_picks_weaviate(desc_df, game_id, model=collection_name, n_picks=10)

# picks['name'] = [id_game_map[x] for x in picks['bggid']]

# picks

### About and Keywords

In [22]:
# collection_name = "about_and_pros_cons"

# about_and_pros_cons = all_rag_attributes_df.drop(columns=['description']).copy()

# weaviate_client.create_bgg_collection(collection_name=collection_name, reset=True, use_about=True, attributes=all_columns_to_produce)

# about_df = weaviate_client.add_bgg_collection_batch(df=about_and_pros_cons, collection_name=collection_name, use_about=True, attributes=all_columns_to_produce)

# about_and_pros_cons.to_pickle(f'{sims_dir}about_and_pros_cons.pkl')

In [23]:
collection_name = "about_and_pros_cons"

about_and_pros_cons = pd.read_pickle(f'{sims_dir}about_and_pros_cons.pkl')
models_df_storage[collection_name] = about_and_pros_cons

picks = get_closet_picks_weaviate(about_and_pros_cons, game_id, model=collection_name, n_picks=10)

picks['name'] = [id_game_map[x] for x in picks['bggid']]

picks

Unnamed: 0,bggid,distance,name
1,2453,0.217975,Blokus
2,2655,0.22318,Hive
3,188,0.233348,Go
4,31999,0.246654,TZAAR
5,124742,0.257665,Android: Netrunner
6,294484,0.261491,Unmatched: Cobble & Fog
7,7854,0.261783,YINSH
8,244114,0.26311,Yellow & Yangtze
9,21763,0.265621,Mr. Jack
10,156566,0.268559,Lords of Xidit


### With Keywords, About, and Desc

In [24]:
# collection_name = "all_attributes"

# # Optional scaling of attributes
# # scaler = MinMaxScaler(feature_range=(0,1))
# # for col in all_columns_to_produce:
# #     df[col] = scaler.fit_transform(df[[col]])

# weaviate_client.create_bgg_collection(collection_name=collection_name, reset=True, use_about=True, use_description=True, attributes=all_columns_to_produce)

# all_attributes_df = weaviate_client.add_bgg_collection_batch(df=all_rag_attributes_df, collection_name=collection_name, use_about=True, use_description=True, attributes=all_columns_to_produce)

# all_attributes_df.to_pickle(f'{sims_dir}all_attributes_df.pkl')

In [25]:
# collection_name = "all_attributes"

# all_attributes_df = pd.read_pickle(f'{sims_dir}all_attributes_df.pkl')
# models_df_storage[collection_name] = all_attributes_df

# picks = get_closet_picks_weaviate(all_attributes_df, game_id, model=collection_name, n_picks=10)

# picks['name'] = [id_game_map[x] for x in picks['bggid']]

# picks

### All Comparisons

In [26]:
models_df_storage.keys(), arrays_df_storage.keys()

(dict_keys(['about_only', 'about_and_pros_cons']),
 dict_keys(['weight', 'game_family', 'themes', 'categories', 'mechanics', 'pros_cons_only', 'various_attributes']))

In [27]:
test_games

['Dominion',
 'Gloomhaven',
 'Too Many Bones',
 'Pandemic',
 'Splendor',
 'Viticulture Essential Edition',
 'Great Western Trail',
 'Everdell',
 'Chess',
 'Azul',
 'Codenames',
 'Pandemic Legacy: Season 1']

In [28]:
build_results_for_game("Too Many Bones")


Evaluation: Too Many Bones 192135
Model: weight
Model: game_family
Model: themes
Model: categories
Model: mechanics
Model: pros_cons_only
Model: various_attributes
Model: about_only
Model: about_and_pros_cons


Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
235802,0.19,0.0,0.134,0.0,0.298,0.124,0.005,0.288,0.29,0.15,Too Many Bones: Undertow
174430,0.06,1.0,0.134,0.179,0.612,0.134,0.005,0.248,0.244,0.29,Gloomhaven
291457,0.21,1.0,0.134,0.179,0.682,0.14,0.002,0.241,0.244,0.31,Gloomhaven: Jaws of the Lion
169427,0.27,1.0,0.134,0.49,0.44,0.11,0.087,0.22,0.22,0.33,Middara: Unintentional Malum – Act 1
170771,0.26,1.0,0.134,0.49,0.51,0.129,0.083,0.239,0.244,0.34,Sword & Sorcery
295770,0.53,1.0,0.134,0.179,0.608,0.109,0.002,0.272,0.275,0.35,Frosthaven
55690,0.42,1.0,0.244,0.49,0.412,0.124,0.067,0.266,0.272,0.37,Kingdom Death: Monster
251661,0.18,1.0,0.106,0.632,0.653,0.152,0.068,0.295,0.295,0.38,Oathsworn: Into the Deepwood
146791,0.47,1.0,0.329,0.49,0.476,0.1,0.091,0.276,0.281,0.39,Shadows of Brimstone: City of the Ancients
150997,0.45,1.0,0.5,0.49,0.476,0.12,0.1,0.268,0.269,0.41,Shadows of Brimstone: Swamps of Death


In [29]:
build_results_for_game("Gloomhaven")


Evaluation: Gloomhaven 174430
Model: weight
Model: game_family
Model: themes
Model: categories
Model: mechanics
Model: pros_cons_only
Model: various_attributes
Model: about_only
Model: about_and_pros_cons


Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
291457,0.27,0.0,0.0,0.0,0.128,0.128,0.001,0.205,0.2,0.1,Gloomhaven: Jaws of the Lion
295770,0.47,0.0,0.0,0.0,0.199,0.12,0.005,0.266,0.26,0.15,Frosthaven
169427,0.21,1.0,0.0,0.179,0.423,0.113,0.106,0.234,0.228,0.28,Middara: Unintentional Malum – Act 1
96848,0.46,1.0,0.0,0.0,0.38,0.137,0.009,0.27,0.269,0.28,Mage Knight Board Game
192135,0.06,1.0,0.134,0.179,0.612,0.134,0.005,0.248,0.244,0.29,Too Many Bones
170771,0.2,1.0,0.0,0.179,0.646,0.126,0.104,0.238,0.233,0.3,Sword & Sorcery
251661,0.24,1.0,0.225,0.414,0.45,0.145,0.073,0.272,0.27,0.34,Oathsworn: Into the Deepwood
17226,0.57,1.0,0.0,0.179,0.622,0.141,0.113,0.217,0.22,0.34,Descent: Journeys in the Dark
55690,0.36,1.0,0.345,0.179,0.505,0.141,0.074,0.254,0.252,0.35,Kingdom Death: Monster
146791,0.53,1.0,0.225,0.179,0.546,0.119,0.111,0.258,0.258,0.36,Shadows of Brimstone: City of the Ancients


In [30]:
all_game_results = {}
for game_name in test_games:
    all_game_results[game_name] = build_results_for_game(game_name)


Evaluation: Dominion 36218
Model: weight
Model: game_family
Model: themes
Model: categories
Model: mechanics
Model: pros_cons_only
Model: various_attributes
Model: about_only
Model: about_and_pros_cons

Evaluation: Gloomhaven 174430
Model: weight
Model: game_family
Model: themes
Model: categories
Model: mechanics
Model: pros_cons_only
Model: various_attributes
Model: about_only
Model: about_and_pros_cons

Evaluation: Too Many Bones 192135
Model: weight
Model: game_family
Model: themes
Model: categories
Model: mechanics
Model: pros_cons_only
Model: various_attributes
Model: about_only
Model: about_and_pros_cons

Evaluation: Pandemic 30549
Model: weight
Model: game_family
Model: themes
Model: categories
Model: mechanics
Model: pros_cons_only
Model: various_attributes
Model: about_only
Model: about_and_pros_cons

Evaluation: Splendor 148228
Model: weight
Model: game_family
Model: themes
Model: categories
Model: mechanics
Model: pros_cons_only
Model: various_attributes
Model: about_only
M

In [31]:
all_game_results["Dominion"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
209418,0.19,0.0,0.0,0.0,0.0,0.139,0.001,0.126,0.129,0.06,Dominion (Second Edition)
40834,0.06,0.0,0.0,0.0,0.087,0.133,0.0,0.153,0.159,0.07,Dominion: Intrigue
244115,0.29,1.0,1.0,0.0,0.087,0.126,0.01,0.21,0.212,0.33,Shards of Infinity
147020,0.43,1.0,1.0,0.0,0.134,0.141,0.001,0.232,0.229,0.35,Star Realms
69789,0.21,1.0,1.0,0.0,0.293,0.118,0.012,0.262,0.258,0.35,Ascension: Deckbuilding Game
121408,0.03,1.0,1.0,0.0,0.592,0.107,0.01,0.254,0.254,0.36,Trains
68425,0.16,1.0,1.0,0.0,0.508,0.168,0.013,0.234,0.234,0.37,Eminent Domain
296912,0.1,1.0,1.0,0.0,0.592,0.136,0.01,0.26,0.262,0.37,Fort
271324,0.03,1.0,1.0,0.0,0.728,0.135,0.001,0.234,0.235,0.37,It's a Wonderful World
107529,0.29,1.0,0.0,0.49,0.877,0.113,0.081,0.268,0.269,0.38,Kingdom Builder


In [32]:
all_game_results["Gloomhaven"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
291457,0.27,0.0,0.0,0.0,0.128,0.128,0.001,0.205,0.2,0.1,Gloomhaven: Jaws of the Lion
295770,0.47,0.0,0.0,0.0,0.199,0.12,0.005,0.266,0.26,0.15,Frosthaven
169427,0.21,1.0,0.0,0.179,0.423,0.113,0.106,0.234,0.228,0.28,Middara: Unintentional Malum – Act 1
96848,0.46,1.0,0.0,0.0,0.38,0.137,0.009,0.27,0.269,0.28,Mage Knight Board Game
192135,0.06,1.0,0.134,0.179,0.612,0.134,0.005,0.248,0.244,0.29,Too Many Bones
170771,0.2,1.0,0.0,0.179,0.646,0.126,0.104,0.238,0.233,0.3,Sword & Sorcery
251661,0.24,1.0,0.225,0.414,0.45,0.145,0.073,0.272,0.27,0.34,Oathsworn: Into the Deepwood
17226,0.57,1.0,0.0,0.179,0.622,0.141,0.113,0.217,0.22,0.34,Descent: Journeys in the Dark
55690,0.36,1.0,0.345,0.179,0.505,0.141,0.074,0.254,0.252,0.35,Kingdom Death: Monster
146791,0.53,1.0,0.225,0.179,0.546,0.119,0.111,0.258,0.258,0.36,Shadows of Brimstone: City of the Ancients


In [33]:
all_game_results["Everdell"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
319422,0.01,0.0,0.0,0.179,0.147,0.126,0.088,0.287,0.299,0.13,Everdell: Collector's Edition
332398,0.2,0.0,0.0,0.0,0.273,0.163,0.004,0.346,0.356,0.15,Everdell: The Complete Collection
192458,0.08,1.0,0.75,0.179,0.508,0.126,0.085,0.347,0.345,0.38,51st State: Master Set
314491,0.57,1.0,0.711,0.0,0.316,0.14,0.008,0.347,0.341,0.38,Meadow
262712,0.18,1.0,0.646,0.179,0.574,0.13,0.075,0.324,0.319,0.38,Res Arcana
206941,0.03,1.0,1.0,0.179,0.478,0.15,0.091,0.353,0.354,0.4,First Class: All Aboard the Orient Express!
21882,0.53,1.0,0.293,0.179,0.73,0.116,0.032,0.352,0.348,0.4,Blue Moon City
232414,0.03,1.0,0.75,0.327,0.631,0.154,0.09,0.332,0.321,0.4,Oceans
400314,0.14,1.0,0.423,0.411,0.631,0.151,0.082,0.358,0.36,0.4,Apiary
143693,0.13,1.0,0.75,0.411,0.455,0.173,0.082,0.349,0.341,0.41,Glass Road


In [34]:
all_game_results["Viticulture Essential Edition"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
128621,0.05,0.0,0.0,0.0,0.324,0.099,0.009,0.21,0.217,0.1,Viticulture
104006,0.17,1.0,0.184,0.0,0.622,0.158,0.006,0.395,0.413,0.33,Village
39683,0.22,1.0,0.184,0.0,0.691,0.108,0.015,0.417,0.437,0.34,At the Gates of Loyang
196340,0.4,1.0,0.293,0.0,0.733,0.116,0.004,0.352,0.369,0.36,Yokohama
234277,0.04,1.0,0.592,0.0,0.733,0.161,0.016,0.403,0.425,0.37,Nusfjord
251219,0.38,1.0,0.293,0.0,0.733,0.099,0.011,0.379,0.396,0.37,Istanbul: Big Box
24480,0.08,1.0,0.646,0.0,0.782,0.116,0.013,0.396,0.426,0.38,The Pillars of the Earth
124361,0.1,1.0,0.592,0.0,0.79,0.134,0.0,0.415,0.429,0.38,Concordia
58421,0.06,1.0,0.592,0.0,0.811,0.113,0.02,0.4,0.421,0.38,Egizia
148949,0.31,1.0,0.293,0.286,0.622,0.149,0.071,0.347,0.359,0.38,Istanbul


In [35]:
all_game_results["Pandemic"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
150658,0.34,0.0,0.5,0.0,0.283,0.119,0.007,0.254,0.251,0.19,Pandemic: The Cure
198928,0.19,0.0,0.293,0.286,0.244,0.117,0.07,0.252,0.25,0.19,Iberia
161936,0.43,0.0,0.184,0.49,0.163,0.125,0.167,0.225,0.225,0.22,Pandemic Legacy: Season 1
370913,0.25,0.0,1.0,0.0,0.198,0.121,0.014,0.261,0.258,0.23,Star Wars: The Clone Wars
234671,0.13,0.0,1.0,0.286,0.332,0.142,0.086,0.25,0.246,0.27,Pandemic: Rising Tide
314040,0.75,0.0,1.0,0.49,0.163,0.117,0.157,0.23,0.223,0.35,Pandemic Legacy: Season 0
248490,0.05,1.0,1.0,0.0,0.691,0.108,0.008,0.251,0.246,0.37,Atlantis Rising (Second Edition)
260428,0.05,0.0,1.0,0.286,0.244,0.129,0.075,1.0,1.0,0.42,Fall of Rome
136063,0.36,1.0,1.0,0.286,0.564,0.117,0.078,0.26,0.246,0.43,Forbidden Desert
307002,0.49,1.0,1.0,0.179,0.662,0.121,0.011,0.239,0.228,0.44,Regicide


In [36]:
all_game_results["Splendor"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
364073,0.22,0.0,0.184,0.179,0.106,0.151,0.073,0.21,0.22,0.15,Splendor Duel
293296,0.02,0.0,0.592,0.0,0.0,0.15,0.004,0.334,0.345,0.16,Splendor: Marvel
232832,0.09,1.0,0.5,0.179,0.493,0.126,0.071,0.258,0.262,0.33,Century: Golem Edition
385761,0.08,1.0,1.0,0.0,0.329,0.136,0.001,0.336,0.343,0.36,Faraway
379629,0.13,1.0,1.0,0.0,0.329,0.117,0.005,0.321,0.322,0.36,Knarr
241724,0.43,1.0,1.0,0.0,0.329,0.125,0.008,0.278,0.283,0.38,Villagers
290236,0.09,1.0,1.0,0.286,0.493,0.158,0.003,0.354,0.347,0.41,Canvas
367220,0.32,1.0,1.0,0.0,0.553,0.147,0.002,0.341,0.346,0.41,Sea Salt & Paper
56692,0.32,1.0,1.0,0.0,0.684,0.13,0.01,0.315,0.317,0.42,Parade
314503,0.02,1.0,1.0,0.0,1.0,0.134,0.01,0.314,0.315,0.42,Codex Naturalis


In [37]:
all_game_results["Great Western Trail"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
341169,0.02,0.0,0.0,0.0,0.118,0.11,0.0,0.276,0.281,0.09,Great Western Trail: Second Edition
364011,0.19,0.0,0.184,0.0,0.065,0.131,0.009,0.299,0.303,0.13,Great Western Trail: Argentina
380607,0.26,0.0,0.184,0.0,0.143,0.147,0.006,0.3,0.306,0.15,Great Western Trail: New Zealand
196340,0.41,1.0,0.423,0.0,0.733,0.147,0.007,0.25,0.249,0.36,Yokohama
306040,0.27,1.0,0.742,0.0,0.496,0.134,0.031,0.299,0.3,0.36,Merv: The Heart of the Silk Road
276025,0.21,1.0,0.711,0.0,0.555,0.145,0.002,0.307,0.314,0.36,Maracaibo
262215,0.23,1.0,0.423,0.0,0.748,0.141,0.025,0.324,0.329,0.36,Blackout: Hong Kong
238799,0.11,1.0,0.711,0.0,0.733,0.136,0.033,0.319,0.324,0.37,Messina 1347
140620,0.38,1.0,0.592,0.0,0.564,0.134,0.014,0.309,0.313,0.37,Lewis & Clark: The Expedition
35677,0.01,1.0,0.711,0.0,0.857,0.128,0.002,0.298,0.302,0.37,Le Havre


In [38]:
all_game_results["Chess"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
188,0.28,0.0,1.0,0.0,0.711,0.09,0.004,0.222,0.233,0.28,Go
36235,1.0,1.0,1.0,0.0,0.388,0.144,0.009,0.28,0.294,0.46,The Duke
2655,1.0,1.0,1.0,0.0,0.691,0.138,0.009,0.21,0.223,0.47,Hive
31999,1.0,1.0,1.0,0.0,0.711,0.132,0.006,0.236,0.247,0.48,TZAAR
7854,1.0,1.0,1.0,0.0,0.764,0.139,0.006,0.254,0.262,0.49,YINSH
528,0.91,1.0,1.0,0.0,0.764,0.134,0.007,0.273,0.281,0.49,ZÈRTZ
2453,1.0,1.0,1.0,0.0,0.846,0.119,0.021,0.206,0.218,0.49,Blokus
2346,0.99,1.0,1.0,0.0,0.711,0.128,0.005,0.271,0.284,0.49,DVONN
154597,1.0,1.0,1.0,0.0,0.846,0.133,0.011,0.274,0.289,0.51,Hive Pocket
9674,1.0,1.0,1.0,0.0,1.0,0.132,0.014,0.273,0.28,0.52,Ingenious


In [39]:
all_game_results["Azul"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
287954,0.29,0.0,0.293,0.0,0.0,0.128,0.003,0.178,0.178,0.12,Azul: Summer Pavilion
256226,0.24,0.0,0.0,0.0,0.087,0.15,0.082,0.301,0.297,0.13,Azul: Stained Glass of Sintra
346965,1.0,0.0,0.0,0.0,0.106,0.173,0.023,0.237,0.235,0.2,Azul: Queen's Garden
363247,0.02,0.0,1.0,0.0,0.106,0.171,0.023,0.297,0.287,0.21,Azul: Master Chocolatier
283155,0.42,1.0,1.0,0.0,0.255,0.132,0.004,0.272,0.278,0.37,Calico
284435,0.1,1.0,1.0,0.0,0.742,0.124,0.014,0.321,0.317,0.4,Nova Luna
163412,0.17,1.0,1.0,0.0,0.662,0.144,0.001,0.301,0.302,0.4,Patchwork
357563,0.01,1.0,1.0,0.411,0.324,0.152,0.082,0.316,0.316,0.4,Akropolis
244228,0.05,1.0,1.0,0.179,0.553,0.169,0.093,0.284,0.291,0.4,Reef
199561,0.15,1.0,0.293,0.0,0.404,0.121,0.002,1.0,1.0,0.44,Sagrada


In [40]:
all_game_results["Codenames"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
198773,0.02,0.0,0.0,0.179,0.0,0.144,0.004,0.344,0.351,0.12,Codenames: Pictures
224037,0.1,0.0,0.0,0.327,0.2,0.147,0.147,0.229,0.24,0.15,Codenames: Duet
225694,0.56,1.0,0.293,0.179,0.329,0.138,0.002,0.307,0.317,0.35,Decrypto
41114,0.33,1.0,0.293,0.327,0.576,0.122,0.004,0.453,0.452,0.4,The Resistance
329839,0.15,1.0,1.0,0.179,0.684,0.159,0.004,0.29,0.294,0.42,So Clover!
256788,0.02,1.0,1.0,0.179,0.662,0.122,0.011,0.384,0.385,0.42,Detective Club
36553,0.08,1.0,1.0,0.411,0.329,0.123,0.007,0.447,0.449,0.43,Time's Up! Title Recall!
156546,0.16,1.0,1.0,0.327,0.635,0.141,0.004,0.359,0.368,0.44,Monikers
188834,0.48,1.0,0.423,0.327,0.662,0.121,0.004,0.451,0.461,0.44,Secret Hitler
254640,0.22,1.0,1.0,0.179,0.742,0.162,0.0,0.392,0.395,0.45,Just One


In [41]:
all_game_results["Pandemic Legacy: Season 1"]

Unnamed: 0,weight,game_family,themes,categories,mechanics,pros_cons_only,various_attributes,about_only,about_and_pros_cons,average_score,name
221107,0.43,0.0,0.423,0.0,0.087,0.128,0.01,0.154,0.148,0.15,Pandemic Legacy: Season 2
314040,0.32,0.0,1.0,0.0,0.0,0.15,0.009,0.238,0.239,0.22,Pandemic Legacy: Season 0
30549,0.43,0.0,0.184,0.49,0.163,0.125,0.167,0.225,0.225,0.22,Pandemic
192153,0.68,0.0,1.0,0.0,0.163,0.156,0.046,0.274,0.274,0.29,Reign of Cthulhu
370913,0.68,0.0,1.0,0.49,0.329,0.129,0.212,0.237,0.241,0.37,Star Wars: The Clone Wars
198928,0.24,0.0,0.423,0.286,0.262,0.127,0.088,1.0,1.0,0.38,Iberia
234671,0.3,0.0,0.423,0.286,0.329,0.16,0.134,1.0,1.0,0.4,Pandemic: Rising Tide
163602,0.08,1.0,1.0,0.286,0.613,0.144,0.133,0.253,0.251,0.42,XCOM: The Board Game
156858,0.41,1.0,1.0,0.286,0.452,0.131,0.114,0.256,0.257,0.43,Black Orchestra
240196,0.06,1.0,1.0,0.49,0.702,0.157,0.114,0.209,0.206,0.44,Betrayal Legacy
