In [30]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import threadpoolctl as tpc
import sklearn 

In [29]:
!pip install threadpoolctl



In [31]:
# Load the raw games table and preview the first rows.
# Forward slashes avoid the invalid "\R" / "\g" escape sequences of the
# backslash spelling and resolve on Windows as well as on POSIX systems.
file = pd.read_csv("./Resources/games.csv")
file.head()

Unnamed: 0,BGGId,Name,Description,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,MinPlayers,MaxPlayers,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
0,1,Die Macher,die macher game seven sequential political rac...,1986,4.3206,7.61428,7.10363,1.57979,3,5,...,21926,21926,0,1,0,0,0,0,0,0
1,2,Dragonmaster,dragonmaster tricktaking card game base old ga...,1981,1.963,6.64537,5.78447,1.4544,3,4,...,21926,21926,0,1,0,0,0,0,0,0
2,3,Samurai,samurai set medieval japan player compete gain...,1998,2.4859,7.45601,7.23994,1.18227,2,4,...,21926,21926,0,1,0,0,0,0,0,0
3,4,Tal der Könige,triangular box luxurious large block tal der k...,1992,2.6667,6.60006,5.67954,1.23129,2,4,...,21926,21926,0,0,0,0,0,0,0,0
4,5,Acquire,acquire player strategically invest business t...,1964,2.5031,7.33861,7.14189,1.33583,2,6,...,21926,21926,0,1,0,0,0,0,0,0


In [32]:
# Display the label of every column in the raw table.
file.columns

Index(['BGGId', 'Name', 'Description', 'YearPublished', 'GameWeight',
       'AvgRating', 'BayesAvgRating', 'StdDev', 'MinPlayers', 'MaxPlayers',
       'ComAgeRec', 'LanguageEase', 'BestPlayers', 'GoodPlayers', 'NumOwned',
       'NumWant', 'NumWish', 'NumWeightVotes', 'MfgPlaytime', 'ComMinPlaytime',
       'ComMaxPlaytime', 'MfgAgeRec', 'NumUserRatings', 'NumComments',
       'NumAlternates', 'NumExpansions', 'NumImplementations',
       'IsReimplementation', 'Family', 'Kickstarted', 'ImagePath',
       'Rank:boardgame', 'Rank:strategygames', 'Rank:abstracts',
       'Rank:familygames', 'Rank:thematic', 'Rank:cgs', 'Rank:wargames',
       'Rank:partygames', 'Rank:childrensgames', 'Cat:Thematic',
       'Cat:Strategy', 'Cat:War', 'Cat:Family', 'Cat:CGS', 'Cat:Abstract',
       'Cat:Party', 'Cat:Childrens'],
      dtype='object')

In [33]:
# Print dtype and non-null count per column; columns whose non-null count is
# below the row count contain missing values.
file.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21925 entries, 0 to 21924
Data columns (total 48 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   BGGId                21925 non-null  int64  
 1   Name                 21925 non-null  object 
 2   Description          21924 non-null  object 
 3   YearPublished        21925 non-null  int64  
 4   GameWeight           21925 non-null  float64
 5   AvgRating            21925 non-null  float64
 6   BayesAvgRating       21925 non-null  float64
 7   StdDev               21925 non-null  float64
 8   MinPlayers           21925 non-null  int64  
 9   MaxPlayers           21925 non-null  int64  
 10  ComAgeRec            16395 non-null  float64
 11  LanguageEase         16034 non-null  float64
 12  BestPlayers          21925 non-null  int64  
 13  GoodPlayers          21925 non-null  object 
 14  NumOwned             21925 non-null  int64  
 15  NumWant              21925 non-null 

In [34]:
# Build the working table for the nearest-neighbour model.
#
# Compared with the raw table, this selection leaves out Description,
# BestPlayers, GoodPlayers, NumWeightVotes, NumComments, Family and ImagePath:
#   * player-recommendation columns, the image URL and columns with many null
#     values are removed;
#   * NumComments is removed for lack of data, based on the percentage of
#     populated (> 0) values computed in "Data Cleaning.ipynb".
# NumAlternates, NumExpansions and NumImplementations are also sparsely
# populated, but they are kept because they carry information: the number of
# alternate versions, expansions and implementations a game has.
# NOTE(review): ComAgeRec and LanguageEase are kept although file.info()
# reports missing values for both - confirm this is intended.
selected_columns = [
    'BGGId', 'Name', 'YearPublished', 'GameWeight',
    'AvgRating', 'BayesAvgRating', 'StdDev', 'MinPlayers', 'MaxPlayers',
    'ComAgeRec', 'LanguageEase',
    'NumOwned', 'NumWant', 'NumWish', 'MfgPlaytime', 'ComMinPlaytime',
    'ComMaxPlaytime', 'MfgAgeRec', 'NumUserRatings',
    'NumAlternates', 'NumExpansions', 'NumImplementations',
    'IsReimplementation', 'Kickstarted',
    'Rank:boardgame', 'Rank:strategygames', 'Rank:abstracts',
    'Rank:familygames', 'Rank:thematic', 'Rank:cgs', 'Rank:wargames',
    'Rank:partygames', 'Rank:childrensgames', 'Cat:Thematic',
    'Cat:Strategy', 'Cat:War', 'Cat:Family', 'Cat:CGS', 'Cat:Abstract',
    'Cat:Party', 'Cat:Childrens',
]

# Explicit label selection raises KeyError on a misspelled column, whereas
# pd.DataFrame(file, columns=...) would silently add an all-NaN column.
# .copy() keeps the result independent of the raw table.
nn_game_df = file.loc[:, selected_columns].copy()

nn_game_df.head()

Unnamed: 0,BGGId,Name,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,MinPlayers,MaxPlayers,ComAgeRec,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
0,1,Die Macher,1986,4.3206,7.61428,7.10363,1.57979,3,5,14.366667,...,21926,21926,0,1,0,0,0,0,0,0
1,2,Dragonmaster,1981,1.963,6.64537,5.78447,1.4544,3,4,,...,21926,21926,0,1,0,0,0,0,0,0
2,3,Samurai,1998,2.4859,7.45601,7.23994,1.18227,2,4,9.307692,...,21926,21926,0,1,0,0,0,0,0,0
3,4,Tal der Könige,1992,2.6667,6.60006,5.67954,1.23129,2,4,13.0,...,21926,21926,0,0,0,0,0,0,0,0
4,5,Acquire,1964,2.5031,7.33861,7.14189,1.33583,2,6,11.410256,...,21926,21926,0,1,0,0,0,0,0,0


In [35]:
# Standardise every model feature with StandardScaler. The identifier columns
# BGGId and Name are not part of the list, so they are not scaled.
# The label list is written once and reused for both the input selection and
# the column headers of the result, so the two cannot drift apart.
scaled_feature_cols = [
    'YearPublished', 'GameWeight',
    'AvgRating', 'BayesAvgRating', 'StdDev', 'MinPlayers', 'MaxPlayers',
    'ComAgeRec', 'LanguageEase', 'NumOwned', 'NumWant', 'NumWish',
    'MfgPlaytime', 'ComMinPlaytime', 'ComMaxPlaytime', 'MfgAgeRec',
    'NumUserRatings', 'NumAlternates', 'NumExpansions', 'NumImplementations',
    'IsReimplementation', 'Kickstarted', 'Rank:boardgame', 'Rank:strategygames',
    'Rank:abstracts', 'Rank:familygames', 'Rank:thematic', 'Rank:cgs',
    'Rank:wargames', 'Rank:partygames', 'Rank:childrensgames',
    'Cat:Thematic', 'Cat:Strategy', 'Cat:War', 'Cat:Family', 'Cat:CGS',
    'Cat:Abstract', 'Cat:Party', 'Cat:Childrens',
]

features_to_scale = nn_game_df[scaled_feature_cols]
nn_df_scaled = StandardScaler().fit_transform(features_to_scale)

# Wrap the scaled values in a DataFrame again so each column keeps its label.
# NOTE(review): missing values (e.g. in ComAgeRec / LanguageEase) appear to
# pass through scaling as NaN - handle them before using those columns.
nn_df_transformed = pd.DataFrame(nn_df_scaled, columns=scaled_feature_cols)
nn_df_transformed

Unnamed: 0,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,MinPlayers,MaxPlayers,ComAgeRec,LanguageEase,NumOwned,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
0,0.002377,2.754499,1.275510,3.881596,0.222067,1.432245,-0.047146,1.334414,-0.909033,1.139054,...,0.173395,0.204594,-0.243162,2.907663,-0.438064,-0.34367,-0.118379,-0.231474,-0.173402,-0.204609
1,-0.021154,-0.022535,0.236416,0.270454,-0.217018,1.432245,-0.113749,,-0.800810,-0.034539,...,0.173395,0.204594,-0.243162,2.907663,-0.438064,-0.34367,-0.118379,-0.231474,-0.173402,-0.204609
2,0.058853,0.593393,1.105776,4.254739,-1.169949,-0.010595,-0.113749,-0.213119,-0.910706,2.665310,...,0.173395,0.204594,-0.243162,2.907663,-0.438064,-0.34367,-0.118379,-0.231474,-0.173402,-0.204609
3,0.030615,0.806359,0.187824,-0.016787,-0.998293,-0.010595,-0.113749,0.916352,0.167118,-0.156753,...,0.173395,0.204594,-0.243162,-0.343919,-0.438064,-0.34367,-0.118379,-0.231474,-0.173402,-0.204609
4,-0.101161,0.613653,0.979872,3.986331,-0.632220,-0.010595,0.019457,0.430052,-0.825524,4.206111,...,0.173395,0.204594,-0.243162,2.907663,-0.438064,-0.34367,-0.118379,-0.231474,-0.173402,-0.204609
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21920,0.167097,-2.334766,1.099331,-0.440874,0.135013,1.432245,-0.113749,0.610454,,-0.268577,...,0.173395,0.204594,-0.243162,-0.343919,-0.438064,-0.34367,-0.118379,-0.231474,-0.173402,-0.204609
21921,0.157685,-0.371548,1.606336,-0.335181,-2.178684,-1.453435,-0.246956,1.834048,-0.500710,-0.248555,...,0.173395,0.204594,-0.243162,-0.343919,-0.438064,-0.34367,-0.118379,-0.231474,-0.173402,-0.204609
21922,0.167097,3.039435,0.584239,-0.399758,8.881123,-1.453435,-0.180353,0.610454,-0.259784,-0.276133,...,0.173395,0.204594,-0.243162,-0.343919,-0.438064,-0.34367,-0.118379,-0.231474,-0.173402,-0.204609
21923,0.167097,-1.156859,0.061587,-0.436248,-1.247688,-1.453435,0.419076,,-0.783903,-0.256488,...,0.173395,0.204594,-0.243162,-0.343919,-0.438064,-0.34367,-0.118379,-0.231474,-0.173402,-0.204609


In [36]:
from sklearn.neighbors import NearestNeighbors

# Fit the neighbour index on the per-category rank columns and the category
# flag columns only. The label slice is inclusive on both ends and selects the
# same 16 columns as the positional slice 23:39, without depending on how many
# columns precede them.
# NOTE(review): Rank:boardgame (the overall rank) is not part of this range -
# confirm that excluding it is intended.
# Six neighbours are requested; the query cell below lists entries 1-5,
# presumably because entry 0 is the query game itself.
neigh = NearestNeighbors(n_neighbors=6)
neigh.fit(nn_df_transformed.loc[:, "Rank:strategygames":"Cat:Childrens"])

In [37]:
# Show the catalogue rows whose Name contains "Die Macher".
name_matches = nn_game_df["Name"].str.contains("Die Macher")
nn_game_df[name_matches].head()

Unnamed: 0,BGGId,Name,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,MinPlayers,MaxPlayers,ComAgeRec,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
0,1,Die Macher,1986,4.3206,7.61428,7.10363,1.57979,3,5,14.366667,...,21926,21926,0,1,0,0,0,0,0,0


In [38]:
# Scaled feature vector of the game stored at row position 0, restricted to
# column positions 23-38 (Rank:strategygames through Cat:Childrens), taken in
# a single positional lookup.
game = nn_df_transformed.iloc[0, 23:39]
game

Rank:strategygames    -3.059695
Rank:abstracts         0.231446
Rank:familygames       0.343471
Rank:thematic          0.243126
Rank:cgs               0.118378
Rank:wargames          0.437398
Rank:partygames        0.173395
Rank:childrensgames    0.204594
Cat:Thematic          -0.243162
Cat:Strategy           2.907663
Cat:War               -0.438064
Cat:Family            -0.343670
Cat:CGS               -0.118379
Cat:Abstract          -0.231474
Cat:Party             -0.173402
Cat:Childrens         -0.204609
Name: 0, dtype: float64

In [39]:
# Preview the query vector as a plain NumPy array (one value per feature).
game.to_numpy()

array([-3.0596954 ,  0.23144583,  0.34347099,  0.24312601,  0.11837768,
        0.43739785,  0.17339508,  0.20459363, -0.24316163,  2.90766337,
       -0.43806389, -0.34366995, -0.11837865, -0.23147356, -0.17340162,
       -0.20460856])

In [40]:
# Query the fitted index with the selected game.
# The Series is turned into a one-row DataFrame so the query carries the same
# column labels the model was fitted with (a bare array would drop them).
query = game.to_frame().T

# kneighbors returns a (distances, indices) pair; each array has one row per
# query and one column per neighbour.
closest_neighbor = neigh.kneighbors(query)
print(closest_neighbor)
print("--------------------------------------------------------")
print("Nearest neighbor: ")
print("--------------------------------------------------------")

# closest_neighbor[0] holds the distances and closest_neighbor[1] the row
# positions. Entry 0 is skipped - presumably it is the query game itself.
# Column position 1 of nn_game_df is the game's Name.
for row_position in closest_neighbor[1][0][1:]:
    print(nn_game_df.iloc[row_position, 1])



AttributeError: 'NoneType' object has no attribute 'split'

In [41]:
# Print the Name (column position 1) of every returned neighbour, including
# the first entry of the index array.
neighbor_positions = closest_neighbor[1][0]
print(nn_game_df.iloc[neighbor_positions, 1])

NameError: name 'closest_neighbor' is not defined

In [42]:
# Render the first six columns (BGGId through BayesAvgRating) of the second
# entry in the neighbour list as an HTML table.
closest_row = closest_neighbor[1][0][1]
s = nn_game_df.iloc[closest_row, :6]
df = s.to_frame()
df.to_html()

NameError: name 'closest_neighbor' is not defined

In [43]:
# Report the threadpoolctl version that the running kernel has imported.
# NOTE(review): useful when diagnosing the AttributeError raised by
# kneighbors above - presumably caused by an outdated threadpoolctl; confirm.
import threadpoolctl as tpc
tpc.__version__

'2.2.0'

In [44]:
# Report the scikit-learn version that the running kernel has imported.
import sklearn 
sklearn.__version__

'1.2.2'