In [53]:
References: https://www.kaggle.com/code/mfaaris/content-based-and-tensorflow-recommender-system

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
games_df = pd.read_csv('games.csv')

In [3]:
games_df.head()

Unnamed: 0,BGGId,Name,Description,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,MinPlayers,MaxPlayers,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
0,1,Die Macher,die macher game seven sequential political rac...,1986,4.3206,7.61428,7.10363,1.57979,3,5,...,21926,21926,0,1,0,0,0,0,0,0
1,2,Dragonmaster,dragonmaster tricktaking card game base old ga...,1981,1.963,6.64537,5.78447,1.4544,3,4,...,21926,21926,0,1,0,0,0,0,0,0
2,3,Samurai,samurai set medieval japan player compete gain...,1998,2.4859,7.45601,7.23994,1.18227,2,4,...,21926,21926,0,1,0,0,0,0,0,0
3,4,Tal der Könige,triangular box luxurious large block tal der k...,1992,2.6667,6.60006,5.67954,1.23129,2,4,...,21926,21926,0,0,0,0,0,0,0,0
4,5,Acquire,acquire player strategically invest business t...,1964,2.5031,7.33861,7.14189,1.33583,2,6,...,21926,21926,0,1,0,0,0,0,0,0


In [4]:
games_df['Description']

0        die macher game seven sequential political rac...
1        dragonmaster tricktaking card game base old ga...
2        samurai set medieval japan player compete gain...
3        triangular box luxurious large block tal der k...
4        acquire player strategically invest business t...
                               ...                        
21920    oil tanker fire rescue team send deal   damage...
21921    new square edition include nippon expansion up...
21922    million year people force play timehonored gam...
21923    splitter group number score point mdash s s md...
21924    gather resource build crew fight rival claim p...
Name: Description, Length: 21925, dtype: object

In [5]:
games_df['IsReimplementation']

0        0
1        1
2        0
3        0
4        0
        ..
21920    0
21921    1
21922    0
21923    0
21924    0
Name: IsReimplementation, Length: 21925, dtype: int64

In [6]:
games_df = games_df.loc[games_df['IsReimplementation'] == 0]

In [7]:
games_df['IsReimplementation']

0        0
2        0
3        0
4        0
5        0
        ..
21917    0
21920    0
21922    0
21923    0
21924    0
Name: IsReimplementation, Length: 19365, dtype: int64

In [8]:
games_df.reset_index(inplace=True, drop=True)

In [9]:
games_df.loc[games_df['Name'] == 'War of the Ring']

Unnamed: 0,BGGId,Name,Description,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,MinPlayers,MaxPlayers,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
1484,2228,War of the Ring,war ring large map middle earth army muster he...,1977,3.4653,6.66408,5.88219,1.64333,2,3,...,21926,21926,0,0,1,0,0,0,0,0
4491,9609,War of the Ring,war ring player take control free people fp pl...,2004,3.8469,7.77497,7.40822,1.62755,2,4,...,21926,21926,1,0,0,0,0,0,0,0
8390,42131,War of the Ring,age draw close middleearth ravage war ring gre...,2009,3.1667,7.24881,5.60749,2.08058,2,2,...,21926,21926,0,0,1,0,0,0,0,0


In [10]:
#Remove any uncessary words
tfidf = TfidfVectorizer(stop_words='english')

#Remove null values in game description
games_df['Description'] = games_df['Description'].fillna('')

tfidf_matrix = tfidf.fit_transform(games_df['Description'])

In [11]:
games_df.head()

Unnamed: 0,BGGId,Name,Description,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,MinPlayers,MaxPlayers,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
0,1,Die Macher,die macher game seven sequential political rac...,1986,4.3206,7.61428,7.10363,1.57979,3,5,...,21926,21926,0,1,0,0,0,0,0,0
1,3,Samurai,samurai set medieval japan player compete gain...,1998,2.4859,7.45601,7.23994,1.18227,2,4,...,21926,21926,0,1,0,0,0,0,0,0
2,4,Tal der Könige,triangular box luxurious large block tal der k...,1992,2.6667,6.60006,5.67954,1.23129,2,4,...,21926,21926,0,0,0,0,0,0,0,0
3,5,Acquire,acquire player strategically invest business t...,1964,2.5031,7.33861,7.14189,1.33583,2,6,...,21926,21926,0,1,0,0,0,0,0,0
4,6,Mare Mediterraneum,ancient land mediterranean player attempt sati...,1989,3.0,6.5537,5.54614,1.6535,2,6,...,21926,21926,0,0,0,0,0,0,0,0


In [12]:
#Cosine similarity
cosine_sim = pd.DataFrame(linear_kernel(tfidf_matrix, tfidf_matrix))

In [13]:
cosine_sim.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19355,19356,19357,19358,19359,19360,19361,19362,19363,19364
0,1.0,0.016331,0.022626,0.065044,0.042947,0.017817,0.022295,0.029545,0.040282,0.042897,...,0.016033,0.015848,0.041921,0.01673,0.0,0.08874,0.043888,0.014526,0.057442,0.012651
1,0.016331,1.0,0.010321,0.029459,0.009556,0.068271,0.030143,0.026261,0.039945,0.049113,...,0.016666,0.001703,0.032315,0.01392,0.006291,0.073616,0.01177,0.006359,0.024839,0.009566
2,0.022626,0.010321,1.0,0.04139,0.03644,0.030268,0.023212,0.007156,0.024627,0.040548,...,0.015745,0.005938,0.048006,0.015598,0.007202,0.101026,0.021766,0.010743,0.037574,0.021205
3,0.065044,0.029459,0.04139,1.0,0.058439,0.021327,0.014836,0.022122,0.061721,0.068523,...,0.028593,0.005477,0.029901,0.013838,0.0074,0.046889,0.00889,0.028924,0.010908,0.003469
4,0.042947,0.009556,0.03644,0.058439,1.0,0.070919,0.017323,0.069205,0.042102,0.033022,...,0.008721,0.002201,0.014569,0.018006,0.0,0.089967,0.01355,0.005702,0.035236,0.020203


In [14]:
games_df['Name']

0                                 Die Macher
1                                    Samurai
2                             Tal der Könige
3                                    Acquire
4                         Mare Mediterraneum
                        ...                 
19360                            Mille Fiori
19361                                Salvage
19362    Rock Paper Scissors: Deluxe Edition
19363                               Splitter
19364                          Captains' War
Name: Name, Length: 19365, dtype: object

In [15]:
#indexed the names of games
indices= pd.Series(games_df.index, index = games_df['Name']).drop_duplicates()
indices

Name
Die Macher                                 0
Samurai                                    1
Tal der Könige                             2
Acquire                                    3
Mare Mediterraneum                         4
                                       ...  
Mille Fiori                            19360
Salvage                                19361
Rock Paper Scissors: Deluxe Edition    19362
Splitter                               19363
Captains' War                          19364
Length: 19365, dtype: int64

In [33]:
def get_recommendation(Name, cosine_sim = cosine_sim):
   index = indices[Name]
   sim = enumerate(cosine_sim[index])
   sim = sorted(sim, key=lambda x: x[1], reverse = True)
   sim = sim[1:11]
   sim_index = [i[0] for i in sim]
   print(games_df['Name'].iloc[sim_index])


get_recommendation('Wingspan')
    

12841    Birds of a Feather
16578              Piepmatz
13542                 Flock
11230               Migrato
16544               CuBirds
13742           Flip a Bird
4997            Bunte Vögel
14572             What's Up
8179           Emu Ranchers
13211           4 the Birds
Name: Name, dtype: object


In [17]:
R = games_df['AvgRating']
v = games_df['NumUserRatings']
m = games_df['NumUserRatings'].quantile(0.8)
C = games_df['AvgRating'].mean()

games_df['weighted_average'] = (R*v + C*m)/(v+m)



In [18]:
scaler = MinMaxScaler()
scaled = scaler.fit_transform(games_df[['NumWish', 'weighted_average']])
weighted_df = pd.DataFrame(scaled, columns=['NumWish', 'weighted_average'])

weighted_df.index = games_df['Name']

In [19]:
weighted_df['score'] = weighted_df['weighted_average']*0.3 + weighted_df['NumWish'].astype('float64')*0.7

In [20]:
weighted_df_sorted = weighted_df.sort_values(by='score', ascending=False)
weighted_df_sorted.head(10)

Unnamed: 0_level_0,NumWish,weighted_average,score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Terraforming Mars,0.995569,0.942424,0.979625
Scythe,1.0,0.906566,0.97197
Gloomhaven,0.912261,1.0,0.938583
Spirit Island,0.825722,0.928496,0.856554
Root,0.758263,0.882345,0.795488
Wingspan,0.732353,0.884606,0.778029
Everdell,0.712804,0.880839,0.763214
Terra Mystica,0.670942,0.888686,0.736265
Star Wars: Rebellion,0.58044,0.938822,0.687954
7 Wonders,0.627255,0.821036,0.685389


In [38]:
content_df = games_df[['Name','Description']].drop_duplicates()

In [39]:
content_df = weighted_df_sorted.merge(content_df, on='Name', how='left')



In [40]:
content_df.head()

Unnamed: 0,Name,NumWish,weighted_average,score,Description
0,Terraforming Mars,0.995569,0.942424,0.979625,s mankind begin terraform planet mar giant cor...
1,Scythe,1.0,0.906566,0.97197,time unrest s europa ashe great war darken sno...
2,Gloomhaven,0.912261,1.0,0.938583,gloomhaven game euroinspired tactical combat...
3,Spirit Island,0.825722,0.928496,0.856554,distant reach world magic exists embody spirit...
4,Root,0.758263,0.882345,0.795488,root game adventure war riverfolk expan...


In [41]:
#Remove any uncessary words
tfidf = TfidfVectorizer(stop_words='english')

#Remove null values in game description
content_df['Description'] = content_df['Description'].fillna('')

tfidf_matrix = tfidf.fit_transform(content_df['Description'])
tfidf_matrix.shape

(20043, 116406)

In [42]:
cosine_sim_rat = cosine_similarity(tfidf_matrix)

In [43]:
cosine_sim_rat.shape

(20043, 20043)

In [50]:
def predict(title, similarity_weight=0.65, top_n=10):
    data = content_df.reset_index()
    index = data[data['Name'] == title].index
    similarity = cosine_sim_rat[index].T
    sim_df = pd.DataFrame(similarity, columns=['similarity'])
    final_df = pd.concat([data, sim_df], axis=1)
    # You can also play around with the number
    final_df['final_score'] = final_df['score']*(1-similarity_weight) + final_df['similarity']*similarity_weight
    
    final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
    final_df_sorted.set_index('Name', inplace=True)
    return final_df_sorted[['score', 'similarity', 'final_score']]

In [51]:
predict('Wingspan', similarity_weight=0.65, top_n=10)

Unnamed: 0_level_0,score,similarity,final_score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Wingspan,0.778029,1.0,0.92231
Birds of a Feather,0.189766,0.482417,0.379989
Scythe,0.97197,0.044307,0.368989
Terraforming Mars,0.979625,0.032873,0.364236
Piepmatz,0.229335,0.414794,0.349883
Gloomhaven,0.938583,0.017076,0.339603
CuBirds,0.224926,0.393253,0.334339
Migrato,0.162932,0.394472,0.313433
Flock,0.159369,0.395043,0.312557
Flip a Bird,0.17113,0.388611,0.312493
