In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer, MinMaxScaler
import warnings
import sys

warnings.filterwarnings('ignore')

###
### Load games into games_df
###

# updatge with local file path
games_file = "data/games_top500.csv"
#user_rating_file "user_ratings_top500.csv"

def semicolon_to_list(value):
    if pd.isna(value) or value == "":
        return []
    return [item.strip() for item in value.split(';')]

games_df = pd.read_csv(
    games_file,
    usecols=['bgg_id',
             'name',
             'description',
             'image',
             'thumbnail',
             'bgg_link',
             'avg_rating',
             'bgg_rating',
             'users_rated',
             'game_weight',
             'players_min',
             'players_max',
             'players_best',
             'time_min',
             'time_max',
             'time_avg',
             'game_mechanics',
             'game_categories',
             'game_types',
             'year_published'],
    
    converters={'game_mechanics': semicolon_to_list,
                'game_categories': semicolon_to_list,
                'game_types': semicolon_to_list},
    
    dtype={'bgg_id':        'int64',
           'avg_rating':    'float64',
           'bgg_rating':    'float64',
           'users_rated':   'int64',
           'game_weight':   'float64',
           'players_best':  'float64',
           'players_min':   'int64',
           'players_max':   'int64',
           'players_best':  'float64',
           'time_min':      'int64',
           'time_max':      'int64',
           'time_avg':      'int64'})



In [2]:
###
### BoardGameRecommender Class
###

class BoardGameRecommender:
    
    def __init__(self):
        
        self.games_df = games_df
    
        # prepare content-based filter CBF
        self._prepare_CBF()

        # prepare collaborative filter CF
        # self._prepare_CF()

        # prepare LLM filter
        # self._prepare_LLM()


    ### prepare content-based filter CBF
    def _prepare_CBF(self, cat_wt=1.5, mech_wt=2.0, type_wt=1.0, num_wt=0.5):
        # learn attributes of all games
        self.mlb_game_categories = MultiLabelBinarizer().fit(self.games_df['game_categories'])
        self.mlb_game_mechanics = MultiLabelBinarizer().fit(self.games_df['game_mechanics'])
        self.mlb_game_types = MultiLabelBinarizer().fit(self.games_df['game_types'])

        self.cbf_cat_wt = cat_wt
        self.cbf_mech_wt = mech_wt
        self.cbf_type_wt = type_wt
        self.cbf_num_wt = num_wt
        
        # create attribute vectors
        game_categories_features = self.mlb_game_categories.transform(self.games_df['game_categories'])
        game_mechanics_features = self.mlb_game_mechanics.transform(self.games_df['game_mechanics'])
        game_types_features = self.mlb_game_types.transform(self.games_df['game_types'])

        # scale numeric features to have similar influence
        self.scaler = MinMaxScaler()
        numeric_features = self.scaler.fit_transform(self.games_df[['game_weight', 'players_best', 'time_avg']])

        # combine features (weighted)
        self.game_features = np.hstack([game_categories_features * self.cbf_cat_wt,
                                        game_mechanics_features * self.cbf_type_wt,
                                        game_types_features * self.cbf_type_wt,
                                        numeric_features * self.cbf_num_wt])

        # combine features (unweighted)
        numeric_features_raw = self.games_df[['game_weight', 'players_best', 'time_avg']].values
        raw_features = np.hstack([game_categories_features, game_mechanics_features, game_types_features, numeric_features_raw])
    
        # compute similarity matrices - not used burrently but may be useful for simialry/edge graphs
        self.content_similarity = cosine_similarity(self.game_features)
        self.content_similarity_raw = cosine_similarity(raw_features)




    ### calcualte LLM scores - placeholder
    ### current code returns vector of zeros
    def get_LLM_scores(self):

        n_games = self.games_df.shape[0]
        llm_scores = np.zeros(n_games)
        llm_scores_norm = np.zeros(n_games)
        return llm_scores, llm_scores_norm


    ### calcualte CF scores - placeholder
    ### current code returns vector of zeros
    def get_CF_scores(self):

        n_games = self.games_df.shape[0]
        cf_scores = np.zeros(n_games)
        cf_scores_norm = np.zeros(n_games)
        return cf_scores, cf_scores_norm
    

    ### calculate CBF scores
    def get_CBF_scores(self, attributes=None):
    
        n_games = self.games_df.shape[0]

        # Build query feature vector
        if attributes:
            cat_vec = self.mlb_game_categories.transform(
                [attributes.get('game_categories', [])]
            ) if 'game_categories' in attributes else np.zeros((1, len(self.mlb_game_categories.classes_)))

            mech_vec = self.mlb_game_mechanics.transform(
                [attributes.get('game_mechanics', [])]
            ) if 'game_mechanics' in attributes else np.zeros((1, len(self.mlb_game_mechanics.classes_)))

            type_vec = self.mlb_game_types.transform(
                [attributes.get('game_types', [])]
            ) if 'game_types' in attributes else np.zeros((1, len(self.mlb_game_types.classes_)))

            if 'game_weight' in attributes:
                game_weight_avg = np.mean(attributes['game_weight'])
            else:
                game_weight_avg = 2.5
            
            if 'play_time' in attributes:
                play_time_avg = np.mean(attributes['play_time'])
            else:
                play_time_avg = 90

            if 'players' in attributes:
                players_avg = np.mean(attributes['players'])
            else:
                players_avg = 3

                
            numeric_vec = np.array([[game_weight_avg, players_avg, play_time_avg]])
            numeric_vec_scaled = self.scaler.transform(numeric_vec)

            # Match feature group weights exactly
            query_vector = np.hstack([cat_vec * self.cbf_cat_wt,
                                      mech_vec * self.cbf_mech_wt,
                                      type_vec * self.cbf_type_wt,
                                      numeric_vec_scaled * self.cbf_num_wt])
            
            cbf_scores = cosine_similarity(query_vector, self.game_features).flatten()

            #print(query_vector)
            
        else:
            # no cbf info zero vector
            cbf_scores = np.zeros(n_games)

        # normalize between 0 and 1
        
        if cbf_scores.max() > cbf_scores.min():
            cbf_scores_norm = (cbf_scores - cbf_scores.min()) / (cbf_scores.max() - cbf_scores.min())
        else:
            cbf_scores_norm = np.zeros_like(cbf_scores) 

        #print(sorted(cbf_scores, reverse=True))
        #print(sorted(cbf_scores_norm, reverse=True))
        
        return cbf_scores, cbf_scores_norm
    
    ###
    ### get recommendations and return results
    ###
    def get_game_recommendations(self, liked_game_names=None, disliked_game_names=None, exclude_game_names=None, attributes=None, description=None, n_recommendations=5, alpha=0.5, beta=0.3):

        # remove empty attributes
        liked_game_names = liked_game_names or []
        disliked_game_names = disliked_game_names or []
        exclude_game_names = exclude_game_names or []
        attributes = {k: v for k, v in (attributes or {}).items() if v}

        n_games = self.games_df.shape[0]
        
        id_to_index = {row.bgg_id: idx for idx, row in self.games_df.iterrows()}

        # Identify indices of liked games
        liked_indices = [id_to_index[g] for g in liked_games if g in id_to_index]
        disliked_indices = [id_to_index[g] for g in disliked_games if g in id_to_index]
        excluded_indices = [id_to_index[g] for g in exclude_games if g in id_to_index]

                
        # get scores from each model
        cbf_scores, cbf_scores_norm = self.get_CBF_scores(attributes)
        cf_scores, cf_scores_norm = self.get_CF_scores()
        llm_scores, llm_scores_norm = self.get_LLM_scores()

        # Combine normalized scores
        #self.final_scores =  ((cf_scores_norm * alpha) + (cbf_scores_norm * (1-alpha))) * (1-beta) + (llm_scores_norm * beta)
        self.final_scores = cbf_scores_norm
        final_scores = self.final_scores.copy()


    
        ###
        ### Filter Logic
        ###
        
        # exclude liked games
        for idx in liked_indices:
            final_scores[idx] = 0
            
        # exclude disliked games
        for idx in disliked_indices:
            final_scores[idx] = 0
            
        # exclude excluded games
        for idx in excluded_indices:
            final_scores[idx] = 0
            
        # filter game_mechanics, game_categories, game_typesattribute 
        if attributes:
            for attr_name in ['game_mechanics', 'game_categories', 'game_types']:
                selected = attributes.get(attr_name, [])
                if selected:
                    # Keep only games that have at least one element
                    mask = self.games_df[attr_name].apply(lambda game_attrs: any(a in game_attrs for a in selected))
                    final_scores[~mask] = 0

            # weight filter
            if 'game_weight' in attributes:
                w_min, w_max = attributes['game_weight']
                mask = (self.games_df['game_weight'] >= w_min) & (self.games_df['game_weight'] <= w_max)
                final_scores[~mask] = 0
                
            # number of players filter
            if 'players' in attributes:
                p_min, p_max = attributes['players']
                mask = (self.games_df['players_max'] >= p_min) & (self.games_df['players_min'] <= p_max)
                final_scores[~mask] = 0
                
            # play time filter
            if 'play_time' in attributes:
                t_min, t_max = attributes['play_time']
                # keep games where range [time_min, time_max] overlaps with [t_min, t_max]
                mask = (self.games_df['time_max'] >= t_min) & (self.games_df['time_min'] <= t_max)
                final_scores[~mask] = 0

            # published year filter
            if 'year_published' in attributes:
                y_min, y_max = attributes['year_published']
                # keep games where range [time_min, time_max] overlaps with [t_min, t_max]
                mask = (self.games_df['year_published'] >= y_min) & (self.games_df['year_published'] <= y_max)
                final_scores[~mask] = 0
                
            # rating filter
            if 'min_rating' in attributes:
                min_rating = attributes['min_rating']
                if isinstance(min_rating, (list, tuple)):
                    min_rating = min_rating[0]  # take the first element
                mask = (self.games_df['avg_rating'] >= min_rating)
                final_scores[~mask] = 0
                
        # return top N-games above score > 0.01
        # top_n_idx = np.argsort(final_scores)[::-1][:n_recommendations]
        valid_idx = np.where(final_scores >= 0.01)[0]

        # sort descending and take top N
        top_n_idx = valid_idx[np.argsort(final_scores[valid_idx])[::-1][:n_recommendations]]

        recommendations = self.games_df.iloc[top_n_idx][['bgg_id']].copy()
        
        recommendations['score'] = final_scores[top_n_idx].round(4)

        return recommendations

In [3]:
###
### function to display recommendations
### (only for debugging, will not be used)
###

def display_recommendations(recommender, liked_games, disliked_games, exclude_games, attributes, description, n_recommendations=5, alpha=0.2, beta=0.3):
    
    recommendations = recommender.get_game_recommendations(liked_games, disliked_games, exclude_games, attributes=attributes, description=description, n_recommendations=n_recommendations, alpha=alpha, beta=beta)
    
    games_df = recommender.games_df.set_index("bgg_id")

    # get names
    def get_game_names(id_list):
        if not id_list:
            return "None"
        names = [
            games_df.loc[g]["name"]
            for g in id_list
            if g in games_df.index
        ]
        return ", ".join(names) if names else "None"

    # display search criteria
    print("=" * 120)
    print("Find games based on...")
    print(f"  liking:    {get_game_names(liked_games)}")
    print(f"  disliking: {get_game_names(disliked_games)}")
    print(f"  excluding: {get_game_names(exclude_games)}")
    for key, values in attributes.items():
        if values:
            print(f"  {key}: {', '.join(str(v) for v in values)}")
    print("=" * 120)

    # no results
    if recommendations.empty:
        print("\nNo recommendations found.")
        return

    # show recommendations
    print("\nRecommendations:\n")

    for _, row in recommendations.iterrows():
        bgg_id = row["bgg_id"]
        score = row["score"]

        if bgg_id not in games_df.index:
            print(f"Game ID {bgg_id} not found in games_df.")
            continue

        game = games_df.loc[bgg_id]

        print(f"*** {game['name']:<25} Recommender score: {score:.4f}")
        print(f"    User Rating: {game.get('avg_rating', 'N/A'):.2f}")
        print(f"    Categories: {', '.join(game.get('game_categories', []))}")
        print(f"    Game Types: {', '.join(game.get('game_types', []))}")
        print(f"    Mechanics:  {', '.join(game.get('game_mechanics', []))}")
        print(f"    Year: {int(game.get('year_published', 0))} "
              f"| Players: {int(game.get('players_min', 0))}–{int(game.get('players_max', 0))}\n")


In [4]:
###
### run recommender examples
###
#
#To execute the recommender:
#
# 1.  initalize the object 
#
#            recommender = BoardGameRecommender(games_df)
#
# 2.  call the recommender method get_game_recommendations
#
#           recommender.get_game_recommendations(liked_games, disliked_games, exclude_games, attributes=attributes, description=description, n_recommendations=n_recommendations, alpha=alpha, beta=beta)
#    
#     the method will return a dataframe of bgg_id(int), score(float)
#
#
#     input paramters:
#
#     liked_games,disliked_games, exclude_games:  list of bgg_ids
#
#     attributes=attributes: dictionary
#           {'game_types': list of game type strings,
#              'game_categories': list of game category strings,
#              'game_weight': list of min and max game weight decimal,
#              'players': list of min and max players int,
#              'play_time': list of min and max play time int,
#              'min_rating': list of rating decimal,
#              'year_published':list of min and max year int}
#
#     description=description, 
#     n_recommendations:  number of n-recommendations to return
#     alpha: decimal 0.00 to 1.00
#     beta: decimal 0.00 to 1.00





# initialize recommender model
recommender = BoardGameRecommender()

# Example 1
liked_games = [163412, 230802]
disliked_games = []
exclude_games = []
description = ''
attributes = {'game_types': ['Abstract Game', 'Family Game'],
              'game_categories': ['Puzzle'],
              'game_weight': [1.5, 2.8],
              'players': [2,5],
              'play_time': [30,90],
              'min_rating':[7.5],
              'year_published':[1999,2025]}

display_recommendations(recommender, liked_games, disliked_games, exclude_games, attributes, description, n_recommendations=5, alpha=0.2, beta=0.3)

# Example 2
liked_games = [167791]
disliked_games = []
description = ''
exclude_games = [175155, 221194]
attributes = {'game_types': ['Strategy Game'],
              'game_categories': ['Science Fiction', 'Space Exploration'],
              'game_mechanics':['Dice Rolling', 'Hand Management', 'Hexagon Grid'],
              'game_weight': [2.0, 3.9],
              'players': [2,5],
              'play_time': [60,180],
              'min_rating':[7.5],
              'year_published':[1999,2025]}

display_recommendations(recommender, liked_games, disliked_games, exclude_games, attributes, description, n_recommendations=5, alpha=0.2, beta=0.3)


# Example 3
liked_games = [31260, 13]
disliked_games = []
description = ''
exclude_games = [58421]
attributes = {'  game_types': ['Strategy Game'],
              '  game_categories': ['Animals', 'Farming', 'Economic'],
              '  game_mechanics':['Worker Placement'],
              '  game_weight': [2.0, 3.5],
              '  players': [2,5],
              '  play_time': [],
              '  min_rating':[6.0],
              '  year_published':[1999,2025]}

display_recommendations(recommender, liked_games, disliked_games, exclude_games, attributes, description, n_recommendations=5, alpha=0.2, beta=0.3)



# Example 4
liked_games = []
disliked_games = []
exclude_games = []
description = ''
attributes = {'game_types': ['Strategy Game'],
              'game_categories': ['American West','Exploration'],
              'game_mechanics':[],
              'game_weight': [2.0, 4.0],
              'players': [2,5],
              'play_time': [],
              'min_rating':[6.0],
              'year_published':[2000,2025]}

display_recommendations(recommender, liked_games, disliked_games, exclude_games, attributes, description, n_recommendations=5, alpha=0.2, beta=0.3)


Find games based on...
  liking:    Patchwork, Azul
  disliking: None
  excluding: None
  game_types: Abstract Game, Family Game
  game_categories: Puzzle
  game_weight: 1.5, 2.8
  players: 2, 5
  play_time: 30, 90
  min_rating: 7.5
  year_published: 1999, 2025

Recommendations:

*** Azul: Summer Pavilion     Recommender score: 1.0000
    User Rating: 7.78
    Categories: Abstract Strategy, Puzzle
    Game Types: Abstract Game, Family Game
    Mechanics:  End Game Bonuses, Open Drafting, Pattern Building, Set Collection, Tile Placement, Turn Order: Claim Action
    Year: 2019 | Players: 2–4

*** Calico                    Recommender score: 0.8976
    User Rating: 7.79
    Categories: Animals, Puzzle
    Game Types: Abstract Game, Family Game
    Mechanics:  Enclosure, End Game Bonuses, Grid Coverage, Hexagon Grid, Open Drafting, Pattern Building, Set Collection, Solo / Solitaire Game, Tile Placement
    Year: 2020 | Players: 1–4

*** Sagrada                   Recommender score: 0.8697
