In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer, MinMaxScaler
import warnings
import sys

# Silence every warning category for the rest of the kernel session. Note that
# this is global: it also hides warnings raised by pandas / scikit-learn in the
# cells below.
warnings.filterwarnings('ignore')

###
### Load games into games_df
###
# CSV file handed to pd.read_csv below. The path is relative, so it is resolved
# against the kernel's current working directory.
games_file = "games_top500.csv"

def semicolon_to_list(value):
    """Split a ';'-separated cell into a list of trimmed, non-empty labels.

    Used as a ``pd.read_csv`` converter for the multi-valued columns.

    Parameters
    ----------
    value : str or missing
        Raw cell content, e.g. ``"Dice Rolling; Hand Management"``.

    Returns
    -------
    list of str
        The labels with surrounding whitespace removed. Missing values and
        empty strings give ``[]``. Empty segments (``"a;;b"``, a trailing
        ``;`` or a whitespace-only cell) are dropped so they cannot turn into
        a spurious ``''`` label further down the pipeline.
    """
    if pd.isna(value) or value == "":
        return []
    trimmed = (item.strip() for item in value.split(';'))
    return [item for item in trimmed if item]

# Load the game catalogue, restricted to the columns listed in `usecols`.
games_df = pd.read_csv(
    games_file,
    usecols=['bgg_id',
             'name',
             'description',
             'image',
             'thumbnail',
             'bgg_link',
             'avg_rating',
             'bgg_rating',
             'users_rated',
             'game_weight',
             'players_min',
             'players_max',
             'players_best',
             'time_min',
             'time_max',
             'time_avg',
             'game_mechanics',
             'game_categories',
             'game_types',
             'year_published'],

    # the ';'-separated multi-valued columns are parsed into Python lists
    converters={'game_mechanics': semicolon_to_list,
                'game_categories': semicolon_to_list,
                'game_types': semicolon_to_list},

    # explicit dtypes; each column appears exactly once ('players_best' used to
    # be listed twice, which is a silent duplicate-key in a dict literal)
    # NOTE(review): plain int64 columns cannot hold missing values, so the load
    # presumably relies on these columns being fully populated -- confirm.
    dtype={'bgg_id':        'int64',
           'avg_rating':    'float64',
           'bgg_rating':    'float64',
           'users_rated':   'int64',
           'game_weight':   'float64',
           'players_min':   'int64',
           'players_max':   'int64',
           'players_best':  'float64',
           'time_min':      'int64',
           'time_max':      'int64',
           'time_avg':      'int64'})



In [3]:
###
### BoardGameRecommender Class
###
class BoardGameRecommender:
    """Recommend board games from a catalogue DataFrame.

    Only the content-based filter (CBF) contributes to the ranking at the
    moment; the collaborative-filter (CF) and LLM scorers are placeholders
    that return zeros.

    Parameters
    ----------
    games_df : pandas.DataFrame
        Game catalogue. The methods read the columns ``name``, ``avg_rating``,
        ``game_categories``, ``game_mechanics``, ``game_types`` (each cell an
        iterable of labels), ``game_weight``, ``players_best``,
        ``players_min``, ``players_max``, ``time_avg``, ``time_min``,
        ``time_max`` and ``year_published``.
    alpha, beta : float
        Stored for the planned CF / CBF / LLM blend; the current scoring does
        not use them.
    """

    # Multipliers per feature group. They are applied to the catalogue matrix
    # and to every query vector, so both must read the same constants.
    CATEGORY_WEIGHT = 1.5
    MECHANIC_WEIGHT = 2.0
    TYPE_WEIGHT = 1.0
    NUMERIC_WEIGHT = 0.5

    # Numeric catalogue columns used as features (order matters: the query
    # vector is built in the same order).
    NUMERIC_COLUMNS = ['game_weight', 'players_best', 'time_avg']

    # Query values used when the caller gives no preference.
    DEFAULT_GAME_WEIGHT = 2.5
    DEFAULT_PLAYERS = 3
    DEFAULT_PLAY_TIME = 90

    # Games scoring below this (after filtering) are never recommended.
    MIN_SCORE = 0.3

    def __init__(self, games_df, alpha=0.5, beta=0.5):

        self.games_df = games_df
        self.alpha = alpha
        self.beta = beta

        # prepare content-based filter CBF
        self._prepare_CBF()

        # prepare collaborative filter CF
        # self._prepare_CF()

        # prepare LLM filter
        # self._prepare_LLM()

    ### prepare content-based filter CBF
    def _prepare_CBF(self):
        """Fit the label binarizers and the scaler, then build the feature matrices.

        Sets ``mlb_game_categories``, ``mlb_game_mechanics``,
        ``mlb_game_types``, ``scaler``, ``game_features`` (weighted, scaled),
        ``content_similarity`` and ``content_similarity_raw``.
        """
        # learn attributes of all games
        self.mlb_game_categories = MultiLabelBinarizer().fit(self.games_df['game_categories'])
        self.mlb_game_mechanics = MultiLabelBinarizer().fit(self.games_df['game_mechanics'])
        self.mlb_game_types = MultiLabelBinarizer().fit(self.games_df['game_types'])

        # create attribute vectors
        game_categories_features = self.mlb_game_categories.transform(self.games_df['game_categories'])
        game_mechanics_features = self.mlb_game_mechanics.transform(self.games_df['game_mechanics'])
        game_types_features = self.mlb_game_types.transform(self.games_df['game_types'])

        # scale numeric features to have similar influence
        self.scaler = MinMaxScaler()
        numeric_features = self.scaler.fit_transform(self.games_df[self.NUMERIC_COLUMNS])

        # combine features (weighted)
        self.game_features = np.hstack([game_categories_features * self.CATEGORY_WEIGHT,
                                        game_mechanics_features * self.MECHANIC_WEIGHT,
                                        game_types_features * self.TYPE_WEIGHT,
                                        numeric_features * self.NUMERIC_WEIGHT])

        # combine features (unweighted, numeric columns left unscaled)
        numeric_features_raw = self.games_df[self.NUMERIC_COLUMNS].values
        raw_features = np.hstack([game_categories_features,
                                  game_mechanics_features,
                                  game_types_features,
                                  numeric_features_raw])

        # compute game-to-game similarity matrices
        self.content_similarity = cosine_similarity(self.game_features)
        self.content_similarity_raw = cosine_similarity(raw_features)

    ### calculate LLM scores - placeholder
    def get_LLM_scores(self):
        """Placeholder: return ``(scores, normalized_scores)``, both all zeros."""
        n_games = self.games_df.shape[0]
        llm_scores = np.zeros(n_games)
        llm_scores_norm = np.zeros(n_games)
        return llm_scores, llm_scores_norm

    ### calculate CF scores - placeholder
    def get_CF_scores(self):
        """Placeholder: return ``(scores, normalized_scores)``, both all zeros."""
        n_games = self.games_df.shape[0]
        cf_scores = np.zeros(n_games)
        cf_scores_norm = np.zeros(n_games)
        return cf_scores, cf_scores_norm

    @staticmethod
    def _mean_or_default(attributes, key, default):
        """Return the mean of ``attributes[key]``, or ``default`` if missing/empty."""
        value = attributes.get(key)
        # an empty sequence would give a NaN mean, which cosine_similarity rejects
        if value is None or np.size(value) == 0:
            return default
        return float(np.mean(value))

    ### calculate CBF scores
    def get_CBF_scores(self, attributes=None):
        """Score every game against the requested attributes.

        Parameters
        ----------
        attributes : dict, optional
            May contain ``game_categories``, ``game_mechanics``, ``game_types``
            (lists of labels) and ``game_weight``, ``players``, ``play_time``
            (numbers or sequences of numbers; their mean is used). Other keys
            are ignored here.

        Returns
        -------
        (numpy.ndarray, numpy.ndarray)
            Raw cosine similarities and the same scores min-max normalized to
            [0, 1]. Both are all zeros when ``attributes`` is empty; the
            normalized scores are all zeros when every raw score is equal.
        """
        n_games = self.games_df.shape[0]

        if attributes:
            # A missing or empty label list binarizes to an all-zero row.
            cat_vec = self.mlb_game_categories.transform([attributes.get('game_categories') or []])
            mech_vec = self.mlb_game_mechanics.transform([attributes.get('game_mechanics') or []])
            type_vec = self.mlb_game_types.transform([attributes.get('game_types') or []])

            game_weight_avg = self._mean_or_default(attributes, 'game_weight', self.DEFAULT_GAME_WEIGHT)
            players_avg = self._mean_or_default(attributes, 'players', self.DEFAULT_PLAYERS)
            play_time_avg = self._mean_or_default(attributes, 'play_time', self.DEFAULT_PLAY_TIME)

            # The scaler was fitted on a DataFrame, so give it the same column
            # names instead of a bare array.
            numeric_query = pd.DataFrame([[game_weight_avg, players_avg, play_time_avg]],
                                         columns=self.NUMERIC_COLUMNS)
            numeric_vec_scaled = np.asarray(self.scaler.transform(numeric_query))

            # same group weights as the catalogue matrix
            query_vector = np.hstack([cat_vec * self.CATEGORY_WEIGHT,
                                      mech_vec * self.MECHANIC_WEIGHT,
                                      type_vec * self.TYPE_WEIGHT,
                                      numeric_vec_scaled * self.NUMERIC_WEIGHT])

            cbf_scores = cosine_similarity(query_vector, self.game_features).flatten()
        else:
            # no cbf info: zero vector
            cbf_scores = np.zeros(n_games)

        # normalize between 0 and 1
        lowest, highest = cbf_scores.min(), cbf_scores.max()
        if highest > lowest:
            cbf_scores_norm = (cbf_scores - lowest) / (highest - lowest)
        else:
            cbf_scores_norm = np.zeros_like(cbf_scores)

        return cbf_scores, cbf_scores_norm

    def _positions_for_names(self, game_names):
        """Return the row positions of the first game matching each name.

        Positions (not index labels) are returned because the score arrays are
        plain NumPy arrays aligned with the row order of ``games_df``; this
        keeps the lookup correct for any DataFrame index. Unknown names are
        skipped.
        """
        positions = []
        for game_name in game_names:
            matches = np.flatnonzero((self.games_df['name'] == game_name).to_numpy(dtype=bool))
            if matches.size:
                positions.append(matches[0])
        return np.array(positions, dtype=int)

    def _attribute_mask(self, attributes):
        """Return a boolean array, one entry per row, True for games passing all filters."""
        games = self.games_df
        keep = np.ones(len(games), dtype=bool)

        # label filters: keep games sharing at least one label with the request
        for attr_name in ('game_mechanics', 'game_categories', 'game_types'):
            selected = attributes.get(attr_name, [])
            if selected:
                has_any = games[attr_name].apply(
                    lambda game_attrs: any(a in game_attrs for a in selected))
                keep &= has_any.to_numpy(dtype=bool)

        # weight filter: game_weight inside [w_min, w_max]
        if 'game_weight' in attributes:
            w_min, w_max = attributes['game_weight']
            in_range = (games['game_weight'] >= w_min) & (games['game_weight'] <= w_max)
            keep &= in_range.to_numpy(dtype=bool)

        # number of players filter: [players_min, players_max] overlaps [p_min, p_max]
        if 'players' in attributes:
            p_min, p_max = attributes['players']
            overlaps = (games['players_max'] >= p_min) & (games['players_min'] <= p_max)
            keep &= overlaps.to_numpy(dtype=bool)

        # time filter: [time_min, time_max] overlaps [t_min, t_max]
        if 'play_time' in attributes:
            t_min, t_max = attributes['play_time']
            overlaps = (games['time_max'] >= t_min) & (games['time_min'] <= t_max)
            keep &= overlaps.to_numpy(dtype=bool)

        # published year filter: year_published inside [y_min, y_max]
        if 'year_published' in attributes:
            y_min, y_max = attributes['year_published']
            in_range = (games['year_published'] >= y_min) & (games['year_published'] <= y_max)
            keep &= in_range.to_numpy(dtype=bool)

        # minimum rating filter: avg_rating at or above the threshold
        if 'min_rating' in attributes:
            min_rating = attributes['min_rating']
            if isinstance(min_rating, (list, tuple)):
                min_rating = min_rating[0]  # take the first element
            keep &= (games['avg_rating'] >= min_rating).to_numpy(dtype=bool)

        return keep

    # get recommendations and return results
    def get_game_recommendations(self, liked_game_names=None, disliked_game_names=None, exclude_game_names=None, attributes=None, n_recommendations=5):
        """Return the top-scoring games that pass every filter.

        Parameters
        ----------
        liked_game_names, disliked_game_names, exclude_game_names : list of str, optional
            Games that must not be recommended. All three lists are currently
            treated the same way: the first catalogue row with that name gets
            score 0.
        attributes : dict, optional
            Preferences used both for scoring (see ``get_CBF_scores``) and as
            hard filters: label lists (``game_mechanics``, ``game_categories``,
            ``game_types``), ``[min, max]`` pairs (``game_weight``,
            ``players``, ``play_time``, ``year_published``) and ``min_rating``
            (number, or list/tuple whose first element is used). Entries with
            an empty / falsy value are ignored.
        n_recommendations : int
            Maximum number of rows to return.

        Returns
        -------
        pandas.DataFrame
            Up to ``n_recommendations`` games with score >= ``MIN_SCORE``,
            best first, with a ``score`` column (rounded to 4 decimals). Also
            stores the unfiltered normalized scores in ``self.final_scores``.
        """
        # remove empty attributes
        liked_game_names = liked_game_names or []
        disliked_game_names = disliked_game_names or []
        exclude_game_names = exclude_game_names or []
        attributes = {k: v for k, v in (attributes or {}).items() if v}

        # liked, disliked and excluded games are all hidden from the result
        hidden_positions = self._positions_for_names(
            list(liked_game_names) + list(disliked_game_names) + list(exclude_game_names))

        # get scores from each model (CF and LLM are placeholders for now)
        cbf_scores, cbf_scores_norm = self.get_CBF_scores(attributes)
        cf_scores, cf_scores_norm = self.get_CF_scores()
        llm_scores, llm_scores_norm = self.get_LLM_scores()

        # Combine normalized scores
        # TODO: once CF and LLM scores exist, blend them as
        #   ((cf_scores_norm * alpha) + (cbf_scores_norm * (1 - alpha))) * (1 - beta) + (llm_scores_norm * beta)
        self.final_scores = cbf_scores_norm
        final_scores = self.final_scores.copy()

        ###
        ### Filter Logic
        ###
        final_scores[hidden_positions] = 0
        final_scores[~self._attribute_mask(attributes)] = 0

        # keep games at or above the score threshold, sort descending, take top N
        valid_idx = np.where(final_scores >= self.MIN_SCORE)[0]
        top_n_idx = valid_idx[np.argsort(final_scores[valid_idx])[::-1][:n_recommendations]]

        recommendations = self.games_df.iloc[top_n_idx][
            ['name', 'avg_rating', 'game_categories', 'game_mechanics', 'game_weight',
             'game_types', 'year_published', 'players_min', 'players_max']].copy()

        recommendations['score'] = final_scores[top_n_idx].round(4)

        return recommendations

In [4]:
###
### function to display recommendations
###

def display_recommendations(recommender, liked_games, disliked_games, exclude_games, attributes, n_recommendations=5):
    """Run a recommendation query and print the request and its results.

    Parameters
    ----------
    recommender : object
        Anything with a ``get_game_recommendations(liked, disliked, excluded,
        attributes=..., n_recommendations=...)`` method returning a DataFrame
        with the columns printed below.
    liked_games, disliked_games, exclude_games : list of str or None
        Game names forwarded to the recommender and echoed in the header.
    attributes : dict or None
        Query attributes. Entries with an empty / falsy value are not printed.
        Values may be lists/tuples/sets (printed comma-separated) or single
        values such as ``min_rating=7.5`` (printed as-is).
    n_recommendations : int
        Maximum number of games to request (default 5).

    Returns
    -------
    None
        Everything is written to stdout.
    """
    recommendations = recommender.get_game_recommendations(
        liked_games, disliked_games, exclude_games,
        attributes=attributes, n_recommendations=n_recommendations)

    print("=" * 60)
    print(f"Based on liking: {', '.join(liked_games) if liked_games else 'None'}")
    print(f"      disliking: {', '.join(disliked_games) if disliked_games else 'None'}")
    print(f"        exclude: {', '.join(exclude_games) if exclude_games else 'None'}")
    for key, values in (attributes or {}).items():
        if values:
            if isinstance(values, (list, tuple, set, frozenset)):
                rendered = ', '.join(str(v) for v in values)
            else:
                # scalar (e.g. a bare min_rating) or a single string
                rendered = str(values)
            print(f"{key}: {rendered}")
    print("=" * 60)

    if recommendations.empty:
        print("\nNo recommendations found.")
    else:
        print("\nRecommendations:")
        print("\n")
        for _, row in recommendations.iterrows():
            print(f"*** {row['name']:<25} Score: {row['score']:.4f}")
            print(f"    Rating: {row['avg_rating']:.2f}")
            print(f"    Categories: {', '.join(row['game_categories'])}")
            print(f"    Game Types: {', '.join(row['game_types'])}")
            print(f"    Mechanics:  {', '.join(row['game_mechanics'])}")
            print(f"    Year: {int(row['year_published'])} | Players: {int(row['players_min'])}–{int(row['players_max'])}")
            print("\n")
    print("\n")


In [5]:
###
### run recommender examples
###

# initialize recommender model
recommender = BoardGameRecommender(games_df, alpha=0.2, beta=0.3)

# Every example is one (liked_games, disliked_games, exclude_games, attributes)
# tuple; they are run in order through the same display helper.
for liked_games, disliked_games, exclude_games, attributes in (
    # Example 1
    (
        ['Patchwork', 'Azul'],
        [],
        [],
        {
            'game_types': ['Abstract Game', 'Family Game'],
            'game_categories': ['Puzzle'],
            'game_weight': [1.5, 2.8],
            'players': [2, 5],
            'play_time': [30, 90],
            'min_rating': [7.5],
            'year_published': [1999, 2025],
        },
    ),
    # Example 2
    (
        ['Terraforming Mars', 'Eclipse'],
        [],
        ['Forbidden Stars', 'Dinosaur Island'],
        {
            'game_types': ['Strategy Game'],
            'game_categories': ['Science Fiction', 'Space Exploration'],
            'game_mechanics': ['Dice Rolling', 'Hand Management', 'Hexagon Grid'],
            'game_weight': [2.0, 3.9],
            'players': [2, 5],
            'play_time': [60, 180],
            'min_rating': [7.5],
            'year_published': [1999, 2025],
        },
    ),
    # Example 3
    (
        ['Agricola', 'Caverna'],
        [],
        ['Egizia'],
        {
            'game_types': ['Strategy Game'],
            'game_categories': ['Animals', 'Farming', 'Economic'],
            'game_mechanics': ['Worker Placement'],
            'game_weight': [2.0, 3.5],
            'players': [2, 5],
            'play_time': [],
            'min_rating': [6.0],
            'year_published': [1999, 2025],
        },
    ),
    # Example 4
    (
        [],
        [],
        [],
        {
            'game_types': ['Strategy Game'],
            'game_categories': ['American West', 'Exploration'],
            'game_mechanics': [],
            'game_weight': [2.0, 4.0],
            'players': [2, 5],
            'play_time': [],
            'min_rating': [6.0],
            'year_published': [2016, 2025],
        },
    ),
):
    display_recommendations(recommender, liked_games, disliked_games, exclude_games, attributes)


Based on liking: Patchwork, Azul
      disliking: None
        exclude: None
game_types: Abstract Game, Family Game
game_categories: Puzzle
game_weight: 1.5, 2.8
players: 2, 5
play_time: 30, 90
min_rating: 7.5
year_published: 1999, 2025

Recommendations:


*** Azul: Summer Pavilion     Score: 1.0000
    Rating: 7.78
    Categories: Abstract Strategy, Puzzle
    Game Types: Abstract Game, Family Game
    Mechanics:  End Game Bonuses, Open Drafting, Pattern Building, Set Collection, Tile Placement, Turn Order: Claim Action
    Year: 2019 | Players: 2–4


*** Exit: The Game – Dead Man on the Orient Express Score: 0.9730
    Rating: 7.67
    Categories: Deduction, Murder / Mystery, Puzzle, Real-time
    Game Types: Thematic, Family Game
    Mechanics:  Cooperative Game, Deduction
    Year: 2017 | Players: 1–4


*** Calico                    Score: 0.8463
    Rating: 7.79
    Categories: Animals, Puzzle
    Game Types: Abstract Game, Family Game
    Mechanics:  Enclosure, End Game Bonuses, 