Skip to content

Commit

Permalink
Merge branch '56-popularity-recommender' into 'master'
Browse files Browse the repository at this point in the history
Resolve "Popularity recommender"

Closes #56

See merge request recommend.games/board-game-recommender!27
  • Loading branch information
MarkusShepherd committed May 2, 2023
2 parents d675319 + 8dcca3c commit 65a8365
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 52 deletions.
193 changes: 172 additions & 21 deletions board_game_recommender/baseline.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,39 @@
"""Baseline recommender models."""

import logging
from typing import FrozenSet, Iterable
import os
from typing import Any, FrozenSet, Iterable, List, Optional, Union

import numpy as np
import pandas as pd

from board_game_recommender.base import BaseGamesRecommender

LOGGER = logging.getLogger(__name__)
PATH = Union[str, os.PathLike]


def dataframe_from_scores(
    columns: Iterable[Any],
    index: Iterable[Any],
    scores: np.ndarray,
) -> pd.DataFrame:
    """Create a Pandas DataFrame out of raw recommendation scores.

    Args:
        columns: Labels of the entities that were scored for (e.g. users),
            one per row of ``scores``. Any iterable is accepted; it is
            materialised once so one-shot iterators work as well.
        index: Labels of the scored items (e.g. games), one per column of
            ``scores``.
        scores: Two-dimensional array of scores; it is transposed so that
            items become rows and ``columns`` become the top column level.

    Returns:
        A DataFrame indexed by ``index`` whose columns are a two-level
        MultiIndex ``(column, "score")`` / ``(column, "rank")``. Ranks are
        integers, with rank 1 for the highest score and ties sharing the
        lowest rank. If there is exactly one entry in ``columns``, the rows
        are sorted by that entry's rank.
    """

    # Materialise once: the labels are needed several times below and
    # ``len()`` / indexing are not available on arbitrary iterables.
    columns = list(columns)

    score_frame = pd.DataFrame(
        index=list(index),
        columns=pd.MultiIndex.from_product([columns, ["score"]]),
        data=scores.T,
    )

    rank_frame = score_frame.rank(method="min", ascending=False).astype(int)
    rank_frame.columns = pd.MultiIndex.from_product([columns, ["rank"]])

    result = pd.concat([score_frame, rank_frame], axis=1)

    if len(columns) == 1:
        result = result.sort_values((columns[0], "rank"))

    # Re-select so that every column's score is immediately followed by its rank.
    return result[pd.MultiIndex.from_product([columns, ["score", "rank"]])]


class RandomGamesRecommender(BaseGamesRecommender):
Expand All @@ -25,18 +50,10 @@ def known_games(self) -> FrozenSet[int]:
def rated_games(self) -> FrozenSet[int]:
return frozenset()

@property
def num_games(self) -> int:
return 0

@property
def known_users(self) -> FrozenSet[str]:
return frozenset()

@property
def num_users(self) -> int:
return 0

def _recommendation_scores(self, users: int, games: int) -> np.ndarray:
"""Random scores."""
return self.rng.random((users, games))
Expand All @@ -53,20 +70,107 @@ def recommend(
games = list(games)
scores = self._recommendation_scores(users=len(users), games=len(games))

result = pd.DataFrame(
index=games,
columns=pd.MultiIndex.from_product([users, ["score"]]),
data=scores.T,
return dataframe_from_scores(users, games, scores)

def recommend_as_numpy(
self,
users: Iterable[str],
games: Iterable[int],
) -> np.ndarray:
"""Random recommendations for certain users and games as a numpy array."""
users = list(users)
games = list(games)
return self._recommendation_scores(users=len(users), games=len(games))

def recommend_similar(self, games: Iterable[int], **kwargs) -> pd.DataFrame:
raise NotImplementedError

def similar_games(self, games: Iterable[int], **kwargs) -> pd.DataFrame:
raise NotImplementedError


class PopularGamesRecommender(BaseGamesRecommender):
"""Popular games recommender."""

id_field: str = "bgg_id"
user_id_field: str = "bgg_user_name"
rating_id_field: str = "bgg_user_rating"

_known_games: Optional[FrozenSet[int]] = None

def __init__(self, data: pd.Series) -> None:
self.data = data

@classmethod
def train(cls, ratings: pd.DataFrame) -> "PopularGamesRecommender":
"""Train the recommender from ratings data."""
raise NotImplementedError

@classmethod
def train_from_csv(cls, ratings_file: PATH) -> "PopularGamesRecommender":
    """Load ratings from a CSV file and train a recommender on them.

    Only the columns named by the class attributes ``id_field``,
    ``user_id_field`` and ``rating_id_field`` are handed to ``train``.
    """
    frame = pd.read_csv(ratings_file)
    wanted_columns = [cls.id_field, cls.user_id_field, cls.rating_id_field]
    return cls.train(frame[wanted_columns])
result[pd.MultiIndex.from_product([users, ["rank"]])] = result.rank(
method="min",
ascending=False,
).astype(int)

if len(users) == 1:
result.sort_values((users[0], "rank"), inplace=True)
@classmethod
def train_from_json_lines(cls, ratings_file: PATH) -> "PopularGamesRecommender":
    """Load ratings from a JSON lines file and train a recommender on them.

    Only the columns named by the class attributes ``id_field``,
    ``user_id_field`` and ``rating_id_field`` are handed to ``train``.
    """
    frame = pd.read_json(ratings_file, orient="records", lines=True)
    wanted_columns = [cls.id_field, cls.user_id_field, cls.rating_id_field]
    return cls.train(frame[wanted_columns])

return result[pd.MultiIndex.from_product([users, ["score", "rank"]])]
@property
def known_games(self) -> FrozenSet[int]:
if self._known_games is not None:
return self._known_games
self._known_games = frozenset(self.data.index)
return self._known_games

@property
def rated_games(self) -> FrozenSet[int]:
return self.known_games

@property
def num_games(self) -> int:
return len(self.data)

@property
def known_users(self) -> FrozenSet[str]:
return frozenset()

def _recommendation_scores(
    self,
    users: int,
    games: Optional[List[int]] = None,
) -> np.ndarray:
    """Popularity scores, repeated once per user.

    Args:
        users: Number of users, i.e. number of rows in the result.
        games: Labels to look up in ``self.data``. ``None`` selects every
            entry of ``self.data``; an empty list selects no games.

    Returns:
        Array with ``users`` rows; every row holds the same scores, one
        per selected game.
    """
    # Compare against None explicitly: an empty list is a valid request for
    # zero games and must not fall back to "all games".
    scores = self.data if games is None else self.data.loc[games]
    return np.tile(scores.to_numpy(), [users, 1])

def recommend(
    self,
    users: Iterable[str],
    **kwargs,
) -> pd.DataFrame:
    """Popularity-based recommendations for the given users.

    Every user receives the same scores; the result covers every game in
    ``self.data`` and is laid out by ``dataframe_from_scores``.
    """
    user_list = list(users)
    popularity = self._recommendation_scores(users=len(user_list))
    return dataframe_from_scores(user_list, self.data.index, popularity)

def recommend_as_numpy(
self,
Expand All @@ -76,10 +180,57 @@ def recommend_as_numpy(
"""Random recommendations for certain users and games as a numpy array."""
users = list(users)
games = list(games)
return self._recommendation_scores(users=len(users), games=len(games))
return self._recommendation_scores(users=len(users), games=games)

def recommend_similar(self, games: Iterable[int], **kwargs) -> pd.DataFrame:
raise NotImplementedError

def similar_games(self, games: Iterable[int], **kwargs) -> pd.DataFrame:
raise NotImplementedError


class PopularMeanGamesRecommender(PopularGamesRecommender):
    """Popularity recommender that scores each game by its mean rating."""

    @classmethod
    def train(cls, ratings: pd.DataFrame) -> "PopularMeanGamesRecommender":
        """Average the rating column per game and wrap the result."""
        # sort=False keeps groups in order of first appearance in ``ratings``.
        ratings_by_game = ratings.groupby(cls.id_field, sort=False)[
            cls.rating_id_field
        ]
        return cls(data=ratings_by_game.mean())


class PopularBayesianGamesRecommender(PopularGamesRecommender):
    """Popularity recommender that scores each game by a Bayesian average rating."""

    # One dummy rating is added to every game per this many ratings in the
    # training data.
    ratings_per_dummy: float = 10_000
    # Value of the dummy ratings; None means "use the overall mean rating".
    dummy_rating: Optional[float] = 5.5

    @classmethod
    def train(cls, ratings: pd.DataFrame) -> "PopularBayesianGamesRecommender":
        """Compute each game's Bayesian average rating and wrap the result.

        Each game's score is its rating total plus ``prior_weight`` dummy
        ratings of value ``prior_rating``, divided by its number of ratings
        plus ``prior_weight``.
        """
        prior_weight = len(ratings) / cls.ratings_per_dummy

        if cls.dummy_rating is None:
            prior_rating = ratings[cls.rating_id_field].mean()
        else:
            prior_rating = cls.dummy_rating

        ratings_by_game = ratings.groupby(cls.id_field, sort=False)[
            cls.rating_id_field
        ]
        per_game = ratings_by_game.agg(["size", "mean"])

        # mean * size recovers the per-game rating total.
        numerator = per_game["mean"] * per_game["size"] + prior_rating * prior_weight
        denominator = per_game["size"] + prior_weight

        return cls(data=numerator / denominator)


class PopularNumRatingsGamesRecommender(PopularGamesRecommender):
    """Popularity recommender that scores each game by how many ratings it has."""

    @classmethod
    def train(cls, ratings: pd.DataFrame) -> "PopularNumRatingsGamesRecommender":
        """Count the rating rows per game and wrap the result."""
        # sort=False keeps groups in order of first appearance in ``ratings``.
        rows_per_game = ratings.groupby(cls.id_field, sort=False).size()
        return cls(data=rows_per_game)
33 changes: 3 additions & 30 deletions board_game_recommender/light.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pandas as pd

from board_game_recommender.base import BaseGamesRecommender
from board_game_recommender.baseline import dataframe_from_scores

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -182,21 +183,7 @@ def recommend(

users = list(users)
scores = self._recommendation_scores(users=users)

result = pd.DataFrame(
index=self.items_labels,
columns=pd.MultiIndex.from_product([users, ["score"]]),
data=scores.T,
)
result[pd.MultiIndex.from_product([users, ["rank"]])] = result.rank(
method="min",
ascending=False,
).astype(int)

if len(users) == 1:
result.sort_values((users[0], "rank"), inplace=True)

return result[pd.MultiIndex.from_product([users, ["score", "rank"]])]
return dataframe_from_scores(users, self.items_labels, scores)

def recommend_as_numpy(
self: "LightGamesRecommender",
Expand Down Expand Up @@ -242,21 +229,7 @@ def similar_games(
game_factors = self.items_factors[:, game_ids]

scores = cosine_similarity(game_factors, self.items_factors)

result = pd.DataFrame(
index=self.items_labels,
columns=pd.MultiIndex.from_product([games, ["score"]]),
data=scores.T,
)
result[pd.MultiIndex.from_product([games, ["rank"]])] = result.rank(
method="min",
ascending=False,
).astype(int)

if len(games) == 1:
result.sort_values((games[0], "rank"), inplace=True)

return result[pd.MultiIndex.from_product([games, ["score", "rank"]])]
return dataframe_from_scores(games, self.items_labels, scores)


def cosine_similarity(matrix_1: np.ndarray, matrix_2: np.ndarray) -> np.ndarray:
Expand Down
1 change: 0 additions & 1 deletion board_game_recommender/recommend.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,6 @@ def _post_process_games(

return games.sort(sort_by, ascending=ascending)[columns]

# pylint: disable=no-self-use
def process_user_id(self: "GamesRecommender", user_id):
"""process user ID"""
return user_id or None
Expand Down

0 comments on commit 65a8365

Please sign in to comment.