Skip to content

Commit

Permalink
Merge branch '32-ndcg' into 'master'
Browse files Browse the repository at this point in the history
Resolve "Proper performance metrics"

Closes #32

See merge request recommend.games/board-game-recommender!25
  • Loading branch information
MarkusShepherd committed Apr 30, 2023
2 parents c712f61 + 399c36a commit 0aa1de1
Show file tree
Hide file tree
Showing 12 changed files with 1,960 additions and 1,184 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,6 @@ recommender/
.tc*/
.bga*
.bgg*
*.csv
*.ipynb
*.npz
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,20 @@ repos:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
exclude: ^notebooks/
- repo: https://github.com/asottile/pyupgrade
rev: v3.3.1
hooks:
- id: pyupgrade
exclude: ^notebooks/
- repo: https://github.com/psf/black
rev: '23.3.0'
hooks:
- id: black
exclude: ^notebooks/
- repo: https://github.com/pycqa/isort
rev: '5.12.0'
hooks:
- id: isort
exclude: ^notebooks/
args: ["--profile", "black", "--filter-files"]
5 changes: 5 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ verify_ssl = true
python_version = "3.8"

[packages]
matplotlib = "*"
numpy = "*"
pandas = "*"
polars = "*"
pytility = "*"
scikit-learn = "*"
scipy = "*"
seaborn = "*"
turicreate = "*"

[dev-packages]
Expand All @@ -21,6 +25,7 @@ jupyterlab = "*"
jupytext = "*"
mypy = "*"
nb-black = "*"
pandas-stubs = "*"
pre-commit = "*"
pylint = "*"
twine = "*"
2,843 changes: 1,677 additions & 1,166 deletions Pipfile.lock

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions board_game_recommender/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, AbstractSet, Generic, Iterable, TypeVar, Union

import numpy as np

GameKeyType = TypeVar("GameKeyType")
UserKeyType = TypeVar("UserKeyType")

Expand Down Expand Up @@ -49,6 +51,14 @@ def recommend(
) -> DataFrame:
"""Recommend games for given users."""

    @abstractmethod
    def recommend_as_numpy(
        self: "BaseGamesRecommender",
        users: Iterable[UserKeyType],
        games: Iterable[GameKeyType],
    ) -> np.ndarray:
        """Recommend games for given users and games as a numpy array.

        Args:
            users: Keys of the users to compute recommendations for.
            games: Keys of the games to score for each of those users.

        Returns:
            The recommendation scores as a numpy array.
            NOTE(review): the concrete implementations in this package return
            one row per user and one column per game, in the order given;
            confirm before relying on it for other subclasses.
        """

@abstractmethod
def recommend_similar(
self: "BaseGamesRecommender",
Expand Down
111 changes: 111 additions & 0 deletions board_game_recommender/evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""Evaluate recommender models."""

import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, Tuple, Union

import numpy as np
import polars as pl
from sklearn.metrics import ndcg_score

from board_game_recommender.base import BaseGamesRecommender

LOGGER = logging.getLogger(__name__)


@dataclass(frozen=True)
class RecommenderTestData:
    """Test data for recommender model evaluation.

    Holds held-out ratings in a row-per-user layout: entry ``i`` of
    ``user_ids`` belongs to row ``i`` of ``game_ids`` and ``ratings``.
    """

    # One user key per row of ``game_ids`` / ``ratings``.
    user_ids: Tuple[str, ...]
    # Game IDs; ``load_test_data`` builds this as (num_users, ratings_per_user).
    game_ids: np.ndarray
    # Ratings aligned element-wise with ``game_ids`` (same shape).
    ratings: np.ndarray


def load_test_data(
    path: Union[str, Path],
    ratings_per_user: int,
    user_id_key: str = "bgg_user_name",
    game_id_key: str = "bgg_id",
    ratings_key: str = "bgg_user_rating",
) -> "RecommenderTestData":
    """Load RecommenderTestData from CSV.

    The file is expected to hold exactly ``ratings_per_user`` consecutive rows
    for every user (i.e. rows grouped by user). This layout is relied upon but
    not verified: the user of each block is taken from the block's first row,
    and the game and rating columns are reshaped into one row per user.

    Args:
        path: Location of the CSV file.
        ratings_per_user: Number of consecutive rows belonging to each user;
            must be a positive integer.
        user_id_key: Name of the column holding the user keys.
        game_id_key: Name of the column holding the game IDs.
        ratings_key: Name of the column holding the ratings.

    Returns:
        The test data with ``game_ids`` and ``ratings`` of shape
        ``(num_users, ratings_per_user)``.

    Raises:
        ValueError: If ``ratings_per_user`` is not positive, or if the number
            of rows is not a multiple of ``ratings_per_user``.
    """

    # Fail fast, before any I/O: zero would otherwise surface as a bare
    # ZeroDivisionError below, and negative values would silently slice and
    # reshape the data in a meaningless way.
    if ratings_per_user < 1:
        raise ValueError(
            f"The number of ratings per user ({ratings_per_user}) must be positive"
        )

    path = Path(path).resolve()
    LOGGER.info("Loading test data from <%s>…", path)

    data = pl.read_csv(path)
    LOGGER.info("Read %d rows", len(data))

    if len(data) % ratings_per_user != 0:
        raise ValueError(
            f"The number of rows ({len(data)}) is not divisible by "
            + f"the number of ratings per user ({ratings_per_user})"
        )

    # The first row of every block of `ratings_per_user` rows names the user.
    user_ids = tuple(data[user_id_key][::ratings_per_user])
    # One row per user, one column per held-out rating.
    game_ids = data[game_id_key].view().reshape((-1, ratings_per_user))
    ratings = data[ratings_key].view().reshape((-1, ratings_per_user))

    return RecommenderTestData(user_ids=user_ids, game_ids=game_ids, ratings=ratings)


@dataclass(frozen=True)
class RecommenderMetrics:
    """Recommender model evaluation metrics.

    Both dictionaries map a cutoff ``k`` to the corresponding NDCG@k score.
    """

    # NDCG@k computed with the raw ratings as relevance.
    ndcg: Dict[int, float]
    # NDCG@k computed with exponential gains (``2**rating - 1``) as relevance.
    ndcg_exp: Dict[int, float]


def calculate_metrics(
    recommender: "BaseGamesRecommender",
    test_data: "RecommenderTestData",
    *,
    k_values: Union[None, int, Iterable[int]] = None,
) -> "RecommenderMetrics":
    """Calculate RecommenderMetrics for given recommender model and RecommenderTestData.

    For every user in the test data the recommender scores that user's
    held-out games; the scores are then compared against the true ratings via
    NDCG@k, once with the raw ratings as relevance and once with exponential
    gains (``2**rating - 1``).

    Args:
        recommender: Model to evaluate; only ``recommend_as_numpy`` is used.
        test_data: Held-out ratings in row-per-user layout.
        k_values: Cutoff(s) ``k`` to evaluate: a single int, an iterable of
            ints, or ``None`` (the default) for no extra cutoffs. The full
            number of ratings per user is always evaluated in addition.

    Returns:
        The NDCG scores keyed by cutoff ``k``.

    Raises:
        ValueError: If the predictions do not have the same shape as the
            ratings.
    """

    y_true = test_data.ratings
    # Score each user only against that user's own held-out games.
    y_pred = np.array(
        [
            recommender.recommend_as_numpy(users=(user,), games=games)[0, :]
            for user, games in zip(test_data.user_ids, test_data.game_ids)
        ]
    )

    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"Shape of ratings ({y_true.shape}) does not match "
            + f"shape of predictions ({y_pred.shape})"
        )

    if k_values is None:
        requested = frozenset()
    elif isinstance(k_values, int):
        requested = frozenset({k_values})
    else:
        requested = frozenset(k_values)

    # Always include the full list length, i.e. the untruncated NDCG.
    cutoffs = sorted(requested | {y_true.shape[-1]})

    ndcg = {k: ndcg_score(y_true=y_true, y_score=y_pred, k=k) for k in cutoffs}

    # Exponential gains put more weight on highly rated games.
    exp_gains = np.exp2(y_true) - 1
    ndcg_exp = {k: ndcg_score(y_true=exp_gains, y_score=y_pred, k=k) for k in cutoffs}

    return RecommenderMetrics(ndcg=ndcg, ndcg_exp=ndcg_exp)
64 changes: 50 additions & 14 deletions board_game_recommender/light.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ class CollaborativeFilteringData:
"""Labels, vectors and matrices for linear collaborative filtering models."""

intercept: float
users_labels: np.ndarray
users_linear_terms: np.ndarray
users_factors: np.ndarray
items_labels: np.ndarray
items_linear_terms: np.ndarray
items_factors: np.ndarray
users_labels: np.ndarray # (num_users,)
users_linear_terms: np.ndarray # (num_users,)
users_factors: np.ndarray # (num_users, num_factors)
items_labels: np.ndarray # (num_items,)
items_linear_terms: np.ndarray # (num_items,)
items_factors: np.ndarray # (num_factors, num_items)

def to_npz(self: "CollaborativeFilteringData", file_path: Union[Path, str]) -> None:
"""Save data into an .npz file."""
Expand Down Expand Up @@ -141,6 +141,38 @@ def known_users(self: "LightGamesRecommender") -> FrozenSet[str]:
def num_users(self: "LightGamesRecommender") -> int:
return len(self.users_labels)

def _recommendation_scores(
    self: "LightGamesRecommender",
    users: Optional[List[str]] = None,
    games: Optional[List[int]] = None,
) -> np.ndarray:
    """Calculate recommendations scores for certain users and games.

    Args:
        users: Labels of the users to score, in output row order. ``None``
            selects all known users; an empty list selects no users.
        games: IDs of the games to score, in output column order. ``None``
            selects all known games; an empty list selects no games.

    Returns:
        Array of shape ``(num_users, num_items)`` with one row per selected
        user and one column per selected game.

    Raises:
        KeyError: If a user or game is unknown to the model.
    """

    # Compare against None rather than relying on truthiness: an empty
    # selection must produce an empty axis, not silently fall back to "all".
    if users is not None:
        # Explicit integer dtype keeps an empty selection usable as an index.
        user_ids = np.array([self.users_indexes[user] for user in users], dtype=int)
        user_factors = self.users_factors[user_ids]
        users_linear_terms = self.users_linear_terms[user_ids].reshape(-1, 1)
    else:
        user_factors = self.users_factors
        users_linear_terms = self.users_linear_terms.reshape(-1, 1)

    if games is not None:
        # TODO Unknown games will cause a key error. Instead, use the user's
        # average predicted rating (user + global bias) for unknown games. (#57)
        game_ids = np.array([self.items_indexes[game] for game in games], dtype=int)
        items_factors = self.items_factors[:, game_ids]
        items_linear_terms = self.items_linear_terms[game_ids].reshape(1, -1)
    else:
        items_factors = self.items_factors
        items_linear_terms = self.items_linear_terms.reshape(1, -1)

    return (
        user_factors @ items_factors  # (num_users, num_items)
        + users_linear_terms  # (num_users, 1)
        + items_linear_terms  # (1, num_items)
        + self.intercept  # (1,)
    )

def recommend(
self: "LightGamesRecommender",
users: Iterable[str],
Expand All @@ -149,14 +181,7 @@ def recommend(
"""Calculate recommendations for certain users."""

users = list(users)
user_ids = np.array([self.users_indexes[user] for user in users])

scores = (
self.users_factors[user_ids] @ self.items_factors
+ self.users_linear_terms[user_ids].reshape(len(user_ids), 1)
+ self.items_linear_terms
+ self.intercept
)
scores = self._recommendation_scores(users=users)

result = pd.DataFrame(
index=self.items_labels,
Expand All @@ -173,6 +198,17 @@ def recommend(

return result[pd.MultiIndex.from_product([users, ["score", "rank"]])]

def recommend_as_numpy(
    self: "LightGamesRecommender",
    users: Iterable[str],
    games: Iterable[int],
) -> np.ndarray:
    """Score the given games for the given users and return a numpy array.

    Both iterables are materialised into lists (users first, then games) and
    handed to ``_recommendation_scores``, whose result is returned unchanged.
    """

    user_list = list(users)
    game_list = list(games)
    return self._recommendation_scores(users=user_list, games=game_list)

def recommend_similar(
self: "LightGamesRecommender",
games: Iterable[int],
Expand Down
40 changes: 40 additions & 0 deletions board_game_recommender/recommend.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# from datetime import date
from typing import Any, Dict, FrozenSet, Iterable, Optional, Tuple, Type

import numpy as np
import turicreate as tc
from pytility import arg_to_iter, clear_list

Expand Down Expand Up @@ -428,6 +429,45 @@ def recommend(
ascending=ascending,
)

def recommend_as_numpy(
    self: "GamesRecommender",
    users: Iterable[str],
    games: Iterable[int],
) -> np.ndarray:
    """Score the given games for the given users and return a numpy array.

    The model is asked for every requested game for every requested user
    (known items are not excluded). Its output is then joined with helper
    frames carrying the input positions, so that the scores can be sorted
    back into the order of ``users`` (rows) and ``games`` (columns).
    """

    user_list = list(users)
    game_list = list(games)
    num_users = len(user_list)
    num_games = len(game_list)

    # Helper frames remembering the requested order of users and games.
    user_order = tc.SFrame(
        {
            self.user_id_field: user_list,
            "sort_users": range(num_users),
        }
    )
    game_order = tc.SFrame(
        {
            self.id_field: game_list,
            "sort_games": range(num_games),
        }
    )

    scored = self.model.recommend(
        users=user_list,
        items=game_list,
        exclude_known=False,
        k=num_games,
    )

    # Exactly one score per (user, game) pair is needed for the reshape below.
    assert len(scored) == num_users * num_games

    with_user_pos = scored.join(user_order)
    with_both_pos = with_user_pos.join(game_order)
    ordered = with_both_pos.sort(["sort_users", "sort_games"])

    return ordered["score"].to_numpy().reshape(num_users, num_games)

def recommend_similar(
self: "GamesRecommender",
games: Iterable[GameKeyType],
Expand Down
57 changes: 57 additions & 0 deletions notebooks/ndcg_train_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:percent
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.14.5
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
# name: python3
# ---

# %%
import polars as pl

# %load_ext nb_black
# %load_ext lab_black

# %%
# Users with at least this many rated games contribute rows to the test set.
THRESHOLD_POWER_USERS = 200
# Number of ratings moved into the test set for each such user.
NUM_LABELS = 100

# %%
# Read all non-null ratings and flag the rows that go into the test set.
ratings = (
pl.scan_ndjson("../../board-game-data/scraped/bgg_RatingItem.jl")
.filter(pl.col("bgg_user_rating").is_not_null())
.select(
"bgg_id",
"bgg_user_name",
"bgg_user_rating",
# A row is a test row if its user has enough ratings AND the row's shuffled
# position within that user's ratings falls below NUM_LABELS.
# NOTE(review): no seed is passed to shuffle(), so presumably the split
# differs between runs - confirm whether reproducibility is required.
(
(pl.col("bgg_id").count().over("bgg_user_name") >= THRESHOLD_POWER_USERS)
& (pl.arange(0, pl.count()).shuffle().over("bgg_user_name") < NUM_LABELS)
).alias("is_test_row"),
)
.collect()
)

# %%
# Split on the flag, drop the helper column and sort both parts by user and game.
train_test = ratings.partition_by(
"is_test_row",
as_dict=True,
)
data_train = train_test[False]
data_train.drop_in_place("is_test_row")
data_train = data_train.sort("bgg_user_name", "bgg_id")
data_test = train_test[True]
data_test.drop_in_place("is_test_row")
data_test = data_test.sort("bgg_user_name", "bgg_id")
# Cell output: the shapes of both parts.
data_train.shape, data_test.shape

# %%
# Write both parts as CSV files into the current working directory.
data_train.write_csv("ratings_train.csv")
data_test.write_csv("ratings_test.csv")
2 changes: 1 addition & 1 deletion notebooks/rankings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.13.2
# jupytext_version: 1.14.5
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
Expand Down
Loading

0 comments on commit 0aa1de1

Please sign in to comment.