In [1]:
import numpy as np
from scipy import sparse as sp
from tqdm import trange

from src.hyperparameters_optimizer import HyperparametersOptimizer
from src.recommender_model import RecommenderModel
from src.utils import train_model, write_submission

# PageRank
This notebook provides an implementation of the PageRank recommender.

In [2]:
class PageRank(RecommenderModel):
	"""
	Random-walk-with-restart (personalized PageRank) recommender.

	Users and items are the nodes of one bipartite graph: node `u` is user `u`,
	node `num_users + i` is item `i`. Every stored URM entry becomes a pair of
	unit-weight edges (user -> item and item -> user); the interaction values
	themselves are not used as weights.
	"""

	def __init__(self):
		super(PageRank, self).__init__()
		# Number of propagation steps applied to each user's distribution.
		self.num_walks: int = 0
		# Restart probability used at every step.
		self.alpha: float = 0
		# Symmetric (num_users + num_items)^2 adjacency of the bipartite graph.
		self.adjacency: sp.csr_matrix | None = None
		# Row-normalised adjacency: row n holds the step probabilities out of node n.
		self.transition_mat: sp.csr_matrix | None = None

	def fit(self, urm: sp.csr_matrix, icm: sp.csr_matrix, urm_val: sp.csr_matrix, progress_bar: bool = True, num_walks: int = 3, alpha: float = .3, batch_size: int = 4096) -> None:
		"""
		Build the transition matrix and fill `self.urm_pred` with walk scores.

		Each user starts with all mass on their own node; then, `num_walks` times,
		`p <- (1 - alpha) * p @ T + alpha * e_user`. The item columns of the final
		`p` are stored as that user's scores.

		urm: user-item interaction matrix, shape (num_users, num_items)
		icm: not used by this model (presumably kept for a shared `fit` signature - confirm against RecommenderModel)
		urm_val: not used by this model (same remark as `icm`)
		progress_bar: show a tqdm bar over the user batches
		num_walks: number of propagation steps
		alpha: restart probability
		batch_size: number of users propagated together; must be positive

		Raises ValueError if `batch_size` is not positive.
		"""
		if batch_size <= 0:
			# A negative step would make the batch range empty and silently leave every score at zero.
			raise ValueError(f"batch_size must be positive, got {batch_size}")

		self.urm = urm
		self.num_walks = num_walks
		self.alpha = alpha
		num_users, num_items = self.urm.shape
		num_nodes = num_users + num_items

		# Item nodes are shifted by num_users so both node kinds share one index space.
		urm_coo = self.urm.tocoo()
		user_nodes = urm_coo.row
		item_nodes = urm_coo.col + num_users
		row = np.concatenate([user_nodes, item_nodes])
		col = np.concatenate([item_nodes, user_nodes])
		self.adjacency = sp.csr_matrix((np.ones_like(row, dtype=np.float32), (row, col)), shape=(num_nodes, num_nodes))

		# Row-normalise by node degree. Isolated nodes (degree 0) get an inverse of 0
		# instead of inf: their rows are empty anyway, and this avoids the
		# divide-by-zero warning and non-finite values in the normaliser.
		degrees = np.asarray(self.adjacency.sum(axis=1)).ravel()
		inv_degrees = np.zeros_like(degrees)
		np.divide(1.0, degrees, out=inv_degrees, where=degrees > 0)
		self.transition_mat = sp.diags(inv_degrees) @ self.adjacency

		self.urm_pred = np.zeros((num_users, num_items), dtype=np.float32)

		batch_starts = trange(0, num_users, batch_size, desc="Users") if progress_bar else range(0, num_users, batch_size)
		for start_user_idx in batch_starts:
			stop_user_idx = min(start_user_idx + batch_size, num_users)
			num_users_batch = stop_user_idx - start_user_idx
			batch_rows = np.arange(num_users_batch)
			users_idxs = batch_rows + start_user_idx

			# Row r of both matrices is a one-hot on the node of the r-th user of the batch.
			restart_probs = sp.csr_matrix(
				(np.full((num_users_batch,), self.alpha), (batch_rows, users_idxs)),
				shape=(num_users_batch, num_nodes),
			)
			p = sp.csr_matrix(
				(np.ones((num_users_batch,)), (batch_rows, users_idxs)),
				shape=(num_users_batch, num_nodes),
			)

			for _ in range(self.num_walks):
				p = (1 - self.alpha) * (p @ self.transition_mat) + restart_probs

			# Keep only the item columns, slicing while still sparse so the
			# user columns are never materialised densely.
			self.urm_pred[start_user_idx:stop_user_idx, :] = p[:, num_users:].toarray()

In [3]:
# Search over the restart probability `alpha` only.
# NOTE(review): this grid runs from 0.8 up to about 1.2, but `alpha` is documented as a
# probability and PageRank.fit weights each step by (1 - alpha); values >= 1 make that
# weight zero or negative. The recorded best (0.8) sits on the lower edge of the grid,
# so a range inside (0, 1) is probably what was intended - confirm before re-running.
optimizer = HyperparametersOptimizer(
	dict(alpha=np.arange(.8, 1.21, .05)),
	PageRank,
)
_, best_parameters = optimizer.optimize()

100%|██████████| 9/9 [20:03<00:00, 133.77s/it, Best MAP@10: 0.0458 with ['alpha: 8.00e-01']]


In [4]:
# Train with the best hyperparameters found above (test_size=0 presumably means
# no held-out split - confirm against train_model) and write the submission file.
page_rank_submission, _ = train_model(PageRank(), test_size=0, **best_parameters)
# The file name previously carried a doubled ".csv.csv" extension.
write_submission(page_rank_submission, "page_rank_submission.csv")

Users: 100%|██████████| 9/9 [02:04<00:00, 13.80s/it]


Submission result: `0.04478`