In [None]:
import numpy as np
from scipy import sparse as sp
from tqdm import trange

from src.hyperparameters_optimizer import HyperparametersOptimizer
from src.recommender_model import RecommenderModel
from src.utils import train_model, write_submission, tf_idf

In [None]:
class P3(RecommenderModel):
	"""Item-item recommender based on a two-step walk over the interaction matrix.

	``fit`` normalises the URM by row sums and by column sums, raises both
	normalised matrices element-wise to ``alpha``, multiplies them block by
	block into an item-item similarity, rescales every similarity column by the
	item's inverse degree raised to ``beta``, keeps at most ``top_k`` entries per
	similarity row and finally stores ``urm @ similarity`` in ``self.urm_pred``.
	"""

	def __init__(self):
		super(P3, self).__init__()
		self.alpha: float = 0
		self.beta: float = 0

	@staticmethod
	def _safe_reciprocal(sums) -> np.ndarray:
		"""Return ``1 / sums`` as a flat float array, mapping zero sums to 0.

		Avoids the ``inf`` values (and the integer-dtype ``ValueError``) that
		``np.power(sums, -1)`` produces for rows/columns without interactions.
		"""
		totals = np.asarray(sums).ravel()
		if not np.issubdtype(totals.dtype, np.floating):
			totals = totals.astype(np.float64)
		inverse = np.zeros_like(totals)
		non_zero = totals != 0
		inverse[non_zero] = 1. / totals[non_zero]
		return inverse

	def fit(
		self,
		urm: sp.csr_matrix,
		icm: sp.csr_matrix,
		urm_val: sp.csr_matrix,
		progress_bar: bool = True,
		alpha: float = 1.,
		beta: float = 0.,
		use_features_weighting: bool = True,
		use_icm: bool = False,
		icm_multiplier: float = 1.,
		batch_size: int = 256,
		top_k: int = 300,
		**kwargs
	) -> None:
		"""Build the item-item similarity and the predicted score matrix.

		Args:
			urm: interaction matrix; its second dimension is the number of items.
			icm: item-content matrix; its transpose is stacked under the URM
				when ``use_icm`` is true.
			urm_val: accepted for interface compatibility; not used here.
			progress_bar: show a tqdm progress bar over the similarity blocks.
			alpha: element-wise exponent applied to both transition matrices.
			beta: exponent of the inverse item degree used to rescale the
				similarity columns.
			use_features_weighting: pass the (possibly stacked) URM through
				``tf_idf`` before computing the similarity.
			use_icm: stack ``icm.T * icm_multiplier`` under the URM as extra rows.
			icm_multiplier: scale applied to the ICM rows when stacking.
			batch_size: number of similarity rows densified at once.
			top_k: maximum number of entries kept per similarity row; values
				larger than the number of items keep every non-zero entry.
			**kwargs: ignored.

		Side effects:
			Sets ``self.urm`` (the stacked/weighted matrix actually used),
			``self.icm``, ``self.alpha``, ``self.beta`` and ``self.urm_pred``.
		"""
		self.urm = urm
		self.icm = icm
		self.alpha = alpha
		self.beta = beta
		# Stacking the ICM adds rows only, so the item count is fixed here.
		num_items = self.urm.shape[1]

		if use_icm:
			self.urm = sp.vstack([self.urm, self.icm.T * icm_multiplier]).tocsr()

		if use_features_weighting:
			self.urm = tf_idf(self.urm)

		# Inverse degrees; rows/items with no interactions get 0 instead of inf.
		inv_deg_rows = self._safe_reciprocal(self.urm.sum(axis=1))
		inv_deg_items = self._safe_reciprocal(self.urm.sum(axis=0))

		p_ui = (sp.diags(inv_deg_rows) @ self.urm).power(self.alpha)
		p_iu = (sp.diags(inv_deg_items) @ self.urm.T).power(self.alpha).tocsr()

		# Popularity rescaling (1 / degree) ** beta. Items with zero degree are
		# given a factor of 0 so they never yield NaN (0 * inf) similarities.
		deg_items_pop_scaling = np.zeros_like(inv_deg_items)
		has_degree = inv_deg_items != 0
		deg_items_pop_scaling[has_degree] = np.power(inv_deg_items[has_degree], self.beta)

		# A row cannot hold more than num_items entries; clamping also keeps
		# the argpartition pivot inside its valid range.
		keep = max(0, min(top_k, num_items))
		capacity = keep * num_items
		rows = np.zeros((capacity,), dtype=np.int32)
		cols = np.zeros((capacity,), dtype=np.int32)
		values = np.zeros((capacity,), dtype=np.float32)
		num_values = 0

		# The similarity is item x item, so the blocks walk over the items
		# (the rows of p_iu), not over the users.
		if progress_bar:
			iterator = trange(0, num_items, batch_size, desc="Batch")
		else:
			iterator = range(0, num_items, batch_size)

		for start_item_idx in iterator:
			similarity_block = (p_iu[start_item_idx:start_item_idx + batch_size] @ p_ui).toarray()
			# Enumerate the rows actually present: the last block may be shorter
			# than batch_size.
			for block_row_idx, block_row in enumerate(similarity_block):
				item_row = block_row * deg_items_pop_scaling

				if keep < num_items:
					# Unordered indices of the `keep` largest similarities.
					top_k_idxs = np.argpartition(-item_row, keep)[:keep]
				else:
					top_k_idxs = np.arange(num_items)
				top_k_values = item_row[top_k_idxs]

				# Drop explicit zeros so they are not stored in the sparse matrix.
				non_zero_mask = top_k_values != 0
				top_k_values = top_k_values[non_zero_mask]
				top_k_idxs = top_k_idxs[non_zero_mask]

				end = num_values + len(top_k_values)
				rows[num_values:end] = start_item_idx + block_row_idx
				cols[num_values:end] = top_k_idxs
				values[num_values:end] = top_k_values
				num_values = end

		similarity = sp.csr_matrix((
			values[:num_values],
			(rows[:num_values], cols[:num_values]),
		), shape=(num_items, num_items))

		self.urm_pred = self.urm @ similarity

In [None]:
# Hyperparameter grid for P3. Each stop value sits just past the last grid
# point so that np.arange includes it: alpha ~ .5, .6, .7, .8 and
# beta ~ .3, .4, .5.
optimizer = HyperparametersOptimizer(
	dict(
		alpha=np.arange(0.5, 0.81, 0.1),
		beta=np.arange(0.3, 0.51, 0.1),
	),
	P3,
)
# Only the second element returned by optimize() is kept.
_, best_parameters = optimizer.optimize()

In [None]:
# Train a fresh P3 with the selected hyperparameters (test_size=0 is forwarded
# to train_model) and write the first returned value to the submission file.
p3_submission, _ = train_model(
	P3(),
	test_size=0,
	**best_parameters,
)
write_submission(p3_submission, "p3_submission.csv")