In [None]:
import numpy as np
import scipy.sparse as sp
import torch
from torch.optim import Adam, Optimizer
from tqdm import trange
from torch.utils.data import Dataset
from torch import nn

from src.recommender_model import RecommenderModel
from src.utils import train_model, write_submission, plot_losses, evaluate_model

# SLIM - BPR

Machine Learning can be used to learn the users and items embeddings. The simplest method uses Mean Squared Error loss to optimize the users and items embeddings.

In [2]:
class URMDatasetNegativeSampling(Dataset):
	"""URM Dataset with negative sampling"""
	def __init__(self, urm: sp.csr_matrix):
		self.urm: sp.csr_matrix = urm
		self.num_items: int = self.urm.shape[1]
		# self.ratings = None  # assume implicit ratings
		self.user_item_sets = {
			user_id: (
				self.urm.getrow(user_id).indices,
				set(self.urm.getrow(user_id).indices)
			)
			for user_id in range(self.urm.shape[0])
		}  # we use sets to check if an item has been interacted with as it has constant access time

	def __getitem__(self, user_idx: int):
		pos_sample = np.random.choice(self.user_item_sets[user_idx][0])
		neg_sample = np.random.randint(0, self.num_items)
		while neg_sample in self.user_item_sets[user_idx][1]:
			neg_sample = np.random.randint(0, self.num_items)
		return torch.tensor(pos_sample, dtype=torch.long), torch.tensor(neg_sample, dtype=torch.long)

	def __len__(self):
		return self.urm.nnz

In [3]:
def bpr_loss(pos_scores, neg_scores):
	return -torch.mean(torch.log(torch.sigmoid(pos_scores - neg_scores)))

In [4]:
class SLIMBPR(RecommenderModel):
	def __init__(self):
		super(SLIMBPR, self).__init__()
		self.similarity_matrix: torch.Tensor | None = None
		self.optimizer: Optimizer | None = None
		self.loss_fn = None
		self.best_map = 0.0

	def fit(self, urm: sp.csr_matrix, icm: sp.csr_matrix, urm_val: sp.csr_matrix, lr: float = .001, epochs: int = 1, plot_loss: bool = True)-> None:
		self.urm = urm
		num_users, num_items = urm.shape
		self.similarity_matrix = nn.Parameter(torch.zeros(num_items, num_items, dtype=torch.float32))
		self.optimizer = Adam([self.similarity_matrix], lr=lr)
		self.loss_fn = bpr_loss

		validation_enabled = urm_val.nnz > 0

		dataset = URMDatasetNegativeSampling(urm_val)

		loss_history = np.zeros(epochs * num_users)
		map_history = np.zeros(epochs)

		for epoch in (t := trange(epochs)):
			for user_idx in range(num_users):
				pos_sample, neg_sample = dataset[user_idx]

				seen_items = torch.tensor(
					self.urm.indices[self.urm.indptr[user_idx]:self.urm.indptr[user_idx + 1]],
					dtype=torch.long
				)

				pos_score = self.similarity_matrix[pos_sample, seen_items].sum()
				neg_score = self.similarity_matrix[neg_sample, seen_items].sum()

				loss = self.loss_fn(pos_score, neg_score)

				self.optimizer.zero_grad()
				loss.backward()
				self.optimizer.step()

				self.similarity_matrix[pos_sample, pos_sample] = 0
				self.similarity_matrix[neg_sample, neg_sample] = 0

				loss_history[num_users * epoch + user_idx] = loss.item()
				with torch.no_grad():
					t.set_postfix({
						"Batch progression": f"{(user_idx + 1) / num_users * 100:.2f}%",
						"Train loss": f"{loss.item():.5f}",
						"Pos score": f"{pos_score.item():.5f}",
						"Neg score": f"{neg_score.item():.5f}"
					})
			if validation_enabled:
				with torch.no_grad():
					self.urm_pred = self.urm @ self.similarity_matrix
					map_history[epoch] = evaluate_model(self, urm_val)

		if not validation_enabled:
			with torch.no_grad():
				self.urm_pred = self.urm @ self.similarity_matrix
		plot_losses(epochs, loss_history)

In [5]:
slim_bpr_train = train_model(SLIMBPR(), epochs=100)

  0%|          | 0/100 [00:00<?, ?it/s]


ValueError: a must be 1-dimensional or an integer

In [None]:
slim_bpr_submission = train_model(SLIMBPR(), test_size=0, epochs=100)
write_submission(slim_bpr_submission, "slim_bpr_submission.csv")

Submission result: `0.00278`