In [1]:
import scipy.sparse as sp

from src.utils import train_model, write_submission
from libs.Recommenders.Similarity.Compute_Similarity import Compute_Similarity
from src.recommender_model import RecommenderModel

# Collaborative Filtering
This notebook implements two Collaborative Filtering models, a user-based one and an item-based one, and reports the MAP@10 of each.

## User Based CF
The user-based method computes the similarity between users from their interactions and recommends to each user the items that similar users interacted with.

In [2]:
class UserBasedCF(RecommenderModel):
	"""User-based collaborative filtering recommender.

	``fit`` builds a similarity matrix from the transposed URM and stores the
	product ``similarity_matrix @ urm`` as the predicted scores.
	Assumes the URM is laid out as (users x items) — TODO confirm against the
	data loader.
	"""

	def __init__(self, top_k: int = 300, shrink: int = 500):
		"""Store the hyper-parameters later handed to ``Compute_Similarity``.

		:param top_k: requested value for the ``topK`` argument; ``fit`` caps it
			at the number of URM rows.
		:param shrink: value forwarded unchanged as the ``shrink`` argument.
		"""
		super().__init__()
		self.top_k: int = top_k
		self.shrink: int = shrink
		# Populated by fit(); stays None until the model has been fitted.
		self.similarity_matrix: sp.csr_matrix | None = None

	def fit(self, urm: sp.csr_matrix, icm: sp.csr_matrix, val_urm: sp.csr_matrix, **kwargs) -> None:
		"""Compute the similarity matrix and the predicted scores.

		:param urm: interaction matrix the model is fitted on.
		:param icm: not referenced by this model.
		:param val_urm: not referenced by this model.
		:param kwargs: extra keyword arguments; ignored here.
		"""
		self.urm = urm

		# Never request more neighbours than the URM has rows.
		neighbourhood_size = min(self.top_k, self.urm.shape[0])
		similarity_builder = Compute_Similarity(
			self.urm.T,
			topK=neighbourhood_size,
			shrink=self.shrink,
		)
		self.similarity_matrix = similarity_builder.compute_similarity()

		# Similarity on the left: each row of scores is a similarity-weighted
		# combination of URM rows.
		self.urm_pred = self.similarity_matrix @ self.urm

In [3]:
# Train a UserBasedCF with its default hyper-parameters (top_k=300, shrink=500).
# The recorded output of this cell includes the model's MAP@10 evaluation.
ubcf = train_model(UserBasedCF())

Similarity column 35736 (100.0%), 6625.32 column/sec. Elapsed time 5.39 sec
MAP@10 evaluation of the UserBasedCF model: 0.05475


In [4]:
# Retrain with test_size=0 — presumably nothing is held out, which would explain
# why no MAP@10 line is printed (confirm in src.utils.train_model) — and hand the
# result to write_submission together with the target file name.
ubcf_submission = train_model(UserBasedCF(), test_size=0)
write_submission(ubcf_submission, "ubcf_submission.csv")

Similarity column 35736 (100.0%), 5589.96 column/sec. Elapsed time 6.39 sec


## Item Based CF
The item-based method computes the similarity between items and recommends to each user the items most similar to the ones they have already interacted with.

In [5]:
class ItemBasedCF(RecommenderModel):
	"""Item-based collaborative filtering recommender.

	``fit`` builds a similarity matrix from the URM and stores the product
	``urm @ similarity_matrix`` as the predicted scores.
	Assumes the URM is laid out as (users x items) — TODO confirm against the
	data loader.
	"""

	def __init__(self, top_k: int = 300, shrink: int = 500):
		"""Store the hyper-parameters later handed to ``Compute_Similarity``.

		:param top_k: requested value for the ``topK`` argument; ``fit`` caps it
			at the number of URM columns.
		:param shrink: value forwarded unchanged as the ``shrink`` argument.
		"""
		super().__init__()
		self.top_k: int = top_k
		self.shrink: int = shrink
		# Populated by fit(); stays None until the model has been fitted.
		self.similarity_matrix: sp.csr_matrix | None = None

	def fit(self, urm: sp.csr_matrix, icm: sp.csr_matrix, urm_val: sp.csr_matrix, **kwargs) -> None:
		"""Compute the similarity matrix and the predicted scores.

		:param urm: interaction matrix the model is fitted on.
		:param icm: not referenced by this model.
		:param urm_val: not referenced by this model.
		:param kwargs: extra keyword arguments; ignored here.
		"""
		self.urm = urm

		# Never request more neighbours than the URM has columns.
		neighbourhood_size = min(self.top_k, self.urm.shape[1])
		similarity_builder = Compute_Similarity(
			self.urm,
			topK=neighbourhood_size,
			shrink=self.shrink,
		)
		self.similarity_matrix = similarity_builder.compute_similarity()

		# Similarity on the right: each column of scores is a similarity-weighted
		# combination of URM columns.
		self.urm_pred = self.urm @ self.similarity_matrix

In [6]:
# Train an ItemBasedCF with its default hyper-parameters (top_k=300, shrink=500).
# The recorded output of this cell includes the model's MAP@10 evaluation.
ibcf = train_model(ItemBasedCF())

Similarity column 38121 (100.0%), 5911.33 column/sec. Elapsed time 6.45 sec
MAP@10 evaluation of the ItemBasedCF model: 0.04983


In [7]:
# Retrain with test_size=0 — presumably nothing is held out, which would explain
# why no MAP@10 line is printed (confirm in src.utils.train_model) — and hand the
# result to write_submission together with the target file name.
ibcf_submission = train_model(ItemBasedCF(), test_size=0)
write_submission(ibcf_submission, "ibcf_submission.csv")

Similarity column 38121 (100.0%), 3523.53 column/sec. Elapsed time 10.82 sec
