In [1]:
import numpy as np
import scipy.sparse as sp

from libs.Recommenders.Similarity.Compute_Similarity import Compute_Similarity
from src.utils import train_model, write_submission
from src.recommender_model import RecommenderModel

# Content-Based Filtering
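This notebook implements a Content-Based Filtering (CBF) recommender: the item features in the ICM are weighted with TF-IDF, an item-item similarity matrix is computed from the weighted features, and user scores are obtained by multiplying the URM by that similarity matrix.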

## TF-IDF to compute feature weights

In [2]:
def tf_idf(mat: sp.csr_matrix) -> sp.csr_matrix:
    """Rescales the matrix values by weighting the features of the matrix (typically the ICM) using TF-IDF

    Every stored value in column ``j`` is multiplied by ``log(n_rows / (df_j + 1))``, where
    ``df_j`` is the number of rows holding a non-zero value in column ``j``. The input is
    never modified. Entries whose weight turns out to be exactly zero are dropped from the
    result. Because of the ``+ 1`` smoothing, a feature present in every row gets a slightly
    negative weight.

    :param mat: The sparse matrix, with one row per item and one column per feature
    :type mat: sp.csr_matrix
    :return: The matrix rescaled by TF-IDF
    :rtype: sp.csr_matrix
    """
    # Work on a private CSR copy so the caller's matrix stays untouched and the
    # result is CSR whatever sparse format was passed in.
    weighted = sp.csr_matrix(mat, copy=True)
    # Merge duplicate entries and drop explicit zeros: neither may inflate the
    # per-feature occurrence count computed below.
    weighted.sum_duplicates()
    weighted.eliminate_zeros()

    n_rows, n_cols = weighted.shape
    # Document frequency = number of rows containing the feature. Counting
    # entries (instead of summing their values) keeps the IDF correct when the
    # matrix is not strictly binary.
    doc_freq = np.bincount(weighted.indices, minlength=n_cols)
    idf = np.log(n_rows / (doc_freq + 1))

    # In CSR, `indices` holds the column of every stored value, aligned with `data`.
    weighted.data = weighted.data * idf[weighted.indices]
    # An IDF of exactly 0 (df + 1 == n_rows) produces new zeros; remove them.
    weighted.eliminate_zeros()
    return weighted

## CBF with Similarity Matrix

In [3]:
class CBF(RecommenderModel):
	"""Content-based filtering recommender driven by a feature-based similarity matrix.

	After ``fit`` the instance holds the URM, the TF-IDF weighted ICM, the
	similarity matrix built from that ICM and ``urm_pred``, the product of the
	URM and the similarity matrix.
	"""

	def __init__(self, top_k: int = 300, shrink: int = 500):
		"""Store the hyperparameters that ``fit`` forwards to ``Compute_Similarity``.

		:param top_k: Forwarded as ``topK`` (presumably the number of neighbours kept per item; confirm in Compute_Similarity)
		:type top_k: int
		:param shrink: Forwarded as ``shrink`` (presumably the shrinkage term of the similarity; confirm in Compute_Similarity)
		:type shrink: int
		"""
		super().__init__()
		self.top_k: int = top_k
		self.shrink: int = shrink
		# Stays None until fit() has been called.
		self.similarity_matrix: sp.csr_matrix | None = None

	def fit(self, urm: sp.csr_matrix, icm: sp.csr_matrix) -> None:
		"""Build the similarity matrix from the ICM and derive the prediction scores.

		:param urm: The user rating matrix
		:type urm: sp.csr_matrix
		:param icm: The item content matrix; it is TF-IDF weighted before use
		:type icm: sp.csr_matrix
		"""
		self.urm = urm
		self.icm = tf_idf(icm)

		# The similarity is computed on the transposed, TF-IDF weighted ICM.
		similarity_builder = Compute_Similarity(
			self.icm.T,
			topK=self.top_k,
			shrink=self.shrink,
		)
		self.similarity_matrix = similarity_builder.compute_similarity()

		# Scores: the URM multiplied by the similarity matrix.
		self.urm_pred = self.urm @ self.similarity_matrix

In [4]:
# Train a CBF model with its default hyperparameters (top_k=300, shrink=500)
# through the project helper `train_model`; per the recorded output below, the
# helper also reports the MAP@10 of the trained model.
cbf = train_model(CBF())

Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 38121 (100.0%), 313.73 column/sec. Elapsed time 2.03 min
MAP@10 evaluation of the CBF model: 0.02552


<__main__.CBF at 0x2c5e39ffdd0>

In [4]:
# Retrain CBF with test_size=0 (presumably no held-out split, i.e. fitted on
# all available interactions; confirm in src.utils.train_model) and write its
# recommendations to the submission file.
cbf_submission = train_model(CBF(), test_size=0)
write_submission(cbf_submission, "cbf_submission.csv")