In [1]:
import numpy as np
import scipy.sparse as sp

from src.algorithms import UserBasedCF, ItemBasedCF, CBF
from src.recommender_model import RecommenderModel
from src.utils import train_model, write_submission

# Hybrid Model: Content-Based Filtering and Collaborative Filtering
This notebook provides a simple hybrid model that combines, as a weighted sum of their scores, several filtering methods that each use similarity between users, items, or features.

In [2]:
class HybridCBFCF(RecommenderModel):
	"""Hybrid recommender that blends three sub-models with fixed weights.

	The sub-models are a ``UserBasedCF``, an ``ItemBasedCF`` and a ``CBF``
	instance. Each one is fitted on the same data, and the hybrid score for a
	user is the weighted sum of the three sub-model predictions.
	"""

	def __init__(self, ubcf_coeff: float, ibcf_coeff: float, cbf_coeff: float):
		"""Create the three sub-models and store their blend weights.

		:param ubcf_coeff: weight applied to the user-based CF predictions.
		:param ibcf_coeff: weight applied to the item-based CF predictions.
		:param cbf_coeff: weight applied to the content-based predictions.
		"""
		super().__init__()

		# Blend weights, one per sub-model.
		self.ubcf_coeff = ubcf_coeff
		self.ibcf_coeff = ibcf_coeff
		self.cbf_coeff = cbf_coeff

		# Sub-models, all built with their default constructor arguments.
		self.ubcf = UserBasedCF()
		self.ibcf = ItemBasedCF()
		self.cbf = CBF()

	def fit(self, urm: sp.csr_matrix, icm: sp.csr_matrix, urm_val: sp.csr_matrix, progress_bar: bool = True, **kwargs) -> None:
		"""Fit every sub-model on the same matrices.

		:param urm: sparse matrix stored on the instance as ``self.urm`` and
			handed to each sub-model.
		:param icm: sparse matrix stored on the instance as ``self.icm`` and
			handed to each sub-model.
		:param urm_val: sparse matrix handed to each sub-model unchanged.
		:param progress_bar: accepted for signature compatibility; it is not
			forwarded to the sub-models.
		:param kwargs: extra keyword arguments forwarded to every sub-model's
			``fit``.
		"""
		self.urm = urm
		self.icm = icm

		# Same order as the blend: user-based, item-based, content-based.
		for sub_model in (self.ubcf, self.ibcf, self.cbf):
			sub_model.fit(urm, icm, urm_val, **kwargs)

	def _get_recommendations_predictions(self, user_id: int) -> np.ndarray:
		"""Return the weighted sum of the sub-models' predictions for a user.

		:param user_id: identifier passed as-is to each sub-model.
		:return: ``ubcf_coeff * ubcf + ibcf_coeff * ibcf + cbf_coeff * cbf``,
			where each term is that sub-model's own prediction for ``user_id``.
		"""
		user_based_term = self.ubcf_coeff * self.ubcf._get_recommendations_predictions(user_id)
		item_based_term = self.ibcf_coeff * self.ibcf._get_recommendations_predictions(user_id)
		content_based_term = self.cbf_coeff * self.cbf._get_recommendations_predictions(user_id)
		return user_based_term + item_based_term + content_based_term

In [3]:
# Train the hybrid with explicit blend weights; only the first element of the
# pair returned by train_model is kept.
hybrid_cbf_cb, _ = train_model(
	HybridCBFCF(ubcf_coeff=1, ibcf_coeff=.5, cbf_coeff=.2)
)

Similarity column 35736 (100.0%), 6427.27 column/sec. Elapsed time 5.56 sec
Similarity column 38121 (100.0%), 6050.63 column/sec. Elapsed time 6.30 sec
Similarity column 38121 (100.0%), 1191.52 column/sec. Elapsed time 31.99 sec
MAP@10 evaluation of the HybridCBFCF model: 0.05595


In [4]:
# Re-train a fresh hybrid with the same blend weights and test_size=0
# (presumably no held-out split — confirm in src.utils.train_model), then
# write its recommendations to the submission file.
hybrid_cbf_cb_submission, _ = train_model(
	HybridCBFCF(ubcf_coeff=1, ibcf_coeff=.5, cbf_coeff=.2),
	test_size=0,
)
write_submission(hybrid_cbf_cb_submission, "hybrid_cbf_cb_submission.csv")

Similarity column 35736 (100.0%), 5853.93 column/sec. Elapsed time 6.10 sec
Similarity column 38121 (100.0%), 5343.27 column/sec. Elapsed time 7.13 sec
Similarity column 38121 (100.0%), 1146.35 column/sec. Elapsed time 33.25 sec


Submission result: `0.05768`