In [3]:
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Utils.DataReader import load_urm, load_icm, load_target

In [17]:
URM_all = load_urm()
ICM_type_df = pd.read_csv("Dataset/data_ICM_type.csv")


URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.85)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1579 ( 3.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 880 ( 2.1%) Users that have less than 1 test interactions


In [21]:
ICM_type_df

Unnamed: 0,item_id,feature_id,data
0,0,1,1
1,1,3,1
2,2,4,1
3,3,1,1
4,4,3,1
...,...,...,...
23086,27963,1,1
23087,27964,2,1
23088,27965,1,1
23089,27966,1,1


In [19]:
item_id_list = ICM_type_df['item_id'].values
feature_id_list = ICM_type_df['feature_id'].values
data_list = ICM_type_df['data'].values

ICM_type = sps.csr_matrix((data_list, (item_id_list, feature_id_list)))
ICM_type = ICM_type.astype(dtype=np.int32)
ICM_type

<27968x8 sparse matrix of type '<class 'numpy.int32'>'
	with 23091 stored elements in Compressed Sparse Row format>

In [23]:
from Recommenders.BaseRecommender import BaseRecommender

class LightFMCBFRecommender(BaseRecommender):
    """LightFMCBFRecommender"""

    RECOMMENDER_NAME = "LightFMCBFRecommender"

    def __init__(self, URM_train, ICM_train):
        super(LightFMCBFRecommender, self).__init__(URM_train)
        
        self.ICM_train = ICM_train.copy()


    def fit(self, epochs = 300, alpha = 1e-6, n_factors = 10, n_threads = 4):
        
        # Let's fit a WARP model
        self.lightFM_model = LightFM(loss='warp',
                                     item_alpha=alpha,
                                     no_components=n_factors)

        self.lightFM_model = self.lightFM_model.fit(URM_train, 
                                       item_features=self.ICM_train, 
                                       epochs=epochs, 
                                       num_threads=n_threads)


    def _compute_item_score(self, user_id_array, items_to_compute = None):
        
        # Create a single (n_items, ) array with the item score, then copy it for every user
        items_to_compute = np.arange(self.n_items)
        
        item_scores = - np.ones((len(user_id_array), self.n_items)) * np.inf

        for user_index, user_id in enumerate(user_id_array):
            item_scores[user_index] = self.lightFM_model.predict(int(user_id), 
                                                                 items_to_compute,
                                                                 item_features = self.ICM_train)

        return item_scores

In [33]:
from lightfm import LightFM

class LightFMItemHybridRecommender(LightFMCBFRecommender):
    """LightFMItemHybridRecommender"""

    RECOMMENDER_NAME = "LightFMItemHybridRecommender"
    
    def __init__(self, URM_train, ICM_train):
        super(LightFMItemHybridRecommender, self).__init__(URM_train, ICM_train)

        # Need to hstack item_features to ensure each ItemIDs are present in the model
        eye = sps.eye(self.n_items, self.n_items).tocsr()
        self.ICM_train = sps.hstack((eye, self.ICM_train)).tocsr()

In [34]:
recommender = LightFMItemHybridRecommender(URM_train, ICM_type)
recommender.fit(epochs = 10)

result_df, _ = evaluator_validation.evaluateRecommender(recommender)
result_df

ValueError: blocks[0,:] has incompatible row dimensions. Got blocks[0,1].shape[0] == 27968, expected 24507.