In [1]:
#import libraries

import scipy.sparse as sps
import numpy as np
import os
import pandas as pd 

URM_all= sps.load_npz("./Processed Matrices/URM_simple_coo_2020.npz").tocsr()
ICM_all= sps.load_npz("./Processed Matrices/ICM_simple_coo_2020.npz").tocsr()

In [2]:
%load_ext Cython

In [3]:
from Notebooks_utils.evaluation_function import evaluate_algorithm
from Base.Similarity.Compute_Similarity_Python import Compute_Similarity_Python
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

np.random.seed(41148)
URM_train, URM_valid = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)



In [4]:
from GraphBased.RP3betaRecommender import RP3betaRecommender
RP3brecommender= RP3betaRecommender(URM_train)
RP3brecommender.fit(topK=1500, alpha=0.414615342821205, beta=0.17823717015919388, normalize_similarity=False)

RP3betaRecommender: URM Detected 82 (1.03 %) cold users.
RP3betaRecommender: URM Detected 2372 (9.13 %) cold items.


In [5]:
from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
ItemKNNCBFrecommender=ItemKNNCBFRecommender(URM_train, ICM_all)
ItemKNNCBFrecommender.fit(topK=1500, shrink=6, similarity="dice")

ItemKNNCBFRecommender: URM Detected 82 (1.03 %) cold users.
ItemKNNCBFRecommender: URM Detected 2372 (9.13 %) cold items.
Similarity column 25975 ( 100 % ), 3547.91 column/sec, elapsed time 0.12 min


In [6]:
from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
ItemKNNCFrecommender=ItemKNNCFRecommender(URM_train)
ItemKNNCFrecommender.fit(topK=1500, shrink=52, similarity='tversky', normalize=True, tversky_alpha=0.5985566775975192, tversky_beta=1.6463076214540535)

ItemKNNCFRecommender: URM Detected 82 (1.03 %) cold users.
ItemKNNCFRecommender: URM Detected 2372 (9.13 %) cold items.
Similarity column 25975 ( 100 % ), 6235.81 column/sec, elapsed time 0.07 min


In [7]:
def WeightGenerator(length,total_sum=100):
    if length == 1:
        yield (total_sum,)
    else:
        for value in range(0,total_sum + 1, 10):
            for permutation in WeightGenerator(length - 1, total_sum - value):
                yield (value,) + permutation

In [8]:
resultDF= pd.DataFrame({"topK":[],"weights":[],"MAP":[]})

In [9]:
from KNN.WeightedSimilarityHybridRecommender import WeightedSimilarityHybridRecommender

similarities=[ RP3brecommender.W_sparse, ItemKNNCBFrecommender.W_sparse, ItemKNNCFrecommender.W_sparse]
recommender=WeightedSimilarityHybridRecommender(URM_train,similarities)
evaluator_validation = EvaluatorHoldout(URM_valid, cutoff_list=[10])

for topK in range(700,1501,100):
    for weights in list(WeightGenerator(3)):
        recommender.fit(np.array(weights)/100,topK)
        MAP,_=evaluator_validation.evaluateRecommender(recommender)
        MAP=MAP[10]["MAP"]
        resultDF=resultDF.append({"topK":[topK],"weights":[np.array(weights)/100],"MAP":[MAP]},ignore_index=True)
        print(topK,weights, MAP)

WeightedSimilarityHybridRecommender: URM Detected 82 (1.03 %) cold users.
WeightedSimilarityHybridRecommender: URM Detected 2372 (9.13 %) cold items.
EvaluatorHoldout: Processed 5657 ( 100.00% ) in 3.26 sec. Users per second: 1733
900 (0, 0, 100) 0.051072956092721895
EvaluatorHoldout: Processed 5657 ( 100.00% ) in 3.98 sec. Users per second: 1422
900 (0, 10, 90) 0.06355298889110235
EvaluatorHoldout: Processed 5657 ( 100.00% ) in 3.95 sec. Users per second: 1431
900 (0, 20, 80) 0.06625674172044747
EvaluatorHoldout: Processed 5657 ( 100.00% ) in 4.59 sec. Users per second: 1232
900 (0, 30, 70) 0.06133188619236765
EvaluatorHoldout: Processed 5657 ( 100.00% ) in 4.45 sec. Users per second: 1272
900 (0, 40, 60) 0.055198453924089785
EvaluatorHoldout: Processed 5657 ( 100.00% ) in 4.43 sec. Users per second: 1276
900 (0, 50, 50) 0.04916118740951222
EvaluatorHoldout: Processed 5657 ( 100.00% ) in 4.17 sec. Users per second: 1356
900 (0, 60, 40) 0.0438897675720259
EvaluatorHoldout: Processed 56

In [11]:
resultDF_sorted=resultDF.sort_values(by=['MAP'],ascending=False)
resultDF_sorted.to_csv("Hybrid_Weighted_Similarity_SameTopK_results_sorted_9001500.csv")

TypeError: '<' not supported between instances of 'str' and 'list'

In [15]:
RP3brecommender_full= RP3betaRecommender(URM_all)
RP3brecommender_full.fit(topK=1500, alpha=0.414615342821205, beta=0.17823717015919388, normalize_similarity=False)


ItemKNNCBFrecommender_full=ItemKNNCBFRecommender(URM_all, ICM_all)
ItemKNNCBFrecommender_full.fit(topK=1500, shrink=6, similarity="dice")

ItemKNNCFrecommender_full=ItemKNNCFRecommender(URM_all)
ItemKNNCFrecommender_full.fit(topK=1500, shrink=52, similarity='tversky', normalize=True, tversky_alpha=0.5985566775975192, tversky_beta=1.6463076214540535)

similarities_full=[RP3brecommender_full.W_sparse, ItemKNNCBFrecommender_full.W_sparse, ItemKNNCFrecommender_full.W_sparse]
recommender_full= WeightedSimilarityHybridRecommender(URM_all, similarities_full)
recommender_full.fit(np.array([0.4,0.4,0.2]), topK=800)

RP3betaRecommender: URM Detected 1079 (4.15 %) cold items.
ItemKNNCBFRecommender: URM Detected 1079 (4.15 %) cold items.
Similarity column 25975 ( 100 % ), 3444.49 column/sec, elapsed time 0.13 min
ItemKNNCFRecommender: URM Detected 1079 (4.15 %) cold items.
Similarity column 25975 ( 100 % ), 5914.12 column/sec, elapsed time 0.07 min
WeightedSimilarityHybridRecommender: URM Detected 1079 (4.15 %) cold items.


In [16]:
rec_df=pd.DataFrame({"user_id":[],"item_list":[]})
target_user_list=list(pd.read_csv("./Data/2020/data_target_users_test.csv").user_id)

for uid in target_user_list:
    rec_list=map(str,recommender_full.recommend(uid)[:10])
    rec_df=rec_df.append({"user_id":uid,"item_list":' '.join(rec_list)},ignore_index=True)

    
rec_df=rec_df.astype({"user_id":'int32',"item_list":'str'})
rec_df=rec_df.set_index('user_id')
rec_df.to_csv("Hybrid_Weighted_Similarity_SameTopK_recommendations.csv")