# Collaborative Filtering Script

In [5]:
import scipy.sparse as sps
import numpy as np
import os
import pandas as pd 

# Load the pre-processed sparse matrices saved in .npz format and convert
# each one to CSR layout before it is used further down.
URM_all = sps.load_npz("./Processed Matrices/URM_simple_coo_2020.npz")
URM_all = URM_all.tocsr()

ICM_all = sps.load_npz("./Processed Matrices/ICM_simple_coo_2020.npz")
ICM_all = ICM_all.tocsr()

In [6]:
from Notebooks_utils.evaluation_function import evaluate_algorithm
from Base.Similarity.Compute_Similarity_Python import Compute_Similarity_Python
from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from KNN.UserKNNCFRecommender import UserKNNCFRecommender
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split the full interaction matrix into a training part and a validation
# part, asking the helper for an 80% training share.
# NOTE(review): no random seed is set here, so the split presumably differs
# between runs — confirm against the helper's implementation.
URM_train, URM_valid = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.80,
)



In [9]:
# Grid search over the KNN collaborative-filtering hyper-parameters.
# Every (recommender, similarity, shrink, topK, feature weighting) combination
# is fitted on URM_train and scored on URM_valid; the MAP at cutoff 10 is kept.
similarity_list = ["cosine", "adjusted"]
vals = [50, 100, 200]  # candidate values, used for both shrink and topK
feature_weighting = ['BM25', 'TF-IDF', 'none']
itemCF = ItemKNNCFRecommender(URM_train)
userCF = UserKNNCFRecommender(URM_train)
rec_list = [userCF, itemCF]
evaluator_validation = EvaluatorHoldout(URM_valid, cutoff_list=[10])

result_columns = ["recommender", "similarity", "shrink", "topK", "MAP", "feature_weighting"]

# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call. If the search is interrupted, the partial results are still
# available in result_rows.
result_rows = []

for recommender in rec_list:
    for similarity in similarity_list:
        for shrink in vals:
            for topK in vals:
                for fw in feature_weighting:
                    recommender.fit(topK=topK, shrink=shrink, similarity=similarity, feature_weighting=fw)
                    MAP, _ = evaluator_validation.evaluateRecommender(recommender)
                    MAP = MAP[10]["MAP"]
                    # Each value is intentionally wrapped in a one-element list:
                    # that is the cell layout the original code produced, and
                    # later cells read the values back with a trailing [0].
                    result_rows.append({
                        "recommender": [type(recommender)],
                        "similarity": [similarity],
                        "shrink": [shrink],
                        "topK": [topK],
                        "MAP": [MAP],
                        "feature_weighting": [fw],
                    })
                    print(str(type(recommender)), similarity, shrink, topK, MAP, fw)

# Passing columns explicitly keeps the column order (and the columns themselves
# when no row was produced).
resultDF = pd.DataFrame(result_rows, columns=result_columns)
resultDF.to_csv("CF_results")

ItemKNNCFRecommender: URM Detected 67 (0.84 %) cold users.
ItemKNNCFRecommender: URM Detected 2459 (9.47 %) cold items.
UserKNNCFRecommender: URM Detected 67 (0.84 %) cold users.
UserKNNCFRecommender: URM Detected 2459 (9.47 %) cold items.
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 7947 ( 100 % ), 3600.26 column/sec, elapsed time 0.04 min
EvaluatorHoldout: Processed 5683 ( 100.00% ) in 3.26 sec. Users per second: 1743
<class 'KNN.UserKNNCFRecommender.UserKNNCFRecommender'> cosine 50 50 0.02896905703124959 BM25
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 7947 ( 100 % ), 3877.25 column/sec, elapsed time 0.03 min
EvaluatorHoldout: Processed 5683 ( 100.00% ) in 3.28 sec. Users per second: 1733
<class 'KNN.UserKNNCFRecommender.UserKNNCFRecommender'> cosine 50 50 0.042837783308721004 TF-IDF
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 7947 ( 100 % ), 3647.79 column/sec, elapsed time 0.

In [10]:
# Order the grid-search results from highest to lowest MAP and store the
# ranked table on disk.
resultDF_sorted = resultDF.sort_values('MAP', ascending=False)
resultDF_sorted.to_csv(path_or_buf="CF_results_sorted.csv")

In [28]:
# Refit the best configuration found by the grid search on the full URM.
# The first row of resultDF_sorted is the highest-MAP configuration; every cell
# of that table holds a one-element list, hence the trailing [0].
best_config = resultDF_sorted.iloc[0]

# Instantiate the recommender class that actually achieved the best score
# (the class object was stored in the "recommender" column) instead of always
# using UserKNNCFRecommender, so that the hyper-parameters are never applied to
# a different model than the one they were tuned for.
best_recommender_class = best_config['recommender'][0]
recommender_full = best_recommender_class(URM_all)
recommender_full.fit(
    topK=best_config['topK'][0],
    shrink=best_config['shrink'][0],
    similarity=best_config['similarity'][0],
    feature_weighting=best_config['feature_weighting'][0],
)

UserKNNCFRecommender: URM Detected 1079 (4.15 %) cold items.
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 7947 ( 100 % ), 3763.28 column/sec, elapsed time 0.04 min


In [12]:
# Build the submission file: one row per target user with the first 10
# recommended item ids joined by spaces.
target_user_list = list(pd.read_csv("./Data/2020/data_target_users_test.csv").user_id)

# Collect all rows first and create the DataFrame once: DataFrame.append was
# removed in pandas 2.0 and re-copied the frame on every iteration. A dedicated
# local name is used so the grid-search variable rec_list is not overwritten.
rec_rows = [
    {
        "user_id": uid,
        "item_list": ' '.join(map(str, recommender_full.recommend(uid)[:10])),
    }
    for uid in target_user_list
]

rec_df = pd.DataFrame(rec_rows, columns=["user_id", "item_list"])
rec_df = rec_df.astype({"user_id": 'int32', "item_list": 'str'})
rec_df = rec_df.set_index('user_id')
rec_df.to_csv("CF_recomm_100_50_cosine.csv")