# XGBoost Reweighted ItemKNN-RP3b

In [1]:
#import libraries

import scipy.sparse as sps
import numpy as np
import os

URM_all= sps.load_npz("./Processed Matrices/URM_simple_coo_2020.npz").tocsr()
ICM_all= sps.load_npz("./Processed Matrices/ICM_simple_coo_2020.npz").tocsr()

In [2]:
%load_ext Cython

In [3]:
from Notebooks_utils.evaluation_function import evaluate_algorithm
from Base.Similarity.Compute_Similarity_Python import Compute_Similarity_Python
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

np.random.seed(41148)
URM_train, URM_valid = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.70)



In [4]:
from GraphBased.RP3betaRecommender import RP3betaRecommender
RP3brecommender= RP3betaRecommender(URM_train)
RP3brecommender.fit(topK=770, alpha=0.414615342821205, beta=0.17823717015919388, normalize_similarity=False)

RP3betaRecommender: URM Detected 158 (1.99 %) cold users.
RP3betaRecommender: URM Detected 3328 (12.81 %) cold items.


In [5]:
from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
ItemKNNrecommender=ItemKNNCBFRecommender(URM_train, ICM_all)
ItemKNNrecommender.fit(topK=393, shrink=6, similarity="dice")

ItemKNNCBFRecommender: URM Detected 158 (1.99 %) cold users.
ItemKNNCBFRecommender: URM Detected 3328 (12.81 %) cold items.
Similarity column 25975 ( 100 % ), 3655.99 column/sec, elapsed time 0.12 min


In [6]:
from KNN.ItemKNNSimilarityHybridRecommender import ItemKNNSimilarityHybridRecommender
Hybridrecommender= ItemKNNSimilarityHybridRecommender(URM_train, RP3brecommender.W_sparse, ItemKNNrecommender.W_sparse)
Hybridrecommender.fit(topK=1100, alpha=0.5)

ItemKNNSimilarityHybridRecommender: URM Detected 158 (1.99 %) cold users.
ItemKNNSimilarityHybridRecommender: URM Detected 3328 (12.81 %) cold items.


In [7]:
from Base.NonPersonalizedRecommender import TopPop
topPop = TopPop(URM_train)
topPop.fit()

TopPopRecommender: URM Detected 158 (1.99 %) cold users.
TopPopRecommender: URM Detected 3328 (12.81 %) cold items.


In [8]:
import xgboost as xgb
import pandas as pd 

target_user_list=list(pd.read_csv("./Data/2020/data_target_users_test.csv").user_id)
user_list=[]
recommendations=[]
scores=[]
profile_len=[]
popularity=[]
for uid in target_user_list:
    user_list+=([uid]*10)
    user_rec=Hybridrecommender.recommend(uid)[:10]
    recommendations+=user_rec
    scores+=list(Hybridrecommender._compute_item_score(uid)[0,user_rec])
    profile_len+=[URM_train[uid].nnz]*10
    popularity+=list(topPop._compute_item_score([uid])[0,user_rec])

X_train=pd.DataFrame({"user_id":user_list,"item_id":recommendations,"item_score":scores,"profile_len":profile_len,"popularity":popularity})
y_train=[URM_valid[u,i] for u,i in zip(user_list,recommendations)]

In [9]:
URM_num_elements=URM_all.shape[0]*URM_all.shape[1]
model=xgb.XGBClassifier(scale_pos_weight=(len(y_train)-sum(y_train))/len(y_train))
model.fit(X_train.drop(["user_id","item_id"],axis=1),y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=0, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=0.9580689828801612,
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=None)

In [10]:
RP3brecommender_full= RP3betaRecommender(URM_all)
RP3brecommender_full.fit(topK=770, alpha=0.414615342821205, beta=0.17823717015919388, normalize_similarity=False)

ItemKNNrecommender_full=ItemKNNCBFRecommender(URM_all, ICM_all)
ItemKNNrecommender_full.fit(topK=393, shrink=6, similarity="dice")

Hybridrecommender_full= ItemKNNSimilarityHybridRecommender(URM_all, RP3brecommender.W_sparse, ItemKNNrecommender.W_sparse)
Hybridrecommender_full.fit(topK=1100, alpha=0.5)

topPop_full = TopPop(URM_all)
topPop_full.fit()

user_list=[]
recommendations=[]
scores=[]
profile_len=[]
popularity=[]

for uid in target_user_list:
    user_list+=([uid]*10)
    user_rec=Hybridrecommender_full.recommend(uid)[:10]
    recommendations+=user_rec
    scores+=list(Hybridrecommender._compute_item_score(uid)[0,user_rec])
    profile_len+=[URM_train[uid].nnz]*10
    popularity+=list(topPop_full._compute_item_score([uid])[0,user_rec])

X=pd.DataFrame({"user_id":user_list,"item_id":recommendations,"item_score":scores,"profile_len":profile_len,"popularity":popularity})
y=model.predict_proba(X.drop(["user_id","item_id"],axis=1))

RP3betaRecommender: URM Detected 1079 (4.15 %) cold items.
ItemKNNCBFRecommender: URM Detected 1079 (4.15 %) cold items.
Similarity column 25975 ( 100 % ), 3908.27 column/sec, elapsed time 0.11 min
ItemKNNSimilarityHybridRecommender: URM Detected 1079 (4.15 %) cold items.
TopPopRecommender: URM Detected 1079 (4.15 %) cold items.


In [11]:
df_complete=X
df_complete["prediction"]=y[:,1]

rec_df=pd.DataFrame({"user_id":[],"item_list":[]})

for uid in target_user_list:
    user_final_rec=df_complete[df_complete["user_id"]==uid][["item_id","prediction"]].sort_values(["prediction"],ascending=False)[:10].item_id.values
    rec_list=map(str,user_final_rec)
    rec_df=rec_df.append({"user_id":uid,"item_list":' '.join(rec_list)},ignore_index=True)

    
rec_df=rec_df.astype({"user_id":'int32',"item_list":'str'})
rec_df=rec_df.set_index('user_id')
rec_df.to_csv("XGBoost_ItemCBF_RP3b_recommendations.csv")