# Recommender with users split by number of interactions

In [252]:
import pandas as pd
import numpy as np 
import scipy.sparse as sps
import os
import sys
import matplotlib.pyplot as plt
sys.path.append('..')

# Interaction triplets; the CSV's own header row is discarded (header=0) and
# replaced by the explicit column names below.
dataset = pd.read_csv(
    'data_train.csv',
    names=["user", "item", "interaction"],
    header=0,
)

# One row per user: every column holds how many interaction rows that user has.
# Only this count is needed, to bucket users by how active they are.
grouped_users = dataset.groupby(['user']).count()

# Layout of splitted_user:
#   splitted_user[k - 1] -> Series of user ids with exactly k interactions (k = 1..10)
#   splitted_user[10]    -> Series of user ids with more than 10 interactions
splitted_user = list()

for group_id in range(1, 11):
    # Boolean indexing already returns a new frame, so no explicit copy is needed.
    grouped_users_local = grouped_users[grouped_users['item'] == group_id].reset_index()['user']
    splitted_user.append(grouped_users_local)
    print("**********: ", group_id, " Lenght: ", len(grouped_users_local))

# Last bucket: every user with more than 10 interactions (its size is not printed).
grouped_users_local = grouped_users[grouped_users['item'] > 10].reset_index()['user']
splitted_user.append(grouped_users_local)


**********:  1  Lenght:  93
**********:  2  Lenght:  995
**********:  3  Lenght:  1148
**********:  4  Lenght:  933
**********:  5  Lenght:  605
**********:  6  Lenght:  490
**********:  7  Lenght:  413
**********:  8  Lenght:  607
**********:  9  Lenght:  250
**********:  10  Lenght:  179


# Data pre-processing

In [253]:
# Build the user-item interaction matrix: row index = user id, column index = item id,
# stored value = the 'interaction' column. The shape is inferred from the largest ids.
users = dataset['user']
items = dataset['item']
data = dataset['interaction']

# COO is only the construction format; CSR gives fast row access,
# i.e. fast access to a single user's interactions.
URM_all = sps.coo_matrix((data, (users, items))).tocsr()
URM_all.shape

(7947, 25975)

In [254]:
# Item content data (title/abstract), stored as (row, col, data) triplets.
ICM_df = pd.read_csv('data_ICM_title_abstract.csv')

# NOTE: `items` and `data` are re-bound here; from this cell on they refer to the
# ICM triplets, no longer to the interaction triplets used to build URM_all.
items = ICM_df['row']
features = ICM_df['col']
data = ICM_df['data']

# Row index = item, column index = feature. CSR gives fast row access,
# i.e. fast access to a single item's features.
ICM_all = sps.coo_matrix((data, (items, features))).tocsr()

In [255]:
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Hold-out split of the interactions (train_percentage=0.80 -> presumably 80% train / 20% validation).
# NOTE(review): no seed is passed; if the helper samples randomly, the split and every
# metric computed from it may differ between runs -- confirm.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.80,
)

# Evaluator restricted to "group three". Despite the name, the users kept are those with
# 2 OR 3 interactions: everyone outside [2, 3] is handed to the evaluator as ignore_users.
# The counts come from the full dataset (grouped_users), not from URM_train.
users_not_in_group_three = grouped_users.loc[
    ~((grouped_users['item'] >= 2) & (grouped_users['item'] <= 3))
].reset_index()['user']
evaluator_validation_three = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
    ignore_users=users_not_in_group_three,
)

# Evaluator restricted to the "other" group: users with more than 10 interactions
# (everyone with 10 or fewer is ignored).
users_not_in_group_other = grouped_users.loc[grouped_users['item'] < 11].reset_index()['user']
evaluator_validation_other = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
    ignore_users=users_not_in_group_other,
)

EvaluatorHoldout: Ignoring 5804 Users
EvaluatorHoldout: Ignoring 5713 Users


In [256]:
from GraphBased.P3alphaRecommender import P3alphaRecommender
from KNN.ItemKNNSimilarityHybridRecommender import ItemKNNSimilarityHybridRecommender

# Collaborative model: P3alpha fitted on the training interactions.
recommender_alpha = P3alphaRecommender(URM_train)
recommender_alpha.fit(
    topK=475,
    alpha=0.45,
    implicit=True,
)

# Content-based model: the same algorithm fed with the transposed ICM
# (features as rows, items as columns) in place of a user-item matrix.
# NOTE(review): the saved output of this cell logs "ItemKNNCBFRecommender" and a
# similarity computation, neither of which this code instantiates -- the output
# appears to come from an earlier version of the cell; re-run to confirm.
recommender_alpha_ICM = P3alphaRecommender(ICM_all.T)
recommender_alpha_ICM.fit(
    topK=175,
    alpha=0.45,
)
# The model was built on ICM_all.T; point it back at the real user-item matrix.
recommender_alpha_ICM.URM_train = URM_train

# Hybrid built from the two similarity matrices (content-based first, collaborative second).
hybridrecommender = ItemKNNSimilarityHybridRecommender(
    URM_train,
    recommender_alpha_ICM.W_sparse,
    recommender_alpha.W_sparse,
)
hybridrecommender.fit(
    topK=600,
    alpha=0.45,
)

P3alphaRecommender: URM Detected 42 (0.53 %) cold users.
P3alphaRecommender: URM Detected 1985 (7.64 %) cold items.
ItemKNNCBFRecommender: URM Detected 42 (0.53 %) cold users.
ItemKNNCBFRecommender: URM Detected 1985 (7.64 %) cold items.
Similarity column 25975 ( 100 % ), 4314.31 column/sec, elapsed time 0.10 min
ItemKNNSimilarityHybridRecommender: URM Detected 42 (0.53 %) cold users.
ItemKNNSimilarityHybridRecommender: URM Detected 1985 (7.64 %) cold items.


In [257]:
# Score the hybrid on URM_validation at cutoff 10, only for users with 2 or 3
# interactions in the full dataset (all other users are in this evaluator's ignore_users).
# The displayed value is a tuple: (metrics dict keyed by cutoff, one-line summary string).
evaluator_validation_three.evaluateRecommender(hybridrecommender)

EvaluatorHoldout: Processed 715 ( 100.00% ) in 0.55 sec. Users per second: 1307


({10: {'ROC_AUC': 0.09468725718725718,
   'PRECISION': 0.013566433566433543,
   'PRECISION_RECALL_MIN_DEN': 0.12377622377622377,
   'RECALL': 0.12377622377622377,
   'MAP': 0.06532190032190036,
   'MRR': 0.06987401487401491,
   'NDCG': 0.08036849744670041,
   'F1': 0.024452736672695898,
   'HIT_RATE': 0.13566433566433567,
   'ARHR': 0.0706549006549007,
   'NOVELTY': 0.005167119925985794,
   'AVERAGE_POPULARITY': 0.05651776794633939,
   'DIVERSITY_MEAN_INTER_LIST': 0.9949952008775538,
   'DIVERSITY_HERFINDAHL': 0.9993603599198004,
   'COVERAGE_ITEM': 0.17605389797882579,
   'COVERAGE_ITEM_CORRECT': 0.0034648700673724736,
   'COVERAGE_USER': 0.33364442370508635,
   'COVERAGE_USER_CORRECT': 0.04386374241717219,
   'DIVERSITY_GINI': 0.12047648071054293,
   'SHANNON_ENTROPY': 11.660782357928408}},
 'CUTOFF: 10 - ROC_AUC: 0.0946873, PRECISION: 0.0135664, PRECISION_RECALL_MIN_DEN: 0.1237762, RECALL: 0.1237762, MAP: 0.0653219, MRR: 0.0698740, NDCG: 0.0803685, F1: 0.0244527, HIT_RATE: 0.1356643

In [258]:
# Score the hybrid on URM_validation at cutoff 10, only for users with more than 10
# interactions in the full dataset (users with 10 or fewer are in this evaluator's ignore_users).
# The displayed value is a tuple: (metrics dict keyed by cutoff, one-line summary string).
evaluator_validation_other.evaluateRecommender(hybridrecommender)

EvaluatorHoldout: Processed 2122 ( 100.00% ) in 1.90 sec. Users per second: 1115


({10: {'ROC_AUC': 0.26072919004233847,
   'PRECISION': 0.06159283694627648,
   'PRECISION_RECALL_MIN_DEN': 0.15350055353589737,
   'RECALL': 0.14587380521132323,
   'MAP': 0.06839922170739338,
   'MRR': 0.19958896219499442,
   'NDCG': 0.11561002001880855,
   'F1': 0.0866142277686139,
   'HIT_RATE': 0.615928369462771,
   'ARHR': 0.23703342159986865,
   'NOVELTY': 0.0047380513610791285,
   'AVERAGE_POPULARITY': 0.11558777543601252,
   'DIVERSITY_MEAN_INTER_LIST': 0.9795154687139644,
   'DIVERSITY_HERFINDAHL': 0.9979053868587332,
   'COVERAGE_ITEM': 0.22641000962463909,
   'COVERAGE_ITEM_CORRECT': 0.032146294513955724,
   'COVERAGE_USER': 0.9498657117278424,
   'COVERAGE_USER_CORRECT': 0.40062667860340195,
   'DIVERSITY_GINI': 0.08119462190430549,
   'SHANNON_ENTROPY': 10.7433465865132}},
 'CUTOFF: 10 - ROC_AUC: 0.2607292, PRECISION: 0.0615928, PRECISION_RECALL_MIN_DEN: 0.1535006, RECALL: 0.1458738, MAP: 0.0683992, MRR: 0.1995890, NDCG: 0.1156100, F1: 0.0866142, HIT_RATE: 0.6159284, ARHR: