# Recommender with users split by interaction count

In [330]:
import pandas as pd
import numpy as np 
import scipy.sparse as sps
import os
import sys
import matplotlib.pyplot as plt
sys.path.append('..')

# Load the training interactions. header=0 marks the first row of the file as a header
# so that it is replaced by the short column names given in `names`.
dataset = pd.read_csv(
    'data_train.csv',
    header=0,
    names=["user", "item", "interaction"],
)

# Count the rows recorded for every user. The counts are used to bucket users by how
# many interactions they have: one bucket per exact count from 1 to 10, plus a final
# bucket for everyone above 10.
grouped_users = dataset.groupby(['user']).count()
splitted_user = list()

# Buckets 1..10: user ids whose interaction count is exactly `group_id`.
for group_id in range(1, 11):
    grouped_users_local = grouped_users.loc[grouped_users["item"] == group_id].reset_index()['user']
    splitted_user.append(grouped_users_local)
    print("**********: ", group_id, " Lenght: ", len(grouped_users_local))

# Last bucket (index 10 of the list): user ids with more than 10 interactions.
# Its size is not printed.
grouped_users_local = grouped_users.loc[grouped_users["item"] > 10].reset_index()['user']
splitted_user.append(grouped_users_local)


**********:  1  Lenght:  93
**********:  2  Lenght:  995
**********:  3  Lenght:  1148
**********:  4  Lenght:  933
**********:  5  Lenght:  605
**********:  6  Lenght:  490
**********:  7  Lenght:  413
**********:  8  Lenght:  607
**********:  9  Lenght:  250
**********:  10  Lenght:  179


# Data pre-processing

In [331]:
# Column views of the interaction table: row index, column index and stored value
# of the user-item matrix, respectively.
users = dataset["user"]
items = dataset["item"]
data = dataset["interaction"]

# Assemble the matrix from (value, (row, col)) triplets, then convert to CSR so that
# slicing a single user's row is fast. No shape is passed, so it is inferred from the
# largest user / item index present in the data.
URM_all = sps.coo_matrix((data, (users, items))).tocsr()
URM_all.shape

(7947, 25975)

In [332]:
# Item content data stored as sparse triplets in the columns `row`, `col` and `data`.
ICM_df = pd.read_csv('data_ICM_title_abstract.csv')

# NOTE: `items` and `data` are rebound here; from this cell on they refer to the ICM
# triplets, not to the interaction table.
items = ICM_df["row"]
features = ICM_df["col"]
data = ICM_df["data"]

# Rows are items and columns are features; CSR gives fast access to one item's row.
ICM_all = sps.coo_matrix((data, (items, features))).tocsr()

In [333]:
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Hold out part of the interactions for validation (train_percentage is 0.80).
# NOTE(review): no seed is set in the visible cells before this call — presumably the
# sampled split differs between runs; confirm against the split helper.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.80)

# The user groups below rely on `grouped_users`, i.e. on counts taken over the full
# dataset rather than over URM_train.

# Users with more than 10 interactions are handed to the evaluator as users to ignore,
# so `evaluator_validation_three` scores the users with at most 10 interactions.
users_not_in_group_three = grouped_users.loc[grouped_users["item"] > 10].reset_index()['user']
evaluator_validation_three = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
    ignore_users=users_not_in_group_three,
)

# Complementary evaluator: users with fewer than 11 interactions are ignored, so
# `evaluator_validation_other` scores the users with more than 10 interactions.
users_not_in_group_other = grouped_users.loc[grouped_users["item"] < 11].reset_index()['user']
evaluator_validation_other = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
    ignore_users=users_not_in_group_other,
)

EvaluatorHoldout: Ignoring 2234 Users
EvaluatorHoldout: Ignoring 5713 Users


In [334]:
from GraphBased.P3alphaRecommender import P3alphaRecommender
from KNN.ItemKNNSimilarityHybridRecommender import ItemKNNSimilarityHybridRecommender

# Model 1: P3alpha fitted on the training interactions.
recommender_alpha = P3alphaRecommender(URM_train)
recommender_alpha.fit(
    topK=475,
    alpha=0.45,
    implicit=True,
)

# Model 2: the same algorithm fed with the transposed ICM (features as rows, items as
# columns) in place of a user-item matrix.
recommender_alpha_ICM = P3alphaRecommender(ICM_all.T)
recommender_alpha_ICM.fit(
    topK=175,
    alpha=0.45,
)
# After fitting, the matrix held by the recommender is swapped for the real URM_train.
# NOTE(review): presumably needed only if this object is used to recommend directly —
# below only its W_sparse is consumed; confirm.
recommender_alpha_ICM.URM_train = URM_train

# Hybrid built from the two W_sparse matrices (content-based one first, collaborative
# one second). NOTE(review): the roles of `topK` and `alpha` in this fit are defined
# by the framework class, not visible here.
hybridrecommender = ItemKNNSimilarityHybridRecommender(
    URM_train,
    recommender_alpha_ICM.W_sparse,
    recommender_alpha.W_sparse,
)
hybridrecommender.fit(topK=600, alpha=0.45)

P3alphaRecommender: URM Detected 73 (0.92 %) cold users.
P3alphaRecommender: URM Detected 2442 (9.40 %) cold items.
P3alphaRecommender: URM Detected 2 (0.01 %) cold users.
P3alphaRecommender: URM Detected 25 (0.10 %) cold items.
ItemKNNSimilarityHybridRecommender: URM Detected 73 (0.92 %) cold users.
ItemKNNSimilarityHybridRecommender: URM Detected 2442 (9.40 %) cold items.


In [335]:
# Validation metrics at cutoff 10 for the users this evaluator keeps: those with at most
# 10 recorded interactions (users with more than 10 were passed to it as ignore_users).
evaluator_validation_three.evaluateRecommender(hybridrecommender)

EvaluatorHoldout: Processed 3426 ( 100.00% ) in 2.05 sec. Users per second: 1669


({10: {'ROC_AUC': 0.137848177800017,
   'PRECISION': 0.023146526561587822,
   'PRECISION_RECALL_MIN_DEN': 0.15522961665693708,
   'RECALL': 0.15522961665693708,
   'MAP': 0.07014374399626877,
   'MRR': 0.09538658623597325,
   'NDCG': 0.0975620915044384,
   'F1': 0.04028595282154012,
   'HIT_RATE': 0.23146526561587857,
   'ARHR': 0.10070504267089206,
   'NOVELTY': 0.004997691439875659,
   'AVERAGE_POPULARITY': 0.08031757883198197,
   'DIVERSITY_MEAN_INTER_LIST': 0.9911868792105027,
   'DIVERSITY_HERFINDAHL': 0.9990897566052531,
   'COVERAGE_ITEM': 0.41070259865255054,
   'COVERAGE_ITEM_CORRECT': 0.02356111645813282,
   'COVERAGE_USER': 0.5996849291090496,
   'COVERAGE_USER_CORRECT': 0.1256782776124628,
   'DIVERSITY_GINI': 0.17861433141122138,
   'SHANNON_ENTROPY': 12.068064204314895}},
 'CUTOFF: 10 - ROC_AUC: 0.1378482, PRECISION: 0.0231465, PRECISION_RECALL_MIN_DEN: 0.1552296, RECALL: 0.1552296, MAP: 0.0701437, MRR: 0.0953866, NDCG: 0.0975621, F1: 0.0402860, HIT_RATE: 0.2314653, ARHR:

In [336]:
# Validation metrics at cutoff 10 for the users this evaluator keeps: those with more
# than 10 recorded interactions (users with fewer than 11 were passed as ignore_users).
evaluator_validation_other.evaluateRecommender(hybridrecommender)

EvaluatorHoldout: Processed 2183 ( 100.00% ) in 1.78 sec. Users per second: 1227


({10: {'ROC_AUC': 0.2960198212740592,
   'PRECISION': 0.07819514429683815,
   'PRECISION_RECALL_MIN_DEN': 0.15674221436933264,
   'RECALL': 0.14449560218315274,
   'MAP': 0.07057334148688053,
   'MRR': 0.2252793956183782,
   'NDCG': 0.12189994760640938,
   'F1': 0.10147574285476986,
   'HIT_RATE': 0.7819514429683921,
   'ARHR': 0.2817756618604081,
   'NOVELTY': 0.004690630131864422,
   'AVERAGE_POPULARITY': 0.12248667029496983,
   'DIVERSITY_MEAN_INTER_LIST': 0.9774318928911979,
   'DIVERSITY_HERFINDAHL': 0.9976984145803295,
   'COVERAGE_ITEM': 0.2148604427333975,
   'COVERAGE_ITEM_CORRECT': 0.03980750721847931,
   'COVERAGE_USER': 0.9771709937332139,
   'COVERAGE_USER_CORRECT': 0.4740376007162041,
   'DIVERSITY_GINI': 0.07312607599586976,
   'SHANNON_ENTROPY': 10.593860531245443}},
 'CUTOFF: 10 - ROC_AUC: 0.2960198, PRECISION: 0.0781951, PRECISION_RECALL_MIN_DEN: 0.1567422, RECALL: 0.1444956, MAP: 0.0705733, MRR: 0.2252794, NDCG: 0.1218999, F1: 0.1014757, HIT_RATE: 0.7819514, ARHR: 0.