# Data loading 
Next cells are used to load the data we need.

In [1]:
import pandas as pd
import numpy as np 
import scipy.sparse as sps
import os
import sys
import matplotlib.pyplot as plt
sys.path.append('..')


In [2]:
#data which contains users (row), items(col) and implicit interaction (data)
dataset = pd.read_csv('data_train.csv')
dataset

Unnamed: 0,row,col,data
0,0,10080,1.0
1,0,19467,1.0
2,1,2665,1.0
3,1,7494,1.0
4,1,17068,1.0
...,...,...,...
113263,7945,2476,1.0
113264,7945,12319,1.0
113265,7945,21384,1.0
113266,7946,8699,1.0


# Data pre-processing
Pre-processing of data to check for missing users, items ecc...

In [3]:
users = dataset.row
items = dataset.col
data = dataset.data
URM_all = sps.coo_matrix((data, (users, items)))
URM_all = URM_all.tocsr() #fast row access -> fast access to users 
URM_all.shape

(7947, 25975)

In [4]:
ICM_df = pd.read_csv('data_ICM_title_abstract.csv')
ICM_df

Unnamed: 0,row,col,data
0,0,1185,1.015524
1,0,2507,0.459024
2,0,3534,0.227742
3,0,8766,0.501549
4,0,10862,0.297011
...,...,...,...
490686,25974,12554,0.963016
490687,25974,13003,0.104613
490688,25974,16236,0.118760
490689,25974,18797,0.363301


In [5]:
items = ICM_df.row
features = ICM_df.col
data = ICM_df.data
ICM_all = sps.coo_matrix((data, (items, features)))
ICM_all = ICM_all.tocsr() #fast row access -> fast access to users 
ICM_all.shape

(25975, 20000)

In [6]:
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

#splitting data into train and validation (from the previous training data)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)



In [7]:
URM_all

<7947x25975 sparse matrix of type '<class 'numpy.float64'>'
	with 113268 stored elements in Compressed Sparse Row format>

In [8]:
ICM_all

<25975x20000 sparse matrix of type '<class 'numpy.float64'>'
	with 490691 stored elements in Compressed Sparse Row format>

In [9]:
#evaluator for validation (used for hyperparameter tuning)
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

In [10]:
from GraphBased.RP3betaRecommender import RP3betaRecommender

recommender = RP3betaRecommender(URM_train)

RP3betaRecommender: URM Detected 41 (0.52 %) cold users.
RP3betaRecommender: URM Detected 2002 (7.71 %) cold items.


In [11]:
x_tick = []
collaborative_MAP = []

for topK in [70,90,100,110,120,130,150,200,250,300,350,400]:
    for alpha in [0.2,0.4,0.5,0.55,0.6,0.8,0.9]:
        for beta in [0.2,0.4,0.5,0.55,0.6,0.8,0.9]:
            x_tick.append("topk {}, alpha{}, beta{}".format(topK,alpha,beta))

            recommender = RP3betaRecommender(ICM_all.T)

            recommender.fit(topK=topK, alpha=alpha, beta=beta, implicit=False)
            recommender.URM_train = URM_train

            result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

            collaborative_MAP.append(result_dict[10]["MAP"])

RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.56 sec. Users per second: 1401
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.66 sec. Users per second: 1364
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.52 sec. Users per second: 1420
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.83 sec. Users per second: 1305
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.09 sec. Users per second: 1220
RP3betaRecommender: URM Detect

EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.24 sec. Users per second: 1178
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.01 sec. Users per second: 1245
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 5.19 sec. Users per second: 962
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.29 sec. Users per second: 1164
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.48 sec. Users per second: 1114
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 499

RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.36 sec. Users per second: 1147
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.90 sec. Users per second: 1281
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.65 sec. Users per second: 1368
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.80 sec. Users per second: 1315
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.83 sec. Users per second: 1303
RP3betaRecommender: URM Detect

EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.69 sec. Users per second: 1353
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.60 sec. Users per second: 1388
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.71 sec. Users per second: 1346
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.87 sec. Users per second: 1291
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.90 sec. Users per second: 1279
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 49

RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.75 sec. Users per second: 1333
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.77 sec. Users per second: 1325
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.88 sec. Users per second: 1288
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.62 sec. Users per second: 1380
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.64 sec. Users per second: 1373
RP3betaRecommender: URM Detect

EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.81 sec. Users per second: 1311
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.77 sec. Users per second: 1324
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.89 sec. Users per second: 1283
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.71 sec. Users per second: 1346
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.70 sec. Users per second: 1350
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 49

RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.73 sec. Users per second: 1337
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.82 sec. Users per second: 1309
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.72 sec. Users per second: 1344
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.64 sec. Users per second: 1372
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.78 sec. Users per second: 1321
RP3betaRecommender: URM Detect

EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.01 sec. Users per second: 1244
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.77 sec. Users per second: 1326
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.68 sec. Users per second: 1356
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.68 sec. Users per second: 1358
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.79 sec. Users per second: 1319
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 49

RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.78 sec. Users per second: 1322
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.82 sec. Users per second: 1309
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.87 sec. Users per second: 1290
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.86 sec. Users per second: 1294
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.98 sec. Users per second: 1255
RP3betaRecommender: URM Detect

EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.83 sec. Users per second: 1303
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.85 sec. Users per second: 1297
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.11 sec. Users per second: 1215
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.87 sec. Users per second: 1289
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.86 sec. Users per second: 1293
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 49

RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.96 sec. Users per second: 1261
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.95 sec. Users per second: 1264
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 3.97 sec. Users per second: 1259
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.14 sec. Users per second: 1205
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.09 sec. Users per second: 1220
RP3betaRecommender: URM Detect

EvaluatorHoldout: Processed 4994 ( 100.00% ) in 5.11 sec. Users per second: 977
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 5.31 sec. Users per second: 941
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 5.38 sec. Users per second: 927
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 5.45 sec. Users per second: 916
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 5.27 sec. Users per second: 947
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 

RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 5.04 sec. Users per second: 991
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.37 sec. Users per second: 1143
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 4.39 sec. Users per second: 1138
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 5.50 sec. Users per second: 908
RP3betaRecommender: URM Detected 2 (0.01 %) cold users.
RP3betaRecommender: URM Detected 25 (0.10 %) cold items.
EvaluatorHoldout: Processed 4994 ( 100.00% ) in 5.25 sec. Users per second: 952
RP3betaRecommender: URM Detected 

KeyboardInterrupt: 

In [12]:
import operator
index, value = max(enumerate(collaborative_MAP), key=operator.itemgetter(1))
print(index, value)
parameters = x_tick[index]
print(parameters)

1 0.037734466828826384
topk 70, alpha0.2, beta0.4


In [None]:
import matplotlib.ticker as ticker

fig, ax = plt.subplots(1, 1, figsize=(20, 9))
plt.plot(x_tick, collaborative_MAP, label="Collaborative")
ax.yaxis.set_major_locator(ticker.MultipleLocator(0.01))
plt.ylabel('MAP')
plt.xlabel('TopK')
plt.xticks(rotation='vertical')
plt.show()

In [None]:
test_users = pd.read_csv('data_target_users_test.csv')
test_users

In [None]:
# best model itemCBF topk 600, shrink200, weightTF-IDF

recommender = ItemKNNCBFRecommender(URM_all, ICM_all)
recommender.fit(topK=600, shrink=200, feature_weighting = 'TF-IDF')

user_id = test_users['user_id']
recommendations = recommender.recommend(user_id,cutoff = 10)


In [None]:
for index in range(len(recommendations)):
    #print(element)
    recommendations[index]=np.array(recommendations[index])
    #print(type(element))
print(len(recommendations))

In [None]:
test_users['item_list']= recommendations
#test_users['item_list'] =  test_users['item_list'].apply(lambda x: x.replace('[','').replace(']','')) 
test_users['item_list'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in test_users['item_list']])

#convert the string columns to int
#test_users['item_list'] = test_users['item_list'].astype(int)
test_users
test_users.to_csv('submission.csv', index=False)
