# Data loading 
Next cells are used to load the data we need.

In [1]:
import pandas as pd
import numpy as np 
import scipy.sparse as sps
import os
import sys
import matplotlib.pyplot as plt
sys.path.append('..')


In [2]:
#data which contains users (row), items(col) and implicit interaction (data)
dataset = pd.read_csv('data_train.csv')
dataset

Unnamed: 0,row,col,data
0,0,10080,1.0
1,0,19467,1.0
2,1,2665,1.0
3,1,7494,1.0
4,1,17068,1.0
...,...,...,...
113263,7945,2476,1.0
113264,7945,12319,1.0
113265,7945,21384,1.0
113266,7946,8699,1.0


# Data pre-processing
Pre-processing of data to check for missing users, items ecc...

In [3]:
users = dataset.row
items = dataset.col
data = dataset.data
URM_all = sps.coo_matrix((data, (users, items)))
URM_all = URM_all.tocsr() #fast row access -> fast access to users 
URM_all.shape

(7947, 25975)

In [4]:
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])



In [5]:
from GraphBased.RP3betaRecommender import RP3betaRecommender

recommender = RP3betaRecommender(URM_train)

RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.


In [6]:
x_tick = []
collaborative_MAP = []
max_map = 0
x_tick_max = ""

for topK in [10,40,70,75,100]:
    for alpha in [0.1,0.2,0.4,0.45]:
        for beta in [0.1,0.2,0.4,0.45]:
            x_tick.append("topk {}, alpha{}, beta{}".format(topK,alpha,beta))

            recommender = RP3betaRecommender(URM_train)

            recommender.fit(topK=topK, alpha=alpha, beta=beta, implicit=True)

            result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

            collaborative_MAP.append(result_dict[10]["MAP"])

            if result_dict[10]["MAP"] > max_map:
                max_map = result_dict[10]["MAP"]
                x_tick_max = "topk {}, alpha{}, beta{}".format(topK,alpha,beta)

print("********")
print(max_map)
print(x_tick_max)

RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.
EvaluatorHoldout: Processed 5056 ( 100.00% ) in 3.21 sec. Users per second: 1574
RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.
EvaluatorHoldout: Processed 5056 ( 100.00% ) in 3.33 sec. Users per second: 1520
RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.
EvaluatorHoldout: Processed 5056 ( 100.00% ) in 3.04 sec. Users per second: 1661
RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.
EvaluatorHoldout: Processed 5056 ( 100.00% ) in 3.37 sec. Users per second: 1502
RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.
EvaluatorHoldout: Processed 5056 ( 100.00% ) in 3.22 sec. Users per second: 1571
RP3betaRecommen

EvaluatorHoldout: Processed 5056 ( 100.00% ) in 3.17 sec. Users per second: 1596
RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.
EvaluatorHoldout: Processed 5056 ( 100.00% ) in 3.20 sec. Users per second: 1581
RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.
EvaluatorHoldout: Processed 5056 ( 100.00% ) in 3.20 sec. Users per second: 1579
RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.
EvaluatorHoldout: Processed 5056 ( 100.00% ) in 3.15 sec. Users per second: 1608
RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.
EvaluatorHoldout: Processed 5056 ( 100.00% ) in 3.35 sec. Users per second: 1509
RP3betaRecommender: URM Detected 36 (0.45 %) cold users.
RP3betaRecommender: URM Detected 1999 (7.70 %) cold items.
EvaluatorHoldou

In [None]:
import operator
index, value = max(enumerate(collaborative_MAP), key=operator.itemgetter(1))
print(index, value)
parameters = x_tick[index]
print(parameters)

In [None]:
import matplotlib.ticker as ticker

fig, ax = plt.subplots(1, 1, figsize=(15, 9))
plt.plot(x_tick, collaborative_MAP, label="Collaborative")
ax.yaxis.set_major_locator(ticker.MultipleLocator(0.001))
plt.ylabel('MAP')
plt.xlabel('TopK')
plt.xticks(rotation='vertical')
plt.show()

In [None]:
test_users = pd.read_csv('data_target_users_test.csv')
test_users

In [None]:
recommender = RP3betaRecommender(URM_all)

recommender.fit(topK=70, alpha=0.4, beta=0.2, implicit=True)

user_id = test_users['user_id']
recommendations = recommender.recommend(user_id,cutoff = 10)


In [None]:
for index in range(len(recommendations)):
    #print(element)
    recommendations[index]=np.array(recommendations[index])
    #print(type(element))
print(len(recommendations))

In [None]:
test_users['item_list']= recommendations
#test_users['item_list'] =  test_users['item_list'].apply(lambda x: x.replace('[','').replace(']','')) 
test_users['item_list'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in test_users['item_list']])

#convert the string columns to int
#test_users['item_list'] = test_users['item_list'].astype(int)
test_users
test_users.to_csv('submission.csv', index=False)
