In [1]:
# Recursively copy the contents of the attached course repository into the current
# working directory. Presumably this is what makes the Data_manager, Evaluation and
# Recommenders packages imported by the cells below importable -- verify against the
# attached dataset.
!cp -r ../input/recsys-repo/RecSys_Course_AT_PoliMi-master/* ./

# Loading Data

In [2]:
%config Completer.use_jedi = False
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

In [3]:
# Read the training interactions (one row per user-item interaction) and give the
# three columns the names used throughout the rest of the notebook.
URM_path = "../input/recommender-system-2021-challenge-polimi/data_train.csv"
URM_column_names = ["UserID", "ItemID", "Interaction"]
URM_column_dtypes = {0: int, 1: int, 2: float}  # keyed by column position

URM_all_dataframe = pd.read_csv(
    filepath_or_buffer=URM_path,
    sep=",",
    header=0,  # the first line of the file is a header row
    dtype=URM_column_dtypes,
)
URM_all_dataframe.columns = URM_column_names

In [4]:
# Preview the first five interaction rows to check the column layout.
URM_all_dataframe.head(n=5)

Unnamed: 0,UserID,ItemID,Interaction
0,0,53,1.0
1,0,209,1.0
2,0,223,1.0
3,0,249,1.0
4,0,435,1.0


In [5]:
# Basic statistics of the interaction data: number of distinct users/items,
# largest IDs, average interactions per user/item and overall sparsity.
userID_unique = URM_all_dataframe["UserID"].unique()
itemID_unique = URM_all_dataframe["ItemID"].unique()

n_users, n_items = len(userID_unique), len(itemID_unique)
n_interactions = len(URM_all_dataframe)

# Fraction of all (user, item) pairs that have a recorded interaction.
density = n_interactions / (n_items * n_users)

summary_lines = [
    "Number of items\t {}, Number of users\t {}".format(n_items, n_users),
    "Max ID items\t {}, Max Id users\t {}\n".format(itemID_unique.max(), userID_unique.max()),
    "Average interactions per user {:.2f}".format(n_interactions / n_users),
    "Average interactions per item {:.2f}\n".format(n_interactions / n_items),
    "Sparsity {:.2f} %".format((1 - density) * 100),
]
print(*summary_lines, sep="\n")

Number of items	 18059, Number of users	 13650
Max ID items	 18058, Max Id users	 13649

Average interactions per user 387.23
Average interactions per item 292.69

Sparsity 97.86 %


In [6]:
# Assemble the user-item interaction matrix: rows are indexed by UserID, columns by
# ItemID, and the stored values are the Interaction column.
interaction_values = URM_all_dataframe["Interaction"].values
user_indices = URM_all_dataframe["UserID"].values
item_indices = URM_all_dataframe["ItemID"].values

URM_coordinates = sps.coo_matrix((interaction_values, (user_indices, item_indices)))
URM_all = URM_coordinates.tocsr()  # CSR gives fast access to rows (users)
URM_all

<13650x18059 sparse matrix of type '<class 'numpy.float64'>'
	with 5285664 stored elements in Compressed Sparse Row format>

# Data processing and basic tuning setup

In [7]:
# Draw three independent 80/20 train/validation hold-out splits of URM_all so that
# each hyperparameter configuration can be compared on more than one split.
# `split_train_in_two_percentage_global_sample` is imported in the import cell at the
# top of the notebook.

# Fix the seed so that Restart & Run All reproduces the same three splits.
# NOTE(review): this assumes the split helper draws from NumPy's global RNG -- confirm
# in Data_manager/split_functions; if it does not, the seed is simply a no-op here.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# The generator is seeded once, so the three consecutive calls still produce three
# different (but repeatable) splits.
URM_train_1, URM_valid_1 = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)
URM_train_2, URM_valid_2 = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)
URM_train_3, URM_valid_3 = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)



In [8]:
from Evaluation.Evaluator import EvaluatorHoldout

# One hold-out evaluator per validation split, each computing metrics at cutoff 10.
# They are used to compare hyperparameter configurations.
evaluator_valid_1, evaluator_valid_2, evaluator_valid_3 = (
    EvaluatorHoldout(URM_valid, cutoff_list=[10])
    for URM_valid in (URM_valid_1, URM_valid_2, URM_valid_3)
)

EvaluatorHoldout: Ignoring 13644 ( 0.0%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 13643 ( 0.1%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 13643 ( 0.1%) Users that have less than 1 test interactions


In [9]:
import os

# Folder that receives the artefacts saved by this notebook (e.g. the trained model).
output_folder_path = "result_experiments/"

# Create the folder (and any missing parents). `exist_ok=True` makes the call a no-op
# when the directory is already there, without a separate existence check that could
# race with another process creating it.
os.makedirs(output_folder_path, exist_ok=True)

# Model comparison

In [10]:
from Recommenders.MatrixFactorization.IALSRecommender import IALSRecommender

# Compare four IALS hyperparameter configurations on split 1: every candidate is
# trained on URM_train_1 and then scored with the split-1 validation evaluator.
old_best_1 = IALSRecommender(URM_train_1)
new_best_1 = IALSRecommender(URM_train_1)
second_new_best_1 = IALSRecommender(URM_train_1)
third_new_best_1 = IALSRecommender(URM_train_1)

# `epochs` is an iteration count, so it is passed as an int; the float values used
# previously leaked into the training log ("Epoch 1 of 75.0").
old_best_1.fit(
    num_factors=49, confidence_scaling="linear",
    alpha=0.7374080457174151, epsilon=2.4848274666036083,
    reg=4.473996989962275e-05, epochs=75,
)
new_best_1.fit(
    num_factors=55, confidence_scaling="linear",
    alpha=0.8607224501148175, epsilon=0.7034017711035647,
    reg=7.395659881982846e-06, epochs=55,
)
second_new_best_1.fit(
    num_factors=37, confidence_scaling="linear",
    alpha=0.19057276132770296, epsilon=1.824075261880227,
    reg=4.486470919169912e-06, epochs=45,
)
third_new_best_1.fit(
    num_factors=44, confidence_scaling="log",
    alpha=2.7921346029646616, epsilon=3.912241129006435,
    reg=1.5215133991648166e-05, epochs=10,
)

# Print the validation results in the same order the candidates were defined.
for candidate in (old_best_1, new_best_1, second_new_best_1, third_new_best_1):
    print(evaluator_valid_1.evaluateRecommender(candidate))

IALSRecommender: Epoch 1 of 75.0. Elapsed time 11.60 sec
IALSRecommender: Epoch 2 of 75.0. Elapsed time 21.72 sec
IALSRecommender: Epoch 3 of 75.0. Elapsed time 31.84 sec
IALSRecommender: Epoch 4 of 75.0. Elapsed time 43.34 sec
IALSRecommender: Epoch 5 of 75.0. Elapsed time 53.49 sec
IALSRecommender: Epoch 6 of 75.0. Elapsed time 1.06 min
IALSRecommender: Epoch 7 of 75.0. Elapsed time 1.26 min
IALSRecommender: Epoch 8 of 75.0. Elapsed time 1.43 min
IALSRecommender: Epoch 9 of 75.0. Elapsed time 1.61 min
IALSRecommender: Epoch 10 of 75.0. Elapsed time 1.80 min
IALSRecommender: Epoch 11 of 75.0. Elapsed time 1.97 min
IALSRecommender: Epoch 12 of 75.0. Elapsed time 2.14 min
IALSRecommender: Epoch 13 of 75.0. Elapsed time 2.33 min
IALSRecommender: Epoch 14 of 75.0. Elapsed time 2.51 min
IALSRecommender: Epoch 15 of 75.0. Elapsed time 2.68 min
IALSRecommender: Epoch 16 of 75.0. Elapsed time 2.87 min
IALSRecommender: Epoch 17 of 75.0. Elapsed time 3.04 min
IALSRecommender: Epoch 18 of 75.0. 

In [11]:
# Compare the same four IALS hyperparameter configurations on split 2: every
# candidate is trained on URM_train_2 and scored with the split-2 validation evaluator.
old_best_2 = IALSRecommender(URM_train_2)
new_best_2 = IALSRecommender(URM_train_2)
second_new_best_2 = IALSRecommender(URM_train_2)
third_new_best_2 = IALSRecommender(URM_train_2)

# `epochs` is an iteration count, so it is passed as an int; the float values used
# previously leaked into the training log ("Epoch 1 of 75.0").
old_best_2.fit(
    num_factors=49, confidence_scaling="linear",
    alpha=0.7374080457174151, epsilon=2.4848274666036083,
    reg=4.473996989962275e-05, epochs=75,
)
new_best_2.fit(
    num_factors=55, confidence_scaling="linear",
    alpha=0.8607224501148175, epsilon=0.7034017711035647,
    reg=7.395659881982846e-06, epochs=55,
)
second_new_best_2.fit(
    num_factors=37, confidence_scaling="linear",
    alpha=0.19057276132770296, epsilon=1.824075261880227,
    reg=4.486470919169912e-06, epochs=45,
)
third_new_best_2.fit(
    num_factors=44, confidence_scaling="log",
    alpha=2.7921346029646616, epsilon=3.912241129006435,
    reg=1.5215133991648166e-05, epochs=10,
)

# Print the validation results in the same order the candidates were defined.
for candidate in (old_best_2, new_best_2, second_new_best_2, third_new_best_2):
    print(evaluator_valid_2.evaluateRecommender(candidate))

IALSRecommender: Epoch 1 of 75.0. Elapsed time 10.50 sec
IALSRecommender: Epoch 2 of 75.0. Elapsed time 22.35 sec
IALSRecommender: Epoch 3 of 75.0. Elapsed time 32.90 sec
IALSRecommender: Epoch 4 of 75.0. Elapsed time 43.47 sec
IALSRecommender: Epoch 5 of 75.0. Elapsed time 55.21 sec
IALSRecommender: Epoch 6 of 75.0. Elapsed time 1.09 min
IALSRecommender: Epoch 7 of 75.0. Elapsed time 1.27 min
IALSRecommender: Epoch 8 of 75.0. Elapsed time 1.47 min
IALSRecommender: Epoch 9 of 75.0. Elapsed time 1.64 min
IALSRecommender: Epoch 10 of 75.0. Elapsed time 1.82 min
IALSRecommender: Epoch 11 of 75.0. Elapsed time 2.01 min
IALSRecommender: Epoch 12 of 75.0. Elapsed time 2.18 min
IALSRecommender: Epoch 13 of 75.0. Elapsed time 2.36 min
IALSRecommender: Epoch 14 of 75.0. Elapsed time 2.56 min
IALSRecommender: Epoch 15 of 75.0. Elapsed time 2.73 min
IALSRecommender: Epoch 16 of 75.0. Elapsed time 2.92 min
IALSRecommender: Epoch 17 of 75.0. Elapsed time 3.11 min
IALSRecommender: Epoch 18 of 75.0. 

In [12]:
# Compare the same four IALS hyperparameter configurations on split 3: every
# candidate is trained on URM_train_3 and scored with the split-3 validation evaluator.
old_best_3 = IALSRecommender(URM_train_3)
new_best_3 = IALSRecommender(URM_train_3)
second_new_best_3 = IALSRecommender(URM_train_3)
third_new_best_3 = IALSRecommender(URM_train_3)

# `epochs` is an iteration count, so it is passed as an int; the float values used
# previously leaked into the training log ("Epoch 1 of 75.0").
old_best_3.fit(
    num_factors=49, confidence_scaling="linear",
    alpha=0.7374080457174151, epsilon=2.4848274666036083,
    reg=4.473996989962275e-05, epochs=75,
)
new_best_3.fit(
    num_factors=55, confidence_scaling="linear",
    alpha=0.8607224501148175, epsilon=0.7034017711035647,
    reg=7.395659881982846e-06, epochs=55,
)
second_new_best_3.fit(
    num_factors=37, confidence_scaling="linear",
    alpha=0.19057276132770296, epsilon=1.824075261880227,
    reg=4.486470919169912e-06, epochs=45,
)
third_new_best_3.fit(
    num_factors=44, confidence_scaling="log",
    alpha=2.7921346029646616, epsilon=3.912241129006435,
    reg=1.5215133991648166e-05, epochs=10,
)

# Print the validation results in the same order the candidates were defined.
for candidate in (old_best_3, new_best_3, second_new_best_3, third_new_best_3):
    print(evaluator_valid_3.evaluateRecommender(candidate))

IALSRecommender: Epoch 1 of 75.0. Elapsed time 10.64 sec
IALSRecommender: Epoch 2 of 75.0. Elapsed time 22.70 sec
IALSRecommender: Epoch 3 of 75.0. Elapsed time 33.26 sec
IALSRecommender: Epoch 4 of 75.0. Elapsed time 44.23 sec
IALSRecommender: Epoch 5 of 75.0. Elapsed time 56.35 sec
IALSRecommender: Epoch 6 of 75.0. Elapsed time 1.12 min
IALSRecommender: Epoch 7 of 75.0. Elapsed time 1.31 min
IALSRecommender: Epoch 8 of 75.0. Elapsed time 1.51 min
IALSRecommender: Epoch 9 of 75.0. Elapsed time 1.69 min
IALSRecommender: Epoch 10 of 75.0. Elapsed time 1.88 min
IALSRecommender: Epoch 11 of 75.0. Elapsed time 2.08 min
IALSRecommender: Epoch 12 of 75.0. Elapsed time 2.25 min
IALSRecommender: Epoch 13 of 75.0. Elapsed time 2.45 min
IALSRecommender: Epoch 14 of 75.0. Elapsed time 2.63 min
IALSRecommender: Epoch 15 of 75.0. Elapsed time 2.80 min
IALSRecommender: Epoch 16 of 75.0. Elapsed time 3.01 min
IALSRecommender: Epoch 17 of 75.0. Elapsed time 3.19 min
IALSRecommender: Epoch 18 of 75.0. 

In [13]:
# Imported in this cell as well, so it does not depend on the model-comparison cells
# having been executed.
from Recommenders.MatrixFactorization.IALSRecommender import IALSRecommender

# Train the model used for the submission on ALL interactions, with the "old_best"
# hyperparameter configuration from the comparison above.
recommender = IALSRecommender(URM_all)

# `epochs` is an iteration count, so it is passed as an int rather than 75.0.
recommender.fit(
    num_factors=49, confidence_scaling="linear",
    alpha=0.7374080457174151, epsilon=2.4848274666036083,
    reg=4.473996989962275e-05, epochs=75,
)

# This model is deliberately not scored with the validation evaluators: URM_all
# contains the interactions held out in URM_valid_*, so the resulting metrics would
# be measured on data the model was trained on.

IALSRecommender: Epoch 1 of 75.0. Elapsed time 11.56 sec
IALSRecommender: Epoch 2 of 75.0. Elapsed time 24.67 sec
IALSRecommender: Epoch 3 of 75.0. Elapsed time 36.56 sec
IALSRecommender: Epoch 4 of 75.0. Elapsed time 48.59 sec
IALSRecommender: Epoch 5 of 75.0. Elapsed time 1.03 min
IALSRecommender: Epoch 6 of 75.0. Elapsed time 1.22 min
IALSRecommender: Epoch 7 of 75.0. Elapsed time 1.42 min
IALSRecommender: Epoch 8 of 75.0. Elapsed time 1.64 min
IALSRecommender: Epoch 9 of 75.0. Elapsed time 1.83 min
IALSRecommender: Epoch 10 of 75.0. Elapsed time 2.06 min
IALSRecommender: Epoch 11 of 75.0. Elapsed time 2.25 min
IALSRecommender: Epoch 12 of 75.0. Elapsed time 2.45 min
IALSRecommender: Epoch 13 of 75.0. Elapsed time 2.67 min
IALSRecommender: Epoch 14 of 75.0. Elapsed time 2.86 min
IALSRecommender: Epoch 15 of 75.0. Elapsed time 3.05 min
IALSRecommender: Epoch 16 of 75.0. Elapsed time 3.27 min
IALSRecommender: Epoch 17 of 75.0. Elapsed time 3.47 min
IALSRecommender: Epoch 18 of 75.0. E

In [14]:
# Persist the trained model in the output folder, in a file named after the
# recommender class.
saved_model_file_name = recommender.RECOMMENDER_NAME + "_my_own_save.zip"
recommender.save_model(output_folder_path, file_name=saved_model_file_name)

IALSRecommender: Saving model in file 'result_experiments/IALSRecommender_my_own_save.zip'
IALSRecommender: Saving complete


# Create final recommendations

In [15]:
# Load the users for which recommendations are generated in the cells below.
test_users_path = '../input/recommender-system-2021-challenge-polimi/data_target_users_test.csv'
test_users = pd.read_csv(test_users_path)
test_users

Unnamed: 0,user_id
0,0
1,1
2,2
3,3
4,4
...,...
13645,13645
13646,13646
13647,13647
13648,13648


In [16]:
# Ask the final model for the top-10 items of every target user, one user at a time,
# keeping the results in the same order as the rows of `test_users`.
user_id = test_users['user_id']
recommendations = [recommender.recommend(user, cutoff = 10) for user in user_id]

In [17]:
# Build the submission file: one row per target user with its recommended item IDs
# as a single string of IDs separated by exactly one space.
#
# The IDs are joined explicitly instead of going through str(np.array(...)) with the
# brackets stripped: NumPy's array repr right-aligns integers to a common width, so
# that approach produced leading spaces and runs of several spaces between IDs
# (e.g. "   53   209 18058") and depended on NumPy's print options.
test_users['item_list'] = [
    " ".join(str(item_id) for item_id in user_recommendations)
    for user_recommendations in recommendations
]
test_users.to_csv('submission.csv', index=False)