In [11]:
import pandas as pd
import DataHandler
from ModelController import ModelController
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from Recommenders.KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from Recommenders.KNN.UserKNNCFRecommender import UserKNNCFRecommender
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from xgboost import XGBRanker
from tqdm import tqdm
import scipy.sparse as sps
import numpy as np



In [12]:
URM_all_dataframe = pd.read_csv(filepath_or_buffer="Data/data_train.csv",
                                sep=",",
                                dtype={0:int, 1:int, 2:float},
                                engine='python')
users = pd.read_csv(filepath_or_buffer="Data/data_target_users_test.csv")

ICM = pd.read_csv(filepath_or_buffer="Data/data_ICM_metadata.csv",
                                sep=",",
                                dtype={0:int, 1:int, 2:float},
                                engine='python')

In [13]:
URM_all, ICM_all = DataHandler.create_urm_icm(URM_all_dataframe, ICM)

controller = ModelController()

EvaluatorHoldout: Ignoring 433 ( 1.2%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 161 ( 0.5%) Users that have less than 1 test interactions


In [16]:
from Recommenders.ScoresHybridRecommender import ScoresHybridRecommender
from Recommenders.HybridOptunable2 import HybridOptunable2


bestrp3 = RP3betaRecommender(controller.URM_boost)
bestrp3.fit(topK= 18, beta= 0.2449115248846201, alpha= 0.34381573319072084)
stacked = sps.vstack([0.8392863849420211 * controller.URM_boost, (1 - 0.8392863849420211) * controller.ICM_all.T]).tocsr()
SLIMstacked = SLIMElasticNetRecommender(stacked)
SLIMstacked.load_model(folder_path="_saved_models", file_name="slimbooststacked3")
ease = EASE_R_Recommender(controller.URM_boost)
ease.load_model(folder_path="_saved_models", file_name="easeboost3")
hyb = ScoresHybridRecommender(controller.URM_boost, ease, SLIMstacked, bestrp3,ease,ease)
hyb.fit(alpha= 0.25944298974386737, beta= 85.44916996475027, gamma= 19.13509374518065,delta=0,epsilon=0)

RP3betaRecommender: Similarity column 38121 (100.0%), 2370.90 column/sec. Elapsed time 16.08 sec
SLIMElasticNetRecommender: Loading model from file '_saved_modelsslimbooststacked3'
SLIMElasticNetRecommender: Loading complete
EASE_R_Recommender: Loading model from file '_saved_modelseaseboost3'
EASE_R_Recommender: Loading complete


In [17]:
n_users, n_items = controller.URM_boost.shape

training_dataframe = pd.DataFrame(index=range(0,n_users), columns = ["ItemID"])
training_dataframe.index.name='UserID'

training_dataframe

Unnamed: 0_level_0,ItemID
UserID,Unnamed: 1_level_1
0,
1,
2,
3,
4,
...,...
35731,
35732,
35733,
35734,


In [18]:
cutoff = 30

for user_id in tqdm(range(n_users)):
    recommendations = hyb.recommend(user_id, cutoff = cutoff)
    training_dataframe.loc[user_id, "ItemID"] = recommendations

training_dataframe = training_dataframe.explode("ItemID")
training_dataframe

100%|██████████| 35736/35736 [02:04<00:00, 286.15it/s]


Unnamed: 0_level_0,ItemID
UserID,Unnamed: 1_level_1
0,572
0,3074
0,14888
0,28958
0,14931
...,...
35735,35049
35735,37445
35735,36321
35735,37020


In [19]:
URM_validation_coo = sps.coo_matrix(controller.URM_validation)

correct_recommendations = pd.DataFrame({"UserID": URM_validation_coo.row,
                                        "ItemID": URM_validation_coo.col})
correct_recommendations

Unnamed: 0,UserID,ItemID
0,0,0
1,0,453
2,0,884
3,0,1015
4,0,2698
...,...,...
282332,35735,36392
282333,35735,36921
282334,35735,36965
282335,35735,37300


In [20]:
training_dataframe = pd.merge(training_dataframe, correct_recommendations, on=['UserID','ItemID'], how='left', indicator='Exist')
training_dataframe

Unnamed: 0,UserID,ItemID,Exist
0,0,572,left_only
1,0,3074,left_only
2,0,14888,left_only
3,0,28958,left_only
4,0,14931,left_only
...,...,...,...
1072075,35735,35049,left_only
1072076,35735,37445,left_only
1072077,35735,36321,left_only
1072078,35735,37020,left_only


In [21]:
training_dataframe["Label"] = training_dataframe["Exist"] == "both"
training_dataframe.drop(columns = ['Exist'], inplace=True)
training_dataframe

Unnamed: 0,UserID,ItemID,Label
0,0,572,False
1,0,3074,False
2,0,14888,False
3,0,28958,False
4,0,14931,False
...,...,...,...
1072075,35735,35049,False
1072076,35735,37445,False
1072077,35735,36321,False
1072078,35735,37020,False


In [22]:
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from Recommenders.NonPersonalizedRecommender import TopPop

top_pop = TopPop(controller.URM_boost)
top_pop.fit()


item_cf = ItemKNNCFRecommender(controller.URM_boost)
item_cf.fit(similarity= 'tversky', topK= 5, shrink= 15, tversky_alpha= 0.0291003114865242, tversky_beta= 1.0501107741561788)


item_cbf = ItemKNNCBFRecommender(controller.URM_boost, controller.ICM_all)
item_cbf.fit()

user = UserKNNCFRecommender(controller.URM_boost)
user.fit(topK= 1000, shrink= 16, similarity ='cosine', normalize= True, feature_weighting= 'BM25')

rp3beta = RP3betaRecommender(controller.URM_boost)
rp3beta.fit(topK= 18, beta= 0.2449115248846201, alpha= 0.34381573319072084)

slimbpr = SLIM_BPR_Cython(controller.URM_boost)
slimbpr.fit( topK= 20, learning_rate= 0.07235759859199255, lambda_i= 0.0026131161353345695, lambda_j= 0.0025413226707704894, symmetric= True, sgd_mode= 'adagrad'
)


other_algorithms = {
    "top_pop": top_pop,
    "item_cf": item_cf,
    "item_cbf": item_cbf,
    "rp3beta": rp3beta,
    "SLIM_BPR": slimbpr,
    "user" : user
}

Cython module imported successfully.
Similarity column 38121 (100.0%), 2614.07 column/sec. Elapsed time 14.58 sec
Cython module imported successfully.
Similarity column 38121 (100.0%), 597.30 column/sec. Elapsed time 1.06 min
Cython module imported successfully.
Similarity column 35736 (100.0%), 3432.82 column/sec. Elapsed time 10.41 sec
RP3betaRecommender: Similarity column 38121 (100.0%), 2607.16 column/sec. Elapsed time 14.62 sec
Unable to read memory status: list index out of range
SLIM_BPR_Recommender: Automatic selection of fastest train mode. Unable to get current RAM status, you may be using a non-Linux operating system. Using dense matrix.
Processed 35736 ( 100.00% ) in 0.51 seconds. BPR loss is 8.48E-02. Sample per second: 70631
SLIM_BPR_Recommender: Epoch 1 of 320. Elapsed time 0.24 sec
Processed 35736 ( 100.00% ) in 0.69 seconds. BPR loss is 2.17E-01. Sample per second: 51544
SLIM_BPR_Recommender: Epoch 2 of 320. Elapsed time 0.42 sec
Processed 35736 ( 100.00% ) in 0.87 sec

In [23]:
training_dataframe = training_dataframe.set_index('UserID')

for user_id in tqdm(range(n_users)):
    for rec_label, rec_instance in other_algorithms.items():

        item_list = training_dataframe.loc[user_id, "ItemID"].values.tolist()

        all_item_scores = rec_instance._compute_item_score([user_id], items_to_compute = item_list)

        training_dataframe.loc[user_id, rec_label] = all_item_scores[0, item_list]

training_dataframe = training_dataframe.reset_index()
training_dataframe = training_dataframe.rename(columns = {"index": "UserID"})
training_dataframe

100%|██████████| 35736/35736 [23:43<00:00, 25.11it/s]


Unnamed: 0,UserID,ItemID,Label,top_pop,item_cf,item_cbf,rp3beta,SLIM_BPR,user
0,0,572,False,58.0,0.308208,0.000000,0.211296,0.985309,720.766602
1,0,3074,False,292.0,0.190834,0.000000,0.280530,1.179580,68.930260
2,0,14888,False,69.0,0.297586,0.000000,0.152133,0.703747,740.963135
3,0,28958,False,21.0,0.148741,0.118683,0.265741,0.380170,342.519348
4,0,14931,False,69.0,0.267057,0.000000,0.128055,0.820292,935.203430
...,...,...,...,...,...,...,...,...,...
1072075,35735,35049,False,16.0,0.221901,0.484419,0.108183,0.000000,4.841352
1072076,35735,37445,False,16.0,0.272615,0.434766,0.101811,0.662073,6.890255
1072077,35735,36321,False,111.0,0.117644,0.334306,0.135678,0.286976,4.417545
1072078,35735,37020,False,14.0,0.134264,0.494332,0.159507,0.165915,7.377809


In [24]:
item_popularity = np.ediff1d(sps.csc_matrix(controller.URM_boost).indptr)

training_dataframe['item_popularity'] = item_popularity[training_dataframe["ItemID"].values.astype(int)]

user_popularity = np.ediff1d(sps.csr_matrix(controller.URM_boost).indptr)
training_dataframe['user_profile_len'] = user_popularity[training_dataframe["UserID"].values.astype(int)]


training_dataframe

Unnamed: 0,UserID,ItemID,Label,top_pop,item_cf,item_cbf,rp3beta,SLIM_BPR,user,item_popularity,user_profile_len
0,0,572,False,58.0,0.308208,0.000000,0.211296,0.985309,720.766602,58,70
1,0,3074,False,292.0,0.190834,0.000000,0.280530,1.179580,68.930260,292,70
2,0,14888,False,69.0,0.297586,0.000000,0.152133,0.703747,740.963135,69,70
3,0,28958,False,21.0,0.148741,0.118683,0.265741,0.380170,342.519348,21,70
4,0,14931,False,69.0,0.267057,0.000000,0.128055,0.820292,935.203430,69,70
...,...,...,...,...,...,...,...,...,...,...,...
1072075,35735,35049,False,16.0,0.221901,0.484419,0.108183,0.000000,4.841352,16,25
1072076,35735,37445,False,16.0,0.272615,0.434766,0.101811,0.662073,6.890255,16,25
1072077,35735,36321,False,111.0,0.117644,0.334306,0.135678,0.286976,4.417545,111,25
1072078,35735,37020,False,14.0,0.134264,0.494332,0.159507,0.165915,7.377809,14,25


In [25]:
type(training_dataframe["ItemID"])

pandas.core.series.Series

In [26]:
training_dataframe = training_dataframe.sort_values("UserID").reset_index()
training_dataframe.drop(columns = ['index'], inplace=True)
training_dataframe

Unnamed: 0,UserID,ItemID,Label,top_pop,item_cf,item_cbf,rp3beta,SLIM_BPR,user,item_popularity,user_profile_len
0,0,572,False,58.0,0.308208,0.000000,0.211296,0.985309,720.766602,58,70
1,0,10086,False,13.0,0.129815,0.000000,0.087108,0.607527,79.648468,13,70
2,0,884,True,175.0,0.000000,0.000000,0.000000,0.000000,153.016129,175,70
3,0,454,False,68.0,0.198152,0.114815,0.096091,0.451133,406.088379,68,70
4,0,2539,False,57.0,0.156180,0.000000,0.079028,0.435585,243.294922,57,70
...,...,...,...,...,...,...,...,...,...,...,...
1072075,35735,37657,False,61.0,0.892885,0.263317,0.318636,2.107907,10.274439,61,25
1072076,35735,37801,False,29.0,0.782749,0.484419,0.306424,1.525791,9.901058,29,25
1072077,35735,37020,False,14.0,0.134264,0.494332,0.159507,0.165915,7.377809,14,25
1072078,35735,36034,False,42.0,0.355329,0.340052,0.135866,0.422474,5.790395,42,25


In [27]:
groups = training_dataframe.groupby("UserID").size().values
groups

array([30, 30, 30, ..., 30, 30, 30], dtype=int64)

In [28]:
print(training_dataframe.dtypes)

# Convert unsupported data types to supported ones
for column in training_dataframe.columns:
    if training_dataframe[column].dtype not in [int, float, bool, 'category']:
        training_dataframe[column] = training_dataframe[column].astype(float)

# Verify the data types after conversion
print(training_dataframe.dtypes)
y_train = training_dataframe["Label"]
X_train = training_dataframe.drop(columns=["Label"])

UserID                int64
ItemID               object
Label                  bool
top_pop             float64
item_cf             float64
item_cbf            float64
rp3beta             float64
SLIM_BPR            float64
user                float64
item_popularity       int32
user_profile_len      int32
dtype: object
UserID              float64
ItemID              float64
Label                  bool
top_pop             float64
item_cf             float64
item_cbf            float64
rp3beta             float64
SLIM_BPR            float64
user                float64
item_popularity       int32
user_profile_len      int32
dtype: object


In [29]:
X_train["UserID"] = X_train["UserID"].astype("category")
X_train["ItemID"] = X_train["ItemID"].astype("category")

In [33]:
bestrp3 = RP3betaRecommender(controller.URM_train)
bestrp3.fit(topK= 18, beta= 0.2449115248846201, alpha= 0.34381573319072084)
stacked = sps.vstack([0.8392863849420211 * controller.URM_train, (1 - 0.8392863849420211) * controller.ICM_all.T]).tocsr()
SLIMstacked = SLIMElasticNetRecommender(stacked)
SLIMstacked.load_model(folder_path="_saved_models", file_name="SLIMstackedTrain3")
ease = EASE_R_Recommender(controller.URM_train)
ease.load_model(folder_path="_saved_models", file_name="easetrain3")
hyb = ScoresHybridRecommender(controller.URM_train, ease, SLIMstacked, bestrp3,ease,ease)
hyb.fit(alpha= 0.25944298974386737, beta= 85.44916996475027, gamma= 19.13509374518065,delta=0,epsilon=0)

RP3betaRecommender: Similarity column 38121 (100.0%), 2122.06 column/sec. Elapsed time 17.96 sec
SLIMElasticNetRecommender: Loading model from file '_saved_modelsSLIMstackedTrain3'
SLIMElasticNetRecommender: Loading complete
EASE_R_Recommender: Loading model from file '_saved_modelseasetrain3'
EASE_R_Recommender: Loading complete


In [34]:
user_recommendations_items = []
user_recommendations_user_id = []

for user_id in tqdm(range(n_users)):
    recommendations = hyb.recommend(user_id, cutoff = cutoff)
    
    user_recommendations_items.extend(recommendations)
    user_recommendations_user_id.extend([user_id]*len(recommendations))

100%|██████████| 35736/35736 [02:17<00:00, 260.47it/s]


In [35]:
test_dataframe = pd.DataFrame({"UserID":user_recommendations_user_id, "ItemID":user_recommendations_items})
test_dataframe

Unnamed: 0,UserID,ItemID
0,0,14888
1,0,7703
2,0,14931
3,0,6348
4,0,6380
...,...,...
1072075,35735,37440
1072076,35735,35753
1072077,35735,37304
1072078,35735,37192


In [36]:
top_pop = TopPop(controller.URM_train)
top_pop.fit()


item_cf = ItemKNNCFRecommender(controller.URM_train)
item_cf.fit(similarity= 'tversky', topK= 5, shrink= 15, tversky_alpha= 0.0291003114865242, tversky_beta= 1.0501107741561788)


item_cbf = ItemKNNCBFRecommender(controller.URM_train, controller.ICM_all)
item_cbf.fit()

user = UserKNNCFRecommender(controller.URM_train)
user.fit(topK= 1000, shrink= 16, similarity ='cosine', normalize= True, feature_weighting= 'BM25')

rp3beta = RP3betaRecommender(controller.URM_train)
rp3beta.fit(topK= 18, beta= 0.2449115248846201, alpha= 0.34381573319072084)

slimbpr = SLIM_BPR_Cython(controller.URM_train)
slimbpr.fit( topK= 20, learning_rate= 0.07235759859199255, lambda_i= 0.0026131161353345695, lambda_j= 0.0025413226707704894, symmetric= True, sgd_mode= 'adagrad'
)


other_algorithms_tr = {
    "top_pop": top_pop,
    "item_cf": item_cf,
    "item_cbf": item_cbf,
    "rp3beta": rp3beta,
    "SLIM_BPR": slimbpr,
    "user" : user
}

Cython module imported successfully.
Similarity column 38121 (100.0%), 2286.41 column/sec. Elapsed time 16.67 sec
Cython module imported successfully.
Similarity column 38121 (100.0%), 581.98 column/sec. Elapsed time 1.09 min
Cython module imported successfully.
Similarity column 35736 (100.0%), 3205.79 column/sec. Elapsed time 11.15 sec
RP3betaRecommender: Similarity column 38121 (100.0%), 2361.82 column/sec. Elapsed time 16.14 sec
Unable to read memory status: list index out of range
SLIM_BPR_Recommender: Automatic selection of fastest train mode. Unable to get current RAM status, you may be using a non-Linux operating system. Using dense matrix.
Processed 35736 ( 100.00% ) in 1.15 seconds. BPR loss is 1.07E-01. Sample per second: 30983
SLIM_BPR_Recommender: Epoch 1 of 320. Elapsed time 0.23 sec
Processed 35736 ( 100.00% ) in 0.38 seconds. BPR loss is 3.35E-01. Sample per second: 94700
SLIM_BPR_Recommender: Epoch 2 of 320. Elapsed time 0.46 sec
Processed 35736 ( 100.00% ) in 0.60 sec

In [37]:

test_dataframe = test_dataframe.set_index('UserID')

for user_id in tqdm(range(n_users)):
    for rec_label, rec_instance in other_algorithms_tr.items():

        item_list = test_dataframe.loc[user_id, "ItemID"].values.tolist()

        all_item_scores = rec_instance._compute_item_score([user_id], items_to_compute = item_list)

        test_dataframe.loc[user_id, rec_label] = all_item_scores[0, item_list]

test_dataframe = test_dataframe.reset_index()
test_dataframe = test_dataframe.rename(columns = {"index": "UserID"})




100%|██████████| 35736/35736 [32:41<00:00, 18.22it/s]


In [38]:
item_popularity = np.ediff1d(sps.csc_matrix(controller.URM_train).indptr)
test_dataframe['item_popularity'] = item_popularity[test_dataframe["ItemID"].values.astype(int)]

user_popularity = np.ediff1d(sps.csr_matrix(controller.URM_train).indptr)
test_dataframe['user_profile_len'] = user_popularity[test_dataframe["UserID"].values.astype(int)]

test_dataframe

Unnamed: 0,UserID,ItemID,top_pop,item_cf,item_cbf,rp3beta,SLIM_BPR,user,item_popularity,user_profile_len
0,0,14888,84.0,0.596443,0.000000,0.329630,1.182553,13.838886,84,90
1,0,7703,69.0,0.414451,0.208383,0.315615,1.309058,0.935975,69,90
2,0,14931,84.0,0.483289,0.117672,0.217824,1.597207,13.037214,84,90
3,0,6348,211.0,0.215074,0.000000,0.193364,0.584552,4.205878,211,90
4,0,6380,227.0,0.000000,0.000000,0.242495,1.114138,4.679424,227,90
...,...,...,...,...,...,...,...,...,...,...
1072075,35735,37440,18.0,0.290487,0.120606,0.081652,0.421366,6.071812,18,30
1072076,35735,35753,22.0,0.337339,0.156480,0.154681,0.623384,10.783901,22,30
1072077,35735,37304,56.0,0.000000,0.000000,0.143927,0.336837,13.189621,56,30
1072078,35735,37192,27.0,0.234921,0.000000,0.093568,0.776635,10.984703,27,30


In [39]:
test_dataframe = test_dataframe.sort_values("UserID").reset_index()
test_dataframe.drop(columns = ['index'], inplace=True)


test_dataframe

Unnamed: 0,UserID,ItemID,top_pop,item_cf,item_cbf,rp3beta,SLIM_BPR,user,item_popularity,user_profile_len
0,0,14888,84.0,0.596443,0.000000,0.329630,1.182553,13.838886,84,90
1,0,3146,55.0,0.180506,0.000000,0.065613,0.768291,6.455511,55,90
2,0,9316,29.0,0.000000,0.000000,0.195242,0.103736,2.362833,29,90
3,0,9812,124.0,0.000000,0.114570,0.169619,0.761546,3.541891,124,90
4,0,22940,55.0,0.161781,0.000000,0.157445,0.328499,5.450683,55,90
...,...,...,...,...,...,...,...,...,...,...
1072075,35735,37801,37.0,0.885405,0.484419,0.320712,1.409585,12.056814,37,30
1072076,35735,37657,76.0,1.283847,0.263317,0.451972,2.104181,15.339855,76,30
1072077,35735,37192,27.0,0.234921,0.000000,0.093568,0.776635,10.984703,27,30
1072078,35735,36322,19.0,0.489228,0.111659,0.204982,1.948786,11.273000,19,30


In [55]:
X = test_dataframe #devo predictare su questo




In [56]:
def map_at_k(URM_test, predictions, groups, k=10):
    """
    Calcola la MAP@k per ogni utente e poi la media di MAP su tutti gli utenti.

    Args:
        URM_test (scipy.sparse matrix): Matrce di valutazione di test (utente x item)
        predictions (numpy.ndarray): Matrice delle predizioni (utente x item)
        groups (array-like): Array che contiene il numero di item interagiti per ogni utente
        k (int): Numero di top raccomandazioni per la MAP@k (default è 10)
    
    Returns:
        float: MAP@k calcolata per tutti gli utenti
    """

    # Lista per memorizzare la precisione di ogni utente
    map_scores = []
    
    # Itera su ogni utente
    start_idx = 0
    for group_size in groups:
        # Estrai le interazioni dell'utente da URM_test
        end_idx = start_idx + group_size
        user_test = URM_test[start_idx:end_idx].toarray().flatten()

        # Predizioni per l'utente (score per ogni item)
        user_predictions = predictions[start_idx:end_idx].flatten()
        
        # Trova gli indici dei top k item raccomandati
        top_k_items = user_predictions.argsort()[-k:][::-1]
        
        # Calcola la precisione per i top k items
        relevant_items = user_test[top_k_items]  # 1 se l'utente ha interagito, 0 altrimenti
        precision_at_k = np.sum(relevant_items) / k
        
        map_scores.append(precision_at_k)
        
        # Spostati al prossimo utente
        start_idx = end_idx
    
    # Restituisci la MAP media
    return np.mean(map_scores)

In [59]:
def objective(trial):
    # Definizione dei parametri da ottimizzare
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 1.0),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'max_leaves': trial.suggest_int('max_leaves', 0, 100),
        'grow_policy': trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide']),
        'objective': "rank:pairwise",
        'booster': trial.suggest_categorical('booster', ['gbtree', 'dart']),
        'random_state': 42,
        "enable_categorical": True
        
    }

    # Inizializza il modello XGBoost Ranker
    model = XGBRanker(**params, tree_method='hist', verbosity=0)

    # Fitta il modello sul training set (con gruppi)
    model.fit(X_train, y_train, group=groups, verbose=False)

    # Predici sul validation set
    y_pred = model.predict(X)

    # Calcola la MAP@10 complessiva
    map10 = map_at_k(controller.URM_test, y_pred, groups, k=10)
    print(f"Trial {trial.number}: MAP@10 = {map10:.4f}")
    return map10

In [None]:
import optuna
# Creazione dello studio di Optuna
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)  # Numero di iterazioni per Optuna

# Ottieni i migliori parametri trovati da Optuna
best_params = study.best_params
print("Migliori parametri trovati:", best_params)

[I 2025-01-05 16:15:04,372] A new study created in memory with name: no-name-a8ecb8d1-3ad0-4656-ae2a-2e345e19a81a


In [40]:
reranked_dataframe = test_dataframe.copy()
reranked_dataframe['rating_xgb'] = pd.Series(predictions, index=reranked_dataframe.index)
reranked_dataframe

Unnamed: 0,UserID,ItemID,top_pop,item_cf,item_cbf,rp3beta,SLIM_BPR,item_popularity,user_profile_len,rating_xgb
0,0,2548,149.0,0.082101,0.000000,0.168342,0.298365,149,114,-0.155686
1,0,8505,166.0,0.000000,0.000000,0.097763,0.346993,166,114,-0.253184
2,0,2637,67.0,0.107413,0.083077,0.200953,0.417259,67,114,-0.049796
3,0,2743,39.0,0.220747,0.056510,0.239450,0.322208,39,114,0.012898
4,0,357,25.0,0.093566,0.084774,0.395788,0.202158,25,114,-0.010681
...,...,...,...,...,...,...,...,...,...,...
1072075,35735,37657,93.0,1.337090,0.094451,0.557155,2.465509,93,37,1.639965
1072076,35735,36772,39.0,0.872134,0.165306,0.610348,1.555248,39,37,1.639965
1072077,35735,36779,32.0,0.171969,0.081673,0.082172,0.192374,32,37,0.984405
1072078,35735,36920,147.0,0.391505,0.000000,0.276931,1.616406,147,37,1.529824


In [41]:
reranked_dataframe = reranked_dataframe.sort_values(['UserID','rating_xgb'], ascending=[True, False])
reranked_dataframe

Unnamed: 0,UserID,ItemID,top_pop,item_cf,item_cbf,rp3beta,SLIM_BPR,item_popularity,user_profile_len,rating_xgb
22,0,7703,79.0,0.377827,0.261284,0.410197,0.877082,79,114,0.186323
19,0,7547,43.0,0.375689,0.000000,0.511732,0.330391,43,114,0.140569
28,0,399,228.0,0.283455,0.000000,0.258461,1.002502,228,114,0.088349
21,0,14888,110.0,0.256521,0.092722,0.296955,0.843104,110,114,0.081294
29,0,1425,39.0,0.170580,0.067446,0.321098,0.777575,39,114,0.057473
...,...,...,...,...,...,...,...,...,...,...
1072056,35735,34998,57.0,0.391233,0.113891,0.079775,0.000000,57,37,1.292605
1072079,35735,36968,27.0,0.175031,0.000000,0.165511,0.000000,27,37,1.280923
1072058,35735,35753,28.0,0.418558,0.000000,0.081554,0.650875,28,37,1.277164
1072059,35735,36780,67.0,0.190273,0.081673,0.082067,0.249823,67,37,1.179648


In [52]:
def write_reranked_recommendations(file_name):
    # Apertura del file in modalità scrittura
    f = open(file_name + ".csv", "w")
    
    # Scrittura dell'intestazione del file
    f.write("user_id,item_list\n")
    
    # Iterazione su tutti gli utenti
    for user_id in users["user_id"]:
        # Selezione delle prime 10 raccomandazioni per ogni utente
        recommendations_per_user = reranked_dataframe.loc[reranked_dataframe['UserID'] == user_id].ItemID.values[:10]
        
        # Creazione della riga formattata
        recommendation_string = f"{user_id}," + " ".join(map(str, recommendations_per_user))
        
        # Scrittura della riga nel file
        f.write(recommendation_string + "\n")
    
    # Chiusura del file
    f.close()


In [54]:
write_reranked_recommendations(file_name = "recomm")


In [55]:

submission = pd.read_csv("recomm.csv")
submission

Unnamed: 0,user_id,item_list
0,0,7703 7547 399 14888 1425 572 11966 2697 9911 2...
1,1,11146 7010 6348 4060 13766 13733 2820 2644 119...
2,2,22714 29963 21367 29964 22558 29640 16255 2262...
3,3,11753 6827 25140 25643 14103 3207 23023 25079 ...
4,4,4309 8505 18647 4572 15902 15731 3141 5819 352...
...,...,...
34226,35729,36527 35119 36844 36802 37525 37216 35548 3794...
34227,35730,38027 36770 36142 37873 37317 35443 37146 3757...
34228,35731,37170 36810 37873 36856 37109 36525 35394 3595...
34229,35734,37550 35093 37803 36561 35918 35914 34994 3534...
