In [None]:
import pandas as pd
import DataHandler
from ModelController import ModelController
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from Recommenders.KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from xgboost import XGBRanker
from tqdm import tqdm
import scipy.sparse as sps
import numpy as np

In [None]:
# import machine learning libraries
import xgboost as xgb
from sklearn.metrics import accuracy_score


# import packages for hyperparameters tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

In [None]:
# The interaction file and the item-metadata file are parsed with the same
# reader options, so those options are spelled out once and reused.
_triplet_csv_options = {
    "sep": ",",
    "dtype": {0: int, 1: int, 2: float},
    "engine": 'python',
}

# Interaction records (input of DataHandler.create_urm_icm below).
URM_all_dataframe = pd.read_csv(filepath_or_buffer="Data/data_train.csv", **_triplet_csv_options)

# Target users: one submission row is written for each "user_id" in this frame.
users = pd.read_csv(filepath_or_buffer="Data/data_target_users_test.csv")

# Item metadata records (second input of DataHandler.create_urm_icm below).
ICM = pd.read_csv(filepath_or_buffer="Data/data_ICM_metadata.csv", **_triplet_csv_options)

In [None]:
# Turn the two raw dataframes into the interaction matrix and the item-content matrix.
# NOTE(review): presumably scipy sparse matrices, given the sps.vstack usage below — confirm in DataHandler.
URM_all, ICM_all = DataHandler.create_urm_icm(URM_all_dataframe, ICM)

# The controller exposes URM_train / URM_test / URM_all / ICM_all to the cells below.
# NOTE(review): it is constructed without arguments, so it does not receive the matrices
# built above — confirm that controller.URM_all and URM_all describe the same data.
controller = ModelController()

In [None]:
from Recommenders.KNN.UserKNNCFRecommender import UserKNNCFRecommender
from Recommenders.ScoresHybridRecommender import ScoresHybridRecommender
from EASE_R_Recommender import EASE_R_Recommender
from Recommenders.HybridOptunable2 import HybridOptunable2

# Relative weight of the interaction rows when the URM is stacked on top of the
# transposed ICM; the content rows get the complement.
urm_share = 0.6814451172353111
stacked = sps.vstack([urm_share * URM_all, (1 - urm_share) * controller.ICM_all.T]).tocsr()

# SLIM ElasticNet on the stacked matrix: weights are loaded from disk, not re-fitted.
slim = SLIMElasticNetRecommender(stacked)
slim.load_model(folder_path="_saved_models", file_name="SLIMstackedAll1")

# The remaining base models are fitted here on the controller's full URM.
rp3 = RP3betaRecommender(controller.URM_all)
rp3.fit(topK=18, beta=0.2449115248846201, alpha=0.34381573319072084)

easeR = EASE_R_Recommender(controller.URM_all)
easeR.fit(topK=32, l2_norm=20.402285200199643, normalize_matrix=False)

user = UserKNNCFRecommender(controller.URM_all)
user.fit(topK=1000, shrink=16, similarity='cosine', normalize=True, feature_weighting='BM25')

# Five model slots are passed to the hybrid; `slim` is given again for the fifth one.
hybrid_all = ScoresHybridRecommender(controller.URM_all, slim, rp3, easeR, user, slim)
#TODO: put parameters
# NOTE(review): all five weights are still 0 — presumably placeholders; confirm the
# real blend weights before using hybrid_all for a submission.
hybrid_all.fit(0, 0, 0, 0, 0)

In [None]:
def write_recommendations(recommender, file_name, target_users=None, cutoff=10):
    """Write one line of recommendations per target user to ``<file_name>.csv``.

    The file starts with the header ``user_id,item_list``; every following line
    is ``<user_id>,<item> <item> ...`` with the items separated by spaces.

    Parameters
    ----------
    recommender : object
        Must expose ``recommend(user_id_array=..., remove_seen_flag=..., cutoff=...)``
        returning an iterable of item ids for a single user id.
    file_name : str
        Output path without extension; ``".csv"`` is appended.
    target_users : iterable, optional
        User ids to write. Defaults to the notebook-level ``users["user_id"]``.
    cutoff : int, optional
        Number of items requested per user (default 10).
    """
    if target_users is None:
        target_users = users["user_id"]

    # The context manager closes the file even if a recommend() call raises.
    with open(file_name + ".csv", "w") as f:
        # Header row.
        f.write("user_id,item_list\n")

        for user_id in target_users:
            # Fix: the original passed the builtin `id` here instead of the loop variable.
            recommendations_per_user = recommender.recommend(
                user_id_array=user_id, remove_seen_flag=True, cutoff=cutoff
            )

            # Row format: "<user_id>,<space separated item ids>".
            recommendation_string = f"{user_id}," + " ".join(map(str, recommendations_per_user))
            f.write(recommendation_string + "\n")


In [None]:
# Baseline submission: the hybrid's own ranking, written to submission_norerank.csv.
write_recommendations(recommender = hybrid_all, file_name = "submission_norerank")


In [None]:
# Read the file just written back in, to eyeball its content.
hybrid_submission = pd.read_csv("submission_norerank.csv")
hybrid_submission

In [None]:
from Recommenders.KNN.UserKNNCFRecommender import UserKNNCFRecommender
from Recommenders.ScoresHybridRecommender import ScoresHybridRecommender
from EASE_R_Recommender import EASE_R_Recommender
from Recommenders.HybridOptunable2 import HybridOptunable2

# Candidate generator for the re-ranker's training set: every base model here
# is built on controller.URM_train only.
slim1 = SLIMElasticNetRecommender(controller.URM_train)
slim1.load_model(folder_path="_saved_models", file_name="SLIMtrain")

rp3 = RP3betaRecommender(controller.URM_train)
rp3.fit(topK= 18, beta= 0.2449115248846201, alpha= 0.34381573319072084)

easeR = EASE_R_Recommender(controller.URM_train)
easeR.fit(topK= 32, l2_norm= 20.402285200199643, normalize_matrix= False)

user = UserKNNCFRecommender(controller.URM_train)
user.fit(topK= 1000, shrink= 16, similarity ='cosine', normalize= True, feature_weighting= 'BM25')

# Fix: blend slim1 (the train-split model loaded above) rather than `slim`, which was
# built on the stacked full-data matrix and would leak held-out interactions into the
# candidates. slim1 is given again for the fifth slot, mirroring the hybrid_all cell.
hybrid_train = ScoresHybridRecommender(controller.URM_train, slim1, rp3, easeR, user, slim1)
#TODO: put parameters
# NOTE(review): all five weights are still 0 — presumably placeholders; confirm.
hybrid_train.fit(0, 0, 0, 0, 0)

In [None]:
n_users, n_items = controller.URM_train.shape

# One row per user with a single, initially empty, "ItemID" cell; the index
# (named "UserID") runs over 0 .. n_users-1.
training_dataframe = pd.DataFrame(
    index=pd.RangeIndex(n_users, name='UserID'),
    columns=["ItemID"],
)

training_dataframe

In [None]:
# Number of candidate items requested per user (also reused for the submission candidates).
cutoff = 30

# Store every user's candidate list inside that user's "ItemID" cell.
for user_id in tqdm(range(n_users)):
    recommendations = hybrid_train.recommend(user_id, cutoff = cutoff)
    training_dataframe.loc[user_id, "ItemID"] = recommendations

# explode() unpacks each list cell into one row per (UserID, ItemID) candidate pair.
training_dataframe = training_dataframe.explode("ItemID")
training_dataframe

In [None]:
# COO format gives direct access to the (row, col) coordinates of the stored entries.
# NOTE(review): the variable says "validation" but the matrix read is controller.URM_test.
URM_validation_coo = sps.coo_matrix(controller.URM_test)

# One row per stored held-out entry: row index as UserID, column index as ItemID.
correct_recommendations = pd.DataFrame({"UserID": URM_validation_coo.row,
                                        "ItemID": URM_validation_coo.col})
correct_recommendations

In [None]:
# Left join keeps every candidate pair; the indicator column "Exist" records whether
# the pair was also found in correct_recommendations ("both") or not ("left_only").
training_dataframe = pd.merge(training_dataframe, correct_recommendations, on=['UserID','ItemID'], how='left', indicator='Exist')
training_dataframe

In [None]:
# A candidate is a positive example exactly when the merge found it on both sides.
training_dataframe["Label"] = training_dataframe["Exist"] == "both"

# The merge indicator has served its purpose; carry on without it.
training_dataframe = training_dataframe.drop(columns=['Exist'])
training_dataframe

In [None]:
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from Recommenders.NonPersonalizedRecommender import TopPop

# Models whose per-item scores become feature columns of the re-ranker's training set.
# All of them are built on controller.URM_train.
top_pop = TopPop(controller.URM_train)
top_pop.fit()


# Loaded from disk rather than fitted in the notebook.
item_cf = ItemKNNCFRecommender(controller.URM_train)
item_cf.load_model(folder_path="_saved_models", file_name="itemtrain")


# Content-based KNN on the item metadata, fitted with the recommender's default arguments.
item_cbf = ItemKNNCBFRecommender(controller.URM_train, controller.ICM_all)
item_cbf.fit()

rp3beta = RP3betaRecommender(controller.URM_train)
rp3beta.load_model(folder_path= "_saved_models",file_name="rp3train")

slimbpr = SLIM_BPR_Cython(controller.URM_train)
slimbpr.load_model(folder_path= "_saved_models",file_name="bprtrain")

#TODO: add other promising algorithms

# Each key becomes the name of the feature column holding that model's scores.
other_algorithms = {
    "top_pop": top_pop,
    "item_cf": item_cf,
    "item_cbf": item_cbf,
    "rp3beta": rp3beta,
    "SLIM_BPR": slimbpr
}

In [None]:
# Index by user so that .loc[user_id] selects all candidate rows of that user.
training_dataframe = training_dataframe.set_index('UserID')

for user_id in tqdm(range(n_users)):
    # The candidate list depends only on the user, so it is looked up once per
    # user instead of once per (user, model) pair as before.
    item_list = training_dataframe.loc[user_id, "ItemID"].values.tolist()

    for rec_label, rec_instance in other_algorithms.items():
        # Row 0 of the result is the single requested user; keep only the
        # scores at the candidate item positions.
        all_item_scores = rec_instance._compute_item_score([user_id], items_to_compute = item_list)
        training_dataframe.loc[user_id, rec_label] = all_item_scores[0, item_list]

# Bring UserID back as an ordinary column for the following cells.
training_dataframe = training_dataframe.reset_index()
training_dataframe = training_dataframe.rename(columns = {"index": "UserID"})
training_dataframe

In [None]:
# Consecutive differences of a CSC indptr give the number of stored entries per
# column, i.e. per item.
item_popularity = np.diff(sps.csc_matrix(controller.URM_train).indptr)
candidate_items = training_dataframe["ItemID"].to_numpy().astype(int)
training_dataframe['item_popularity'] = item_popularity[candidate_items]

# Same idea on the CSR indptr: stored entries per row, i.e. per user.
user_popularity = np.diff(sps.csr_matrix(controller.URM_train).indptr)
candidate_users = training_dataframe["UserID"].to_numpy().astype(int)
training_dataframe['user_profile_len'] = user_popularity[candidate_users]

training_dataframe

In [None]:
# Quick inspection of what kind of object the ItemID column is.
type(training_dataframe["ItemID"])

In [None]:
# Rows of the same user must be contiguous, because the ranker receives group
# sizes rather than group ids. The old index is discarded and replaced by a
# fresh 0..n-1 one.
training_dataframe = training_dataframe.sort_values("UserID").reset_index(drop=True)
training_dataframe

In [None]:
# Number of candidate rows per UserID, in ascending UserID order; passed as
# `group=` to XGBRanker.fit below.
groups = training_dataframe.groupby("UserID").size().values
groups

In [None]:
# Hyperparameters of the re-ranker. They stay notebook-level names because the
# categorical variant of the model further down reads them again.
n_estimators = 50
learning_rate = 1e-1
reg_alpha = 1e-1
reg_lambda = 1e-1
max_depth = 5
max_leaves = 0
grow_policy = "depthwise"
objective = "pairwise"
booster = "gbtree"
use_user_profile = False  # not read by any visible cell
random_seed = None

# First variant of the ranker: every feature is treated as a number.
XGB_model = XGBRanker(
    objective=f"rank:{objective}",
    n_estimators=int(n_estimators),
    random_state=random_seed,
    learning_rate=learning_rate,
    reg_alpha=reg_alpha,
    reg_lambda=reg_lambda,
    max_depth=int(max_depth),
    max_leaves=int(max_leaves),
    grow_policy=grow_policy,
    verbosity=0,  # 2 if self.verbose else 0,
    booster=booster,
)

In [None]:
# Disabled code kept as a string literal: it would fit a SLIM ElasticNet model on
# controller.URM_train and save it under _saved_models. Nothing is executed here;
# the cell only evaluates (and displays) the string.
'''candidate_recommender = SLIMElasticNetRecommender(controller.URM_train)
candidate_recommender.fit(alpha= 0.00022742003969239836, topK= 709, l1_ratio= 0.1488442906776265)
candidate_recommender.save_model(folder_path="_saved_models", file_name = "ModelName.SLIM_ElasticNetTrain")'''

In [None]:
# Check the data types of the columns in training_dataframe
print(training_dataframe.dtypes)

# Convert unsupported data types to supported ones: any column whose dtype does not
# compare equal to int, float, bool or 'category' is cast to float. The frame is
# modified in place, so the conversion also affects later cells.
for column in training_dataframe.columns:
    if training_dataframe[column].dtype not in [int, float, bool, 'category']:
        training_dataframe[column] = training_dataframe[column].astype(float)

# Verify the data types after conversion
print(training_dataframe.dtypes)

# Target is the boolean Label; every remaining column (UserID and ItemID included)
# is used as a feature.
y_train = training_dataframe["Label"]
X_train = training_dataframe.drop(columns=["Label"])

# `groups` holds the per-user row counts computed above, matching the row order
# of the sorted frame.
XGB_model.fit(X_train,
          y_train,
          group=groups,
          verbose=True)

In [None]:
# Let's say I want to compute the prediction for a group of user-item pairs, for simplicity I will use a slice of the data used
# for training because it already contains all the features
# (the slice is the set of candidate rows of user 10).
X_to_predict = X_train[X_train["UserID"] == 10]

# One score per selected row.
XGB_model.predict(X_to_predict)

In [None]:
from xgboost import plot_importance

# Feature importance of the numeric-feature model, using the 'weight' importance type.
plot_importance(XGB_model, importance_type='weight', title='Weight (Frequence)')

In [None]:
# Re-type both id columns as pandas categoricals so that the second ranker
# (enable_categorical=True) treats them as categories, not magnitudes.
X_train = X_train.astype({"UserID": "category", "ItemID": "category"})

In [None]:
# Second variant of the ranker: same hyperparameters as the first one, plus
# categorical support for the id columns. It replaces the previous XGB_model.
XGB_model = XGBRanker(
    objective=f"rank:{objective}",
    n_estimators=int(n_estimators),
    random_state=random_seed,
    learning_rate=learning_rate,
    reg_alpha=reg_alpha,
    reg_lambda=reg_lambda,
    max_depth=int(max_depth),
    max_leaves=int(max_leaves),
    grow_policy=grow_policy,
    verbosity=0,  # 2 if self.verbose else 0,
    booster=booster,
    enable_categorical=True,
    tree_method="hist",  # Supported tree methods are `gpu_hist`, `approx`, and `hist`.
)

# Same labels and group sizes as before; only the id column dtypes differ.
XGB_model.fit(
    X_train,
    y_train,
    group=groups,
    verbose=True,
)

In [None]:
# Same sanity check as for the numeric model: score the candidate rows of user 10.
X_to_predict = X_train[X_train["UserID"] == 10]

XGB_model.predict(X_to_predict)

In [None]:
# Feature importance of the categorical model, using the 'weight' importance type.
plot_importance(XGB_model, importance_type='weight', title='Weight (Frequence)')

In [None]:
# Candidate generator for the final submission: every component is built on URM_all.
# NOTE: slim1 is rebound here; from this cell on it is the full-data model.
slim1 = SLIMElasticNetRecommender(URM_all)
slim1.load_model(folder_path="_saved_models", file_name="SLIM_ElasticNetAll")

# Second SLIM on the URM stacked over the transposed ICM, with weights loaded from disk.
stacked = sps.vstack([0.6814451172353111 * URM_all, (1 - 0.6814451172353111) * controller.ICM_all.T]).tocsr()
slim2 = SLIMElasticNetRecommender(stacked)
slim2.load_model(folder_path="_saved_models", file_name="SLIMstackedAll1")

bestrp3 = RP3betaRecommender(URM_all)
bestrp3.fit(topK= 12 , alpha = 0.25843, beta= 0.357834)

# Nested two-model hybrids: hyb1 combines the two SLIMs, hyb2 combines hyb1 with RP3beta.
hyb1 = HybridOptunable2(URM_all)
hyb1.fit(0.27959722573911727,slim1,slim2)

ease1 = EASE_R_Recommender(URM_all)
ease1.load_model(folder_path="_saved_models", file_name="easeall")

hyb2 = HybridOptunable2(URM_all)
hyb2.fit(0.18923840370620948,hyb1,bestrp3)

# Fix: the wrapper now receives URM_all like each of its components; it was built on
# controller.URM_train before, which does not match the data the models were trained on.
# NOTE(review): presumably the wrapper's matrix drives seen-item filtering — confirm.
hyb3 = ScoresHybridRecommender(URM_all, ease1, hyb2, bestrp3, slim1, slim1)

# Only the first two slots (ease1, hyb2) get a non-zero weight; the other three are fillers.
alpha=0.689217356
hyb3.fit(alpha,1-alpha,0,0,0)

In [None]:
# Two parallel flat lists: position k holds one candidate item and the user it belongs to.
user_recommendations_items = []
user_recommendations_user_id = []

# n_users and cutoff are the values set in the training-set cells above.
for user_id in tqdm(range(n_users)):
    recommendations = hyb3.recommend(user_id, cutoff = cutoff)
    
    user_recommendations_items.extend(recommendations)
    # Repeat the user id once per returned item so both lists stay aligned.
    user_recommendations_user_id.extend([user_id]*len(recommendations))

In [None]:
# One row per (UserID, ItemID) candidate pair produced by hyb3.
test_dataframe = pd.DataFrame({"UserID":user_recommendations_user_id, "ItemID":user_recommendations_items})
test_dataframe

In [None]:
# Feature models for the submission candidates, built on URM_all. These rebind the
# names used for the train-split models above.
top_pop = TopPop(URM_all)
top_pop.fit()
item_cf = ItemKNNCFRecommender(URM_all)
item_cf.fit(similarity= 'tversky', topK= 5, shrink= 19, tversky_alpha= 0.20343700501082568, tversky_beta= 1.8980319969315242)
item_cbf = ItemKNNCBFRecommender(URM_all, ICM_all)
item_cbf.load_model(folder_path="_saved_models", file_name="ItemKNNCBFRecommender_all")
# NOTE(review): SLIM_BPR is instantiated but neither fitted/loaded nor referenced
# again in the visible cells.
SLIM_BPR = SLIM_BPR_Cython(URM_all)
# Reuse the RP3beta model already fitted on URM_all for the final hybrid.
rp3beta=bestrp3

# Same keys, in the same order, as other_algorithms, so the feature columns get the same names.
other_algorithms_all = {
    "top_pop": top_pop,
    "item_cf": item_cf,
    "item_cbf": item_cbf,
    "rp3beta": rp3beta,
    # NOTE(review): slimbpr is the model loaded from "bprtrain" on controller.URM_train,
    # not the SLIM_BPR object created above — confirm this is intended.
    "SLIM_BPR": slimbpr
}

In [None]:

# Index by user so that .loc[user_id] selects all candidate rows of that user.
test_dataframe = test_dataframe.set_index('UserID')

for user_id in tqdm(range(n_users)):
    # The candidate list depends only on the user, so it is looked up once per
    # user instead of once per (user, model) pair as before.
    item_list = test_dataframe.loc[user_id, "ItemID"].values.tolist()

    for rec_label, rec_instance in other_algorithms_all.items():
        # Row 0 of the result is the single requested user; keep only the
        # scores at the candidate item positions.
        all_item_scores = rec_instance._compute_item_score([user_id], items_to_compute = item_list)
        test_dataframe.loc[user_id, rec_label] = all_item_scores[0, item_list]

# Bring UserID back as an ordinary column for the following cells.
test_dataframe = test_dataframe.reset_index()
test_dataframe = test_dataframe.rename(columns = {"index": "UserID"})




In [None]:
# Stored entries per column of URM_all (CSC indptr differences): one count per item.
item_popularity = np.diff(sps.csc_matrix(URM_all).indptr)
candidate_items = test_dataframe["ItemID"].to_numpy().astype(int)
test_dataframe['item_popularity'] = item_popularity[candidate_items]

# Stored entries per row of URM_all (CSR indptr differences): one count per user.
user_popularity = np.diff(sps.csr_matrix(URM_all).indptr)
candidate_users = test_dataframe["UserID"].to_numpy().astype(int)
test_dataframe['user_profile_len'] = user_popularity[candidate_users]

test_dataframe

In [None]:
# Order the candidates by user and replace the old index with a fresh 0..n-1 one.
test_dataframe = test_dataframe.sort_values("UserID").reset_index(drop=True)

test_dataframe

In [None]:
# Build the prediction matrix with the same feature layout the current XGB_model
# was trained on. Selecting by X_train.columns fixes the column order and fails
# loudly if a feature column is missing.
X = test_dataframe[list(X_train.columns)].copy()

# Fix: the model was fitted with UserID/ItemID as pandas categoricals, while
# test_dataframe holds raw numeric ids. Re-encode the id columns with the training
# categories so equal ids map to equal category codes; ids never seen in training
# become missing values.
for id_column in ("UserID", "ItemID"):
    train_dtype = X_train[id_column].dtype
    if isinstance(train_dtype, pd.CategoricalDtype):
        # Match the value type of the categories first (e.g. int ids vs float categories).
        X[id_column] = X[id_column].astype(train_dtype.categories.dtype).astype(train_dtype)


In [None]:
# One ranking score per candidate row of X.
predictions = XGB_model.predict(X)
predictions

In [None]:
# New frame = candidate frame plus the ranker score of every row. assign()
# returns a copy, so test_dataframe itself is left untouched.
reranked_dataframe = test_dataframe.assign(
    rating_xgb=pd.Series(predictions, index=test_dataframe.index)
)
reranked_dataframe

In [None]:
# Within each user (ascending UserID) put the highest ranker score first.
reranked_dataframe = reranked_dataframe.sort_values(['UserID','rating_xgb'], ascending=[True, False])
reranked_dataframe

In [None]:
def write_reranked_recommendations(file_name, reranked=None, target_users=None, cutoff=10):
    """Write the re-ranked top items of every target user to ``<file_name>.csv``.

    This function was called but never defined or imported in the notebook, so the
    cell raised NameError on a fresh kernel. The output uses the header
    ``user_id,item_list`` followed by ``<user_id>,<item> <item> ...`` lines.

    Parameters
    ----------
    file_name : str
        Output path without extension; ``".csv"`` is appended.
    reranked : pandas.DataFrame, optional
        Frame with "UserID", "ItemID" and "rating_xgb" columns. Defaults to the
        notebook-level ``reranked_dataframe``.
    target_users : iterable, optional
        User ids to write. Defaults to the notebook-level ``users["user_id"]``.
    cutoff : int, optional
        Maximum number of items written per user (default 10).
    """
    if reranked is None:
        reranked = reranked_dataframe
    if target_users is None:
        target_users = users["user_id"]

    # Sort here as well, so the result does not depend on the caller's row order.
    ordered = reranked.sort_values(["UserID", "rating_xgb"], ascending=[True, False])
    items_by_user = {
        user_id: rows["ItemID"].tolist()[:cutoff]
        for user_id, rows in ordered.groupby("UserID", sort=False)
    }

    with open(file_name + ".csv", "w") as f:
        f.write("user_id,item_list\n")
        for user_id in target_users:
            # A target user without candidates still gets a row, with an empty item list.
            items = items_by_user.get(user_id, [])
            # int() keeps ids that were stored as floats from being written as "12.0".
            f.write(f"{user_id}," + " ".join(str(int(item)) for item in items) + "\n")


write_reranked_recommendations(file_name = "recomm")


In [None]:

# Read the re-ranked submission file back in, to eyeball its content.
submission = pd.read_csv("recomm.csv")
submission