In [1]:
import scipy.sparse as sps
import numpy as np
import os
import pandas as pd 
import itertools
from lightfm import LightFM

# Seed NumPy's global generator; the hyperparameter sampler further down draws
# from np.random, so this makes the sampled configurations repeatable.
np.random.seed(41148)

# Load the two sparse matrices from disk (URM first, then ICM) and convert
# each one to CSR format.
URM_all, ICM_all = (
    sps.load_npz(matrix_path).tocsr()
    for matrix_path in (
        "./Processed Matrices/URM_simple_coo_2020.npz",
        "./Processed Matrices/ICM_nonweighted_simple_coo_2020.npz",
    )
)

In [2]:
from Notebooks_utils.evaluation_function import evaluate_algorithm
from Base.Similarity.Compute_Similarity_Python import Compute_Similarity_Python
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split the full interaction matrix into a training part and a validation
# part; the splitter is asked for train_percentage=0.80.
# NOTE(review): presumably the split is random and driven by np.random (seeded
# in the first cell) - confirm against the helper's implementation.
URM_train, URM_valid = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.80,
)



In [None]:
    
def sample_hyperparameters():
    """
    Endless generator of randomly drawn hyperparameter dictionaries.

    Every yielded dict holds the keys ``no_components``, ``learning_schedule``,
    ``learning_rate``, ``loss``, ``item_alpha``, ``user_alpha``, ``num_epochs``
    and ``k``. ``loss`` is always ``"warp-kos"`` and ``k`` is always ``10``;
    the remaining values are drawn from NumPy's global random generator, so
    the sequence is reproducible once ``np.random.seed`` has been called.

    The generator never terminates on its own: callers are expected to bound
    it (for example with ``itertools.islice``).
    """
    schedule_options = ["adagrad", "adadelta"]

    while True:
        # The draws are made in a fixed order so that a given seed always
        # produces the same stream of configurations.
        n_factors = np.random.randint(16, 256)
        schedule = np.random.choice(schedule_options)
        step_size = np.random.exponential(0.1)
        item_penalty = np.random.exponential(1e-5)
        user_penalty = np.random.exponential(1e-5)
        n_epochs = np.random.randint(10, 200)

        yield dict(
            no_components=n_factors,
            learning_schedule=schedule,
            learning_rate=step_size,
            loss="warp-kos",
            item_alpha=item_penalty,
            user_alpha=user_penalty,
            num_epochs=n_epochs,
            k=10,
        )

num_samples=300
num_threads=4

from lightfm.evaluation import auc_score, recall_at_k,precision_at_k

# Random search: fit one LightFM model per sampled configuration on the
# training split and score it on the validation split.
# Results are accumulated in a plain list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration.
search_records = []
try:
    for i, hparams in enumerate(itertools.islice(sample_hyperparameters(), num_samples), start=1):
        # The epoch count is an argument of fit(), not of the LightFM
        # constructor, so it is taken out of the dict before unpacking it.
        num_epochs = hparams.pop("num_epochs")

        model = LightFM(**hparams)
        model.fit(URM_train, item_features=ICM_all, epochs=num_epochs, num_threads=num_threads)

        # train_interactions makes the metrics ignore items already known from
        # training, consistent with the seen-item filtering applied when the
        # final recommendations are produced.
        auc = auc_score(
            model,
            URM_valid,
            train_interactions=URM_train,
            item_features=ICM_all,
            num_threads=num_threads,
        ).mean()
        precision = precision_at_k(
            model,
            URM_valid,
            train_interactions=URM_train,
            item_features=ICM_all,
            num_threads=num_threads,
            k=10,
        ).mean()

        search_records.append(
            {"hparams": hparams, "epochs": num_epochs, "auc": auc, "precision": precision}
        )
        print(i, hparams, num_epochs, auc, precision)
finally:
    # Built in a finally clause so that interrupting the long-running search
    # still leaves the configurations evaluated so far in resultDF.
    resultDF = pd.DataFrame(search_records, columns=["hparams", "epochs", "auc", "precision"])
    

In [None]:
# Order the search results from highest to lowest validation precision and
# persist the ranked table as CSV.
resultDF_sorted = resultDF.sort_values("precision", ascending=False)
resultDF_sorted.to_csv("LightFM_results_sorted.csv")

In [None]:
# Refit the best configuration (first row of the precision-sorted results) on
# the full interaction matrix.
best_row = resultDF_sorted.iloc[0]
best_model = LightFM(**best_row["hparams"])
# Fit best_model itself: calling fit on `model` would retrain the last
# configuration sampled by the search instead of the best one.
# int() because the stored epoch count may come back as a float.
best_model.fit(
    URM_all,
    item_features=ICM_all,
    epochs=int(best_row["epochs"]),
    num_threads=num_threads,
)

# The recommendation cell scores users through the name `model`, so rebind it
# to the refitted best model.
model = best_model

In [None]:
def remove_seen(user_id, scores, URM):
    """
    Set the scores of a user's already-interacted items to ``-inf``.

    Parameters
    ----------
    user_id : int
        Row of ``URM`` belonging to the user.
    scores : numpy array
        Per-item scores indexed by item id. Modified in place.
    URM : CSR-like sparse matrix
        Only its ``indptr`` and ``indices`` attributes are read; the column
        indices stored for row ``user_id`` are treated as the seen items.

    Returns
    -------
    The same ``scores`` object that was passed in, after masking.
    """
    row_begin = URM.indptr[user_id]
    row_end = URM.indptr[user_id + 1]
    interacted_items = URM.indices[row_begin:row_end]

    # -inf guarantees these items sort below every real score.
    scores[interacted_items] = -np.inf
    return scores

In [None]:
target_user_list=list(pd.read_csv("./Data/2020/data_target_users_test.csv").user_id)

# The candidate item ids are identical for every user, so build them once as
# an int32 array instead of recreating (and converting) a Python list of all
# item ids on every iteration.
all_item_ids = np.arange(URM_all.shape[1], dtype=np.int32)

# Rows are collected in a list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration.
rec_records = []
for uid in target_user_list:
    # `model` is whichever estimator was fitted last in the preceding cells.
    scores = model.predict(uid, all_item_ids, item_features=ICM_all, num_threads=num_threads)
    # Never recommend items the user has already interacted with.
    scores = remove_seen(uid, scores, URM_all)
    # Ten highest-scoring items, best first.
    rec_list = scores.argsort()[-10:][::-1]
    rec_records.append({"user_id": uid, "item_list": ' '.join(map(str, rec_list))})

rec_df = pd.DataFrame(rec_records, columns=["user_id", "item_list"])
rec_df = rec_df.astype({"user_id": 'int32', "item_list": 'str'})
rec_df = rec_df.set_index('user_id')
rec_df.to_csv("LightFM_recommendations.csv")