In [1]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

# Pin every source of randomness used by the notebook to one shared seed so
# that repeated runs are comparable.
seed = 42

np.random.seed(seed)   # NumPy's legacy global RNG
random.seed(seed)      # Python's built-in RNG
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes spawned later, not
# the hashing of this already-running kernel -- confirm if hash order matters.
os.environ['PYTHONHASHSEED'] = f"{seed}"

In [2]:
# Load the three pre-built sparse matrices (full, train and validation files)
# from disk, in that order.
URM_all, URM_train, URM_val = (
    sps.load_npz(npz_path)
    for npz_path in (
        "Data/URM/combined/URM_combined_all.npz",
        "Data/URM/combined/URM_combined_train.npz",
        "Data/URM/combined/URM_combined_val.npz",
    )
)

In [3]:
from evaluation.evaluator import EvaluatorHoldout

# Evaluator built on the validation matrix; metrics are reported at cutoff 10 only.
evaluator = EvaluatorHoldout(
    URM_val,
    cutoff_list=[10],
)

In [4]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative

# Search budget: n_cases configurations in total, of which the first 30%
# (rounded down) are passed as random starts.
n_cases = 10

# Tune ItemKNNCFRecommender on the training matrix, scoring each configuration
# with the validation evaluator on MAP at cutoff 10. Results are written under
# the given output folder for the "cosine" and "jaccard" similarity types.
runHyperparameterSearch_Collaborative(
    ItemKNNCFRecommender,
    URM_train=URM_train,
    evaluator_validation=evaluator,
    metric_to_optimize="MAP",
    cutoff_to_optimize=10,
    n_cases=n_cases,
    n_random_starts=int(n_cases * 0.3),
    similarity_type_list=["cosine", "jaccard"],
    output_folder_path="result_experiments/item_knn_cf_combined/",
)


In [5]:
from Recommenders.DataIO import DataIO

# Load the metadata archive saved by the search for the cosine similarity run.
# Only the "_cosine" file is read here; the jaccard run is not inspected.
metadata_file = ItemKNNCFRecommender.RECOMMENDER_NAME + "_cosine_metadata.zip"
data_loader = DataIO(folder_path="result_experiments/item_knn_cf_combined/")
search_metadata = data_loader.load_data(metadata_file)

# Per-configuration validation metrics (one row per evaluated case).
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.031682,0.055706,0.05327,0.014238,0.024998,0.112671,0.054039,0.039733,0.24812,0.126199,...,1.0,0.24812,1.0,0.001807,5.214907,0.951836,0.003554,0.386458,8.572473,0.205851
1,10,0.01578,0.021807,0.019489,0.007529,0.010489,0.054274,0.023812,0.017439,0.11307,0.063621,...,1.0,0.11307,1.0,0.471907,13.717524,1.000187,0.928238,1.016556,0.144867,0.347711
2,10,0.031927,0.055674,0.053194,0.013851,0.02394,0.108099,0.055373,0.039904,0.248937,0.12188,...,1.0,0.248937,1.0,0.003337,5.641507,0.958606,0.006563,0.418072,7.91681,0.210573
3,10,0.045692,0.076167,0.072233,0.021932,0.036401,0.15622,0.077267,0.055975,0.322011,0.184149,...,1.0,0.322011,1.0,0.021195,8.167357,0.98858,0.041691,0.605253,4.854751,0.238923
4,10,0.018473,0.025415,0.022772,0.008796,0.012198,0.061556,0.027296,0.020398,0.128852,0.073212,...,1.0,0.128852,1.0,0.484546,13.731661,1.000178,0.953099,1.017603,0.175741,0.346377
5,10,0.0356,0.061275,0.058407,0.016612,0.028336,0.126427,0.063332,0.044237,0.269019,0.14421,...,1.0,0.269019,1.0,0.003069,6.031882,0.96785,0.006038,0.447001,7.303316,0.214145
6,10,0.017927,0.024569,0.021966,0.008465,0.011705,0.059385,0.026303,0.019742,0.125489,0.070555,...,1.0,0.125489,1.0,0.450744,13.694273,1.000183,0.886611,1.014833,0.169436,0.347926
7,10,0.046002,0.076138,0.072114,0.022119,0.036446,0.156222,0.077118,0.056171,0.321795,0.184945,...,1.0,0.321795,1.0,0.049749,8.783017,0.990265,0.097856,0.650877,4.469812,0.248097
8,10,0.046239,0.076368,0.072312,0.022401,0.036869,0.157831,0.07802,0.056409,0.321939,0.187068,...,1.0,0.321939,1.0,0.049999,8.901718,0.991319,0.098348,0.659674,4.255811,0.249368
9,10,0.046244,0.076342,0.07228,0.022421,0.036876,0.157876,0.078028,0.056402,0.321891,0.187166,...,1.0,0.321891,1.0,0.051102,8.922928,0.991384,0.100517,0.661246,4.24041,0.249701


In [6]:
# Hyperparameters tried in each search case; row indices line up with the
# rows of the validation-results table loaded from the same metadata.
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,shrink,similarity,normalize,feature_weighting
0,1769,568,cosine,False,TF-IDF
1,607,555,cosine,False,BM25
2,705,235,cosine,False,none
3,1818,567,cosine,True,TF-IDF
4,500,996,cosine,True,BM25
5,1999,984,cosine,True,none
6,1997,960,cosine,True,BM25
7,1926,11,cosine,True,TF-IDF
8,510,9,cosine,True,TF-IDF
9,500,0,cosine,True,TF-IDF


In [7]:
# Configuration the search stored under "hyperparameters_best"
# (presumably the best case on the optimized metric -- confirm in the search code).
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 500,
 'shrink': 0,
 'similarity': 'cosine',
 'normalize': True,
 'feature_weighting': 'TF-IDF'}

In [12]:
# Per-case exception record stored by the search; an entry of None
# presumably means that case completed without raising -- TODO confirm.
exception_list = search_metadata["exception_list"]
exception_list

[None, None, None, None, None, None, None, None, None, None]

In [10]:
# Train the final model on the full matrix (URM_all) rather than the train split.
recommender = ItemKNNCFRecommender(URM_all)

# Reuse the configuration loaded from the search metadata instead of re-typing
# the values by hand, so the final model cannot drift from the search result if
# the search is re-run. With the metadata displayed above this unpacks to:
#   topK=500, shrink=0, similarity='cosine', normalize=True, feature_weighting='TF-IDF'
recommender.fit(**best_hyperparameters)

# NOTE(review): the model is saved under "item_knn_cf_details/", not the
# "item_knn_cf_combined/" folder used by the search -- confirm this is intended.
recommender.save_model(folder_path = "result_experiments/item_knn_cf_details/", file_name="best.zip")

Similarity column 24507 (100.0%), 3046.06 column/sec. Elapsed time 8.05 sec


In [11]:
# Read the target-users CSV as a DataFrame, forcing the first column to int.
# NOTE(review): the next cell re-reads the same file and rebinds `target_users`
# to the "user_id" column values, so this cell looks redundant -- consider removing it.
target_users = pd.read_csv('Data/data_target_users_test.csv', dtype={0:int})

In [12]:
# Ids of the users we must produce recommendations for, taken from the
# "user_id" column of the target file (first column parsed as int).
target_users = (
    pd.read_csv('Data/data_target_users_test.csv', dtype={0:int})
    ["user_id"]
    .values
)

# Ask the fitted model for 10 items per target user, in one batched call.
print("Creating recomendations...")
recomendations = recommender.recommend(target_users, cutoff=10)

Creating recomendations...


In [13]:
from datetime import datetime

# Make sure the destination folder exists: on a fresh checkout open() would
# otherwise fail with FileNotFoundError.
output_dir = 'Output'
os.makedirs(output_dir, exist_ok=True)

# Timestamped file name so successive runs never overwrite each other.
out = os.path.join(output_dir, datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '.csv')

# Users and recommendation lists are paired positionally; fail loudly if they
# do not line up instead of silently writing a truncated file.
if len(target_users) != len(recomendations):
    raise ValueError(
        "target_users and recomendations have different lengths: "
        + str(len(target_users)) + " vs " + str(len(recomendations))
    )

print("Writing to file...")
with open(out, 'w') as f:
    # Header row, then one "user_id,<space-separated item ids>" row per user.
    f.write('user_id,item_list\n')

    for user_id, item_list in zip(target_users, recomendations):
        items = " ".join(str(item) for item in item_list)
        f.write(str(user_id) + "," + items + "\n")

Writing to file...
