In [6]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

# Fix the seed of every random source used by this notebook so runs are repeatable.
seed = 42

# NOTE(review): PYTHONHASHSEED is read by the interpreter at startup, so setting it
# here presumably only influences processes launched afterwards - confirm if hash
# randomisation of the current kernel matters.
os.environ['PYTHONHASHSEED'] = str(seed)

# Python's built-in generator and NumPy's legacy global generator.
random.seed(seed)
np.random.seed(seed)

In [7]:
# Load the three sparse matrices (all / train / val) from their .npz files,
# in the same order as the names on the left-hand side.
URM_all, URM_train, URM_val = (
    sps.load_npz(npz_path)
    for npz_path in (
        "Data/URM/details/URM_details_all.npz",
        "Data/URM/details/URM_details_train.npz",
        "Data/URM/details/URM_details_val.npz",
    )
)

In [8]:
from evaluation.evaluator import EvaluatorHoldout

# Hold-out evaluator built on the validation matrix; metrics are requested
# for a single cutoff (10), which is also the cutoff optimised by the search below.
evaluator = EvaluatorHoldout(
    URM_val,
    cutoff_list=[10],
)

EvaluatorHoldout: Ignoring 8269 (19.9%) Users that have less than 1 test interactions


In [13]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative

# Total number of hyperparameter configurations to evaluate.
n_cases=5

# Tune ItemKNNCFRecommender on URM_train, scoring each configuration with
# `evaluator` and optimising MAP at cutoff 10. Search artefacts are written
# under result_experiments/item_knn_cf_details/.
runHyperparameterSearch_Collaborative(
    ItemKNNCFRecommender,
    URM_train=URM_train,
    evaluator_validation=evaluator,
    metric_to_optimize="MAP",
    cutoff_to_optimize=10,
    similarity_type_list=["cosine", "jaccard"],
    n_cases=n_cases,
    # 30% of the cases, truncated towards zero by int().
    n_random_starts=int(n_cases*0.3),
    output_folder_path="result_experiments/item_knn_cf_details/",
)


In [14]:
from Recommenders.DataIO import DataIO

# Open the folder the hyperparameter search wrote to and load the metadata
# archive of the cosine-similarity run.
data_loader = DataIO(folder_path="result_experiments/item_knn_cf_details/")
search_metadata = data_loader.load_data(
    ItemKNNCFRecommender.RECOMMENDER_NAME + "_cosine_metadata.zip"
)

# Validation metrics stored in the metadata; shown as the cell output.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.015746,0.043893,0.042916,0.006913,0.019332,0.057753,0.034229,0.023039,0.130695,0.063039,...,0.801364,0.104735,0.801364,0.041209,8.267591,0.982335,0.085953,0.615141,7.149294,0.564222
1,10,0.011933,0.022126,0.020943,0.004955,0.009147,0.036641,0.018131,0.015204,0.088399,0.042491,...,0.801364,0.07084,0.801364,0.44511,13.742575,1.000216,0.928397,1.0225,0.407344,0.836349
2,10,0.007029,0.0132,0.012362,0.002861,0.005393,0.022295,0.011005,0.008962,0.054826,0.025177,...,0.801364,0.043936,0.801364,0.529549,14.014199,1.000266,1.104518,1.04271,0.131996,0.845653
3,10,0.015668,0.043716,0.042745,0.006887,0.019268,0.057566,0.034084,0.022931,0.130036,0.062819,...,0.801364,0.104206,0.801364,0.035868,8.061977,0.981167,0.074813,0.599842,7.376342,0.557511
4,10,0.021942,0.056632,0.055121,0.009717,0.025452,0.075079,0.044455,0.03139,0.167866,0.08515,...,0.801364,0.134522,0.801364,0.225697,11.397999,0.997361,0.470752,0.848055,3.344251,0.67603


In [10]:
# Hyperparameter table stored in the search metadata; shown as the cell output.
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,shrink,similarity,normalize,feature_weighting
0,831,657,cosine,True,TF-IDF
1,235,858,cosine,False,TF-IDF
2,564,834,cosine,False,BM25
3,996,525,cosine,True,none
4,509,618,cosine,True,none
5,44,990,cosine,True,BM25
6,5,1000,cosine,False,none
7,821,654,cosine,True,none
8,974,1,cosine,False,TF-IDF
9,893,2,cosine,False,none


In [11]:
# Entry stored under "hyperparameters_best" in the search metadata
# (presumably the best configuration found by the search - confirm); displayed below.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 831,
 'shrink': 657,
 'similarity': 'cosine',
 'normalize': True,
 'feature_weighting': 'TF-IDF'}

In [12]:
# Entry stored under "exception_list" in the search metadata
# (presumably one slot per search case, None where nothing failed - confirm).
exception_list = search_metadata["exception_list"]
exception_list

[None, None, None, None, None, None, None, None, None, None]

In [10]:
# Train the final item-based KNN model on the full interaction matrix and
# persist it next to the search results.
recommender = ItemKNNCFRecommender(URM_all)

# NOTE(review): these values are hard-coded and differ from the
# `best_hyperparameters` shown earlier in this notebook's saved output
# (topK=831, shrink=657). Confirm which search run they come from.
recommender.fit(
    topK=991,
    shrink=9,
    similarity='cosine',
    normalize=True,
    feature_weighting='TF-IDF',
)

recommender.save_model(
    folder_path="result_experiments/item_knn_cf_details/",
    file_name="best.zip",
)

Similarity column 24507 (100.0%), 3046.06 column/sec. Elapsed time 8.05 sec


In [11]:
# Read the target-user CSV into a DataFrame.
# NOTE(review): the following cell re-reads the same file and rebinds
# `target_users` to the "user_id" array, so this cell looks redundant - confirm.
target_users = pd.read_csv(
    'Data/data_target_users_test.csv',
    dtype={0: int},
)

In [12]:
# Ids of the users to recommend for, taken from the "user_id" column as an array.
# dtype is keyed by position 0 (presumably the user_id column - confirm).
target_users = (
    pd.read_csv('Data/data_target_users_test.csv', dtype={0: int})["user_id"]
    .values
)

# Ask the fitted model for the top-10 items of every target user.
print("Creating recomendations...")
recomendations = recommender.recommend(target_users, cutoff=10)

Creating recomendations...


In [13]:
from datetime import datetime

# Timestamped submission file, e.g. Output/2024_01_31_12_00_00.csv
out = os.path.join('Output', datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '.csv')

# Create the output folder on first use; open() would otherwise fail with
# FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(out), exist_ok=True)

# Each user must have exactly one recommendation list; fail loudly rather than
# let zip() silently drop rows on a mismatch.
if len(recomendations) != len(target_users):
    raise ValueError(
        "Expected one recommendation list per target user, got "
        + str(len(recomendations)) + " lists for "
        + str(len(target_users)) + " users"
    )

print("Writing to file...")
with open(out, 'w') as f:
    # Header followed by one row per user: "<user_id>,<space-separated item ids>"
    f.write('user_id,item_list\n')

    for user_id, item_list in zip(target_users, recomendations):
        f.write(str(user_id) + "," + " ".join(str(x) for x in item_list) + "\n")

Writing to file...
