In [6]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

# Single seed shared by every random number generator seeded below
seed = 42

# NOTE: Python reads PYTHONHASHSEED when the interpreter starts, so assigning
# it here is only visible to processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed the NumPy and standard-library global generators
np.random.seed(seed)
random.seed(seed)

In [7]:
# Load the three sparse matrices stored under Data/URM/views, in the order
# all / train / val, and bind each one to its own name.
URM_all, URM_train, URM_val = map(
    sps.load_npz,
    (
        "Data/URM/views/URM_views_all.npz",
        "Data/URM/views/URM_views_train.npz",
        "Data/URM/views/URM_views_val.npz",
    ),
)

In [8]:
from evaluation.evaluator import EvaluatorHoldout

# Evaluator built on the validation matrix; metrics are requested at cutoff 10 only
evaluator = EvaluatorHoldout(
    URM_val,
    cutoff_list=[10],
)

EvaluatorHoldout: Ignoring 1236 ( 3.0%) Users that have less than 1 test interactions


In [9]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative

# Number of cases handed to the search
n_cases = 10

# Search ItemKNNCF hyperparameters on the training matrix, scoring each case
# with the validation evaluator on MAP at cutoff 10. 30% of the cases are
# passed as `n_random_starts`; results are written to the output folder.
runHyperparameterSearch_Collaborative(
    ItemKNNCFRecommender,
    URM_train=URM_train,
    evaluator_validation=evaluator,
    metric_to_optimize="MAP",
    cutoff_to_optimize=10,
    n_cases=n_cases,
    n_random_starts=int(0.3 * n_cases),
    similarity_type_list=["cosine", "jaccard"],
    output_folder_path="result_experiments/item_knn_cf_views/",
)


In [10]:
from Recommenders.DataIO import DataIO

# Point the loader at the same folder the hyperparameter search used as output
data_loader = DataIO(folder_path="result_experiments/item_knn_cf_views/")

# Only the metadata archive of the cosine run is loaded here
search_metadata = data_loader.load_data(
    ItemKNNCFRecommender.RECOMMENDER_NAME + "_cosine_metadata.zip"
)

# Validation results stored in the metadata; shown as the cell output
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.016622,0.029448,0.028274,0.007882,0.014027,0.056796,0.026809,0.020936,0.118956,0.066606,...,0.970309,0.115424,0.970309,0.493546,13.790514,1.000326,1.118067,1.045854,0.129504,0.531673
1,10,0.040613,0.091007,0.089385,0.019509,0.043428,0.142389,0.077124,0.05585,0.293269,0.166015,...,0.970309,0.284561,0.970309,0.037128,8.63137,0.990464,0.084109,0.654591,4.000495,0.361738
2,10,0.029327,0.069494,0.068441,0.013246,0.031363,0.105362,0.057413,0.04106,0.231104,0.117713,...,0.970309,0.224243,0.970309,0.007577,6.089358,0.963238,0.017164,0.461809,6.95881,0.31273
3,10,0.028087,0.066586,0.065564,0.012146,0.028347,0.09629,0.055117,0.039326,0.222217,0.107764,...,0.970309,0.215619,0.970309,0.004066,5.743907,0.959468,0.009212,0.43561,7.134568,0.308527
4,10,0.030268,0.070643,0.069529,0.013435,0.031081,0.105113,0.059918,0.042175,0.235783,0.118402,...,0.970309,0.228783,0.970309,0.012489,6.76524,0.972854,0.028293,0.513067,6.071311,0.324796
5,10,0.034825,0.079935,0.078578,0.016543,0.0377,0.125492,0.069404,0.048261,0.261951,0.143512,...,0.970309,0.254174,0.970309,0.007004,6.944766,0.979778,0.015866,0.526682,5.529142,0.327773
6,10,0.04151,0.092376,0.090712,0.020064,0.04442,0.144881,0.079292,0.056956,0.296338,0.169752,...,0.970309,0.28754,0.970309,0.038425,9.160979,0.994474,0.087046,0.694756,3.231382,0.372653
7,10,0.013854,0.024694,0.023678,0.006525,0.011707,0.048522,0.022825,0.01748,0.102592,0.056048,...,0.970309,0.099546,0.970309,0.485577,13.779204,1.000334,1.100013,1.044996,0.105419,0.530655
8,10,0.040816,0.089466,0.08776,0.019896,0.043285,0.142716,0.077019,0.055719,0.288887,0.16773,...,0.970309,0.280309,0.970309,0.061105,9.927527,0.996515,0.138426,0.75289,2.664687,0.393833
9,10,0.034246,0.076364,0.07502,0.0163,0.03627,0.122095,0.067719,0.047025,0.254252,0.140511,...,0.970309,0.246703,0.970309,0.040763,9.012252,0.993637,0.092344,0.683477,3.448872,0.368581


In [11]:
# Hyperparameter table stored in the search metadata; the bare name on the
# last line displays it as the cell output.
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,shrink,similarity,normalize,feature_weighting
0,146,894,cosine,True,BM25
1,919,502,cosine,True,TF-IDF
2,393,427,cosine,False,TF-IDF
3,1000,929,cosine,False,none
4,134,540,cosine,False,none
5,980,381,cosine,True,none
6,77,594,cosine,True,TF-IDF
7,971,6,cosine,False,BM25
8,23,4,cosine,True,TF-IDF
9,9,992,cosine,True,none


In [12]:
# Entry stored under "hyperparameters_best" in the search metadata
# (presumably the configuration with the best validation score - TODO confirm);
# the bare name on the last line displays it as the cell output.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 77,
 'shrink': 594,
 'similarity': 'cosine',
 'normalize': True,
 'feature_weighting': 'TF-IDF'}

In [13]:
# Exception record stored in the search metadata
# (presumably one entry per evaluated case, None when nothing was raised - TODO confirm).
exception_list = search_metadata["exception_list"]
exception_list

[None, None, None, None, None, None, None, None, None, None]

In [10]:
# Train the final model on the full interaction matrix using the configuration
# selected by the hyperparameter search (`best_hyperparameters`) instead of
# hand-copied values, so the saved model always matches the reported search result.
recommender = ItemKNNCFRecommender(URM_all)
recommender.fit(**best_hyperparameters)

# Persist the fitted model in the same folder as the search results
recommender.save_model(folder_path="result_experiments/item_knn_cf_views/", file_name="best.zip")

Similarity column 24507 (100.0%), 3046.06 column/sec. Elapsed time 8.05 sec


In [11]:
# Read the target-user CSV as a DataFrame, with the first column parsed as int.
# NOTE(review): the following cell re-reads the same file and rebinds
# `target_users` to the "user_id" array, so this cell looks redundant - confirm.
target_users = pd.read_csv(
    'Data/data_target_users_test.csv',
    dtype={0: int},
)

In [12]:
# Ids of the users to recommend for, taken from the "user_id" column as an array
target_users = (
    pd.read_csv('Data/data_target_users_test.csv', dtype={0: int})
    ["user_id"]
    .values
)

# Ask the fitted recommender for 10 items per target user
print("Creating recomendations...")
recomendations = recommender.recommend(target_users, cutoff=10)

Creating recomendations...


In [13]:
from datetime import datetime

# Timestamped file name inside the 'Output' folder
out = os.path.join('Output', datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '.csv')

# open() does not create missing directories, so make sure the folder exists
os.makedirs(os.path.dirname(out), exist_ok=True)

# Every target user needs its own recommendation list; fail before writing
# anything rather than producing a truncated file.
if len(recomendations) != len(target_users):
    raise ValueError(
        "Expected one recommendation list per target user, got "
        + str(len(recomendations)) + " lists for "
        + str(len(target_users)) + " users"
    )

print("Writing to file...")
with open(out, 'w') as f:
    f.write('user_id,item_list\n')

    # One row per user: the id, a comma, then the items separated by spaces
    for user_id, item_list in zip(target_users, recomendations):
        f.write(str(user_id) + "," + " ".join(str(x) for x in item_list) + "\n")

Writing to file...
