In [1]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

# Reproducibility: drive every random number generator from a single seed value.
seed = 42

# NOTE: Python reads PYTHONHASHSEED at interpreter startup, so this assignment
# cannot change hashing in the already-running kernel; it is only inherited by
# processes spawned from here.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed NumPy's legacy global generator and the stdlib generator.
np.random.seed(seed)
random.seed(seed)

In [2]:
def load_binary_urm(path):
    """Load a sparse matrix saved as ``.npz`` and turn it into a 0/1 matrix.

    Explicitly stored zeros are dropped *before* the remaining stored values
    are overwritten with 1; without that step a stored zero would be turned
    into a 1 by the binarization.

    Parameters
    ----------
    path : str
        Location of the ``.npz`` file, as accepted by ``scipy.sparse.load_npz``.

    Returns
    -------
    scipy sparse matrix
        The loaded matrix (same sparse format as stored on disk) whose stored
        entries are all equal to 1.
    """
    urm = sps.load_npz(path)
    urm.eliminate_zeros()
    urm.data[:] = 1
    return urm


# "views" matrices: full data, train split and validation split.
URM_views_all = load_binary_urm("Data/URM/views/URM_views_all.npz")
URM_views_train = load_binary_urm("Data/URM/views/URM_views_train.npz")
URM_views_val = load_binary_urm("Data/URM/views/URM_views_val.npz")

# "details" matrices: full data, train split and validation split.
URM_details_all = load_binary_urm("Data/URM/details/URM_details_all.npz")
URM_details_train = load_binary_urm("Data/URM/details/URM_details_train.npz")
URM_details_val = load_binary_urm("Data/URM/details/URM_details_val.npz")

# Stack the two matrices row-wise: the "views" rows come first, followed by the
# "details" rows, so the column count is unchanged.
# NOTE(review): presumably rows are users and columns are items, which would make
# row index u of the stacked matrix the "views" profile of user u — confirm.
URM_all = sps.vstack([URM_views_all, URM_details_all])
URM_train = sps.vstack([URM_views_train, URM_details_train])

# The validation matrix is loaded from its own pre-combined file (it is not
# stacked) and goes through the same zero-elimination + binarization as the
# other matrices so that stored zeros never become positive entries.
URM_val = load_binary_urm("Data/URM/combined/URM_combined_val.npz")

In [3]:
from evaluation.evaluator import EvaluatorHoldout

# Holdout evaluator built on the validation matrix; the only cutoff requested
# is 10, so metrics are reported for that single cutoff.
evaluator = EvaluatorHoldout(URM_val, cutoff_list=[10])

In [4]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative

# Total number of search cases; 30% of them (rounded down) are passed as random starts.
n_cases = 10

# All keyword arguments of the search, collected in one place.
search_kwargs = dict(
    URM_train=URM_train,
    n_cases=n_cases,
    n_random_starts=int(n_cases * 0.3),
    evaluator_validation=evaluator,
    output_folder_path="result_experiments/item_knn_cf_stacked/",
    cutoff_to_optimize=10,
    metric_to_optimize="MAP",
    similarity_type_list=["cosine"],
)

# Search ItemKNN-CF hyperparameters on the stacked training matrix, optimizing
# MAP at cutoff 10 on the validation evaluator, with cosine similarity only.
runHyperparameterSearch_Collaborative(ItemKNNCFRecommender, **search_kwargs)


In [5]:
from Recommenders.DataIO import DataIO

# Read back the metadata archive of the cosine search from the search output folder.
data_loader = DataIO(folder_path="result_experiments/item_knn_cf_stacked/")
metadata_file_name = ItemKNNCFRecommender.RECOMMENDER_NAME + "_cosine_metadata.zip"
search_metadata = data_loader.load_data(metadata_file_name)

# Validation metrics stored by the search; shown as the cell output.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.044109,0.073535,0.069891,0.020936,0.034857,0.150717,0.07162,0.054084,0.313628,0.176819,...,1.0,0.313628,1.0,0.035218,8.683858,0.990818,0.074633,0.651502,4.759579,0.444941
1,10,0.044089,0.073437,0.069789,0.020943,0.034828,0.150688,0.071565,0.054039,0.313267,0.176833,...,1.0,0.313267,1.0,0.037771,8.729685,0.990907,0.080042,0.65494,4.733932,0.446325
2,10,0.032117,0.046502,0.043266,0.014785,0.021435,0.101141,0.046347,0.036867,0.218141,0.121754,...,1.0,0.218141,1.0,0.319495,12.828163,0.99991,0.677059,0.962426,0.888451,0.591574
3,10,0.03155,0.055465,0.053088,0.013653,0.024112,0.107885,0.051507,0.039579,0.247832,0.120967,...,1.0,0.247832,1.0,0.003063,5.51514,0.955774,0.00649,0.41377,9.105614,0.373254
4,10,0.043193,0.071742,0.068187,0.020421,0.03391,0.14728,0.069871,0.052886,0.30755,0.172635,...,1.0,0.30755,1.0,0.03544,8.79048,0.991025,0.075103,0.659501,4.652413,0.450531
5,10,0.031473,0.055097,0.052686,0.013654,0.023928,0.107555,0.051219,0.039406,0.246559,0.120767,...,1.0,0.246559,1.0,0.003106,5.522972,0.95624,0.006582,0.414358,9.06557,0.373802
6,10,0.019088,0.027879,0.025669,0.008848,0.013066,0.065252,0.028456,0.021895,0.139902,0.075755,...,1.0,0.139902,1.0,0.387218,13.317242,1.000178,0.820574,0.999119,0.205763,0.620859
7,10,0.044342,0.07395,0.070252,0.021133,0.03519,0.152064,0.072163,0.054368,0.314468,0.178433,...,1.0,0.314468,1.0,0.028625,8.656472,0.991403,0.060661,0.649447,4.649325,0.443527
8,10,0.031264,0.045209,0.042029,0.014368,0.020818,0.09852,0.045079,0.035856,0.213289,0.118454,...,1.0,0.213289,1.0,0.324705,12.885175,0.99996,0.688101,0.966703,0.783527,0.594348
9,10,0.038829,0.066006,0.062843,0.018286,0.031057,0.136666,0.064174,0.048,0.287612,0.157411,...,1.0,0.287612,1.0,0.008629,7.071434,0.9806,0.018286,0.530531,6.655547,0.403824


In [6]:
# Table of the hyperparameter values stored in the search metadata
# (displayed as the cell output).
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,shrink,similarity,normalize,feature_weighting
0,1440,371,cosine,True,TF-IDF
1,1513,259,cosine,True,TF-IDF
2,1672,36,cosine,True,BM25
3,1989,12,cosine,False,TF-IDF
4,1942,5,cosine,True,none
5,1917,10,cosine,False,none
6,536,998,cosine,False,BM25
7,520,901,cosine,True,TF-IDF
8,1614,79,cosine,True,BM25
9,500,932,cosine,True,none


In [7]:
# Hyperparameter set the search stored under "hyperparameters_best"
# (displayed as the cell output).
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 520,
 'shrink': 901,
 'similarity': 'cosine',
 'normalize': True,
 'feature_weighting': 'TF-IDF'}

In [8]:
# Exceptions stored in the search metadata.
# NOTE(review): presumably one slot per search case, with None meaning the case
# completed without raising — confirm against the search implementation.
exception_list = search_metadata["exception_list"]
exception_list

[None, None, None, None, None, None, None, None, None, None]

In [8]:
# Train the final model on the full stacked matrix (train + validation data).
recommender = ItemKNNCFRecommender(URM_all)

# Fit with the hyperparameters selected by the search instead of hand-copied
# literals: the previous hard-coded values (topK=525, shrink=997) did not match
# the best configuration reported by the search in this notebook, so the saved
# "best" model was not the one the search actually selected.
recommender.fit(**best_hyperparameters)

# Persist the fitted model next to the search results.
recommender.save_model(folder_path = "result_experiments/item_knn_cf_stacked/", file_name="best.zip")

ItemKNNCFRecommender: URM Detected 22 ( 0.0%) users with no interactions.
Similarity column 24507 (100.0%), 4819.13 column/sec. Elapsed time 5.09 sec
ItemKNNCFRecommender: Saving model in file 'result_experiments/item_knn_cf_stacked/best.zip'
ItemKNNCFRecommender: Saving complete


In [9]:
# Read the target-users CSV as a DataFrame, forcing the first column to int.
# NOTE(review): the following cell re-reads the same file and rebinds
# `target_users` to the "user_id" values, so this binding is short-lived.
target_users = pd.read_csv('Data/data_target_users_test.csv', dtype={0:int})

In [10]:
# Read the target-users CSV (first column forced to int) and keep only the
# "user_id" column as a plain array of ids.
target_users_df = pd.read_csv('Data/data_target_users_test.csv', dtype={0:int})
target_users = target_users_df["user_id"].values

# Ask the fitted recommender for 10 items for each target user.
print("Creating recomendations...")
recomendations = recommender.recommend(target_users, cutoff=10)

Creating recomendations...


In [14]:
from datetime import datetime

# Make sure the output folder exists; open() below would otherwise fail with
# FileNotFoundError on a fresh checkout.
os.makedirs('submissions', exist_ok=True)

# One timestamped CSV per run so earlier submissions are never overwritten.
out = os.path.join('submissions', datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '.csv')

# Fail loudly rather than silently writing a truncated file if the recommender
# did not return exactly one list per target user.
if len(recomendations) != len(target_users):
    raise ValueError(
        "Expected one recommendation list per target user, got "
        + str(len(recomendations)) + " lists for " + str(len(target_users)) + " users"
    )

print("Writing to file...")
with open(out, 'w') as f:
    # Header, then one line per user: "<user_id>,<space-separated item ids>".
    f.write('user_id,item_list\n')

    for user_id, item_list in zip(target_users, recomendations):
        f.write(str(user_id) + "," + " ".join(str(x) for x in item_list) + "\n")

Writing to file...
