In [1]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.model_selection import train_test_split

# Random seed for reproducibility
SEED = 42

random.seed(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)
np.random.seed(SEED)


## Load dataset

In [2]:
URM_all = sps.load_npz("Data/URM/combined/URM_combined_all.npz")
URM_train = sps.load_npz("Data/URM/combined/URM_combined_train.npz")
URM_val = sps.load_npz("Data/URM/combined/URM_combined_val.npz")

In [3]:
from evaluation.evaluator import EvaluatorHoldout

evaluator = EvaluatorHoldout(URM_val, cutoff_list=[10])

## Model

In [4]:
from Recommenders.KNN.ItemKNNSimilarityHybridRecommender import (
    ItemKNNSimilarityHybridRecommender,
)
from Recommenders.KNN.ItemKNNCFRecommender import (
    ItemKNNCFRecommender,
)
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

recommender_class = ItemKNNSimilarityHybridRecommender
n_cases = 10

hyperparameters_range_dictionary = {
    "topK": Integer(100, 2000),
    "alpha": Real(0.0, 1.0),
}

hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator,
)

cosine_recommender = ItemKNNCFRecommender(URM_train)
cosine_recommender.load_model(
    folder_path="result_experiments/item_knn_cf_combined/",
    file_name=ItemKNNCFRecommender.RECOMMENDER_NAME + "_cosine_best_model.zip",
)
jaccard_recommender = ItemKNNCFRecommender(URM_train)
jaccard_recommender.load_model(
    folder_path="result_experiments/item_knn_cf_combined/",
    file_name=ItemKNNCFRecommender.RECOMMENDER_NAME + "_jaccard_best_model.zip",
)

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[
        URM_train,
        cosine_recommender.W_sparse,
        jaccard_recommender.W_sparse
    ],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
    EARLYSTOPPING_KEYWORD_ARGS={},
)

output_folder_path = "result_experiments/item_knn_cf_hybrid/"

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

n_random_starts = int(n_cases * 0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10


ItemKNNCFRecommender: Loading model from file 'result_experiments/item_knn_cf_combined/ItemKNNCFRecommender_cosine_best_model.zip'
ItemKNNCFRecommender: Loading complete
ItemKNNCFRecommender: Loading model from file 'result_experiments/item_knn_cf_combined/ItemKNNCFRecommender_jaccard_best_model.zip'
ItemKNNCFRecommender: Loading complete


In [5]:
hyperparameterSearch.search(
    recommender_input_args,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="best",
    output_folder_path=output_folder_path,  # Where to save the results
    output_file_name_root=recommender_class.RECOMMENDER_NAME,  # How to call the files
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)


Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 1091, 'alpha': 0.30143986805874196}
EvaluatorHoldout: Processed 41629 (100.0%) in 58.06 sec. Users per second: 717
SearchBayesianSkopt: New best config found. Config 0: {'topK': 1091, 'alpha': 0.30143986805874196} - results: PRECISION: 0.0452281, PRECISION_RECALL_MIN_DEN: 0.0742436, RECALL: 0.0701628, MAP: 0.0218043, MAP_MIN_DEN: 0.0357120, MRR: 0.1532911, NDCG: 0.0769643, F1: 0.0550014, HIT_RATE: 0.3141800, ARHR_ALL_HITS: 0.1818115, NOVELTY: 0.0044163, AVERAGE_POPULARITY: 0.1918478, DIVERSITY_MEAN_INTER_LIST: 0.9283803, DIVERSITY_HERFINDAHL: 0.9928358, COVERAGE_ITEM: 0.6069694, COVERAGE_ITEM_HIT: 0.0523524, ITEMS_IN_GT: 0.9926960, COVERAGE_USER: 1.0000000, COVERAGE_USER_HIT: 0.3141800, USERS_IN_GT: 1.0000000, DIVERSITY_GINI: 0.0473711, SHANNON_ENTROPY: 9.1180944, RATIO_DIVERSITY_HERFINDAHL: 0.9931628, RATIO_DIVERSITY_GINI: 0.0931786, RATIO_SHANNON_ENTROPY: 0.6757088, RATIO_AVERA

In [6]:
from Recommenders.DataIO import DataIO

data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    ItemKNNSimilarityHybridRecommender.RECOMMENDER_NAME + "_metadata.zip"
)

result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df


Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.045228,0.074244,0.070163,0.021804,0.035712,0.153291,0.076964,0.055001,0.31418,0.181811,...,1.0,0.31418,1.0,0.047371,9.118094,0.993163,0.093179,0.675709,3.783318,0.252668
1,10,0.045675,0.075006,0.070899,0.022195,0.03636,0.15622,0.077721,0.055558,0.317183,0.18521,...,1.0,0.317183,1.0,0.058669,9.14033,0.992387,0.115402,0.677357,4.014065,0.252562
2,10,0.044409,0.072999,0.069008,0.0212,0.03473,0.149408,0.075451,0.054041,0.3094,0.177021,...,1.0,0.3094,1.0,0.044125,9.167995,0.993712,0.086793,0.679407,3.581667,0.253881
3,10,0.045658,0.074953,0.07085,0.022191,0.036346,0.156205,0.077754,0.055531,0.317087,0.18518,...,1.0,0.317087,1.0,0.057897,9.123626,0.992308,0.113884,0.676119,4.031997,0.252325
4,10,0.04555,0.074781,0.070683,0.022072,0.036145,0.155179,0.077505,0.055399,0.316462,0.184055,...,1.0,0.316462,1.0,0.052353,9.110408,0.992702,0.102977,0.675139,3.92579,0.2523
5,10,0.045709,0.075083,0.070995,0.022202,0.036376,0.156361,0.077726,0.055612,0.317519,0.185353,...,1.0,0.317519,1.0,0.061757,9.143213,0.992146,0.121475,0.67757,4.07299,0.252546
6,10,0.045725,0.075126,0.07104,0.022197,0.03638,0.156283,0.077771,0.055639,0.317687,0.185271,...,1.0,0.317687,1.0,0.060358,9.135256,0.992203,0.118723,0.676981,4.05875,0.252454
7,10,0.045632,0.074937,0.070851,0.022194,0.036366,0.156193,0.077802,0.055511,0.316702,0.185187,...,1.0,0.316702,1.0,0.056485,9.117124,0.992386,0.111106,0.675637,4.010736,0.252268
8,10,0.044291,0.072799,0.068819,0.021162,0.034664,0.149294,0.075343,0.053896,0.308895,0.176782,...,1.0,0.308895,1.0,0.044039,9.175885,0.993769,0.086624,0.679991,3.558449,0.254057
9,10,0.045533,0.074709,0.07061,0.022008,0.03605,0.154784,0.077445,0.055364,0.316198,0.18356,...,1.0,0.316198,1.0,0.051354,9.117169,0.992794,0.101013,0.67564,3.900772,0.252401


In [7]:
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,alpha
0,1091,0.30144
1,454,0.842404
2,1476,0.08499
3,527,0.807634
4,2000,0.539302
5,1421,1.0
6,2000,0.927498
7,550,0.736269
8,1538,0.064789
9,434,0.499703


In [8]:
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 1421, 'alpha': 1.0}

In [9]:
exception_list = search_metadata["exception_list"]
exception_list

[None, None, None, None, None, None, None, None, None, None]

In [11]:
recommender = ItemKNNSimilarityHybridRecommender(URM_all, cosine_recommender.W_sparse, jaccard_recommender.W_sparse)
recommender.load_model(
    folder_path="result_experiments/",
    file_name=ItemKNNSimilarityHybridRecommender.RECOMMENDER_NAME + "_best_model.zip",
)


ItemKNNSimilarityHybridRecommender: Loading model from file 'result_experiments/ItemKNNSimilarityHybridRecommender_best_model.zip'
ItemKNNSimilarityHybridRecommender: Loading complete


In [12]:
target_users = pd.read_csv("Data/data_target_users_test.csv", dtype={0: int})


In [13]:
target_users = pd.read_csv("Data/data_target_users_test.csv", dtype={0: int})[
    "user_id"
].values

print("Creating recomendations...")
recomendations = recommender.recommend(target_users, cutoff=10)


Creating recomendations...


In [14]:
from datetime import datetime

out = os.path.join("Output", datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + ".csv")
print("Writing to file...")
with open(out, "w") as f:
    f.write("user_id,item_list\n")

    for i in range(len(target_users)):
        f.write(
            str(target_users[i])
            + ","
            + " ".join(str(x) for x in recomendations[i])
            + "\n"
        )


Writing to file...
