In [6]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

# Single seed shared by every random number generator seeded in this notebook
seed = 42

# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so assigning it here
# presumably only reaches child processes, not this running kernel - confirm intent.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed Python's built-in generator and NumPy's legacy global generator
random.seed(seed)
np.random.seed(seed)

In [7]:
# NOTE(review): the "details" matrices are read from the views/ folder - confirm the paths.

# Full data: views rows followed by details rows (sps.vstack stacks row-wise)
URM_views_all = sps.load_npz("Data/URM/views/URM_views_all.npz")
URM_details_all = sps.load_npz("Data/URM/views/URM_details_all.npz")
URM_all = sps.vstack([URM_views_all, URM_details_all])

# Training split, stacked the same way as the full data
URM_views_train = sps.load_npz("Data/URM/views/URM_views_train.npz")
URM_details_train = sps.load_npz("Data/URM/views/URM_details_train.npz")
URM_train = sps.vstack([URM_views_train, URM_details_train])

# Validation split: the per-type matrices are loaded, but URM_val itself comes from a
# separate "combined" file rather than from stacking them.
# NOTE(review): URM_views_val / URM_details_val are not used in the cells visible here.
URM_views_val = sps.load_npz("Data/URM/views/URM_views_val.npz")
URM_details_val = sps.load_npz("Data/URM/views/URM_details_val.npz")
URM_val = sps.load_npz("Data/URM/combined/URM_combined_val.npz")

In [8]:
from evaluation.evaluator import EvaluatorHoldout

# Hold-out evaluator built on the validation matrix, with 10 as the only requested cutoff
evaluator = EvaluatorHoldout(
    URM_val,
    cutoff_list=[10],
)

EvaluatorHoldout: Ignoring 1236 ( 3.0%) Users that have less than 1 test interactions


In [9]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative

# Search budget: n_cases configurations in total, 30% of them requested as random starts
n_cases = 10
n_random_starts = int(n_cases*0.3)

# Tune ItemKNN-CF on the training matrix, scoring each case with the validation
# evaluator on MAP at cutoff 10, for the cosine and jaccard similarities.
runHyperparameterSearch_Collaborative(
    ItemKNNCFRecommender,
    URM_train=URM_train,
    evaluator_validation=evaluator,
    metric_to_optimize="MAP",
    cutoff_to_optimize=10,
    similarity_type_list=["cosine", "jaccard"],
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    output_folder_path="result_experiments/item_knn_cf_views/",
)


In [None]:
from Recommenders.DataIO import DataIO

# Reader rooted at the folder the hyperparameter search was told to write into
data_loader = DataIO(folder_path="result_experiments/item_knn_cf_views/")

# Only the cosine archive is inspected here; this raises if the archive is not on disk
metadata_file_name = ItemKNNCFRecommender.RECOMMENDER_NAME + "_cosine_metadata.zip"
search_metadata = data_loader.load_data(metadata_file_name)

# Show the validation results stored in the archive
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

FileNotFoundError: [Errno 2] No such file or directory: 'result_experiments/item_knn_cf_views/ItemKNNCFRecommender_cosine_metadata.zip'

In [None]:
# Hyperparameter table stored in the loaded search metadata
# (presumably one row per evaluated configuration - confirm against the search code).
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

Unnamed: 0,topK,shrink,similarity,normalize,feature_weighting
0,760,784,cosine,True,TF-IDF
1,510,45,cosine,True,none
2,313,643,cosine,True,TF-IDF
3,100,0,cosine,True,BM25
4,29,0,cosine,False,TF-IDF
5,151,685,cosine,True,none
6,979,0,cosine,False,none
7,938,994,cosine,False,BM25
8,991,9,cosine,True,TF-IDF
9,149,1000,cosine,False,TF-IDF


In [None]:
# Configuration stored under "hyperparameters_best" in the loaded search metadata
# (presumably the best one for the optimized metric - confirm against the search code).
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

{'topK': 991,
 'shrink': 9,
 'similarity': 'cosine',
 'normalize': True,
 'feature_weighting': 'TF-IDF'}

In [None]:
# Exception records stored in the loaded search metadata
# (presumably one slot per case, None when the case ran without error - confirm).
exception_list = search_metadata["exception_list"]
exception_list

[None, None, None, None, None, None, None, None, None, None]

In [10]:
# Hyperparameters for the final model, fixed in code so this cell does not depend on
# the search metadata being loaded.
final_fit_params = {
    'topK': 991,
    'shrink': 9,
    'similarity': 'cosine',
    'normalize': True,
    'feature_weighting': 'TF-IDF',
}

# Fit on the full stacked matrix (URM_all) and save the fitted model to disk
recommender = ItemKNNCFRecommender(URM_all)
recommender.fit(**final_fit_params)
recommender.save_model(
    folder_path="result_experiments/item_knn_cf_views/",
    file_name="best.zip",
)

Similarity column 24507 (100.0%), 3046.06 column/sec. Elapsed time 8.05 sec


In [11]:
# Target-user table as a DataFrame, first column parsed as int.
# NOTE(review): the next cell re-reads the same file and rebinds target_users to the
# "user_id" array, so this read looks redundant - confirm before removing.
target_users = pd.read_csv(
    'Data/data_target_users_test.csv',
    dtype={0: int},
)

In [12]:
# Read the target-user file and keep only the "user_id" column as an array
target_users_table = pd.read_csv('Data/data_target_users_test.csv', dtype={0: int})
target_users = target_users_table["user_id"].values

# Ask the fitted recommender for recommendations for every target user at cutoff 10
print("Creating recomendations...")
recomendations = recommender.recommend(target_users, cutoff=10)

Creating recomendations...


In [13]:
from datetime import datetime

# open() does not create missing folders, so make sure the output directory exists
output_dir = 'Output'
os.makedirs(output_dir, exist_ok=True)

# Timestamped file name so a new run does not overwrite an earlier submission
out = os.path.join(output_dir, datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '.csv')

# zip() below would silently truncate on a length mismatch, so check it explicitly
assert len(recomendations) == len(target_users), \
    "expected exactly one recommendation list per target user"

print("Writing to file...")
with open(out, 'w') as f:
    # Header row, then one line per user: "<user_id>,<space-separated item ids>"
    f.write('user_id,item_list\n')

    for user_id, item_list in zip(target_users, recomendations):
        f.write(str(user_id) + "," + " ".join(str(x) for x in item_list) + "\n")

Writing to file...
