In [2]:
import pandas as pd
import numpy as np 
import scipy.sparse as sps
import os
import sys
import matplotlib.pyplot as plt
import optuna

sys.path.append('../../')

from SLIM_ElasticNet.SLIMElasticNetRecommender import MultiThreadSLIM_ElasticNet
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

In [3]:
# Read the training interactions from '../data_train.csv'; the preview
# rendered below shows the columns `row`, `col` and `data`.
dataset = pd.read_csv(filepath_or_buffer='../data_train.csv')
# Keep the frame as the cell's final expression so the notebook renders it.
dataset

Unnamed: 0,row,col,data
0,0,10080,1.0
1,0,19467,1.0
2,1,2665,1.0
3,1,7494,1.0
4,1,17068,1.0
...,...,...,...
113263,7945,2476,1.0
113264,7945,12319,1.0
113265,7945,21384,1.0
113266,7946,8699,1.0


In [4]:
# Build the user-rating matrix (URM) from the interaction triplets:
# `row` is used as the user (row) index, `col` as the item (column) index
# and `data` as the stored value.
users = dataset['row']
items = dataset['col']
data = dataset['data']
URM_all = sps.coo_matrix((data, (users, items)))
URM_all = URM_all.tocsr()  # CSR: fast row access -> fast access to users
# A bare expression in the middle of a cell is evaluated and then discarded
# (only the cell's last expression is rendered), so print the shape explicitly.
print('URM_all shape:', URM_all.shape)

# Load the target-user list from '../data_target_users_test.csv'.
test_users = pd.read_csv('../data_target_users_test.csv')
test_users

Unnamed: 0,user_id
0,0
1,1
2,2
3,3
4,4
...,...
7939,7942
7940,7943
7941,7944
7942,7945


In [5]:
# Read the item-content triplets from '../data_ICM_title_abstract.csv'; the
# preview rendered below shows the columns `row`, `col` and `data`.
ICM_df = pd.read_csv(filepath_or_buffer='../data_ICM_title_abstract.csv')
# Keep the frame as the cell's final expression so the notebook renders it.
ICM_df

Unnamed: 0,row,col,data
0,0,1185,1.015524
1,0,2507,0.459024
2,0,3534,0.227742
3,0,8766,0.501549
4,0,10862,0.297011
...,...,...,...
490686,25974,12554,0.963016
490687,25974,13003,0.104613
490688,25974,16236,0.118760
490689,25974,18797,0.363301


In [6]:
# Assemble the item-content matrix (ICM): `row` is used as the item (row)
# index, `col` as the feature (column) index and `data` as the stored value.
items = ICM_df['row']
features = ICM_df['col']
data = ICM_df['data']
# Build in COO format, then convert to CSR for fast row access -> fast access to items.
ICM_all = sps.coo_matrix((data, (items, features))).tocsr()
ICM_all.shape

(25975, 20000)

In [7]:
# Create three 80/20 hold-out splits of URM_all, each paired with an evaluator
# built on its validation part (cutoff 10).
# NOTE(review): no random seed is set here, so the splits presumably differ
# between runs -- confirm, and seed if reproducibility is needed.
URM_train_vector = []
URM_validation_vector = []
Evaluator_vector = []
for _ in range(3):
    # Split first, then wrap the validation part in an evaluator.
    train_part, validation_part = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.80)
    URM_train_vector.append(train_part)
    URM_validation_vector.append(validation_part)
    Evaluator_vector.append(EvaluatorHoldout(validation_part, cutoff_list=[10]))

# Keep the individually numbered names bound for any code that refers to them.
URM_train_1, URM_train_2, URM_train_3 = URM_train_vector
URM_validation_1, URM_validation_2, URM_validation_3 = URM_validation_vector
evaluator_validation_1, evaluator_validation_2, evaluator_validation_3 = Evaluator_vector



In [None]:


class Objective(object):
    """Optuna objective: mean validation MAP of SLIM ElasticNet over several splits.

    For every trial a set of hyperparameters is sampled, one
    ``MultiThreadSLIM_ElasticNet`` model is fitted per training split on the
    weighted vertical stack of the URM and the transposed ICM, and the MAP
    values reported by the matching evaluators are averaged.
    """

    def __init__(self, URM_train_vector, ICM_all, Evaluator_vector, cutoff=10):
        """Store the data shared by all trials.

        Args:
            URM_train_vector: sequence of training URMs, one per split.
            ICM_all: item-content matrix; its transpose is stacked under each
                URM, so it must have as many rows as the URMs have columns.
            Evaluator_vector: sequence of evaluators, aligned element by
                element with ``URM_train_vector``.
            cutoff: key of the evaluator's result dict whose ``"MAP"`` entry
                is read (default 10).

        Raises:
            ValueError: if no split is given, or if the two sequences do not
                have the same length.
        """
        if len(URM_train_vector) == 0:
            raise ValueError("URM_train_vector must contain at least one split")
        if len(URM_train_vector) != len(Evaluator_vector):
            raise ValueError(
                "URM_train_vector and Evaluator_vector must have the same length"
            )

        # Hold these implementation specific arguments as fields of the class.
        self.URM_train_vector = URM_train_vector
        self.ICM_all = ICM_all
        self.Evaluator_vector = Evaluator_vector
        self.cutoff = cutoff
        # One MAP slot per split (overwritten at every trial).
        self.MAP_vector = np.zeros(len(URM_train_vector))

    def __call__(self, trial):
        """Evaluate one trial and return the MAP averaged over all splits.

        Args:
            trial: Optuna trial used to sample ``topK``, ``weight``,
                ``l1_ratio`` and ``alpha``.

        Returns:
            The mean of the per-split MAP values, which are also left in
            ``self.MAP_vector``.
        """
        # `suggest_float` replaces the deprecated `suggest_uniform` /
        # `suggest_loguniform` while sampling from the same ranges.
        topK = trial.suggest_int('topK', 50, 1000)
        weight = trial.suggest_float('weight', 0.1, 0.9)
        l1_ratio = trial.suggest_float('l1_ratio', 1e-6, 1e-2, log=True)
        alpha = trial.suggest_float('alpha', 1e-5, 1e-2)

        self.MAP_vector = np.zeros(len(self.URM_train_vector))

        # The scaled ICM block is the same for every split of this trial.
        # Use the instance's ICM, not a notebook-level global.
        weighted_ICM_T = self.ICM_all.T * weight

        splits = zip(self.URM_train_vector, self.Evaluator_vector)
        for i, (URM_train, evaluator_validation) in enumerate(splits):
            # Rows of the stacked matrix are users followed by features;
            # columns are items.
            URM_train_ICM_all = sps.vstack([URM_train * (1 - weight), weighted_ICM_T])
            recommender = MultiThreadSLIM_ElasticNet(URM_train_ICM_all)
            recommender.fit(topK=topK, l1_ratio=l1_ratio, alpha=alpha)
            result_dict, _ = evaluator_validation.evaluateRecommender(recommender)
            self.MAP_vector[i] = result_dict[self.cutoff]["MAP"]

        print('printing self map vector: ', self.MAP_vector)
        MAP = np.mean(self.MAP_vector)
        print('printing MAP: ', MAP)

        return MAP
    
# Run the hyperparameter search: maximise the value returned by an
# `Objective` instance over 50 trials, then report the best parameters found.
study = optuna.create_study(direction='maximize')
objective = Objective(URM_train_vector, ICM_all, Evaluator_vector)
study.optimize(objective, n_trials=50)

print(study.best_params)

[32m[I 2021-01-02 12:10:07,537][0m A new study created in memory with name: no-name-7b7e5c31-16f7-4382-9a55-0c413fdb6463[0m


SLIMElasticNetRecommender: URM Detected 75 (0.27 %) cold users.
SLIMElasticNetRecommender: URM Detected 1 (0.00 %) cold items.
