In [1]:
import SLIM_model
import evaluate
import pandas as pd
import numpy as np
import pickle
import time

from importlib import reload
import optuna

In [2]:
# Load data: train/test interaction tables plus the user and item lists.
user_item_train_df = pd.read_csv('./data/user_item_train.csv')
user_item_test_df = pd.read_csv('./data/user_item_test.csv')

# Each list file holds one entry per line; the newline character is removed
# from every line so that only the entry text is kept.
with open('./data/user_list.txt', 'r') as user_file:
    user_list = [line.replace('\n', '') for line in user_file]

with open('./data/item_list.txt', 'r') as item_file:
    item_list = [line.replace('\n', '') for line in item_file]

In [3]:
# Hyperparameters to tune:
# alpha, lin_model

def time_since(runtime):
    """Split a duration given in seconds into whole minutes and seconds.

    Args:
        runtime: Elapsed time in seconds (int or float).

    Returns:
        A ``(minutes, seconds)`` tuple of ints. Both parts are truncated
        with ``int()``, so fractional seconds are dropped.
    """
    minutes = int(runtime / 60)
    # Whatever is left once the whole minutes are taken out.
    seconds = int(runtime - 60 * minutes)
    return minutes, seconds

def objective(trial):
    """Optuna objective: fit a SLIM model and score it on the test set.

    Samples ``alpha`` (log-uniform in [1e-6, 1e-3]) and ``lin_model``
    ('lasso' or 'elastic') from ``trial``, fits ``SLIM_model.SLIM`` on
    ``user_item_train_df`` via ``fit_multi``, then averages the value of
    ``Evaluater.topn_precision`` over all user indices.

    Relies on the notebook globals ``user_list``, ``item_list``,
    ``user_item_train_df`` and ``user_item_test_df``. Prints the elapsed
    wall-clock time of the trial as ``<min>m<sec>sec``.

    Args:
        trial: The ``optuna`` trial object used to sample hyperparameters.

    Returns:
        The negated mean score over the users that were counted. It is
        negated because the study minimises the objective.

    Raises:
        ZeroDivisionError: If every user is skipped (or there are no users).
    """
    started_at = time.time()

    # Sample the hyperparameters for this trial.
    alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-3)
    lin_model = trial.suggest_categorical('lin_model', ['lasso', 'elastic'])

    n_users = len(user_list)

    # Build and fit the model. Alternative entry points left by the author:
    # model.fit(user_item_train_df) and
    # model.load_sim_mat('./sim_mat.txt', user_item_train_df).
    model = SLIM_model.SLIM(alpha, n_users, len(item_list), lin_model=lin_model)
    model.fit_multi(user_item_train_df)

    # Evaluate every user against the held-out interactions.
    evaluator = evaluate.Evaluater(user_item_test_df, n_users)
    total_score = 0
    skipped_users = 0
    for user_idx in range(n_users):
        ranked_items = model.pred_ranking(user_idx)
        user_score = evaluator.topn_precision(ranked_items, user_idx)
        if user_score > 1:
            # Scores above 1 are excluded from the average; presumably a
            # sentinel from Evaluater for users that cannot be evaluated
            # -- TODO confirm against evaluate.Evaluater.
            skipped_users += 1
        else:
            total_score += user_score

    minutes, seconds = time_since(time.time() - started_at)
    print('{}m{}sec'.format(minutes, seconds))

    return -1 * (total_score / (n_users - skipped_users))

In [None]:
# Run the hyperparameter search.
# The sampler is seeded so that the sequence of suggested hyperparameters is
# the same on every "Restart & Run All"; without a seed each run explores
# different points and the saved results cannot be reproduced.
# TPESampler is Optuna's default single-objective sampler and 'minimize' is
# its default direction, so only the seeding changes the search behaviour.
SAMPLER_SEED = 42
study = optuna.create_study(
    direction='minimize',  # objective() returns the negated score
    sampler=optuna.samplers.TPESampler(seed=SAMPLER_SEED),
)
study.optimize(objective, n_trials=10)

26m2sec


[I 2020-07-04 17:27:34,565] Finished trial#0 with value: -0.13995096454985909 with parameters: {'alpha': 1.6572712853883517e-05, 'lin_model': 'elastic'}. Best is trial#0 with value: -0.13995096454985909.


25m26sec


[I 2020-07-04 17:53:00,786] Finished trial#1 with value: -0.14083971494882203 with parameters: {'alpha': 0.0005329155549139126, 'lin_model': 'lasso'}. Best is trial#1 with value: -0.14083971494882203.


In [None]:
# Keep a record of the trial parameters: export the study's trials and write them to CSV.
df = study.trials_dataframe() # returned as a pandas DataFrame
df.to_csv('hyparams_result.csv')

In [None]:
# Persist the best hyperparameters of the study (study.best_params) to a pickle file.
with open('best_param.pickle', 'wb') as f:
    pickle.dump(study.best_params, f)