# Robust Scaling 적용한 lightfm model 학습

In [1]:
import os
import pandas as pd
import numpy as np
from time import time
from tqdm.notebook import tqdm
import pickle

from scipy.sparse import coo_matrix, csr_matrix

from lightfm.cross_validation import random_train_test_split
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k, auc_score, recall_at_k

from hyperopt import fmin, hp, tpe, Trials



min_max_item_features.csv
min_max_user_features.csv
rating.csv
robust_item_features.csv
robust_user_features.csv
whisky.csv

In [2]:
item_features = pd.read_csv("dataset/robust_item_features.csv", index_col=0, encoding="UTF-8")
user_features = pd.read_csv("dataset/robust_user_features.csv", index_col=0, encoding="UTF-8")
rating = pd.read_csv("dataset/rating.csv", index_col=0, encoding="UTF-8")
whisky = pd.read_csv("dataset/whisky.csv", index_col=0, encoding="UTF-8")

In [3]:
item_features = csr_matrix(item_features)
user_features = csr_matrix(user_features)

In [4]:
print(user_features)

  (0, 0)	4.0
  (0, 2)	30.0
  (0, 3)	-0.1666666666666666
  (0, 5)	-0.6
  (0, 6)	0.1666666666666666
  (0, 7)	-0.1666666666666666
  (0, 8)	0.3333333333333333
  (0, 9)	1.3333333333333333
  (0, 10)	0.8571428571428571
  (0, 11)	1.3333333333333333
  (0, 12)	0.8571428571428571
  (0, 13)	1.3461538461538465
  (1, 0)	5.0
  (1, 1)	0.4
  (1, 3)	-0.1666666666666666
  (1, 4)	-0.3333333333333333
  (1, 5)	0.2
  (1, 6)	0.5
  (1, 7)	0.8333333333333334
  (1, 8)	0.6666666666666666
  (1, 9)	0.6666666666666666
  (1, 10)	0.5714285714285714
  (1, 12)	1.4285714285714286
  (1, 13)	-0.1923076923076923
  (2, 0)	4.0
  :	:
  (119512, 13)	-0.1923076923076923
  (119513, 0)	1.0
  (119513, 1)	0.44
  (119513, 3)	-0.1666666666666666
  (119513, 4)	0.3
  (119513, 5)	-1.0
  (119513, 6)	-0.8333333333333334
  (119513, 7)	-0.5
  (119513, 8)	1.3333333333333333
  (119513, 10)	0.4285714285714285
  (119513, 11)	-0.6666666666666666
  (119513, 12)	-0.2857142857142857
  (119513, 13)	-0.1923076923076923
  (119514, 0)	1.0
  (119514, 1)	

In [5]:
unique_user = rating["user_id"].unique()

In [6]:
unique_user

array([     0,      1,      2, ..., 119512, 119513, 119514], dtype=int64)

### make Interactions 

In [7]:
rating.shape

(908176, 3)

In [8]:
def create_user_item_interactions(rating, n_users, n_items):
    interactions = coo_matrix(
        (
            rating['rating'].values,
            (rating['user_id'].values, rating["whisky_id"].values)
        ),
        shape=(n_users, n_items)
    )
    return interactions

In [9]:
interactions = create_user_item_interactions(rating, rating["user_id"].nunique(), whisky["whisky_id"].nunique())

In [10]:
print(interactions)
interactions

  (0, 0)	9.0
  (1, 0)	7.0
  (2, 0)	9.0
  (3, 0)	10.0
  (4, 0)	9.0
  (5, 0)	7.0
  (6, 0)	10.0
  (7, 0)	7.0
  (8, 0)	10.0
  (9, 0)	10.0
  (10, 0)	9.0
  (11, 0)	9.0
  (12, 0)	8.0
  (13, 0)	8.0
  (14, 0)	10.0
  (15, 0)	10.0
  (16, 0)	9.0
  (17, 0)	9.0
  (18, 0)	8.0
  (19, 0)	9.0
  (20, 0)	9.0
  (21, 0)	10.0
  (22, 0)	9.0
  (23, 0)	9.0
  (24, 0)	9.0
  :	:
  (7636, 3531)	4.0
  (56292, 3531)	2.0
  (2670, 3531)	2.0
  (786, 3531)	2.0
  (5942, 3531)	6.0
  (15561, 3531)	2.0
  (25623, 3532)	6.0
  (2885, 3532)	6.0
  (7368, 3533)	2.0
  (69554, 3534)	10.0
  (4484, 3534)	2.0
  (85727, 3534)	5.0
  (43900, 3534)	4.0
  (16070, 3534)	8.0
  (29998, 3534)	8.0
  (7895, 3534)	4.0
  (104052, 3534)	5.0
  (31152, 3534)	2.0
  (119512, 3534)	6.0
  (2509, 3534)	4.0
  (4828, 3534)	4.0
  (119513, 3534)	2.0
  (119514, 3534)	8.0
  (3123, 3534)	4.0
  (95648, 3534)	2.0


<119515x3535 sparse matrix of type '<class 'numpy.float64'>'
	with 908176 stored elements in COOrdinate format>

## Train_Test data split

In [11]:
train_interactions, test_interactions = random_train_test_split(interactions, test_percentage=0.2, random_state=42)

In [12]:
train_interactions, valid_interactions = random_train_test_split(train_interactions, test_percentage=0.2, random_state=42)

## Optuna 사용한 HyperParameter 최적화
---
- learning_rate, alpha 등의 하이퍼 파라미터 값을 작게 설정했을 때보다 높게 설정했을 때 AUC Score가 높게 나오는 경향이 보였습니다.
- HyperOPT는 베이지안 최적화 접근 기반인데 이 부분에 제대로 알지 못하기 때문에 전체를 돌려보는 Optuna 방식으로 변경하겠습니다.
- Optuna의 경우 시각화도 가능하고, GridSearchCV보다 빠르다는 장점이 있습니다.

In [13]:
import optuna

In [14]:
def objective(trial):
    
    
    # 조정할 하이퍼 파라미터
    params = {
        "learning_schedule": 'adagrad',
        "loss": "warp",
        "random_state": 0,
        "no_components": trial.suggest_categorical("no_components", [10, 20, 30, 40, 50]),
        'learning_rate': trial.suggest_categorical("learning_rate", [0.0005, 0.001, 0.005,  0.01,  0.05]),
        'item_alpha': trial.suggest_categorical("item_alpha", [5e-04, 1e-03, 5e-03, 1e-02]),
        'user_alpha': trial.suggest_categorical("user_alpha", [5e-04, 1e-03, 5e-03, 1e-02]),
    }

    model = LightFM(**params)

    model.fit(interactions=train_interactions,
              user_features=user_features,
              item_features=item_features,
              epochs=3,
              verbose=True)

    test_precision = precision_at_k(model, valid_interactions, k=9, item_features=item_features, user_features=user_features).mean()
    test_recall = recall_at_k(model, valid_interactions, k=9, item_features=item_features, user_features=user_features).mean()
    test_auc = auc_score(model, valid_interactions, item_features=item_features, user_features=user_features).mean()
    
    print("no_comp: {}, lrn_rate: {:.5f}, item_alpha: {:.5f}, user_alpha: {:.5f}, precision: {:.5f}, recall: {:.5f}, auc_score: {:.5f}".format(
      params["no_components"], params["learning_rate"], params["item_alpha"], params["user_alpha"], test_precision, test_recall, test_auc))
    return test_auc

In [15]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)
print(study.best_trial.params)

[32m[I 2023-03-13 20:39:12,895][0m A new study created in memory with name: no-name-a06ad83c-6795-4de4-b42e-5be385d06427[0m
Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:06<00:00, 22.09s/it]
[32m[I 2023-03-13 20:42:51,232][0m Trial 0 finished with value: 0.7774629592895508 and parameters: {'no_components': 20, 'learning_rate': 0.005, 'item_alpha': 0.01, 'user_alpha': 0.0005}. Best is trial 0 with value: 0.7774629592895508.[0m


no_comp: 20, lrn_rate: 0.00500, item_alpha: 0.01000, user_alpha: 0.00050, precision: 0.00288, recall: 0.01245, auc_score: 0.77746


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:44<00:00, 54.69s/it]
[32m[I 2023-03-13 20:50:32,939][0m Trial 1 finished with value: 0.7648791670799255 and parameters: {'no_components': 50, 'learning_rate': 0.0005, 'item_alpha': 0.0005, 'user_alpha': 0.01}. Best is trial 0 with value: 0.7774629592895508.[0m


no_comp: 50, lrn_rate: 0.00050, item_alpha: 0.00050, user_alpha: 0.01000, precision: 0.00165, recall: 0.00594, auc_score: 0.76488


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:37<00:00, 32.40s/it]
[32m[I 2023-03-13 20:55:28,678][0m Trial 2 finished with value: 0.48272180557250977 and parameters: {'no_components': 30, 'learning_rate': 0.05, 'item_alpha': 0.0005, 'user_alpha': 0.001}. Best is trial 0 with value: 0.7774629592895508.[0m


no_comp: 30, lrn_rate: 0.05000, item_alpha: 0.00050, user_alpha: 0.00100, precision: 0.00086, recall: 0.00252, auc_score: 0.48272


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:39<00:00, 53.05s/it]
[32m[I 2023-03-13 21:03:02,373][0m Trial 3 finished with value: 0.7801773548126221 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.005, 'user_alpha': 0.01}. Best is trial 3 with value: 0.7801773548126221.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00500, user_alpha: 0.01000, precision: 0.00343, recall: 0.01723, auc_score: 0.78018


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:37<00:00, 12.60s/it]
[32m[I 2023-03-13 21:05:25,883][0m Trial 4 finished with value: 0.7466212511062622 and parameters: {'no_components': 10, 'learning_rate': 0.0005, 'item_alpha': 0.0005, 'user_alpha': 0.001}. Best is trial 3 with value: 0.7801773548126221.[0m


no_comp: 10, lrn_rate: 0.00050, item_alpha: 0.00050, user_alpha: 0.00100, precision: 0.00143, recall: 0.00474, auc_score: 0.74662


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.76s/it]
[32m[I 2023-03-13 21:07:53,084][0m Trial 5 finished with value: 0.7465819120407104 and parameters: {'no_components': 10, 'learning_rate': 0.0005, 'item_alpha': 0.01, 'user_alpha': 0.001}. Best is trial 3 with value: 0.7801773548126221.[0m


no_comp: 10, lrn_rate: 0.00050, item_alpha: 0.01000, user_alpha: 0.00100, precision: 0.00144, recall: 0.00479, auc_score: 0.74658


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:32<00:00, 30.70s/it]
[32m[I 2023-03-13 21:12:46,705][0m Trial 6 finished with value: 0.6654597520828247 and parameters: {'no_components': 30, 'learning_rate': 0.01, 'item_alpha': 0.0005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.7801773548126221.[0m


no_comp: 30, lrn_rate: 0.01000, item_alpha: 0.00050, user_alpha: 0.00050, precision: 0.00173, recall: 0.00590, auc_score: 0.66546


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:12<00:00, 44.24s/it]
[32m[I 2023-03-13 21:19:05,851][0m Trial 7 finished with value: 0.7219672799110413 and parameters: {'no_components': 40, 'learning_rate': 0.0005, 'item_alpha': 0.0005, 'user_alpha': 0.01}. Best is trial 3 with value: 0.7801773548126221.[0m


no_comp: 40, lrn_rate: 0.00050, item_alpha: 0.00050, user_alpha: 0.01000, precision: 0.00142, recall: 0.00465, auc_score: 0.72197


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:05<00:00, 21.92s/it]
[32m[I 2023-03-13 21:22:39,768][0m Trial 8 finished with value: 0.5793220400810242 and parameters: {'no_components': 20, 'learning_rate': 0.05, 'item_alpha': 0.0005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.7801773548126221.[0m


no_comp: 20, lrn_rate: 0.05000, item_alpha: 0.00050, user_alpha: 0.00050, precision: 0.00088, recall: 0.00273, auc_score: 0.57932


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:13<00:00, 44.45s/it]
[32m[I 2023-03-13 21:28:57,715][0m Trial 9 finished with value: 0.7226675748825073 and parameters: {'no_components': 40, 'learning_rate': 0.0005, 'item_alpha': 0.01, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.7801773548126221.[0m


no_comp: 40, lrn_rate: 0.00050, item_alpha: 0.01000, user_alpha: 0.00050, precision: 0.00140, recall: 0.00461, auc_score: 0.72267


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:46<00:00, 55.52s/it]
[32m[I 2023-03-13 21:37:00,462][0m Trial 10 finished with value: 0.7669777274131775 and parameters: {'no_components': 50, 'learning_rate': 0.001, 'item_alpha': 0.005, 'user_alpha': 0.005}. Best is trial 3 with value: 0.7801773548126221.[0m


no_comp: 50, lrn_rate: 0.00100, item_alpha: 0.00500, user_alpha: 0.00500, precision: 0.00212, recall: 0.00945, auc_score: 0.76698


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:17<00:00, 25.74s/it]
[32m[I 2023-03-13 21:41:12,606][0m Trial 11 finished with value: 0.7778975367546082 and parameters: {'no_components': 20, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 3 with value: 0.7801773548126221.[0m


no_comp: 20, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00292, recall: 0.01246, auc_score: 0.77790


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [03:07<00:00, 62.45s/it]
[32m[I 2023-03-13 21:49:35,298][0m Trial 12 finished with value: 0.781226634979248 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00334, recall: 0.01624, auc_score: 0.78123


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:53<00:00, 57.92s/it]
[32m[I 2023-03-13 21:57:41,170][0m Trial 13 finished with value: 0.781226634979248 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00334, recall: 0.01624, auc_score: 0.78123


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:53<00:00, 57.87s/it]
[32m[I 2023-03-13 22:05:39,867][0m Trial 14 finished with value: 0.781226634979248 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00334, recall: 0.01624, auc_score: 0.78123


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:47<00:00, 55.80s/it]
[32m[I 2023-03-13 22:13:31,797][0m Trial 15 finished with value: 0.7808710336685181 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.005}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00500, precision: 0.00314, recall: 0.01490, auc_score: 0.78087


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:27<00:00, 49.08s/it]
[32m[I 2023-03-13 22:21:13,035][0m Trial 16 finished with value: 0.7572591304779053 and parameters: {'no_components': 50, 'learning_rate': 0.01, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.01000, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00248, recall: 0.01107, auc_score: 0.75726


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:55<00:00, 58.57s/it]
[32m[I 2023-03-13 22:29:29,344][0m Trial 17 finished with value: 0.7668690085411072 and parameters: {'no_components': 50, 'learning_rate': 0.001, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00100, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00206, recall: 0.00923, auc_score: 0.76687


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:47<00:00, 55.96s/it]
[32m[I 2023-03-13 22:37:25,625][0m Trial 18 finished with value: 0.781226634979248 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00334, recall: 0.01624, auc_score: 0.78123


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.85s/it]
[32m[I 2023-03-13 22:40:00,105][0m Trial 19 finished with value: 0.7774520516395569 and parameters: {'no_components': 10, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.005}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 10, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00500, precision: 0.00283, recall: 0.01218, auc_score: 0.77745


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:08<00:00, 42.86s/it]
[32m[I 2023-03-13 22:46:21,427][0m Trial 20 finished with value: 0.6962584257125854 and parameters: {'no_components': 40, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 40, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00175, recall: 0.00582, auc_score: 0.69626


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:47<00:00, 55.93s/it]
[32m[I 2023-03-13 22:54:15,757][0m Trial 21 finished with value: 0.781226634979248 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00334, recall: 0.01624, auc_score: 0.78123


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [03:07<00:00, 62.44s/it]
[32m[I 2023-03-13 23:02:31,171][0m Trial 22 finished with value: 0.781226634979248 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00334, recall: 0.01624, auc_score: 0.78123


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:45<00:00, 55.18s/it]
[32m[I 2023-03-13 23:10:18,650][0m Trial 23 finished with value: 0.781226634979248 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00334, recall: 0.01624, auc_score: 0.78123


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:45<00:00, 55.04s/it]
[32m[I 2023-03-13 23:18:28,902][0m Trial 24 finished with value: 0.7801773548126221 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.005, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00500, user_alpha: 0.01000, precision: 0.00343, recall: 0.01723, auc_score: 0.78018


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:58<00:00, 39.38s/it]
[32m[I 2023-03-13 23:24:16,809][0m Trial 25 finished with value: 0.7148984670639038 and parameters: {'no_components': 30, 'learning_rate': 0.001, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 30, lrn_rate: 0.00100, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00128, recall: 0.00385, auc_score: 0.71490


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:50<00:00, 56.87s/it]
[32m[I 2023-03-13 23:32:44,698][0m Trial 26 finished with value: 0.6568671464920044 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00159, recall: 0.00576, auc_score: 0.65687


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:38<00:00, 52.74s/it]
[32m[I 2023-03-13 23:40:56,905][0m Trial 27 finished with value: 0.7560548782348633 and parameters: {'no_components': 50, 'learning_rate': 0.01, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.01000, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00272, recall: 0.01234, auc_score: 0.75605


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:40<00:00, 53.51s/it]
[32m[I 2023-03-13 23:48:47,881][0m Trial 28 finished with value: 0.7808710336685181 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.005}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00500, precision: 0.00314, recall: 0.01490, auc_score: 0.78087


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:07<00:00, 22.65s/it]
[32m[I 2023-03-13 23:52:34,489][0m Trial 29 finished with value: 0.7774629592895508 and parameters: {'no_components': 20, 'learning_rate': 0.005, 'item_alpha': 0.01, 'user_alpha': 0.0005}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 20, lrn_rate: 0.00500, item_alpha: 0.01000, user_alpha: 0.00050, precision: 0.00288, recall: 0.01245, auc_score: 0.77746


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:39<00:00, 13.28s/it]
[32m[I 2023-03-13 23:55:09,460][0m Trial 30 finished with value: 0.7779573798179626 and parameters: {'no_components': 10, 'learning_rate': 0.005, 'item_alpha': 0.005, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 10, lrn_rate: 0.00500, item_alpha: 0.00500, user_alpha: 0.01000, precision: 0.00284, recall: 0.01231, auc_score: 0.77796


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:45<00:00, 55.29s/it]
[32m[I 2023-03-14 00:03:07,706][0m Trial 31 finished with value: 0.781226634979248 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00334, recall: 0.01624, auc_score: 0.78123


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:52<00:00, 57.66s/it]
[32m[I 2023-03-14 00:11:32,166][0m Trial 32 finished with value: 0.781226634979248 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00334, recall: 0.01624, auc_score: 0.78123


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [03:01<00:00, 60.35s/it]
[32m[I 2023-03-14 00:20:05,956][0m Trial 33 finished with value: 0.781226634979248 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00334, recall: 0.01624, auc_score: 0.78123


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:51<00:00, 37.03s/it]
[32m[I 2023-03-14 00:25:49,912][0m Trial 34 finished with value: 0.4826804995536804 and parameters: {'no_components': 30, 'learning_rate': 0.05, 'item_alpha': 0.001, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 30, lrn_rate: 0.05000, item_alpha: 0.00100, user_alpha: 0.01000, precision: 0.00086, recall: 0.00251, auc_score: 0.48268


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [03:02<00:00, 60.90s/it]
[32m[I 2023-03-14 00:34:36,999][0m Trial 35 finished with value: 0.7788313627243042 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.01, 'user_alpha': 0.01}. Best is trial 12 with value: 0.781226634979248.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.01000, user_alpha: 0.01000, precision: 0.00321, recall: 0.01588, auc_score: 0.77883


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [03:03<00:00, 61.07s/it]
[32m[I 2023-03-14 00:43:20,877][0m Trial 36 finished with value: 0.7817395925521851 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00419, recall: 0.02188, auc_score: 0.78174


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:37<00:00, 52.48s/it]
[32m[I 2023-03-14 00:50:52,744][0m Trial 37 finished with value: 0.7560548782348633 and parameters: {'no_components': 50, 'learning_rate': 0.01, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 50, lrn_rate: 0.01000, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00272, recall: 0.01234, auc_score: 0.75605


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:59<00:00, 39.89s/it]
[32m[I 2023-03-14 00:56:57,826][0m Trial 38 finished with value: 0.6956368684768677 and parameters: {'no_components': 40, 'learning_rate': 0.005, 'item_alpha': 0.005, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 40, lrn_rate: 0.00500, item_alpha: 0.00500, user_alpha: 0.00100, precision: 0.00169, recall: 0.00551, auc_score: 0.69564


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:12<00:00, 24.27s/it]
[32m[I 2023-03-14 01:00:50,577][0m Trial 39 finished with value: 0.739649772644043 and parameters: {'no_components': 20, 'learning_rate': 0.0005, 'item_alpha': 0.0005, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 20, lrn_rate: 0.00050, item_alpha: 0.00050, user_alpha: 0.00100, precision: 0.00112, recall: 0.00344, auc_score: 0.73965


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:44<00:00, 34.74s/it]
[32m[I 2023-03-14 01:06:06,912][0m Trial 40 finished with value: 0.7149181365966797 and parameters: {'no_components': 30, 'learning_rate': 0.001, 'item_alpha': 0.01, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 30, lrn_rate: 0.00100, item_alpha: 0.01000, user_alpha: 0.00100, precision: 0.00128, recall: 0.00387, auc_score: 0.71492


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:43<00:00, 54.52s/it]
[32m[I 2023-03-14 01:14:07,135][0m Trial 41 finished with value: 0.7817395925521851 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00419, recall: 0.02188, auc_score: 0.78174


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:48<00:00, 56.13s/it]
[32m[I 2023-03-14 01:22:09,766][0m Trial 42 finished with value: 0.7817395925521851 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00419, recall: 0.02188, auc_score: 0.78174


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:43<00:00, 54.65s/it]
[32m[I 2023-03-14 01:30:02,430][0m Trial 43 finished with value: 0.7817395925521851 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00419, recall: 0.02188, auc_score: 0.78174


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:35<00:00, 11.82s/it]
[32m[I 2023-03-14 01:32:27,149][0m Trial 44 finished with value: 0.5605233907699585 and parameters: {'no_components': 10, 'learning_rate': 0.05, 'item_alpha': 0.0005, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 10, lrn_rate: 0.05000, item_alpha: 0.00050, user_alpha: 0.00100, precision: 0.00061, recall: 0.00168, auc_score: 0.56052


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:51<00:00, 57.28s/it]
[32m[I 2023-03-14 01:40:27,792][0m Trial 45 finished with value: 0.7817395925521851 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00419, recall: 0.02188, auc_score: 0.78174


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:51<00:00, 57.31s/it]
[32m[I 2023-03-14 01:48:29,542][0m Trial 46 finished with value: 0.7647778391838074 and parameters: {'no_components': 50, 'learning_rate': 0.0005, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 50, lrn_rate: 0.00050, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00203, recall: 0.00879, auc_score: 0.76478


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:03<00:00, 41.05s/it]
[32m[I 2023-03-14 01:54:46,892][0m Trial 47 finished with value: 0.6878995299339294 and parameters: {'no_components': 40, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 40, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00165, recall: 0.00539, auc_score: 0.68790


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:43<00:00, 54.52s/it]
[32m[I 2023-03-14 02:01:50,632][0m Trial 48 finished with value: 0.7817395925521851 and parameters: {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 50, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00419, recall: 0.02188, auc_score: 0.78174


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:50<00:00, 16.68s/it]
[32m[I 2023-03-14 02:04:32,212][0m Trial 49 finished with value: 0.7769889235496521 and parameters: {'no_components': 20, 'learning_rate': 0.005, 'item_alpha': 0.0005, 'user_alpha': 0.001}. Best is trial 36 with value: 0.7817395925521851.[0m


no_comp: 20, lrn_rate: 0.00500, item_alpha: 0.00050, user_alpha: 0.00100, precision: 0.00291, recall: 0.01235, auc_score: 0.77699
{'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}


In [16]:
print("Best Params : {}".format(study.best_params))
print()
print("Best Trials : {}".format(study.best_trials))
print()
print("Best Values : {}".format(study.best_value))

Best Params : {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}

Best Trials : [FrozenTrial(number=36, state=TrialState.COMPLETE, values=[0.7817395925521851], datetime_start=datetime.datetime(2023, 3, 14, 0, 34, 37, 839), datetime_complete=datetime.datetime(2023, 3, 14, 0, 43, 20, 877070), params={'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'no_components': CategoricalDistribution(choices=(10, 20, 30, 40, 50)), 'learning_rate': CategoricalDistribution(choices=(0.0005, 0.001, 0.005, 0.01, 0.05)), 'item_alpha': CategoricalDistribution(choices=(0.0005, 0.001, 0.005, 0.01)), 'user_alpha': CategoricalDistribution(choices=(0.0005, 0.001, 0.005, 0.01))}, trial_id=36, value=None), FrozenTrial(number=41, state=TrialState.COMPLETE, values=[0.7817395925521851], datetime_start=datetime.datetime(2023, 3, 14, 1, 6, 6, 913256), datetime_complete=date

In [14]:
m1 = LightFM(loss='warp', learning_schedule="adagrad", random_state=0, no_components=60, learning_rate=0.01, item_alpha=0.005, user_alpha=0.01)
m1.fit(interactions=train_interactions, user_features=user_features, item_features=item_features, epochs=5, verbose=True)

Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:13<00:00, 50.73s/it]


<lightfm.lightfm.LightFM at 0x191cf2e1730>

In [15]:
test_precision = precision_at_k(m1, test_interactions, user_features=user_features, item_features=item_features, k=9).mean()
test_recall = recall_at_k(m1, test_interactions,  user_features=user_features, item_features=item_features, k=9).mean()
test_auc = auc_score(m1, test_interactions, user_features=user_features, item_features=item_features).mean()

print("Train precision: %.5f" % test_precision)
print("Test precision: %.5f" % test_recall) 
print("Test AUC Score : %.5f" % test_auc)

Train precision: 0.00624
Test precision: 0.03233
Test AUC Score : 0.78300


In [17]:
def objective(trial):
    # 조정할 하이퍼 파라미터
    params = {
        "learning_schedule": 'adagrad',
        "loss": "warp",
        "random_state": 0,
        "no_components": trial.suggest_int("no_components", 40, 100, 10),
        'learning_rate': trial.suggest_float("learning_rate", 0.005,  0.01),
        'item_alpha': trial.suggest_float("item_alpha", 1e-03, 1e-02),
        'user_alpha': trial.suggest_float("user_alpha", 1e-03, 1e-02),
    }

    model = LightFM(**params)

    model.fit(interactions=train_interactions,
              user_features=user_features,
              item_features=item_features,
              epochs=5,
              verbose=True)

    test_precision = precision_at_k(model, valid_interactions, k=9, item_features=item_features, user_features=user_features).mean()
    test_recall = recall_at_k(model, valid_interactions, k=9, item_features=item_features, user_features=user_features).mean()
    test_auc = auc_score(model, valid_interactions, item_features=item_features, user_features=user_features).mean()
    
    print("no_comp: {}, lrn_rate: {:.5f}, item_alpha: {:.5f}, user_alpha: {:.5f}, precision: {:.5f}, recall: {:.5f}, auc_score: {:.5f}".format(
      params["no_components"], params["learning_rate"], params["item_alpha"], params["user_alpha"], test_precision, test_recall, test_auc))
    return test_auc

In [18]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)
print(study.best_trial.params)

[32m[I 2023-03-14 09:43:24,995][0m A new study created in memory with name: no-name-33e1116d-f16f-4c9f-aa0b-10e582584106[0m
Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:21<00:00, 40.29s/it]
[32m[I 2023-03-14 09:50:42,438][0m Trial 0 finished with value: 0.7664042711257935 and parameters: {'no_components': 50, 'learning_rate': 0.008515490987966674, 'item_alpha': 0.009989801052029564, 'user_alpha': 0.006525796566942141}. Best is trial 0 with value: 0.7664042711257935.[0m


no_comp: 50, lrn_rate: 0.00852, item_alpha: 0.00999, user_alpha: 0.00653, precision: 0.00224, recall: 0.00890, auc_score: 0.76640


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [02:44<00:00, 32.81s/it]
[32m[I 2023-03-14 09:56:34,061][0m Trial 1 finished with value: 0.6648830771446228 and parameters: {'no_components': 40, 'learning_rate': 0.00928604382460443, 'item_alpha': 0.001431535015211974, 'user_alpha': 0.008787731876146091}. Best is trial 0 with value: 0.7664042711257935.[0m


no_comp: 40, lrn_rate: 0.00929, item_alpha: 0.00143, user_alpha: 0.00879, precision: 0.00141, recall: 0.00443, auc_score: 0.66488


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:20<00:00, 40.04s/it]
[32m[I 2023-03-14 10:03:37,357][0m Trial 2 finished with value: 0.7677977681159973 and parameters: {'no_components': 50, 'learning_rate': 0.008668578547255225, 'item_alpha': 0.009478209543015877, 'user_alpha': 0.001899806983471075}. Best is trial 2 with value: 0.7677977681159973.[0m


no_comp: 50, lrn_rate: 0.00867, item_alpha: 0.00948, user_alpha: 0.00190, precision: 0.00229, recall: 0.00899, auc_score: 0.76780


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:20<00:00, 52.08s/it]
[32m[I 2023-03-14 10:12:53,118][0m Trial 3 finished with value: 0.7026572227478027 and parameters: {'no_components': 70, 'learning_rate': 0.006647381814146183, 'item_alpha': 0.009769828032584211, 'user_alpha': 0.0037891000784671235}. Best is trial 2 with value: 0.7677977681159973.[0m


no_comp: 70, lrn_rate: 0.00665, item_alpha: 0.00977, user_alpha: 0.00379, precision: 0.00196, recall: 0.00705, auc_score: 0.70266


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:07<00:00, 37.52s/it]
[32m[I 2023-03-14 10:19:51,666][0m Trial 4 finished with value: 0.7480788230895996 and parameters: {'no_components': 50, 'learning_rate': 0.00771827318819096, 'item_alpha': 0.008491357979891222, 'user_alpha': 0.008510330922786236}. Best is trial 2 with value: 0.7677977681159973.[0m


no_comp: 50, lrn_rate: 0.00772, item_alpha: 0.00849, user_alpha: 0.00851, precision: 0.00381, recall: 0.01915, auc_score: 0.74808


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:32<00:00, 54.52s/it]
[32m[I 2023-03-14 10:29:29,079][0m Trial 5 finished with value: 0.6983861327171326 and parameters: {'no_components': 70, 'learning_rate': 0.007289593775809891, 'item_alpha': 0.005452906812705326, 'user_alpha': 0.0020003748946945377}. Best is trial 2 with value: 0.7677977681159973.[0m


no_comp: 70, lrn_rate: 0.00729, item_alpha: 0.00545, user_alpha: 0.00200, precision: 0.00211, recall: 0.00888, auc_score: 0.69839


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [06:30<00:00, 78.10s/it]
[32m[I 2023-03-14 10:44:05,338][0m Trial 6 finished with value: 0.7225891947746277 and parameters: {'no_components': 100, 'learning_rate': 0.006509272585593201, 'item_alpha': 0.004363854667169842, 'user_alpha': 0.006610497214754425}. Best is trial 2 with value: 0.7677977681159973.[0m


no_comp: 100, lrn_rate: 0.00651, item_alpha: 0.00436, user_alpha: 0.00661, precision: 0.00212, recall: 0.00905, auc_score: 0.72259


Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [08:52<00:00, 106.55s/it]
[32m[I 2023-03-14 11:02:43,157][0m Trial 7 finished with value: 0.7229148149490356 and parameters: {'no_components': 100, 'learning_rate': 0.005968969400855364, 'item_alpha': 0.001407105031569854, 'user_alpha': 0.0025640866776845024}. Best is trial 2 with value: 0.7677977681159973.[0m


no_comp: 100, lrn_rate: 0.00597, item_alpha: 0.00141, user_alpha: 0.00256, precision: 0.00234, recall: 0.00985, auc_score: 0.72291


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:29<00:00, 65.82s/it]
[32m[I 2023-03-14 11:14:02,667][0m Trial 8 finished with value: 0.7782987952232361 and parameters: {'no_components': 60, 'learning_rate': 0.009860561731923383, 'item_alpha': 0.001215062984330102, 'user_alpha': 0.006420194552258707}. Best is trial 8 with value: 0.7782987952232361.[0m


no_comp: 60, lrn_rate: 0.00986, item_alpha: 0.00122, user_alpha: 0.00642, precision: 0.00570, recall: 0.03001, auc_score: 0.77830


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:35<00:00, 55.14s/it]
[32m[I 2023-03-14 11:24:09,285][0m Trial 9 finished with value: 0.7701193690299988 and parameters: {'no_components': 50, 'learning_rate': 0.00685990535879959, 'item_alpha': 0.009449351927747075, 'user_alpha': 0.001221378294114082}. Best is trial 8 with value: 0.7782987952232361.[0m


no_comp: 50, lrn_rate: 0.00686, item_alpha: 0.00945, user_alpha: 0.00122, precision: 0.00386, recall: 0.01883, auc_score: 0.77012


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [06:51<00:00, 82.39s/it]
[32m[I 2023-03-14 11:38:40,224][0m Trial 10 finished with value: 0.6994630098342896 and parameters: {'no_components': 80, 'learning_rate': 0.00501409814008439, 'item_alpha': 0.002920156300040253, 'user_alpha': 0.004748053984368421}. Best is trial 8 with value: 0.7782987952232361.[0m


no_comp: 80, lrn_rate: 0.00501, item_alpha: 0.00292, user_alpha: 0.00475, precision: 0.00162, recall: 0.00535, auc_score: 0.69946


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:27<00:00, 65.46s/it]
[32m[I 2023-03-14 11:50:02,965][0m Trial 11 finished with value: 0.7800536155700684 and parameters: {'no_components': 60, 'learning_rate': 0.009744727638426742, 'item_alpha': 0.00751559424526017, 'user_alpha': 0.003984200056854025}. Best is trial 11 with value: 0.7800536155700684.[0m


no_comp: 60, lrn_rate: 0.00974, item_alpha: 0.00752, user_alpha: 0.00398, precision: 0.00554, recall: 0.02955, auc_score: 0.78005


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [07:01<00:00, 84.39s/it]
[32m[I 2023-03-14 12:05:01,202][0m Trial 12 finished with value: 0.7408261299133301 and parameters: {'no_components': 80, 'learning_rate': 0.00967184567070999, 'item_alpha': 0.007380262175885372, 'user_alpha': 0.005038829964680467}. Best is trial 11 with value: 0.7800536155700684.[0m


no_comp: 80, lrn_rate: 0.00967, item_alpha: 0.00738, user_alpha: 0.00504, precision: 0.00296, recall: 0.01298, auc_score: 0.74083


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:16<00:00, 63.24s/it]
[32m[I 2023-03-14 12:16:01,437][0m Trial 13 finished with value: 0.7732396721839905 and parameters: {'no_components': 60, 'learning_rate': 0.009982814398297573, 'item_alpha': 0.0069789193146403295, 'user_alpha': 0.003721968673736693}. Best is trial 11 with value: 0.7800536155700684.[0m


no_comp: 60, lrn_rate: 0.00998, item_alpha: 0.00698, user_alpha: 0.00372, precision: 0.00486, recall: 0.02473, auc_score: 0.77324


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:21<00:00, 64.36s/it]
[32m[I 2023-03-14 12:27:08,450][0m Trial 14 finished with value: 0.7813881635665894 and parameters: {'no_components': 60, 'learning_rate': 0.009070638302229142, 'item_alpha': 0.003972469608076546, 'user_alpha': 0.006730144043094697}. Best is trial 14 with value: 0.7813881635665894.[0m


no_comp: 60, lrn_rate: 0.00907, item_alpha: 0.00397, user_alpha: 0.00673, precision: 0.00594, recall: 0.03198, auc_score: 0.78139


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [06:46<00:00, 81.23s/it]
[32m[I 2023-03-14 12:42:10,368][0m Trial 15 finished with value: 0.6304170489311218 and parameters: {'no_components': 80, 'learning_rate': 0.008981343747941142, 'item_alpha': 0.00547736037713057, 'user_alpha': 0.007878919314096448}. Best is trial 14 with value: 0.7813881635665894.[0m


no_comp: 80, lrn_rate: 0.00898, item_alpha: 0.00548, user_alpha: 0.00788, precision: 0.00288, recall: 0.01399, auc_score: 0.63042


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:45<00:00, 69.11s/it]
[32m[I 2023-03-14 12:54:07,097][0m Trial 16 finished with value: 0.780698835849762 and parameters: {'no_components': 60, 'learning_rate': 0.00925270999020915, 'item_alpha': 0.006826547878249492, 'user_alpha': 0.00586551042984355}. Best is trial 14 with value: 0.7813881635665894.[0m


no_comp: 60, lrn_rate: 0.00925, item_alpha: 0.00683, user_alpha: 0.00587, precision: 0.00511, recall: 0.02713, auc_score: 0.78070


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:40<00:00, 44.18s/it]
[32m[I 2023-03-14 13:02:21,392][0m Trial 17 finished with value: 0.6713290810585022 and parameters: {'no_components': 40, 'learning_rate': 0.00807574965623575, 'item_alpha': 0.004266967516888211, 'user_alpha': 0.009675852277958422}. Best is trial 14 with value: 0.7813881635665894.[0m


no_comp: 40, lrn_rate: 0.00808, item_alpha: 0.00427, user_alpha: 0.00968, precision: 0.00161, recall: 0.00495, auc_score: 0.67133


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [06:19<00:00, 75.84s/it]
[32m[I 2023-03-14 13:16:00,213][0m Trial 18 finished with value: 0.6683340668678284 and parameters: {'no_components': 70, 'learning_rate': 0.009205142747528889, 'item_alpha': 0.006245880186147626, 'user_alpha': 0.005736349294648854}. Best is trial 14 with value: 0.7813881635665894.[0m


no_comp: 70, lrn_rate: 0.00921, item_alpha: 0.00625, user_alpha: 0.00574, precision: 0.00224, recall: 0.01094, auc_score: 0.66833


Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [08:22<00:00, 100.41s/it]
[32m[I 2023-03-14 13:33:47,948][0m Trial 19 finished with value: 0.7324991226196289 and parameters: {'no_components': 90, 'learning_rate': 0.008553595955646857, 'item_alpha': 0.004437161385747066, 'user_alpha': 0.0073957117681295315}. Best is trial 14 with value: 0.7813881635665894.[0m


no_comp: 90, lrn_rate: 0.00855, item_alpha: 0.00444, user_alpha: 0.00740, precision: 0.00334, recall: 0.01684, auc_score: 0.73250


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:31<00:00, 66.31s/it]
[32m[I 2023-03-14 13:45:19,108][0m Trial 20 finished with value: 0.7801561951637268 and parameters: {'no_components': 60, 'learning_rate': 0.008129292221597667, 'item_alpha': 0.00625218179250305, 'user_alpha': 0.005703923433998452}. Best is trial 14 with value: 0.7813881635665894.[0m


no_comp: 60, lrn_rate: 0.00813, item_alpha: 0.00625, user_alpha: 0.00570, precision: 0.00537, recall: 0.02858, auc_score: 0.78016


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:40<00:00, 68.13s/it]
[32m[I 2023-03-14 13:57:23,590][0m Trial 21 finished with value: 0.7828404903411865 and parameters: {'no_components': 60, 'learning_rate': 0.008146396234592687, 'item_alpha': 0.00628651909946878, 'user_alpha': 0.005732287458834077}. Best is trial 21 with value: 0.7828404903411865.[0m


no_comp: 60, lrn_rate: 0.00815, item_alpha: 0.00629, user_alpha: 0.00573, precision: 0.00543, recall: 0.02894, auc_score: 0.78284


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:42<00:00, 68.50s/it]
[32m[I 2023-03-14 14:09:25,834][0m Trial 22 finished with value: 0.7810840606689453 and parameters: {'no_components': 60, 'learning_rate': 0.008985278929795653, 'item_alpha': 0.008218529609928056, 'user_alpha': 0.0072183001843209565}. Best is trial 21 with value: 0.7828404903411865.[0m


no_comp: 60, lrn_rate: 0.00899, item_alpha: 0.00822, user_alpha: 0.00722, precision: 0.00577, recall: 0.03145, auc_score: 0.78108


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:46<00:00, 69.30s/it]
[32m[I 2023-03-14 14:20:10,438][0m Trial 23 finished with value: 0.6730281710624695 and parameters: {'no_components': 70, 'learning_rate': 0.008889179552784395, 'item_alpha': 0.008340193928602932, 'user_alpha': 0.0072747823590638945}. Best is trial 21 with value: 0.7828404903411865.[0m


no_comp: 70, lrn_rate: 0.00889, item_alpha: 0.00834, user_alpha: 0.00727, precision: 0.00232, recall: 0.01134, auc_score: 0.67303


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:21<00:00, 40.37s/it]
[33m[W 2023-03-14 14:24:44,346][0m Trial 24 failed with parameters: {'no_components': 50, 'learning_rate': 0.008177141786385261, 'item_alpha': 0.008041511437748481, 'user_alpha': 0.007254423593822881} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "C:\Users\SSAFY\anaconda3\envs\mini-project\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\SSAFY\AppData\Local\Temp\ipykernel_26048\3567443769.py", line 21, in objective
    test_precision = precision_at_k(model, valid_interactions, k=9, item_features=item_features, user_features=user_features).mean()
  File "C:\Users\SSAFY\anaconda3\envs\mini-project\lib\site-packages\lightfm\evaluation.py", line 71, in precision_at_k
    ranks = model.predict_rank(
  File "C:\Users\SSAFY\anaconda3\envs\mini-projec

KeyboardInterrupt: 

In [19]:
print("Best Params : {}".format(study.best_params))
print()
print("Best Trials : {}".format(study.best_trials))
print()
print("Best Values : {}".format(study.best_value))

Best Params : {'no_components': 60, 'learning_rate': 0.008146396234592687, 'item_alpha': 0.00628651909946878, 'user_alpha': 0.005732287458834077}

Best Trials : [FrozenTrial(number=21, state=TrialState.COMPLETE, values=[0.7828404903411865], datetime_start=datetime.datetime(2023, 3, 14, 13, 45, 19, 109162), datetime_complete=datetime.datetime(2023, 3, 14, 13, 57, 23, 590782), params={'no_components': 60, 'learning_rate': 0.008146396234592687, 'item_alpha': 0.00628651909946878, 'user_alpha': 0.005732287458834077}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'no_components': IntDistribution(high=100, log=False, low=40, step=10), 'learning_rate': FloatDistribution(high=0.01, log=False, low=0.005, step=None), 'item_alpha': FloatDistribution(high=0.01, log=False, low=0.001, step=None), 'user_alpha': FloatDistribution(high=0.01, log=False, low=0.001, step=None)}, trial_id=21, value=None)]

Best Values : 0.7828404903411865


no_comp: 60, lrn_rate: 0.00815, item_alpha: 0.00629, user_alpha: 0.00573, precision: 0.00543, recall: 0.02894, auc_score: 0.78284

Best Params : {'no_components': 50, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}
Best Values : 0.7817395925521851

### Evaluation

In [20]:
m1 = LightFM(
    no_components=60,
    learning_schedule='adagrad',
    loss='warp',
    learning_rate=0.01,
    item_alpha=0.00629,
    user_alpha=0.00573,
    random_state=0
)

In [21]:
%time m1.fit(interactions=train_interactions, user_features=user_features, item_features=item_features, epochs=10, verbose=True)

Epoch: 100%|███████████████████████████████████████████████████████████████████████████| 10/10 [08:16<00:00, 49.70s/it]

CPU times: total: 8min 16s
Wall time: 8min 16s





<lightfm.lightfm.LightFM at 0x191d1a0c640>

In [22]:
test_precision = precision_at_k(m1, test_interactions, user_features=user_features, item_features=item_features, k=9).mean()
test_recall = recall_at_k(m1, test_interactions,  user_features=user_features, item_features=item_features, k=9).mean()
test_auc = auc_score(m1, test_interactions, user_features=user_features, item_features=item_features).mean()

print("Train precision: %.5f" % test_precision)
print("Test precision: %.5f" % test_recall) 
print("Test AUC Score : %.5f" % test_auc)

Train precision: 0.00652
Test precision: 0.03355
Test AUC Score : 0.79624


### Result
---
Min-Max Normalization이 조금 더 좋은 성능을 보이기 때문에 Min-Max Normalization을 적용한 model을 저장합니다.

### save

In [None]:
# # save the model to a file
# with open('robust_lightfm.pkl', 'wb') as f:
#     pickle.dump(model, f)