# Min-Max Normalization 적용한 lightfm model 학습

In [1]:
import os
import pandas as pd
import numpy as np
from time import time
from tqdm.notebook import tqdm
import pickle

from scipy.sparse import coo_matrix, csr_matrix

from lightfm.cross_validation import random_train_test_split
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k, auc_score, recall_at_k

from hyperopt import fmin, hp, tpe, Trials



min_max_item_features.csv
min_max_user_features.csv
rating.csv
robust_item_features.csv
robust_user_features.csv
whisky.csv

In [2]:
item_features = pd.read_csv("dataset/min_max_item_features.csv", index_col=0, encoding="UTF-8")
user_features = pd.read_csv("dataset/min_max_user_features.csv", index_col=0, encoding="UTF-8")
rating = pd.read_csv("dataset/rating.csv", index_col=0, encoding="UTF-8")
whisky = pd.read_csv("dataset/whisky.csv", index_col=0, encoding="UTF-8")

In [3]:
item_features = csr_matrix(item_features)
user_features = csr_matrix(user_features)

In [4]:
print(user_features)

  (0, 0)	5.0
  (0, 1)	0.3
  (0, 2)	0.85
  (0, 3)	0.5
  (0, 4)	0.3
  (0, 5)	0.2
  (0, 6)	0.8
  (0, 7)	0.8
  (0, 8)	0.85
  (0, 9)	0.1666666666666666
  (0, 10)	0.2
  (0, 11)	0.25
  (0, 12)	0.85
  (0, 13)	0.5263157894736842
  (1, 0)	5.0
  (1, 1)	0.3
  (1, 2)	0.3
  (1, 3)	0.6
  (1, 4)	0.4
  (1, 5)	0.7000000000000001
  (1, 6)	0.8
  (1, 7)	0.7000000000000001
  (1, 8)	0.7000000000000001
  (1, 9)	0.4444444444444445
  (1, 10)	0.6
  :	:
  (119512, 7)	0.4
  (119512, 8)	1.0
  (119512, 10)	0.55
  (119512, 12)	0.3
  (119512, 13)	0.1052631578947368
  (119513, 0)	1.0
  (119513, 1)	0.31
  (119513, 3)	0.4
  (119513, 4)	0.29
  (119513, 6)	0.3
  (119513, 7)	0.4
  (119513, 8)	1.0
  (119513, 10)	0.55
  (119513, 12)	0.3
  (119513, 13)	0.1052631578947368
  (119514, 0)	1.0
  (119514, 1)	0.31
  (119514, 3)	0.4
  (119514, 4)	0.29
  (119514, 6)	0.3
  (119514, 7)	0.4
  (119514, 8)	1.0
  (119514, 10)	0.55
  (119514, 12)	0.3
  (119514, 13)	0.1052631578947368


In [5]:
unique_user = rating["user_id"].unique()

In [6]:
unique_user

array([     0,      1,      2, ..., 119512, 119513, 119514], dtype=int64)

### make Interactions 

In [7]:
rating.shape

(908176, 3)

In [8]:
def create_user_item_interactions(rating, n_users, n_items):
    interactions = coo_matrix(
        (
            rating['rating'].values,
            (rating['user_id'].values, rating["whisky_id"].values)
        ),
        shape=(n_users, n_items)
    )
    return interactions

In [9]:
interactions = create_user_item_interactions(rating, rating["user_id"].nunique(), whisky["whisky_id"].nunique())

In [10]:
print(interactions)
interactions

  (0, 0)	9.0
  (1, 0)	7.0
  (2, 0)	9.0
  (3, 0)	10.0
  (4, 0)	9.0
  (5, 0)	7.0
  (6, 0)	10.0
  (7, 0)	7.0
  (8, 0)	10.0
  (9, 0)	10.0
  (10, 0)	9.0
  (11, 0)	9.0
  (12, 0)	8.0
  (13, 0)	8.0
  (14, 0)	10.0
  (15, 0)	10.0
  (16, 0)	9.0
  (17, 0)	9.0
  (18, 0)	8.0
  (19, 0)	9.0
  (20, 0)	9.0
  (21, 0)	10.0
  (22, 0)	9.0
  (23, 0)	9.0
  (24, 0)	9.0
  :	:
  (7636, 3531)	4.0
  (56292, 3531)	2.0
  (2670, 3531)	2.0
  (786, 3531)	2.0
  (5942, 3531)	6.0
  (15561, 3531)	2.0
  (25623, 3532)	6.0
  (2885, 3532)	6.0
  (7368, 3533)	2.0
  (69554, 3534)	10.0
  (4484, 3534)	2.0
  (85727, 3534)	5.0
  (43900, 3534)	4.0
  (16070, 3534)	8.0
  (29998, 3534)	8.0
  (7895, 3534)	4.0
  (104052, 3534)	5.0
  (31152, 3534)	2.0
  (119512, 3534)	6.0
  (2509, 3534)	4.0
  (4828, 3534)	4.0
  (119513, 3534)	2.0
  (119514, 3534)	8.0
  (3123, 3534)	4.0
  (95648, 3534)	2.0


<119515x3535 sparse matrix of type '<class 'numpy.float64'>'
	with 908176 stored elements in COOrdinate format>

## Train_Test data split

In [11]:
train_interactions, test_interactions = random_train_test_split(interactions, test_percentage=0.2, random_state=42)

In [12]:
train_interactions, valid_interactions = random_train_test_split(train_interactions, test_percentage=0.2, random_state=42)

### Hyper Parameter Optimization by using HyperOPT

In [None]:
trials = Trials()
space = [
    hp.choice('no_components', range(10,51,10)),
    hp.uniform('learning_rate', 0.001, 0.005,  0.01),
    hp.uniform('item_alpha', 1e-05, 5e-05),
    hp.uniform('user_alpha', 1e-05, 5e-05),
]

In [None]:
def objective(params):
    no_components, learning_rate, item_alpha, user_alpha = params

    model = LightFM(no_components=no_components,
                    learning_schedule='adagrad',
                    loss='warp',
                    learning_rate=learning_rate,
                    item_alpha=item_alpha,
                    user_alpha=user_alpha,
                    random_state=0)

    model.fit(interactions=train_interactions,
              user_features=user_features,
              item_features=item_features,
              epochs=3,
              verbose=True)

    test_precision = precision_at_k(model, valid_interactions, k=10, item_features=item_features, user_features=user_features).mean()
    test_recall = recall_at_k(model, valid_interactions, k=10, item_features=item_features, user_features=user_features).mean()
    test_auc = auc_score(model, valid_interactions, item_features=item_features, user_features=user_features).mean()
    
    print("no_comp: {}, lrn_rate: {:.5f}, item_alpha: {:.5f}, precision: {:.5f}, recall: {:.5f}, auc_score: {:.5f}".format(
      no_components, learning_rate, item_alpha, test_precision, test_recall, test_auc))
    output = -test_auc


    return output

In [None]:
best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=5, trials=trials)

best_params

In [None]:
midel = LightFM(no_components=20,
                learning_rate=0.05,
                loss="warp",
                item_alpha=0.01,
                user_alpha=0.01,
                random_state=42
               )

In [None]:
%time model.fit(interactions=train_interactions, user_features=user_features, item_features=item_features, epochs=10, verbose=True)

In [None]:
model = LightFM(no_components=40,
                learning_schedule='adagrad',
                loss='warp',
                learning_rate=0.011370592645615374,
                item_alpha=1.975734039413079e-05,
                random_state=42)

In [None]:
%time midel.fit(interactions=train_interactions, user_features=user_features, item_features=item_features, epochs=10, verbose=True)

In [None]:
print("Train precision: %.5f" % precision_at_k(midel, test_interactions, user_features=user_features, item_features=item_features, k=9).mean())
print("Test precision: %.5f" % precision_at_k(midel, test_interactions,  user_features=user_features, item_features=item_features, k=9).mean())

In [None]:
test_auc = auc_score(midel, test_interactions, user_features=user_features, item_features=item_features).mean()
print(test_auc)

## Optuna 사용한 HyperParameter 최적화
---
- learning_rate, alpha 등의 하이퍼 파라미터 값을 작게 설정했을 때보다 높게 설정했을 때 AUC Score가 높게 나오는 경향이 보였습니다.
- HyperOPT는 베이지안 최적화 접근 기반인데 이 부분에 제대로 알지 못하기 때문에 전체를 돌려보는 Optuna 방식으로 변경하겠습니다.
- Optuna의 경우 시각화도 가능하고, GridSearchCV보다 빠르다는 장점이 있습니다.

In [13]:
import optuna

In [14]:
def objective(trial):
    
    
    # 조정할 하이퍼 파라미터
    params = {
        "learning_schedule": 'adagrad',
        "loss": "warp",
        "random_state": 0,
        "no_components": trial.suggest_categorical("no_components", [10, 20, 30, 40, 50]),
        'learning_rate': trial.suggest_categorical("learning_rate", [0.0005, 0.001, 0.005,  0.01,  0.05]),
        'item_alpha': trial.suggest_categorical("item_alpha", [5e-04, 1e-03, 5e-03, 1e-02]),
        'user_alpha': trial.suggest_categorical("user_alpha", [5e-04, 1e-03, 5e-03, 1e-02]),
    }

    model = LightFM(**params)

    model.fit(interactions=train_interactions,
              user_features=user_features,
              item_features=item_features,
              epochs=3,
              verbose=True)

    test_precision = precision_at_k(model, valid_interactions, k=9, item_features=item_features, user_features=user_features).mean()
    test_recall = recall_at_k(model, valid_interactions, k=9, item_features=item_features, user_features=user_features).mean()
    test_auc = auc_score(model, valid_interactions, item_features=item_features, user_features=user_features).mean()
    
    print("no_comp: {}, lrn_rate: {:.5f}, item_alpha: {:.5f}, user_alpha: {:.5f}, precision: {:.5f}, recall: {:.5f}, auc_score: {:.5f}".format(
      params["no_components"], params["learning_rate"], params["item_alpha"], params["user_alpha"], test_precision, test_recall, test_auc))
    return test_auc

In [15]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)
print(study.best_trial.params)

[32m[I 2023-03-13 20:39:04,459][0m A new study created in memory with name: no-name-512a595e-85c2-49aa-a0ff-f866e2b5004e[0m
Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:09<00:00, 23.16s/it]
[32m[I 2023-03-13 20:42:48,357][0m Trial 0 finished with value: 0.7446987628936768 and parameters: {'no_components': 20, 'learning_rate': 0.0005, 'item_alpha': 0.001, 'user_alpha': 0.005}. Best is trial 0 with value: 0.7446987628936768.[0m


no_comp: 20, lrn_rate: 0.00050, item_alpha: 0.00100, user_alpha: 0.00500, precision: 0.00077, recall: 0.00268, auc_score: 0.74470


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:14<00:00, 44.95s/it]
[32m[I 2023-03-13 20:49:13,855][0m Trial 1 finished with value: 0.7767640948295593 and parameters: {'no_components': 40, 'learning_rate': 0.005, 'item_alpha': 0.01, 'user_alpha': 0.01}. Best is trial 1 with value: 0.7767640948295593.[0m


no_comp: 40, lrn_rate: 0.00500, item_alpha: 0.01000, user_alpha: 0.01000, precision: 0.00282, recall: 0.01199, auc_score: 0.77676


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:47<00:00, 35.84s/it]
[32m[I 2023-03-13 20:54:24,165][0m Trial 2 finished with value: 0.745319664478302 and parameters: {'no_components': 30, 'learning_rate': 0.0005, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 1 with value: 0.7767640948295593.[0m


no_comp: 30, lrn_rate: 0.00050, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00089, recall: 0.00334, auc_score: 0.74532


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:11<00:00, 43.97s/it]
[32m[I 2023-03-13 21:00:42,679][0m Trial 3 finished with value: 0.8078765869140625 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.01000, user_alpha: 0.00100, precision: 0.00685, recall: 0.04067, auc_score: 0.80788


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:37<00:00, 12.43s/it]
[32m[I 2023-03-13 21:03:11,497][0m Trial 4 finished with value: 0.7968514561653137 and parameters: {'no_components': 10, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 10, lrn_rate: 0.05000, item_alpha: 0.01000, user_alpha: 0.00100, precision: 0.00416, recall: 0.02009, auc_score: 0.79685


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:17<00:00, 45.69s/it]
[32m[I 2023-03-13 21:09:28,019][0m Trial 5 finished with value: 0.7761074304580688 and parameters: {'no_components': 40, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00100, precision: 0.00274, recall: 0.01178, auc_score: 0.77611


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:11<00:00, 23.97s/it]
[32m[I 2023-03-13 21:13:14,145][0m Trial 6 finished with value: 0.7806143164634705 and parameters: {'no_components': 20, 'learning_rate': 0.01, 'item_alpha': 0.01, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 20, lrn_rate: 0.01000, item_alpha: 0.01000, user_alpha: 0.00100, precision: 0.00309, recall: 0.01317, auc_score: 0.78061


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:46<00:00, 35.61s/it]
[32m[I 2023-03-13 21:18:30,842][0m Trial 7 finished with value: 0.7479438185691833 and parameters: {'no_components': 30, 'learning_rate': 0.001, 'item_alpha': 0.005, 'user_alpha': 0.005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 30, lrn_rate: 0.00100, item_alpha: 0.00500, user_alpha: 0.00500, precision: 0.00104, recall: 0.00389, auc_score: 0.74794


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.80s/it]
[32m[I 2023-03-13 21:21:00,612][0m Trial 8 finished with value: 0.7674564719200134 and parameters: {'no_components': 10, 'learning_rate': 0.005, 'item_alpha': 0.001, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 10, lrn_rate: 0.00500, item_alpha: 0.00100, user_alpha: 0.00050, precision: 0.00170, recall: 0.00626, auc_score: 0.76746


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:20<00:00, 46.99s/it]
[32m[I 2023-03-13 21:27:28,837][0m Trial 9 finished with value: 0.7458027601242065 and parameters: {'no_components': 40, 'learning_rate': 0.0005, 'item_alpha': 0.01, 'user_alpha': 0.005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.00050, item_alpha: 0.01000, user_alpha: 0.00500, precision: 0.00108, recall: 0.00400, auc_score: 0.74580


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:46<00:00, 55.53s/it]
[32m[I 2023-03-13 21:35:25,297][0m Trial 10 finished with value: 0.8056703209877014 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.0005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00050, user_alpha: 0.00050, precision: 0.00509, recall: 0.02822, auc_score: 0.80567


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [03:10<00:00, 63.51s/it]
[32m[I 2023-03-13 21:44:18,841][0m Trial 11 finished with value: 0.8056703209877014 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.0005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00050, user_alpha: 0.00050, precision: 0.00509, recall: 0.02822, auc_score: 0.80567


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [03:01<00:00, 60.63s/it]
[32m[I 2023-03-13 21:52:41,977][0m Trial 12 finished with value: 0.8056703209877014 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.0005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00050, user_alpha: 0.00050, precision: 0.00509, recall: 0.02822, auc_score: 0.80567


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:57<00:00, 59.21s/it]
[32m[I 2023-03-13 22:00:59,518][0m Trial 13 finished with value: 0.783507764339447 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.0005, 'user_alpha': 0.01}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00050, user_alpha: 0.01000, precision: 0.00490, recall: 0.02849, auc_score: 0.78351


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:54<00:00, 58.02s/it]
[32m[I 2023-03-13 22:09:04,364][0m Trial 14 finished with value: 0.8068074584007263 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00538, recall: 0.03013, auc_score: 0.80681


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:22<00:00, 47.60s/it]
[32m[I 2023-03-13 22:15:41,680][0m Trial 15 finished with value: 0.7870996594429016 and parameters: {'no_components': 40, 'learning_rate': 0.01, 'item_alpha': 0.005, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.01000, item_alpha: 0.00500, user_alpha: 0.00100, precision: 0.00348, recall: 0.01541, auc_score: 0.78710


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:25<00:00, 48.61s/it]
[32m[I 2023-03-13 22:22:36,941][0m Trial 16 finished with value: 0.7483178973197937 and parameters: {'no_components': 40, 'learning_rate': 0.001, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.00100, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00106, recall: 0.00391, auc_score: 0.74832


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:55<00:00, 58.66s/it]
[32m[I 2023-03-13 22:30:54,831][0m Trial 17 finished with value: 0.7907894849777222 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.01}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.01000, precision: 0.00489, recall: 0.02802, auc_score: 0.79079


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:45<00:00, 35.11s/it]
[32m[I 2023-03-13 22:36:14,767][0m Trial 18 finished with value: 0.8056063055992126 and parameters: {'no_components': 30, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 30, lrn_rate: 0.05000, item_alpha: 0.01000, user_alpha: 0.00100, precision: 0.00563, recall: 0.03205, auc_score: 0.80561


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:39<00:00, 13.28s/it]
[32m[I 2023-03-13 22:38:50,634][0m Trial 19 finished with value: 0.7959623336791992 and parameters: {'no_components': 10, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 10, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00384, recall: 0.01756, auc_score: 0.79596


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:17<00:00, 25.96s/it]
[32m[I 2023-03-13 22:42:49,419][0m Trial 20 finished with value: 0.7806143164634705 and parameters: {'no_components': 20, 'learning_rate': 0.01, 'item_alpha': 0.01, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 20, lrn_rate: 0.01000, item_alpha: 0.01000, user_alpha: 0.00100, precision: 0.00309, recall: 0.01317, auc_score: 0.78061


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:57<00:00, 59.03s/it]
[32m[I 2023-03-13 22:50:58,418][0m Trial 21 finished with value: 0.8056703209877014 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.0005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00050, user_alpha: 0.00050, precision: 0.00509, recall: 0.02822, auc_score: 0.80567


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:54<00:00, 58.29s/it]
[32m[I 2023-03-13 22:59:27,736][0m Trial 22 finished with value: 0.8056703209877014 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.0005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00050, user_alpha: 0.00050, precision: 0.00509, recall: 0.02822, auc_score: 0.80567


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:53<00:00, 57.80s/it]
[32m[I 2023-03-13 23:07:26,119][0m Trial 23 finished with value: 0.8056703209877014 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.0005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00050, user_alpha: 0.00050, precision: 0.00509, recall: 0.02822, auc_score: 0.80567


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:53<00:00, 57.84s/it]
[32m[I 2023-03-13 23:15:34,882][0m Trial 24 finished with value: 0.8068074584007263 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00538, recall: 0.03013, auc_score: 0.80681


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:40<00:00, 53.37s/it]
[32m[I 2023-03-13 23:22:57,725][0m Trial 25 finished with value: 0.7483178973197937 and parameters: {'no_components': 40, 'learning_rate': 0.001, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.00100, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00106, recall: 0.00391, auc_score: 0.74832


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [03:10<00:00, 63.33s/it]
[32m[I 2023-03-13 23:31:53,559][0m Trial 26 finished with value: 0.8066747188568115 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00500, precision: 0.00570, recall: 0.03259, auc_score: 0.80667


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [03:04<00:00, 61.55s/it]
[32m[I 2023-03-13 23:40:38,621][0m Trial 27 finished with value: 0.7907894849777222 and parameters: {'no_components': 50, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.01}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 50, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.01000, precision: 0.00489, recall: 0.02802, auc_score: 0.79079


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:14<00:00, 44.78s/it]
[32m[I 2023-03-13 23:47:16,385][0m Trial 28 finished with value: 0.8074862360954285 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00577, recall: 0.03327, auc_score: 0.80749


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:23<00:00, 47.83s/it]
[32m[I 2023-03-13 23:53:57,932][0m Trial 29 finished with value: 0.7458696365356445 and parameters: {'no_components': 40, 'learning_rate': 0.0005, 'item_alpha': 0.005, 'user_alpha': 0.005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.00050, item_alpha: 0.00500, user_alpha: 0.00500, precision: 0.00108, recall: 0.00405, auc_score: 0.74587


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:27<00:00, 49.27s/it]
[32m[I 2023-03-14 00:00:41,769][0m Trial 30 finished with value: 0.7757359743118286 and parameters: {'no_components': 40, 'learning_rate': 0.005, 'item_alpha': 0.01, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.00500, item_alpha: 0.01000, user_alpha: 0.00100, precision: 0.00283, recall: 0.01200, auc_score: 0.77574


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:16<00:00, 25.61s/it]
[32m[I 2023-03-14 00:04:43,778][0m Trial 31 finished with value: 0.802362859249115 and parameters: {'no_components': 20, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 20, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00415, recall: 0.02107, auc_score: 0.80236


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:24<00:00, 48.31s/it]
[32m[I 2023-03-14 00:11:51,843][0m Trial 32 finished with value: 0.8074862360954285 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00577, recall: 0.03327, auc_score: 0.80749


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:31<00:00, 50.53s/it]
[32m[I 2023-03-14 00:19:06,166][0m Trial 33 finished with value: 0.8074862360954285 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00577, recall: 0.03327, auc_score: 0.80749


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:39<00:00, 53.18s/it]
[32m[I 2023-03-14 00:26:29,188][0m Trial 34 finished with value: 0.7458818554878235 and parameters: {'no_components': 40, 'learning_rate': 0.0005, 'item_alpha': 0.001, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.00050, item_alpha: 0.00100, user_alpha: 0.00050, precision: 0.00108, recall: 0.00405, auc_score: 0.74588


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:32<00:00, 50.80s/it]
[32m[I 2023-03-14 00:34:00,711][0m Trial 35 finished with value: 0.7982140183448792 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.01}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.01000, precision: 0.00593, recall: 0.03341, auc_score: 0.79821


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:37<00:00, 52.34s/it]
[32m[I 2023-03-14 00:41:25,061][0m Trial 36 finished with value: 0.7872189283370972 and parameters: {'no_components': 40, 'learning_rate': 0.01, 'item_alpha': 0.01, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.01000, item_alpha: 0.01000, user_alpha: 0.00100, precision: 0.00365, recall: 0.01610, auc_score: 0.78722


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:42<00:00, 54.26s/it]
[32m[I 2023-03-14 00:48:24,478][0m Trial 37 finished with value: 0.7483000159263611 and parameters: {'no_components': 40, 'learning_rate': 0.001, 'item_alpha': 0.001, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.00100, item_alpha: 0.00100, user_alpha: 0.00050, precision: 0.00108, recall: 0.00397, auc_score: 0.74830


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:19<00:00, 46.54s/it]
[32m[I 2023-03-14 00:54:47,406][0m Trial 38 finished with value: 0.7757359743118286 and parameters: {'no_components': 40, 'learning_rate': 0.005, 'item_alpha': 0.01, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.00500, item_alpha: 0.01000, user_alpha: 0.00100, precision: 0.00283, recall: 0.01200, auc_score: 0.77574


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:21<00:00, 47.11s/it]
[32m[I 2023-03-14 01:01:25,634][0m Trial 39 finished with value: 0.8074862360954285 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00577, recall: 0.03327, auc_score: 0.80749


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:49<00:00, 36.40s/it]
[32m[I 2023-03-14 01:06:47,753][0m Trial 40 finished with value: 0.7453698515892029 and parameters: {'no_components': 30, 'learning_rate': 0.0005, 'item_alpha': 0.005, 'user_alpha': 0.005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 30, lrn_rate: 0.00050, item_alpha: 0.00500, user_alpha: 0.00500, precision: 0.00094, recall: 0.00355, auc_score: 0.74537


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:17<00:00, 45.68s/it]
[32m[I 2023-03-14 01:13:32,404][0m Trial 41 finished with value: 0.8074862360954285 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00577, recall: 0.03327, auc_score: 0.80749


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:20<00:00, 46.90s/it]
[32m[I 2023-03-14 01:20:20,374][0m Trial 42 finished with value: 0.8074862360954285 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00577, recall: 0.03327, auc_score: 0.80749


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:19<00:00, 46.65s/it]
[32m[I 2023-03-14 01:27:03,164][0m Trial 43 finished with value: 0.8074862360954285 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00577, recall: 0.03327, auc_score: 0.80749


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:39<00:00, 13.17s/it]
[32m[I 2023-03-14 01:29:40,938][0m Trial 44 finished with value: 0.797116756439209 and parameters: {'no_components': 10, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 10, lrn_rate: 0.05000, item_alpha: 0.01000, user_alpha: 0.00050, precision: 0.00416, recall: 0.02018, auc_score: 0.79712


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:15<00:00, 45.08s/it]
[32m[I 2023-03-14 01:36:13,602][0m Trial 45 finished with value: 0.806874692440033 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.001, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00100, user_alpha: 0.00050, precision: 0.00524, recall: 0.02948, auc_score: 0.80687


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:24<00:00, 48.24s/it]
[32m[I 2023-03-14 01:42:52,214][0m Trial 46 finished with value: 0.7759144902229309 and parameters: {'no_components': 40, 'learning_rate': 0.005, 'item_alpha': 0.005, 'user_alpha': 0.001}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.00500, item_alpha: 0.00500, user_alpha: 0.00100, precision: 0.00274, recall: 0.01170, auc_score: 0.77591


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:20<00:00, 46.68s/it]
[32m[I 2023-03-14 01:49:33,387][0m Trial 47 finished with value: 0.8074862360954285 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00577, recall: 0.03327, auc_score: 0.80749


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [01:12<00:00, 24.33s/it]
[32m[I 2023-03-14 01:53:30,949][0m Trial 48 finished with value: 0.7803113460540771 and parameters: {'no_components': 20, 'learning_rate': 0.01, 'item_alpha': 0.005, 'user_alpha': 0.0005}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 20, lrn_rate: 0.01000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00307, recall: 0.01312, auc_score: 0.78031


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [02:17<00:00, 45.75s/it]
[32m[I 2023-03-14 02:00:07,623][0m Trial 49 finished with value: 0.8045592308044434 and parameters: {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.01}. Best is trial 3 with value: 0.8078765869140625.[0m


no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.01000, user_alpha: 0.01000, precision: 0.00567, recall: 0.03168, auc_score: 0.80456
{'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.001}


best : no_comp: 40, lrn_rate: 0.05000, item_alpha: 0.00500, user_alpha: 0.00050, precision: 0.00577, recall: 0.03327, auc_score: 0.80749

In [20]:
print("Best Params : {}".format(study.best_params))
print()
print("Best Trials : {}".format(study.best_trials))
print()
print("Best Values : {}".format(study.best_value))

Best Params : {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.001}

Best Trials : [FrozenTrial(number=3, state=TrialState.COMPLETE, values=[0.8078765869140625], datetime_start=datetime.datetime(2023, 3, 13, 20, 54, 24, 166409), datetime_complete=datetime.datetime(2023, 3, 13, 21, 0, 42, 679591), params={'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.001}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'no_components': CategoricalDistribution(choices=(10, 20, 30, 40, 50)), 'learning_rate': CategoricalDistribution(choices=(0.0005, 0.001, 0.005, 0.01, 0.05)), 'item_alpha': CategoricalDistribution(choices=(0.0005, 0.001, 0.005, 0.01)), 'user_alpha': CategoricalDistribution(choices=(0.0005, 0.001, 0.005, 0.01))}, trial_id=3, value=None)]

Best Values : 0.8078765869140625


### uniform distribution 기반으로 최적 파라미터를 찾습니다.
---
- 위 과정은 GridSearch 방식으로 직접 설정한 파라미터 값에서 최적 값을 찾았습니다.
- 그 결과를 바탕으로 적절한 범위를 설정해 좀 더 세세한 optimal hyper parameter value를 찾습니다.

In [22]:
def objective(trial):    
    # 조정할 하이퍼 파라미터
    params = {
        "learning_schedule": 'adagrad',
        "loss": "warp",
        "random_state": 0,
        "no_components": trial.suggest_int("no_components", 40, 100, 10),
        'learning_rate': trial.suggest_float("learning_rate", 0.005,  0.01),
        'item_alpha': trial.suggest_float("item_alpha", 1e-03, 1e-02),
        'user_alpha': trial.suggest_float("user_alpha", 1e-03, 1e-02),
    }

    model = LightFM(**params)

    model.fit(interactions=train_interactions,
              user_features=user_features,
              item_features=item_features,
              epochs=5,
              verbose=True)

    test_precision = precision_at_k(model, valid_interactions, k=9, item_features=item_features, user_features=user_features).mean()
    test_recall = recall_at_k(model, valid_interactions, k=9, item_features=item_features, user_features=user_features).mean()
    test_auc = auc_score(model, valid_interactions, item_features=item_features, user_features=user_features).mean()
    
    print("no_comp: {}, lrn_rate: {:.5f}, item_alpha: {:.5f}, user_alpha: {:.5f}, precision: {:.5f}, recall: {:.5f}, auc_score: {:.5f}".format(
      params["no_components"], params["learning_rate"], params["item_alpha"], params["user_alpha"], test_precision, test_recall, test_auc))
    return test_auc

In [23]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)
print(study.best_trial.params)

[32m[I 2023-03-14 09:42:18,351][0m A new study created in memory with name: no-name-63b82246-2429-4b54-8b31-523a81431f02[0m
Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:24<00:00, 52.86s/it]
[32m[I 2023-03-14 09:51:18,814][0m Trial 0 finished with value: 0.7900952696800232 and parameters: {'no_components': 60, 'learning_rate': 0.008744355581243406, 'item_alpha': 0.0054353405577961655, 'user_alpha': 0.007493531192629571}. Best is trial 0 with value: 0.7900952696800232.[0m


no_comp: 60, lrn_rate: 0.00874, item_alpha: 0.00544, user_alpha: 0.00749, precision: 0.00391, recall: 0.01776, auc_score: 0.79010


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:33<00:00, 54.76s/it]
[32m[I 2023-03-14 10:00:15,041][0m Trial 1 finished with value: 0.7869731187820435 and parameters: {'no_components': 60, 'learning_rate': 0.00783405821538858, 'item_alpha': 0.005256311308722085, 'user_alpha': 0.0035617167199210715}. Best is trial 0 with value: 0.7900952696800232.[0m


no_comp: 60, lrn_rate: 0.00783, item_alpha: 0.00526, user_alpha: 0.00356, precision: 0.00380, recall: 0.01700, auc_score: 0.78697


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [02:59<00:00, 35.95s/it]
[32m[I 2023-03-14 10:06:21,205][0m Trial 2 finished with value: 0.789766788482666 and parameters: {'no_components': 40, 'learning_rate': 0.009113351699509237, 'item_alpha': 0.0029736887742244276, 'user_alpha': 0.004450398783566782}. Best is trial 0 with value: 0.7900952696800232.[0m


no_comp: 40, lrn_rate: 0.00911, item_alpha: 0.00297, user_alpha: 0.00445, precision: 0.00384, recall: 0.01736, auc_score: 0.78977


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [02:56<00:00, 35.24s/it]
[32m[I 2023-03-14 10:12:29,709][0m Trial 3 finished with value: 0.7885702252388 and parameters: {'no_components': 40, 'learning_rate': 0.008726169290588953, 'item_alpha': 0.0026658191771247253, 'user_alpha': 0.0018158137664292004}. Best is trial 0 with value: 0.7900952696800232.[0m


no_comp: 40, lrn_rate: 0.00873, item_alpha: 0.00267, user_alpha: 0.00182, precision: 0.00376, recall: 0.01685, auc_score: 0.78857


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:21<00:00, 52.34s/it]
[32m[I 2023-03-14 10:21:24,681][0m Trial 4 finished with value: 0.7837953567504883 and parameters: {'no_components': 60, 'learning_rate': 0.0062955575620366406, 'item_alpha': 0.0032670452426154357, 'user_alpha': 0.002272292566376715}. Best is trial 0 with value: 0.7900952696800232.[0m


no_comp: 60, lrn_rate: 0.00630, item_alpha: 0.00327, user_alpha: 0.00227, precision: 0.00321, recall: 0.01345, auc_score: 0.78380


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:33<00:00, 54.69s/it]
[32m[I 2023-03-14 10:30:30,210][0m Trial 5 finished with value: 0.7890052199363708 and parameters: {'no_components': 60, 'learning_rate': 0.00937088152907794, 'item_alpha': 0.003431696086366004, 'user_alpha': 0.0016378296530448384}. Best is trial 0 with value: 0.7900952696800232.[0m


no_comp: 60, lrn_rate: 0.00937, item_alpha: 0.00343, user_alpha: 0.00164, precision: 0.00381, recall: 0.01721, auc_score: 0.78901


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:45<00:00, 45.01s/it]
[32m[I 2023-03-14 10:38:06,417][0m Trial 6 finished with value: 0.78534996509552 and parameters: {'no_components': 50, 'learning_rate': 0.006041072086701743, 'item_alpha': 0.002016770126366943, 'user_alpha': 0.0075948785596470295}. Best is trial 0 with value: 0.7900952696800232.[0m


no_comp: 50, lrn_rate: 0.00604, item_alpha: 0.00202, user_alpha: 0.00759, precision: 0.00320, recall: 0.01342, auc_score: 0.78535


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:17<00:00, 63.58s/it]
[32m[I 2023-03-14 10:49:25,111][0m Trial 7 finished with value: 0.7883428931236267 and parameters: {'no_components': 60, 'learning_rate': 0.00897190202372921, 'item_alpha': 0.0043688217266823, 'user_alpha': 0.001041311656141226}. Best is trial 0 with value: 0.7900952696800232.[0m


no_comp: 60, lrn_rate: 0.00897, item_alpha: 0.00437, user_alpha: 0.00104, precision: 0.00377, recall: 0.01699, auc_score: 0.78834


Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [09:46<00:00, 117.20s/it]
[32m[I 2023-03-14 11:09:11,616][0m Trial 8 finished with value: 0.7941944599151611 and parameters: {'no_components': 100, 'learning_rate': 0.009893046603840893, 'item_alpha': 0.0030773642617245816, 'user_alpha': 0.005040129725222948}. Best is trial 8 with value: 0.7941944599151611.[0m


no_comp: 100, lrn_rate: 0.00989, item_alpha: 0.00308, user_alpha: 0.00504, precision: 0.00382, recall: 0.01728, auc_score: 0.79419


Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [08:56<00:00, 107.23s/it]
[32m[I 2023-03-14 11:27:10,355][0m Trial 9 finished with value: 0.7905226945877075 and parameters: {'no_components': 90, 'learning_rate': 0.006973741933003714, 'item_alpha': 0.003934295451587753, 'user_alpha': 0.006095542302565062}. Best is trial 8 with value: 0.7941944599151611.[0m


no_comp: 90, lrn_rate: 0.00697, item_alpha: 0.00393, user_alpha: 0.00610, precision: 0.00386, recall: 0.01744, auc_score: 0.79052


Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [09:56<00:00, 119.29s/it]
[32m[I 2023-03-14 11:47:01,520][0m Trial 10 finished with value: 0.7954165935516357 and parameters: {'no_components': 100, 'learning_rate': 0.009932208333231074, 'item_alpha': 0.00709020991706499, 'user_alpha': 0.009417827767001969}. Best is trial 10 with value: 0.7954165935516357.[0m


no_comp: 100, lrn_rate: 0.00993, item_alpha: 0.00709, user_alpha: 0.00942, precision: 0.00417, recall: 0.01967, auc_score: 0.79542


Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [09:36<00:00, 115.30s/it]
[32m[I 2023-03-14 12:06:32,043][0m Trial 11 finished with value: 0.7956605553627014 and parameters: {'no_components': 100, 'learning_rate': 0.009940909725802523, 'item_alpha': 0.007755873955143551, 'user_alpha': 0.009853008299281606}. Best is trial 11 with value: 0.7956605553627014.[0m


no_comp: 100, lrn_rate: 0.00994, item_alpha: 0.00776, user_alpha: 0.00985, precision: 0.00417, recall: 0.01984, auc_score: 0.79566


Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [08:39<00:00, 103.92s/it]
[32m[I 2023-03-14 12:23:42,416][0m Trial 12 finished with value: 0.7944998741149902 and parameters: {'no_components': 90, 'learning_rate': 0.00984209390354189, 'item_alpha': 0.007849618827532778, 'user_alpha': 0.009818703410986054}. Best is trial 11 with value: 0.7956605553627014.[0m


no_comp: 90, lrn_rate: 0.00984, item_alpha: 0.00785, user_alpha: 0.00982, precision: 0.00391, recall: 0.01777, auc_score: 0.79450


Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [09:41<00:00, 116.33s/it]
[32m[I 2023-03-14 12:43:44,944][0m Trial 13 finished with value: 0.7873353362083435 and parameters: {'no_components': 100, 'learning_rate': 0.005031117534812491, 'item_alpha': 0.007710480775616058, 'user_alpha': 0.00993501348729219}. Best is trial 11 with value: 0.7956605553627014.[0m


no_comp: 100, lrn_rate: 0.00503, item_alpha: 0.00771, user_alpha: 0.00994, precision: 0.00350, recall: 0.01520, auc_score: 0.78734


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [08:15<00:00, 99.04s/it]
[32m[I 2023-03-14 13:00:12,980][0m Trial 14 finished with value: 0.7935495376586914 and parameters: {'no_components': 80, 'learning_rate': 0.009941055289895005, 'item_alpha': 0.009467355445034954, 'user_alpha': 0.008711364041285079}. Best is trial 11 with value: 0.7956605553627014.[0m


no_comp: 80, lrn_rate: 0.00994, item_alpha: 0.00947, user_alpha: 0.00871, precision: 0.00392, recall: 0.01775, auc_score: 0.79355


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [08:16<00:00, 99.36s/it]
[32m[I 2023-03-14 13:16:46,057][0m Trial 15 finished with value: 0.7921397686004639 and parameters: {'no_components': 80, 'learning_rate': 0.008195048870903845, 'item_alpha': 0.006907477937234164, 'user_alpha': 0.008768325480768176}. Best is trial 11 with value: 0.7956605553627014.[0m


no_comp: 80, lrn_rate: 0.00820, item_alpha: 0.00691, user_alpha: 0.00877, precision: 0.00387, recall: 0.01760, auc_score: 0.79214


Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [09:18<00:00, 111.80s/it]
[32m[I 2023-03-14 13:35:39,877][0m Trial 16 finished with value: 0.7924544215202332 and parameters: {'no_components': 90, 'learning_rate': 0.008288038064501343, 'item_alpha': 0.006627829134077943, 'user_alpha': 0.0065749376267359855}. Best is trial 11 with value: 0.7956605553627014.[0m


no_comp: 90, lrn_rate: 0.00829, item_alpha: 0.00663, user_alpha: 0.00657, precision: 0.00381, recall: 0.01721, auc_score: 0.79245


Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [10:04<00:00, 120.85s/it]
[32m[I 2023-03-14 13:56:03,573][0m Trial 17 finished with value: 0.7942331433296204 and parameters: {'no_components': 100, 'learning_rate': 0.009393266167179158, 'item_alpha': 0.009569967863489013, 'user_alpha': 0.00888210231907306}. Best is trial 11 with value: 0.7956605553627014.[0m


no_comp: 100, lrn_rate: 0.00939, item_alpha: 0.00957, user_alpha: 0.00888, precision: 0.00386, recall: 0.01744, auc_score: 0.79423


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [08:17<00:00, 99.57s/it]
[32m[I 2023-03-14 14:12:29,273][0m Trial 18 finished with value: 0.792930006980896 and parameters: {'no_components': 80, 'learning_rate': 0.00945874250102717, 'item_alpha': 0.008250309819740616, 'user_alpha': 0.007791842444526473}. Best is trial 11 with value: 0.7956605553627014.[0m


no_comp: 80, lrn_rate: 0.00946, item_alpha: 0.00825, user_alpha: 0.00779, precision: 0.00385, recall: 0.01746, auc_score: 0.79293


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [07:04<00:00, 84.94s/it]
[33m[W 2023-03-14 14:23:54,622][0m Trial 19 failed with parameters: {'no_components': 90, 'learning_rate': 0.008641169469838991, 'item_alpha': 0.0063916245049787405, 'user_alpha': 0.009686035732046409} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "C:\Users\SSAFY\anaconda3\envs\mini-project\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\SSAFY\AppData\Local\Temp\ipykernel_27684\134447992.py", line 22, in objective
    test_recall = recall_at_k(model, valid_interactions, k=9, item_features=item_features, user_features=user_features).mean()
  File "C:\Users\SSAFY\anaconda3\envs\mini-project\lib\site-packages\lightfm\evaluation.py", line 148, in recall_at_k
    ranks = model.predict_rank(
  File "C:\Users\SSAFY\anaconda3\envs\mini-project\lib\si

KeyboardInterrupt: 

In [24]:
print("Best Params : {}".format(study.best_params))
print()
print("Best Trials : {}".format(study.best_trials))
print()
print("Best Values : {}".format(study.best_value))

Best Params : {'no_components': 100, 'learning_rate': 0.009940909725802523, 'item_alpha': 0.007755873955143551, 'user_alpha': 0.009853008299281606}

Best Trials : [FrozenTrial(number=11, state=TrialState.COMPLETE, values=[0.7956605553627014], datetime_start=datetime.datetime(2023, 3, 14, 11, 47, 1, 521114), datetime_complete=datetime.datetime(2023, 3, 14, 12, 6, 32, 43161), params={'no_components': 100, 'learning_rate': 0.009940909725802523, 'item_alpha': 0.007755873955143551, 'user_alpha': 0.009853008299281606}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'no_components': IntDistribution(high=100, log=False, low=40, step=10), 'learning_rate': FloatDistribution(high=0.01, log=False, low=0.005, step=None), 'item_alpha': FloatDistribution(high=0.01, log=False, low=0.001, step=None), 'user_alpha': FloatDistribution(high=0.01, log=False, low=0.001, step=None)}, trial_id=11, value=None)]

Best Values : 0.7956605553627014


no_comp: 100, lrn_rate: 0.00994, item_alpha: 0.00776, user_alpha: 0.00985, precision: 0.00417, recall: 0.01984, auc_score: 0.79566

Best Params : {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.001}
Best Values : 0.8078765869140625

### Evaluation
---
- 1차 최적화 hyper parameter 결과
- Best Params : {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.001}
- Best Values : 0.8078765869140625

In [36]:
min_max_lightfm = LightFM(
    no_components=40, learning_rate=0.05000, item_alpha=0.00500, user_alpha=0.00050,learning_schedule='adagrad',loss="warp", random_state=42
)
%time min_max_lightfm.fit(interactions=train_interactions, user_features=user_features, item_features=item_features, epochs=10, verbose=True)

Epoch: 100%|███████████████████████████████████████████████████████████████████████████| 10/10 [05:30<00:00, 33.00s/it]

CPU times: total: 5min 25s
Wall time: 5min 30s





<lightfm.lightfm.LightFM at 0x1fd697cf070>

In [37]:
test_precision = precision_at_k(min_max_lightfm, test_interactions, user_features=user_features, item_features=item_features, k=9).mean()
test_recall = recall_at_k(min_max_lightfm, test_interactions,  user_features=user_features, item_features=item_features, k=9).mean()
test_auc = auc_score(min_max_lightfm, test_interactions, user_features=user_features, item_features=item_features).mean()

print("Train precision: %.5f" % test_precision)
print("Test precision: %.5f" % test_recall)
print("Test AUC Score : %.5f" % test_auc)

Train precision: 0.00731
Test precision: 0.04211
Test AUC Score : 0.81388


### save

In [38]:
# save the model to a file
with open('min_max_lightfm.pkl', 'wb') as f:
    pickle.dump(min_max_lightfm, f)