# 기존 코드 이용
---
- Min-Max Normalization
- user_features : price_tier, flavor(13 columns)
- item_features : Categorical [ category, price_tier, abv, flavor(13 columns) ]
- interactions : 기본 값 [ rating.csv ] + new_user

---
## 재학습 최적화
---
- LightFM 모델에 `fit_partial`이 있지만, 학습 시간은 `fit`과 별 차이가 없음을 확인했습니다.
- `Dataset.fit_partial` 또한 기존 라이브러리로는 item_features, user_features를 재학습하기 어려워 매번 새로 불러와야 합니다.
- user_features의 경우 재학습 시에는 학습된 취향 정보가 들어가고, 이후 예측에서는 그 당시 입력받은 취향(입맛) 정보를 사용합니다.


In [10]:
import os
import pandas as pd
import numpy as np
from time import time
from tqdm.notebook import tqdm
import pickle

from scipy.sparse import coo_matrix, csr_matrix

from lightfm.cross_validation import random_train_test_split
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k, auc_score, recall_at_k

from hyperopt import fmin, hp, tpe, Trials

In [2]:
# Load the item/user feature tables, the ratings and the whisky metadata.
# The first CSV column is used as the DataFrame index in every file.
# NOTE(review): the feature files are read from ./dataset while rating/whisky
# come from ../dataset — confirm both relative paths are intended.
item_features = pd.read_csv("dataset/item_features.csv", index_col=0, encoding="UTF-8")
user_features = pd.read_csv("dataset/user_features.csv", index_col=0, encoding="UTF-8")
rating = pd.read_csv("../dataset/rating.csv", index_col=0, encoding="UTF-8")
whisky = pd.read_csv("../dataset/whisky.csv", index_col=0, encoding="UTF-8")

In [3]:
item_features

Unnamed: 0,category,price_tier,abv,smoky,peaty,spicy,herbal,oily,body,rich,sweet,salty,vanilla,tart,fruity,floral
0,1,4,0.260,0.30,0.85,0.50,0.30,0.20,0.80,0.80,0.85,0.166667,0.20,0.25,0.85,0.526316
1,12,3,0.260,0.40,0.30,0.40,0.20,0.40,0.70,0.80,0.70,0.444444,0.50,0.50,0.70,0.210526
2,5,4,0.542,0.15,0.00,0.20,0.00,0.15,0.80,0.90,0.85,0.055556,0.30,0.25,0.35,0.000000
3,5,3,0.569,0.40,0.00,0.65,0.50,0.20,0.60,0.60,0.45,0.000000,0.60,0.60,0.45,0.000000
4,12,4,0.478,0.30,0.20,0.40,0.30,0.10,0.75,0.75,0.60,0.222222,0.30,0.20,0.50,0.052632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3530,8,0,0.260,0.35,0.00,0.20,0.00,0.00,0.40,0.55,0.85,0.000000,0.25,0.10,0.10,0.000000
3531,2,0,0.200,0.00,0.00,0.40,0.20,0.00,0.30,0.00,1.00,0.000000,1.00,0.00,0.60,0.105263
3532,0,2,0.240,0.90,0.10,0.10,0.10,0.40,0.10,0.10,0.90,0.000000,0.50,0.20,0.30,0.105263
3533,9,1,0.290,0.00,0.00,0.10,1.00,0.80,0.00,0.00,0.40,0.000000,0.20,0.00,0.00,0.000000


In [4]:
# Store the user's price tier as an integer column.
user_features["price_tier"] = user_features["price_tier"].astype("int")

In [5]:
user_features

Unnamed: 0,price_tier,smoky,peaty,spicy,herbal,oily,body,rich,sweet,salty,vanilla,tart,fruity,floral
0,5,0.30,0.85,0.5,0.30,0.20,0.8,0.80,0.85,0.166667,0.20,0.25,0.85,0.526316
1,5,0.30,0.00,0.4,0.10,0.30,0.7,0.80,0.80,0.111111,0.60,0.20,0.90,0.105263
2,4,0.30,0.20,0.6,0.20,0.40,0.6,0.60,0.70,0.111111,0.60,0.50,0.70,0.421053
3,5,0.20,0.00,0.6,0.40,0.30,0.7,0.70,0.60,0.111111,0.40,0.50,0.80,0.315789
4,4,0.15,0.00,0.2,0.00,0.15,0.8,0.85,0.80,0.055556,0.30,0.25,0.40,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119510,1,0.00,0.00,0.4,0.20,0.00,0.3,0.00,1.00,0.000000,1.00,0.00,0.60,0.105263
119511,1,0.00,0.00,0.4,0.20,0.00,0.3,0.00,1.00,0.000000,1.00,0.00,0.60,0.105263
119512,1,0.31,0.00,0.4,0.29,0.00,0.3,0.40,1.00,0.000000,0.55,0.00,0.30,0.105263
119513,1,0.31,0.00,0.4,0.29,0.00,0.3,0.40,1.00,0.000000,0.55,0.00,0.30,0.105263


In [6]:
# Convert both feature tables to SciPy CSR matrices; these are what the
# LightFM fit/evaluation calls below receive as item_features/user_features.
# NOTE(review): the DataFrame values are used as-is, so category and
# price_tier ids enter as numeric feature weights rather than one-hot
# indicators, and no per-user/per-item identity columns are added — confirm
# this is the intended encoding.
item_features = csr_matrix(item_features)
user_features = csr_matrix(user_features)

In [7]:
print(user_features)

  (0, 0)	5.0
  (0, 1)	0.3
  (0, 2)	0.85
  (0, 3)	0.5
  (0, 4)	0.3
  (0, 5)	0.2
  (0, 6)	0.8
  (0, 7)	0.8
  (0, 8)	0.85
  (0, 9)	0.1666666666666666
  (0, 10)	0.2
  (0, 11)	0.25
  (0, 12)	0.85
  (0, 13)	0.5263157894736842
  (1, 0)	5.0
  (1, 1)	0.3
  (1, 3)	0.4
  (1, 4)	0.1
  (1, 5)	0.3
  (1, 6)	0.7000000000000001
  (1, 7)	0.8
  (1, 8)	0.8
  (1, 9)	0.1111111111111111
  (1, 10)	0.6
  (1, 11)	0.2
  :	:
  (119512, 7)	0.4
  (119512, 8)	1.0
  (119512, 10)	0.55
  (119512, 12)	0.3
  (119512, 13)	0.1052631578947368
  (119513, 0)	1.0
  (119513, 1)	0.31
  (119513, 3)	0.4
  (119513, 4)	0.29
  (119513, 6)	0.3
  (119513, 7)	0.4
  (119513, 8)	1.0
  (119513, 10)	0.55
  (119513, 12)	0.3
  (119513, 13)	0.1052631578947368
  (119514, 0)	1.0
  (119514, 1)	0.31
  (119514, 3)	0.4
  (119514, 4)	0.29
  (119514, 6)	0.3
  (119514, 7)	0.4
  (119514, 8)	1.0
  (119514, 10)	0.55
  (119514, 12)	0.3
  (119514, 13)	0.1052631578947368


In [8]:
unique_user = rating["user_id"].unique()

In [9]:
unique_user

array([     0,      1,      2, ..., 119512, 119513, 119514], dtype=int64)

### make Interactions 

In [12]:
# Register user and item ids with the LightFM Dataset as 0..n_unique-1.
# assumes user_id / whisky_id are contiguous integers starting at 0 — TODO confirm
dataset = Dataset()
dataset.fit(users=np.arange(rating.user_id.nunique()), items=np.arange(whisky.whisky_id.nunique()))

In [13]:
rating

Unnamed: 0,user_id,whisky_id,rating
0,0,0,9.0
1,1,0,7.0
2,2,0,9.0
3,3,0,10.0
4,4,0,9.0
...,...,...,...
908171,4828,3534,4.0
908172,119513,3534,2.0
908173,119514,3534,8.0
908174,3123,3534,4.0


In [19]:
%time interactions, weights = dataset.build_interactions([tuple(x) for x in rating.itertuples(index=False)])

CPU times: total: 2 s
Wall time: 2 s


In [20]:
interactions

<119515x3535 sparse matrix of type '<class 'numpy.int32'>'
	with 908176 stored elements in COOrdinate format>

In [21]:
weights

<119515x3535 sparse matrix of type '<class 'numpy.float32'>'
	with 908176 stored elements in COOrdinate format>

In [None]:
rating.shape

In [None]:
def create_user_item_interactions(rating, n_users, n_items):
    """Build a sparse user x item matrix holding the rating values.

    Args:
        rating: DataFrame with ``user_id``, ``whisky_id`` and ``rating`` columns.
        n_users: Number of rows of the resulting matrix.
        n_items: Number of columns of the resulting matrix.

    Returns:
        A ``scipy.sparse.coo_matrix`` of shape ``(n_users, n_items)`` with one
        stored entry per row of ``rating``.
    """
    row_idx = rating["user_id"].values
    col_idx = rating["whisky_id"].values
    scores = rating["rating"].values
    return coo_matrix((scores, (row_idx, col_idx)), shape=(n_users, n_items))

In [None]:
# Build the explicit rating matrix sized by the number of distinct users/items.
# NOTE(review): this rebinds `interactions`, replacing the matrix returned by
# dataset.build_interactions if that cell was run earlier — confirm which one
# the later split cells are meant to use.
interactions = create_user_item_interactions(rating, rating["user_id"].nunique(), whisky["whisky_id"].nunique())

In [None]:
rating

In [None]:
print(interactions)
interactions

## Train_Test data split

In [22]:
# Hold out 20% of the stored entries as the test set.
# interactions and weights are split by two separate calls that share the same
# random_state. NOTE(review): this relies on both matrices storing their
# entries in the same order so the two splits line up — confirm.
train_interactions, test_interactions = random_train_test_split(interactions, test_percentage=0.2, random_state=42)
train_weights, test_weights = random_train_test_split(weights, test_percentage=0.2, random_state=42)

In [23]:
# Split the remaining training data again (80/20) to get a validation set.
# Same pattern as the test split: interactions and weights are split by
# separate calls with an identical random_state.
train_interactions, valid_interactions = random_train_test_split(train_interactions, test_percentage=0.2, random_state=42)
train_weights, valid_weights = random_train_test_split(train_weights, test_percentage=0.2, random_state=42)

In [24]:
train_interactions.toarray()

array([[1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [26]:
train_weights.toarray()

array([[9., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [9., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

## Optuna 사용한 HyperParameter 최적화
---
- learning_rate, alpha 등의 하이퍼 파라미터 값을 작게 설정했을 때보다 높게 설정했을 때 AUC Score가 높게 나오는 경향이 보였습니다.
- HyperOPT는 베이지안 최적화(TPE) 접근 기반인데 이 부분을 제대로 알지 못하기 때문에 Optuna 방식으로 변경하겠습니다. (참고: Optuna의 기본 샘플러도 TPE 기반이며, 전체 조합을 모두 탐색하는 방식은 아닙니다.)
- Optuna의 경우 시각화도 가능하고, GridSearchCV보다 빠르다는 장점이 있습니다.

In [27]:
import optuna

In [28]:
def objective(trial):
    """Optuna objective: fit LightFM on the train split and score it on validation.

    Args:
        trial: Optuna trial used to sample ``no_components``, ``learning_rate``,
            ``item_alpha`` and ``user_alpha``.

    Returns:
        Mean AUC over ``valid_interactions``.

    Reads the notebook globals ``train_interactions``, ``train_weights``,
    ``valid_interactions``, ``user_features`` and ``item_features``.
    """
    # Hyperparameters to tune; schedule, loss and seed stay fixed.
    params = {
        "learning_schedule": "adagrad",
        "loss": "warp",
        "random_state": 42,
        # `step` is keyword-only in Optuna 3.x; passing it positionally is deprecated.
        "no_components": trial.suggest_int("no_components", 30, 100, step=10),
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        "item_alpha": trial.suggest_float("item_alpha", 1e-5, 1e-2, log=True),
        "user_alpha": trial.suggest_float("user_alpha", 1e-5, 1e-2, log=True),
    }

    model = LightFM(**params)

    model.fit(
        interactions=train_interactions,
        sample_weight=train_weights,
        user_features=user_features,
        item_features=item_features,
        epochs=5,
        verbose=True,
    )

    # All three metrics are computed on the validation split with the same features.
    # NOTE(review): train_interactions is not passed to the metric functions, so
    # known training positives are not excluded from the rankings — confirm intended.
    eval_kwargs = {"item_features": item_features, "user_features": user_features}
    valid_precision = precision_at_k(model, valid_interactions, k=9, **eval_kwargs).mean()
    valid_recall = recall_at_k(model, valid_interactions, k=9, **eval_kwargs).mean()
    valid_auc = auc_score(model, valid_interactions, **eval_kwargs).mean()

    print("no_comp: {}, lrn_rate: {:.5f}, item_alpha: {:.5f}, user_alpha: {:.5f}, precision: {:.5f}, recall: {:.5f}, auc_score: {:.5f}".format(
      params["no_components"], params["learning_rate"], params["item_alpha"], params["user_alpha"], valid_precision, valid_recall, valid_auc))
    return valid_auc

In [None]:
# Run 30 trials, maximising the value returned by objective().
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)
print(study.best_params)

[32m[I 2023-03-21 14:05:45,587][0m A new study created in memory with name: no-name-be166fbd-feae-4a9d-9a2b-4b14a52b4fc1[0m
Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [02:27<00:00, 29.42s/it]
[32m[I 2023-03-21 14:10:57,089][0m Trial 0 finished with value: 0.759311318397522 and parameters: {'no_components': 30, 'learning_rate': 0.0006972396655845984, 'item_alpha': 0.00010728365472524597, 'user_alpha': 0.0003188305219635814}. Best is trial 0 with value: 0.759311318397522.[0m


no_comp: 30, lrn_rate: 0.00070, item_alpha: 0.00011, user_alpha: 0.00032, precision: 0.00116, recall: 0.00437, auc_score: 0.75931


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [06:14<00:00, 74.92s/it]
[32m[I 2023-03-21 14:23:08,522][0m Trial 1 finished with value: 0.7697624564170837 and parameters: {'no_components': 80, 'learning_rate': 0.00146406013549102, 'item_alpha': 0.00039896483923536693, 'user_alpha': 0.0030338727355386274}. Best is trial 1 with value: 0.7697624564170837.[0m


no_comp: 80, lrn_rate: 0.00146, item_alpha: 0.00040, user_alpha: 0.00303, precision: 0.00169, recall: 0.00618, auc_score: 0.76976


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:36<00:00, 55.29s/it]
[32m[I 2023-03-21 14:32:16,171][0m Trial 2 finished with value: 0.7741205096244812 and parameters: {'no_components': 60, 'learning_rate': 0.0016618813933141515, 'item_alpha': 0.0011220278761806654, 'user_alpha': 0.0016713346866201637}. Best is trial 2 with value: 0.7741205096244812.[0m


no_comp: 60, lrn_rate: 0.00166, item_alpha: 0.00112, user_alpha: 0.00167, precision: 0.00177, recall: 0.00665, auc_score: 0.77412


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:47<00:00, 45.44s/it]
[32m[I 2023-03-21 14:39:56,647][0m Trial 3 finished with value: 0.7865231037139893 and parameters: {'no_components': 50, 'learning_rate': 0.0053538268183575535, 'item_alpha': 8.932421953699476e-05, 'user_alpha': 0.0009079445935793858}. Best is trial 3 with value: 0.7865231037139893.[0m


no_comp: 50, lrn_rate: 0.00535, item_alpha: 0.00009, user_alpha: 0.00091, precision: 0.00344, recall: 0.01483, auc_score: 0.78652


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:50<00:00, 58.04s/it]
[32m[I 2023-03-21 14:49:15,625][0m Trial 4 finished with value: 0.7343332767486572 and parameters: {'no_components': 60, 'learning_rate': 5.797788988452296e-05, 'item_alpha': 0.007606547761742977, 'user_alpha': 0.0015725843996544373}. Best is trial 3 with value: 0.7865231037139893.[0m


no_comp: 60, lrn_rate: 0.00006, item_alpha: 0.00761, user_alpha: 0.00157, precision: 0.00086, recall: 0.00285, auc_score: 0.73433


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [02:21<00:00, 28.28s/it]
[32m[I 2023-03-21 14:54:21,084][0m Trial 5 finished with value: 0.7485226988792419 and parameters: {'no_components': 30, 'learning_rate': 0.00029072270358485426, 'item_alpha': 4.876984999541098e-05, 'user_alpha': 0.00015521629643330234}. Best is trial 3 with value: 0.7865231037139893.[0m


no_comp: 30, lrn_rate: 0.00029, item_alpha: 0.00005, user_alpha: 0.00016, precision: 0.00088, recall: 0.00308, auc_score: 0.74852


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:13<00:00, 62.69s/it]
[32m[I 2023-03-21 15:04:48,052][0m Trial 6 finished with value: 0.7728850245475769 and parameters: {'no_components': 70, 'learning_rate': 0.0027242679187314445, 'item_alpha': 0.0021959441928035006, 'user_alpha': 1.2854323150659998e-05}. Best is trial 3 with value: 0.7865231037139893.[0m


no_comp: 70, lrn_rate: 0.00272, item_alpha: 0.00220, user_alpha: 0.00001, precision: 0.00293, recall: 0.01224, auc_score: 0.77289


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [07:32<00:00, 90.54s/it]
[32m[I 2023-03-21 15:20:07,527][0m Trial 7 finished with value: 0.7600218653678894 and parameters: {'no_components': 100, 'learning_rate': 0.0004657641910683507, 'item_alpha': 0.007291009388985145, 'user_alpha': 0.0008736708080538514}. Best is trial 3 with value: 0.7865231037139893.[0m


no_comp: 100, lrn_rate: 0.00047, item_alpha: 0.00729, user_alpha: 0.00087, precision: 0.00113, recall: 0.00426, auc_score: 0.76002


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [06:19<00:00, 75.85s/it]
[32m[I 2023-03-21 15:32:16,506][0m Trial 8 finished with value: 0.7738412618637085 and parameters: {'no_components': 80, 'learning_rate': 0.0024628458273809025, 'item_alpha': 0.005467660243788125, 'user_alpha': 2.1327234586351705e-05}. Best is trial 3 with value: 0.7865231037139893.[0m


no_comp: 80, lrn_rate: 0.00246, item_alpha: 0.00547, user_alpha: 0.00002, precision: 0.00301, recall: 0.01234, auc_score: 0.77384


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:48<00:00, 45.72s/it]
[32m[I 2023-03-21 15:40:52,234][0m Trial 9 finished with value: 0.6588549017906189 and parameters: {'no_components': 40, 'learning_rate': 1.76191361320692e-05, 'item_alpha': 0.00047704022401547937, 'user_alpha': 3.509363847923359e-05}. Best is trial 3 with value: 0.7865231037139893.[0m


no_comp: 40, lrn_rate: 0.00002, item_alpha: 0.00048, user_alpha: 0.00004, precision: 0.00141, recall: 0.00483, auc_score: 0.65885


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:42<00:00, 68.57s/it]
[32m[I 2023-03-21 15:52:33,913][0m Trial 10 finished with value: 0.7884501814842224 and parameters: {'no_components': 50, 'learning_rate': 0.005993320814989723, 'item_alpha': 1.0234093457241084e-05, 'user_alpha': 0.005937913061896029}. Best is trial 10 with value: 0.7884501814842224.[0m


no_comp: 50, lrn_rate: 0.00599, item_alpha: 0.00001, user_alpha: 0.00594, precision: 0.00347, recall: 0.01499, auc_score: 0.78845


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:26<00:00, 53.29s/it]
[32m[I 2023-03-21 16:00:59,063][0m Trial 11 finished with value: 0.7943575382232666 and parameters: {'no_components': 50, 'learning_rate': 0.00945138252983317, 'item_alpha': 1.0660030975758987e-05, 'user_alpha': 0.00888784858349413}. Best is trial 11 with value: 0.7943575382232666.[0m


no_comp: 50, lrn_rate: 0.00945, item_alpha: 0.00001, user_alpha: 0.00889, precision: 0.00385, recall: 0.01661, auc_score: 0.79436


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:47<00:00, 45.42s/it]
[32m[I 2023-03-21 16:09:22,382][0m Trial 12 finished with value: 0.7927873134613037 and parameters: {'no_components': 50, 'learning_rate': 0.00855235358955254, 'item_alpha': 1.299822804153153e-05, 'user_alpha': 0.009024183830404006}. Best is trial 11 with value: 0.7943575382232666.[0m


no_comp: 50, lrn_rate: 0.00855, item_alpha: 0.00001, user_alpha: 0.00902, precision: 0.00381, recall: 0.01659, auc_score: 0.79279


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:16<00:00, 63.34s/it]
[32m[I 2023-03-21 16:21:10,056][0m Trial 13 finished with value: 0.794884443283081 and parameters: {'no_components': 50, 'learning_rate': 0.009993784587732014, 'item_alpha': 1.2160848854017678e-05, 'user_alpha': 0.007303443968922652}. Best is trial 13 with value: 0.794884443283081.[0m


no_comp: 50, lrn_rate: 0.00999, item_alpha: 0.00001, user_alpha: 0.00730, precision: 0.00382, recall: 0.01656, auc_score: 0.79488


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:08<00:00, 37.77s/it]
[32m[I 2023-03-21 16:29:01,119][0m Trial 14 finished with value: 0.7697272300720215 and parameters: {'no_components': 40, 'learning_rate': 0.009773328357015625, 'item_alpha': 2.3332928529345463e-05, 'user_alpha': 0.009225100323641895}. Best is trial 13 with value: 0.794884443283081.[0m


no_comp: 40, lrn_rate: 0.00977, item_alpha: 0.00002, user_alpha: 0.00923, precision: 0.00297, recall: 0.01260, auc_score: 0.76973


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [07:47<00:00, 93.43s/it]
[32m[I 2023-03-21 16:42:29,754][0m Trial 15 finished with value: 0.7740240693092346 and parameters: {'no_components': 70, 'learning_rate': 0.003937847633821442, 'item_alpha': 3.0057891137041387e-05, 'user_alpha': 0.004441416717994168}. Best is trial 13 with value: 0.794884443283081.[0m


no_comp: 70, lrn_rate: 0.00394, item_alpha: 0.00003, user_alpha: 0.00444, precision: 0.00300, recall: 0.01259, auc_score: 0.77402


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:06<00:00, 37.36s/it]
[32m[I 2023-03-21 16:48:54,007][0m Trial 16 finished with value: 0.7704907655715942 and parameters: {'no_components': 40, 'learning_rate': 0.009977698107957515, 'item_alpha': 1.103249363316108e-05, 'user_alpha': 0.0036746409889844836}. Best is trial 13 with value: 0.794884443283081.[0m


no_comp: 40, lrn_rate: 0.00998, item_alpha: 0.00001, user_alpha: 0.00367, precision: 0.00303, recall: 0.01284, auc_score: 0.77049


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [07:32<00:00, 90.51s/it]
[32m[I 2023-03-21 17:04:07,612][0m Trial 17 finished with value: 0.7717212438583374 and parameters: {'no_components': 100, 'learning_rate': 0.0011710305966751346, 'item_alpha': 0.00013387133953531442, 'user_alpha': 0.007969190708256189}. Best is trial 13 with value: 0.794884443283081.[0m


no_comp: 100, lrn_rate: 0.00117, item_alpha: 0.00013, user_alpha: 0.00797, precision: 0.00170, recall: 0.00627, auc_score: 0.77172


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:29<00:00, 53.87s/it]
[32m[I 2023-03-21 17:13:11,894][0m Trial 18 finished with value: 0.7868652939796448 and parameters: {'no_components': 60, 'learning_rate': 0.004040000412358335, 'item_alpha': 2.645397920531286e-05, 'user_alpha': 0.003079602550526222}. Best is trial 13 with value: 0.794884443283081.[0m


no_comp: 60, lrn_rate: 0.00404, item_alpha: 0.00003, user_alpha: 0.00308, precision: 0.00324, recall: 0.01341, auc_score: 0.78687


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [06:17<00:00, 75.58s/it]
[32m[I 2023-03-21 17:25:18,106][0m Trial 19 finished with value: 0.7755270600318909 and parameters: {'no_components': 80, 'learning_rate': 0.003565846855879218, 'item_alpha': 4.599374642666679e-05, 'user_alpha': 0.009472259040417265}. Best is trial 13 with value: 0.794884443283081.[0m


no_comp: 80, lrn_rate: 0.00357, item_alpha: 0.00005, user_alpha: 0.00947, precision: 0.00318, recall: 0.01320, auc_score: 0.77553


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:53<00:00, 46.77s/it]
[32m[I 2023-03-21 17:34:12,287][0m Trial 20 finished with value: 0.7447217106819153 and parameters: {'no_components': 50, 'learning_rate': 0.00016690036041717662, 'item_alpha': 1.635265844388037e-05, 'user_alpha': 0.0020431929584352484}. Best is trial 13 with value: 0.794884443283081.[0m


no_comp: 50, lrn_rate: 0.00017, item_alpha: 0.00002, user_alpha: 0.00204, precision: 0.00086, recall: 0.00302, auc_score: 0.74472


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [05:22<00:00, 64.41s/it]
[32m[I 2023-03-21 17:45:41,319][0m Trial 21 finished with value: 0.7952060699462891 and parameters: {'no_components': 50, 'learning_rate': 0.00985747807667858, 'item_alpha': 1.1730784307821818e-05, 'user_alpha': 0.009567141149640376}. Best is trial 21 with value: 0.7952060699462891.[0m


no_comp: 50, lrn_rate: 0.00986, item_alpha: 0.00001, user_alpha: 0.00957, precision: 0.00389, recall: 0.01672, auc_score: 0.79521


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [04:03<00:00, 48.77s/it]
[32m[I 2023-03-21 17:53:07,195][0m Trial 22 finished with value: 0.7683109641075134 and parameters: {'no_components': 40, 'learning_rate': 0.005449795162936488, 'item_alpha': 1.0199038855024987e-05, 'user_alpha': 0.005120007490177888}. Best is trial 21 with value: 0.7952060699462891.[0m


no_comp: 40, lrn_rate: 0.00545, item_alpha: 0.00001, user_alpha: 0.00512, precision: 0.00281, recall: 0.01182, auc_score: 0.76831


Epoch: 100%|█████████████████████████████████████████████████████████████████████████████| 5/5 [03:39<00:00, 44.00s/it]


BEST no_comp: 120, lrn_rate: 0.01000, item_alpha: 0.05000, user_alpha: 0.01000, precision: 0.00468, recall: 0.02312, auc_score: 0.80381

In [None]:
# Summarise the finished study: best hyperparameters, best trial(s) and the
# best objective value, separated by blank lines.
print("Best Params : {}".format(study.best_params))
print()
print("Best Trials : {}".format(study.best_trials))
print()
print("Best Values : {}".format(study.best_value))

## Best Parameter fitting

In [None]:
# Fit LightFM with fixed hyperparameters for 10 epochs on the train split.
# NOTE(review): no_components=120 and item_alpha=0.05 lie outside the ranges
# searched in objective() (30-100 and 1e-5..1e-2) — confirm where they come from.
# NOTE(review): unlike objective(), no sample_weight is passed here — confirm intended.
model = LightFM(
    no_components=120, learning_rate=0.01000, item_alpha=0.0500, user_alpha=0.01000,learning_schedule='adagrad',loss="warp", random_state=42
)
%time model.fit(interactions=train_interactions, user_features=user_features, item_features=item_features, epochs=10, verbose=True)

In [None]:
# Evaluate `model` on the held-out test split: precision@9, recall@9 and AUC,
# each reduced with .mean() over the values the metric function returns.
test_precision = precision_at_k(model, test_interactions, user_features=user_features, item_features=item_features, k=9).mean()
test_recall = recall_at_k(model, test_interactions,  user_features=user_features, item_features=item_features, k=9).mean()
test_auc = auc_score(model, test_interactions, user_features=user_features, item_features=item_features).mean()

# Labels name the metric actually printed (previously "Train precision" was
# shown for test precision and "Test precision" for test recall).
print("Test precision: %.5f" % test_precision)
print("Test recall: %.5f" % test_recall)
print("Test AUC Score : %.5f" % test_auc)

In [None]:
# Same hyperparameters as the 10-epoch model above, trained for only 5 epochs.
# NOTE(review): unlike objective(), no sample_weight is passed here — confirm intended.
model2 = LightFM(
    no_components=120, learning_rate=0.01000, item_alpha=0.0500, user_alpha=0.01000,learning_schedule='adagrad',loss="warp", random_state=42
)
%time model2.fit(interactions=train_interactions, user_features=user_features, item_features=item_features, epochs=5, verbose=True)

In [None]:
# Evaluate `model2` on the held-out test split: precision@9, recall@9 and AUC,
# each reduced with .mean() over the values the metric function returns.
test_precision = precision_at_k(model2, test_interactions, user_features=user_features, item_features=item_features, k=9).mean()
test_recall = recall_at_k(model2, test_interactions,  user_features=user_features, item_features=item_features, k=9).mean()
test_auc = auc_score(model2, test_interactions, user_features=user_features, item_features=item_features).mean()

# Labels name the metric actually printed (previously "Train precision" was
# shown for test precision and "Test precision" for test recall).
print("Test precision: %.5f" % test_precision)
print("Test recall: %.5f" % test_recall)
print("Test AUC Score : %.5f" % test_auc)

# 시간 고려한 최적 모델
---
- 시간까지 고려했을 때, 아래 hyper parameter가 좀 더 합리적이다

BEST no_comp: 60, lrn_rate: 0.01000, item_alpha: 0.05000, user_alpha: 0.01000, precision: 0.00430, recall: 0.02030, auc_score: 0.80244

In [None]:
# Smaller model (60 components) trained for 5 epochs on the train split.
# NOTE(review): item_alpha=0.05 lies outside the range searched in objective()
# (1e-5..1e-2), and unlike objective() no sample_weight is passed — confirm intended.
model3 = LightFM(
    no_components=60, learning_rate=0.01000, item_alpha=0.0500, user_alpha=0.01000,learning_schedule='adagrad',loss="warp", random_state=42
)
%time model3.fit(interactions=train_interactions, user_features=user_features, item_features=item_features, epochs=5, verbose=True)

In [None]:
# Evaluate `model3` on the held-out test split: precision@9, recall@9 and AUC,
# each reduced with .mean() over the values the metric function returns.
test_precision = precision_at_k(model3, test_interactions, user_features=user_features, item_features=item_features, k=9).mean()
test_recall = recall_at_k(model3, test_interactions,  user_features=user_features, item_features=item_features, k=9).mean()
test_auc = auc_score(model3, test_interactions, user_features=user_features, item_features=item_features).mean()

# Labels name the metric actually printed (previously "Train precision" was
# shown for test precision and "Test precision" for test recall).
print("Test precision: %.5f" % test_precision)
print("Test recall: %.5f" % test_recall)
print("Test AUC Score : %.5f" % test_auc)

In [None]:
# Serialise the fitted model3 with pickle into rec_model.pkl in the current directory.
with open('rec_model.pkl', 'wb') as f:
    pickle.dump(model3, f)

In [None]:
# Load a previously pickled model from model/model_v4.pkl.
# NOTE(review): this is not the rec_model.pkl path written by the save cell —
# confirm which artifact is meant to be evaluated below.
# pickle.load can execute arbitrary code; only load files from a trusted source.
with open('model/model_v4.pkl', 'rb') as f:
    model_temp = pickle.load(f)


In [None]:
# Evaluate the reloaded `model_temp` on the held-out test split: precision@9,
# recall@9 and AUC, each reduced with .mean() over the returned values.
test_precision = precision_at_k(model_temp, test_interactions, user_features=user_features, item_features=item_features, k=9).mean()
test_recall = recall_at_k(model_temp, test_interactions,  user_features=user_features, item_features=item_features, k=9).mean()
test_auc = auc_score(model_temp, test_interactions, user_features=user_features, item_features=item_features).mean()

# Labels name the metric actually printed (previously "Train precision" was
# shown for test precision and "Test precision" for test recall).
print("Test precision: %.5f" % test_precision)
print("Test recall: %.5f" % test_recall)
print("Test AUC Score : %.5f" % test_auc)