# LightFM (user item trancation 2)
* user: 各ユーザーの最新購入の直近user_records個のデータのみ学習に使う
* item: 学習期間の最後item_days日に含まれるアイテムのみ学習＆予測に使う

In [1]:
import datetime

import faiss
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import psutil
from lightfm import LightFM
from scipy.sparse import lil_matrix
from utils import train_valid_split

import schema
from metric import mapk

In [2]:
transactions = pd.read_pickle('input/transformed/transactions_train.pkl')[['t_dat', 'user', 'item']]
users = pd.read_pickle('input/transformed/users.pkl')['user']
items = pd.read_pickle('input/transformed/items.pkl')['item']
TOPK = 12

valid_start_date = datetime.date(2020, 9, 16)
transactions_train, transactions_valid = train_valid_split(transactions, valid_start_date, 70)

[I 220309 11:51:59 utils:29] train: [2020-07-08, 2020-09-16)
[I 220309 11:51:59 utils:31] # of records: 2809233
[I 220309 11:51:59 utils:16] valid: [2020-09-16, 2020-09-23)
[I 220309 11:52:00 utils:18] # of records: 240311


In [18]:
def objective(trial: optuna.Trial) -> float:
    user_records = trial.suggest_int('user_records', 1, 100)
    item_days = trial.suggest_int('item_days', 7, 70, 7)
    no_components = trial.suggest_int('no_components', 128, 1024, 128)
    epochs = 100

    lightfm_params = {
        'no_components': no_components,
        'learning_schedule': 'adadelta',
        'loss': 'bpr',
        'learning_rate': 0.005,
        'item_alpha': 1e-8,
        'user_alpha': 1e-8,
    }

    start_date = valid_start_date - datetime.timedelta(days=item_days)
    transactions_item = sorted(transactions_train.query("t_dat >= @start_date")['item'].unique())

    print('item days', item_days)
    print(len(transactions_item))
    mp_item = dict(zip(transactions_item, range(len(transactions_item))))
    tr_train = transactions_train.copy()

    tr_train = tr_train.query("item in @transactions_item").reset_index(drop=True)
    tr_train = tr_train.drop_duplicates(ignore_index=True)

    tr_train['rnk'] = tr_train.groupby('user')['t_dat'].rank(method='first')
    print('user records', user_records)
    print('before', len(tr_train))
    tr_train = tr_train.query("rnk <= @user_records").reset_index(drop=True)
    print('after', len(tr_train))

    tr_valid = transactions_valid.copy()
    tr_train['item'] = tr_train['item'].apply(lambda x: mp_item[x])
    tr_valid['item'] = tr_valid['item'].apply(lambda x: mp_item.get(x, 123456789))
    val = tr_valid.groupby('user')['item'].apply(list).reset_index()
    n_item = len(transactions_item)
    n_user = len(users)

    train = lil_matrix((n_user, n_item))
    train[tr_train.user, tr_train.item] = 1

    model = LightFM(**lightfm_params)
    model.fit(train, epochs=epochs, num_threads=psutil.cpu_count(logical=False), verbose=True)

    index = faiss.index_factory(no_components, "Flat", faiss.METRIC_INNER_PRODUCT)
    index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0, index)
    index.add(model.item_embeddings)
    _, idxs = index.search(model.user_embeddings, TOPK)

    return mapk(val.item, idxs[val.user])

In [19]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, timeout=3600*8)

[32m[I 2022-03-09 01:01:25,758][0m A new study created in memory with name: no-name-db7597cb-dc63-423d-9f6f-36b3089adc0a[0m


item days 28
29009
user days 7
before 2775154
after 1688381


Epoch: 100%|██████████| 100/100 [04:51<00:00,  2.92s/it]
[32m[I 2022-03-09 01:06:37,291][0m Trial 0 finished with value: 0.02255802923842525 and parameters: {'user_days': 7, 'item_days': 28, 'no_components': 640}. Best is trial 0 with value: 0.02255802923842525.[0m


item days 42
32792
user days 56
before 2797986
after 2718059


Epoch: 100%|██████████| 100/100 [04:35<00:00,  2.76s/it]
[32m[I 2022-03-09 01:11:28,280][0m Trial 1 finished with value: 0.01789910280402429 and parameters: {'user_days': 56, 'item_days': 42, 'no_components': 384}. Best is trial 0 with value: 0.02255802923842525.[0m


item days 42
32792
user days 49
before 2797986
after 2627046


Epoch: 100%|██████████| 100/100 [07:55<00:00,  4.75s/it]
[32m[I 2022-03-09 01:19:47,934][0m Trial 2 finished with value: 0.018444032109301883 and parameters: {'user_days': 49, 'item_days': 42, 'no_components': 768}. Best is trial 0 with value: 0.02255802923842525.[0m


item days 42
32792
user days 21
before 2797986
after 2057586


Epoch: 100%|██████████| 100/100 [01:52<00:00,  1.13s/it]
[32m[I 2022-03-09 01:21:49,218][0m Trial 3 finished with value: 0.01934459260179415 and parameters: {'user_days': 21, 'item_days': 42, 'no_components': 128}. Best is trial 0 with value: 0.02255802923842525.[0m


item days 21
26503
user days 14
before 2748250
after 1864414


Epoch: 100%|██████████| 100/100 [02:33<00:00,  1.54s/it]
[32m[I 2022-03-09 01:24:33,492][0m Trial 4 finished with value: 0.02165345079455688 and parameters: {'user_days': 14, 'item_days': 21, 'no_components': 256}. Best is trial 0 with value: 0.02255802923842525.[0m


item days 21
26503
user days 14
before 2748250
after 1864414


Epoch: 100%|██████████| 100/100 [06:53<00:00,  4.13s/it]
[32m[I 2022-03-09 01:31:52,338][0m Trial 5 finished with value: 0.02248486482205698 and parameters: {'user_days': 14, 'item_days': 21, 'no_components': 896}. Best is trial 0 with value: 0.02255802923842525.[0m


item days 35
30979
user days 7
before 2789764
after 1694346


Epoch: 100%|██████████| 100/100 [06:26<00:00,  3.86s/it]
[32m[I 2022-03-09 01:38:45,010][0m Trial 6 finished with value: 0.022864496703678995 and parameters: {'user_days': 7, 'item_days': 35, 'no_components': 896}. Best is trial 6 with value: 0.022864496703678995.[0m


item days 7
18611
user days 70
before 2586623
after 2586623


Epoch: 100%|██████████| 100/100 [10:50<00:00,  6.50s/it]
[32m[I 2022-03-09 01:50:00,021][0m Trial 7 finished with value: 0.0177226038351356 and parameters: {'user_days': 70, 'item_days': 7, 'no_components': 1024}. Best is trial 6 with value: 0.022864496703678995.[0m


item days 28
29009
user days 21
before 2775154
after 2045298


Epoch: 100%|██████████| 100/100 [01:51<00:00,  1.11s/it]
[32m[I 2022-03-09 01:51:59,562][0m Trial 8 finished with value: 0.019221943560105936 and parameters: {'user_days': 21, 'item_days': 28, 'no_components': 128}. Best is trial 6 with value: 0.022864496703678995.[0m


item days 35
30979
user days 14
before 2789764
after 1884570


Epoch: 100%|██████████| 100/100 [01:46<00:00,  1.07s/it]
[32m[I 2022-03-09 01:53:54,852][0m Trial 9 finished with value: 0.01993475073845579 and parameters: {'user_days': 14, 'item_days': 35, 'no_components': 128}. Best is trial 6 with value: 0.022864496703678995.[0m


item days 63
37318
user days 35
before 2807797
after 2377682


Epoch: 100%|██████████| 100/100 [05:17<00:00,  3.17s/it]
[32m[I 2022-03-09 01:59:30,666][0m Trial 10 finished with value: 0.019415494827727905 and parameters: {'user_days': 35, 'item_days': 63, 'no_components': 512}. Best is trial 6 with value: 0.022864496703678995.[0m


item days 56
36054
user days 7
before 2805786
after 1700661


Epoch: 100%|██████████| 100/100 [04:54<00:00,  2.95s/it]
[32m[I 2022-03-09 02:04:47,245][0m Trial 11 finished with value: 0.02260522504628402 and parameters: {'user_days': 7, 'item_days': 56, 'no_components': 640}. Best is trial 6 with value: 0.022864496703678995.[0m


item days 63
37318
user days 35
before 2807797
after 2377682


Epoch: 100%|██████████| 100/100 [07:27<00:00,  4.48s/it]
[32m[I 2022-03-09 02:12:40,472][0m Trial 12 finished with value: 0.01975904194619919 and parameters: {'user_days': 35, 'item_days': 63, 'no_components': 768}. Best is trial 6 with value: 0.022864496703678995.[0m


item days 56
36054
user days 7
before 2805786
after 1700661


Epoch: 100%|██████████| 100/100 [07:46<00:00,  4.67s/it]
[32m[I 2022-03-09 02:20:57,765][0m Trial 13 finished with value: 0.022825693222618623 and parameters: {'user_days': 7, 'item_days': 56, 'no_components': 1024}. Best is trial 6 with value: 0.022864496703678995.[0m


item days 56
36054
user days 28
before 2805786
after 2220618


Epoch: 100%|██████████| 100/100 [09:21<00:00,  5.61s/it]
[32m[I 2022-03-09 02:30:49,480][0m Trial 14 finished with value: 0.020637883857391317 and parameters: {'user_days': 28, 'item_days': 56, 'no_components': 1024}. Best is trial 6 with value: 0.022864496703678995.[0m


item days 70
38540
user days 49
before 2809233
after 2636204


Epoch: 100%|██████████| 100/100 [09:09<00:00,  5.49s/it]
[32m[I 2022-03-09 02:40:29,368][0m Trial 15 finished with value: 0.01859216877332609 and parameters: {'user_days': 49, 'item_days': 70, 'no_components': 896}. Best is trial 6 with value: 0.022864496703678995.[0m


item days 49
34469
user days 7
before 2802618
after 1699471


Epoch: 100%|██████████| 100/100 [06:31<00:00,  3.91s/it]
[32m[I 2022-03-09 02:47:28,455][0m Trial 16 finished with value: 0.022875402307824616 and parameters: {'user_days': 7, 'item_days': 49, 'no_components': 896}. Best is trial 16 with value: 0.022875402307824616.[0m


item days 49
34469
user days 28
before 2802618
after 2218853


Epoch: 100%|██████████| 100/100 [06:55<00:00,  4.15s/it]
[32m[I 2022-03-09 02:54:48,067][0m Trial 17 finished with value: 0.020591604858482587 and parameters: {'user_days': 28, 'item_days': 49, 'no_components': 768}. Best is trial 16 with value: 0.022875402307824616.[0m


item days 7
18611
user days 21
before 2586623
after 1933763


Epoch: 100%|██████████| 100/100 [06:58<00:00,  4.19s/it]
[32m[I 2022-03-09 03:02:08,954][0m Trial 18 finished with value: 0.02150159960456763 and parameters: {'user_days': 21, 'item_days': 7, 'no_components': 896}. Best is trial 16 with value: 0.022875402307824616.[0m


item days 35
30979
user days 70
before 2789764
after 2789764


Epoch: 100%|██████████| 100/100 [06:23<00:00,  3.83s/it]
[32m[I 2022-03-09 03:08:49,762][0m Trial 19 finished with value: 0.017466323666751226 and parameters: {'user_days': 70, 'item_days': 35, 'no_components': 512}. Best is trial 16 with value: 0.022875402307824616.[0m


item days 49
34469
user days 42
before 2802618
after 2513885


Epoch: 100%|██████████| 100/100 [08:33<00:00,  5.13s/it]
[32m[I 2022-03-09 03:17:52,041][0m Trial 20 finished with value: 0.019214757777074428 and parameters: {'user_days': 42, 'item_days': 49, 'no_components': 896}. Best is trial 16 with value: 0.022875402307824616.[0m


item days 49
34469
user days 7
before 2802618
after 1699471


Epoch: 100%|██████████| 100/100 [07:54<00:00,  4.75s/it]
[32m[I 2022-03-09 03:26:16,200][0m Trial 21 finished with value: 0.02289279846580044 and parameters: {'user_days': 7, 'item_days': 49, 'no_components': 1024}. Best is trial 21 with value: 0.02289279846580044.[0m


item days 49
34469
user days 7
before 2802618
after 1699471


Epoch: 100%|██████████| 100/100 [07:51<00:00,  4.71s/it]
[32m[I 2022-03-09 03:34:36,824][0m Trial 22 finished with value: 0.02267433619259932 and parameters: {'user_days': 7, 'item_days': 49, 'no_components': 1024}. Best is trial 21 with value: 0.02289279846580044.[0m


item days 28
29009
user days 14
before 2775154
after 1877640


Epoch: 100%|██████████| 100/100 [06:07<00:00,  3.68s/it]
[32m[I 2022-03-09 03:41:07,440][0m Trial 23 finished with value: 0.02233947230782944 and parameters: {'user_days': 14, 'item_days': 28, 'no_components': 768}. Best is trial 21 with value: 0.02289279846580044.[0m


item days 49
34469
user days 7
before 2802618
after 1699471


Epoch: 100%|██████████| 100/100 [06:28<00:00,  3.89s/it]
[32m[I 2022-03-09 03:48:03,924][0m Trial 24 finished with value: 0.022582355609732232 and parameters: {'user_days': 7, 'item_days': 49, 'no_components': 896}. Best is trial 21 with value: 0.02289279846580044.[0m


item days 35
30979
user days 21
before 2789764
after 2053176


Epoch: 100%|██████████| 100/100 [09:01<00:00,  5.41s/it]
[32m[I 2022-03-09 03:57:34,171][0m Trial 25 finished with value: 0.021447339896791 and parameters: {'user_days': 21, 'item_days': 35, 'no_components': 1024}. Best is trial 21 with value: 0.02289279846580044.[0m


item days 63
37318
user days 28
before 2807797
after 2221623


Epoch: 100%|██████████| 100/100 [06:04<00:00,  3.65s/it]
[32m[I 2022-03-09 04:04:01,268][0m Trial 26 finished with value: 0.020576898209759106 and parameters: {'user_days': 28, 'item_days': 63, 'no_components': 640}. Best is trial 21 with value: 0.02289279846580044.[0m


item days 42
32792
user days 14
before 2797986
after 1888434


Epoch: 100%|██████████| 100/100 [07:03<00:00,  4.24s/it]
[32m[I 2022-03-09 04:11:32,710][0m Trial 27 finished with value: 0.022379059866066937 and parameters: {'user_days': 14, 'item_days': 42, 'no_components': 896}. Best is trial 21 with value: 0.02289279846580044.[0m


item days 21
26503
user days 7
before 2748250
after 1677143


Epoch: 100%|██████████| 100/100 [05:47<00:00,  3.47s/it]
[32m[I 2022-03-09 04:17:41,973][0m Trial 28 finished with value: 0.022748247549167534 and parameters: {'user_days': 7, 'item_days': 21, 'no_components': 768}. Best is trial 21 with value: 0.02289279846580044.[0m


item days 14
23518
user days 7
before 2702726
after 1657177


Epoch: 100%|██████████| 100/100 [07:15<00:00,  4.36s/it]
[32m[I 2022-03-09 04:25:23,328][0m Trial 29 finished with value: 0.022936058784333434 and parameters: {'user_days': 7, 'item_days': 14, 'no_components': 1024}. Best is trial 29 with value: 0.022936058784333434.[0m


item days 14
23518
user days 14
before 2702726
after 1841150


Epoch: 100%|██████████| 100/100 [07:52<00:00,  4.73s/it]
[32m[I 2022-03-09 04:33:41,702][0m Trial 30 finished with value: 0.022362563633314413 and parameters: {'user_days': 14, 'item_days': 14, 'no_components': 1024}. Best is trial 29 with value: 0.022936058784333434.[0m


item days 14
23518
user days 7
before 2702726
after 1657177


Epoch: 100%|██████████| 100/100 [06:24<00:00,  3.85s/it]
[32m[I 2022-03-09 04:40:30,520][0m Trial 31 finished with value: 0.022914055489553616 and parameters: {'user_days': 7, 'item_days': 14, 'no_components': 896}. Best is trial 29 with value: 0.022936058784333434.[0m


item days 14
23518
user days 7
before 2702726
after 1657177


Epoch: 100%|██████████| 100/100 [07:15<00:00,  4.36s/it]
[32m[I 2022-03-09 04:48:11,926][0m Trial 32 finished with value: 0.02276540023312167 and parameters: {'user_days': 7, 'item_days': 14, 'no_components': 1024}. Best is trial 29 with value: 0.022936058784333434.[0m


item days 14
23518
user days 14
before 2702726
after 1841150


Epoch: 100%|██████████| 100/100 [06:50<00:00,  4.10s/it]
[32m[I 2022-03-09 04:55:26,683][0m Trial 33 finished with value: 0.022483956250403732 and parameters: {'user_days': 14, 'item_days': 14, 'no_components': 896}. Best is trial 29 with value: 0.022936058784333434.[0m


item days 7
18611
user days 63
before 2586623
after 2569547


Epoch: 100%|██████████| 100/100 [07:40<00:00,  4.60s/it]
[32m[I 2022-03-09 05:03:27,243][0m Trial 34 finished with value: 0.017793778469181846 and parameters: {'user_days': 63, 'item_days': 7, 'no_components': 768}. Best is trial 29 with value: 0.022936058784333434.[0m


item days 42
32792
user days 21
before 2797986
after 2057586


Epoch: 100%|██████████| 100/100 [03:51<00:00,  2.32s/it]
[32m[I 2022-03-09 05:07:33,779][0m Trial 35 finished with value: 0.021281408962743907 and parameters: {'user_days': 21, 'item_days': 42, 'no_components': 384}. Best is trial 29 with value: 0.022936058784333434.[0m


item days 21
26503
user days 7
before 2748250
after 1677143


Epoch: 100%|██████████| 100/100 [07:49<00:00,  4.69s/it]
[32m[I 2022-03-09 05:15:49,789][0m Trial 36 finished with value: 0.023028028405801832 and parameters: {'user_days': 7, 'item_days': 21, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 21
26503
user days 14
before 2748250
after 1864414


Epoch: 100%|██████████| 100/100 [08:24<00:00,  5.05s/it]
[32m[I 2022-03-09 05:24:41,392][0m Trial 37 finished with value: 0.022324209253473195 and parameters: {'user_days': 14, 'item_days': 21, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 28
29009
user days 21
before 2775154
after 2045298


Epoch: 100%|██████████| 100/100 [08:59<00:00,  5.39s/it]
[32m[I 2022-03-09 05:34:08,798][0m Trial 38 finished with value: 0.0214950354982134 and parameters: {'user_days': 21, 'item_days': 28, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 14
23518
user days 14
before 2702726
after 1841150


Epoch: 100%|██████████| 100/100 [07:50<00:00,  4.70s/it]
[32m[I 2022-03-09 05:42:24,704][0m Trial 39 finished with value: 0.022434222772113096 and parameters: {'user_days': 14, 'item_days': 14, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 21
26503
user days 49
before 2748250
after 2583482


Epoch: 100%|██████████| 100/100 [08:51<00:00,  5.32s/it]
[32m[I 2022-03-09 05:51:42,212][0m Trial 40 finished with value: 0.018582017013461984 and parameters: {'user_days': 49, 'item_days': 21, 'no_components': 896}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 14
23518
user days 7
before 2702726
after 1657177


Epoch: 100%|██████████| 100/100 [06:20<00:00,  3.80s/it]
[32m[I 2022-03-09 05:58:26,010][0m Trial 41 finished with value: 0.022665944737531623 and parameters: {'user_days': 7, 'item_days': 14, 'no_components': 896}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 7
18611
user days 7
before 2586623
after 1604397


Epoch: 100%|██████████| 100/100 [07:15<00:00,  4.35s/it]
[32m[I 2022-03-09 06:06:05,315][0m Trial 42 finished with value: 0.022803645370982138 and parameters: {'user_days': 7, 'item_days': 7, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 21
26503
user days 7
before 2748250
after 1677143


Epoch: 100%|██████████| 100/100 [06:23<00:00,  3.84s/it]
[32m[I 2022-03-09 06:12:54,216][0m Trial 43 finished with value: 0.022734954975583865 and parameters: {'user_days': 7, 'item_days': 21, 'no_components': 896}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 42
32792
user days 14
before 2797986
after 1888434


Epoch: 100%|██████████| 100/100 [06:12<00:00,  3.73s/it]
[32m[I 2022-03-09 06:19:30,755][0m Trial 44 finished with value: 0.022308411873430058 and parameters: {'user_days': 14, 'item_days': 42, 'no_components': 768}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 56
36054
user days 7
before 2805786
after 1700661


Epoch: 100%|██████████| 100/100 [07:50<00:00,  4.70s/it]
[32m[I 2022-03-09 06:27:51,840][0m Trial 45 finished with value: 0.022981271091357034 and parameters: {'user_days': 7, 'item_days': 56, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 56
36054
user days 14
before 2805786
after 1891850


Epoch: 100%|██████████| 100/100 [08:39<00:00,  5.20s/it]
[32m[I 2022-03-09 06:37:01,662][0m Trial 46 finished with value: 0.022366856786461477 and parameters: {'user_days': 14, 'item_days': 56, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 70
38540
user days 21
before 2809233
after 2062972


Epoch: 100%|██████████| 100/100 [09:08<00:00,  5.49s/it]
[32m[I 2022-03-09 06:46:41,546][0m Trial 47 finished with value: 0.02142681526000594 and parameters: {'user_days': 21, 'item_days': 70, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 7
18611
user days 7
before 2586623
after 1604397


Epoch: 100%|██████████| 100/100 [02:17<00:00,  1.38s/it]
[32m[I 2022-03-09 06:49:08,894][0m Trial 48 finished with value: 0.022053582691325426 and parameters: {'user_days': 7, 'item_days': 7, 'no_components': 256}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 28
29009
user days 7
before 2775154
after 1688381


Epoch: 100%|██████████| 100/100 [07:52<00:00,  4.72s/it]
[32m[I 2022-03-09 06:57:28,582][0m Trial 49 finished with value: 0.022995697178301373 and parameters: {'user_days': 7, 'item_days': 28, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 28
29009
user days 28
before 2775154
after 2202057


Epoch: 100%|██████████| 100/100 [09:39<00:00,  5.79s/it]
[32m[I 2022-03-09 07:07:35,241][0m Trial 50 finished with value: 0.020654087796655486 and parameters: {'user_days': 28, 'item_days': 28, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 21
26503
user days 7
before 2748250
after 1677143


Epoch: 100%|██████████| 100/100 [07:40<00:00,  4.60s/it]
[32m[I 2022-03-09 07:15:42,261][0m Trial 51 finished with value: 0.022828288286579554 and parameters: {'user_days': 7, 'item_days': 21, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 14
23518
user days 7
before 2702726
after 1657177


Epoch: 100%|██████████| 100/100 [07:15<00:00,  4.35s/it]
[32m[I 2022-03-09 07:23:23,162][0m Trial 52 finished with value: 0.022999150826475377 and parameters: {'user_days': 7, 'item_days': 14, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 14
23518
user days 14
before 2702726
after 1841150


Epoch: 100%|██████████| 100/100 [06:49<00:00,  4.10s/it]
[32m[I 2022-03-09 07:30:37,235][0m Trial 53 finished with value: 0.022261103408922032 and parameters: {'user_days': 14, 'item_days': 14, 'no_components': 896}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 21
26503
user days 7
before 2748250
after 1677143


Epoch: 100%|██████████| 100/100 [07:49<00:00,  4.70s/it]
[32m[I 2022-03-09 07:38:53,478][0m Trial 54 finished with value: 0.02284875167107638 and parameters: {'user_days': 7, 'item_days': 21, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 14
23518
user days 14
before 2702726
after 1841150


Epoch: 100%|██████████| 100/100 [06:43<00:00,  4.03s/it]
[32m[I 2022-03-09 07:46:00,586][0m Trial 55 finished with value: 0.022212801714272788 and parameters: {'user_days': 14, 'item_days': 14, 'no_components': 896}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 35
30979
user days 7
before 2789764
after 1694346


Epoch: 100%|██████████| 100/100 [07:53<00:00,  4.74s/it]
[32m[I 2022-03-09 07:54:22,355][0m Trial 56 finished with value: 0.022970630196765287 and parameters: {'user_days': 7, 'item_days': 35, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 35
30979
user days 42
before 2789764
after 2503764


Epoch: 100%|██████████| 100/100 [10:30<00:00,  6.30s/it]
[32m[I 2022-03-09 08:05:21,804][0m Trial 57 finished with value: 0.019110647834669196 and parameters: {'user_days': 42, 'item_days': 35, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 28
29009
user days 14
before 2775154
after 1877640


Epoch: 100%|██████████| 100/100 [08:29<00:00,  5.09s/it]
[32m[I 2022-03-09 08:14:19,320][0m Trial 58 finished with value: 0.022274314018463238 and parameters: {'user_days': 14, 'item_days': 28, 'no_components': 1024}. Best is trial 36 with value: 0.023028028405801832.[0m


item days 28
29009
user days 7
before 2775154
after 1688381


Epoch: 100%|██████████| 100/100 [07:48<00:00,  4.68s/it]
[32m[I 2022-03-09 08:22:35,091][0m Trial 59 finished with value: 0.0231487379063096 and parameters: {'user_days': 7, 'item_days': 28, 'no_components': 1024}. Best is trial 59 with value: 0.0231487379063096.[0m


item days 35
30979
user days 7
before 2789764
after 1694346


Epoch: 100%|██████████| 100/100 [03:20<00:00,  2.00s/it]
[32m[I 2022-03-09 08:26:09,377][0m Trial 60 finished with value: 0.02250215712024591 and parameters: {'user_days': 7, 'item_days': 35, 'no_components': 384}. Best is trial 59 with value: 0.0231487379063096.[0m


item days 28
29009
user days 7
before 2775154
after 1688381


Epoch: 100%|██████████| 100/100 [07:54<00:00,  4.75s/it]
[32m[I 2022-03-09 08:34:31,984][0m Trial 61 finished with value: 0.022683874324127464 and parameters: {'user_days': 7, 'item_days': 28, 'no_components': 1024}. Best is trial 59 with value: 0.0231487379063096.[0m


item days 28
29009
user days 7
before 2775154
after 1688381


Epoch: 100%|██████████| 100/100 [07:57<00:00,  4.78s/it]
[32m[I 2022-03-09 08:42:57,992][0m Trial 62 finished with value: 0.022836138394951234 and parameters: {'user_days': 7, 'item_days': 28, 'no_components': 1024}. Best is trial 59 with value: 0.0231487379063096.[0m


item days 21
26503
user days 14
before 2748250
after 1864414


Epoch: 100%|██████████| 100/100 [08:23<00:00,  5.04s/it]
[32m[I 2022-03-09 08:51:49,086][0m Trial 63 finished with value: 0.022403361455568465 and parameters: {'user_days': 14, 'item_days': 21, 'no_components': 1024}. Best is trial 59 with value: 0.0231487379063096.[0m


item days 35
30979
user days 7
before 2789764
after 1694346


Epoch: 100%|██████████| 100/100 [06:36<00:00,  3.97s/it]
[32m[I 2022-03-09 08:58:52,353][0m Trial 64 finished with value: 0.022961003423987963 and parameters: {'user_days': 7, 'item_days': 35, 'no_components': 896}. Best is trial 59 with value: 0.0231487379063096.[0m


item days 35
30979
user days 21
before 2789764
after 2053176


Epoch: 100%|██████████| 100/100 [05:42<00:00,  3.42s/it]
[32m[I 2022-03-09 09:04:55,228][0m Trial 65 finished with value: 0.021316396892195878 and parameters: {'user_days': 21, 'item_days': 35, 'no_components': 640}. Best is trial 59 with value: 0.0231487379063096.[0m


In [20]:
study.trials_dataframe().sort_values(by='value', ascending=False).head(20)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_item_days,params_no_components,params_user_days,state
59,59,0.023149,2022-03-09 08:14:19.321905,2022-03-09 08:22:35.091491,0 days 00:08:15.769586,28,1024,7,COMPLETE
36,36,0.023028,2022-03-09 05:07:33.780202,2022-03-09 05:15:49.789598,0 days 00:08:16.009396,21,1024,7,COMPLETE
52,52,0.022999,2022-03-09 07:15:42.262143,2022-03-09 07:23:23.162689,0 days 00:07:40.900546,14,1024,7,COMPLETE
49,49,0.022996,2022-03-09 06:49:08.895512,2022-03-09 06:57:28.581912,0 days 00:08:19.686400,28,1024,7,COMPLETE
45,45,0.022981,2022-03-09 06:19:30.756071,2022-03-09 06:27:51.839865,0 days 00:08:21.083794,56,1024,7,COMPLETE
56,56,0.022971,2022-03-09 07:46:00.587434,2022-03-09 07:54:22.355102,0 days 00:08:21.767668,35,1024,7,COMPLETE
64,64,0.022961,2022-03-09 08:51:49.087369,2022-03-09 08:58:52.353776,0 days 00:07:03.266407,35,896,7,COMPLETE
29,29,0.022936,2022-03-09 04:17:41.974663,2022-03-09 04:25:23.328200,0 days 00:07:41.353537,14,1024,7,COMPLETE
31,31,0.022914,2022-03-09 04:33:41.703001,2022-03-09 04:40:30.519930,0 days 00:06:48.816929,14,896,7,COMPLETE
21,21,0.022893,2022-03-09 03:17:52.042700,2022-03-09 03:26:16.200555,0 days 00:08:24.157855,49,1024,7,COMPLETE


In [22]:
len(transactions_train.user.unique())

439368

In [25]:
len(transactions_train.query("t_dat > '2020-08-01'").user.unique())

329034

In [27]:
len(users) - len(transactions_train.query("t_dat > '2020-08-01'").user.unique())

1042946

In [29]:
v = set(transactions_valid.user)

In [36]:
t = set(transactions_train.query("t_dat > '2020-08-25'").user)
print(len(v - t))

42230


In [37]:
t = set(transactions_train.user)
print(len(v - t))

23068
