In [1]:
import lightgbm as lgb
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from metric import apk, mapk
from utils import plot_images
import catboost

In [2]:
def objective(trial):
    num_popular_items = 12
    num_repurchase_week = trial.suggest_int('num_repurchase_week', 8, 20)
    num_cooc_week = trial.suggest_int('num_cooc_week', 4, 16)
    num_train_weeks = trial.suggest_int('num_train_weeks', 1, 4)
    volume_week = trial.suggest_int('volume_week', 1, 4)
    cooc_prob = trial.suggest_uniform('cooc_prob', 0.02, 0.05)
    transaction_dynamic_week = trial.suggest_int('transaction_dynamic_week', 1, 8)

    dataset = '100'

    transactions = pd.read_pickle(f"input/{dataset}/transactions_train.pkl")
    users = pd.read_pickle(f"input/{dataset}/users.pkl")
    items = pd.read_pickle(f"input/{dataset}/items.pkl")

    def create_candidates(transactions: pd.DataFrame, target_users: np.ndarray, week: int) -> pd.DataFrame:
        """
        transactions
            original transactions (user, item, week)
        target_users, week
            候補生成対象のユーザー
            weekで指定されている週の段階での情報のみから作られる
        """
        assert len(target_users) == len(set(target_users))

        def create_candidates_repurchase(
                strategy: str,
                transactions: pd.DataFrame,
                target_users: np.ndarray,
                week_start: int,
                week_end: int) -> pd.DataFrame:
            tr = transactions.query("user in @target_users and @week_start <= week <= @week_end")[['user', 'item', 'week']].drop_duplicates(ignore_index=True)

            gr_week = tr.groupby(['user', 'item'])['week'].min().reset_index(name='week')
            gr_volume = tr.groupby(['user', 'item']).size().reset_index(name='volume')

            gr_week['week_rank'] = gr_week.groupby('user')['week'].rank()
            gr_volume['volume_rank'] = gr_volume.groupby('user')['volume'].rank()

            candidates = gr_week.merge(gr_volume, on=['user', 'item'])
            candidates = candidates[['user', 'item', 'week_rank', 'volume_rank']].rename(columns={'week_rank': f'{strategy}_week_rank', 'volume_rank': f'{strategy}_volume_rank'})

            candidates['strategy'] = strategy
            return candidates

        def create_candidates_popular(
                strategy: str,
                transactions: pd.DataFrame,
                target_users: np.ndarray,
                week_start: int,
                week_end: int) -> pd.DataFrame:
            tr = transactions.query("@week_start <= week <= @week_end")[['user', 'item']].drop_duplicates(ignore_index=True)
            popular_items = tr['item'].value_counts().index.values[:num_popular_items]
            popular_items = pd.DataFrame({
                'item': popular_items,
                'rank': range(num_popular_items),
                'crossjoinkey': 1,
            })

            candidates = pd.DataFrame({
                'user': target_users,
                'crossjoinkey': 1,
            })

            candidates = candidates.merge(popular_items, on='crossjoinkey').drop('crossjoinkey', axis=1)
            candidates = candidates.rename(columns={'rank': f'{strategy}_rank'})

            candidates['strategy'] = strategy
            return candidates

        def create_candidates_cooc(
                strategy: str,
                transactions: pd.DataFrame,
                week_start: int,
                week_end: int,
                base_candidates: pd.DataFrame) -> pd.DataFrame:
            hoge = cooc_prob
            tr = transactions.query("@week_start <= week == @week_end")[['user', 'item', 'week']].drop_duplicates(ignore_index=True)
            tr = tr.merge(tr.rename(columns={'item': 'item_with', 'week': 'week_with'}), on='user').query("item != item_with and week <= week_with")[['item', 'item_with']].reset_index(drop=True)
            gr_sz = tr.groupby('item').size().reset_index(name='tot')
            gr_cnt = tr.groupby(['item', 'item_with']).size().reset_index(name='cnt')
            item2item = gr_cnt.merge(gr_sz, on='item')
            item2item['ratio'] = item2item['cnt'] / item2item['tot']
            item2item = item2item.query("ratio >= @hoge").reset_index(drop=True)

            base_candidates_columns = [c for c in base_candidates.columns if '_' in c]
            replace = {c: f"{strategy}_{c}" for c in base_candidates_columns}
            candidates = base_candidates.merge(item2item, on='item').drop(['item', 'cnt'], axis=1).rename(columns={'item_with': 'item'}).rename(columns=replace)
            candidates = candidates.rename(columns={'ratio': f'{strategy}_ratio', 'tot': f'{strategy}_tot'})

            candidates['strategy'] = strategy
            return candidates

        def create_candidates_same_product_code(
                strategy: str,
                items: pd.DataFrame,
                base_candidates: pd.DataFrame) -> pd.DataFrame:
            item2item = items[['item', 'product_code']].merge(items[['item', 'product_code']].rename({'item': 'item_with'}, axis=1), on='product_code')[['item', 'item_with']].query("item != item_with").reset_index(drop=True)

            base_candidates_columns = [c for c in base_candidates.columns if '_' in c]
            replace = {c: f"{strategy}_{c}" for c in base_candidates_columns}
            candidates = base_candidates.merge(item2item, on='item').drop('item', axis=1).rename(columns={'item_with': 'item'}).rename(columns=replace)

            candidates['strategy'] = strategy
            return candidates

        candidates_repurchase = create_candidates_repurchase('repurchase', transactions, target_users, week, week + num_repurchase_week)
        candidates_popular = create_candidates_popular('pop', transactions, target_users, week, week)
        candidates_cooc = create_candidates_cooc('cooc', transactions, week, week + num_cooc_week, candidates_repurchase)
        candidates_same_product_code = create_candidates_same_product_code('same_product_code', items, candidates_repurchase)

        candidates = [
            candidates_repurchase,
            candidates_popular,
            candidates_cooc,
            candidates_same_product_code,
        ]

        candidates = pd.concat(candidates).reset_index(drop=True)

        volumes = candidates.groupby('strategy').size().reset_index(name='volume').sort_values(by='volume', ascending=False).reset_index(drop=True)
        volumes['ratio'] = volumes['volume'] / volumes['volume'].sum()

        return candidates

    # valid: week=1
    # train: week=2..1+num_train_weeks
    candidates = []
    for week in range(1, 2+num_train_weeks):
        target_users = transactions.query("week == @week")['user'].unique()
        candidates.append(create_candidates(transactions, target_users, week+1))
    def merge_labels(candidates: pd.DataFrame, week: int) -> pd.DataFrame:
        """
        candidatesに対してweekで指定される週のトランザクションからラベルを付与する
        """
        labels = transactions[transactions['week'] == week][['user', 'item']].drop_duplicates(ignore_index=True)
        labels['y'] = 1
        original_positives = len(labels)
        labels = candidates.merge(labels, on=['user', 'item'], how='left')
        labels['y'] = labels['y'].fillna(0)

        remaining_positives_total = labels[['user', 'item', 'y']].drop_duplicates(ignore_index=True)['y'].sum()
        recall = remaining_positives_total / original_positives

        volumes = candidates.groupby('strategy').size().reset_index(name='volume')
        remaining_positives = labels.groupby('strategy')['y'].sum().reset_index()
        remaining_positives = remaining_positives.merge(volumes, on='strategy')
        remaining_positives['recall'] = remaining_positives['y'] / original_positives
        remaining_positives['hit_ratio'] = remaining_positives['y'] / remaining_positives['volume']
        remaining_positives = remaining_positives.sort_values(by='y', ascending=False).reset_index(drop=True)

        return labels

    for idx in range(len(candidates)):
        candidates[idx] = merge_labels(candidates[idx], 1+idx)

    def get_query_group(df):
        def run_length_encoding(sequence):
            comp_seq_index, = np.concatenate(([True], sequence[1:] != sequence[:-1], [True])).nonzero()
            return sequence[comp_seq_index[:-1]], np.ediff1d(comp_seq_index)
        users = df['user'].values
        _, group = run_length_encoding(users)
        return list(group)

    def drop_trivial_users(labels):
        """
        LightGBMのxendgcやlambdarankでは正例のみや負例のみのuserは学習に無意味なのと、メトリックの計算がおかしくなるので省く
        """
        bef = len(labels)
        df = labels[labels['user'].isin(labels[['user', 'y']].drop_duplicates().groupby('user').size().reset_index(name='sz').query("sz==2").user)].reset_index(drop=True)
        aft = len(df)
        return df

    for idx in range(len(candidates)):
        candidates[idx]['week'] = 1+idx

    valid_all = candidates[0].copy()

    for idx in range(len(candidates)):
        candidates[idx] = drop_trivial_users(candidates[idx])

    def attach_features(transactions: pd.DataFrame, users: pd.DataFrame, items: pd.DataFrame, candidates: pd.DataFrame, week: int) -> pd.DataFrame:
        """
        user, itemに対して特徴を横付けする
        week: これを含めた以前の情報は使って良い
        """
        n_original = len(candidates)
        df = candidates.copy()

        # user static features
        user_features = ['FN', 'Active', 'age', 'club_member_status_idx', 'fashion_news_frequency_idx']
        df = df.merge(users[['user'] + user_features], on='user')

        # item static features
        item_features = [c for c in items.columns if c.endswith('idx')]
        df = df.merge(items[['item'] + item_features], on='item')

        # user dynamic features (transactions)
        num_weeks = transaction_dynamic_week
        week_end = week + num_weeks
        tmp = transactions.query("@week <= week < @week_end").groupby('user')[['price', 'sales_channel_id']].agg(['mean', 'std'])
        tmp.columns = ['user_' + '_'.join(a) for a in tmp.columns.to_flat_index()]
        df = df.merge(tmp, on='user', how='left')

        # item dynamic features (transactions)
        num_weeks = transaction_dynamic_week
        week_end = week + num_weeks
        tmp = transactions.query("@week <= week < @week_end").groupby('item')[['price', 'sales_channel_id']].agg(['mean', 'std'])
        tmp.columns = ['item_' + '_'.join(a) for a in tmp.columns.to_flat_index()]
        df = df.merge(tmp, on='item', how='left')

        # item dynamic features (user features)
        num_weeks = transaction_dynamic_week
        week_end = week + num_weeks
        tmp = transactions.query("@week <= week < @week_end").merge(users[['user', 'age']], on='user')
        tmp = tmp.groupby('item')['age'].agg(['mean', 'std'])
        tmp.columns = [f'age_{a}' for a in tmp.columns.to_flat_index()]
        df = df.merge(tmp, on='item', how='left')

        # item freshness features
        tmp = transactions.query("@week <= week").groupby('item')['week'].min().reset_index(name='item_week_min')
        tmp['item_week_min'] -= week
        df = df.merge(tmp, on='item', how='left')

        # item volume features
        num_weeks = volume_week
        week_end = week + num_weeks
        tmp = transactions.query("@week <= week < @week_end").groupby('item').size().reset_index(name='item_volume')
        df = df.merge(tmp, on='item', how='left')

        # # user freshness features
        # tmp = transactions.query("@week <= week").groupby('user')['week'].min().reset_index(name='user_week_min')
        # tmp['user_week_min'] -= week
        # df = df.merge(tmp, on='user', how='left')

        # # user volume features
        # num_weeks = 1
        # week_end = week + num_weeks
        # tmp = transactions.query("@week <= week < @week_end").groupby('user').size().reset_index(name='user_volume')
        # df = df.merge(tmp, on='user', how='left')

        # user-item freshness features
        tmp = transactions.query("@week <= week").groupby(['user', 'item'])['week'].min().reset_index(name='user_item_week_min')
        tmp['user_item_week_min'] -= week
        df = df.merge(tmp, on=['item', 'user'], how='left')

        # user-item volume features
        num_weeks = volume_week
        week_end = week + num_weeks
        tmp = transactions.query("@week <= week < @week_end").groupby(['user', 'item']).size().reset_index(name='user_item_volume')
        df = df.merge(tmp, on=['user', 'item'], how='left')

        assert len(df) == n_original
        return df


    valid_all = attach_features(transactions, users, items, valid_all, 2)
    for idx in range(len(candidates)):
        candidates[idx] = attach_features(transactions, users, items, candidates[idx], 2+idx)

    for idx in range(len(candidates)):
        candidates[idx]['group'] = candidates[idx]['week'].astype(str) + '_' + candidates[idx]['user'].astype(str)
        candidates[idx] = candidates[idx].sort_values(by='group').reset_index(drop=True)

    valid = candidates[0]

    def concat_train(datasets, begin, num):
        train = pd.concat([datasets[idx] for idx in range(begin, begin+num)])
        return train

    train = concat_train(candidates, 1, num_train_weeks)
    feature_columns = [c for c in valid.columns if c not in ['y', 'strategy', 'week', 'group']]


    cat_feature_values = [c for c in feature_columns if c.endswith('idx')]
    cat_features = [feature_columns.index(c) for c in cat_feature_values]

    train_pool = catboost.Pool(data=train[feature_columns], label=train['y'], group_id=train['group'], cat_features=cat_features)
    valid_pool = catboost.Pool(data=valid[feature_columns], label=valid['y'], group_id=valid['group'], cat_features=cat_features)

    params = {
        'loss_function': 'YetiRank',
        'use_best_model': True,
        'one_hot_max_size': 300,
        'metric_period': 100,
    }
    model = catboost.CatBoost(params)
    model.fit(train_pool, eval_set=valid_pool)

    valid_all['pred'] = model.predict(valid_all[feature_columns])
    pred = valid_all.groupby(['user', 'item'])['pred'].max().reset_index()

    pred = pred.sort_values(by=['user', 'pred'], ascending=False).reset_index(drop=True).groupby('user')['item'].apply(lambda x: list(x)[:12]).reset_index()
    pred

    gt = transactions.query("week == 1").groupby('user')['item'].apply(list).reset_index().rename(columns={'item': 'gt'})
    merged = gt.merge(pred, on='user', how='left')
    merged['item'] = merged['item'].fillna('').apply(list)
    merged

    return mapk(merged['gt'], merged['item'])


In [3]:
import optuna

study = optuna.create_study(
    direction='maximize',
    study_name='cat',
)
study.optimize(objective, timeout=3600*5)
study.trials_dataframe().sort_values(by='value',ascending=False).reset_index(drop=True).to_csv('cat.csv', index=False)

[32m[I 2022-04-07 14:41:34,703][0m A new study created in memory with name: cat[0m


0:	test: 0.2946554	best: 0.2946554 (0)	total: 533ms	remaining: 8m 52s
100:	test: 0.4426274	best: 0.4426274 (100)	total: 39.8s	remaining: 5m 54s
200:	test: 0.4513364	best: 0.4513364 (200)	total: 1m 18s	remaining: 5m 12s
300:	test: 0.4534541	best: 0.4534541 (300)	total: 1m 57s	remaining: 4m 32s
400:	test: 0.4559337	best: 0.4559337 (400)	total: 2m 35s	remaining: 3m 52s
500:	test: 0.4572042	best: 0.4572042 (500)	total: 3m 14s	remaining: 3m 13s
600:	test: 0.4572855	best: 0.4572855 (600)	total: 3m 52s	remaining: 2m 34s
700:	test: 0.4591008	best: 0.4591008 (700)	total: 4m 30s	remaining: 1m 55s
800:	test: 0.4598348	best: 0.4598348 (800)	total: 5m 8s	remaining: 1m 16s
900:	test: 0.4597135	best: 0.4598348 (800)	total: 5m 46s	remaining: 38.1s
999:	test: 0.4587869	best: 0.4598348 (800)	total: 6m 24s	remaining: 0us

bestTest = 0.4598347763
bestIteration = 800

Shrink model to first 801 iterations.


[32m[I 2022-04-07 14:50:10,246][0m Trial 0 finished with value: 0.028928555638922562 and parameters: {'num_repurchase_week': 12, 'num_cooc_week': 8, 'num_train_weeks': 1, 'volume_week': 3, 'cooc_prob': 0.03345213126172819, 'transaction_dynamic_week': 4}. Best is trial 0 with value: 0.028928555638922562.[0m


0:	test: 0.2933335	best: 0.2933335 (0)	total: 875ms	remaining: 14m 34s
100:	test: 0.4452110	best: 0.4452110 (100)	total: 1m 28s	remaining: 13m 10s
200:	test: 0.4557164	best: 0.4557164 (200)	total: 2m 56s	remaining: 11m 42s
300:	test: 0.4591164	best: 0.4591164 (300)	total: 4m 23s	remaining: 10m 12s
400:	test: 0.4615290	best: 0.4615290 (400)	total: 5m 51s	remaining: 8m 44s
500:	test: 0.4627775	best: 0.4627775 (500)	total: 7m 17s	remaining: 7m 16s
600:	test: 0.4639101	best: 0.4639101 (600)	total: 8m 44s	remaining: 5m 48s
700:	test: 0.4670378	best: 0.4670378 (700)	total: 10m 11s	remaining: 4m 20s
800:	test: 0.4686896	best: 0.4686896 (800)	total: 11m 37s	remaining: 2m 53s
900:	test: 0.4687768	best: 0.4687768 (900)	total: 13m 4s	remaining: 1m 26s
999:	test: 0.4694986	best: 0.4694986 (999)	total: 14m 29s	remaining: 0us

bestTest = 0.4694985823
bestIteration = 999



[32m[I 2022-04-07 15:08:14,598][0m Trial 1 finished with value: 0.02863794204653312 and parameters: {'num_repurchase_week': 8, 'num_cooc_week': 4, 'num_train_weeks': 4, 'volume_week': 3, 'cooc_prob': 0.03545554963499757, 'transaction_dynamic_week': 6}. Best is trial 0 with value: 0.028928555638922562.[0m


0:	test: 0.2805564	best: 0.2805564 (0)	total: 881ms	remaining: 14m 40s
100:	test: 0.4258414	best: 0.4258414 (100)	total: 1m 27s	remaining: 12m 55s
200:	test: 0.4324797	best: 0.4324797 (200)	total: 2m 53s	remaining: 11m 27s
300:	test: 0.4370911	best: 0.4370911 (300)	total: 4m 18s	remaining: 10m
400:	test: 0.4382316	best: 0.4382316 (400)	total: 5m 43s	remaining: 8m 32s
500:	test: 0.4402875	best: 0.4402875 (500)	total: 7m 8s	remaining: 7m 6s
600:	test: 0.4420112	best: 0.4420112 (600)	total: 8m 32s	remaining: 5m 40s
700:	test: 0.4443167	best: 0.4443167 (700)	total: 9m 56s	remaining: 4m 14s
800:	test: 0.4458087	best: 0.4458087 (800)	total: 11m 20s	remaining: 2m 49s
900:	test: 0.4470631	best: 0.4470631 (900)	total: 12m 44s	remaining: 1m 24s
999:	test: 0.4475025	best: 0.4475025 (999)	total: 14m 7s	remaining: 0us

bestTest = 0.4475024578
bestIteration = 999



[32m[I 2022-04-07 15:25:04,536][0m Trial 2 finished with value: 0.028371243194993574 and parameters: {'num_repurchase_week': 10, 'num_cooc_week': 7, 'num_train_weeks': 2, 'volume_week': 3, 'cooc_prob': 0.024074839925327503, 'transaction_dynamic_week': 3}. Best is trial 0 with value: 0.028928555638922562.[0m


0:	test: 0.2797525	best: 0.2797525 (0)	total: 2.49s	remaining: 41m 27s
100:	test: 0.4169000	best: 0.4169000 (100)	total: 4m 6s	remaining: 36m 32s
200:	test: 0.4292320	best: 0.4292320 (200)	total: 8m 11s	remaining: 32m 35s
300:	test: 0.4344493	best: 0.4344493 (300)	total: 12m 15s	remaining: 28m 28s
400:	test: 0.4378017	best: 0.4378017 (400)	total: 16m 18s	remaining: 24m 20s
500:	test: 0.4394798	best: 0.4394798 (500)	total: 20m 20s	remaining: 20m 15s
600:	test: 0.4406641	best: 0.4406641 (600)	total: 24m 21s	remaining: 16m 10s
700:	test: 0.4412038	best: 0.4412038 (700)	total: 28m 23s	remaining: 12m 6s
800:	test: 0.4422863	best: 0.4422863 (800)	total: 32m 25s	remaining: 8m 3s
900:	test: 0.4421683	best: 0.4422863 (800)	total: 36m 27s	remaining: 4m
999:	test: 0.4432381	best: 0.4432381 (999)	total: 40m 25s	remaining: 0us

bestTest = 0.4432381313
bestIteration = 999



[32m[I 2022-04-07 16:09:37,826][0m Trial 3 finished with value: 0.029890679271307657 and parameters: {'num_repurchase_week': 20, 'num_cooc_week': 12, 'num_train_weeks': 3, 'volume_week': 1, 'cooc_prob': 0.03073031606052501, 'transaction_dynamic_week': 7}. Best is trial 3 with value: 0.029890679271307657.[0m


0:	test: 0.2988265	best: 0.2988265 (0)	total: 302ms	remaining: 5m 1s
100:	test: 0.4429848	best: 0.4429848 (100)	total: 30.4s	remaining: 4m 30s
200:	test: 0.4512794	best: 0.4512794 (200)	total: 1m	remaining: 3m 59s
300:	test: 0.4552768	best: 0.4552768 (300)	total: 1m 30s	remaining: 3m 29s
400:	test: 0.4585469	best: 0.4585469 (400)	total: 1m 59s	remaining: 2m 59s
500:	test: 0.4596580	best: 0.4596580 (500)	total: 2m 29s	remaining: 2m 28s
600:	test: 0.4611812	best: 0.4611812 (600)	total: 2m 58s	remaining: 1m 58s
700:	test: 0.4621329	best: 0.4621329 (700)	total: 3m 28s	remaining: 1m 28s
800:	test: 0.4631755	best: 0.4631755 (800)	total: 3m 57s	remaining: 59s
900:	test: 0.4642276	best: 0.4642276 (900)	total: 4m 27s	remaining: 29.3s
999:	test: 0.4651550	best: 0.4651550 (999)	total: 4m 56s	remaining: 0us

bestTest = 0.4651550204
bestIteration = 999



[32m[I 2022-04-07 16:16:36,222][0m Trial 4 finished with value: 0.028857083341249087 and parameters: {'num_repurchase_week': 12, 'num_cooc_week': 9, 'num_train_weeks': 1, 'volume_week': 1, 'cooc_prob': 0.044312139649449425, 'transaction_dynamic_week': 7}. Best is trial 3 with value: 0.029890679271307657.[0m


0:	test: 0.2856632	best: 0.2856632 (0)	total: 3.7s	remaining: 1h 1m 39s
100:	test: 0.4071269	best: 0.4071269 (100)	total: 5m 49s	remaining: 51m 48s
200:	test: 0.4161764	best: 0.4161764 (200)	total: 11m 22s	remaining: 45m 13s
300:	test: 0.4207486	best: 0.4207486 (300)	total: 16m 46s	remaining: 38m 57s
400:	test: 0.4249672	best: 0.4249672 (400)	total: 22m 8s	remaining: 33m 5s
500:	test: 0.4284537	best: 0.4284537 (500)	total: 27m 31s	remaining: 27m 25s
600:	test: 0.4298214	best: 0.4298214 (600)	total: 32m 57s	remaining: 21m 52s
700:	test: 0.4306323	best: 0.4306323 (700)	total: 38m 25s	remaining: 16m 23s
800:	test: 0.4319342	best: 0.4319342 (800)	total: 43m 52s	remaining: 10m 53s
900:	test: 0.4323466	best: 0.4323466 (900)	total: 49m 18s	remaining: 5m 25s
999:	test: 0.4326938	best: 0.4326938 (999)	total: 54m 41s	remaining: 0us

bestTest = 0.4326938306
bestIteration = 999



[32m[I 2022-04-07 17:16:12,982][0m Trial 5 finished with value: 0.029184083144487664 and parameters: {'num_repurchase_week': 16, 'num_cooc_week': 11, 'num_train_weeks': 4, 'volume_week': 3, 'cooc_prob': 0.0216855065054401, 'transaction_dynamic_week': 2}. Best is trial 3 with value: 0.029890679271307657.[0m


0:	test: 0.2399811	best: 0.2399811 (0)	total: 591ms	remaining: 9m 50s
100:	test: 0.4064227	best: 0.4064227 (100)	total: 54.4s	remaining: 8m 3s
200:	test: 0.4132453	best: 0.4132453 (200)	total: 1m 47s	remaining: 7m 8s
300:	test: 0.4179795	best: 0.4179795 (300)	total: 2m 40s	remaining: 6m 12s
400:	test: 0.4204683	best: 0.4204683 (400)	total: 3m 33s	remaining: 5m 18s
500:	test: 0.4233576	best: 0.4233576 (500)	total: 4m 26s	remaining: 4m 25s
600:	test: 0.4231344	best: 0.4233576 (500)	total: 5m 19s	remaining: 3m 32s
700:	test: 0.4238934	best: 0.4238934 (700)	total: 6m 12s	remaining: 2m 38s
800:	test: 0.4249828	best: 0.4249828 (800)	total: 7m 5s	remaining: 1m 45s
900:	test: 0.4248988	best: 0.4249828 (800)	total: 7m 57s	remaining: 52.5s
999:	test: 0.4252649	best: 0.4252649 (999)	total: 8m 50s	remaining: 0us

bestTest = 0.4252648708
bestIteration = 999



[32m[I 2022-04-07 17:27:34,014][0m Trial 6 finished with value: 0.029185713814581266 and parameters: {'num_repurchase_week': 17, 'num_cooc_week': 4, 'num_train_weeks': 1, 'volume_week': 4, 'cooc_prob': 0.04251036406490562, 'transaction_dynamic_week': 1}. Best is trial 3 with value: 0.029890679271307657.[0m


0:	test: 0.2785244	best: 0.2785244 (0)	total: 1.57s	remaining: 26m 10s
100:	test: 0.4188755	best: 0.4188755 (100)	total: 2m 40s	remaining: 23m 47s
200:	test: 0.4313861	best: 0.4313861 (200)	total: 5m 19s	remaining: 21m 8s
300:	test: 0.4366518	best: 0.4366518 (300)	total: 7m 56s	remaining: 18m 27s
400:	test: 0.4390964	best: 0.4390964 (400)	total: 10m 33s	remaining: 15m 46s
500:	test: 0.4392402	best: 0.4392402 (500)	total: 13m 10s	remaining: 13m 7s
600:	test: 0.4399586	best: 0.4399586 (600)	total: 15m 46s	remaining: 10m 28s
700:	test: 0.4408504	best: 0.4408504 (700)	total: 18m 22s	remaining: 7m 50s
800:	test: 0.4416652	best: 0.4416652 (800)	total: 20m 57s	remaining: 5m 12s
900:	test: 0.4421247	best: 0.4421247 (900)	total: 23m 33s	remaining: 2m 35s
999:	test: 0.4421688	best: 0.4421688 (999)	total: 26m 7s	remaining: 0us

bestTest = 0.442168845
bestIteration = 999



[32m[I 2022-04-07 17:57:36,430][0m Trial 7 finished with value: 0.029520500164084803 and parameters: {'num_repurchase_week': 18, 'num_cooc_week': 15, 'num_train_weeks': 3, 'volume_week': 4, 'cooc_prob': 0.03663195602874758, 'transaction_dynamic_week': 8}. Best is trial 3 with value: 0.029890679271307657.[0m


0:	test: 0.2956050	best: 0.2956050 (0)	total: 964ms	remaining: 16m 2s
100:	test: 0.4337482	best: 0.4337482 (100)	total: 1m 39s	remaining: 14m 48s
200:	test: 0.4441468	best: 0.4441468 (200)	total: 3m 20s	remaining: 13m 15s
300:	test: 0.4464593	best: 0.4464593 (300)	total: 4m 58s	remaining: 11m 33s
400:	test: 0.4485591	best: 0.4485591 (400)	total: 6m 36s	remaining: 9m 51s
500:	test: 0.4512419	best: 0.4512419 (500)	total: 8m 13s	remaining: 8m 11s
600:	test: 0.4528325	best: 0.4528325 (600)	total: 9m 50s	remaining: 6m 32s
700:	test: 0.4544058	best: 0.4544058 (700)	total: 11m 27s	remaining: 4m 53s
800:	test: 0.4553221	best: 0.4553221 (800)	total: 13m 4s	remaining: 3m 14s
900:	test: 0.4556571	best: 0.4556571 (900)	total: 14m 40s	remaining: 1m 36s
999:	test: 0.4555970	best: 0.4556571 (900)	total: 16m 15s	remaining: 0us

bestTest = 0.4556570544
bestIteration = 900

Shrink model to first 901 iterations.


[32m[I 2022-04-07 18:17:16,883][0m Trial 8 finished with value: 0.029089912583703314 and parameters: {'num_repurchase_week': 14, 'num_cooc_week': 14, 'num_train_weeks': 3, 'volume_week': 2, 'cooc_prob': 0.04644963652680684, 'transaction_dynamic_week': 3}. Best is trial 3 with value: 0.029890679271307657.[0m


0:	test: 0.3026687	best: 0.3026687 (0)	total: 401ms	remaining: 6m 40s
100:	test: 0.4352254	best: 0.4352254 (100)	total: 38.7s	remaining: 5m 44s
200:	test: 0.4415849	best: 0.4415849 (200)	total: 1m 16s	remaining: 5m 5s
300:	test: 0.4465124	best: 0.4465124 (300)	total: 1m 54s	remaining: 4m 26s
400:	test: 0.4471722	best: 0.4471722 (400)	total: 2m 32s	remaining: 3m 47s
500:	test: 0.4486216	best: 0.4486216 (500)	total: 3m 9s	remaining: 3m 9s
600:	test: 0.4489374	best: 0.4489374 (600)	total: 3m 47s	remaining: 2m 31s
700:	test: 0.4501420	best: 0.4501420 (700)	total: 4m 24s	remaining: 1m 52s
800:	test: 0.4503562	best: 0.4503562 (800)	total: 5m 2s	remaining: 1m 15s
900:	test: 0.4507979	best: 0.4507979 (900)	total: 5m 39s	remaining: 37.3s
999:	test: 0.4515494	best: 0.4515494 (999)	total: 6m 15s	remaining: 0us

bestTest = 0.4515493563
bestIteration = 999



[32m[I 2022-04-07 18:25:41,781][0m Trial 9 finished with value: 0.028805039894802643 and parameters: {'num_repurchase_week': 13, 'num_cooc_week': 4, 'num_train_weeks': 1, 'volume_week': 4, 'cooc_prob': 0.0473606801521671, 'transaction_dynamic_week': 7}. Best is trial 3 with value: 0.029890679271307657.[0m


0:	test: 0.2801102	best: 0.2801102 (0)	total: 1.79s	remaining: 29m 53s
100:	test: 0.4099522	best: 0.4099522 (100)	total: 3m 8s	remaining: 27m 55s
200:	test: 0.4206458	best: 0.4206458 (200)	total: 6m 15s	remaining: 24m 53s
300:	test: 0.4269996	best: 0.4269996 (300)	total: 9m 23s	remaining: 21m 48s
400:	test: 0.4294038	best: 0.4294038 (400)	total: 12m 29s	remaining: 18m 40s
500:	test: 0.4317400	best: 0.4317400 (500)	total: 15m 36s	remaining: 15m 32s
600:	test: 0.4328962	best: 0.4328962 (600)	total: 18m 43s	remaining: 12m 25s
700:	test: 0.4335771	best: 0.4335771 (700)	total: 21m 49s	remaining: 9m 18s
800:	test: 0.4337214	best: 0.4337214 (800)	total: 24m 57s	remaining: 6m 11s
900:	test: 0.4342221	best: 0.4342221 (900)	total: 28m 4s	remaining: 3m 5s
999:	test: 0.4349515	best: 0.4349515 (999)	total: 31m 8s	remaining: 0us

bestTest = 0.4349514825
bestIteration = 999



[32m[I 2022-04-07 19:00:18,169][0m Trial 10 finished with value: 0.029507342330209012 and parameters: {'num_repurchase_week': 20, 'num_cooc_week': 12, 'num_train_weeks': 2, 'volume_week': 1, 'cooc_prob': 0.026336001453650084, 'transaction_dynamic_week': 5}. Best is trial 3 with value: 0.029890679271307657.[0m


0:	test: 0.2833683	best: 0.2833683 (0)	total: 2.19s	remaining: 36m 29s
100:	test: 0.4190362	best: 0.4190362 (100)	total: 3m 38s	remaining: 32m 24s
200:	test: 0.4297236	best: 0.4297236 (200)	total: 7m 10s	remaining: 28m 29s
300:	test: 0.4338388	best: 0.4338388 (300)	total: 10m 40s	remaining: 24m 46s
400:	test: 0.4362504	best: 0.4362504 (400)	total: 14m 10s	remaining: 21m 10s
500:	test: 0.4377806	best: 0.4377806 (500)	total: 17m 44s	remaining: 17m 40s
600:	test: 0.4394840	best: 0.4394840 (600)	total: 21m 16s	remaining: 14m 7s
700:	test: 0.4400433	best: 0.4400433 (700)	total: 24m 49s	remaining: 10m 35s
800:	test: 0.4407240	best: 0.4407240 (800)	total: 28m 20s	remaining: 7m 2s
900:	test: 0.4418673	best: 0.4418673 (900)	total: 31m 53s	remaining: 3m 30s
999:	test: 0.4422349	best: 0.4422349 (999)	total: 35m 21s	remaining: 0us

bestTest = 0.4422349262
bestIteration = 999



[32m[I 2022-04-07 19:39:53,397][0m Trial 11 finished with value: 0.029777829294497127 and parameters: {'num_repurchase_week': 20, 'num_cooc_week': 16, 'num_train_weeks': 3, 'volume_week': 2, 'cooc_prob': 0.03279672619081458, 'transaction_dynamic_week': 8}. Best is trial 3 with value: 0.029890679271307657.[0m


0:	test: 0.2976316	best: 0.2976316 (0)	total: 2.24s	remaining: 37m 18s
100:	test: 0.4120120	best: 0.4120120 (100)	total: 3m 45s	remaining: 33m 30s
200:	test: 0.4239541	best: 0.4239541 (200)	total: 7m 29s	remaining: 29m 45s
300:	test: 0.4286520	best: 0.4286520 (300)	total: 11m 12s	remaining: 26m
400:	test: 0.4307669	best: 0.4307669 (400)	total: 14m 54s	remaining: 22m 16s
500:	test: 0.4321392	best: 0.4321392 (500)	total: 18m 35s	remaining: 18m 31s
600:	test: 0.4337852	best: 0.4337852 (600)	total: 22m 16s	remaining: 14m 47s
700:	test: 0.4341522	best: 0.4341522 (700)	total: 25m 58s	remaining: 11m 4s
800:	test: 0.4349705	best: 0.4349705 (800)	total: 29m 40s	remaining: 7m 22s
900:	test: 0.4355723	best: 0.4355723 (900)	total: 33m 20s	remaining: 3m 39s
999:	test: 0.4357874	best: 0.4357874 (999)	total: 36m 59s	remaining: 0us

bestTest = 0.4357874442
bestIteration = 999



[32m[I 2022-04-07 20:21:11,845][0m Trial 12 finished with value: 0.02978304017441304 and parameters: {'num_repurchase_week': 20, 'num_cooc_week': 13, 'num_train_weeks': 3, 'volume_week': 2, 'cooc_prob': 0.030284041227316906, 'transaction_dynamic_week': 8}. Best is trial 3 with value: 0.029890679271307657.[0m


In [4]:
import pickle
with open("study.pkl", 'wb') as f:
    pickle.dump(study, f)

In [5]:
optuna.visualization.plot_contour(study)

In [6]:
optuna.visualization.plot_param_importances(study)

In [7]:
optuna.visualization.plot_optimization_history(study)

In [8]:
df_params = study.trials_dataframe().sort_values(by='value',ascending=False).reset_index(drop=True)
df_params.head(10)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_cooc_prob,params_num_cooc_week,params_num_repurchase_week,params_num_train_weeks,params_transaction_dynamic_week,params_volume_week,state
0,3,0.029891,2022-04-07 15:25:04.537707,2022-04-07 16:09:37.826500,0 days 00:44:33.288793,0.03073,12,20,3,7,1,COMPLETE
1,12,0.029783,2022-04-07 19:39:53.398351,2022-04-07 20:21:11.845156,0 days 00:41:18.446805,0.030284,13,20,3,8,2,COMPLETE
2,11,0.029778,2022-04-07 19:00:18.170230,2022-04-07 19:39:53.397055,0 days 00:39:35.226825,0.032797,16,20,3,8,2,COMPLETE
3,7,0.029521,2022-04-07 17:27:34.015229,2022-04-07 17:57:36.429954,0 days 00:30:02.414725,0.036632,15,18,3,8,4,COMPLETE
4,10,0.029507,2022-04-07 18:25:41.782136,2022-04-07 19:00:18.168923,0 days 00:34:36.386787,0.026336,12,20,2,5,1,COMPLETE
5,6,0.029186,2022-04-07 17:16:12.983537,2022-04-07 17:27:34.013984,0 days 00:11:21.030447,0.04251,4,17,1,1,4,COMPLETE
6,5,0.029184,2022-04-07 16:16:36.223654,2022-04-07 17:16:12.982363,0 days 00:59:36.758709,0.021686,11,16,4,2,3,COMPLETE
7,8,0.02909,2022-04-07 17:57:36.431243,2022-04-07 18:17:16.882731,0 days 00:19:40.451488,0.04645,14,14,3,3,2,COMPLETE
8,0,0.028929,2022-04-07 14:41:34.704799,2022-04-07 14:50:10.245815,0 days 00:08:35.541016,0.033452,8,12,1,4,3,COMPLETE
9,4,0.028857,2022-04-07 16:09:37.827727,2022-04-07 16:16:36.222477,0 days 00:06:58.394750,0.044312,9,12,1,7,1,COMPLETE
