In [1]:
VER = 6

import pandas as pd, numpy as np
from tqdm.notebook import tqdm
tqdm.pandas()

import os, sys, pickle, glob, gc
from collections import Counter
import cudf, itertools
print('We will use RAPIDS version',cudf.__version__)

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# from pandarallel import pandarallel

# pandarallel.initialize(nb_workers=4, progress_bar=True, use_memory_fs=False)

import xgboost as xgb
from sklearn.model_selection import GroupKFold
import polars as pl

from pyarrow.parquet import ParquetFile
import pyarrow as pa 

We will use RAPIDS version 22.10.00a+392.g1558403753


# Training

In [2]:
type_labels = {'clicks':0, 'carts':1, 'orders':2}

In [3]:
CANDIDATE_COUNT = 100

In [4]:
train_sessions = np.load("./splitted_raw_data/val_sessions_for_train.npy", allow_pickle=True)

## For each action

In [5]:
from catboost import CatBoostRanker, Pool, MetricVisualizer

for type_str in tqdm(list(type_labels.keys())):
    
#     pf = ParquetFile(f"./candidated_features/local_{type_str}_all_data.pqt") 
    
#     whole_df = pd.read_parquet(f"./candidated_features/local_{type_str}_all_data.pqt").reset_index(drop=True)
   
    whole_df = []
    batches = sorted(glob.glob(f"./candidated_features/local_{type_str}_all_data_{CANDIDATE_COUNT}candidates_p*.pqt"))
    
    for batch in tqdm(batches):
        
        batch = pd.read_parquet(batch)#.drop(labels=["candidate_rank"], axis=1)
        batch = batch[batch.session.isin(train_sessions)].reset_index(drop=True)
        positives = batch.loc[batch['label']==1]
        negatives = batch.loc[batch['label']==0].groupby("session").sample(frac=0.2,
                                                                           random_state=1337)
        whole_df.append(positives)
        whole_df.append(negatives)
        
    whole_df = pd.concat(whole_df, axis=0, ignore_index=True)
    del positives, negatives
    
    print("sampled")

    FEATURES = whole_df.columns[2 : -1]   
    
    whole_df = whole_df.sort_values('session').reset_index(drop=True)
#     whole_df["group_count"] = whole_df.groupby('session', sort=False).cumcount('candidates') + 1
    
    skf = GroupKFold(n_splits=5)
    for fold, (train_idx, valid_idx) in enumerate(skf.split(whole_df,
                                                            whole_df['label'],
                                                            groups=whole_df['session'])):
        X_train = whole_df.loc[train_idx, FEATURES]
        y_train = whole_df.loc[train_idx, 'label']
        X_valid = whole_df.loc[valid_idx, FEATURES]
        y_valid = whole_df.loc[valid_idx, 'label']

#         train_groups = whole_df.loc[train_idx, ['session', 'group_count']].groupby('session')['group_count'].last().to_numpy()
#         val_groups = whole_df.loc[valid_idx, ['session', 'group_count']].groupby('session')['group_count'].last().to_numpy()
        
        train_groups = whole_df.loc[train_idx, "session"].values
        val_groups = whole_df.loc[valid_idx, "session"].values
#         dtrain = xgb.DMatrix(X_train, y_train, group = train_groups) 
#         dvalid = xgb.DMatrix(X_valid, y_valid, group = val_groups) 

#         xgb_parms = {
#             'objective':'rank:map',
#             'eval_metric':'map@20',
#                      'tree_method':'gpu_hist',
#                     "random_state":42, 
#                     "learning_rate":0.05,
# #                     "colsample_bytree":0.9, 
# #                     "gamma": 2,
# #                   'min_child_weight': 0.1,
#                     "max_depth":4, 
# #                     "subsample":0.8
#                     }
#         model = xgb.train(xgb_parms,
#                           dtrain=dtrain,
#                           evals=[(dtrain,'train'),(dvalid,'valid')],
#                           num_boost_round=250,
# #                           early_stopping_rounds=400,
#                           verbose_eval=100)
        train = Pool(
            data=X_train,
            label=y_train,
            group_id=train_groups
        )

        test = Pool(
            data=X_valid,
            label=y_valid,
            group_id=val_groups
        )
        
        loss = "YetiRankPairwise"
        cb_parameters = {
            'iterations': 10000,
            'loss_function': loss,
            'train_dir': loss,
            'task_type': 'GPU',
            'depth': 7,
            'learning_rate': 0.025,
#             'custom_metric': ['RecallAt:top=20', 'PrecisionAt:top=20'],
#             'eval_metric': ['MAP'],
            'early_stopping_rounds': 200,
#             'metric_period':0,
            'verbose': 100,
            'random_seed': 0,
        }
        model = CatBoostRanker(**cb_parameters)
        model.fit(train,
                  eval_set=test,
#                   plot=True
                 )
        
        print(np.mean(model.eval_metrics(test, ['MAP'])['MAP']))
    

#         model.save_model(f'./models/XGB_{CANDIDATE_COUNT}candidates_fold{fold}_{type_str}.xgb')
        model.save_model(f'./models/CB_{CANDIDATE_COUNT}candidates_fold{fold}_{type_str}.cb')
        del model, X_train, y_train, X_valid, y_valid, train, test
        
        for i in range(5):
            gc.collect()
            
    del whole_df
    for i in range(5):
        gc.collect()

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/19 [00:00<?, ?it/s]

sampled


Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.4828714	best: 0.4828714 (0)	total: 108ms	remaining: 18m 3s
100:	test: 0.5007433	best: 0.5007485 (98)	total: 9.43s	remaining: 15m 24s
200:	test: 0.5038135	best: 0.5038154 (199)	total: 18.9s	remaining: 15m 22s
300:	test: 0.5048874	best: 0.5048874 (300)	total: 28.3s	remaining: 15m 13s
400:	test: 0.5054465	best: 0.5054465 (400)	total: 37.7s	remaining: 15m 1s
500:	test: 0.5059567	best: 0.5059567 (500)	total: 47s	remaining: 14m 51s
600:	test: 0.5063829	best: 0.5063829 (600)	total: 56.3s	remaining: 14m 40s
700:	test: 0.5067294	best: 0.5067320 (696)	total: 1m 5s	remaining: 14m 30s
800:	test: 0.5071911	best: 0.5071911 (800)	total: 1m 14s	remaining: 14m 19s
900:	test: 0.5074444	best: 0.5074444 (900)	total: 1m 24s	remaining: 14m 10s
1000:	test: 0.5076991	best: 0.5076991 (1000)	total: 1m 33s	remaining: 14m 1s
1100:	test: 0.5079141	best: 0.5079141 (1100)	total: 1m 42s	remaining: 13m 51s
1200:	test: 0.5081271	best: 0.5081271 (1200)	total: 1m 52s	remaining: 13m 42s
1300:	test: 0.5083125	be

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.4827715	best: 0.4827715 (0)	total: 96.4ms	remaining: 16m 3s
100:	test: 0.5012064	best: 0.5012097 (98)	total: 8.56s	remaining: 13m 58s
200:	test: 0.5034465	best: 0.5034465 (200)	total: 17.1s	remaining: 13m 53s
300:	test: 0.5047732	best: 0.5047732 (300)	total: 25.6s	remaining: 13m 44s
400:	test: 0.5053160	best: 0.5053174 (399)	total: 34s	remaining: 13m 34s
500:	test: 0.5057979	best: 0.5057979 (500)	total: 42.4s	remaining: 13m 24s
600:	test: 0.5062664	best: 0.5062664 (600)	total: 50.8s	remaining: 13m 14s
700:	test: 0.5066544	best: 0.5066544 (700)	total: 59.3s	remaining: 13m 6s
800:	test: 0.5070229	best: 0.5070229 (800)	total: 1m 7s	remaining: 12m 57s
900:	test: 0.5073269	best: 0.5073269 (900)	total: 1m 16s	remaining: 12m 48s
1000:	test: 0.5076137	best: 0.5076137 (1000)	total: 1m 24s	remaining: 12m 39s
1100:	test: 0.5078027	best: 0.5078059 (1098)	total: 1m 32s	remaining: 12m 30s
1200:	test: 0.5079879	best: 0.5079882 (1199)	total: 1m 41s	remaining: 12m 21s
1300:	test: 0.5081555	b

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.4834129	best: 0.4834129 (0)	total: 94.9ms	remaining: 15m 49s
100:	test: 0.5013412	best: 0.5013412 (100)	total: 8.45s	remaining: 13m 48s
200:	test: 0.5038638	best: 0.5038638 (200)	total: 16.9s	remaining: 13m 45s
300:	test: 0.5050626	best: 0.5050627 (299)	total: 25.4s	remaining: 13m 37s
400:	test: 0.5055986	best: 0.5055986 (400)	total: 33.8s	remaining: 13m 29s
500:	test: 0.5062757	best: 0.5062757 (500)	total: 42.2s	remaining: 13m 20s
600:	test: 0.5066131	best: 0.5066141 (597)	total: 50.6s	remaining: 13m 12s
700:	test: 0.5069544	best: 0.5069544 (700)	total: 59s	remaining: 13m 3s
800:	test: 0.5073347	best: 0.5073368 (799)	total: 1m 7s	remaining: 12m 54s
900:	test: 0.5076092	best: 0.5076092 (900)	total: 1m 15s	remaining: 12m 45s
1000:	test: 0.5078496	best: 0.5078535 (998)	total: 1m 24s	remaining: 12m 36s
1100:	test: 0.5081193	best: 0.5081193 (1100)	total: 1m 32s	remaining: 12m 28s
1200:	test: 0.5083417	best: 0.5083438 (1198)	total: 1m 40s	remaining: 12m 19s
1300:	test: 0.5085472	

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.4831815	best: 0.4831815 (0)	total: 95.6ms	remaining: 15m 55s
100:	test: 0.5006734	best: 0.5006734 (100)	total: 8.53s	remaining: 13m 55s
200:	test: 0.5036126	best: 0.5036132 (199)	total: 17.1s	remaining: 13m 52s
300:	test: 0.5048122	best: 0.5048122 (300)	total: 25.5s	remaining: 13m 42s
400:	test: 0.5054763	best: 0.5054765 (399)	total: 34s	remaining: 13m 32s
500:	test: 0.5059482	best: 0.5059482 (500)	total: 42.4s	remaining: 13m 23s
600:	test: 0.5063073	best: 0.5063073 (600)	total: 50.8s	remaining: 13m 13s
700:	test: 0.5067271	best: 0.5067271 (700)	total: 59.2s	remaining: 13m 4s
800:	test: 0.5070736	best: 0.5070736 (800)	total: 1m 7s	remaining: 12m 55s
900:	test: 0.5073435	best: 0.5073435 (900)	total: 1m 15s	remaining: 12m 47s
1000:	test: 0.5075958	best: 0.5075958 (1000)	total: 1m 24s	remaining: 12m 38s
1100:	test: 0.5078695	best: 0.5078695 (1100)	total: 1m 32s	remaining: 12m 29s
1200:	test: 0.5080530	best: 0.5080530 (1198)	total: 1m 41s	remaining: 12m 20s
1300:	test: 0.5082359

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.4822883	best: 0.4822883 (0)	total: 103ms	remaining: 17m 11s
100:	test: 0.4999599	best: 0.4999599 (100)	total: 9.37s	remaining: 15m 18s
200:	test: 0.5030956	best: 0.5030956 (200)	total: 18.7s	remaining: 15m 10s
300:	test: 0.5040845	best: 0.5040845 (300)	total: 28s	remaining: 15m
400:	test: 0.5046032	best: 0.5046032 (400)	total: 37.2s	remaining: 14m 51s
500:	test: 0.5051558	best: 0.5051558 (500)	total: 46.5s	remaining: 14m 41s
600:	test: 0.5055776	best: 0.5055780 (599)	total: 55.7s	remaining: 14m 31s
700:	test: 0.5058805	best: 0.5058805 (700)	total: 1m 4s	remaining: 14m 21s
800:	test: 0.5062545	best: 0.5062580 (799)	total: 1m 14s	remaining: 14m 12s
900:	test: 0.5065764	best: 0.5065764 (900)	total: 1m 23s	remaining: 14m 2s
1000:	test: 0.5068771	best: 0.5068771 (1000)	total: 1m 32s	remaining: 13m 53s
1100:	test: 0.5071047	best: 0.5071047 (1100)	total: 1m 41s	remaining: 13m 43s
1200:	test: 0.5073376	best: 0.5073376 (1200)	total: 1m 51s	remaining: 13m 33s
1300:	test: 0.5075106	bes

  0%|          | 0/19 [00:00<?, ?it/s]

sampled


Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.1067783	best: 0.1067783 (0)	total: 97.7ms	remaining: 16m 16s
100:	test: 0.1104280	best: 0.1104280 (100)	total: 8.61s	remaining: 14m 3s
200:	test: 0.1107849	best: 0.1107849 (200)	total: 17s	remaining: 13m 50s
300:	test: 0.1110038	best: 0.1110038 (300)	total: 25.4s	remaining: 13m 40s
400:	test: 0.1111027	best: 0.1111027 (400)	total: 33.8s	remaining: 13m 30s
500:	test: 0.1111975	best: 0.1111975 (500)	total: 42.2s	remaining: 13m 20s
600:	test: 0.1112385	best: 0.1112404 (599)	total: 50.6s	remaining: 13m 12s
700:	test: 0.1113100	best: 0.1113105 (699)	total: 59s	remaining: 13m 3s
800:	test: 0.1113574	best: 0.1113653 (786)	total: 1m 7s	remaining: 12m 54s
900:	test: 0.1114184	best: 0.1114197 (899)	total: 1m 15s	remaining: 12m 45s
1000:	test: 0.1114565	best: 0.1114565 (1000)	total: 1m 24s	remaining: 12m 37s
1100:	test: 0.1114951	best: 0.1114963 (1096)	total: 1m 32s	remaining: 12m 29s
1200:	test: 0.1115283	best: 0.1115330 (1189)	total: 1m 41s	remaining: 12m 21s
1300:	test: 0.1115408	be

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.1067528	best: 0.1067528 (0)	total: 99.9ms	remaining: 16m 39s
100:	test: 0.1103707	best: 0.1103707 (100)	total: 8.59s	remaining: 14m 1s
200:	test: 0.1106699	best: 0.1106699 (200)	total: 17s	remaining: 13m 47s
300:	test: 0.1108228	best: 0.1108228 (300)	total: 25.4s	remaining: 13m 39s
400:	test: 0.1109375	best: 0.1109375 (400)	total: 33.8s	remaining: 13m 29s
500:	test: 0.1110138	best: 0.1110139 (499)	total: 42.2s	remaining: 13m 20s
600:	test: 0.1110717	best: 0.1110744 (594)	total: 50.6s	remaining: 13m 12s
700:	test: 0.1111125	best: 0.1111148 (699)	total: 59s	remaining: 13m 3s
800:	test: 0.1111615	best: 0.1111615 (800)	total: 1m 7s	remaining: 12m 54s
900:	test: 0.1112007	best: 0.1112016 (891)	total: 1m 15s	remaining: 12m 45s
1000:	test: 0.1112356	best: 0.1112418 (987)	total: 1m 24s	remaining: 12m 37s
1100:	test: 0.1112701	best: 0.1112714 (1099)	total: 1m 32s	remaining: 12m 28s
1200:	test: 0.1113032	best: 0.1113059 (1197)	total: 1m 41s	remaining: 12m 20s
1300:	test: 0.1113242	bes

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.1064353	best: 0.1064353 (0)	total: 95.4ms	remaining: 15m 53s
100:	test: 0.1102085	best: 0.1102086 (99)	total: 8.61s	remaining: 14m 4s
200:	test: 0.1105492	best: 0.1105492 (200)	total: 17s	remaining: 13m 49s
300:	test: 0.1106970	best: 0.1106989 (296)	total: 25.4s	remaining: 13m 38s
400:	test: 0.1108185	best: 0.1108185 (400)	total: 33.8s	remaining: 13m 28s
500:	test: 0.1108934	best: 0.1108959 (499)	total: 42.1s	remaining: 13m 18s
600:	test: 0.1109694	best: 0.1109699 (597)	total: 50.5s	remaining: 13m 9s
700:	test: 0.1110324	best: 0.1110324 (700)	total: 58.9s	remaining: 13m
800:	test: 0.1110752	best: 0.1110803 (789)	total: 1m 7s	remaining: 12m 51s
900:	test: 0.1111176	best: 0.1111188 (899)	total: 1m 15s	remaining: 12m 43s
1000:	test: 0.1111581	best: 0.1111581 (999)	total: 1m 23s	remaining: 12m 35s
1100:	test: 0.1111921	best: 0.1111939 (1098)	total: 1m 32s	remaining: 12m 26s
1200:	test: 0.1112187	best: 0.1112193 (1198)	total: 1m 40s	remaining: 12m 18s
1300:	test: 0.1112313	best: 

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.1067175	best: 0.1067175 (0)	total: 96.6ms	remaining: 16m 6s
100:	test: 0.1104297	best: 0.1104297 (100)	total: 8.52s	remaining: 13m 54s
200:	test: 0.1107024	best: 0.1107024 (200)	total: 16.9s	remaining: 13m 43s
300:	test: 0.1108728	best: 0.1108742 (298)	total: 25.3s	remaining: 13m 34s
400:	test: 0.1109823	best: 0.1109853 (397)	total: 33.6s	remaining: 13m 25s
500:	test: 0.1110634	best: 0.1110634 (500)	total: 42s	remaining: 13m 16s
600:	test: 0.1111301	best: 0.1111307 (598)	total: 50.4s	remaining: 13m 8s
700:	test: 0.1111923	best: 0.1111949 (698)	total: 58.8s	remaining: 12m 59s
800:	test: 0.1112514	best: 0.1112514 (800)	total: 1m 7s	remaining: 12m 50s
900:	test: 0.1112932	best: 0.1112940 (898)	total: 1m 15s	remaining: 12m 41s
1000:	test: 0.1113258	best: 0.1113291 (981)	total: 1m 23s	remaining: 12m 33s
1100:	test: 0.1113562	best: 0.1113575 (1099)	total: 1m 32s	remaining: 12m 25s
1200:	test: 0.1113667	best: 0.1113726 (1196)	total: 1m 40s	remaining: 12m 16s
1300:	test: 0.1113990	b

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.1073242	best: 0.1073242 (0)	total: 96.5ms	remaining: 16m 4s
100:	test: 0.1109809	best: 0.1109840 (98)	total: 8.59s	remaining: 14m 2s
200:	test: 0.1113259	best: 0.1113259 (200)	total: 17s	remaining: 13m 47s
300:	test: 0.1114714	best: 0.1114732 (299)	total: 25.4s	remaining: 13m 37s
400:	test: 0.1115873	best: 0.1115921 (399)	total: 33.7s	remaining: 13m 27s
500:	test: 0.1116587	best: 0.1116613 (499)	total: 42.1s	remaining: 13m 18s
600:	test: 0.1117205	best: 0.1117225 (597)	total: 50.5s	remaining: 13m 10s
700:	test: 0.1117938	best: 0.1117938 (700)	total: 58.9s	remaining: 13m 1s
800:	test: 0.1118205	best: 0.1118218 (799)	total: 1m 7s	remaining: 12m 52s
900:	test: 0.1118617	best: 0.1118651 (877)	total: 1m 15s	remaining: 12m 44s
1000:	test: 0.1118854	best: 0.1118875 (994)	total: 1m 24s	remaining: 12m 36s
1100:	test: 0.1119260	best: 0.1119272 (1098)	total: 1m 32s	remaining: 12m 27s
1200:	test: 0.1119401	best: 0.1119423 (1185)	total: 1m 40s	remaining: 12m 19s
1300:	test: 0.1119602	bes

  0%|          | 0/19 [00:00<?, ?it/s]

sampled


Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.0678116	best: 0.0678116 (0)	total: 93.3ms	remaining: 15m 32s
100:	test: 0.0695292	best: 0.0695322 (97)	total: 8.22s	remaining: 13m 25s
200:	test: 0.0696450	best: 0.0696450 (200)	total: 16.4s	remaining: 13m 17s
300:	test: 0.0696877	best: 0.0696891 (299)	total: 24.4s	remaining: 13m 7s
400:	test: 0.0697223	best: 0.0697265 (374)	total: 32.5s	remaining: 12m 58s
500:	test: 0.0697640	best: 0.0697654 (485)	total: 40.6s	remaining: 12m 50s
600:	test: 0.0697988	best: 0.0697988 (600)	total: 48.8s	remaining: 12m 43s
700:	test: 0.0698353	best: 0.0698385 (680)	total: 57s	remaining: 12m 35s
800:	test: 0.0698686	best: 0.0698702 (797)	total: 1m 5s	remaining: 12m 27s
900:	test: 0.0698854	best: 0.0698871 (892)	total: 1m 13s	remaining: 12m 20s
1000:	test: 0.0699033	best: 0.0699045 (993)	total: 1m 21s	remaining: 12m 12s
1100:	test: 0.0699185	best: 0.0699185 (1100)	total: 1m 29s	remaining: 12m 4s
1200:	test: 0.0699395	best: 0.0699395 (1200)	total: 1m 37s	remaining: 11m 56s
1300:	test: 0.0699629	be

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.0675761	best: 0.0675761 (0)	total: 91.7ms	remaining: 15m 16s
100:	test: 0.0693118	best: 0.0693147 (95)	total: 8.22s	remaining: 13m 25s
200:	test: 0.0694262	best: 0.0694262 (200)	total: 16.3s	remaining: 13m 17s
300:	test: 0.0694868	best: 0.0694868 (300)	total: 24.5s	remaining: 13m 8s
400:	test: 0.0695342	best: 0.0695351 (399)	total: 32.6s	remaining: 12m 59s
500:	test: 0.0695990	best: 0.0695990 (500)	total: 40.7s	remaining: 12m 51s
600:	test: 0.0696404	best: 0.0696404 (600)	total: 48.9s	remaining: 12m 44s
700:	test: 0.0696705	best: 0.0696733 (691)	total: 57.1s	remaining: 12m 36s
800:	test: 0.0697014	best: 0.0697016 (794)	total: 1m 5s	remaining: 12m 29s
900:	test: 0.0697208	best: 0.0697210 (891)	total: 1m 13s	remaining: 12m 21s
1000:	test: 0.0697360	best: 0.0697360 (1000)	total: 1m 21s	remaining: 12m 14s
1100:	test: 0.0697529	best: 0.0697535 (1094)	total: 1m 29s	remaining: 12m 6s
1200:	test: 0.0697673	best: 0.0697677 (1199)	total: 1m 38s	remaining: 11m 58s
1300:	test: 0.0697864

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.0676397	best: 0.0676397 (0)	total: 89.9ms	remaining: 14m 59s
100:	test: 0.0693557	best: 0.0693558 (95)	total: 8.21s	remaining: 13m 24s
200:	test: 0.0694338	best: 0.0694340 (195)	total: 16.3s	remaining: 13m 15s
300:	test: 0.0695321	best: 0.0695329 (298)	total: 24.4s	remaining: 13m 6s
400:	test: 0.0695819	best: 0.0695837 (399)	total: 32.5s	remaining: 12m 57s
500:	test: 0.0696361	best: 0.0696363 (499)	total: 40.6s	remaining: 12m 50s
600:	test: 0.0696891	best: 0.0696891 (600)	total: 48.9s	remaining: 12m 44s
700:	test: 0.0697183	best: 0.0697196 (699)	total: 57.1s	remaining: 12m 36s
800:	test: 0.0697512	best: 0.0697535 (798)	total: 1m 5s	remaining: 12m 28s
900:	test: 0.0697690	best: 0.0697715 (883)	total: 1m 13s	remaining: 12m 21s
1000:	test: 0.0697976	best: 0.0697989 (998)	total: 1m 21s	remaining: 12m 13s
1100:	test: 0.0698124	best: 0.0698165 (1088)	total: 1m 29s	remaining: 12m 6s
1200:	test: 0.0698361	best: 0.0698373 (1190)	total: 1m 38s	remaining: 11m 58s
1300:	test: 0.0698422	

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.0681803	best: 0.0681803 (0)	total: 92.3ms	remaining: 15m 23s
100:	test: 0.0697274	best: 0.0697321 (94)	total: 8.21s	remaining: 13m 24s
200:	test: 0.0698715	best: 0.0698720 (199)	total: 16.4s	remaining: 13m 17s
300:	test: 0.0699083	best: 0.0699094 (299)	total: 24.5s	remaining: 13m 7s
400:	test: 0.0699325	best: 0.0699336 (397)	total: 32.6s	remaining: 12m 59s
500:	test: 0.0699810	best: 0.0699812 (499)	total: 40.7s	remaining: 12m 51s
600:	test: 0.0700279	best: 0.0700279 (600)	total: 48.9s	remaining: 12m 44s
700:	test: 0.0700523	best: 0.0700530 (690)	total: 57s	remaining: 12m 36s
800:	test: 0.0700746	best: 0.0700765 (776)	total: 1m 5s	remaining: 12m 29s
900:	test: 0.0700988	best: 0.0701001 (896)	total: 1m 13s	remaining: 12m 21s
1000:	test: 0.0701147	best: 0.0701169 (983)	total: 1m 21s	remaining: 12m 13s
1100:	test: 0.0701323	best: 0.0701335 (1099)	total: 1m 29s	remaining: 12m 5s
1200:	test: 0.0701524	best: 0.0701530 (1190)	total: 1m 37s	remaining: 11m 57s
1300:	test: 0.0701573	be

Default metric period is 5 because PFound is/are not implemented for GPU
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
Metric PFound is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.0678951	best: 0.0678951 (0)	total: 91.9ms	remaining: 15m 19s
100:	test: 0.0695001	best: 0.0695050 (95)	total: 8.18s	remaining: 13m 21s
200:	test: 0.0696378	best: 0.0696378 (200)	total: 16.3s	remaining: 13m 14s
300:	test: 0.0697137	best: 0.0697143 (297)	total: 24.4s	remaining: 13m 5s
400:	test: 0.0697406	best: 0.0697412 (392)	total: 32.5s	remaining: 12m 57s
500:	test: 0.0697831	best: 0.0697855 (492)	total: 40.6s	remaining: 12m 49s
600:	test: 0.0698256	best: 0.0698309 (587)	total: 48.8s	remaining: 12m 43s
700:	test: 0.0698480	best: 0.0698507 (675)	total: 57.1s	remaining: 12m 36s
800:	test: 0.0698743	best: 0.0698756 (792)	total: 1m 5s	remaining: 12m 29s
900:	test: 0.0698982	best: 0.0698994 (887)	total: 1m 13s	remaining: 12m 21s
1000:	test: 0.0699255	best: 0.0699255 (999)	total: 1m 21s	remaining: 12m 13s
1100:	test: 0.0699429	best: 0.0699454 (1095)	total: 1m 29s	remaining: 12m 6s
1200:	test: 0.0699643	best: 0.0699645 (1198)	total: 1m 38s	remaining: 11m 58s
1300:	test: 0.0699845	

In [6]:
feat_importances = pd.Series(model.get_feature_importance(data=train), index=whole_df[FEATURES].columns)

NameError: name 'model' is not defined

In [None]:
feat_importances.nlargest(30).plot(kind='barh')

In [None]:
FEATURES