In [20]:
import os
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import scipy.sparse as sp
import gc
import xgboost as xgb
from tqdm.auto import tqdm

import optuna
import pathlib
import joblib as jl
from datetime import datetime


from sklearn.model_selection import GroupKFold, KFold

In [21]:
import sys
sys.path.append('..')
sys.path.append('/Dataset')

In [22]:
from DressipiChallenge.Recommenders.GraphBased.P3alphaRecommender import P3alphaRecommender
from DressipiChallenge.Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from DressipiChallenge.Recommenders.NonPersonalizedRecommender import TopPop

In [23]:
from DressipiChallenge.Pipeline.data_extraction import get_dataframes
from DressipiChallenge.Pipeline.data_splitting import train_val_split
from DressipiChallenge.Pipeline.matrices_creation import create_URM
from DressipiChallenge.Pipeline.utils import create_mapping, batch_compute_item_score, batch_recommend, get_mapped_sessions_to_recommend, get_items_to_exclude
from DressipiChallenge.Pipeline.xgboost.xgboost_utils import load_xgboost_df, fit_models, flat_list, generate_predictions, rename_columns
from DressipiChallenge.Pipeline.telegram_utils import telegram_bot_sendfile, telegram_bot_sendtext

# TEST CELLS XGBOOST

In [None]:
# Generate the predictions of every model on the validation sessions.
# N.B.: the returned dataframes use mapped ids.
# NOTE(review): `models` and `val_sessions_arr` are only created further down, in the
# PIPELINE section, so this cell fails on a fresh kernel unless those cells ran first.
dataframes_list = generate_predictions(models, val_sessions_arr)

In [None]:
# Outer-join the per-model prediction frames on (session_id, item_id), one frame at a time,
# starting from the first model's frame.
boosted_df = dataframes_list[0]

for model_predictions in dataframes_list[1:]:
    boosted_df = boosted_df.merge(model_predictions, how='outer', on=['session_id', 'item_id'])

boosted_df

In [None]:
# Build the labelled validation frame: add the purchases missing from the candidates,
# drop duplicated candidates and create the binary target.
in_val_window = (train_purchases_df.date >= '2021-05-01') & (train_purchases_df.date < '2021-06-01')
val_purchases = train_purchases_df[in_val_window][['session_id', 'item_id']]

# Translate raw ids to the mapped ids used by the candidate frames and flag them as positives.
val_purchases = val_purchases.assign(
    session_id=val_purchases['session_id'].map(val_session_mapping),
    item_id=val_purchases['item_id'].map(item_mapping),
    target=1,
)

merged_df = (
    boosted_df
    .drop_duplicates(keep='last')
    .merge(val_purchases, how='outer', on=['session_id', 'item_id'])
    # Name the index so the original row order can be used as the secondary sort key.
    .rename_axis('index')
    .sort_values(by=['session_id', 'index'], na_position='first')
    .reset_index(drop=True)
)

# Rows that did not come from a purchase have no target after the outer join: they are negatives.
merged_df['target'] = merged_df['target'].fillna(False).astype('uint8')

merged_df

In [None]:
# Genius version
#
# Fill the item scores that a model did not produce during candidate generation.
# The k-th score column of `merged_df` is taken to belong to `models[k]`.
# NOTE(review): assumes batch_compute_item_score returns one row per entry of
# `session_ids` (in that order) and that its columns can be indexed by mapped item id
# -- confirm against the helper.

filled_df = merged_df.copy()

score_columns = [col for col in filled_df.columns if col not in ['session_id', 'item_id', 'target']]

for i, column in enumerate(score_columns):
    print(column)

    missing_mask = filled_df[column].isna()
    if not missing_mask.any():
        # Nothing to fill for this model: skip the (expensive) score computation.
        continue

    selected_df = filled_df[missing_mask]

    session_ids = np.unique(selected_df.session_id.to_list())
    items_to_compute = np.unique(selected_df.item_id.to_list())

    scores_list = np.array(batch_compute_item_score(models[i], session_ids, items_to_compute, 100))

    print('Obtaining item indices...')
    item_indices = selected_df.item_id.to_list()

    print('Obtaining session indices...')
    # Row of `scores_list` for every missing row, looked up by session id so the result
    # does not depend on the frame being sorted by session.
    session_row = {session: row for row, session in enumerate(session_ids)}
    session_indices = [session_row[session] for session in selected_df.session_id.to_list()]

    print('Indexing...')
    # Write the scores straight into the rows that were missing them.
    filled_df.loc[missing_mask, column] = scores_list[session_indices, item_indices]

filled_df

In [None]:
# DF merge version
#
# Experimental alternative: compute the missing scores of each model, put them into a
# (session_id, item_id, score) frame and left-merge that frame back into `merged_df`.
# NOTE(review): this cell reassigns `merged_df` in place, so it is not idempotent.

# NOTE(review): `score_col` is never read in this cell.
score_col = merged_df.columns
# selected_df = pd.DataFrame()

# Position of the model that owns the current column.
i=0

# NOTE(review): unlike the "Genius version" cell, 'target' is not excluded here, so the
# loop also visits the target column.
for column in merged_df.columns:
  if not (column in ['session_id', 'item_id']):
    print(column)
    # Rows for which this column has no value yet.
    selected_df = merged_df[merged_df[column].isna()].copy()

    session_ids = np.unique(selected_df.session_id.to_list())
    items_to_compute = np.unique(selected_df.item_id.to_list())
    print("len session_ids: " + str(len(session_ids)))
    print("len items_to_compute: " + str(len(items_to_compute)))

    # Free the selection before the score computation.
    del selected_df 
    gc.collect()

    scores_list = batch_compute_item_score(models[i], session_ids, items_to_compute, 100)

    print("Finished computing scores.")

    # One copy of each session id per item to compute (cartesian product, session-major).
    replicated_session_ids = flat_list([([session]*len(items_to_compute)) for session in session_ids])
    # NOTE(review): `items_to_compute` is a numpy array (np.unique output), so `*` multiplies
    # the ids element-wise instead of repeating the sequence; np.tile was probably intended.
    replicated_items_to_compute = items_to_compute * len(session_ids)

    print("Finished creating column array.")

    del session_ids, items_to_compute
    gc.collect()

    # NOTE(review): requires `scores_list` to be flat and as long as the replicated id
    # columns -- confirm the shape returned by batch_compute_item_score.
    df = pd.DataFrame({'session_id': replicated_session_ids, 'item_id': replicated_items_to_compute, column: scores_list})
    print("Finished creating dataframe.")

    del scores_list, replicated_session_ids, replicated_items_to_compute
    gc.collect()
    
    # NOTE(review): `column` exists in both frames, so pandas will suffix the two copies
    # rather than fill the missing values of the original column.
    merged_df = pd.merge(merged_df, df, how='left', on=['session_id', 'item_id'])
    print("Finished merging.")
    print(merged_df)

    del df
    gc.collect()

    i+=1
    
merged_df

In [None]:
# Scratch check: a 2-D numpy array is indexed with a (row, column) pair.
arr = np.array([[1, 2, 3], [4, 5, 6]])
arr[0, 0]

In [None]:
# Toy data for the indexing experiments below: three candidate rows with missing scores,
# the two session ids they belong to, and a 2x3 score matrix (one row per session).
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
df = pd.DataFrame({'session_id': [4, 5, 5], 'item_id': [0, 2, 2], 'item_score': [np.nan, np.nan, np.nan]})

session_ids = [4, 5]

arr = [[1, 2, 3], [4, 5, 6]]
scores_list = np.array(arr)
print(scores_list)


In [None]:
# For every session (in `session_ids` order), the item ids of its rows in `df`.
item_indices = [
    df.loc[df.session_id == current_session, 'item_id'].to_list()
    for current_session in session_ids
]
item_indices

In [None]:
# Row index into `scores_list` for every item of every session: the session's position in
# `session_ids`, repeated once per item. The raw session id is not used because
# `scores_list` has one row per position, not per id.
session_indices = [[position] * len(items) for position, items in enumerate(item_indices)]
session_indices

In [None]:
# Scratch check: display what the project's flat_list helper returns for the nested
# per-session item lists.
flat_list(item_indices)

In [None]:
# For each session, print the scores found at (session row, item id) in `scores_list`.
for row_position, current_session in enumerate(session_ids):
    current_items = df.loc[df.session_id == current_session, 'item_id'].to_list()
    print(scores_list[row_position, current_items])

In [None]:


def my_assign(x, value):
    """Store `value` at position 2 of the row `x`, print the row and return it.

    The row is modified in place; the same object is returned so the function can be
    used inside `DataFrame.apply(..., raw=True)`.
    """
    x[2] = value
    print(x)
    return x

# For each session, write the score of every row's item (row position 1 holds the item id)
# into the row through my_assign, then print the updated rows of that session.
for row_position, current_session in enumerate(session_ids):
    current_section = df[df.session_id == current_session]
    current_section = current_section.apply(
        lambda row: my_assign(row, scores_list[row_position, int(row[1])]),
        axis=1,
        raw=True,
    )
    print(current_section)


In [None]:
# Memory efficient version
#
# Fill the missing scores one session at a time instead of in batches.
# The k-th score column of `merged_df` is taken to belong to `models[k]`.
# NOTE(review): `single_compute_item_score` is not imported anywhere in this notebook;
# it has to be brought into scope before this cell can run.

score_columns = [col for col in merged_df.columns if col not in ['session_id', 'item_id', 'target']]

for i, column in enumerate(score_columns):
    print(column)

    # Only the ids of the rows that still miss this model's score are needed.
    selected_df = merged_df.loc[merged_df[column].isna(), ['session_id', 'item_id']]

    row_labels = []
    scores_list = []

    # groupby visits the sessions in ascending id order and hands over each session's rows
    # directly, avoiding a full boolean scan of the frame per session.
    for session_id, session_rows in tqdm(selected_df.groupby('session_id')):
        items_to_compute = session_rows.item_id.to_list()

        temp_scores_list = single_compute_item_score(models[i], [session_id], items_to_compute)

        scores_list.extend(temp_scores_list)
        row_labels.extend(session_rows.index)

    print("Finished computing scores.")

    if len(scores_list) != len(selected_df):
        raise Exception('NUMBER OF SCORES DOES NOT MATCH')

    # Single label-based assignment: writes into merged_df itself (a chained
    # `merged_df[column].iloc[...] = ...` may only modify a temporary copy).
    merged_df.loc[row_labels, column] = scores_list

    del selected_df, scores_list, row_labels
    gc.collect()

    print(merged_df)

merged_df

In [None]:
# TODO: can a model legitimately return a score of exactly 0? Inspect the rows where it does.
# NOTE(review): `item_score` must exist as a column of merged_df for this lookup to work.
merged_df[merged_df['item_score']==0.0] 

In [None]:
# Inspect all candidate rows of the session whose mapped id is 0.
merged_df[merged_df.session_id==0]

# PIPELINE

## Data loading

In [26]:
# Load the five raw dataframes through the project's get_dataframes helper
# (the recorded output shows it reads them from CSV files).
item_features_df, train_sessions_df, train_purchases_df, test_sessions_df, candidate_items_df = get_dataframes()

CSVs read


In [27]:
# Split the training sessions into one train/validation pair; the validation window is
# bounded by ts_start='2021-05-01' and ts_end='2021-06-01'.
# NOTE(review): whether the bounds are inclusive is decided inside train_val_split; the
# purchase filter used later in this notebook treats the end date as exclusive -- confirm
# that the two agree.
train_set_df, val_set_df = train_val_split(train_sessions_df, train_purchases_df,
                                               n_sets=1,
                                               ts_start='2021-05-01', ts_end='2021-06-01',
                                               return_discarded=False)

In [28]:
# Build the raw-id -> mapped-id lookups used by the URM and by the recommenders.
item_mapping = create_mapping(item_features_df['item_id'], return_inverse_mapping=False)
train_session_mapping = create_mapping(train_set_df['session_id'])
val_session_mapping = create_mapping(val_set_df['session_id'])

# Mapped ids of the validation sessions that need recommendations.
val_sessions_arr = get_mapped_sessions_to_recommend(val_set_df, val_session_mapping)

# test_session_mapping = create_mapping(test_sessions_df['session_id'])
# test_session_arr = get_mapped_sessions_to_recommend(test_sessions_df, test_session_mapping)

# Items that are not candidates must never be recommended: collect them in mapped form.
candidates_val_ids = candidate_items_df['item_id'].values
items_to_ignore = get_items_to_exclude(item_features_df, candidates_val_ids)
mapped_items_to_ignore = [item_mapping[raw_item] for raw_item in items_to_ignore]

# Purchases made inside the validation window, expressed with mapped ids.
in_val_window = (train_purchases_df.date >= '2021-05-01') & (train_purchases_df.date < '2021-06-01')
val_purchases = train_purchases_df[in_val_window][['session_id', 'item_id']]
val_purchases = val_purchases.assign(
    session_id=val_purchases['session_id'].map(val_session_mapping),
    item_id=val_purchases['item_id'].map(item_mapping),
)

In [29]:
# Create URM_train from the training sessions, using the session and item mappings
# built in the previous cell.
URM_train = create_URM(train_set_df, train_session_mapping, item_mapping)

In [30]:
# Pre-optimized recommenders and their best hyperparameters.
# The two lists are parallel: models_hyp[k] holds the hyperparameters of models[k].
models = [
    P3alphaRecommender(URM_train),
    TopPop(URM_train),
    ItemKNNCFRecommender(URM_train),
]

models_hyp = [
    {'topK': 479, 'alpha': 1.1764856470188576, 'normalize_similarity': True},
    {},  # TODO add if condition if model has no hyperparam
    {'shrink': 500, 'similarity': 'asymmetric', 'feature_weighting': 'none', 'topK': 495, 'normalize': True},
]

P3alphaRecommender: URM Detected 73 ( 0.3%) items with no interactions.
TopPopRecommender: URM Detected 73 ( 0.3%) items with no interactions.
ItemKNNCFRecommender: URM Detected 73 ( 0.3%) items with no interactions.


In [31]:
# Fit the models (built on URM_train) with their hyperparameters; the mapped ids of the
# non-candidate items are handed over as well.
# NOTE(review): presumably fit_models uses that list to exclude those items from the
# recommendations -- confirm in xgboost_utils.
fit_models(models, models_hyp, mapped_items_to_ignore)

Similarity column 23691 (100.0%), 4683.50 column/sec. Elapsed time 5.06 sec


## Candidate production

In [32]:
# Build the candidate frame and its target frame for XGBoost. According to the recorded
# output, this runs a recommendation pass per model and then fills the missing item scores.
# NOTE(review): cutoff=100 is presumably the number of candidates kept per model -- confirm.
candidates_df, target_df = load_xgboost_df(val_purchases=val_purchases, models=models, cutoff=100)

Recommending...


100%|██████████| 50/50 [00:35<00:00,  1.43it/s]


Done!
Recommending...


100%|██████████| 50/50 [00:36<00:00,  1.37it/s]


Done!
Recommending...


100%|██████████| 50/50 [00:38<00:00,  1.31it/s]


Done!
Filling missing item_scores...
Computing item scores...


100%|██████████| 100/100 [00:11<00:00,  8.81it/s]


Done!
Computing item scores...


100%|██████████| 100/100 [00:01<00:00, 80.89it/s]


Done!
Computing item scores...


100%|██████████| 100/100 [00:12<00:00,  7.82it/s]


Done!


In [None]:
# candidates_df[(candidates_df.item_score_x != 0) & (candidates_df.item_score_y != 0) & (target_df.target == 1)]
# Rename the columns of the candidate frame with the project's rename_columns helper.
# NOTE(review): the result overwrites `candidates_df`, so re-running this cell applies the
# renaming to an already renamed frame; cells that still use `item_score_x` /
# `item_score_y` expect the names from before this cell.
candidates_df = rename_columns(candidates_df, models)
candidates_df

In [None]:
# Spot check: candidate rows of the session with mapped id 64.
candidates_df[candidates_df.session_id == 64]

## K-Fold splitting

In [None]:
def XGB_KFold_split(merged_df, n_splits = 5):
    """Split the labelled candidates into group-aware folds, keeping sessions intact.

    All rows of a session end up on the same side of every split, because
    `session_id` is used as the group key of a GroupKFold.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Candidate frame with a `session_id` column and a `target` column.
        It is not modified.
    n_splits : int, default 5
        Number of folds.

    Returns
    -------
    list of (X_train, X_val, y_train, y_val)
        One tuple of dataframes per fold; the X frames do not contain `target`.
    """
    group_kfold = GroupKFold(n_splits=n_splits)

    # Work on derived frames so the caller's dataframe keeps its `target` column.
    target_df = merged_df[['target']]
    features_df = merged_df.drop(columns='target')

    folds = []
    for train_index, val_index in group_kfold.split(features_df, target_df, features_df.session_id):
        folds.append((
            features_df.iloc[train_index],
            features_df.iloc[val_index],
            target_df.iloc[train_index],
            target_df.iloc[val_index],
        ))

    return folds


In [None]:
# Copy of the candidates in which a score of exactly 0 is treated as missing.
# The assignment goes through .loc on the frame itself: `df[mask].col = value` only
# modifies a temporary selection and leaves the frame unchanged.
# NOTE(review): expects the `item_score_x` / `item_score_y` columns to be present.
no_zeros_candidates_df = candidates_df.copy()
for score_column in ['item_score_x', 'item_score_y']:
    no_zeros_candidates_df.loc[no_zeros_candidates_df[score_column] == 0, score_column] = np.nan

In [12]:
# DMatrix for learning to rank: features are every column except the two id columns,
# labels come from target_df and rows are grouped into queries by session id.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
dcandidates = xgb.DMatrix(
    candidates_df.drop(columns=['session_id', 'item_id']),
    label = target_df,
    qid = candidates_df['session_id'],
    nthread = -1,
    missing = np.nan,
    # group = candidates_df.groupby(['session_id']).size().to_list(),
)

In [13]:
# Hand-picked XGBoost parameters used for the manual train / cv cells.
params = {
    # task definition
    "objective": 'rank:map',
    "eval_metric": 'map@100',
    "random_state": 10,

    # general
    "booster": "gbtree",
    "tree_method": 'gpu_hist',
    "verbosity": 1,
    "validate_parameters": True,

    # tree growth
    "eta": 0.1,  # learning rate
    "max_depth": 5,
    "min_child_weight": 50,
    "max_delta_step": 0,
    "gamma": 0.001,

    # row / column sampling
    "subsample": 0.001,
    "sampling_method": 'gradient_based',
    "colsample_bytree": 1,
    "colsample_bylevel": 1,
    "colsample_bynode": 1,

    # regularization
    "lambda": 1,
    "alpha": 1e-8,  # [0, inf]
}

In [None]:
# Train a booster on the full candidate matrix for 10 boosting rounds with the manual
# `params`; early stopping is left disabled.
# NOTE(review): no evaluation set is passed, so the EvaluationMonitor callback may have
# nothing to report -- confirm.
xgb_model = xgb.train(
    params,
    dcandidates,
    num_boost_round=10,
    # early_stopping_rounds = 100,
    callbacks=[xgb.callback.EvaluationMonitor(show_stdv=True)],
)

In [None]:
# Show the per-feature scores reported by the trained booster.
# NOTE(review): requires `xgb_model` to be the Booster returned by xgb.train, not the
# result of the xgb.cv cell, which reuses the same name.
xgb_model.get_score()

In [None]:
# Evaluate the booster on `dcandidates`, the same matrix it was trained on, so this is a
# training-set metric and not a validation score.
xgb_model.eval(dcandidates)

In [14]:
# Cross-validate the manual `params` on the candidate matrix.
num_folds = 3
# NOTE(review): `group_kfold` is a one-shot generator of (train, val) index arrays and is
# not used below, because the `folds` argument is commented out.
group_kfold = GroupKFold(n_splits=num_folds).split(candidates_df.drop(columns=['session_id', 'item_id']), target_df, groups= candidates_df.session_id)

# print(group_kfold)

# NOTE(review): the result is stored in `xgb_model`, replacing the Booster created by the
# xgb.train cell; later cells that call xgb_model.predict need that Booster.
xgb_model = xgb.cv(
    params,
    dcandidates,
    # folds = group_kfold,
    num_boost_round=10,
    nfold = num_folds,
    # metrics = ['ndcg@100', 'map@100'],
    early_stopping_rounds = 2,
    # as_pandas=True,
    callbacks=[xgb.callback.EvaluationMonitor(show_stdv=True)],
)

[0]	train-map@100:0.22276+0.03842	test-map@100:0.22294+0.03970
[1]	train-map@100:0.44050+0.00798	test-map@100:0.43962+0.01322
[2]	train-map@100:0.54255+0.01394	test-map@100:0.54191+0.01606
[3]	train-map@100:0.55049+0.00831	test-map@100:0.55030+0.00976
[4]	train-map@100:0.55347+0.00541	test-map@100:0.55323+0.00726
[5]	train-map@100:0.55505+0.00614	test-map@100:0.55468+0.00668
[6]	train-map@100:0.55503+0.00617	test-map@100:0.55475+0.00650
[7]	train-map@100:0.55785+0.00299	test-map@100:0.55749+0.00344
[8]	train-map@100:0.55870+0.00216	test-map@100:0.55832+0.00278
[9]	train-map@100:0.55878+0.00200	test-map@100:0.55851+0.00276


In [19]:
# Display whatever `xgb_model` currently holds.
# NOTE(review): the recorded output is `9`, which is neither a Booster nor a cv result
# table; the name was apparently rebound by a cell that is no longer in the notebook.
xgb_model

9

In [None]:
# Score every candidate row at once.
# NOTE(review): requires `xgb_model` to be the Booster from the xgb.train cell.
xgb_model.predict(dcandidates)

In [None]:
# Score the candidates session by session, with a progress bar.
# NOTE(review): requires `xgb_model` to be the Booster from the xgb.train cell.
tqdm.pandas()


def _predict_session(session_df):
    """Return the booster's scores for the candidate rows of one session."""
    session_matrix = xgb.DMatrix(
        session_df.drop(columns=['session_id', 'item_id']),
        nthread = -1,
        # np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
        missing = np.nan,
    )
    return xgb_model.predict(session_matrix)


# One array of scores per session, indexed by session_id.
predictions = candidates_df.groupby('session_id').progress_apply(_predict_session)
predictions

In [None]:
# Rank the candidates of every session by predicted score and keep the best 100.
#
# `predictions` holds one score array per session, in ascending session_id order, so the
# candidates are first brought into that same order. The stable sort keeps the row order
# inside each session, which is the order in which the scores were produced.
# The work is done on a new frame, so `candidates_df` keeps its original columns and can
# still be used to build DMatrix objects afterwards.
ranked_df = candidates_df.sort_values(by='session_id', kind='stable')

scores = [score for session_scores in predictions.values for score in session_scores]

if len(scores) != len(ranked_df):
    raise ValueError('Number of predicted scores does not match the number of candidate rows')

ranked_df = ranked_df.assign(
    # negated so that the ascending sort below puts the best item of a session first
    score=[-score for score in scores],
    session_id=ranked_df['session_id'].astype('Int64'),
)

submission_df = ranked_df.sort_values(by=['session_id', 'score'], ascending=True)
submission_df = submission_df.groupby('session_id').head(100)

submission_df

In [None]:
def XGB_hypertune(candidates_df, target_df, hyperparams_dict, n_iter = 200):
    """Early stub of the hyperparameter tuner: only builds the ranking DMatrix.

    `hyperparams_dict` and `n_iter` are not used yet and nothing is returned.
    A later cell of this notebook defines `XGB_hypertune` again with a full
    implementation, which replaces this one once it has been executed.
    """

    dcandidates = xgb.DMatrix(
        candidates_df.drop(columns=['session_id', 'item_id']),
        label = target_df,
        qid = candidates_df['session_id'],
        nthread = -1,
        # np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
        missing = np.nan,
    )

    
    

In [24]:

class Space:
    """Base class of a hyperparameter search space that samples from an Optuna trial.

    A space is given its parameter name and the current trial through `set_name` and
    `set_trial`; `suggest` then asks the trial for a value.
    """

    def __init__(self):
        # Abstract: concrete spaces define their own __init__ and do not call this one.
        raise NotImplementedError

    def set_trial(self, trial):
        """Remember the trial the next `suggest` call samples from."""
        self.trial = trial

    def set_name(self, name):
        """Remember the parameter name reported to the trial."""
        self.name = name

    def suggest(self):
        """Return a sampled value; implemented by the concrete spaces."""
        raise NotImplementedError


class Range(Space):
    """Numeric interval [low, high] with a named prior ('uniform' or 'log-uniform')."""

    def __init__(self, low=0, high=1, prior='uniform'):
        self.low = low
        self.high = high
        self.prior = prior


class Categorical(Space):
    """Finite set of choices."""

    def __init__(self, params):
        # Any iterable of choices is accepted and stored as a list.
        self.params = params if isinstance(params, list) else list(params)

    def suggest(self):
        return self.trial.suggest_categorical(self.name, self.params)


class Integer(Range):
    """Integer interval sampled with a fixed step."""

    def __init__(self, low=0, high=1, prior='uniform', step=1):
        super(Integer, self).__init__(low, high, prior)
        self.step = step

    def suggest(self):
        # `step` is passed by keyword: recent Optuna releases no longer accept it positionally.
        return self.trial.suggest_int(self.name, self.low, self.high, step=self.step)


class Real(Range):
    """Float interval, sampled on a log scale when the prior is 'log-uniform'."""

    def __init__(self, low=0, high=1, prior='uniform', step=None):
        super(Real, self).__init__(low, high, prior)
        self.log = self.prior == 'log-uniform'
        # A step is dropped for log-scale sampling.
        self.step = None if self.log else step

    def suggest(self):
        return self.trial.suggest_float(self.name, self.low, self.high, step=self.step, log=self.log)


def suggest(trial, param_dict):
    """Turn a parameter specification into concrete values for one trial.

    Values that are `Space` instances are sampled from `trial` under their dictionary
    key; every other value is copied through unchanged.

    Returns a new dict with the same keys as `param_dict`.
    """
    sampled = {}

    for param, val in param_dict.items():
        if isinstance(val, Space):
            val.set_name(param)
            val.set_trial(trial)
            sampled[param] = val.suggest()
        else:
            sampled[param] = val

    return sampled


In [35]:
def XGB_hypertune(
    candidates_df, target_df, xgb_hyp_params_dict = None, num_trials = 500, num_folds = 3, early_stopping_rounds = 100,
    save_folder = "./save", study_name = 'study', resume=False, with_datetime = True,
    telegram_notifications = True, num_boost_round = 10,
):
    """Tune the XGBoost ranker with Optuna, scoring every trial by cross-validation.

    Parameters
    ----------
    candidates_df : pandas.DataFrame
        Candidate rows; `session_id` is used as the query id and, together with
        `item_id`, removed from the features.
    target_df : pandas.DataFrame
        Labels aligned with `candidates_df`.
    xgb_hyp_params_dict : dict, optional
        XGBoost parameters; values that are `Space` instances are tuned, all other
        values are kept fixed. When None, a default search space for `rank:map` is used.
    num_trials : int
        Number of Optuna trials to run.
    num_folds : int
        Number of cross-validation folds per trial.
    early_stopping_rounds : int
        Passed to `xgb.cv`; it can only take effect when it is smaller than
        `num_boost_round`.
    save_folder : str
        Parent folder; studies are stored in its `xgboost` sub-folder.
    study_name : str
        Base name of the saved study and best-parameter files.
    resume : bool
        Continue the most recently created saved study with this name, if one exists.
    with_datetime : bool
        Prefix the file names with the current date and time.
    telegram_notifications : bool
        Send progress messages through the project's Telegram helpers.
    num_boost_round : int, default 10
        Maximum number of boosting rounds per trial.

    Returns
    -------
    (best_params, best_iteration)
        The fixed parameters merged with the best sampled ones, and the last boosting
        round of the best trial's cross-validation table.
    """

    class Hypertuner:
        """Optuna objective: cross-validated score of one sampled configuration."""

        def __init__(self, dcandidates, search_space):
            self.dcandidates = dcandidates
            self.search_space = search_space

        def __call__(self, trial):
            chosen_data = suggest(trial, self.search_space)

            print('[HYPERTUNE] Chosen parameters: ' + str(chosen_data))

            cv_results = xgb.cv(
                chosen_data,
                self.dcandidates,
                num_boost_round=num_boost_round,
                nfold = num_folds,
                early_stopping_rounds = early_stopping_rounds,
                as_pandas=True,
            )

            # The last two columns of the table are the test metric's mean and std;
            # the last row is the final boosting round that was kept.
            best_score = cv_results.iloc[-1, -2]

            print('[HYPERTUNE] Best mean validation score: ' + str(best_score))
            print('[HYPERTUNE] Std of best validation score: ' + str(cv_results.iloc[-1, -1]))

            # Stored as a plain int so the attribute does not depend on numpy types.
            trial.set_user_attr("best_iteration", int(cv_results.index[-1]))

            return best_score

    class SaveCallback:
        """Dump the study and its best parameters to disk after every trial."""

        def __init__(self, std_name, param_name):
            self.std_name = std_name
            self.param_name = param_name

        def __call__(self, study: optuna.Study, trial):
            jl.dump(study, self.std_name)
            jl.dump(study.best_trial.params, self.param_name)

    class TelegramCallback:
        """Send the best parameters and the study file whenever the best value improves."""

        def __init__(self, std_name):
            self.best = 0
            self.std_name = std_name

        def __call__(self, study: optuna.Study, trial):
            if study.best_value > self.best:
                self.best = study.best_value
                telegram_bot_sendtext("[XGBOOST] " + "HYPERPARAMETERS: " + str(study.best_params) + ' MAP: ' + str(self.best))
                telegram_bot_sendfile(self.std_name, "study_xgboost.pkl")

    pd.options.mode.chained_assignment = None # Disable SettingWithCopyWarning

    if xgb_hyp_params_dict is None:
        fixed_params = {
            "verbosity": 1,
            "validate_parameters": True,
            "objective": 'rank:map',
            "eval_metric": 'map@100',
            "tree_method": 'gpu_hist',
            "random_state": 10,
            # "gpu_id": 0,
            # "n_jobs": 4,
        }

        search_space = {
            "sampling_method": Categorical(['uniform', 'gradient_based']),
            "booster": Categorical(["gbtree"]), # ['dart']
            "max_depth": Integer(1, 8),
            "eta": Real(1e-2, 1, prior='log-uniform'),
            "gamma": Real(1e-9, 1e-1, prior='log-uniform'),
            "min_child_weight": Integer(1, 100),
            "subsample": Real(0.1, 1),
            "colsample_bytree": Real(1e-6, 1, prior='log-uniform'), # [0,1]
            "colsample_bylevel": Real(1e-6, 1, prior='log-uniform'), # [0,1]
            "colsample_bynode": Real(1e-6, 1, prior='log-uniform'), # [0,1]
            "alpha": Real(1e-9, 1, prior='log-uniform'), # [0, inf]
            "lambda": Real(1e-9, 5, prior='log-uniform'),
            # "base_score": 0, # [0, inf]
            # "num_parallel_tree": 1, # [1, inf]
        }
        search_space.update(fixed_params)
    else:
        search_space = xgb_hyp_params_dict
        # Everything that is not a search space is a fixed parameter.
        fixed_params = {k: v for k, v in search_space.items() if not isinstance(v, Space)}

    # Optional time stamp prefix for the files written by this run.
    timestamp = datetime.now().strftime('%d-%m-%y_%H_%M_%S') + '_' if with_datetime else ""

    save_folder = os.path.join(save_folder, 'xgboost')
    os.makedirs(save_folder, exist_ok=True)
    param_path = os.path.join(save_folder, timestamp + study_name + "_best_parameters" + '.pkl')

    matches = list(pathlib.Path(save_folder).glob('*' + study_name + '.pkl')) if resume else []
    if matches:
        # Resume from the most recently created study file.
        filename = max((str(m) for m in matches), key=os.path.getctime)
        print('[HYPERTUNE] Loading from file: ' + filename)
        # NOTE: joblib files are pickles; only resume from study files you created yourself.
        with open(filename, 'rb') as f:
            study = jl.load(f)
        study_path = filename
    else:
        study = optuna.create_study(direction='maximize')
        study_path = os.path.join(save_folder, timestamp + study_name + '.pkl')

    callbacks = [SaveCallback(study_path, param_path)]
    if telegram_notifications:
        callbacks.append(TelegramCallback(study_path))

    dcandidates = xgb.DMatrix(
        candidates_df.drop(columns=['session_id', 'item_id']),
        label = target_df,
        qid = candidates_df['session_id'],
        nthread = -1,
        # np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
        missing = np.nan,
    )

    study.optimize(Hypertuner(dcandidates, search_space), n_trials=num_trials, callbacks=callbacks)

    best_iteration = study.best_trial.user_attrs['best_iteration']

    if telegram_notifications:
        telegram_bot_sendtext("[XGBOOST] " + "Best iteration : " + str(best_iteration))
        telegram_bot_sendtext("[XGBOOST] " + "Hypertuning finished.")

    best_params = {**fixed_params, **study.best_params}

    print("[HYPERTUNE] Best params: ", str(best_params))
    print("[HYPERTUNE] Best iteration: ", str(best_iteration))

    return best_params, best_iteration


In [36]:
# Run the tuner with its default search space and settings.
# NOTE(review): this rebinds `params`, replacing the hand-written parameter dict defined
# earlier in the notebook. The recorded run was stopped by a KeyboardInterrupt, so neither
# name was assigned in that execution.
params, iteration = XGB_hypertune(candidates_df=candidates_df, target_df=target_df)

[32m[I 2022-05-26 17:33:57,594][0m A new study created in memory with name: no-name-e35ac42d-a680-4655-8bc5-c5d722032c76[0m


[HYPERTUNE] Chosen parameters: {'sampling_method': 'gradient_based', 'booster': 'gbtree', 'max_depth': 3, 'eta': 0.9257156997305257, 'gamma': 0.02407487027326196, 'min_child_weight': 36, 'subsample': 0.981773586148581, 'colsample_bytree': 0.0011791953844212896, 'colsample_bylevel': 0.00025692164954869745, 'colsample_bynode': 0.0012653925913366532, 'alpha': 1.7962231690265088e-07, 'lambda': 2.4701979133045098e-08, 'verbosity': 1, 'validate_parameters': True, 'objective': 'rank:map', 'eval_metric': 'map@100', 'tree_method': 'gpu_hist', 'random_state': 10}
[0]	train-map@100:0.01473+0.00005	test-map@100:0.01473+0.00012
[1]	train-map@100:0.03118+0.00034	test-map@100:0.03123+0.00163
[2]	train-map@100:0.03729+0.00037	test-map@100:0.03733+0.00158
[3]	train-map@100:0.03733+0.00040	test-map@100:0.03735+0.00157
[4]	train-map@100:0.43687+0.00051	test-map@100:0.43664+0.00415
[5]	train-map@100:0.45196+0.00141	test-map@100:0.45183+0.00347
[6]	train-map@100:0.49638+0.00129	test-map@100:0.49634+0.00444

[32m[I 2022-05-26 17:34:26,692][0m Trial 0 finished with value: 0.5021159896284987 and parameters: {'sampling_method': 'gradient_based', 'booster': 'gbtree', 'max_depth': 3, 'eta': 0.9257156997305257, 'gamma': 0.02407487027326196, 'min_child_weight': 36, 'subsample': 0.981773586148581, 'colsample_bytree': 0.0011791953844212896, 'colsample_bylevel': 0.00025692164954869745, 'colsample_bynode': 0.0012653925913366532, 'alpha': 1.7962231690265088e-07, 'lambda': 2.4701979133045098e-08}. Best is trial 0 with value: 0.5021159896284987.[0m


[HYPERTUNE] Best mean validation score: 0.5021159896284987
[HYPERTUNE] Std of best validation score: 0.002942936370632178
[HYPERTUNE] Chosen parameters: {'sampling_method': 'uniform', 'booster': 'gbtree', 'max_depth': 7, 'eta': 0.11508675164077814, 'gamma': 4.340269343477574e-06, 'min_child_weight': 58, 'subsample': 0.38578554784066044, 'colsample_bytree': 6.53908135358264e-05, 'colsample_bylevel': 2.997260831289789e-06, 'colsample_bynode': 0.0013696607963351605, 'alpha': 0.02277670203546916, 'lambda': 0.0015398787077015416, 'verbosity': 1, 'validate_parameters': True, 'objective': 'rank:map', 'eval_metric': 'map@100', 'tree_method': 'gpu_hist', 'random_state': 10}
[0]	train-map@100:0.01518+0.00042	test-map@100:0.01518+0.00057
[1]	train-map@100:0.03100+0.00145	test-map@100:0.03086+0.00265
[2]	train-map@100:0.03641+0.00173	test-map@100:0.03627+0.00291
[3]	train-map@100:0.03626+0.00179	test-map@100:0.03609+0.00292
[4]	train-map@100:0.38053+0.00597	test-map@100:0.37946+0.00426
[5]	train-m

[32m[I 2022-05-26 17:34:57,975][0m Trial 1 finished with value: 0.4007390203132586 and parameters: {'sampling_method': 'uniform', 'booster': 'gbtree', 'max_depth': 7, 'eta': 0.11508675164077814, 'gamma': 4.340269343477574e-06, 'min_child_weight': 58, 'subsample': 0.38578554784066044, 'colsample_bytree': 6.53908135358264e-05, 'colsample_bylevel': 2.997260831289789e-06, 'colsample_bynode': 0.0013696607963351605, 'alpha': 0.02277670203546916, 'lambda': 0.0015398787077015416}. Best is trial 0 with value: 0.5021159896284987.[0m


[HYPERTUNE] Best mean validation score: 0.4007390203132586
[HYPERTUNE] Std of best validation score: 0.004333112525448945
[HYPERTUNE] Chosen parameters: {'sampling_method': 'gradient_based', 'booster': 'gbtree', 'max_depth': 3, 'eta': 0.9658354523530484, 'gamma': 1.5152916400990294e-08, 'min_child_weight': 68, 'subsample': 0.5024214920078056, 'colsample_bytree': 0.04087340605067627, 'colsample_bylevel': 0.000852725826709348, 'colsample_bynode': 0.5871113479525519, 'alpha': 2.302589476032794e-08, 'lambda': 7.530582562911367e-08, 'verbosity': 1, 'validate_parameters': True, 'objective': 'rank:map', 'eval_metric': 'map@100', 'tree_method': 'gpu_hist', 'random_state': 10}
[0]	train-map@100:0.01473+0.00005	test-map@100:0.01473+0.00012
[1]	train-map@100:0.03108+0.00022	test-map@100:0.03111+0.00153
[2]	train-map@100:0.03721+0.00031	test-map@100:0.03721+0.00148
[3]	train-map@100:0.03726+0.00034	test-map@100:0.03729+0.00149
[4]	train-map@100:0.43725+0.00037	test-map@100:0.43711+0.00415
[5]	trai

[32m[I 2022-05-26 17:35:25,845][0m Trial 2 finished with value: 0.4993809351396354 and parameters: {'sampling_method': 'gradient_based', 'booster': 'gbtree', 'max_depth': 3, 'eta': 0.9658354523530484, 'gamma': 1.5152916400990294e-08, 'min_child_weight': 68, 'subsample': 0.5024214920078056, 'colsample_bytree': 0.04087340605067627, 'colsample_bylevel': 0.000852725826709348, 'colsample_bynode': 0.5871113479525519, 'alpha': 2.302589476032794e-08, 'lambda': 7.530582562911367e-08}. Best is trial 0 with value: 0.5021159896284987.[0m


[HYPERTUNE] Best mean validation score: 0.4993809351396354
[HYPERTUNE] Std of best validation score: 0.007513042243468559
[HYPERTUNE] Chosen parameters: {'sampling_method': 'uniform', 'booster': 'gbtree', 'max_depth': 6, 'eta': 0.05296590248362138, 'gamma': 1.407093318830286e-08, 'min_child_weight': 72, 'subsample': 0.6548813503997307, 'colsample_bytree': 9.619270525408546e-06, 'colsample_bylevel': 0.0002880441562957349, 'colsample_bynode': 4.545685289609965e-06, 'alpha': 8.558451792576883e-05, 'lambda': 3.55797734655265e-07, 'verbosity': 1, 'validate_parameters': True, 'objective': 'rank:map', 'eval_metric': 'map@100', 'tree_method': 'gpu_hist', 'random_state': 10}
[0]	train-map@100:0.01522+0.00040	test-map@100:0.01523+0.00056
[1]	train-map@100:0.03291+0.00083	test-map@100:0.03288+0.00163
[2]	train-map@100:0.03846+0.00081	test-map@100:0.03844+0.00145
[3]	train-map@100:0.03836+0.00086	test-map@100:0.03841+0.00137
[4]	train-map@100:0.36801+0.01322	test-map@100:0.36693+0.01126
[5]	train-

[32m[I 2022-05-26 17:35:55,264][0m Trial 3 finished with value: 0.38618006334024385 and parameters: {'sampling_method': 'uniform', 'booster': 'gbtree', 'max_depth': 6, 'eta': 0.05296590248362138, 'gamma': 1.407093318830286e-08, 'min_child_weight': 72, 'subsample': 0.6548813503997307, 'colsample_bytree': 9.619270525408546e-06, 'colsample_bylevel': 0.0002880441562957349, 'colsample_bynode': 4.545685289609965e-06, 'alpha': 8.558451792576883e-05, 'lambda': 3.55797734655265e-07}. Best is trial 0 with value: 0.5021159896284987.[0m


[HYPERTUNE] Best mean validation score: 0.38618006334024385
[HYPERTUNE] Std of best validation score: 0.007594981714118797
[HYPERTUNE] Chosen parameters: {'sampling_method': 'gradient_based', 'booster': 'gbtree', 'max_depth': 1, 'eta': 0.024201098660059886, 'gamma': 0.009720396705141866, 'min_child_weight': 40, 'subsample': 0.8026328586956695, 'colsample_bytree': 6.354077522238227e-06, 'colsample_bylevel': 2.1352158990620478e-06, 'colsample_bynode': 6.518768884141251e-06, 'alpha': 2.71787136584183e-08, 'lambda': 1.0068900174839643e-08, 'verbosity': 1, 'validate_parameters': True, 'objective': 'rank:map', 'eval_metric': 'map@100', 'tree_method': 'gpu_hist', 'random_state': 10}
[0]	train-map@100:0.01266+0.00009	test-map@100:0.01266+0.00012
[1]	train-map@100:0.01359+0.00005	test-map@100:0.01360+0.00016
[2]	train-map@100:0.02006+0.00002	test-map@100:0.02007+0.00009
[3]	train-map@100:0.02016+0.00003	test-map@100:0.02017+0.00011
[4]	train-map@100:0.42411+0.00205	test-map@100:0.42397+0.00161


[32m[I 2022-05-26 17:36:21,202][0m Trial 4 finished with value: 0.4455161081067304 and parameters: {'sampling_method': 'gradient_based', 'booster': 'gbtree', 'max_depth': 1, 'eta': 0.024201098660059886, 'gamma': 0.009720396705141866, 'min_child_weight': 40, 'subsample': 0.8026328586956695, 'colsample_bytree': 6.354077522238227e-06, 'colsample_bylevel': 2.1352158990620478e-06, 'colsample_bynode': 6.518768884141251e-06, 'alpha': 2.71787136584183e-08, 'lambda': 1.0068900174839643e-08}. Best is trial 0 with value: 0.5021159896284987.[0m


[HYPERTUNE] Best mean validation score: 0.4455161081067304
[HYPERTUNE] Std of best validation score: 0.00418113271932924
[HYPERTUNE] Chosen parameters: {'sampling_method': 'gradient_based', 'booster': 'gbtree', 'max_depth': 6, 'eta': 0.026864696311828657, 'gamma': 9.079395149911102e-06, 'min_child_weight': 32, 'subsample': 0.7365336611708589, 'colsample_bytree': 1.7608072334338495e-06, 'colsample_bylevel': 2.8714923252999378e-05, 'colsample_bynode': 0.7726361170415443, 'alpha': 0.3007448505005821, 'lambda': 6.308814856377941e-09, 'verbosity': 1, 'validate_parameters': True, 'objective': 'rank:map', 'eval_metric': 'map@100', 'tree_method': 'gpu_hist', 'random_state': 10}
[0]	train-map@100:0.01524+0.00042	test-map@100:0.01523+0.00057
[1]	train-map@100:0.03201+0.00057	test-map@100:0.03195+0.00109
[2]	train-map@100:0.03766+0.00061	test-map@100:0.03744+0.00114
[3]	train-map@100:0.03753+0.00053	test-map@100:0.03739+0.00106
[4]	train-map@100:0.35395+0.01011	test-map@100:0.35323+0.00984
[5]	tr

[32m[I 2022-05-26 17:36:52,096][0m Trial 5 finished with value: 0.3661394370990995 and parameters: {'sampling_method': 'gradient_based', 'booster': 'gbtree', 'max_depth': 6, 'eta': 0.026864696311828657, 'gamma': 9.079395149911102e-06, 'min_child_weight': 32, 'subsample': 0.7365336611708589, 'colsample_bytree': 1.7608072334338495e-06, 'colsample_bylevel': 2.8714923252999378e-05, 'colsample_bynode': 0.7726361170415443, 'alpha': 0.3007448505005821, 'lambda': 6.308814856377941e-09}. Best is trial 0 with value: 0.5021159896284987.[0m


[HYPERTUNE] Best mean validation score: 0.3661394370990995
[HYPERTUNE] Std of best validation score: 0.005356401772828749
[HYPERTUNE] Chosen parameters: {'sampling_method': 'gradient_based', 'booster': 'gbtree', 'max_depth': 1, 'eta': 0.2357774110388101, 'gamma': 3.526397787365607e-05, 'min_child_weight': 36, 'subsample': 0.4583046236747047, 'colsample_bytree': 0.029109737008087968, 'colsample_bylevel': 0.5883081280626057, 'colsample_bynode': 4.303625275500572e-05, 'alpha': 0.03632396452250383, 'lambda': 2.4123001256113716, 'verbosity': 1, 'validate_parameters': True, 'objective': 'rank:map', 'eval_metric': 'map@100', 'tree_method': 'gpu_hist', 'random_state': 10}
[0]	train-map@100:0.01266+0.00009	test-map@100:0.01266+0.00012


KeyboardInterrupt: 