<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Evaluation" data-toc-modified-id="Evaluation-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Evaluation</a></span></li></ul></div>

In [1]:
import os
import configparser
from tqdm import tqdm
from helpers.helper_functions import *
from helpers.helper_classes import *
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import lightgbm as lgb

# Notebook-wide pandas display settings: show up to 500 rows and never truncate columns.
for _option, _value in (('display.max_rows', 500), ('display.max_columns', None)):
    pd.set_option(_option, _value)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read config.ini file (path is relative to the directory the kernel starts in).
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed, so fail loudly here instead of hitting an opaque
# KeyError: 'PATH' on the next statement.
config = configparser.ConfigParser()
if not config.read('src/config.ini'):
    raise FileNotFoundError(
        f"Could not read 'src/config.ini' (current working directory: {os.getcwd()})"
    )

# From here on, relative paths resolve against ROOT_DIR.
os.chdir(config['PATH']['ROOT_DIR'])

# Load the preprocessed training and test sets
int_dir = config['PATH']['INT_DIR']
df = pd.read_parquet(int_dir + '/training_set_preprocessed_nodrop.parquet', engine='fastparquet')
df_test = pd.read_parquet(int_dir + '/test_set_preprocessed_nodrop.parquet', engine='fastparquet')

# Subset of the training data restricted to srch_id < 10000.
df_mini = df[df['srch_id'] < 10000]


## Submission

In [4]:
## Best run
best_params = {'n_estimators': 878,
               'num_leaves': 80,
               'max_depth': 14,
               'learning_rate': 0.018926700075124463,
               'subsample': 0.6001819915274639,
               'colsample_bytree': 0.7879556726353679,
               'reg_alpha': 0.06065988852935483,
               'reg_lambda': 0.14848222729700747,
               'min_child_samples': 7,
               'min_child_weight': 0.05206418484811052,
               'val_size': 0.37752777983423735}

# Everything except val_size is forwarded to LGBMRanker; val_size only sets the split size.
val_size = best_params.pop('val_size')

X_train_full, X_val_full, y_train_full, y_val_full, _ = train_test_split(df, 'target', test_size=val_size)

# The held-out split is used to build the extra feature tables that are merged
# into the training and test frames below.
# NOTE(review): construct_desire is a project helper; presumably it aggregates
# click/booking behaviour per prop_id - confirm against helpers.helper_functions.
_, desire_df_click_full = construct_desire(X_val_full)
_, desire_df_book_full = construct_desire(X_val_full, target = 'booking_bool')

prop_counts = X_val_full['prop_id'].value_counts()
prop_counts.name = 'prop_counts'
srch_dest_counts = X_val_full['srch_destination_id'].value_counts()
srch_dest_counts.name = 'srch_dest_counts'

# (table, key column) pairs consumed by merge_and_drop
merge_df_list = [(desire_df_click_full, 'prop_id'), (desire_df_book_full, 'prop_id'), (prop_counts, 'prop_id'), (srch_dest_counts, 'srch_destination_id')]

X_train_full = merge_and_drop(X_train_full, merge_df_list)
df_test = merge_and_drop(df_test, merge_df_list, drop=False)
# Re-bind instead of inplace=True: the split result may be a slice of df, and an
# in-place drop on a slice triggers SettingWithCopyWarning.
X_val_full = X_val_full.drop(['click_bool', 'booking_bool'], axis=1)


# Create dataset
# groupby() returns the group sizes ordered by srch_id.
# NOTE(review): this assumes the rows of X_train_full are already ordered by
# srch_id so the sizes line up with the row order - confirm the helpers keep it.
group_train = X_train_full.groupby('srch_id').size().values
X_train_lgb = X_train_full.drop(['srch_id'], axis=1)

ranker = lgb.LGBMRanker(**best_params)

# Training the model (NDCG@5 on the training data is logged every boosting round)
ranker.fit(
      X=X_train_lgb,
      y=y_train_full,
      group=group_train,
      eval_set=[(X_train_lgb, y_train_full)],
      eval_group=[group_train],
      eval_at=[5])

# Predicting the scores
test = df_test
# Drop the query id and, if present, a prediction column left behind by an
# earlier run, so the model only ever sees feature columns.
test_input = test.drop(columns=['srch_id', 'pred_grades'], errors='ignore')

print("Predicting...")
y_pred = ranker.predict(test_input)
print("Done predicting")

# assign() returns a new frame, so the prediction column is NOT written back into
# df_test (the previous `df_res = test` alias mutated df_test and leaked
# 'pred_grades' into every later cell that derives model input from it).
df_res = (
    test.assign(pred_grades=y_pred)
        .sort_values(by=['srch_id', 'pred_grades'], ascending=[True, False])
)

lgbm_submission_desire = df_res[['srch_id', 'prop_id']]
lgbm_submission_desire.to_csv(config['PATH']['SUBMISSION_DIR'] + '/lgbm_submission_integrated_optuna.csv', index=False)


[1]	training's ndcg@5: 0.317934
[2]	training's ndcg@5: 0.352619
[3]	training's ndcg@5: 0.365162
[4]	training's ndcg@5: 0.372537
[5]	training's ndcg@5: 0.377751
[6]	training's ndcg@5: 0.38071
[7]	training's ndcg@5: 0.383778
[8]	training's ndcg@5: 0.385338
[9]	training's ndcg@5: 0.386907
[10]	training's ndcg@5: 0.388711
[11]	training's ndcg@5: 0.389809
[12]	training's ndcg@5: 0.390465
[13]	training's ndcg@5: 0.391493
[14]	training's ndcg@5: 0.392266
[15]	training's ndcg@5: 0.393235
[16]	training's ndcg@5: 0.393936
[17]	training's ndcg@5: 0.394718
[18]	training's ndcg@5: 0.39519
[19]	training's ndcg@5: 0.395265
[20]	training's ndcg@5: 0.395612
[21]	training's ndcg@5: 0.395771
[22]	training's ndcg@5: 0.396282
[23]	training's ndcg@5: 0.396653
[24]	training's ndcg@5: 0.39682
[25]	training's ndcg@5: 0.396934
[26]	training's ndcg@5: 0.397149
[27]	training's ndcg@5: 0.397268
[28]	training's ndcg@5: 0.397611
[29]	training's ndcg@5: 0.397654
[30]	training's ndcg@5: 0.397939
[31]	training's ndcg@5

: 

: 

## Evaluation

In [5]:
# Hyper-parameters of the best run; 'val_size' is a split fraction, not a LightGBM argument.
best_params = {
    'n_estimators': 878,
    'num_leaves': 80,
    'max_depth': 14,
    'learning_rate': 0.018926700075124463,
    'subsample': 0.6001819915274639,
    'colsample_bytree': 0.7879556726353679,
    'reg_alpha': 0.06065988852935483,
    'reg_lambda': 0.14848222729700747,
    'min_child_samples': 7,
    'min_child_weight': 0.05206418484811052,
    'val_size': 0.37752777983423735,
}

# Split the dict into the model parameters and the split size; best_params itself stays intact.
val_size = best_params['val_size']
lgb_params = {key: setting for key, setting in best_params.items() if key != 'val_size'}

X_train, X_val, X_test, y_train, y_val, y_test, test_ideal = train_val_test_split(
    df,
    'target',
    test_size=.15,
    val_size=val_size,
    random_state=7,
)

# Feature tables are derived from the validation split only and then merged
# into the train and test frames.
_, desire_df_click = construct_desire(X_val)
_, desire_df_book = construct_desire(X_val, target='booking_bool')
prop_counts = X_val['prop_id'].value_counts().rename('prop_counts')
srch_dest_counts = X_val['srch_destination_id'].value_counts().rename('srch_dest_counts')

# (table, key column) pairs consumed by merge_and_drop
merge_df_list = [
    (desire_df_click, 'prop_id'),
    (desire_df_book, 'prop_id'),
    (prop_counts, 'prop_id'),
    (srch_dest_counts, 'srch_destination_id'),
]

X_train = merge_and_drop(X_train, merge_df_list)
X_test = merge_and_drop(X_test, merge_df_list)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test.sort_values(by=['srch_id', target_str], ascending=[True, False], inplace=True)


In [7]:
# Testing: fit the LightGBM ranker on the training split and score it on the test split
import lightgbm as lgb
# (Weights & Biases logging is switched off in this version of the cell.)

# Number of rows per srch_id, for the training and the held-out test split.
group_train = X_train.groupby('srch_id').size().values
group_val = X_test.groupby('srch_id').size().values

# srch_id is removed before the frames are handed to the model.
X_train_lgb = X_train.drop(columns=['srch_id'])
X_val_lgb = X_test.drop(columns=['srch_id'])

ranker = lgb.LGBMRanker(**lgb_params)

# Training the model; NDCG@5 is evaluated on the training and the test split.
ranker.fit(
    X=X_train_lgb,
    y=y_train,
    group=group_train,
    eval_set=[(X_train_lgb, y_train), (X_val_lgb, y_test)],
    eval_group=[group_train, group_val],
    eval_at=[5],
    callbacks=[],
)

# Predicting the scores
test = X_test.drop(columns=['srch_id']).copy()

print("Predicting...")
y_pred = ranker.predict(test)
print("Done predicting")

# Attach the scores to a fresh copy of X_test, order each search by descending
# score, then join the reference frame on (srch_id, prop_id).
df_res = (
    X_test.assign(pred_grades=y_pred)
    .sort_values(by=['srch_id', 'pred_grades'], ascending=[True, False])
    .merge(test_ideal, on=['srch_id', 'prop_id'], how='left')
)

final_ndcg = calc_NDCG(test_ideal, df_res)
print(f"result final:{final_ndcg}")


[1]	training's ndcg@5: 0.375468	valid_1's ndcg@5: 0.368295
[2]	training's ndcg@5: 0.365883	valid_1's ndcg@5: 0.354923
[3]	training's ndcg@5: 0.369959	valid_1's ndcg@5: 0.357776
[4]	training's ndcg@5: 0.374963	valid_1's ndcg@5: 0.362363
[5]	training's ndcg@5: 0.379062	valid_1's ndcg@5: 0.365045
[6]	training's ndcg@5: 0.382791	valid_1's ndcg@5: 0.368754
[7]	training's ndcg@5: 0.384259	valid_1's ndcg@5: 0.370474
[8]	training's ndcg@5: 0.386767	valid_1's ndcg@5: 0.372232
[9]	training's ndcg@5: 0.387368	valid_1's ndcg@5: 0.373472
[10]	training's ndcg@5: 0.38852	valid_1's ndcg@5: 0.373592
[11]	training's ndcg@5: 0.389701	valid_1's ndcg@5: 0.37474
[12]	training's ndcg@5: 0.389989	valid_1's ndcg@5: 0.374929
[13]	training's ndcg@5: 0.391199	valid_1's ndcg@5: 0.375815
[14]	training's ndcg@5: 0.391531	valid_1's ndcg@5: 0.37634
[15]	training's ndcg@5: 0.392522	valid_1's ndcg@5: 0.376408
[16]	training's ndcg@5: 0.393047	valid_1's ndcg@5: 0.376821
[17]	training's ndcg@5: 0.393491	valid_1's ndcg@5: 0

## Evaluation


In [None]:
# Predicting the scores
# NOTE(review): `best_ranker` is not defined in the visible part of this
# notebook; it has to come from a cell that is not shown here - confirm before
# relying on Restart & Run All.
test = df_test
# Drop the query id and, if present, a prediction column left behind by an
# earlier run, so re-running this cell never feeds 'pred_grades' to the model.
test_input = test.drop(columns=['srch_id', 'pred_grades'], errors='ignore')

print("Predicting...")
y_pred = best_ranker.predict(test_input)
print("Done predicting")

# assign() returns a new frame, so df_test itself is left untouched (the former
# `df_res = test` alias wrote 'pred_grades' straight into df_test).
df_res = test.assign(pred_grades=y_pred).sort_values(
    by=['srch_id', 'pred_grades'], ascending=[True, False]
)

df_res

Predicting...
Done predicting


Unnamed: 0,srch_id,site_id,visitor_location_country_id,visitor_hist_starrating,visitor_hist_adr_usd,prop_country_id,prop_id,prop_starrating,prop_review_score,prop_brand_bool,prop_location_score1,prop_location_score2,prop_log_historical_price,price_usd,promotion_flag,srch_destination_id,srch_length_of_stay,srch_booking_window,srch_adults_count,srch_children_count,srch_room_count,srch_saturday_night_bool,srch_query_affinity_score,orig_destination_distance,comp1_rate,comp1_inv,comp1_rate_percent_diff,comp2_rate,comp2_inv,comp2_rate_percent_diff,comp3_rate,comp3_inv,comp3_rate_percent_diff,comp4_rate,comp4_inv,comp4_rate_percent_diff,comp5_rate,comp5_inv,comp5_rate_percent_diff,comp6_rate,comp6_inv,comp6_rate_percent_diff,comp7_rate,comp7_inv,comp7_rate_percent_diff,comp8_rate,comp8_inv,comp8_rate_percent_diff,month,day,hour,norm_price_usd_srch_id,norm_price_usd_prop_id,norm_price_usd_prop_country_id,norm_price_usd_srch_destination_id,norm_price_usd_srch_length_of_stay,norm_price_usd_srch_booking_window,norm_prop_starrating_srch_id,norm_prop_starrating_prop_id,norm_prop_starrating_prop_country_id,norm_prop_starrating_srch_destination_id,norm_prop_starrating_srch_length_of_stay,norm_prop_starrating_srch_booking_window,norm_prop_review_score_srch_id,norm_prop_review_score_prop_id,norm_prop_review_score_prop_country_id,norm_prop_review_score_srch_destination_id,norm_prop_review_score_srch_length_of_stay,norm_prop_review_score_srch_booking_window,norm_prop_location_score1_srch_id,norm_prop_location_score1_prop_id,norm_prop_location_score1_prop_country_id,norm_prop_location_score1_srch_destination_id,norm_prop_location_score1_srch_length_of_stay,norm_prop_location_score1_srch_booking_window,norm_prop_location_score2_srch_id,norm_prop_location_score2_prop_id,norm_prop_location_score2_prop_country_id,norm_prop_location_score2_srch_destination_id,norm_prop_location_score2_srch_length_of_stay,norm_prop_location_score2_srch_booking_window,rank_price_usd,rank_prop_starrating,rank_
prop_review_score,rank_prop_location_score1,rank_prop_location_score2,usd_diff,star_diff,pred_grades
23,1,24,216,-1.0,-1.0,219,99484,3,4.0,1,2.40,0.2182,4.54,69.00,1,19222,1,10,2,0,1,0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,0.0,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2,2,15,-0.671318,-0.689460,-0.038936,-0.608633,-0.031497,-0.256239,0.435686,-1.0,0.022244,0.423708,-0.06239,-0.152279,-0.018342,-1.0,0.141606,0.056152,0.221727,0.208696,-0.866188,-1.0,-0.146292,-0.225135,-0.134617,-0.266470,1.211751,1.546040,0.596490,1.240322,0.633046,0.570355,20.5,11.5,20.0,22.0,6.0,-1.0,-1.0,1.128720
12,1,24,216,-1.0,-1.0,219,61934,3,4.5,1,2.89,0.2425,4.69,88.88,1,19222,1,10,2,0,1,0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2,2,15,-0.133106,-0.446593,-0.031811,-0.023315,-0.025488,-0.194912,0.435686,-1.0,0.022244,0.423708,-0.06239,-0.152279,0.513567,-1.0,0.750018,0.638288,0.713598,0.701668,0.761925,-1.0,0.210782,0.703219,0.207293,0.064784,1.528994,1.688049,0.753974,1.568926,0.793864,0.723864,14.0,11.5,9.5,7.0,1.0,-1.0,-1.0,0.893853
9,1,24,216,-1.0,-1.0,219,54937,3,4.0,1,2.08,0.1649,4.75,83.30,1,19222,1,10,2,0,1,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2,2,15,-0.284174,-0.125656,-0.033811,-0.187605,-0.027175,-0.212125,0.435686,-1.0,0.022244,0.423708,-0.06239,-0.152279,-0.018342,-1.0,0.141606,0.056152,0.221727,0.208696,-1.929446,-1.0,-0.379483,-0.831407,-0.357905,-0.482800,0.515907,1.670272,0.251061,0.519556,0.280303,0.233647,16.0,11.5,20.0,28.5,11.0,-1.0,-1.0,0.718350
6,1,24,216,-1.0,-1.0,219,34263,3,4.5,1,3.09,0.1300,4.63,79.00,0,19222,1,10,2,0,1,0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,6.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,6.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2,2,15,-0.400588,-0.882482,-0.035352,-0.314208,-0.028475,-0.225390,0.435686,-1.0,0.022244,0.423708,-0.06239,-0.152279,0.513567,-1.0,0.750018,0.638288,0.713598,0.701668,1.426461,-1.0,0.356526,1.082139,0.346848,0.199990,0.060279,1.005503,0.024880,0.047610,0.049333,0.013175,18.0,11.5,9.5,1.0,13.0,-1.0,-1.0,0.596245
4,1,24,216,-1.0,-1.0,219,24194,3,4.5,1,2.94,0.2090,4.72,79.00,0,19222,1,10,2,0,1,0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2,2,15,-0.400588,-0.492400,-0.035352,-0.314208,-0.028475,-0.225390,0.435686,-1.0,0.022244,0.423708,-0.06239,-0.152279,0.513567,-1.0,0.750018,0.638288,0.713598,0.701668,0.928059,-1.0,0.247218,0.797949,0.242182,0.098586,1.091643,1.057683,0.536866,1.115912,0.572159,0.512237,18.0,11.5,9.5,4.5,7.5,-1.0,-1.0,0.514031
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4959181,332787,24,216,-1.0,-1.0,117,94437,4,0.0,0,2.94,0.0928,4.64,66.07,0,19246,2,7,1,0,1,0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,0.0,43.0,1.0,0.0,43.0,-1.0,0.0,12.0,-1.0,0.0,12.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,5,21,11,-0.657494,0.188880,-0.245072,-0.657494,-0.009917,-0.573559,-0.377964,-1.0,0.068374,-0.377964,0.77001,0.817248,-1.387905,-1.0,-1.599454,-1.387905,-3.689045,-3.678032,1.635425,-1.0,-0.181605,1.635425,0.003139,0.113572,-0.296757,2.267787,0.337793,-0.296757,-0.257804,-0.211032,6.5,4.5,6.5,1.0,3.0,-1.0,-1.0,-0.020801
4959177,332787,24,216,-1.0,-1.0,117,29018,4,5.0,1,2.56,0.0538,4.64,70.05,0,19246,2,7,1,0,1,0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,0.0,69.0,-1.0,-1.0,-1.0,0.0,0.0,16.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,5,21,11,-0.404765,0.442053,-0.223651,-0.404765,-0.009747,-0.545107,-0.377964,-1.0,0.068374,-0.377964,0.77001,0.817248,0.981689,-1.0,1.123349,0.981689,1.165966,1.186891,0.607002,-1.0,-0.483648,0.607002,-0.244004,-0.140976,-0.631899,1.788854,-0.113008,-0.631899,-0.497846,-0.462914,4.0,4.5,1.0,2.0,5.0,-1.0,-1.0,-0.072454
4959182,332787,24,216,-1.0,-1.0,117,99509,4,4.5,1,2.08,0.0344,4.64,82.06,0,19246,2,7,1,0,1,0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,16.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,5,21,11,0.357866,-0.041746,-0.159009,0.357866,-0.009236,-0.459249,-0.377964,-1.0,0.068374,-0.377964,0.77001,0.817248,0.744729,-1.0,0.851069,0.744729,0.680465,0.700399,-0.692059,-1.0,-0.865176,-0.692059,-0.556186,-0.462510,-0.798610,3.175426,-0.337252,-0.798610,-0.617252,-0.588210,2.0,4.5,2.5,6.0,6.0,-1.0,-1.0,-0.283133
4959178,332787,24,216,-1.0,-1.0,117,32019,4,3.5,0,2.48,0.0551,4.53,66.07,0,19246,2,7,1,0,1,0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,0.0,22.0,1.0,0.0,127.0,-1.0,0.0,27.0,1.0,0.0,22.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,5,21,11,-0.657494,2.642323,-0.245072,-0.657494,-0.009917,-0.573559,-0.377964,-1.0,0.068374,-0.377964,0.77001,0.817248,0.270811,-1.0,0.306508,0.270811,-0.290537,-0.272586,0.390491,-1.0,-0.547236,0.390491,-0.296035,-0.194565,-0.620727,0.575903,-0.097981,-0.620727,-0.489845,-0.454518,6.5,4.5,4.0,3.0,4.0,-1.0,-1.0,-0.334332


In [None]:

# Keep only the id columns of df_res and write them to the submission CSV.
submission_path = config['PATH']['SUBMISSION_DIR'] + '/lgbm_submission_optuna.csv'
lgbm_submission = df_res[['srch_id', 'prop_id']]
lgbm_submission.to_csv(submission_path, index=False)

In [None]:
# Compare the XGB predictions with the random baseline.
# NOTE(review): the XGB score is computed against `df_ideal` while the random
# baseline uses `test_ideal`, and `df_ideal`, `pred_xgb` and `pred_random` are
# not defined in the visible cells - confirm both scores use the same reference.
xgb_ndcg = calc_NDCG(df_ideal, pred_xgb)
random_ndcg = calc_NDCG(test_ideal, pred_random)
print(f"XGB: {xgb_ndcg}, Random: {random_ndcg}")

XGB: 0.3375755506487008, Random: 0.15050172446700524


## Optuna + XGBClassifier

In [None]:
# Optimize XGB with optuna
import optuna
from functools import partial

def objective(trial, X_train, y_train, X_test, test_ideal):
    """Optuna objective: fit an XGBoost classifier with sampled settings and return its NDCG.

    Args:
        trial: Optuna trial used to sample the hyper-parameters.
        X_train: Training features passed to ``XGBClassifier.fit``.
        y_train: Training labels. They are cast to int and every entry equal
            to 5 is re-labelled as 2 before fitting.
        X_test: Features handed to ``constructs_predictions`` for scoring.
        test_ideal: Passed as ``ideal_df`` to ``constructs_predictions`` and as
            the first argument of ``calc_NDCG``.

    Returns:
        The value of ``calc_NDCG(test_ideal, predictions)`` for this trial.
    """
    # Re-label 5 as 2; the ValueError in this cell's output shows XGBClassifier
    # rejecting the classes [0 1 5] and expecting [0 1 2].
    labels = y_train.astype(int)
    labels[y_train == 5] = 2

    # Sampled part of the configuration (the order of the suggest_* calls is unchanged).
    sampled = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 1),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-4, 1e-1, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-4, 1e-1, log=True),
    }

    # The objective and seed are fixed across trials.
    model = xgb.XGBClassifier(objective="multi:softprob", random_state=42, **sampled)
    model.fit(X_train, labels)

    ranking = constructs_predictions(model, X_test, ideal_df=test_ideal)
    return calc_NDCG(test_ideal, ranking)

print("Training XGB")

# Bind the data to the objective so Optuna only has to supply the trial.
# NOTE(review): the search is scored on X_test / test_ideal, the same data the
# final score below is reported on - confirm this is intended.
objective_with_data = partial(
    objective,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    test_ideal=test_ideal,
)

# Maximise the objective over 20 trials.
study = optuna.create_study(direction="maximize")
study.optimize(objective_with_data, n_trials=20)

# Refit with the best hyper-parameters, using the same label mapping as
# objective(): labels are cast to int and 5 is re-labelled as 2.
y_train_xgb = y_train.astype(int)
y_train_xgb[y_train == 5] = 2

# NOTE(review): this re-binds the notebook-level name `best_params`, which the
# LightGBM cells also use for their own parameter dict.
best_params = study.best_params
xgb_model_optimized = xgb.XGBClassifier(
    objective="multi:softprob",
    random_state=42,
    **best_params,
)
xgb_model_optimized.fit(X_train, y_train_xgb)

# Predictions for the held-out split (scored below) and for the submission set.
pred_xgb_optimized = constructs_predictions(xgb_model_optimized, X_test, ideal_df=test_ideal)
pred_xgb_submission = constructs_predictions(xgb_model_optimized, df_test)

optimized_ndcg = calc_NDCG(test_ideal, pred_xgb_optimized)
print(f"XGB Optimized: {optimized_ndcg}")

# Write the submission predictions to DATA_DIR.
xgb_submission_path = config['PATH']['DATA_DIR'] + '/submission_XGB.csv'
pred_xgb_submission.to_csv(xgb_submission_path, index=False)

[32m[I 2023-05-09 13:56:31,830][0m A new study created in memory with name: no-name-3a7cbff6-7ed5-4fc1-8c20-9778f5cce14a[0m


Training XGB


[32m[I 2023-05-09 13:58:02,635][0m Trial 0 finished with value: 0.3422170675116014 and parameters: {'n_estimators': 218, 'max_depth': 10, 'learning_rate': 0.002795642578981349, 'subsample': 0.8932459525721343, 'colsample_bytree': 0.6546014752508442, 'gamma': 0.4545479889258107, 'reg_alpha': 0.0006735472057143736, 'reg_lambda': 0.05659086785788689}. Best is trial 0 with value: 0.3422170675116014.[0m
[32m[I 2023-05-09 13:58:34,110][0m Trial 1 finished with value: 0.3296238751532763 and parameters: {'n_estimators': 122, 'max_depth': 5, 'learning_rate': 0.00020205115375924383, 'subsample': 0.6995347755906247, 'colsample_bytree': 0.9885228465832642, 'gamma': 0.19381601429279216, 'reg_alpha': 0.03803815623242628, 'reg_lambda': 0.00015357257740569215}. Best is trial 0 with value: 0.3422170675116014.[0m
[32m[I 2023-05-09 13:59:37,760][0m Trial 2 finished with value: 0.3368665578611753 and parameters: {'n_estimators': 472, 'max_depth': 4, 'learning_rate': 0.004549507912707027, 'subsampl

ValueError: Invalid classes inferred from unique values of `y`.  Expected: [0 1 2], got [0 1 5]