In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
import seaborn as sns
import numpy as np
pd.set_option('display.max_columns', 700)

In [2]:
df_train = pd.read_csv('TRAIN (1).csv')
df_train['order_date'] = pd.to_datetime(df_train['order_date'], format = '%m/%d/%y')
df_train['evsd'] = pd.to_datetime(df_train['evsd'], format = '%m/%d/%y')
df_train['tag'] = 'train'

print(df_train.shape)
df_train.head()

(259287, 14)


Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag
0,1,JFK2,540,30718,67271,2022-11-02,2022-11-14,AFT Lite,54,54,54,4,9,train
1,1,JFK2,540,30718,125389,2022-11-20,2022-12-01,AFT Lite,60,60,60,5,3,train
2,1,JFK2,648,16750,11165,2022-09-30,2022-10-03,AFT Lite,6,6,6,1,2,train
3,1,JFK2,648,16750,27008,2022-10-14,2022-10-17,AFT Lite,6,6,6,2,2,train
4,1,JFK2,7,8748,33598,2022-11-15,2022-11-16,AFT Lite,8,8,8,1,1,train


In [3]:
# Create a new column that combines the values of the existing columns
df_train['unique_id'] = df_train.apply(lambda row: f"{row['warehouse_id']}_{row['d_id']}_{row['isbn_id']}_{row['order_id']}\
                           _{row['order_date']}_{row['evsd']}_{row['row_group_ind']}_{row['visibility']}", axis=1)

# Drop duplicates based on the new column
df_unique = df_train.drop_duplicates(subset=['unique_id'])

df_unique.shape, df_train.shape

((259287, 15), (259287, 15))

In [4]:
df_train[df_train['d_id'] == 746]

Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag,unique_id
16876,1,JFK2,746,6524,198145,2022-09-28,2022-10-06,AFT Lite,6,6,6,6,1,train,JFK2_746_6524_198145 ...
16877,1,JFK2,746,6524,198145,2022-09-28,2022-10-14,AFT Lite,6,6,6,6,2,train,JFK2_746_6524_198145 ...
50160,1,JFK2,746,31407,198145,2022-09-28,2022-10-10,AFT Lite,6,6,6,0,2,train,JFK2_746_31407_198145 ...
61287,1,JFK2,746,6524,198145,2022-09-28,2022-10-06,AFT Lite,6,6,6,6,3,train,JFK2_746_6524_198145 ...
81009,1,JFK2,746,31407,198145,2022-09-28,2022-10-06,AFT Lite,6,6,6,1,2,train,JFK2_746_31407_198145 ...
94532,1,JFK2,746,6524,198145,2022-09-28,2022-10-06,AFT Lite,6,6,6,4,6,train,JFK2_746_6524_198145 ...
105367,1,JFK2,746,31407,198145,2022-09-28,2022-10-06,AFT Lite,6,6,6,0,1,train,JFK2_746_31407_198145 ...
105368,1,JFK2,746,31407,198145,2022-09-28,2022-10-06,AFT Lite,6,6,6,6,6,train,JFK2_746_31407_198145 ...
114281,1,JFK2,746,31407,198145,2022-09-28,2022-10-06,AFT Lite,6,6,6,1,4,train,JFK2_746_31407_198145 ...
120090,1,JFK2,746,6524,198145,2022-09-28,2022-10-06,AFT Lite,6,6,6,1,8,train,JFK2_746_6524_198145 ...


In [5]:
df_train.nunique()

region_id                  1
warehouse_id               1
d_id                      38
isbn_id                 9059
order_id                1414
order_date                79
evsd                      81
po_source_ind              2
quantity_ordered         205
quantity_submitted       205
quantity_received        231
row_group_ind              8
visibility                25
tag                        1
unique_id             259287
dtype: int64

In [6]:
df_test = pd.read_csv('Test.csv', encoding = 'latin1')
# df_test.columns = ['LOCATION', 'd_id', 'isbn_id', 'order_id_1', 'order_date', 'evsd',
#        'row_group_ind', 'visibility', 'quantity_ordered', 'quantity_submitted',
#        'quantity_received']
df_test['order_date'] = pd.to_datetime(df_test['order_date'], format = '%m/%d/%y')
df_test['evsd'] = pd.to_datetime(df_test['evsd'], format = '%m/%d/%y')
df_test['tag'] = 'test'
print(df_test.shape)
df_test.head()

(98309, 14)


Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag
0,1,JFK2,684,674,45573,2022-12-15,2022-12-19,AFT Lite,42,42,42,3,3,test
1,1,JFK2,1,19822,110331,2022-12-27,2022-12-30,AFT Lite,6,6,6,0,3,test
2,1,JFK2,509,19749,119943,2022-12-15,2022-12-17,AFT Lite,12,12,12,3,1,test
3,1,JFK2,584,19747,43091,2022-12-20,2022-12-23,AFT Lite,12,12,12,4,3,test
4,1,JFK2,540,20260,117854,2022-12-26,2023-01-04,AFT Lite,60,60,60,1,8,test


In [7]:
df_test['unique_id'] = df_test.apply(lambda row: f"{row['warehouse_id']}_{row['d_id']}_{row['isbn_id']}_{row['order_id']}\
                           _{row['order_date']}_{row['evsd']}_{row['row_group_ind']}_{row['visibility']}", axis=1)

# Drop duplicates based on the new column
df_test = df_test.drop_duplicates(subset=['unique_id'])

print(df_test.shape)
df_test.head()

(98309, 15)


Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag,unique_id
0,1,JFK2,684,674,45573,2022-12-15,2022-12-19,AFT Lite,42,42,42,3,3,test,JFK2_684_674_45573 _...
1,1,JFK2,1,19822,110331,2022-12-27,2022-12-30,AFT Lite,6,6,6,0,3,test,JFK2_1_19822_110331 ...
2,1,JFK2,509,19749,119943,2022-12-15,2022-12-17,AFT Lite,12,12,12,3,1,test,JFK2_509_19749_119943 ...
3,1,JFK2,584,19747,43091,2022-12-20,2022-12-23,AFT Lite,12,12,12,4,3,test,JFK2_584_19747_43091 ...
4,1,JFK2,540,20260,117854,2022-12-26,2023-01-04,AFT Lite,60,60,60,1,8,test,JFK2_540_20260_117854 ...


In [8]:
df_test.nunique()

region_id                 1
warehouse_id              1
d_id                     32
isbn_id                6539
order_id                579
order_date               31
evsd                     40
po_source_ind             2
quantity_ordered        168
quantity_submitted      183
quantity_received       205
row_group_ind             8
visibility               17
tag                       1
unique_id             98309
dtype: int64

In [9]:
df_train['po_source_ind'].value_counts(dropna = False)

po_source_ind
AFT Lite    259172
AFT            115
Name: count, dtype: int64

In [10]:
df_test['po_source_ind'].value_counts(dropna = False)

po_source_ind
AFT Lite    98260
AFT            49
Name: count, dtype: int64

In [11]:
for col in ['d_id', 'isbn_id', 'order_id']:
  df_train[col] = df_train[col].astype(int).astype(str)

print(df_train.shape)
df_train.head(2)

(259287, 15)


Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag,unique_id
0,1,JFK2,540,30718,67271,2022-11-02,2022-11-14,AFT Lite,54,54,54,4,9,train,JFK2_540_30718_67271 ...
1,1,JFK2,540,30718,125389,2022-11-20,2022-12-01,AFT Lite,60,60,60,5,3,train,JFK2_540_30718_125389 ...


In [12]:
for col in ['d_id', 'isbn_id', 'order_id']:
  df_test[col] = df_test[col].astype(int).astype(str)

print(df_test.shape)
df_test.head(2)

(98309, 15)


Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag,unique_id
0,1,JFK2,684,674,45573,2022-12-15,2022-12-19,AFT Lite,42,42,42,3,3,test,JFK2_684_674_45573 _...
1,1,JFK2,1,19822,110331,2022-12-27,2022-12-30,AFT Lite,6,6,6,0,3,test,JFK2_1_19822_110331 ...


## Split the Training data into Train and Validation, we will keep Test data as OOT data

In [13]:
df_train['order_date'].describe()

count                           259287
mean     2022-10-26 14:10:23.540709632
min                2022-09-05 00:00:00
25%                2022-10-09 00:00:00
50%                2022-10-26 00:00:00
75%                2022-11-14 00:00:00
max                2022-11-30 00:00:00
Name: order_date, dtype: object

In [14]:
df_train['order_date'].value_counts()

order_date
2022-11-23    8154
2022-11-14    7513
2022-09-26    7157
2022-10-24    5811
2022-11-15    5574
              ... 
2022-09-15     161
2022-09-16     154
2022-09-12     152
2022-09-05     125
2022-09-18      91
Name: count, Length: 79, dtype: int64

In [33]:
# Convert order_date to datetime format
df_train['order_date'] = pd.to_datetime(df_train['order_date'])

# Split the data into training and testing sets based on the date
X_train = df_train[df_train['order_date'].dt.month < 11]
X_valid = df_train[df_train['order_date'].dt.month == 11]

print(X_train.shape)
print(X_valid.shape)
df_train.shape

(146495, 15)
(112792, 15)


(259287, 15)

In [34]:
112792/259287, 146495/259287

(0.435008311253553, 0.564991688746447)

In [35]:
X_valid.head()

Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag,unique_id
0,1,JFK2,540,30718,67271,2022-11-02,2022-11-14,AFT Lite,54,54,54,4,9,train,JFK2_540_30718_67271 ...
1,1,JFK2,540,30718,125389,2022-11-20,2022-12-01,AFT Lite,60,60,60,5,3,train,JFK2_540_30718_125389 ...
4,1,JFK2,7,8748,33598,2022-11-15,2022-11-16,AFT Lite,8,8,8,1,1,train,JFK2_7_8748_33598 _2...
5,1,JFK2,7,8748,142228,2022-11-01,2022-11-02,AFT Lite,16,16,16,3,1,train,JFK2_7_8748_142228 _...
6,1,JFK2,7,8748,27330,2022-11-05,2022-11-06,AFT Lite,0,24,0,6,1,train,JFK2_7_8748_27330 _2...


In [36]:
# How long does order usually take to deliver?
X_train['order_delivery_days'] = (X_train['evsd'] - X_train['order_date']).dt.days
X_valid['order_delivery_days'] = (X_valid['evsd'] - X_valid['order_date']).dt.days


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['order_delivery_days'] = (X_train['evsd'] - X_train['order_date']).dt.days
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_valid['order_delivery_days'] = (X_valid['evsd'] - X_valid['order_date']).dt.days


In [37]:
# Define a funtion to peform one hot encoding for the top 10 most frequent categories of Categorical Features
def one_hot_top10(df,feature,top10_categories):
    for category in top10_categories:
        df[feature+'_'+category]=np.where(df[feature]==category,1,0)

In [38]:
"""
Create a dict for the top 10 most frequent categories of Categorical Features
Call the function to perform one hot encoding on Categorical Features
verify the train and test set after applying one hot encoding for the Categorical 
"""

var_dict_train ={}

for col in ['d_id', 'isbn_id']:
  var_dict_train[col] = [x for x in X_train[col].value_counts().sort_values(ascending=False).head(10).index]


display(var_dict_train)

{'d_id': ['509', '584', '540', '648', '410', '7', '1', '643', '454', '110'],
 'isbn_id': ['4595',
  '26550',
  '27825',
  '14820',
  '19934',
  '29771',
  '7865',
  '31986',
  '6790',
  '8453']}

In [39]:
for key,val in var_dict_train.items():
  one_hot_top10(X_train,key,val)

for key,val in var_dict_train.items():
  one_hot_top10(X_valid,key,val)

display(X_train.head())
display(X_valid.head())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[feature+'_'+category]=np.where(df[feature]==category,1,0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[feature+'_'+category]=np.where(df[feature]==category,1,0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[feature+'_'+category]=np.where(df[feature]==category,1,0)
A value is trying to b

Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag,unique_id,order_delivery_days,d_id_509,d_id_584,d_id_540,d_id_648,d_id_410,d_id_7,d_id_1,d_id_643,d_id_454,d_id_110,isbn_id_4595,isbn_id_26550,isbn_id_27825,isbn_id_14820,isbn_id_19934,isbn_id_29771,isbn_id_7865,isbn_id_31986,isbn_id_6790,isbn_id_8453
2,1,JFK2,648,16750,11165,2022-09-30,2022-10-03,AFT Lite,6,6,6,1,2,train,JFK2_648_16750_11165 ...,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,JFK2,648,16750,27008,2022-10-14,2022-10-17,AFT Lite,6,6,6,2,2,train,JFK2_648_16750_27008 ...,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,1,JFK2,584,29062,62664,2022-10-17,2022-10-19,AFT Lite,12,12,12,5,1,train,JFK2_584_29062_62664 ...,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,1,JFK2,393,24754,91483,2022-10-19,2022-10-28,AFT Lite,16,16,16,7,6,train,JFK2_393_24754_91483 ...,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
11,1,JFK2,7,27826,175768,2022-10-11,2022-10-12,AFT Lite,24,24,24,5,1,train,JFK2_7_27826_175768 ...,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag,unique_id,order_delivery_days,d_id_509,d_id_584,d_id_540,d_id_648,d_id_410,d_id_7,d_id_1,d_id_643,d_id_454,d_id_110,isbn_id_4595,isbn_id_26550,isbn_id_27825,isbn_id_14820,isbn_id_19934,isbn_id_29771,isbn_id_7865,isbn_id_31986,isbn_id_6790,isbn_id_8453
0,1,JFK2,540,30718,67271,2022-11-02,2022-11-14,AFT Lite,54,54,54,4,9,train,JFK2_540_30718_67271 ...,12,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,JFK2,540,30718,125389,2022-11-20,2022-12-01,AFT Lite,60,60,60,5,3,train,JFK2_540_30718_125389 ...,11,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,JFK2,7,8748,33598,2022-11-15,2022-11-16,AFT Lite,8,8,8,1,1,train,JFK2_7_8748_33598 _2...,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,1,JFK2,7,8748,142228,2022-11-01,2022-11-02,AFT Lite,16,16,16,3,1,train,JFK2_7_8748_142228 _...,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,1,JFK2,7,8748,27330,2022-11-05,2022-11-06,AFT Lite,0,24,0,6,1,train,JFK2_7_8748_27330 _2...,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [40]:
X_train.head(1)

Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag,unique_id,order_delivery_days,d_id_509,d_id_584,d_id_540,d_id_648,d_id_410,d_id_7,d_id_1,d_id_643,d_id_454,d_id_110,isbn_id_4595,isbn_id_26550,isbn_id_27825,isbn_id_14820,isbn_id_19934,isbn_id_29771,isbn_id_7865,isbn_id_31986,isbn_id_6790,isbn_id_8453
2,1,JFK2,648,16750,11165,2022-09-30,2022-10-03,AFT Lite,6,6,6,1,2,train,JFK2_648_16750_11165 ...,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [41]:
X_train['po_source_ind_AFT_Lite'] = np.where(X_train['po_source_ind'] == 'AFT Lite', 1, 0)
X_valid['po_source_ind_AFT_Lite'] = np.where(X_valid['po_source_ind'] == 'AFT Lite', 1, 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['po_source_ind_AFT_Lite'] = np.where(X_train['po_source_ind'] == 'AFT Lite', 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_valid['po_source_ind_AFT_Lite'] = np.where(X_valid['po_source_ind'] == 'AFT Lite', 1, 0)


In [42]:
X_train['row_group_ind'] = X_train['row_group_ind'].astype(str)
var_dict_train_ ={}

for col in ['row_group_ind']:
  var_dict_train_[col] = [x for x in X_train[col].value_counts().sort_values(ascending=False).index]


display(var_dict_train_)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['row_group_ind'] = X_train['row_group_ind'].astype(str)


{'row_group_ind': ['4', '1', '5', '3', '2', '6', '0', '7']}

In [43]:
for key,val in var_dict_train_.items():
  one_hot_top10(X_train,key,val)

for key,val in var_dict_train_.items():
  one_hot_top10(X_valid,key,val)

display(X_train.head())
display(X_valid.head())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[feature+'_'+category]=np.where(df[feature]==category,1,0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[feature+'_'+category]=np.where(df[feature]==category,1,0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[feature+'_'+category]=np.where(df[feature]==category,1,0)
A value is trying to b

Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag,unique_id,order_delivery_days,d_id_509,d_id_584,d_id_540,d_id_648,d_id_410,d_id_7,d_id_1,d_id_643,d_id_454,d_id_110,isbn_id_4595,isbn_id_26550,isbn_id_27825,isbn_id_14820,isbn_id_19934,isbn_id_29771,isbn_id_7865,isbn_id_31986,isbn_id_6790,isbn_id_8453,po_source_ind_AFT_Lite,row_group_ind_4,row_group_ind_1,row_group_ind_5,row_group_ind_3,row_group_ind_2,row_group_ind_6,row_group_ind_0,row_group_ind_7
2,1,JFK2,648,16750,11165,2022-09-30,2022-10-03,AFT Lite,6,6,6,1,2,train,JFK2_648_16750_11165 ...,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0
3,1,JFK2,648,16750,27008,2022-10-14,2022-10-17,AFT Lite,6,6,6,2,2,train,JFK2_648_16750_27008 ...,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
8,1,JFK2,584,29062,62664,2022-10-17,2022-10-19,AFT Lite,12,12,12,5,1,train,JFK2_584_29062_62664 ...,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0
9,1,JFK2,393,24754,91483,2022-10-19,2022-10-28,AFT Lite,16,16,16,7,6,train,JFK2_393_24754_91483 ...,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1
11,1,JFK2,7,27826,175768,2022-10-11,2022-10-12,AFT Lite,24,24,24,5,1,train,JFK2_7_27826_175768 ...,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0


Unnamed: 0,region_id,warehouse_id,d_id,isbn_id,order_id,order_date,evsd,po_source_ind,quantity_ordered,quantity_submitted,quantity_received,row_group_ind,visibility,tag,unique_id,order_delivery_days,d_id_509,d_id_584,d_id_540,d_id_648,d_id_410,d_id_7,d_id_1,d_id_643,d_id_454,d_id_110,isbn_id_4595,isbn_id_26550,isbn_id_27825,isbn_id_14820,isbn_id_19934,isbn_id_29771,isbn_id_7865,isbn_id_31986,isbn_id_6790,isbn_id_8453,po_source_ind_AFT_Lite,row_group_ind_4,row_group_ind_1,row_group_ind_5,row_group_ind_3,row_group_ind_2,row_group_ind_6,row_group_ind_0,row_group_ind_7
0,1,JFK2,540,30718,67271,2022-11-02,2022-11-14,AFT Lite,54,54,54,4,9,train,JFK2_540_30718_67271 ...,12,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,1,JFK2,540,30718,125389,2022-11-20,2022-12-01,AFT Lite,60,60,60,5,3,train,JFK2_540_30718_125389 ...,11,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
4,1,JFK2,7,8748,33598,2022-11-15,2022-11-16,AFT Lite,8,8,8,1,1,train,JFK2_7_8748_33598 _2...,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
5,1,JFK2,7,8748,142228,2022-11-01,2022-11-02,AFT Lite,16,16,16,3,1,train,JFK2_7_8748_142228 _...,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
6,1,JFK2,7,8748,27330,2022-11-05,2022-11-06,AFT Lite,0,24,0,6,1,train,JFK2_7_8748_27330 _2...,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [44]:
req_cols = [col for col in X_train.columns if col not in ['region_id', 'warehouse_id', 'd_id', 'isbn_id', 'order_id', 'order_date', 'evsd', 'po_source_ind', 'tag', 'unique_id', 'row_group_ind','quantity_received']]


In [27]:
## Feature ranking with recursive feature elimination and cross-validated selection of the best number of features
# regressor =RandomForestRegressor(random_state=SEED)
# regressor = DecisionTreeRegressor()
from sklearn.feature_selection import RFECV
import xgboost as xgb

X = X_train[req_cols].copy()
y = X_train['quantity_received'].copy()

from sklearn.preprocessing import QuantileTransformer

# Instantiate the transformer
qt = QuantileTransformer()
X[req_cols] = qt.fit_transform(X[req_cols])

regressor = xgb.XGBRegressor(n_estimators = 20)
selector = RFECV(regressor, step = 1, cv=5, n_jobs=-1,verbose=1,  scoring='r2')
selector.fit(X, y)
print('The optimal number of features is {}'.format(selector.n_features_))
features_rfecv = [f for f,s in zip(X, selector.support_) if s]
print('The selected features are:')
print ('{}'.format(features_rfecv)) ## optimal features list

Fitting estimator with 33 features.
Fitting estimator with 32 features.
Fitting estimator with 31 features.
Fitting estimator with 30 features.
Fitting estimator with 29 features.
Fitting estimator with 28 features.
Fitting estimator with 27 features.
Fitting estimator with 26 features.
Fitting estimator with 25 features.
Fitting estimator with 24 features.
Fitting estimator with 23 features.
Fitting estimator with 22 features.
Fitting estimator with 21 features.
Fitting estimator with 20 features.
Fitting estimator with 19 features.
Fitting estimator with 18 features.
Fitting estimator with 17 features.
Fitting estimator with 16 features.
Fitting estimator with 15 features.
Fitting estimator with 14 features.
Fitting estimator with 13 features.
Fitting estimator with 12 features.
Fitting estimator with 11 features.
Fitting estimator with 10 features.
The optimal number of features is 9
The selected features are:
['quantity_ordered', 'quantity_submitted', 'order_delivery_days', 'd_id_5

In [49]:
"""Uncomment for tuning the model"""
import optuna
from sklearn.metrics import mean_absolute_error, r2_score

imp_features = ['quantity_ordered', 'quantity_submitted', 'order_delivery_days', 'd_id_509', 'd_id_584', 'd_id_540', 'd_id_648', 'd_id_410', 'row_group_ind_6']


def run(trial):
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 0.1)
    reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
    reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
    subsample = trial.suggest_float("subsample", 0.1, 1.0)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.1, 1.0)
    max_depth = trial.suggest_int("max_depth", 3, 6)
    n_estimators = trial.suggest_int("n_estimators", 20, 3000)

    xtrain =X_train[req_cols].copy()
    xvalid = X_valid[req_cols].copy()
    y_train = X_train['quantity_received']
    y_test = X_valid['quantity_received']


    xtrain[req_cols] = qt.fit_transform(xtrain[req_cols])
    xvalid[req_cols] = qt.transform(xvalid[req_cols])

    model = xgb.XGBRegressor(
            random_state=42,
            objective='reg:squarederror',
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            reg_lambda=reg_lambda,
            reg_alpha=reg_alpha,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            max_depth=max_depth,
            eval_metric='mae'
        )
    model.fit(xtrain, y_train)
    preds_valid = model.predict(xvalid)
    mae_ = mean_absolute_error(y_test, preds_valid)
    r2_sc = r2_score(y_test, preds_valid)
    print("Mean Absolute Error : ", mae_)
    print("R2_score : ", r2_sc)
    r2 = r2_sc.copy()
    n = len(y_test)
    p = xvalid.shape[1]  # number of features in X
    adj_r2 = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)
    print("Adjusted R-Square: ", adj_r2)
    return adj_r2

study = optuna.create_study(direction="maximize")
study.optimize(run, n_trials=200)

# study.best_params

[32m[I 2023-05-01 12:09:51,939][0m A new study created in memory with name: no-name-c5652706-8b4f-4ac6-9e34-d612cbd2727b[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:10:39,864][0m Trial 0 finished with value: 0.5707641092409012 and parameters: {'learning_rate': 0.0019491028320732767, 'reg_lambda': 63.52429905168716, 'reg_alpha': 4.142073543749265e-05, 'subsample': 0.9662822748710106, 'colsample_bytree': 0.7646596383515162, 'max_depth': 6, 'n_estimators': 507}. Best is trial 0 with value: 0.5707641092409012.[0m


Mean Absolute Error :  7.758115318590932
R2_score :  0.5708896935906724
Adjusted R-Square:  0.5707641092409012


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:13:08,363][0m Trial 1 finished with value: 0.5443755791625011 and parameters: {'learning_rate': 0.0837437247932512, 'reg_lambda': 6.777570677568392e-08, 'reg_alpha': 4.621171629297244e-08, 'subsample': 0.3613369121121358, 'colsample_bytree': 0.9725271006958678, 'max_depth': 6, 'n_estimators': 1985}. Best is trial 0 with value: 0.5707641092409012.[0m


Mean Absolute Error :  4.29073738306652
R2_score :  0.5445088841769761
Adjusted R-Square:  0.5443755791625011


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:13:40,433][0m Trial 2 finished with value: 0.6346233508685379 and parameters: {'learning_rate': 0.002555690433161839, 'reg_lambda': 1.8741548166108914e-07, 'reg_alpha': 1.489672334818149, 'subsample': 0.1116338279906743, 'colsample_bytree': 0.3938607768715191, 'max_depth': 4, 'n_estimators': 1093}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  6.0085433127685
R2_score :  0.634730251502643
Adjusted R-Square:  0.6346233508685379


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:13:43,036][0m Trial 3 finished with value: 0.6209843309906091 and parameters: {'learning_rate': 0.09130889440859602, 'reg_lambda': 0.012825267733223775, 'reg_alpha': 0.01518813390494116, 'subsample': 0.39116902380424146, 'colsample_bytree': 0.24970011807430395, 'max_depth': 3, 'n_estimators': 48}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  6.083342479599036
R2_score :  0.6210952220818957
Adjusted R-Square:  0.6209843309906091


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:14:31,263][0m Trial 4 finished with value: 0.558239449814421 and parameters: {'learning_rate': 0.0894019261581501, 'reg_lambda': 1.576906440486161, 'reg_alpha': 10.649072742750947, 'subsample': 0.9535723467124977, 'colsample_bytree': 0.45844901328169896, 'max_depth': 4, 'n_estimators': 1003}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.375645652997963
R2_score :  0.5583686985856539
Adjusted R-Square:  0.558239449814421


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:14:47,024][0m Trial 5 finished with value: 0.560555657673184 and parameters: {'learning_rate': 0.03557404346016877, 'reg_lambda': 8.071346489804719e-06, 'reg_alpha': 7.284008797572168e-05, 'subsample': 0.5286565298046031, 'colsample_bytree': 0.7582862911570016, 'max_depth': 4, 'n_estimators': 225}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.5027505933612995
R2_score :  0.5606842287763464
Adjusted R-Square:  0.560555657673184


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:15:33,756][0m Trial 6 finished with value: 0.5644029209399478 and parameters: {'learning_rate': 0.03260092582728893, 'reg_lambda': 10.968849752515483, 'reg_alpha': 0.00023552600700057106, 'subsample': 0.7001716065284891, 'colsample_bytree': 0.22795363169759375, 'max_depth': 6, 'n_estimators': 853}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.564851666479473
R2_score :  0.564530366424153
Adjusted R-Square:  0.5644029209399478


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:16:44,267][0m Trial 7 finished with value: 0.526089142762171 and parameters: {'learning_rate': 0.055346260312461405, 'reg_lambda': 1.038800096406048e-05, 'reg_alpha': 8.509631330975528e-07, 'subsample': 0.14843970535603387, 'colsample_bytree': 0.8132096980073522, 'max_depth': 6, 'n_estimators': 1018}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.392962589600759
R2_score :  0.526227797958852
Adjusted R-Square:  0.526089142762171


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:18:00,627][0m Trial 8 finished with value: 0.5583558429925176 and parameters: {'learning_rate': 0.03919865672085111, 'reg_lambda': 0.3814762751068402, 'reg_alpha': 1.1296321618529891e-06, 'subsample': 0.27731396278757253, 'colsample_bytree': 0.11331490369535764, 'max_depth': 5, 'n_estimators': 2308}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.633693958083253
R2_score :  0.5584850577098377
Adjusted R-Square:  0.5583558429925176


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:18:08,908][0m Trial 9 finished with value: 0.5644247013515029 and parameters: {'learning_rate': 0.07354435210252067, 'reg_lambda': 1.727187208635567e-05, 'reg_alpha': 95.01838523471633, 'subsample': 0.11684865287270427, 'colsample_bytree': 0.7833901295331802, 'max_depth': 4, 'n_estimators': 146}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.50974415150707
R2_score :  0.5645521404632707
Adjusted R-Square:  0.5644247013515029


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:19:36,534][0m Trial 10 finished with value: 0.5845150937951429 and parameters: {'learning_rate': 0.0016365770166213347, 'reg_lambda': 1.444407542123464e-08, 'reg_alpha': 0.22041674975003614, 'subsample': 0.11618524288814797, 'colsample_bytree': 0.48053636291516477, 'max_depth': 3, 'n_estimators': 2792}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  5.17958344722302
R2_score :  0.5846366549294955
Adjusted R-Square:  0.5845150937951429


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:20:32,965][0m Trial 11 finished with value: 0.5445769968027135 and parameters: {'learning_rate': 0.09179604276362868, 'reg_lambda': 0.009126247925099334, 'reg_alpha': 0.041475616517623874, 'subsample': 0.4039427451788482, 'colsample_bytree': 0.3358935644689979, 'max_depth': 3, 'n_estimators': 1582}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.48510778728671
R2_score :  0.5447102428871131
Adjusted R-Square:  0.5445769968027135


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:21:15,176][0m Trial 12 finished with value: 0.5155045764718047 and parameters: {'learning_rate': 0.06850159755093654, 'reg_lambda': 0.00846504055935849, 'reg_alpha': 0.021098159618876528, 'subsample': 0.270046793469489, 'colsample_bytree': 0.3343924992092482, 'max_depth': 3, 'n_estimators': 1532}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.568666721825177
R2_score :  0.5156463284642192
Adjusted R-Square:  0.5155045764718047


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:21:55,618][0m Trial 13 finished with value: 0.5563473164740003 and parameters: {'learning_rate': 0.09854650738556756, 'reg_lambda': 0.0005525770250353529, 'reg_alpha': 0.934553104582979, 'subsample': 0.5018086845914779, 'colsample_bytree': 0.5882916720133362, 'max_depth': 5, 'n_estimators': 696}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.35003851679617
R2_score :  0.5564771188390504
Adjusted R-Square:  0.5563473164740003


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:21:59,308][0m Trial 14 finished with value: 0.5258629855145401 and parameters: {'learning_rate': 0.018181434049879795, 'reg_lambda': 1.915425678339708e-07, 'reg_alpha': 0.008156970387703393, 'subsample': 0.23395445681279753, 'colsample_bytree': 0.19439658891202732, 'max_depth': 3, 'n_estimators': 93}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  8.40448504229777
R2_score :  0.5260017068795251
Adjusted R-Square:  0.5258629855145401


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:22:49,471][0m Trial 15 finished with value: 0.554041474323365 and parameters: {'learning_rate': 0.05473135625019774, 'reg_lambda': 0.0006545736883393421, 'reg_alpha': 1.5473054624574163, 'subsample': 0.38830241405426946, 'colsample_bytree': 0.33138796850041524, 'max_depth': 4, 'n_estimators': 1231}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.512888305554905
R2_score :  0.5541719513237227
Adjusted R-Square:  0.554041474323365


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:24:58,301][0m Trial 16 finished with value: 0.5483193225012998 and parameters: {'learning_rate': 0.06335485260790627, 'reg_lambda': 4.5249864453111757e-07, 'reg_alpha': 0.0046689487553548915, 'subsample': 0.6327673764061306, 'colsample_bytree': 0.4297841881117041, 'max_depth': 5, 'n_estimators': 1940}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.348580002671458
R2_score :  0.5484514736690123
Adjusted R-Square:  0.5483193225012998


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:25:15,812][0m Trial 17 finished with value: 0.5623958843812904 and parameters: {'learning_rate': 0.07461270409510551, 'reg_lambda': 1.1024032514548107e-08, 'reg_alpha': 0.25933656212011086, 'subsample': 0.20988495976971905, 'colsample_bytree': 0.5605590599003883, 'max_depth': 3, 'n_estimators': 449}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.536165965903399
R2_score :  0.5625239170772982
Adjusted R-Square:  0.5623958843812904


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:25:49,035][0m Trial 18 finished with value: 0.5729169058738062 and parameters: {'learning_rate': 0.046732127466954224, 'reg_lambda': 7.609025153518057e-05, 'reg_alpha': 0.0009939354776435946, 'subsample': 0.10386346869511585, 'colsample_bytree': 0.10157618130997809, 'max_depth': 4, 'n_estimators': 1266}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.787145763797902
R2_score :  0.5730418603657973
Adjusted R-Square:  0.5729169058738062


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:27:02,054][0m Trial 19 finished with value: 0.5511240140888565 and parameters: {'learning_rate': 0.018820545591674606, 'reg_lambda': 2.6258933428219963e-06, 'reg_alpha': 6.189127767241103, 'subsample': 0.3003598430782928, 'colsample_bytree': 0.2240082676214457, 'max_depth': 3, 'n_estimators': 2511}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.63844117267926
R2_score :  0.5512553446696216
Adjusted R-Square:  0.5511240140888565


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:28:07,318][0m Trial 20 finished with value: 0.5269587391083532 and parameters: {'learning_rate': 0.07896462094669664, 'reg_lambda': 0.031187816625982773, 'reg_alpha': 0.0780174068220974, 'subsample': 0.19491938042594867, 'colsample_bytree': 0.39910923987454716, 'max_depth': 4, 'n_estimators': 1872}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.47347953138535
R2_score :  0.5270971398815482
Adjusted R-Square:  0.5269587391083532


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:29:15,966][0m Trial 21 finished with value: 0.5649262572285246 and parameters: {'learning_rate': 0.008092960625110805, 'reg_lambda': 2.0356252985063045e-08, 'reg_alpha': 0.30027086450378115, 'subsample': 0.11373587694044981, 'colsample_bytree': 0.48098573044123283, 'max_depth': 3, 'n_estimators': 2720}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.660287055836431
R2_score :  0.5650535495968116
Adjusted R-Square:  0.5649262572285246


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:30:32,219][0m Trial 22 finished with value: 0.6205018076751291 and parameters: {'learning_rate': 0.001010908223148387, 'reg_lambda': 4.466038931928228e-07, 'reg_alpha': 0.1686784610770828, 'subsample': 0.2048925450826335, 'colsample_bytree': 0.5024156649484504, 'max_depth': 3, 'n_estimators': 2842}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  5.717887839781062
R2_score :  0.6206128399414156
Adjusted R-Square:  0.6205018076751291


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:32:02,627][0m Trial 23 finished with value: 0.5551152390161033 and parameters: {'learning_rate': 0.014283601054863473, 'reg_lambda': 3.9981534019538136e-07, 'reg_alpha': 0.005814624909556514, 'subsample': 0.21590964244170274, 'colsample_bytree': 0.5825206153582632, 'max_depth': 3, 'n_estimators': 2978}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.531623109219515
R2_score :  0.5552454018581072
Adjusted R-Square:  0.5551152390161033


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:33:21,533][0m Trial 24 finished with value: 0.5520512102178343 and parameters: {'learning_rate': 0.02819853869820689, 'reg_lambda': 1.3669099114544044e-06, 'reg_alpha': 0.04836651219794829, 'subsample': 0.35153365995935754, 'colsample_bytree': 0.2934480679693972, 'max_depth': 4, 'n_estimators': 2263}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.5144271644022576
R2_score :  0.5521822695227683
Adjusted R-Square:  0.5520512102178343


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:33:33,159][0m Trial 25 finished with value: 0.6065902799850841 and parameters: {'learning_rate': 0.01005574426653958, 'reg_lambda': 0.00020473930781702865, 'reg_alpha': 1.988413712357795, 'subsample': 0.18607953892779, 'colsample_bytree': 0.38997464505588414, 'max_depth': 3, 'n_estimators': 422}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  5.508879245227491
R2_score :  0.6067053824379437
Adjusted R-Square:  0.6065902799850841


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:34:14,378][0m Trial 26 finished with value: 0.5743318149768826 and parameters: {'learning_rate': 0.022631164507993077, 'reg_lambda': 2.8112964578351994e-05, 'reg_alpha': 24.14258092671522, 'subsample': 0.2950625833842776, 'colsample_bytree': 0.2719727221555278, 'max_depth': 5, 'n_estimators': 1271}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.542354674140233
R2_score :  0.5744563554996704
Adjusted R-Square:  0.5743318149768826


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:35:26,438][0m Trial 27 finished with value: 0.5604609714534068 and parameters: {'learning_rate': 0.009250005214435359, 'reg_lambda': 1.9609447740513024e-06, 'reg_alpha': 1.161773855270154, 'subsample': 0.4512092406961322, 'colsample_bytree': 0.5089664762565029, 'max_depth': 4, 'n_estimators': 1810}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.524403830750928
R2_score :  0.5605895702595353
Adjusted R-Square:  0.5604609714534068


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:35:46,447][0m Trial 28 finished with value: 0.5581976275969983 and parameters: {'learning_rate': 0.02390562334374129, 'reg_lambda': 9.123544703655384e-08, 'reg_alpha': 0.1246206232282382, 'subsample': 0.17292007348888594, 'colsample_bytree': 0.4014680261782875, 'max_depth': 3, 'n_estimators': 747}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.699641366926423
R2_score :  0.5583268886044306
Adjusted R-Square:  0.5581976275969983


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:36:07,988][0m Trial 29 finished with value: 0.6321030155400671 and parameters: {'learning_rate': 0.0037659856520998957, 'reg_lambda': 0.00010524264387974921, 'reg_alpha': 0.001390555260426134, 'subsample': 0.32090335442842205, 'colsample_bytree': 0.6280153063509486, 'max_depth': 3, 'n_estimators': 626}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  5.877553019987851
R2_score :  0.6322106535651504
Adjusted R-Square:  0.6321030155400671


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:36:33,426][0m Trial 30 finished with value: 0.5703295325971847 and parameters: {'learning_rate': 0.008678479945544408, 'reg_lambda': 0.0017999296431830641, 'reg_alpha': 0.0010898063675065276, 'subsample': 0.33335519793591833, 'colsample_bytree': 0.6440237262631314, 'max_depth': 4, 'n_estimators': 590}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  4.681389338778312
R2_score :  0.5704552440938847
Adjusted R-Square:  0.5703295325971847


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:36:47,404][0m Trial 31 finished with value: 0.34769865093799535 and parameters: {'learning_rate': 0.0015101455052080421, 'reg_lambda': 9.200721653128348e-05, 'reg_alpha': 0.016991515285269324, 'subsample': 0.2518814696821554, 'colsample_bytree': 0.5261486136763369, 'max_depth': 3, 'n_estimators': 334}. Best is trial 2 with value: 0.6346233508685379.[0m


Mean Absolute Error :  10.060994909742943
R2_score :  0.34788949900671584
Adjusted R-Square:  0.34769865093799535


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:37:19,283][0m Trial 32 finished with value: 0.635819033636011 and parameters: {'learning_rate': 0.0016960546571387152, 'reg_lambda': 7.760099984832666e-08, 'reg_alpha': 0.002433661215948604, 'subsample': 0.315533839636255, 'colsample_bytree': 0.6492758699322306, 'max_depth': 3, 'n_estimators': 954}. Best is trial 32 with value: 0.635819033636011.[0m


Mean Absolute Error :  6.810033014285006
R2_score :  0.6359255844413945
Adjusted R-Square:  0.635819033636011


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:37:58,497][0m Trial 33 finished with value: 0.5647463352548445 and parameters: {'learning_rate': 0.013180739622016706, 'reg_lambda': 6.184522237497648e-08, 'reg_alpha': 0.0012299604170425845, 'subsample': 0.42297448989535985, 'colsample_bytree': 0.6655886597956243, 'max_depth': 3, 'n_estimators': 990}. Best is trial 32 with value: 0.635819033636011.[0m


Mean Absolute Error :  4.581719337620577
R2_score :  0.5648736802640792
Adjusted R-Square:  0.5647463352548445


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:38:46,048][0m Trial 34 finished with value: 0.5652531541401054 and parameters: {'learning_rate': 0.005121979508663145, 'reg_lambda': 4.752540845073258e-06, 'reg_alpha': 6.271307797173207e-05, 'subsample': 0.34051335199628713, 'colsample_bytree': 0.6849486734671995, 'max_depth': 3, 'n_estimators': 1112}. Best is trial 32 with value: 0.635819033636011.[0m


Mean Absolute Error :  4.725506964476034
R2_score :  0.5653803508660266
Adjusted R-Square:  0.5652531541401054


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:38:49,256][0m Trial 35 finished with value: 0.3649993615908015 and parameters: {'learning_rate': 0.013084921058971143, 'reg_lambda': 4.6874697599592045e-08, 'reg_alpha': 0.0001835507442345221, 'subsample': 0.46909278795052256, 'colsample_bytree': 0.6200925915824838, 'max_depth': 4, 'n_estimators': 34}. Best is trial 32 with value: 0.635819033636011.[0m


Mean Absolute Error :  9.998901011970409
R2_score :  0.36518514787753986
Adjusted R-Square:  0.3649993615908015


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:39:35,342][0m Trial 36 finished with value: 0.5549412531417679 and parameters: {'learning_rate': 0.029599098855253622, 'reg_lambda': 7.724571194094857e-07, 'reg_alpha': 0.0022577511355025746, 'subsample': 0.37425155350053707, 'colsample_bytree': 0.7097693322742784, 'max_depth': 4, 'n_estimators': 874}. Best is trial 32 with value: 0.635819033636011.[0m


Mean Absolute Error :  4.452506306831691
R2_score :  0.5550714668879562
Adjusted R-Square:  0.5549412531417679


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:40:00,489][0m Trial 37 finished with value: 0.5551366184986051 and parameters: {'learning_rate': 0.03971651821816667, 'reg_lambda': 3.846348560808113e-06, 'reg_alpha': 1.1526152933159307e-05, 'subsample': 0.2891895338476868, 'colsample_bytree': 0.8394525400651502, 'max_depth': 3, 'n_estimators': 580}. Best is trial 32 with value: 0.635819033636011.[0m


Mean Absolute Error :  4.5075511604128655
R2_score :  0.5552667750854742
Adjusted R-Square:  0.5551366184986051


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:40:16,836][0m Trial 38 finished with value: 0.6359829708456632 and parameters: {'learning_rate': 0.006207276932071917, 'reg_lambda': 1.9600781311312877e-05, 'reg_alpha': 0.0003645637374173304, 'subsample': 0.4310092496935655, 'colsample_bytree': 0.5495291067044789, 'max_depth': 5, 'n_estimators': 256}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  6.627622281284216
R2_score :  0.6360894736868659
Adjusted R-Square:  0.6359829708456632


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:41:42,470][0m Trial 39 finished with value: 0.5650494267756074 and parameters: {'learning_rate': 0.005897066733255694, 'reg_lambda': 2.6306848716953655e-05, 'reg_alpha': 0.00040104107367809757, 'subsample': 0.4457906066321824, 'colsample_bytree': 0.5444658261441983, 'max_depth': 5, 'n_estimators': 1409}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.537838450589927
R2_score :  0.5651766831073751
Adjusted R-Square:  0.5650494267756074


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:42:04,984][0m Trial 40 finished with value: 0.6310522754887491 and parameters: {'learning_rate': 0.005503335870281123, 'reg_lambda': 1.2997895511868437e-06, 'reg_alpha': 0.0002520933884385249, 'subsample': 0.32540967635256873, 'colsample_bytree': 0.6229005703822338, 'max_depth': 6, 'n_estimators': 296}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  6.382212283468125
R2_score :  0.6311602209357163
Adjusted R-Square:  0.6310522754887491


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:42:27,669][0m Trial 41 finished with value: 0.6339383888704995 and parameters: {'learning_rate': 0.005137330622391362, 'reg_lambda': 2.2866372983929828e-07, 'reg_alpha': 2.6246102443493917e-05, 'subsample': 0.32384646955048524, 'colsample_bytree': 0.612635657455842, 'max_depth': 6, 'n_estimators': 305}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  6.48706644872805
R2_score :  0.6340454899084127
Adjusted R-Square:  0.6339383888704995


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:43:31,420][0m Trial 42 finished with value: 0.5512422748836903 and parameters: {'learning_rate': 0.016982157167971638, 'reg_lambda': 1.2691419323715692e-07, 'reg_alpha': 2.6434265040122415e-05, 'subsample': 0.3831294573874698, 'colsample_bytree': 0.7275871463111973, 'max_depth': 6, 'n_estimators': 810}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.375751029314997
R2_score :  0.5513735708641216
Adjusted R-Square:  0.5512422748836903


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:43:44,864][0m Trial 43 finished with value: 0.6057069602520866 and parameters: {'learning_rate': 0.005193640343839217, 'reg_lambda': 8.024246857100995e-06, 'reg_alpha': 8.915738427008435e-06, 'subsample': 0.2395444464986224, 'colsample_bytree': 0.5989299524575015, 'max_depth': 6, 'n_estimators': 215}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  7.486217427632303
R2_score :  0.6058223211435734
Adjusted R-Square:  0.6057069602520866


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:44:11,914][0m Trial 44 finished with value: 0.57093182064394 and parameters: {'learning_rate': 0.011908892741497408, 'reg_lambda': 2.2004976708802543e-07, 'reg_alpha': 0.00016409305784441773, 'subsample': 0.16666512832177624, 'colsample_bytree': 0.542705273540458, 'max_depth': 5, 'n_estimators': 634}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.6195894066923735
R2_score :  0.57105735592529
Adjusted R-Square:  0.57093182064394


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:44:32,792][0m Trial 45 finished with value: 0.5615614810150267 and parameters: {'learning_rate': 0.002139574855098317, 'reg_lambda': 3.953495442303746e-08, 'reg_alpha': 0.015038598927640185, 'subsample': 0.26420969714101855, 'colsample_bytree': 0.65406696635085, 'max_depth': 5, 'n_estimators': 410}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  8.210518477034743
R2_score :  0.5616897578378804
Adjusted R-Square:  0.5615614810150267


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:45:38,231][0m Trial 46 finished with value: 0.5633883442971424 and parameters: {'learning_rate': 0.014958997858326512, 'reg_lambda': 1.50599163985875e-05, 'reg_alpha': 0.002719586900919397, 'subsample': 0.49805274292925056, 'colsample_bytree': 0.46371591959854397, 'max_depth': 6, 'n_estimators': 916}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.446744465181001
R2_score :  0.5635160866226665
Adjusted R-Square:  0.5633883442971424


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:47:13,534][0m Trial 47 finished with value: 0.48376330533333034 and parameters: {'learning_rate': 0.010543817372527485, 'reg_lambda': 2.2504783196288682e-07, 'reg_alpha': 0.0006016043012678432, 'subsample': 0.40456605791736416, 'colsample_bytree': 0.5773402257967336, 'max_depth': 6, 'n_estimators': 1131}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.469387362912668
R2_score :  0.4839143440768825
Adjusted R-Square:  0.48376330533333034


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:47:24,448][0m Trial 48 finished with value: 0.5718409787764063 and parameters: {'learning_rate': 0.020926273740200802, 'reg_lambda': 1.0906538001891968e-06, 'reg_alpha': 0.00011083101598159644, 'subsample': 0.31937272436417774, 'colsample_bytree': 0.6183702887708604, 'max_depth': 5, 'n_estimators': 195}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.765291838785012
R2_score :  0.5719662480594198
Adjusted R-Square:  0.5718409787764063


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:47:55,717][0m Trial 49 finished with value: 0.5580953029776535 and parameters: {'learning_rate': 0.017077129675406047, 'reg_lambda': 3.179500304608357e-08, 'reg_alpha': 4.824531330280529e-05, 'subsample': 0.3616666820501131, 'colsample_bytree': 0.6881587447409623, 'max_depth': 5, 'n_estimators': 482}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.490217960000557
R2_score :  0.5582245939228685
Adjusted R-Square:  0.5580953029776535


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:48:42,629][0m Trial 50 finished with value: 0.6059561258372193 and parameters: {'learning_rate': 0.00368959847047847, 'reg_lambda': 4.885853566245447e-06, 'reg_alpha': 0.0023729907356962376, 'subsample': 0.2602707364614, 'colsample_bytree': 0.7522732850239554, 'max_depth': 6, 'n_estimators': 718}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.144739617872675
R2_score :  0.6060714138287024
Adjusted R-Square:  0.6059561258372193


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:49:05,030][0m Trial 51 finished with value: 0.6313497987024526 and parameters: {'learning_rate': 0.007522886114334541, 'reg_lambda': 1.9456274608638852e-06, 'reg_alpha': 0.00033395278841523635, 'subsample': 0.32222438237492224, 'colsample_bytree': 0.6293490130386408, 'max_depth': 6, 'n_estimators': 281}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.726765734887741
R2_score :  0.6314576571011086
Adjusted R-Square:  0.6313497987024526


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:49:23,786][0m Trial 52 finished with value: 0.6084036184590016 and parameters: {'learning_rate': 0.007719076148388039, 'reg_lambda': 8.262133203278078e-07, 'reg_alpha': 0.0072750379719165375, 'subsample': 0.15718457624543938, 'colsample_bytree': 0.5469959707431358, 'max_depth': 6, 'n_estimators': 316}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.563747536628767
R2_score :  0.6085181903715732
Adjusted R-Square:  0.6084036184590016


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:50:00,467][0m Trial 53 finished with value: 0.5332627818960268 and parameters: {'learning_rate': 0.001395300623944587, 'reg_lambda': 1.0173033341342395e-07, 'reg_alpha': 0.0005970446571175156, 'subsample': 0.3136882128074631, 'colsample_bytree': 0.6460301691362879, 'max_depth': 6, 'n_estimators': 554}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  8.520607124709416
R2_score :  0.5333993382542241
Adjusted R-Square:  0.5332627818960268


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:50:12,126][0m Trial 54 finished with value: 0.6345658860363286 and parameters: {'learning_rate': 0.011296577140272247, 'reg_lambda': 2.664559904350216e-07, 'reg_alpha': 0.00040002096372743115, 'subsample': 0.4153172997281444, 'colsample_bytree': 0.6056538392373545, 'max_depth': 6, 'n_estimators': 163}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  6.119145675902576
R2_score :  0.6346728034832951
Adjusted R-Square:  0.6345658860363286


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:50:18,907][0m Trial 55 finished with value: 0.5840831364347017 and parameters: {'learning_rate': 0.01255535212020922, 'reg_lambda': 1.2955923031845744e-08, 'reg_alpha': 0.0001227417246980067, 'subsample': 0.41724997914961703, 'colsample_bytree': 0.5809153518816423, 'max_depth': 5, 'n_estimators': 80}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  7.8276370386747285
R2_score :  0.5842048239496422
Adjusted R-Square:  0.5840831364347017


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:50:30,203][0m Trial 56 finished with value: 0.49140915957351794 and parameters: {'learning_rate': 0.0048026499172869755, 'reg_lambda': 3.454603868716637e-07, 'reg_alpha': 0.033055907901811336, 'subsample': 0.3831345233626979, 'colsample_bytree': 0.498416324387483, 'max_depth': 6, 'n_estimators': 154}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  8.853187355750233
R2_score :  0.49155796131952667
Adjusted R-Square:  0.49140915957351794


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:50:47,640][0m Trial 57 finished with value: 0.5725186479056736 and parameters: {'learning_rate': 0.01610260724512848, 'reg_lambda': 1.2231204585732235e-07, 'reg_alpha': 0.0034843674996289835, 'subsample': 0.5353928929327514, 'colsample_bytree': 0.4403294620877688, 'max_depth': 4, 'n_estimators': 407}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.776671914375029
R2_score :  0.5726437189186012
Adjusted R-Square:  0.5725186479056736


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:51:37,737][0m Trial 58 finished with value: 0.5679187035940316 and parameters: {'learning_rate': 0.010455233524133602, 'reg_lambda': 5.195572479336539e-07, 'reg_alpha': 0.011174305862561528, 'subsample': 0.14623527967649702, 'colsample_bytree': 0.5263897103179878, 'max_depth': 4, 'n_estimators': 1396}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.583218703480088
R2_score :  0.5680451204427288
Adjusted R-Square:  0.5679187035940316


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:52:32,494][0m Trial 59 finished with value: 0.5601063729078006 and parameters: {'learning_rate': 0.01994022311468336, 'reg_lambda': 2.244326396991082e-08, 'reg_alpha': 0.0006866186026321297, 'subsample': 0.29235826671960546, 'colsample_bytree': 0.6020090744830078, 'max_depth': 5, 'n_estimators': 1040}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.437578838850337
R2_score :  0.560235075461143
Adjusted R-Square:  0.5601063729078006


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:53:09,080][0m Trial 60 finished with value: 0.5739680523253257 and parameters: {'learning_rate': 0.0072351467273755144, 'reg_lambda': 1.0415147032676137e-08, 'reg_alpha': 2.5845253850935695e-05, 'subsample': 0.3674484504893736, 'colsample_bytree': 0.5610715351991867, 'max_depth': 5, 'n_estimators': 676}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.711843397873969
R2_score :  0.5740926992765298
Adjusted R-Square:  0.5739680523253257


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:54:10,028][0m Trial 61 finished with value: 0.21133478572406172 and parameters: {'learning_rate': 0.0010790103565700686, 'reg_lambda': 2.3760729685054027e-06, 'reg_alpha': 0.0003537010026008908, 'subsample': 0.21397440135694373, 'colsample_bytree': 0.6406924277690096, 'max_depth': 6, 'n_estimators': 265}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  10.82174163873988
R2_score :  0.21156553066001504
Adjusted R-Square:  0.21133478572406172


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:54:16,315][0m Trial 62 finished with value: 0.08415561695067797 and parameters: {'learning_rate': 0.007743275714172873, 'reg_lambda': 2.3308455299988247e-07, 'reg_alpha': 0.0018265594022333076, 'subsample': 0.27349741082366663, 'colsample_bytree': 0.6032763716104177, 'max_depth': 6, 'n_estimators': 23}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  11.486593623814773
R2_score :  0.08442357152720115
Adjusted R-Square:  0.08415561695067797


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:55:13,678][0m Trial 63 finished with value: 0.6103453499495244 and parameters: {'learning_rate': 0.005114960330921198, 'reg_lambda': 7.707627427695527e-06, 'reg_alpha': 0.00025629314621879537, 'subsample': 0.34010235134147726, 'colsample_bytree': 0.6814068558395845, 'max_depth': 6, 'n_estimators': 502}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.270240798527197
R2_score :  0.6104593537570238
Adjusted R-Square:  0.6103453499495244


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:56:08,556][0m Trial 64 finished with value: 0.5474943020304 and parameters: {'learning_rate': 0.00979401727099807, 'reg_lambda': 6.672140318309424e-07, 'reg_alpha': 9.167467638195185e-05, 'subsample': 0.2341595413227962, 'colsample_bytree': 0.5643492793638978, 'max_depth': 6, 'n_estimators': 751}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.540161442027399
R2_score :  0.5476266945797434
Adjusted R-Square:  0.5474943020304


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:56:36,662][0m Trial 65 finished with value: 0.5756503344110495 and parameters: {'learning_rate': 0.01476551392267546, 'reg_lambda': 6.924578397372969e-08, 'reg_alpha': 0.00569117471689139, 'subsample': 0.44232828863702933, 'colsample_bytree': 0.4803103826232989, 'max_depth': 6, 'n_estimators': 350}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.749242603860827
R2_score :  0.5757744891659895
Adjusted R-Square:  0.5756503344110495


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:56:46,336][0m Trial 66 finished with value: 0.35260384372794196 and parameters: {'learning_rate': 0.003333309212897925, 'reg_lambda': 2.1965868366262207e-06, 'reg_alpha': 0.0004452350158237971, 'subsample': 0.4008227048731643, 'colsample_bytree': 0.6278030399150577, 'max_depth': 3, 'n_estimators': 141}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  10.114607476368594
R2_score :  0.3527932566523506
Adjusted R-Square:  0.35260384372794196


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:57:06,942][0m Trial 67 finished with value: 0.6218903263347195 and parameters: {'learning_rate': 0.010553347566315664, 'reg_lambda': 4.613666322712914e-05, 'reg_alpha': 0.0011785919648379997, 'subsample': 0.3093881265232552, 'colsample_bytree': 0.6633544421349389, 'max_depth': 6, 'n_estimators': 230}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.399720900845539
R2_score :  0.6220009523530273
Adjusted R-Square:  0.6218903263347195


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:58:39,924][0m Trial 68 finished with value: 0.5656275509485548 and parameters: {'learning_rate': 0.006926216935821689, 'reg_lambda': 1.3854865676172405e-05, 'reg_alpha': 0.027343315373511468, 'subsample': 0.23325807627800704, 'colsample_bytree': 0.7029527431315313, 'max_depth': 4, 'n_estimators': 1664}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.522475138739359
R2_score :  0.5657546381347549
Adjusted R-Square:  0.5656275509485548


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:59:20,545][0m Trial 69 finished with value: 0.563312018220091 and parameters: {'learning_rate': 0.022656716658998495, 'reg_lambda': 1.5665898164754685e-07, 'reg_alpha': 0.5362508592386384, 'subsample': 0.3579390193959109, 'colsample_bytree': 0.5218047630374755, 'max_depth': 3, 'n_estimators': 919}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.586020764845481
R2_score :  0.5634397828768343
Adjusted R-Square:  0.563312018220091


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 12:59:51,786][0m Trial 70 finished with value: 0.5621117881232993 and parameters: {'learning_rate': 0.01335222314868104, 'reg_lambda': 4.175275051765272e-07, 'reg_alpha': 0.05813916645056799, 'subsample': 0.5774027299678879, 'colsample_bytree': 0.5872828959993852, 'max_depth': 4, 'n_estimators': 508}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.588083917252599
R2_score :  0.5622399039392061
Adjusted R-Square:  0.5621117881232993


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:00:15,421][0m Trial 71 finished with value: 0.6338097540700225 and parameters: {'learning_rate': 0.0051384428976871226, 'reg_lambda': 1.4588940930891686e-06, 'reg_alpha': 0.00023721495137176728, 'subsample': 0.32948005427468324, 'colsample_bytree': 0.6439333706033092, 'max_depth': 6, 'n_estimators': 279}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  6.712415453832097
R2_score :  0.63391689274346
Adjusted R-Square:  0.6338097540700225


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:00:45,760][0m Trial 72 finished with value: 0.6328449893937909 and parameters: {'learning_rate': 0.003921215471759306, 'reg_lambda': 1.5031924177989428e-06, 'reg_alpha': 0.0011933466865430018, 'subsample': 0.3417234964272216, 'colsample_bytree': 0.662826659008578, 'max_depth': 6, 'n_estimators': 360}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  6.773890413270981
R2_score :  0.6329524103347348
Adjusted R-Square:  0.6328449893937909


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:01:19,061][0m Trial 73 finished with value: 0.6182948573089373 and parameters: {'learning_rate': 0.002952389997216588, 'reg_lambda': 5.8999877784800136e-08, 'reg_alpha': 0.0010305784728818134, 'subsample': 0.27857654896998624, 'colsample_bytree': 0.7300045475319609, 'max_depth': 6, 'n_estimators': 386}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  7.290814065076984
R2_score :  0.6184065352771156
Adjusted R-Square:  0.6182948573089373


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:01:34,092][0m Trial 74 finished with value: 0.4626901327738091 and parameters: {'learning_rate': 0.004523137536305877, 'reg_lambda': 7.558302612845661e-07, 'reg_alpha': 0.003919004731159806, 'subsample': 0.3481446447500999, 'colsample_bytree': 0.6626080868895418, 'max_depth': 6, 'n_estimators': 134}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  9.175428614549851
R2_score :  0.4628473370331778
Adjusted R-Square:  0.4626901327738091


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:02:55,155][0m Trial 75 finished with value: 0.5587144492522989 and parameters: {'learning_rate': 0.010951657625100954, 'reg_lambda': 4.041159601730892e-06, 'reg_alpha': 0.00017910118319556788, 'subsample': 0.4187769911801117, 'colsample_bytree': 0.6070750730534045, 'max_depth': 6, 'n_estimators': 836}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.440895313202563
R2_score :  0.5588435590498418
Adjusted R-Square:  0.5587144492522989


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:03:30,360][0m Trial 76 finished with value: 0.5334673464235972 and parameters: {'learning_rate': 0.0013302691932759805, 'reg_lambda': 1.2965954078063307e-06, 'reg_alpha': 0.0015411768183809607, 'subsample': 0.382236136657698, 'colsample_bytree': 0.6779063805935195, 'max_depth': 3, 'n_estimators': 620}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  8.73455569206827
R2_score :  0.5336038429310137
Adjusted R-Square:  0.5334673464235972


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.501892690004289
R2_score :  0.550650464255583
Adjusted R-Square:  0.5505189566492086


[32m[I 2023-05-01 13:04:07,820][0m Trial 77 finished with value: 0.5505189566492086 and parameters: {'learning_rate': 0.018128027506421455, 'reg_lambda': 2.7109626616400617e-07, 'reg_alpha': 0.01183895672384676, 'subsample': 0.29139386631904246, 'colsample_bytree': 0.5660487742455593, 'max_depth': 6, 'n_estimators': 446}. Best is trial 38 with value: 0.6359829708456632.[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:05:34,281][0m Trial 78 finished with value: 0.5608542989195813 and parameters: {'learning_rate': 0.008517301808700806, 'reg_lambda': 9.57358418887135e-08, 'reg_alpha': 6.301227803010942e-05, 'subsample': 0.33827965768908075, 'colsample_bytree': 0.6537140028257421, 'max_depth': 5, 'n_estimators': 1122}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.480885760390764
R2_score :  0.5609827826473225
Adjusted R-Square:  0.5608542989195813


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:07:12,428][0m Trial 79 finished with value: 0.563376400508431 and parameters: {'learning_rate': 0.003882332861721913, 'reg_lambda': 3.1682398597943116e-08, 'reg_alpha': 0.0006983551744917285, 'subsample': 0.1950874577516606, 'colsample_bytree': 0.7104245925655497, 'max_depth': 3, 'n_estimators': 2096}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.661180778895363
R2_score :  0.5635041463284274
Adjusted R-Square:  0.563376400508431


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:08:28,311][0m Trial 80 finished with value: 0.5573282720944461 and parameters: {'learning_rate': 0.01334183715125992, 'reg_lambda': 7.6037523746882085e-06, 'reg_alpha': 0.004739049308407746, 'subsample': 0.4711452477493513, 'colsample_bytree': 0.5936929916654222, 'max_depth': 6, 'n_estimators': 772}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.429406186109186
R2_score :  0.5574577874548994
Adjusted R-Square:  0.5573282720944461


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:08:50,181][0m Trial 81 finished with value: 0.6315534632093889 and parameters: {'learning_rate': 0.006896215336377845, 'reg_lambda': 2.28726830705513e-06, 'reg_alpha': 0.0003312269080351678, 'subsample': 0.3065764268815232, 'colsample_bytree': 0.6291316987759856, 'max_depth': 6, 'n_estimators': 226}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  6.483355683661985
R2_score :  0.6316612620205891
Adjusted R-Square:  0.6315534632093889


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:09:09,376][0m Trial 82 finished with value: 0.6035230139051877 and parameters: {'learning_rate': 0.005307057992797866, 'reg_lambda': 3.8454218426619054e-07, 'reg_alpha': 0.00017661514633267295, 'subsample': 0.2564384656390768, 'colsample_bytree': 0.6255980638503555, 'max_depth': 6, 'n_estimators': 206}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  7.5080118056282865
R2_score :  0.60363901376813
Adjusted R-Square:  0.6035230139051877


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:09:41,537][0m Trial 83 finished with value: 0.6187744503154491 and parameters: {'learning_rate': 0.007027665986324372, 'reg_lambda': 2.2466781044144617e-05, 'reg_alpha': 0.0003406544722289519, 'subsample': 0.3114659634353226, 'colsample_bytree': 0.6454576681298585, 'max_depth': 6, 'n_estimators': 348}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.406812954309781
R2_score :  0.6188859879659672
Adjusted R-Square:  0.6187744503154491


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:09:51,968][0m Trial 84 finished with value: 0.568451480304502 and parameters: {'learning_rate': 0.009861713846465479, 'reg_lambda': 1.0445134674291134e-06, 'reg_alpha': 0.0014764816761444622, 'subsample': 0.3709922134504266, 'colsample_bytree': 0.6113661710297129, 'max_depth': 6, 'n_estimators': 89}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  8.100356323054111
R2_score :  0.568577741275235
Adjusted R-Square:  0.568451480304502


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:10:49,241][0m Trial 85 finished with value: 0.6207455007064078 and parameters: {'learning_rate': 0.003984962292136816, 'reg_lambda': 1.66592206578088e-07, 'reg_alpha': 9.226911894921465e-05, 'subsample': 0.4035157546339107, 'colsample_bytree': 0.6894881674914323, 'max_depth': 6, 'n_estimators': 565}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.523223281816837
R2_score :  0.6208564616738315
Adjusted R-Square:  0.6207455007064078


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:11:14,293][0m Trial 86 finished with value: 0.5863916745206068 and parameters: {'learning_rate': 0.01521285133686196, 'reg_lambda': 5.677623734605323e-07, 'reg_alpha': 0.0007481350550412418, 'subsample': 0.33443877088418816, 'colsample_bytree': 0.5765750746811297, 'max_depth': 6, 'n_estimators': 263}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.7725336351611904
R2_score :  0.5865126866114725
Adjusted R-Square:  0.5863916745206068


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:11:35,706][0m Trial 87 finished with value: 0.5746167172918888 and parameters: {'learning_rate': 0.012500590429828084, 'reg_lambda': 3.010307233336328e-06, 'reg_alpha': 0.0943240862173345, 'subsample': 0.129002097901728, 'colsample_bytree': 0.6664237352031079, 'max_depth': 3, 'n_estimators': 478}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.7723212998513445
R2_score :  0.5747411744589443
Adjusted R-Square:  0.5746167172918888


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:11:50,607][0m Trial 88 finished with value: 0.10122500099233 and parameters: {'learning_rate': 0.0013432054139465165, 'reg_lambda': 5.0572256641511306e-06, 'reg_alpha': 0.0025145212954693153, 'subsample': 0.2958858629388434, 'colsample_bytree': 0.5482070768912611, 'max_depth': 6, 'n_estimators': 153}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  11.362055194070132
R2_score :  0.10148796146760952
Adjusted R-Square:  0.10122500099233


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:12:53,967][0m Trial 89 finished with value: 0.5776773154996824 and parameters: {'learning_rate': 0.006488452720964159, 'reg_lambda': 1.6392727566555927e-06, 'reg_alpha': 3.123437117435312e-05, 'subsample': 0.35693615727508216, 'colsample_bytree': 0.6314772248747665, 'max_depth': 5, 'n_estimators': 654}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.74415353605731
R2_score :  0.5778008772075183
Adjusted R-Square:  0.5776773154996824


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:14:27,209][0m Trial 90 finished with value: 0.5620302713380454 and parameters: {'learning_rate': 0.008093986449985695, 'reg_lambda': 0.00015061610576544662, 'reg_alpha': 0.022291941577563257, 'subsample': 0.10279526078266846, 'colsample_bytree': 0.5912379783147085, 'max_depth': 6, 'n_estimators': 1358}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.478277993065581
R2_score :  0.5621584110038507
Adjusted R-Square:  0.5620302713380454


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:14:50,146][0m Trial 91 finished with value: 0.5746326609789085 and parameters: {'learning_rate': 0.0034744264362957496, 'reg_lambda': 1.5677763910445087e-06, 'reg_alpha': 0.00044647171187384794, 'subsample': 0.32377118083803275, 'colsample_bytree': 0.6105294784876493, 'max_depth': 6, 'n_estimators': 270}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  7.997834935517474
R2_score :  0.5747571134812155
Adjusted R-Square:  0.5746326609789085


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:15:17,404][0m Trial 92 finished with value: 0.6006206018547369 and parameters: {'learning_rate': 0.00926422485054804, 'reg_lambda': 1.186270224126334e-05, 'reg_alpha': 0.0002957718988302191, 'subsample': 0.26441470610425466, 'colsample_bytree': 0.6266997336347195, 'max_depth': 6, 'n_estimators': 333}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.038002167289158
R2_score :  0.600737450895341
Adjusted R-Square:  0.6006206018547369


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:15:24,741][0m Trial 93 finished with value: 0.3985910312141243 and parameters: {'learning_rate': 0.006395283818333448, 'reg_lambda': 2.695367877865981e-06, 'reg_alpha': 0.00012398191966979334, 'subsample': 0.3119761639328793, 'colsample_bytree': 0.6702948039095554, 'max_depth': 6, 'n_estimators': 76}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  9.69410580373929
R2_score :  0.3987669893665472
Adjusted R-Square:  0.3985910312141243


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:16:03,048][0m Trial 94 finished with value: 0.3416755362670635 and parameters: {'learning_rate': 0.0010327359586271533, 'reg_lambda': 2.695554568768308e-07, 'reg_alpha': 0.00023589619145746976, 'subsample': 0.3789148552651638, 'colsample_bytree': 0.6365870649563592, 'max_depth': 6, 'n_estimators': 413}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  10.055381088133624
R2_score :  0.3418681465578065
Adjusted R-Square:  0.3416755362670635


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:16:42,258][0m Trial 95 finished with value: 0.5699627848609359 and parameters: {'learning_rate': 0.010715224018766277, 'reg_lambda': 9.158341156953768e-07, 'reg_alpha': 0.007736531446478283, 'subsample': 0.24485269627515122, 'colsample_bytree': 0.5326922210714728, 'max_depth': 6, 'n_estimators': 539}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.604305129465019
R2_score :  0.5700886036594179
Adjusted R-Square:  0.5699627848609359


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:16:53,566][0m Trial 96 finished with value: 0.48311802635868406 and parameters: {'learning_rate': 0.0037875393078191837, 'reg_lambda': 1.458979153416729e-07, 'reg_alpha': 0.0009541447168357091, 'subsample': 0.2740167830218317, 'colsample_bytree': 0.5623205734659353, 'max_depth': 4, 'n_estimators': 186}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  8.996945182576113
R2_score :  0.48326925389572306
Adjusted R-Square:  0.48311802635868406


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:18:34,783][0m Trial 97 finished with value: 0.5483131188306268 and parameters: {'learning_rate': 0.015748508736658895, 'reg_lambda': 6.87556260990597e-06, 'reg_alpha': 4.7112513386291357e-05, 'subsample': 0.22220701371669754, 'colsample_bytree': 0.6477957376190269, 'max_depth': 6, 'n_estimators': 1212}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.404048689623567
R2_score :  0.5484452718133878
Adjusted R-Square:  0.5483131188306268


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:18:50,445][0m Trial 98 finished with value: 0.6298573884462964 and parameters: {'learning_rate': 0.008352364916994153, 'reg_lambda': 3.798346545698351e-05, 'reg_alpha': 0.0004914705431343396, 'subsample': 0.34643232222282216, 'colsample_bytree': 0.5806267878925083, 'max_depth': 3, 'n_estimators': 305}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.741460383107488
R2_score :  0.6299656834891745
Adjusted R-Square:  0.6298573884462964


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:19:08,605][0m Trial 99 finished with value: 0.6040742653637297 and parameters: {'learning_rate': 0.011813219690994855, 'reg_lambda': 1.5542916972189445e-05, 'reg_alpha': 0.0018009603386845328, 'subsample': 0.39416694788766365, 'colsample_bytree': 0.7023656811281659, 'max_depth': 5, 'n_estimators': 237}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.143624004895269
R2_score :  0.6041901039434301
Adjusted R-Square:  0.6040742653637297


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:19:41,868][0m Trial 100 finished with value: 0.6194716214625929 and parameters: {'learning_rate': 0.006437588456504635, 'reg_lambda': 5.36907326479279e-07, 'reg_alpha': 0.002704150288950662, 'subsample': 0.4329473807878913, 'colsample_bytree': 0.6050845124156113, 'max_depth': 6, 'n_estimators': 388}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  5.435023840057498
R2_score :  0.6195829551371923
Adjusted R-Square:  0.6194716214625929


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:20:01,292][0m Trial 101 finished with value: 0.6289009493366398 and parameters: {'learning_rate': 0.006065761820159856, 'reg_lambda': 1.0963463951343348e-06, 'reg_alpha': 0.0002012236266306085, 'subsample': 0.31532959016018264, 'colsample_bytree': 0.6331551641363454, 'max_depth': 6, 'n_estimators': 288}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  6.200202924630351
R2_score :  0.6290095242111589
Adjusted R-Square:  0.6289009493366398


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:20:04,915][0m Trial 102 finished with value: -0.020860730178500875 and parameters: {'learning_rate': 0.004147341284172085, 'reg_lambda': 1.7418842470964683e-06, 'reg_alpha': 0.00031913424740729387, 'subsample': 0.3296168013799595, 'colsample_bytree': 0.6192123473075852, 'max_depth': 6, 'n_estimators': 25}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  11.97385590727832
R2_score :  -0.020562050282978195
Adjusted R-Square:  -0.020860730178500875


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:20:18,053][0m Trial 103 finished with value: 0.3522745425488759 and parameters: {'learning_rate': 0.0028826221682488676, 'reg_lambda': 3.054362930528023e-07, 'reg_alpha': 0.0007924351415143179, 'subsample': 0.29130514496575205, 'colsample_bytree': 0.6647577011344636, 'max_depth': 6, 'n_estimators': 151}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  9.999284188451709
R2_score :  0.3524640518190827
Adjusted R-Square:  0.3522745425488759


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:20:52,641][0m Trial 104 finished with value: 0.5872177532144621 and parameters: {'learning_rate': 0.00872625117798482, 'reg_lambda': 2.9342441321005998e-06, 'reg_alpha': 0.0001273201442125757, 'subsample': 0.35889654598419063, 'colsample_bytree': 0.5029151410562543, 'max_depth': 6, 'n_estimators': 443}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.981992825406644
R2_score :  0.5873385236140856
Adjusted R-Square:  0.5872177532144621


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-05-01 13:21:18,710][0m Trial 105 finished with value: 0.5794004825668784 and parameters: {'learning_rate': 0.011939207565669428, 'reg_lambda': 7.527923003307999e-07, 'reg_alpha': 6.667521710188819e-05, 'subsample': 0.17795658344322787, 'colsample_bytree': 0.6833245777255054, 'max_depth': 6, 'n_estimators': 339}. Best is trial 38 with value: 0.6359829708456632.[0m


Mean Absolute Error :  4.7008865601175325
R2_score :  0.5795235401164638
Adjusted R-Square:  0.5794004825668784


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[33m[W 2023-05-01 13:22:40,740][0m Trial 106 failed with parameters: {'learning_rate': 0.0026137531716421405, 'reg_lambda': 7.945335434396375e-08, 'reg_alpha': 1.8512088887908957e-05, 'subsample': 0.3343461527921268, 'colsample_bytree': 0.5493836068771565, 'max_depth': 6, 'n_estimators': 1034} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "c:\Users\nshre\anaconda3\envs\auto_gpt\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\nshre\AppData\Local\Temp\ipykernel_7188\2706932625.py", line 38, in run
    model.fit(xtrain, y_train)
  File "c:\Users\nshre\anaconda3\envs\auto_gpt\lib\site-packages\xgboost\core.py", line 620, in inner_f
    return func(**kwargs)
  File "c:\Users\nshre\anaconda3\envs\auto_gpt\lib\site-packages\xgboost\sklea

KeyboardInterrupt: 

In [50]:
def mae(y_true, predictions):
    y_true, predictions = np.array(y_true), np.array(predictions)
    return np.mean(np.abs(y_true - predictions))

def new_metric(df):
    df['mae'] = df[['quantity_received', 'preds']].apply(lambda row: mae(row['quantity_received'], row['preds']), axis=1)
    final_error = df['mae'].sum()/ df['preds'].sum()
    return final_error



In [54]:
"""Uncomment for tuning the model"""
import optuna
from sklearn.metrics import mean_absolute_error, r2_score

imp_features = ['quantity_ordered', 'quantity_submitted', 'order_delivery_days', 'd_id_509', 'd_id_584', 'd_id_540', 'd_id_648', 'd_id_410', 'row_group_ind_6']


def run(trial):
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 0.1)
    reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
    reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
    subsample = trial.suggest_float("subsample", 0.1, 1.0)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.1, 1.0)
    max_depth = trial.suggest_int("max_depth", 3, 6)
    n_estimators = trial.suggest_int("n_estimators", 20, 3000)

    xtrain =X_train[req_cols].copy()
    xvalid = X_valid[req_cols].copy()
    y_train = X_train['quantity_received']
    y_test = X_valid['quantity_received']


    xtrain[req_cols] = qt.fit_transform(xtrain[req_cols])
    xvalid[req_cols] = qt.transform(xvalid[req_cols])

    model = xgb.XGBRegressor(
            random_state=42,
            objective='reg:squarederror',
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            reg_lambda=reg_lambda,
            reg_alpha=reg_alpha,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            max_depth=max_depth,
            eval_metric='mae'
        )
    model.fit(xtrain, y_train)
    preds_valid = model.predict(xvalid)
    mae_ = mean_absolute_error(y_test, preds_valid)
    r2_sc = r2_score(y_test, preds_valid)

    temp = X_valid.copy()
    temp['preds'] = model.predict(xvalid)

    print("Mean Absolute Error : ", mae_)
    print("R2_score : ", r2_sc)
    r2 = r2_sc.copy()
    n = len(y_test)
    p = xvalid.shape[1]  # number of features in X
    adj_r2 = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)
    print("Adjusted R-Square: ", adj_r2)

    
    final_error = new_metric(temp)
    print("Custom Error: ",final_error)
    del temp
    return final_error

study = optuna.create_study(direction="minimize")
study.optimize(run, n_trials=200)

# study.best_params

[32m[I 2023-05-01 13:48:59,172][0m A new study created in memory with name: no-name-c8a82434-97c4-48a3-a553-bcfdcc1e1e3f[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  5.910776354421862
R2_score :  0.6185247311290162
Adjusted R-Square:  0.6184130877522913


[32m[I 2023-05-01 13:49:53,964][0m Trial 0 finished with value: 0.4230557280808336 and parameters: {'learning_rate': 0.006120366199678876, 'reg_lambda': 1.1151235108344202e-08, 'reg_alpha': 6.532018638028331e-07, 'subsample': 0.6073840929011901, 'colsample_bytree': 0.19162534545399815, 'max_depth': 3, 'n_estimators': 1006}. Best is trial 0 with value: 0.4230557280808336.[0m


Custom Error:  0.4230557280808336


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  5.507626375821629
R2_score :  0.5865816568619824
Adjusted R-Square:  0.5864606649560994


[32m[I 2023-05-01 13:50:21,432][0m Trial 1 finished with value: 0.38915926167642223 and parameters: {'learning_rate': 0.06024753099459203, 'reg_lambda': 3.647696230768239, 'reg_alpha': 0.012709563701156956, 'subsample': 0.38109959849252784, 'colsample_bytree': 0.12387975703191942, 'max_depth': 6, 'n_estimators': 174}. Best is trial 1 with value: 0.38915926167642223.[0m


Custom Error:  0.38915926167642223


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.4490294385209115
R2_score :  0.5603418267320106
Adjusted R-Square:  0.5602131554207259


[32m[I 2023-05-01 13:53:02,724][0m Trial 2 finished with value: 0.31074178550283804 and parameters: {'learning_rate': 0.031594986008039685, 'reg_lambda': 0.2145015740474754, 'reg_alpha': 0.1339740694779011, 'subsample': 0.6771481001450146, 'colsample_bytree': 0.3380412828720766, 'max_depth': 4, 'n_estimators': 1659}. Best is trial 2 with value: 0.31074178550283804.[0m


Custom Error:  0.31074178550283804


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.253754517936799
R2_score :  0.5664242147416638
Adjusted R-Square:  0.5662973235152007


[32m[I 2023-05-01 13:53:42,116][0m Trial 3 finished with value: 0.29846048404293274 and parameters: {'learning_rate': 0.08489754992726173, 'reg_lambda': 8.613688440812548e-07, 'reg_alpha': 0.013627456725012248, 'subsample': 0.5001132802619267, 'colsample_bytree': 0.9937551592311205, 'max_depth': 6, 'n_estimators': 144}. Best is trial 3 with value: 0.29846048404293274.[0m


Custom Error:  0.29846048404293274


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  7.531301640994549
R2_score :  0.5980222644101428
Adjusted R-Square:  0.5979046207371932


[32m[I 2023-05-01 13:53:50,286][0m Trial 4 finished with value: 0.6805833147086257 and parameters: {'learning_rate': 0.036261945180446176, 'reg_lambda': 7.503189081471622e-08, 'reg_alpha': 8.544158887176951e-06, 'subsample': 0.16782959870693626, 'colsample_bytree': 0.3357214072940005, 'max_depth': 3, 'n_estimators': 43}. Best is trial 3 with value: 0.29846048404293274.[0m


Custom Error:  0.6805833147086257


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.398694516230141
R2_score :  0.535601829856444
Adjusted R-Square:  0.5354659180841996


[32m[I 2023-05-01 13:58:49,290][0m Trial 5 finished with value: 0.3045429550459504 and parameters: {'learning_rate': 0.0869139410443342, 'reg_lambda': 0.015608915464997023, 'reg_alpha': 0.909226110915333, 'subsample': 0.25795124718544404, 'colsample_bytree': 0.5881187480835541, 'max_depth': 4, 'n_estimators': 2814}. Best is trial 3 with value: 0.29846048404293274.[0m


Custom Error:  0.3045429550459504


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.3485044156454356
R2_score :  0.5671321184995625
Adjusted R-Square:  0.5670054344497433


[32m[I 2023-05-01 14:01:02,667][0m Trial 6 finished with value: 0.3055508636037728 and parameters: {'learning_rate': 0.038087716042280464, 'reg_lambda': 0.06319071193516765, 'reg_alpha': 9.597009740105433, 'subsample': 0.49327353322170997, 'colsample_bytree': 0.76732798327092, 'max_depth': 4, 'n_estimators': 1403}. Best is trial 3 with value: 0.29846048404293274.[0m


Custom Error:  0.3055508636037728


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.470612748810693
R2_score :  0.4708165136029111
Adjusted R-Square:  0.47066164161998214


[32m[I 2023-05-01 14:03:52,563][0m Trial 7 finished with value: 0.3117901682950918 and parameters: {'learning_rate': 0.07635915309259085, 'reg_lambda': 5.719753753040638e-05, 'reg_alpha': 0.32409714715421084, 'subsample': 0.5729422555030258, 'colsample_bytree': 0.5991634032589526, 'max_depth': 3, 'n_estimators': 2449}. Best is trial 3 with value: 0.29846048404293274.[0m


Custom Error:  0.3117901682950918


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.4192539845544765
R2_score :  0.5634463143637214
Adjusted R-Square:  0.5633185516184971


[32m[I 2023-05-01 14:05:56,500][0m Trial 8 finished with value: 0.30867019909238924 and parameters: {'learning_rate': 0.03128480870902634, 'reg_lambda': 4.185322176858921e-08, 'reg_alpha': 48.376585848446304, 'subsample': 0.8055534721927788, 'colsample_bytree': 0.3747595662973501, 'max_depth': 4, 'n_estimators': 1666}. Best is trial 3 with value: 0.29846048404293274.[0m


Custom Error:  0.30867019909238924


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.326310271504815
R2_score :  0.5689999601375159
Adjusted R-Square:  0.5688738227342676


[32m[I 2023-05-01 14:06:54,217][0m Trial 9 finished with value: 0.3041203960036952 and parameters: {'learning_rate': 0.03250803666533038, 'reg_lambda': 3.3088395059901086e-08, 'reg_alpha': 1.898044371668257e-06, 'subsample': 0.6191697302423878, 'colsample_bytree': 0.85912763387056, 'max_depth': 5, 'n_estimators': 461}. Best is trial 3 with value: 0.29846048404293274.[0m


Custom Error:  0.3041203960036952


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.204550598926372
R2_score :  0.5508242530289105
Adjusted R-Square:  0.5506927962839341


[32m[I 2023-05-01 14:08:38,957][0m Trial 10 finished with value: 0.2958362033916501 and parameters: {'learning_rate': 0.09012487618527275, 'reg_lambda': 0.00011127737007195528, 'reg_alpha': 0.00042170805944965534, 'subsample': 0.9584679226703823, 'colsample_bytree': 0.9766636341904908, 'max_depth': 6, 'n_estimators': 752}. Best is trial 10 with value: 0.2958362033916501.[0m


Custom Error:  0.2958362033916501


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.185574429664349
R2_score :  0.5634382932577366
Adjusted R-Square:  0.5633105281650381


[32m[I 2023-05-01 14:11:01,346][0m Trial 11 finished with value: 0.2949270020687097 and parameters: {'learning_rate': 0.0966118652556032, 'reg_lambda': 2.7388089467531304e-05, 'reg_alpha': 0.000341269304317046, 'subsample': 0.9437046508402417, 'colsample_bytree': 0.9888366966940004, 'max_depth': 6, 'n_estimators': 811}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2949270020687097


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.198289116232196
R2_score :  0.5566137989585159
Adjusted R-Square:  0.5564840365945651


[32m[I 2023-05-01 14:13:37,145][0m Trial 12 finished with value: 0.29587496837684557 and parameters: {'learning_rate': 0.09492400229972624, 'reg_lambda': 0.0001878426699245411, 'reg_alpha': 0.00012653609158676845, 'subsample': 0.9496684281250866, 'colsample_bytree': 0.9884989315777312, 'max_depth': 6, 'n_estimators': 830}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.29587496837684557


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.217783697157681
R2_score :  0.5692362292879785
Adjusted R-Square:  0.5691101610317705


[32m[I 2023-05-01 14:15:03,794][0m Trial 13 finished with value: 0.2970232934070914 and parameters: {'learning_rate': 0.09883978101671938, 'reg_lambda': 5.404743917541391e-06, 'reg_alpha': 1.1153729699002154e-08, 'subsample': 0.9816705689517963, 'colsample_bytree': 0.8283371524775355, 'max_depth': 5, 'n_estimators': 862}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2970232934070914


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.295316249282884
R2_score :  0.5533434690127309
Adjusted R-Square:  0.5532127495469494


[32m[I 2023-05-01 14:17:22,056][0m Trial 14 finished with value: 0.30127691763366526 and parameters: {'learning_rate': 0.07312634895592354, 'reg_lambda': 0.0010492838847628355, 'reg_alpha': 0.0005827134130779788, 'subsample': 0.8357332967639397, 'colsample_bytree': 0.7267551838101851, 'max_depth': 5, 'n_estimators': 1248}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.30127691763366526


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.2032243288501725
R2_score :  0.5567652357546156
Adjusted R-Square:  0.5566355177104848


[32m[I 2023-05-01 14:22:54,546][0m Trial 15 finished with value: 0.29633345524534 and parameters: {'learning_rate': 0.06923127560360698, 'reg_lambda': 7.302807529226975e-06, 'reg_alpha': 8.194431549958189e-05, 'subsample': 0.9876773334807313, 'colsample_bytree': 0.9205361706557467, 'max_depth': 6, 'n_estimators': 2043}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.29633345524534


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.311219583882307
R2_score :  0.5591812281457094
Adjusted R-Square:  0.5590522171711338


[32m[I 2023-05-01 14:24:04,977][0m Trial 16 finished with value: 0.30206007958682035 and parameters: {'learning_rate': 0.09639671904193862, 'reg_lambda': 0.0040195604627478185, 'reg_alpha': 0.003761205348335704, 'subsample': 0.8326253618503876, 'colsample_bytree': 0.7288634897999727, 'max_depth': 5, 'n_estimators': 555}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.30206007958682035


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.228430760456394
R2_score :  0.5652937128648232
Adjusted R-Square:  0.5651664907832373


[32m[I 2023-05-01 14:25:55,732][0m Trial 17 finished with value: 0.2974997086704142 and parameters: {'learning_rate': 0.08438211217603728, 'reg_lambda': 0.0001645825152291963, 'reg_alpha': 0.0006398015030723421, 'subsample': 0.7000564520966073, 'colsample_bytree': 0.8753800634717761, 'max_depth': 6, 'n_estimators': 516}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2974997086704142


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.372185468928541
R2_score :  0.5530432085561499
Adjusted R-Square:  0.5529124012154942


[32m[I 2023-05-01 14:29:14,208][0m Trial 18 finished with value: 0.30723246258193404 and parameters: {'learning_rate': 0.06283609795658326, 'reg_lambda': 65.9731378619045, 'reg_alpha': 3.0164321548507806e-05, 'subsample': 0.8753867267806299, 'colsample_bytree': 0.971394674603931, 'max_depth': 6, 'n_estimators': 1116}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.30723246258193404


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.262477519658701
R2_score :  0.5665648955111062
Adjusted R-Square:  0.5664380454565812
Custom Error:  0.3002320491409576


[32m[I 2023-05-01 14:34:03,116][0m Trial 19 finished with value: 0.3002320491409576 and parameters: {'learning_rate': 0.08214372312542814, 'reg_lambda': 0.0014558983462749993, 'reg_alpha': 0.001210014559695339, 'subsample': 0.7323984765088548, 'colsample_bytree': 0.901599247087552, 'max_depth': 5, 'n_estimators': 1947}. Best is trial 11 with value: 0.2949270020687097.[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.304233266495528
R2_score :  0.538311917221663
Adjusted R-Square:  0.5381767985894446


[32m[I 2023-05-01 14:36:00,037][0m Trial 20 finished with value: 0.3022338089367916 and parameters: {'learning_rate': 0.09123494087280326, 'reg_lambda': 2.6194358622530808e-05, 'reg_alpha': 0.028759330381290242, 'subsample': 0.9110128421608946, 'colsample_bytree': 0.7794610677418707, 'max_depth': 6, 'n_estimators': 606}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.3022338089367916


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.21626352759347
R2_score :  0.5615570882223655
Adjusted R-Square:  0.5614287725721352
Custom Error:  0.29766588009421296


[32m[I 2023-05-01 14:39:27,989][0m Trial 21 finished with value: 0.29766588009421296 and parameters: {'learning_rate': 0.0995850219749087, 'reg_lambda': 6.256232975413246e-05, 'reg_alpha': 0.00017694037390122812, 'subsample': 0.9421388403617097, 'colsample_bytree': 0.9874184454898469, 'max_depth': 6, 'n_estimators': 954}. Best is trial 11 with value: 0.2949270020687097.[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.23226815553481
R2_score :  0.5513982998862064
Adjusted R-Square:  0.551267011143024


[32m[I 2023-05-01 14:41:37,266][0m Trial 22 finished with value: 0.2972882671201968 and parameters: {'learning_rate': 0.08981790501849266, 'reg_lambda': 0.00021286337909182473, 'reg_alpha': 0.0001261472550230421, 'subsample': 0.9940672768914434, 'colsample_bytree': 0.9903294906171701, 'max_depth': 6, 'n_estimators': 787}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2972882671201968


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.2432020412089635
R2_score :  0.5652945652750452
Adjusted R-Square:  0.5651673434429275


[32m[I 2023-05-01 14:45:12,715][0m Trial 23 finished with value: 0.29958556198878017 and parameters: {'learning_rate': 0.09271481945779082, 'reg_lambda': 0.00041334777698118723, 'reg_alpha': 0.003316667816275531, 'subsample': 0.8866767725792508, 'colsample_bytree': 0.9146437591023768, 'max_depth': 5, 'n_estimators': 1322}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.29958556198878017


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.286588964073509
R2_score :  0.5569908095182585
Adjusted R-Square:  0.5568611574910329
Custom Error:  0.3006558746229783


[32m[I 2023-05-01 14:46:05,861][0m Trial 24 finished with value: 0.3006558746229783 and parameters: {'learning_rate': 0.09986227826689356, 'reg_lambda': 1.8409074444695268e-06, 'reg_alpha': 2.1970025062078784e-05, 'subsample': 0.787595150460294, 'colsample_bytree': 0.8244142539697001, 'max_depth': 6, 'n_estimators': 279}. Best is trial 11 with value: 0.2949270020687097.[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.211905167477162
R2_score :  0.5556629610042536
Adjusted R-Square:  0.5555329203660119


[32m[I 2023-05-01 14:48:27,154][0m Trial 25 finished with value: 0.2966581226899688 and parameters: {'learning_rate': 0.07905593002747666, 'reg_lambda': 2.2482982064706882e-05, 'reg_alpha': 0.0002746560450356797, 'subsample': 0.9156342917359728, 'colsample_bytree': 0.9264836051141502, 'max_depth': 6, 'n_estimators': 710}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2966581226899688


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.319039009350509
R2_score :  0.5655281149090647
Adjusted R-Square:  0.5654009614280878


[32m[I 2023-05-01 14:49:28,604][0m Trial 26 finished with value: 0.3024211844001325 and parameters: {'learning_rate': 0.08883394600552577, 'reg_lambda': 0.0004287724630362002, 'reg_alpha': 0.0035660654754989625, 'subsample': 0.7659137773374695, 'colsample_bytree': 0.6674349840149847, 'max_depth': 5, 'n_estimators': 344}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.3024211844001325


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.220562657346267
R2_score :  0.5626458566884069
Adjusted R-Square:  0.5625178596795093


[32m[I 2023-05-01 14:53:28,453][0m Trial 27 finished with value: 0.297257410035908 and parameters: {'learning_rate': 0.07887078868753851, 'reg_lambda': 8.034389258105869e-07, 'reg_alpha': 1.4493652032620262e-05, 'subsample': 0.8584852157265475, 'colsample_bytree': 0.8538425257550736, 'max_depth': 6, 'n_estimators': 1157}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.297257410035908


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.268389681703065
R2_score :  0.5523764147402965
Adjusted R-Square:  0.5522454122543214
Custom Error:  0.29983151236212935


[32m[I 2023-05-01 14:56:44,649][0m Trial 28 finished with value: 0.29983151236212935 and parameters: {'learning_rate': 0.0908356650243522, 'reg_lambda': 0.0021031804683896717, 'reg_alpha': 0.0011694052703018648, 'subsample': 0.934073833176514, 'colsample_bytree': 0.8166267582849228, 'max_depth': 5, 'n_estimators': 1551}. Best is trial 11 with value: 0.2949270020687097.[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.22093696217745
R2_score :  0.5613529088446838
Adjusted R-Square:  0.5612245334388756


[32m[I 2023-05-01 14:59:53,407][0m Trial 29 finished with value: 0.2971955534945944 and parameters: {'learning_rate': 0.07281496080115166, 'reg_lambda': 0.0064996887056154185, 'reg_alpha': 1.258295031357844e-06, 'subsample': 0.7910951550177833, 'colsample_bytree': 0.9512908303029245, 'max_depth': 6, 'n_estimators': 989}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2971955534945944


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.200954636868683
R2_score :  0.5522344587487789
Adjusted R-Square:  0.5521034147176566


[32m[I 2023-05-01 15:01:54,234][0m Trial 30 finished with value: 0.2956003832620008 and parameters: {'learning_rate': 0.09409234461191007, 'reg_lambda': 0.00011529951174197283, 'reg_alpha': 7.69301753596725e-05, 'subsample': 0.951786701052584, 'colsample_bytree': 0.9081911790639412, 'max_depth': 6, 'n_estimators': 777}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2956003832620008


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.19484336393671
R2_score :  0.5611429935079979
Adjusted R-Square:  0.5610145566679134


[32m[I 2023-05-01 15:04:21,360][0m Trial 31 finished with value: 0.2959537951099526 and parameters: {'learning_rate': 0.09471676068747673, 'reg_lambda': 0.00010709479935240076, 'reg_alpha': 4.8579458847237045e-05, 'subsample': 0.9282110934291831, 'colsample_bytree': 0.9195159168299676, 'max_depth': 6, 'n_estimators': 764}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2959537951099526


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.200111292811425
R2_score :  0.5651632304878533
Adjusted R-Square:  0.565035970219013


[32m[I 2023-05-01 15:07:12,651][0m Trial 32 finished with value: 0.2963146142969029 and parameters: {'learning_rate': 0.08375004509526293, 'reg_lambda': 0.0004359933061225746, 'reg_alpha': 0.0002028684133273992, 'subsample': 0.866833006659352, 'colsample_bytree': 0.9867170113078589, 'max_depth': 6, 'n_estimators': 1021}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2963146142969029


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.199148710808092
R2_score :  0.5545011156031268
Adjusted R-Square:  0.5543707349366986


[32m[I 2023-05-01 15:08:34,470][0m Trial 33 finished with value: 0.2955102259583037 and parameters: {'learning_rate': 0.09465505436962492, 'reg_lambda': 2.2963794020924542e-05, 'reg_alpha': 1.0010869601443382e-05, 'subsample': 0.9935977142901405, 'colsample_bytree': 0.8996890667848272, 'max_depth': 6, 'n_estimators': 412}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2955102259583037


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.21940299052285
R2_score :  0.5583779678878895
Adjusted R-Square:  0.5582487218294307


[32m[I 2023-05-01 15:09:27,974][0m Trial 34 finished with value: 0.2964940025434223 and parameters: {'learning_rate': 0.08491047327231771, 'reg_lambda': 1.5740289526214247e-05, 'reg_alpha': 5.0123372840988974e-06, 'subsample': 0.9795364300928532, 'colsample_bytree': 0.8604324283539522, 'max_depth': 6, 'n_estimators': 310}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2964940025434223


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.236630493568111
R2_score :  0.5719854622538729
Adjusted R-Square:  0.5718601985941271


[32m[I 2023-05-01 15:11:16,215][0m Trial 35 finished with value: 0.29844282048899323 and parameters: {'learning_rate': 0.09259531161728535, 'reg_lambda': 6.350294962031225e-06, 'reg_alpha': 3.0936167922059503e-07, 'subsample': 0.8984780297656223, 'colsample_bytree': 0.8959375180054212, 'max_depth': 5, 'n_estimators': 633}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.29844282048899323


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.250524792934905
R2_score :  0.5622064729580643
Adjusted R-Square:  0.5620783473581743


[32m[I 2023-05-01 15:11:41,627][0m Trial 36 finished with value: 0.2988974713316581 and parameters: {'learning_rate': 0.09951040649691612, 'reg_lambda': 4.4008933282618474e-05, 'reg_alpha': 4.8346402684895704e-05, 'subsample': 0.9958828649094793, 'colsample_bytree': 0.9286779490074971, 'max_depth': 6, 'n_estimators': 96}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2988974713316581


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.2951078924334185
R2_score :  0.5546435200470294
Adjusted R-Square:  0.5545131810569937


[32m[I 2023-05-01 15:13:13,406][0m Trial 37 finished with value: 0.3011953899365347 and parameters: {'learning_rate': 0.08895852668606134, 'reg_lambda': 4.497286437249318e-07, 'reg_alpha': 7.523972466557516e-06, 'subsample': 0.841290080384933, 'colsample_bytree': 0.7940589820614599, 'max_depth': 6, 'n_estimators': 402}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.3011953899365347


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.360327781379255
R2_score :  0.5765712506949441
Adjusted R-Square:  0.5764473291219553


[32m[I 2023-05-01 15:13:39,583][0m Trial 38 finished with value: 0.30732577222089724 and parameters: {'learning_rate': 0.08353734545562615, 'reg_lambda': 3.4139341658010083e-06, 'reg_alpha': 0.04237041450319463, 'subsample': 0.7495560640738005, 'colsample_bytree': 0.8795380176757449, 'max_depth': 4, 'n_estimators': 171}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.30732577222089724


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  6.194915524324218
R2_score :  0.6413864154877684
Adjusted R-Square:  0.6412814628610022


[32m[I 2023-05-01 15:13:50,742][0m Trial 39 finished with value: 0.5237953039472059 and parameters: {'learning_rate': 0.07805295981899493, 'reg_lambda': 1.5035283161596565e-05, 'reg_alpha': 0.0071613085388436285, 'subsample': 0.65366352601926, 'colsample_bytree': 0.9503125230386098, 'max_depth': 3, 'n_estimators': 22}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.5237953039472059


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.258797747998105
R2_score :  0.5513209566772973
Adjusted R-Square:  0.5511896452986843
Custom Error:  0.29942068109137665


[32m[I 2023-05-01 15:18:20,740][0m Trial 40 finished with value: 0.29942068109137665 and parameters: {'learning_rate': 0.05250592307444961, 'reg_lambda': 0.013885974837696725, 'reg_alpha': 0.0005997033682404703, 'subsample': 0.9453816563392168, 'colsample_bytree': 0.7499236589002618, 'max_depth': 6, 'n_estimators': 1429}. Best is trial 11 with value: 0.2949270020687097.[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.211456565475137
R2_score :  0.5560470888759588
Adjusted R-Square:  0.5559171606574103


[32m[I 2023-05-01 15:20:58,683][0m Trial 41 finished with value: 0.29655981240240814 and parameters: {'learning_rate': 0.09461176002118932, 'reg_lambda': 0.00011408153065418536, 'reg_alpha': 7.970819033838354e-05, 'subsample': 0.9503746474205382, 'colsample_bytree': 0.9629309589411548, 'max_depth': 6, 'n_estimators': 854}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.29655981240240814


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.215267193007913
R2_score :  0.5588254290115436
Adjusted R-Square:  0.5586963139080244


[32m[I 2023-05-01 15:25:19,221][0m Trial 42 finished with value: 0.2970460499342966 and parameters: {'learning_rate': 0.09530209455022522, 'reg_lambda': 7.619609370114237e-05, 'reg_alpha': 0.00021485424227177933, 'subsample': 0.8968360640925379, 'colsample_bytree': 0.989898938868736, 'max_depth': 6, 'n_estimators': 657}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2970460499342966


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.215231806906377
R2_score :  0.5637501254505192
Adjusted R-Square:  0.5636224516193042


[32m[I 2023-05-01 15:34:11,482][0m Trial 43 finished with value: 0.296534699879577 and parameters: {'learning_rate': 0.08808023136831747, 'reg_lambda': 0.0006212352933462485, 'reg_alpha': 2.7286230324185806e-05, 'subsample': 0.8082432197859756, 'colsample_bytree': 0.995619379432704, 'max_depth': 6, 'n_estimators': 1124}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.296534699879577


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.2261193973306534
R2_score :  0.5552007745298109
Adjusted R-Square:  0.5550705986270765


[32m[I 2023-05-01 15:40:10,665][0m Trial 44 finished with value: 0.29753507197632506 and parameters: {'learning_rate': 0.09504724472075379, 'reg_lambda': 0.00021451378425452192, 'reg_alpha': 6.45304258732666e-06, 'subsample': 0.9409553147140363, 'colsample_bytree': 0.8412060444556856, 'max_depth': 6, 'n_estimators': 926}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.29753507197632506


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.235300487969123
R2_score :  0.5677412482734993
Adjusted R-Square:  0.5676147424929163


[32m[I 2023-05-01 15:42:46,608][0m Trial 45 finished with value: 0.2977135352306701 and parameters: {'learning_rate': 0.08620161915753224, 'reg_lambda': 3.518623573391983e-05, 'reg_alpha': 0.0012393369123078857, 'subsample': 0.9973474942046225, 'colsample_bytree': 0.9430204541198469, 'max_depth': 5, 'n_estimators': 448}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2977135352306701


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.280479767825043
R2_score :  0.5539749399865456
Adjusted R-Square:  0.5538444053284242


[32m[I 2023-05-01 15:54:52,836][0m Trial 46 finished with value: 0.30298728378128753 and parameters: {'learning_rate': 0.09993094152388358, 'reg_lambda': 0.0015828211133834728, 'reg_alpha': 7.24580656484037e-05, 'subsample': 0.8443718761556043, 'colsample_bytree': 0.8821618668491433, 'max_depth': 6, 'n_estimators': 2903}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.30298728378128753


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.21947596161013
R2_score :  0.5521524968512791
Adjusted R-Square:  0.5520214288330108


[32m[I 2023-05-01 16:02:28,320][0m Trial 47 finished with value: 0.2973904584962343 and parameters: {'learning_rate': 0.08022098068327006, 'reg_lambda': 8.87686130989495e-06, 'reg_alpha': 0.00048533483172598775, 'subsample': 0.9613494780161713, 'colsample_bytree': 0.9478371311723455, 'max_depth': 6, 'n_estimators': 1795}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2973904584962343


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.28873095814931
R2_score :  0.5568844548565888
Adjusted R-Square:  0.5567547717033781


[32m[I 2023-05-01 16:03:15,907][0m Trial 48 finished with value: 0.30038778724356024 and parameters: {'learning_rate': 0.09509634645797348, 'reg_lambda': 2.5261575921599227e-06, 'reg_alpha': 1.6427506125005503e-05, 'subsample': 0.8845196081669451, 'colsample_bytree': 0.8120468003655208, 'max_depth': 5, 'n_estimators': 257}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.30038778724356024


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.208691674869607
R2_score :  0.5677738512849014
Adjusted R-Square:  0.5676473550459862


[32m[I 2023-05-01 16:05:18,739][0m Trial 49 finished with value: 0.2965608389809171 and parameters: {'learning_rate': 0.07472675930807117, 'reg_lambda': 2.77914975522773e-05, 'reg_alpha': 0.0002975350509501221, 'subsample': 0.8210208904403671, 'colsample_bytree': 0.8938957851472037, 'max_depth': 6, 'n_estimators': 532}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2965608389809171


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.24545069636587
R2_score :  0.5597287600801388
Adjusted R-Square:  0.5595999093474426


[32m[I 2023-05-01 16:08:40,517][0m Trial 50 finished with value: 0.29872587640489867 and parameters: {'learning_rate': 0.08699937933709406, 'reg_lambda': 0.00016980131836487247, 'reg_alpha': 0.001962334059613791, 'subsample': 0.9611619243670697, 'colsample_bytree': 0.8487961173333681, 'max_depth': 5, 'n_estimators': 1254}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.29872587640489867


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.204202680071576
R2_score :  0.5598001073906297
Adjusted R-Square:  0.5596712775385917


[32m[I 2023-05-01 16:12:22,528][0m Trial 51 finished with value: 0.2967230366031396 and parameters: {'learning_rate': 0.09288922792028335, 'reg_lambda': 6.737201438742957e-05, 'reg_alpha': 4.896251270521316e-05, 'subsample': 0.920582115182339, 'colsample_bytree': 0.9232351720357854, 'max_depth': 6, 'n_estimators': 753}. Best is trial 11 with value: 0.2949270020687097.[0m


Custom Error:  0.2967230366031396


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.205504511262772
R2_score :  0.5669075985520623
Adjusted R-Square:  0.56678084879375
Custom Error:  0.29673806901872674


[32m[I 2023-05-01 16:17:08,879][0m Trial 52 finished with value: 0.29673806901872674 and parameters: {'learning_rate': 0.09562385546040146, 'reg_lambda': 0.00012380181200895402, 'reg_alpha': 0.0001347072293631438, 'subsample': 0.9100971923570085, 'colsample_bytree': 0.9563619611209622, 'max_depth': 6, 'n_estimators': 826}. Best is trial 11 with value: 0.2949270020687097.[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.198650459437923
R2_score :  0.550962300463744
Adjusted R-Square:  0.5508308841200282
Custom Error:  0.29493819303354085


[32m[I 2023-05-01 16:27:55,665][0m Trial 53 finished with value: 0.29493819303354085 and parameters: {'learning_rate': 0.09019900420544338, 'reg_lambda': 0.0007045322511018172, 'reg_alpha': 4.008640225858118e-05, 'subsample': 0.9990456908679225, 'colsample_bytree': 0.8988080894068274, 'max_depth': 6, 'n_estimators': 2297}. Best is trial 11 with value: 0.2949270020687097.[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


Mean Absolute Error :  4.257471417067054
R2_score :  0.515884631972213
Adjusted R-Square:  0.5157429497222181
Custom Error:  0.29984674099300396


[32m[I 2023-05-01 16:40:36,342][0m Trial 54 finished with value: 0.29984674099300396 and parameters: {'learning_rate': 0.08197000502372673, 'reg_lambda': 0.0005746256715923267, 'reg_alpha': 0.00044656614264815096, 'subsample': 0.9639438557702577, 'colsample_bytree': 0.9964243167844639, 'max_depth': 6, 'n_estimators': 2100}. Best is trial 11 with value: 0.2949270020687097.[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


In [54]:

X = X_train[req_cols].copy()
y = X_train['quantity_received'].copy()
X[req_cols] = qt.fit_transform(X[req_cols])
X[req_cols].head()

Unnamed: 0,quantity_ordered,quantity_submitted,quantity_received,visibility,order_delivery_days,d_id_509,d_id_584,d_id_540,d_id_648,d_id_410,d_id_7,d_id_1,d_id_643,d_id_454,d_id_110,isbn_id_4595,isbn_id_26550,isbn_id_27825,isbn_id_14820,isbn_id_19934,isbn_id_29771,isbn_id_7865,isbn_id_31986,isbn_id_6790,isbn_id_8453,po_source_ind_AFT_Lite,row_group_ind_4,row_group_ind_1,row_group_ind_5,row_group_ind_3,row_group_ind_2,row_group_ind_6,row_group_ind_0,row_group_ind_7
2,0.368368,0.256757,0.455956,0.579079,0.633133,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.368368,0.256757,0.455956,0.579079,0.633133,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
8,0.661662,0.60961,0.705706,0.0,0.316316,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
9,0.792292,0.770771,0.814815,0.912412,0.863864,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
11,0.864865,0.851351,0.881882,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [19]:
df['order_year'] = df['order_date'].dt.year
df['order_month'] = df['order_date'].dt.month
df['order_day'] = df['order_date'].dt.day
df['order_dayofweek'] = df['order_date'].dt.dayofweek
df['order_dayofyear'] = df['order_date'].dt.dayofyear

df['delivered_year'] = df['evsd'].dt.year
df['delivered_month'] = df['evsd'].dt.month
df['delivered_day'] = df['evsd'].dt.day
df['delivered_dayofweek'] = df['evsd'].dt.dayofweek
df['delivered_dayofyear'] = df['evsd'].dt.dayofyear


In [20]:
df['order_month_sin'] = np.sin(2 * np.pi * df['order_month'] / 12)
df['order_month_cos'] = np.cos(2 * np.pi * df['order_month'] / 12)

df['order_dayofweek_sin'] = np.sin(2 * np.pi * df['order_dayofweek'] / 7)
df['order_dayofweek_cos'] = np.cos(2 * np.pi * df['order_dayofweek'] / 7)

df['delivered_month_sin'] = np.sin(2 * np.pi * df['delivered_month'] / 12)
df['delivered_month_cos'] = np.cos(2 * np.pi * df['delivered_month'] / 12)

df['delivered_dayofweek_sin'] = np.sin(2 * np.pi * df['delivered_dayofweek'] / 7)
df['delivered_dayofweek_cos'] = np.cos(2 * np.pi * df['delivered_dayofweek'] / 7)

df.head()

Unnamed: 0,LOCATION,d_id,isbn_id,order_id_1,order_date,evsd,row_group_ind,visibility,quantity_ordered,quantity_submitted,quantity_received,tag,order_delivery_days,order_year,order_month,order_day,order_dayofweek,order_dayofyear,delivered_year,delivered_month,delivered_day,delivered_dayofweek,delivered_dayofyear,order_month_sin,order_month_cos,order_dayofweek_sin,order_dayofweek_cos,delivered_month_sin,delivered_month_cos,delivered_dayofweek_sin,delivered_dayofweek_cos
0,JFK2,1,25556,3912,2022-09-30,2022-10-04,7.0,2.0,4.0,4.0,4,train,4,2022,9,30,4,273,2022,10,4,1,277,-1.0,-1.83697e-16,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349
1,JFK2,1,25556,91302,2022-11-16,2022-11-19,4.0,1.0,20.0,20.0,20,train,3,2022,11,16,2,320,2022,11,19,5,323,-0.5,0.8660254,0.974928,-0.222521,-0.5,0.866025,-0.974928,-0.222521
2,JFK2,1,33083,26096,2022-10-07,2022-10-11,4.0,4.0,4.0,4.0,4,train,4,2022,10,7,4,280,2022,10,11,1,284,-0.866025,0.5,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349
3,JFK2,1,33083,138855,2022-10-19,2022-10-22,5.0,2.0,4.0,4.0,4,train,3,2022,10,19,2,292,2022,10,22,5,295,-0.866025,0.5,0.974928,-0.222521,-0.866025,0.5,-0.974928,-0.222521
4,JFK2,1,33083,114502,2022-10-18,2022-10-21,6.0,1.0,4.0,4.0,4,train,3,2022,10,18,1,291,2022,10,21,4,294,-0.866025,0.5,0.781831,0.62349,-0.866025,0.5,-0.433884,-0.900969


In [21]:
# As categorical variable
df['order_month_season'] = df['order_date'].dt.month.map({1: 'winter', 2: 'winter', 3: 'spring', 4: 'spring', 5: 'spring', 6: 'summer', 7: 'summer', 8: 'summer', 9: 'fall', 10: 'fall', 11: 'fall', 12: 'winter'})
df['delivered_month_season'] = df['evsd'].dt.month.map({1: 'winter', 2: 'winter', 3: 'spring', 4: 'spring', 5: 'spring', 6: 'summer', 7: 'summer', 8: 'summer', 9: 'fall', 10: 'fall', 11: 'fall', 12: 'winter'})

# As continuous variable using sine and cosine transformations
df['order_month_sin'] = np.sin(2 * np.pi * df['order_month'] / 12)
df['order_month_cos'] = np.cos(2 * np.pi * df['order_month'] / 12)
df['delivered_month_sin'] = np.sin(2 * np.pi * df['delivered_month'] / 12)
df['delivered_month_cos'] = np.cos(2 * np.pi * df['delivered_month'] / 12)
df.head()

Unnamed: 0,LOCATION,d_id,isbn_id,order_id_1,order_date,evsd,row_group_ind,visibility,quantity_ordered,quantity_submitted,quantity_received,tag,order_delivery_days,order_year,order_month,order_day,order_dayofweek,order_dayofyear,delivered_year,delivered_month,delivered_day,delivered_dayofweek,delivered_dayofyear,order_month_sin,order_month_cos,order_dayofweek_sin,order_dayofweek_cos,delivered_month_sin,delivered_month_cos,delivered_dayofweek_sin,delivered_dayofweek_cos,order_month_season,delivered_month_season
0,JFK2,1,25556,3912,2022-09-30,2022-10-04,7.0,2.0,4.0,4.0,4,train,4,2022,9,30,4,273,2022,10,4,1,277,-1.0,-1.83697e-16,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349,fall,fall
1,JFK2,1,25556,91302,2022-11-16,2022-11-19,4.0,1.0,20.0,20.0,20,train,3,2022,11,16,2,320,2022,11,19,5,323,-0.5,0.8660254,0.974928,-0.222521,-0.5,0.866025,-0.974928,-0.222521,fall,fall
2,JFK2,1,33083,26096,2022-10-07,2022-10-11,4.0,4.0,4.0,4.0,4,train,4,2022,10,7,4,280,2022,10,11,1,284,-0.866025,0.5,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349,fall,fall
3,JFK2,1,33083,138855,2022-10-19,2022-10-22,5.0,2.0,4.0,4.0,4,train,3,2022,10,19,2,292,2022,10,22,5,295,-0.866025,0.5,0.974928,-0.222521,-0.866025,0.5,-0.974928,-0.222521,fall,fall
4,JFK2,1,33083,114502,2022-10-18,2022-10-21,6.0,1.0,4.0,4.0,4,train,3,2022,10,18,1,291,2022,10,21,4,294,-0.866025,0.5,0.781831,0.62349,-0.866025,0.5,-0.433884,-0.900969,fall,fall


In [22]:
df['order_weekday'] = (df['order_dayofweek'] < 5).astype(int)  # 0 for weekend, 1 for weekday
df['delivered_weekday'] = (df['delivered_dayofweek'] < 5).astype(int)


In [23]:
df.shape

(357596, 35)

In [24]:
(df['d_id'] + df['isbn_id']).nunique()

10878

In [25]:
(df['d_id'] +'__' + df['isbn_id']).value_counts(dropna = False, normalize = True).head(50)
# Remaining you put as unknown


540__4595     0.001158
524__26550    0.001124
410__24163    0.001119
540__373      0.001116
540__14820    0.001116
410__29416    0.001116
524__7865     0.001102
410__10204    0.001096
524__29771    0.001077
410__19394    0.001077
410__1528     0.001077
524__8453     0.001071
410__28311    0.001057
410__2081     0.001054
410__28446    0.001054
410__13646    0.001054
524__27825    0.001051
524__21625    0.001049
410__26105    0.001043
540__31986    0.001032
524__6790     0.001026
524__19934    0.001024
410__32365    0.001018
410__8551     0.001018
410__17481    0.001012
410__30418    0.000998
410__5310     0.000998
410__31291    0.000979
410__12868    0.000976
410__28595    0.000973
410__1108     0.000970
410__6998     0.000956
410__4002     0.000954
540__29918    0.000948
540__24090    0.000948
524__30821    0.000948
540__15727    0.000948
540__9338     0.000948
540__2044     0.000942
540__15592    0.000937
410__30967    0.000937
410__20003    0.000937
524__20309    0.000937
540__23917 

In [26]:
df['isbn_id'].value_counts(dropna = False, normalize = True).head(10)

isbn_id
4595     0.001158
26550    0.001124
24163    0.001119
373      0.001116
14820    0.001116
29416    0.001116
7865     0.001102
10204    0.001096
29771    0.001077
19394    0.001077
Name: proportion, dtype: float64

In [27]:
df['d_id'].value_counts(dropna = False, normalize = True)

d_id
509    0.382574
584    0.123184
540    0.094204
648    0.060823
7      0.047584
410    0.045951
1      0.042819
643    0.021091
684    0.019609
454    0.019226
110    0.017995
639    0.015613
174    0.014419
524    0.014391
652    0.013622
18     0.008901
136    0.008266
393    0.007525
566    0.007192
472    0.006689
763    0.006502
343    0.005271
218    0.004899
711    0.003591
13     0.001913
302    0.001326
373    0.001270
830    0.001054
440    0.000453
79     0.000419
613    0.000411
216    0.000313
845    0.000277
701    0.000235
127    0.000204
746    0.000067
803    0.000056
462    0.000039
210    0.000017
241    0.000006
Name: proportion, dtype: float64

In [28]:
df.nunique()


LOCATION                      1
d_id                         40
isbn_id                    9492
order_id_1                 1993
order_date                  110
evsd                        106
row_group_ind                 8
visibility                   25
quantity_ordered            218
quantity_submitted          230
quantity_received           264
tag                           2
order_delivery_days          23
order_year                    1
order_month                   4
order_day                    31
order_dayofweek               7
order_dayofyear             110
delivered_year                2
delivered_month               5
delivered_day                31
delivered_dayofweek           7
delivered_dayofyear         106
order_month_sin               4
order_month_cos               4
order_dayofweek_sin           7
order_dayofweek_cos           7
delivered_month_sin           5
delivered_month_cos           5
delivered_dayofweek_sin       7
delivered_dayofweek_cos       7
order_mo

In [29]:
# Create a new column that combines the values of the existing columns
df['unique_id'] = df.apply(lambda row: f"{row['LOCATION']}_{row['d_id']}_{row['isbn_id']}_{row['order_id_1']}", axis=1)

# Drop duplicates based on the new column
df_unique = df.drop_duplicates(subset=['unique_id'])

df_unique.shape, df.shape

((142773, 36), (357596, 36))

In [30]:
print(df_unique.shape)
df_unique.head()

(142773, 36)


Unnamed: 0,LOCATION,d_id,isbn_id,order_id_1,order_date,evsd,row_group_ind,visibility,quantity_ordered,quantity_submitted,quantity_received,tag,order_delivery_days,order_year,order_month,order_day,order_dayofweek,order_dayofyear,delivered_year,delivered_month,delivered_day,delivered_dayofweek,delivered_dayofyear,order_month_sin,order_month_cos,order_dayofweek_sin,order_dayofweek_cos,delivered_month_sin,delivered_month_cos,delivered_dayofweek_sin,delivered_dayofweek_cos,order_month_season,delivered_month_season,order_weekday,delivered_weekday,unique_id
0,JFK2,1,25556,3912,2022-09-30,2022-10-04,7.0,2.0,4.0,4.0,4,train,4,2022,9,30,4,273,2022,10,4,1,277,-1.0,-1.83697e-16,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349,fall,fall,1,1,JFK2_1_25556_3912
1,JFK2,1,25556,91302,2022-11-16,2022-11-19,4.0,1.0,20.0,20.0,20,train,3,2022,11,16,2,320,2022,11,19,5,323,-0.5,0.8660254,0.974928,-0.222521,-0.5,0.866025,-0.974928,-0.222521,fall,fall,1,0,JFK2_1_25556_91302
2,JFK2,1,33083,26096,2022-10-07,2022-10-11,4.0,4.0,4.0,4.0,4,train,4,2022,10,7,4,280,2022,10,11,1,284,-0.866025,0.5,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349,fall,fall,1,1,JFK2_1_33083_26096
3,JFK2,1,33083,138855,2022-10-19,2022-10-22,5.0,2.0,4.0,4.0,4,train,3,2022,10,19,2,292,2022,10,22,5,295,-0.866025,0.5,0.974928,-0.222521,-0.866025,0.5,-0.974928,-0.222521,fall,fall,1,0,JFK2_1_33083_138855
4,JFK2,1,33083,114502,2022-10-18,2022-10-21,6.0,1.0,4.0,4.0,4,train,3,2022,10,18,1,291,2022,10,21,4,294,-0.866025,0.5,0.781831,0.62349,-0.866025,0.5,-0.433884,-0.900969,fall,fall,1,1,JFK2_1_33083_114502


In [31]:
df.columns

Index(['LOCATION', 'd_id', 'isbn_id', 'order_id_1', 'order_date', 'evsd',
       'row_group_ind', 'visibility', 'quantity_ordered', 'quantity_submitted',
       'quantity_received', 'tag', 'order_delivery_days', 'order_year',
       'order_month', 'order_day', 'order_dayofweek', 'order_dayofyear',
       'delivered_year', 'delivered_month', 'delivered_day',
       'delivered_dayofweek', 'delivered_dayofyear', 'order_month_sin',
       'order_month_cos', 'order_dayofweek_sin', 'order_dayofweek_cos',
       'delivered_month_sin', 'delivered_month_cos', 'delivered_dayofweek_sin',
       'delivered_dayofweek_cos', 'order_month_season',
       'delivered_month_season', 'order_weekday', 'delivered_weekday',
       'unique_id'],
      dtype='object')

In [32]:
df[['d_id', 'isbn_id', 'order_id_1', 'order_date', 'evsd', 'row_group_ind']].dtypes

d_id                     object
isbn_id                  object
order_id_1               object
order_date       datetime64[ns]
evsd             datetime64[ns]
row_group_ind           float64
dtype: object

In [33]:
df['row_group_ind'] = df['row_group_ind'].astype(str)

In [34]:
df.columns

Index(['LOCATION', 'd_id', 'isbn_id', 'order_id_1', 'order_date', 'evsd',
       'row_group_ind', 'visibility', 'quantity_ordered', 'quantity_submitted',
       'quantity_received', 'tag', 'order_delivery_days', 'order_year',
       'order_month', 'order_day', 'order_dayofweek', 'order_dayofyear',
       'delivered_year', 'delivered_month', 'delivered_day',
       'delivered_dayofweek', 'delivered_dayofyear', 'order_month_sin',
       'order_month_cos', 'order_dayofweek_sin', 'order_dayofweek_cos',
       'delivered_month_sin', 'delivered_month_cos', 'delivered_dayofweek_sin',
       'delivered_dayofweek_cos', 'order_month_season',
       'delivered_month_season', 'order_weekday', 'delivered_weekday',
       'unique_id'],
      dtype='object')

In [37]:
df.drop(['order_date_VC', 'evsd_VC'], axis =1, inplace = True)

In [38]:
for col in ['d_id', 'isbn_id', 'order_id_1',  'row_group_ind', 'order_month_season', 'delivered_month_season']:
    temp_dict = dict(df[col].value_counts(normalize = True))
    df[f'{col}_VC'] = df[col].map(temp_dict)
    del temp_dict

In [41]:
req_cols = [col for col in df.columns if col not in ['LOCATION', 'order_date', 'evsd', 'd_id', 
                                                     'isbn_id', 'order_id_1','quantity_received', 'tag',
                                                      'order_date', 'evsd', 'row_group_ind', 'unique_id','target', 
                                                      'delivered_year', 'order_year',  'order_month_season',
                                                        'delivered_month_season', 'order_month', 'delivered_month']]

In [42]:
df[req_cols].head(1)

Unnamed: 0,visibility,quantity_ordered,quantity_submitted,order_delivery_days,order_day,order_dayofweek,order_dayofyear,delivered_day,delivered_dayofweek,delivered_dayofyear,order_month_sin,order_month_cos,order_dayofweek_sin,order_dayofweek_cos,delivered_month_sin,delivered_month_cos,delivered_dayofweek_sin,delivered_dayofweek_cos,order_weekday,delivered_weekday,d_id_VC,isbn_id_VC,order_id_1_VC,row_group_ind_VC,order_month_season_VC,delivered_month_season_VC
0,2.0,4.0,4.0,4,30,4,273,4,1,277,-1.0,-1.83697e-16,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349,1,1,0.042819,0.000506,0.000895,0.124056,0.725084,0.680735


In [43]:
df['order_month_season'].value_counts(dropna = False)

order_month_season
fall      259287
winter     98309
Name: count, dtype: int64

In [44]:
df.head()

Unnamed: 0,LOCATION,d_id,isbn_id,order_id_1,order_date,evsd,row_group_ind,visibility,quantity_ordered,quantity_submitted,quantity_received,tag,order_delivery_days,order_year,order_month,order_day,order_dayofweek,order_dayofyear,delivered_year,delivered_month,delivered_day,delivered_dayofweek,delivered_dayofyear,order_month_sin,order_month_cos,order_dayofweek_sin,order_dayofweek_cos,delivered_month_sin,delivered_month_cos,delivered_dayofweek_sin,delivered_dayofweek_cos,order_month_season,delivered_month_season,order_weekday,delivered_weekday,unique_id,d_id_VC,isbn_id_VC,order_id_1_VC,row_group_ind_VC,order_month_season_VC,delivered_month_season_VC
0,JFK2,1,25556,3912,2022-09-30,2022-10-04,7.0,2.0,4.0,4.0,4,train,4,2022,9,30,4,273,2022,10,4,1,277,-1.0,-1.83697e-16,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349,fall,fall,1,1,JFK2_1_25556_3912,0.042819,0.000506,0.000895,0.124056,0.725084,0.680735
1,JFK2,1,25556,91302,2022-11-16,2022-11-19,4.0,1.0,20.0,20.0,20,train,3,2022,11,16,2,320,2022,11,19,5,323,-0.5,0.8660254,0.974928,-0.222521,-0.5,0.866025,-0.974928,-0.222521,fall,fall,1,0,JFK2_1_25556_91302,0.042819,0.000506,0.000604,0.12546,0.725084,0.680735
2,JFK2,1,33083,26096,2022-10-07,2022-10-11,4.0,4.0,4.0,4.0,4,train,4,2022,10,7,4,280,2022,10,11,1,284,-0.866025,0.5,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349,fall,fall,1,1,JFK2_1_33083_26096,0.042819,0.00035,0.00094,0.12546,0.725084,0.680735
3,JFK2,1,33083,138855,2022-10-19,2022-10-22,5.0,2.0,4.0,4.0,4,train,3,2022,10,19,2,292,2022,10,22,5,295,-0.866025,0.5,0.974928,-0.222521,-0.866025,0.5,-0.974928,-0.222521,fall,fall,1,0,JFK2_1_33083_138855,0.042819,0.00035,0.000738,0.125169,0.725084,0.680735
4,JFK2,1,33083,114502,2022-10-18,2022-10-21,6.0,1.0,4.0,4.0,4,train,3,2022,10,18,1,291,2022,10,21,4,294,-0.866025,0.5,0.781831,0.62349,-0.866025,0.5,-0.433884,-0.900969,fall,fall,1,1,JFK2_1_33083_114502,0.042819,0.00035,0.000721,0.124764,0.725084,0.680735


In [45]:
from sklearn.preprocessing import QuantileTransformer

# Instantiate the transformer
qt = QuantileTransformer(output_distribution='uniform')



In [46]:

from sklearn.model_selection import train_test_split
from sklearn.linear_model import PoissonRegressor, LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
import xgboost as xgb

# Split data into features and target
X = df[req_cols]
y = df['quantity_received']

# Split data into train and test sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = df[df['tag'] == 'train'][req_cols].copy()
X_test = df[df['tag'] == 'test'][req_cols].copy()
y_train = df[df['tag'] == 'train']['quantity_received']
y_test = df[df['tag'] == 'test']['quantity_received']


X_train = qt.fit_transform(X_train)
X_test = qt.transform(X_test)

# Train Poisson regression model
# Define XGBoost model parameters
params = {'learning_rate': 0.09494445071749025, 'reg_lambda': 0.12153018654604152, 'reg_alpha': 0.002926293135127233, 
          'subsample': 0.4759168036681104, 'colsample_bytree': 0.23668139727995788, 'max_depth': 5, 
          'n_estimators': 926}


# {
#     'objective': 'reg:squarederror',
#     'learning_rate': 0.1,
#     'max_depth': 6,
#     'min_child_weight': 1,
#     'gamma': 0,
#     'subsample': 0.8,
#     'colsample_bytree': 0.8,
#     'n_estimators': 100
# }
model = xgb.XGBRegressor(**params, 
            random_state=42,
            objective='reg:squarederror',
            eval_metric='mae')
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Evaluate model performance on test set
mae = mean_absolute_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print(f'Mean absolute error: {mae:.4f}')
print(f'Score: {score:.4f}')


Mean absolute error: 4.8610
Score: 0.7694


## Score all Data

In [47]:
df_scored = df.copy()
print(df_scored.shape)
df_scored.head()

(357596, 42)


Unnamed: 0,LOCATION,d_id,isbn_id,order_id_1,order_date,evsd,row_group_ind,visibility,quantity_ordered,quantity_submitted,quantity_received,tag,order_delivery_days,order_year,order_month,order_day,order_dayofweek,order_dayofyear,delivered_year,delivered_month,delivered_day,delivered_dayofweek,delivered_dayofyear,order_month_sin,order_month_cos,order_dayofweek_sin,order_dayofweek_cos,delivered_month_sin,delivered_month_cos,delivered_dayofweek_sin,delivered_dayofweek_cos,order_month_season,delivered_month_season,order_weekday,delivered_weekday,unique_id,d_id_VC,isbn_id_VC,order_id_1_VC,row_group_ind_VC,order_month_season_VC,delivered_month_season_VC
0,JFK2,1,25556,3912,2022-09-30,2022-10-04,7.0,2.0,4.0,4.0,4,train,4,2022,9,30,4,273,2022,10,4,1,277,-1.0,-1.83697e-16,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349,fall,fall,1,1,JFK2_1_25556_3912,0.042819,0.000506,0.000895,0.124056,0.725084,0.680735
1,JFK2,1,25556,91302,2022-11-16,2022-11-19,4.0,1.0,20.0,20.0,20,train,3,2022,11,16,2,320,2022,11,19,5,323,-0.5,0.8660254,0.974928,-0.222521,-0.5,0.866025,-0.974928,-0.222521,fall,fall,1,0,JFK2_1_25556_91302,0.042819,0.000506,0.000604,0.12546,0.725084,0.680735
2,JFK2,1,33083,26096,2022-10-07,2022-10-11,4.0,4.0,4.0,4.0,4,train,4,2022,10,7,4,280,2022,10,11,1,284,-0.866025,0.5,-0.433884,-0.900969,-0.866025,0.5,0.781831,0.62349,fall,fall,1,1,JFK2_1_33083_26096,0.042819,0.00035,0.00094,0.12546,0.725084,0.680735
3,JFK2,1,33083,138855,2022-10-19,2022-10-22,5.0,2.0,4.0,4.0,4,train,3,2022,10,19,2,292,2022,10,22,5,295,-0.866025,0.5,0.974928,-0.222521,-0.866025,0.5,-0.974928,-0.222521,fall,fall,1,0,JFK2_1_33083_138855,0.042819,0.00035,0.000738,0.125169,0.725084,0.680735
4,JFK2,1,33083,114502,2022-10-18,2022-10-21,6.0,1.0,4.0,4.0,4,train,3,2022,10,18,1,291,2022,10,21,4,294,-0.866025,0.5,0.781831,0.62349,-0.866025,0.5,-0.433884,-0.900969,fall,fall,1,1,JFK2_1_33083_114502,0.042819,0.00035,0.000721,0.124764,0.725084,0.680735


In [40]:
# df['order_year'].value_counts(dropna = False)


"""
MAE : < 2.5
"""


[['year', 'month', 'day', 'delivered_dayofyear', ]]

# [['order_dayofweek']]

order_year
2022    357596
Name: count, dtype: int64

In [39]:
dict(df['row_group_ind'].value_counts(dropna= False, normalize = True))

{'1.0': 0.12554111343527333,
 '4.0': 0.12546001633127887,
 '2.0': 0.12540129084217944,
 '5.0': 0.1251691853376436,
 '3.0': 0.1249678408035884,
 '6.0': 0.12476369981767134,
 '0.0': 0.12464065593574872,
 '7.0': 0.1240561974966163}

In [None]:
df_new['row_group_ind'].map()

In [48]:
df_scored[req_cols]  = qt.transform(df_scored[req_cols])
df_scored['predicted_orders'] = model.predict(df_scored[req_cols])
df_scored[['quantity_received', 'predicted_orders']]

Unnamed: 0,quantity_received,predicted_orders
0,4,2.189799
1,20,20.220161
2,4,2.500038
3,4,4.191618
4,4,4.620018
...,...,...
357591,8,9.290109
357592,12,11.229070
357593,132,126.057327
357594,24,29.560225


In [51]:
mae = mean_absolute_error(df_scored['quantity_received'], df_scored['predicted_orders'])
score = r2_score(df_scored['quantity_received'], df_scored['predicted_orders'])
print(f'Mean absolute error: {mae:.4f}')
print(f'Score: {score:.4f}')

Mean absolute error: 3.2349
Score: 0.8917


In [49]:
df_scored['quantity_received'].describe()

count    357596.000000
mean         13.113701
std          27.279292
min           0.000000
25%           0.000000
50%           6.000000
75%          12.000000
max        1650.000000
Name: quantity_received, dtype: float64

In [50]:
df_scored['predicted_orders'].describe()

count    357596.000000
mean         13.112150
std          25.541054
min         -39.842880
25%           3.075947
50%           7.475512
75%          12.033930
max         790.471252
Name: predicted_orders, dtype: float64

## Hypertuning


In [96]:
"""Uncomment for tuning the model"""
import optuna
def run(trial):
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 0.1)
    reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
    reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
    subsample = trial.suggest_float("subsample", 0.1, 1.0)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.1, 1.0)
    max_depth = trial.suggest_int("max_depth", 3, 6)
    n_estimators = trial.suggest_int("n_estimators", 20, 3000)


    model = xgb.XGBRegressor(
            random_state=42,
            objective='reg:squarederror',
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            reg_lambda=reg_lambda,
            reg_alpha=reg_alpha,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            max_depth=max_depth,
            eval_metric='mae'
        )
    model.fit(X_train, y_train)
    preds_valid = model.predict(X_test)
    mae_ = mean_absolute_error(y_test, preds_valid)
    r2_sc = r2_score(y_test, preds_valid)
    print("Mean Absolute Error : ", mae_)
    print("R2_score : ", r2_sc)
    return mae_

study = optuna.create_study(direction="minimize")
study.optimize(run, n_trials=20)

# study.best_params

[32m[I 2023-04-28 08:27:37,198][0m A new study created in memory with name: no-name-6633f2c9-b973-4c05-879d-a6a88d4adc0f[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:30:51,858][0m Trial 0 finished with value: 5.30203999121291 and parameters: {'learning_rate': 0.03309359543919969, 'reg_lambda': 0.2159147341660498, 'reg_alpha': 0.06916918029376977, 'subsample': 0.5038536182347256, 'colsample_bytree': 0.3266281397228815, 'max_depth': 5, 'n_estimators': 2820}. Best is trial 0 with value: 5.30203999121291.[0m


Mean Absolute Error :  5.30203999121291
R2_score :  0.7478631424634002


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:31:44,537][0m Trial 1 finished with value: 6.824564105550033 and parameters: {'learning_rate': 0.09494445071749025, 'reg_lambda': 0.12153018654604152, 'reg_alpha': 0.002926293135127233, 'subsample': 0.4759168036681104, 'colsample_bytree': 0.23668139727995788, 'max_depth': 5, 'n_estimators': 926}. Best is trial 0 with value: 5.30203999121291.[0m


Mean Absolute Error :  6.824564105550033
R2_score :  0.7252528819371045


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:32:14,441][0m Trial 2 finished with value: 5.085238951615853 and parameters: {'learning_rate': 0.08539001075999679, 'reg_lambda': 0.0017116086819376096, 'reg_alpha': 9.733517229448778e-07, 'subsample': 0.8486752491235564, 'colsample_bytree': 0.765683289497556, 'max_depth': 6, 'n_estimators': 302}. Best is trial 2 with value: 5.085238951615853.[0m


Mean Absolute Error :  5.085238951615853
R2_score :  0.6582640974257273


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:34:36,542][0m Trial 3 finished with value: 4.316466609593959 and parameters: {'learning_rate': 0.06002410622268421, 'reg_lambda': 0.9102659582342552, 'reg_alpha': 0.00019889251192707353, 'subsample': 0.3447540708103442, 'colsample_bytree': 0.644771781552105, 'max_depth': 3, 'n_estimators': 2362}. Best is trial 3 with value: 4.316466609593959.[0m


Mean Absolute Error :  4.316466609593959
R2_score :  0.7345140742866197


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:36:29,075][0m Trial 4 finished with value: 4.014516335495757 and parameters: {'learning_rate': 0.029709652164319054, 'reg_lambda': 6.140086141171898, 'reg_alpha': 0.019604127340872607, 'subsample': 0.25246008848760754, 'colsample_bytree': 0.7080422852270917, 'max_depth': 4, 'n_estimators': 1433}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.014516335495757
R2_score :  0.7506425492580595


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:41:06,670][0m Trial 5 finished with value: 4.8431481285810944 and parameters: {'learning_rate': 0.06534568249655477, 'reg_lambda': 1.3336415440754149e-05, 'reg_alpha': 0.000345415372188516, 'subsample': 0.844978725545569, 'colsample_bytree': 0.44989422418684, 'max_depth': 6, 'n_estimators': 2945}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.8431481285810944
R2_score :  0.7186509266666582


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:43:14,007][0m Trial 6 finished with value: 4.817154102344354 and parameters: {'learning_rate': 0.05297263382164796, 'reg_lambda': 0.0009063555368964637, 'reg_alpha': 1.2798673223114063e-08, 'subsample': 0.35112507992555586, 'colsample_bytree': 0.6116591007571499, 'max_depth': 3, 'n_estimators': 1879}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.817154102344354
R2_score :  0.7055969828412461


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:47:33,274][0m Trial 7 finished with value: 4.788554937946665 and parameters: {'learning_rate': 0.017752000591089934, 'reg_lambda': 2.710613584367079e-06, 'reg_alpha': 2.4821573039019524, 'subsample': 0.29862823089860413, 'colsample_bytree': 0.3345167048863346, 'max_depth': 6, 'n_estimators': 2080}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.788554937946665
R2_score :  0.7591261342754274


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:49:55,215][0m Trial 8 finished with value: 4.065070158728204 and parameters: {'learning_rate': 0.029324035728187696, 'reg_lambda': 0.6438178323500426, 'reg_alpha': 0.0821740082898111, 'subsample': 0.14134871330258492, 'colsample_bytree': 0.8295613348742853, 'max_depth': 3, 'n_estimators': 2301}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.065070158728204
R2_score :  0.7233241158947401


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:50:37,802][0m Trial 9 finished with value: 6.206847562617861 and parameters: {'learning_rate': 0.077082421202773, 'reg_lambda': 0.053731762413557996, 'reg_alpha': 0.008959806512862106, 'subsample': 0.12998078892517306, 'colsample_bytree': 0.1113762572136053, 'max_depth': 6, 'n_estimators': 898}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  6.206847562617861
R2_score :  0.7392738976386494


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:52:23,091][0m Trial 10 finished with value: 5.448784966336275 and parameters: {'learning_rate': 0.001683717177778158, 'reg_lambda': 55.502350347799705, 'reg_alpha': 49.42282137256974, 'subsample': 0.6723973732262163, 'colsample_bytree': 0.9949414623192445, 'max_depth': 4, 'n_estimators': 1275}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  5.448784966336275
R2_score :  0.6797171374340709


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:54:08,304][0m Trial 11 finished with value: 4.085750954056798 and parameters: {'learning_rate': 0.03542790877442886, 'reg_lambda': 41.25654627199507, 'reg_alpha': 0.4560511887435661, 'subsample': 0.147532038106214, 'colsample_bytree': 0.815671183841725, 'max_depth': 4, 'n_estimators': 1836}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.085750954056798
R2_score :  0.7594484502850081


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:55:09,808][0m Trial 12 finished with value: 4.254570276865067 and parameters: {'learning_rate': 0.03513889075361243, 'reg_lambda': 5.924322568326093, 'reg_alpha': 0.1128754407699121, 'subsample': 0.10981470118206949, 'colsample_bytree': 0.7705293464848719, 'max_depth': 3, 'n_estimators': 1440}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.254570276865067
R2_score :  0.7572186240389522


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:57:49,328][0m Trial 13 finished with value: 4.791194169543509 and parameters: {'learning_rate': 0.025010535957519203, 'reg_lambda': 6.824506512475626e-08, 'reg_alpha': 10.570979534990876, 'subsample': 0.22461807089542404, 'colsample_bytree': 0.9158758279999428, 'max_depth': 4, 'n_estimators': 2496}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.791194169543509
R2_score :  0.6846157907626407


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:58:36,173][0m Trial 14 finished with value: 4.3047275109652485 and parameters: {'learning_rate': 0.045580208679295006, 'reg_lambda': 4.269465720292161, 'reg_alpha': 95.5353225451274, 'subsample': 0.2490420291766568, 'colsample_bytree': 0.6795898624820328, 'max_depth': 3, 'n_estimators': 989}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.3047275109652485
R2_score :  0.7616933575832009


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 08:58:48,379][0m Trial 15 finished with value: 4.995289434156859 and parameters: {'learning_rate': 0.014966719408486352, 'reg_lambda': 0.015391094671520136, 'reg_alpha': 1.2413569766336248, 'subsample': 0.3809086949632191, 'colsample_bytree': 0.533610399214107, 'max_depth': 4, 'n_estimators': 204}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.995289434156859
R2_score :  0.7495774682087979


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:00:52,979][0m Trial 16 finished with value: 4.243051719462243 and parameters: {'learning_rate': 0.04593885265112103, 'reg_lambda': 88.0882031076654, 'reg_alpha': 0.009811285188130726, 'subsample': 0.20974407775230022, 'colsample_bytree': 0.875570296468998, 'max_depth': 5, 'n_estimators': 1673}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.243051719462243
R2_score :  0.7392742644368074


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:03:06,459][0m Trial 17 finished with value: 4.093689252968496 and parameters: {'learning_rate': 0.024155222458682596, 'reg_lambda': 2.301706730001585, 'reg_alpha': 4.662088877381834e-05, 'subsample': 0.40407323084066116, 'colsample_bytree': 0.731074366583236, 'max_depth': 3, 'n_estimators': 2229}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  4.093689252968496
R2_score :  0.7476057198864594


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:03:40,280][0m Trial 18 finished with value: 9.070102359674655 and parameters: {'learning_rate': 0.0012930917356519175, 'reg_lambda': 0.4013732538033482, 'reg_alpha': 0.055953531181059404, 'subsample': 0.25423058984118274, 'colsample_bytree': 0.8662101703697235, 'max_depth': 4, 'n_estimators': 529}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  9.070102359674655
R2_score :  0.44163328405924074


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:04:29,924][0m Trial 19 finished with value: 5.430407336248827 and parameters: {'learning_rate': 0.04153763734194716, 'reg_lambda': 0.013079948726058456, 'reg_alpha': 0.9995965435106116, 'subsample': 0.11164312307294227, 'colsample_bytree': 0.5426943298548812, 'max_depth': 3, 'n_estimators': 1308}. Best is trial 4 with value: 4.014516335495757.[0m


Mean Absolute Error :  5.430407336248827
R2_score :  0.7386240109592743


In [97]:
"""Uncomment for tuning the model"""
import optuna
def run(trial):
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 0.1)
    reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
    reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
    subsample = trial.suggest_float("subsample", 0.1, 1.0)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.1, 1.0)
    max_depth = trial.suggest_int("max_depth", 3, 6)
    n_estimators = trial.suggest_int("n_estimators", 20, 3000)


    model = xgb.XGBRegressor(
            random_state=42,
            objective='reg:squarederror',
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            reg_lambda=reg_lambda,
            reg_alpha=reg_alpha,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            max_depth=max_depth,
            eval_metric='mae'
        )
    model.fit(X_train, y_train)
    preds_valid = model.predict(X_test)
    mae_ = mean_absolute_error(y_test, preds_valid)
    r2_sc = r2_score(y_test, preds_valid)
    print("Mean Absolute Error : ", mae_)
    print("R2_score : ", r2_sc)
    return mae_

study = optuna.create_study(direction="minimize")
study.optimize(run, n_trials=100)

# study.best_params

[32m[I 2023-04-28 09:04:30,128][0m A new study created in memory with name: no-name-0d3aee4a-db2a-445a-aee6-6a631401074b[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:04:41,170][0m Trial 0 finished with value: 4.45109362412442 and parameters: {'learning_rate': 0.07511926391905335, 'reg_lambda': 9.155477788500397, 'reg_alpha': 2.0220448757521527e-06, 'subsample': 0.485471495251942, 'colsample_bytree': 0.2838175433464417, 'max_depth': 3, 'n_estimators': 260}. Best is trial 0 with value: 4.45109362412442.[0m


Mean Absolute Error :  4.45109362412442
R2_score :  0.7820779441828376


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:04:51,238][0m Trial 1 finished with value: 4.37555884175738 and parameters: {'learning_rate': 0.09352998540410909, 'reg_lambda': 0.00036642843752606705, 'reg_alpha': 0.4009758857433482, 'subsample': 0.6332212397539065, 'colsample_bytree': 0.524477195640949, 'max_depth': 3, 'n_estimators': 192}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.37555884175738
R2_score :  0.7225718959237353


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:06:12,419][0m Trial 2 finished with value: 4.6421249747019795 and parameters: {'learning_rate': 0.08751711247805773, 'reg_lambda': 8.926641410419766, 'reg_alpha': 4.097168185791003e-08, 'subsample': 0.821061008835693, 'colsample_bytree': 0.12700196394387855, 'max_depth': 3, 'n_estimators': 2335}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.6421249747019795
R2_score :  0.7665492732905539


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:08:14,493][0m Trial 3 finished with value: 4.419209723726743 and parameters: {'learning_rate': 0.03145172256075458, 'reg_lambda': 4.999706038464734e-05, 'reg_alpha': 60.774996801999954, 'subsample': 0.551267338326362, 'colsample_bytree': 0.5609709595493059, 'max_depth': 5, 'n_estimators': 1537}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.419209723726743
R2_score :  0.7323315937345198


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:11:07,241][0m Trial 4 finished with value: 4.543365449905182 and parameters: {'learning_rate': 0.09888878048825313, 'reg_lambda': 24.855488205229097, 'reg_alpha': 0.5834662929754639, 'subsample': 0.9802581610602823, 'colsample_bytree': 0.5554147201309242, 'max_depth': 3, 'n_estimators': 2672}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.543365449905182
R2_score :  0.7398357016519148


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:13:38,257][0m Trial 5 finished with value: 5.357332845560095 and parameters: {'learning_rate': 0.06625231145468774, 'reg_lambda': 8.848086435644778e-07, 'reg_alpha': 1.9862217971987475e-07, 'subsample': 0.5544800717464493, 'colsample_bytree': 0.2980344126396455, 'max_depth': 4, 'n_estimators': 1767}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  5.357332845560095
R2_score :  0.7420422338459665


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:16:51,756][0m Trial 6 finished with value: 5.1697740533867504 and parameters: {'learning_rate': 0.08370992322094004, 'reg_lambda': 0.006482423000071544, 'reg_alpha': 7.607417108993234e-08, 'subsample': 0.47335763912093665, 'colsample_bytree': 0.5491432977090945, 'max_depth': 6, 'n_estimators': 1628}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  5.1697740533867504
R2_score :  0.7181880729554668


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:20:16,777][0m Trial 7 finished with value: 4.399734740061761 and parameters: {'learning_rate': 0.019277007954551645, 'reg_lambda': 3.7228712122524425e-05, 'reg_alpha': 1.5096281335986974e-07, 'subsample': 0.9714907644671561, 'colsample_bytree': 0.44359452647471065, 'max_depth': 5, 'n_estimators': 2662}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.399734740061761
R2_score :  0.7243442076497664


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 09:23:11,825][0m Trial 8 finished with value: 5.623420045858467 and parameters: {'learning_rate': 0.09363933420622975, 'reg_lambda': 2.215876134372027e-08, 'reg_alpha': 1.9828507172812145e-08, 'subsample': 0.53214042829062, 'colsample_bytree': 0.6078857109761135, 'max_depth': 3, 'n_estimators': 2867}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  5.623420045858467
R2_score :  0.6709702157996298


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 20:57:38,146][0m Trial 9 finished with value: 4.78350110575384 and parameters: {'learning_rate': 0.023361361840033373, 'reg_lambda': 2.6842680948509104e-08, 'reg_alpha': 2.174301431738352e-06, 'subsample': 0.5837255176982205, 'colsample_bytree': 0.6572166118349376, 'max_depth': 5, 'n_estimators': 2868}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.78350110575384
R2_score :  0.66309180139157


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 20:57:42,148][0m Trial 10 finished with value: 4.963308442169709 and parameters: {'learning_rate': 0.055280297416579895, 'reg_lambda': 0.010017499383590443, 'reg_alpha': 0.0075379566623527666, 'subsample': 0.23734176895651116, 'colsample_bytree': 0.839702589285737, 'max_depth': 4, 'n_estimators': 48}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.963308442169709
R2_score :  0.7128506477516869


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 20:59:29,658][0m Trial 11 finished with value: 5.141923206292742 and parameters: {'learning_rate': 0.0029367987349087354, 'reg_lambda': 0.00017443901378252373, 'reg_alpha': 0.0008930028522699851, 'subsample': 0.9999053622380785, 'colsample_bytree': 0.36904344791808086, 'max_depth': 6, 'n_estimators': 1099}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  5.141923206292742
R2_score :  0.7497472931596717


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 21:01:05,343][0m Trial 12 finished with value: 4.667377574193389 and parameters: {'learning_rate': 0.04733416631341617, 'reg_lambda': 9.448236366700265e-06, 'reg_alpha': 0.0001951386840519026, 'subsample': 0.7233303225789511, 'colsample_bytree': 0.42905839112183586, 'max_depth': 5, 'n_estimators': 915}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.667377574193389
R2_score :  0.7467026265693292


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 21:05:26,624][0m Trial 13 finished with value: 5.8964317790871545 and parameters: {'learning_rate': 0.0643927278916192, 'reg_lambda': 0.0013307672342664915, 'reg_alpha': 0.020823809558739532, 'subsample': 0.8356682139085756, 'colsample_bytree': 0.8214128717417577, 'max_depth': 4, 'n_estimators': 2215}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  5.8964317790871545
R2_score :  0.6161606793703946


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 21:07:28,670][0m Trial 14 finished with value: 4.822415342725868 and parameters: {'learning_rate': 0.04094568027395599, 'reg_lambda': 0.062232081534995216, 'reg_alpha': 2.1835796233173826e-05, 'subsample': 0.7548016187822557, 'colsample_bytree': 0.9876338614755396, 'max_depth': 5, 'n_estimators': 663}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.822415342725868
R2_score :  0.6522874519661634


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 21:10:52,191][0m Trial 15 finished with value: 4.931632162857391 and parameters: {'learning_rate': 0.0776307037492587, 'reg_lambda': 4.849245682816249e-06, 'reg_alpha': 0.20238574029532697, 'subsample': 0.8807157111762105, 'colsample_bytree': 0.4394858138331028, 'max_depth': 4, 'n_estimators': 2124}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.931632162857391
R2_score :  0.7190859172954589


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 21:13:29,048][0m Trial 16 finished with value: 5.063364859133622 and parameters: {'learning_rate': 0.09819158855403236, 'reg_lambda': 0.00034341496474826554, 'reg_alpha': 8.685371232187952e-05, 'subsample': 0.6763872875166711, 'colsample_bytree': 0.4762805141338949, 'max_depth': 6, 'n_estimators': 1180}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  5.063364859133622
R2_score :  0.7137483657870682


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 21:14:00,344][0m Trial 17 finished with value: 5.830158951581806 and parameters: {'learning_rate': 0.016688006508771308, 'reg_lambda': 6.846872680092222e-07, 'reg_alpha': 7.811267017168039e-06, 'subsample': 0.9073801101619346, 'colsample_bytree': 0.2137514578792299, 'max_depth': 4, 'n_estimators': 591}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  5.830158951581806
R2_score :  0.7534792072791402


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-28 21:17:11,041][0m Trial 18 finished with value: 4.697925384982993 and parameters: {'learning_rate': 0.041172998421319336, 'reg_lambda': 0.2246398383128713, 'reg_alpha': 6.782115019621906e-07, 'subsample': 0.3642868226790396, 'colsample_bytree': 0.3700396266689391, 'max_depth': 5, 'n_estimators': 1931}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.697925384982993
R2_score :  0.7522455232994153


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:18:29,975][0m Trial 19 finished with value: 4.98551796727635 and parameters: {'learning_rate': 0.0559156637451384, 'reg_lambda': 4.134558762817977e-05, 'reg_alpha': 3.626373817173261e-05, 'subsample': 0.6563325330397529, 'colsample_bytree': 0.6707737713717916, 'max_depth': 6, 'n_estimators': 2539}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.98551796727635
R2_score :  0.6724445628913924


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:20:59,067][0m Trial 20 finished with value: 5.625785244983675 and parameters: {'learning_rate': 0.0021799216433986378, 'reg_lambda': 0.0006548952573343785, 'reg_alpha': 1.1659993940803949e-08, 'subsample': 0.8183645303365207, 'colsample_bytree': 0.48306887793246134, 'max_depth': 3, 'n_estimators': 1302}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  5.625785244983675
R2_score :  0.7277754723430012


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:23:44,208][0m Trial 21 finished with value: 4.414182957581479 and parameters: {'learning_rate': 0.029723386540009406, 'reg_lambda': 4.5635006789963095e-05, 'reg_alpha': 57.324263689718975, 'subsample': 0.6521685229298144, 'colsample_bytree': 0.5415459716195707, 'max_depth': 5, 'n_estimators': 1530}. Best is trial 1 with value: 4.37555884175738.[0m


Mean Absolute Error :  4.414182957581479
R2_score :  0.7295748556805105


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:24:34,616][0m Trial 22 finished with value: 4.16206846602308 and parameters: {'learning_rate': 0.030899569144436076, 'reg_lambda': 5.602972645831192e-05, 'reg_alpha': 36.071524272788935, 'subsample': 0.6530854963857263, 'colsample_bytree': 0.4995139480338302, 'max_depth': 5, 'n_estimators': 585}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.16206846602308
R2_score :  0.7524943906343082


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:25:03,607][0m Trial 23 finished with value: 4.38973422855411 and parameters: {'learning_rate': 0.017163007580703913, 'reg_lambda': 0.0016049584622673757, 'reg_alpha': 5.598771983444693, 'subsample': 0.7553138125327648, 'colsample_bytree': 0.34749782748917635, 'max_depth': 5, 'n_estimators': 393}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.38973422855411
R2_score :  0.7669688466688336


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:25:26,466][0m Trial 24 finished with value: 4.452039583893925 and parameters: {'learning_rate': 0.03509657806928665, 'reg_lambda': 0.0009257683647484289, 'reg_alpha': 3.5202753065987493, 'subsample': 0.7393864112984442, 'colsample_bytree': 0.33617502750522643, 'max_depth': 4, 'n_estimators': 355}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.452039583893925
R2_score :  0.7589972403517342


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:25:29,287][0m Trial 25 finished with value: 11.022509490011672 and parameters: {'learning_rate': 0.010970652304149533, 'reg_lambda': 0.0051398126041990455, 'reg_alpha': 3.526588185877139, 'subsample': 0.6270438848786655, 'colsample_bytree': 0.23955194360159904, 'max_depth': 5, 'n_estimators': 35}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  11.022509490011672
R2_score :  0.12806585527071734


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:26:34,999][0m Trial 26 finished with value: 4.370567402891388 and parameters: {'learning_rate': 0.02699229892641566, 'reg_lambda': 0.00016100356387594708, 'reg_alpha': 11.879967461336992, 'subsample': 0.7158436554054788, 'colsample_bytree': 0.3801427786438073, 'max_depth': 6, 'n_estimators': 668}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.370567402891388
R2_score :  0.7590264123876233


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:28:01,032][0m Trial 27 finished with value: 4.386246209050042 and parameters: {'learning_rate': 0.026550288093056526, 'reg_lambda': 0.00021019593476522382, 'reg_alpha': 30.75457392537315, 'subsample': 0.6964318965838936, 'colsample_bytree': 0.39584776073201683, 'max_depth': 6, 'n_estimators': 797}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.386246209050042
R2_score :  0.7593866114558462


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:29:12,606][0m Trial 28 finished with value: 4.409622182285922 and parameters: {'learning_rate': 0.03913916647192389, 'reg_lambda': 6.844263241858075e-06, 'reg_alpha': 0.2564504982036819, 'subsample': 0.6161134313129298, 'colsample_bytree': 0.4999867077318635, 'max_depth': 6, 'n_estimators': 485}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.409622182285922
R2_score :  0.7398960823438403


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:29:44,094][0m Trial 29 finished with value: 5.452584033303764 and parameters: {'learning_rate': 0.047418805026620575, 'reg_lambda': 0.06741119800221529, 'reg_alpha': 15.307158610539174, 'subsample': 0.48505318634446026, 'colsample_bytree': 0.28656194863005585, 'max_depth': 6, 'n_estimators': 244}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  5.452584033303764
R2_score :  0.758917240880584


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:30:49,041][0m Trial 30 finished with value: 4.66906052827371 and parameters: {'learning_rate': 0.03302518646850048, 'reg_lambda': 0.00023026454461969335, 'reg_alpha': 1.412580595281687, 'subsample': 0.44361583014676687, 'colsample_bytree': 0.41351119007989745, 'max_depth': 3, 'n_estimators': 935}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.66906052827371
R2_score :  0.752124758861517


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:32:09,933][0m Trial 31 finished with value: 4.555563514582846 and parameters: {'learning_rate': 0.027658043703990165, 'reg_lambda': 0.00010758856671957315, 'reg_alpha': 22.275398701418908, 'subsample': 0.6946228704740551, 'colsample_bytree': 0.3940359931873895, 'max_depth': 6, 'n_estimators': 722}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.555563514582846
R2_score :  0.754623380046116


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:33:31,777][0m Trial 32 finished with value: 4.34299317114551 and parameters: {'learning_rate': 0.026797594821316894, 'reg_lambda': 0.0002564060457360141, 'reg_alpha': 76.6949820877262, 'subsample': 0.6145906762669788, 'colsample_bytree': 0.47278256406827796, 'max_depth': 6, 'n_estimators': 837}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.34299317114551
R2_score :  0.750086223066412


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:33:51,294][0m Trial 33 finished with value: 4.4858745930537225 and parameters: {'learning_rate': 0.0227581571982003, 'reg_lambda': 0.0018599694533991693, 'reg_alpha': 89.06895052512989, 'subsample': 0.6133987814833736, 'colsample_bytree': 0.4886165366130381, 'max_depth': 6, 'n_estimators': 202}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.4858745930537225
R2_score :  0.7551726813447495


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:35:01,726][0m Trial 34 finished with value: 5.299714149304696 and parameters: {'learning_rate': 0.036987963179830476, 'reg_lambda': 0.00046469390861853264, 'reg_alpha': 18.42646399212349, 'subsample': 0.571630535200394, 'colsample_bytree': 0.1661875440456722, 'max_depth': 6, 'n_estimators': 1028}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  5.299714149304696
R2_score :  0.7599823692970771


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:36:13,044][0m Trial 35 finished with value: 4.914944675185423 and parameters: {'learning_rate': 0.030790242788511314, 'reg_lambda': 1.748077456336655e-05, 'reg_alpha': 94.1946482090506, 'subsample': 0.7920290059179739, 'colsample_bytree': 0.32019024682464825, 'max_depth': 3, 'n_estimators': 1362}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.914944675185423
R2_score :  0.7623985806248748


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:37:09,942][0m Trial 36 finished with value: 4.80031801964555 and parameters: {'learning_rate': 0.07292816644426295, 'reg_lambda': 8.170996853619417e-05, 'reg_alpha': 6.3863989063216104, 'subsample': 0.6715318454096648, 'colsample_bytree': 0.601366615561174, 'max_depth': 6, 'n_estimators': 529}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.80031801964555
R2_score :  0.6866329071328003


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:38:21,024][0m Trial 37 finished with value: 4.996640815685249 and parameters: {'learning_rate': 0.09203017753952292, 'reg_lambda': 1.7136449551526287e-06, 'reg_alpha': 0.9388516345293361, 'subsample': 0.5404965329688156, 'colsample_bytree': 0.5224150983303405, 'max_depth': 5, 'n_estimators': 806}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.996640815685249
R2_score :  0.686964514892371


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:38:46,757][0m Trial 38 finished with value: 4.3541906510846005 and parameters: {'learning_rate': 0.0862157449837759, 'reg_lambda': 3.081623986935537e-05, 'reg_alpha': 12.427626587452988, 'subsample': 0.717426360651229, 'colsample_bytree': 0.4522452025645343, 'max_depth': 6, 'n_estimators': 246}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.3541906510846005
R2_score :  0.7445050394158561


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:39:20,848][0m Trial 39 finished with value: 5.137115658577089 and parameters: {'learning_rate': 0.08491702354015329, 'reg_lambda': 1.4690069071035517e-05, 'reg_alpha': 11.920251518394666, 'subsample': 0.7043986189044606, 'colsample_bytree': 0.27533252184161994, 'max_depth': 6, 'n_estimators': 386}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  5.137115658577089
R2_score :  0.7568061878488215


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:40:27,587][0m Trial 40 finished with value: 4.430424568482242 and parameters: {'learning_rate': 0.03326878085752285, 'reg_lambda': 3.081192224336148e-05, 'reg_alpha': 1.7356309032739714, 'subsample': 0.7847519525624512, 'colsample_bytree': 0.45673273018484306, 'max_depth': 6, 'n_estimators': 674}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.430424568482242
R2_score :  0.7424285649713435


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:40:46,092][0m Trial 41 finished with value: 4.265181703833172 and parameters: {'learning_rate': 0.09006283209368009, 'reg_lambda': 9.321571484808277e-05, 'reg_alpha': 31.371323061753866, 'subsample': 0.6184949306956964, 'colsample_bytree': 0.5159777594625803, 'max_depth': 6, 'n_estimators': 195}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.265181703833172
R2_score :  0.7335369705901376


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:40:58,061][0m Trial 42 finished with value: 4.354079127982873 and parameters: {'learning_rate': 0.0865147497506379, 'reg_lambda': 0.00010394892406704635, 'reg_alpha': 39.650541964801434, 'subsample': 0.5932648985888622, 'colsample_bytree': 0.4426836840816764, 'max_depth': 6, 'n_estimators': 137}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.354079127982873
R2_score :  0.7511735657051752


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:41:08,756][0m Trial 43 finished with value: 4.3083706432649675 and parameters: {'learning_rate': 0.08771586432379344, 'reg_lambda': 7.052150522155923e-05, 'reg_alpha': 29.570415043362704, 'subsample': 0.5744740784166902, 'colsample_bytree': 0.51244913218334, 'max_depth': 6, 'n_estimators': 117}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.3083706432649675
R2_score :  0.738745116602817


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:41:21,380][0m Trial 44 finished with value: 4.274497647648274 and parameters: {'learning_rate': 0.08168787891264208, 'reg_lambda': 0.00012961646073970544, 'reg_alpha': 36.810909382562116, 'subsample': 0.5908565765915298, 'colsample_bytree': 0.548903554846571, 'max_depth': 6, 'n_estimators': 138}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.274497647648274
R2_score :  0.7378594044683048


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:41:23,883][0m Trial 45 finished with value: 5.816274106853026 and parameters: {'learning_rate': 0.08065838432965464, 'reg_lambda': 3.142692032505424e-06, 'reg_alpha': 91.18162004176678, 'subsample': 0.5122890221061458, 'colsample_bytree': 0.578828205851809, 'max_depth': 6, 'n_estimators': 22}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  5.816274106853026
R2_score :  0.698224167874806


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:41:54,161][0m Trial 46 finished with value: 4.40616087264129 and parameters: {'learning_rate': 0.08961856567055707, 'reg_lambda': 1.3442497528121747e-05, 'reg_alpha': 39.16010131426144, 'subsample': 0.5710589047911794, 'colsample_bytree': 0.5407462311860841, 'max_depth': 5, 'n_estimators': 399}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.40616087264129
R2_score :  0.712649442023616


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:42:48,454][0m Trial 47 finished with value: 4.808260285803529 and parameters: {'learning_rate': 0.09419455924601666, 'reg_lambda': 0.0004282210733403822, 'reg_alpha': 2.4358525883842916, 'subsample': 0.6402855736737838, 'colsample_bytree': 0.5140981034807655, 'max_depth': 6, 'n_estimators': 521}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.808260285803529
R2_score :  0.7060323865364072


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:43:02,188][0m Trial 48 finished with value: 4.574980276997106 and parameters: {'learning_rate': 0.09856168657460923, 'reg_lambda': 6.554396515004641e-05, 'reg_alpha': 0.8901672790915536, 'subsample': 0.5862663826543902, 'colsample_bytree': 0.6365253899645124, 'max_depth': 5, 'n_estimators': 158}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.574980276997106
R2_score :  0.6795893738667056


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:43:36,151][0m Trial 49 finished with value: 4.608285965194395 and parameters: {'learning_rate': 0.08080628170643483, 'reg_lambda': 4.879713547842803e-07, 'reg_alpha': 0.33116907261196377, 'subsample': 0.5076610399444756, 'colsample_bytree': 0.5801344109458466, 'max_depth': 6, 'n_estimators': 337}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.608285965194395
R2_score :  0.7028053839908068


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:43:51,209][0m Trial 50 finished with value: 4.382142151832527 and parameters: {'learning_rate': 0.08953951989549665, 'reg_lambda': 3.0679493383562366e-06, 'reg_alpha': 5.598254553193769, 'subsample': 0.5454781566305231, 'colsample_bytree': 0.5558117398564005, 'max_depth': 6, 'n_estimators': 138}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.382142151832527
R2_score :  0.7293401201117835


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:44:07,551][0m Trial 51 finished with value: 4.320723346542186 and parameters: {'learning_rate': 0.07217876635566452, 'reg_lambda': 9.938145268331484e-05, 'reg_alpha': 27.32063502793319, 'subsample': 0.6125282834707884, 'colsample_bytree': 0.42853210182187357, 'max_depth': 6, 'n_estimators': 147}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.320723346542186
R2_score :  0.7677741584150275


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:44:44,802][0m Trial 52 finished with value: 4.322049629897842 and parameters: {'learning_rate': 0.07206493348975067, 'reg_lambda': 0.00016549610518195129, 'reg_alpha': 36.30551298196829, 'subsample': 0.637520161424352, 'colsample_bytree': 0.5031895503289158, 'max_depth': 6, 'n_estimators': 319}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.322049629897842
R2_score :  0.7322908670175112


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:45:10,019][0m Trial 53 finished with value: 4.298606277181556 and parameters: {'learning_rate': 0.07364154180621085, 'reg_lambda': 9.951333906093892e-05, 'reg_alpha': 32.391712534429786, 'subsample': 0.6525482827897798, 'colsample_bytree': 0.5038705042596235, 'max_depth': 6, 'n_estimators': 267}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.298606277181556
R2_score :  0.7353061547116665


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:45:19,035][0m Trial 54 finished with value: 4.4538692815233825 and parameters: {'learning_rate': 0.07713435428620194, 'reg_lambda': 2.9215850748934527e-05, 'reg_alpha': 12.361320695254099, 'subsample': 0.6668167588121499, 'colsample_bytree': 0.416951691937145, 'max_depth': 6, 'n_estimators': 105}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.4538692815233825
R2_score :  0.771260576853233


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:45:40,354][0m Trial 55 finished with value: 4.448015731890351 and parameters: {'learning_rate': 0.08182598128917774, 'reg_lambda': 6.687611975465178e-05, 'reg_alpha': 5.350714066492331, 'subsample': 0.5871622606316635, 'colsample_bytree': 0.5205567513749965, 'max_depth': 6, 'n_estimators': 237}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.448015731890351
R2_score :  0.7301259268573115


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:45:42,483][0m Trial 56 finished with value: 6.622156958928079 and parameters: {'learning_rate': 0.06946070738815664, 'reg_lambda': 8.735964381424693e-06, 'reg_alpha': 28.4274477414945, 'subsample': 0.5495753407608353, 'colsample_bytree': 0.6193310565916752, 'max_depth': 5, 'n_estimators': 20}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  6.622156958928079
R2_score :  0.6391540693090769


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:46:38,399][0m Trial 57 finished with value: 4.443443428703982 and parameters: {'learning_rate': 0.06636236857798948, 'reg_lambda': 0.0006815589776174062, 'reg_alpha': 0.5536176939304908, 'subsample': 0.6589886772655184, 'colsample_bytree': 0.5730345182255399, 'max_depth': 6, 'n_estimators': 462}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.443443428703982
R2_score :  0.7179695101266745


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:47:30,159][0m Trial 58 finished with value: 4.635923459468388 and parameters: {'learning_rate': 0.07759044545599864, 'reg_lambda': 0.0035001045818649807, 'reg_alpha': 0.0915880119794646, 'subsample': 0.6065205704884374, 'colsample_bytree': 0.46516654652772643, 'max_depth': 5, 'n_estimators': 585}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.635923459468388
R2_score :  0.7230579096122622


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:47:56,273][0m Trial 59 finished with value: 4.643469812510305 and parameters: {'learning_rate': 0.08264363152171751, 'reg_lambda': 0.0007966036792914367, 'reg_alpha': 2.4060397534826166, 'subsample': 0.5195695991434268, 'colsample_bytree': 0.4270486615478562, 'max_depth': 6, 'n_estimators': 261}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.643469812510305
R2_score :  0.7497860095660704


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:51:10,783][0m Trial 60 finished with value: 4.497823732625809 and parameters: {'learning_rate': 0.062402195410235915, 'reg_lambda': 7.151970795658751e-05, 'reg_alpha': 41.372774816790105, 'subsample': 0.6772116561208983, 'colsample_bytree': 0.6732227646731582, 'max_depth': 6, 'n_estimators': 1782}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.497823732625809
R2_score :  0.6931391611806134


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:51:37,794][0m Trial 61 finished with value: 4.273382612810257 and parameters: {'learning_rate': 0.07348002617117197, 'reg_lambda': 0.00015649057066246857, 'reg_alpha': 32.34496171947777, 'subsample': 0.6299800502212085, 'colsample_bytree': 0.5004074588295053, 'max_depth': 6, 'n_estimators': 298}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.273382612810257
R2_score :  0.7130715305702116


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:51:49,354][0m Trial 62 finished with value: 4.2632241002943445 and parameters: {'learning_rate': 0.07523083382832856, 'reg_lambda': 0.0002509195927318387, 'reg_alpha': 8.078226277390751, 'subsample': 0.6288097577757339, 'colsample_bytree': 0.5253791695278762, 'max_depth': 6, 'n_estimators': 124}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.2632241002943445
R2_score :  0.7326839584494931


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:52:39,184][0m Trial 63 finished with value: 4.547632667369512 and parameters: {'learning_rate': 0.0762258231355543, 'reg_lambda': 0.000340741999037839, 'reg_alpha': 7.121707433651155, 'subsample': 0.6456641151812221, 'colsample_bytree': 0.524512656644265, 'max_depth': 6, 'n_estimators': 421}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.547632667369512
R2_score :  0.7238210165339498


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:53:15,057][0m Trial 64 finished with value: 4.448519823047109 and parameters: {'learning_rate': 0.08027689241948374, 'reg_lambda': 0.0017809449955525874, 'reg_alpha': 9.512855552006748, 'subsample': 0.5639793771436592, 'colsample_bytree': 0.49158663571069644, 'max_depth': 6, 'n_estimators': 303}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.448519823047109
R2_score :  0.7328386718224194


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:53:49,922][0m Trial 65 finished with value: 4.331125732778508 and parameters: {'learning_rate': 0.08438462575444312, 'reg_lambda': 0.00017816963665836125, 'reg_alpha': 4.02396355583523, 'subsample': 0.7381117761903162, 'colsample_bytree': 0.5552328563762057, 'max_depth': 4, 'n_estimators': 559}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.331125732778508
R2_score :  0.7022662882998613


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:54:00,700][0m Trial 66 finished with value: 4.254392855450327 and parameters: {'learning_rate': 0.07555500224775527, 'reg_lambda': 2.309574248681351e-05, 'reg_alpha': 99.91990480265024, 'subsample': 0.6903191881378571, 'colsample_bytree': 0.594115371295488, 'max_depth': 6, 'n_estimators': 104}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.254392855450327
R2_score :  0.7323209321883501


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:54:28,843][0m Trial 67 finished with value: 4.579813424204215 and parameters: {'learning_rate': 0.07390438155253846, 'reg_lambda': 2.52701259438616e-05, 'reg_alpha': 56.59172936958021, 'subsample': 0.6866183726025779, 'colsample_bytree': 0.5987200755472759, 'max_depth': 6, 'n_estimators': 286}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.579813424204215
R2_score :  0.7197361484540286


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:54:48,711][0m Trial 68 finished with value: 4.196502887233966 and parameters: {'learning_rate': 0.07001527123734705, 'reg_lambda': 0.00038791894306115473, 'reg_alpha': 98.95371012330142, 'subsample': 0.628179169509355, 'colsample_bytree': 0.5462755630113153, 'max_depth': 6, 'n_estimators': 215}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.196502887233966
R2_score :  0.7258173367482088


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:55:29,190][0m Trial 69 finished with value: 4.334381403229536 and parameters: {'learning_rate': 0.06818252156129706, 'reg_lambda': 0.0004143393280760942, 'reg_alpha': 87.98017018768107, 'subsample': 0.6322079294312514, 'colsample_bytree': 0.6430902014655004, 'max_depth': 5, 'n_estimators': 438}. Best is trial 22 with value: 4.16206846602308.[0m


Mean Absolute Error :  4.334381403229536
R2_score :  0.7179745159099504


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:55:54,830][0m Trial 70 finished with value: 4.144307145683879 and parameters: {'learning_rate': 0.060874267198799376, 'reg_lambda': 0.0008196424699002651, 'reg_alpha': 14.698503071138058, 'subsample': 0.7005947696689363, 'colsample_bytree': 0.5442856655981725, 'max_depth': 6, 'n_estimators': 210}. Best is trial 70 with value: 4.144307145683879.[0m


Mean Absolute Error :  4.144307145683879
R2_score :  0.7411635677250943


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:56:05,345][0m Trial 71 finished with value: 4.216353911262755 and parameters: {'learning_rate': 0.06209686627923121, 'reg_lambda': 0.00108033870884223, 'reg_alpha': 14.425368269472887, 'subsample': 0.7047570169460033, 'colsample_bytree': 0.5430695173707097, 'max_depth': 6, 'n_estimators': 83}. Best is trial 70 with value: 4.144307145683879.[0m


Mean Absolute Error :  4.216353911262755
R2_score :  0.7447629844832471


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:56:14,010][0m Trial 72 finished with value: 4.442927967867167 and parameters: {'learning_rate': 0.0620082815017137, 'reg_lambda': 0.0012478463538253914, 'reg_alpha': 13.813313670717967, 'subsample': 0.6866226413767275, 'colsample_bytree': 0.5820897713055916, 'max_depth': 6, 'n_estimators': 63}. Best is trial 70 with value: 4.144307145683879.[0m


Mean Absolute Error :  4.442927967867167
R2_score :  0.7496117907131414


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:56:36,924][0m Trial 73 finished with value: 4.351794957377905 and parameters: {'learning_rate': 0.06142274043378958, 'reg_lambda': 0.0035207884187649723, 'reg_alpha': 17.913302035804644, 'subsample': 0.7196935931580135, 'colsample_bytree': 0.4655435459146039, 'max_depth': 6, 'n_estimators': 202}. Best is trial 70 with value: 4.144307145683879.[0m


Mean Absolute Error :  4.351794957377905
R2_score :  0.7381589943289952


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 15:57:18,136][0m Trial 74 finished with value: 4.3710426334684955 and parameters: {'learning_rate': 0.06866456407534142, 'reg_lambda': 0.0008568117395720759, 'reg_alpha': 7.755173050362583, 'subsample': 0.7497106171625239, 'colsample_bytree': 0.534105029776069, 'max_depth': 6, 'n_estimators': 347}. Best is trial 70 with value: 4.144307145683879.[0m


Mean Absolute Error :  4.3710426334684955
R2_score :  0.7095465847139789


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 16:02:32,661][0m Trial 75 finished with value: 5.194821686721342 and parameters: {'learning_rate': 0.058996378860857235, 'reg_lambda': 0.00028986400881725116, 'reg_alpha': 3.6244787525150906, 'subsample': 0.7173318377412843, 'colsample_bytree': 0.6126226603703888, 'max_depth': 6, 'n_estimators': 2960}. Best is trial 70 with value: 4.144307145683879.[0m


Mean Absolute Error :  5.194821686721342
R2_score :  0.6597540545776153


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 16:03:36,643][0m Trial 76 finished with value: 4.379539755331345 and parameters: {'learning_rate': 0.06517790066148384, 'reg_lambda': 0.0005686459283779009, 'reg_alpha': 64.84556852625104, 'subsample': 0.6956691489135968, 'colsample_bytree': 0.47933883777638336, 'max_depth': 6, 'n_estimators': 620}. Best is trial 70 with value: 4.144307145683879.[0m


Mean Absolute Error :  4.379539755331345
R2_score :  0.7394218008012313


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 16:03:53,513][0m Trial 77 finished with value: 4.32725933895831 and parameters: {'learning_rate': 0.05443444243009719, 'reg_lambda': 3.931702928280631e-05, 'reg_alpha': 20.462047635324122, 'subsample': 0.7647378430226827, 'colsample_bytree': 0.5596644869359695, 'max_depth': 6, 'n_estimators': 190}. Best is trial 70 with value: 4.144307145683879.[0m


Mean Absolute Error :  4.32725933895831
R2_score :  0.7385981763015244


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 16:03:58,736][0m Trial 78 finished with value: 4.415439169207173 and parameters: {'learning_rate': 0.06659804350711579, 'reg_lambda': 0.011105218071647164, 'reg_alpha': 93.73460808896282, 'subsample': 0.6640237201920987, 'colsample_bytree': 0.541755651027377, 'max_depth': 4, 'n_estimators': 77}. Best is trial 70 with value: 4.144307145683879.[0m


Mean Absolute Error :  4.415439169207173
R2_score :  0.7454738318076641


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[32m[I 2023-04-29 16:07:34,211][0m Trial 79 finished with value: 4.813531152463998 and parameters: {'learning_rate': 0.07035064337196545, 'reg_lambda': 0.0002147660371873115, 'reg_alpha': 1.614675960285272, 'subsample': 0.627188199496903, 'colsample_bytree': 0.4919196737316733, 'max_depth': 6, 'n_estimators': 2451}. Best is trial 70 with value: 4.144307145683879.[0m


Mean Absolute Error :  4.813531152463998
R2_score :  0.7154486232380166


  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)


In [1]:
"""Uncomment for tuning the model"""
import optuna
def run(trial):
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 0.1)
    reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
    reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
    subsample = trial.suggest_float("subsample", 0.1, 1.0)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.1, 1.0)
    max_depth = trial.suggest_int("max_depth", 3, 6)
    n_estimators = trial.suggest_int("n_estimators", 20, 3000)


    model = xgb.XGBRegressor(
            random_state=42,
            objective='reg:squarederror',
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            reg_lambda=reg_lambda,
            reg_alpha=reg_alpha,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            max_depth=max_depth,
            eval_metric='r2_score'
        )
    model.fit(X_train, y_train)
    preds_valid = model.predict(X_test)
    mae_ = mean_absolute_error(y_test, preds_valid)
    r2_sc = r2_score(y_test, preds_valid)
    print("Mean Absolute Error : ", mae_)
    print("R2_score : ", r2_sc)
    return r2_sc

study = optuna.create_study(direction="minimize")
study.optimize(run, n_trials=100)

# study.best_params

  from .autonotebook import tqdm as notebook_tqdm
[32m[I 2023-04-30 20:29:28,143][0m A new study created in memory with name: no-name-479de14e-c7bb-45e0-8584-c0aa19e972fd[0m
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-8, 100.0)
[33m[W 2023-04-30 20:29:28,150][0m Trial 0 failed with parameters: {'learning_rate': 0.07791084133262398, 'reg_lambda': 0.0007188067232693896, 'reg_alpha': 1.823825918142417e-08, 'subsample': 0.6392031163550392, 'colsample_bytree': 0.8021660748450357, 'max_depth': 5, 'n_estimators': 1277} because of the following error: NameError("name 'xgb' is not defined").[0m
Traceback (most recent call last):
  File "c:\Users\nshre\anaconda3\envs\auto_gpt\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\nshre\AppData\Local\Temp\ipykernel_15036\4217039969.py", line 13, in run
    model = xgb.XGBRegressor(
NameError: name '

NameError: name 'xgb' is not defined