In [45]:
import pandas as pd
train = pd.read_csv("train.csv",index_col=0)
test = pd.read_csv("test.csv",index_col=0)
sample_submit = pd.read_csv("sample_submit.csv",index_col=0,header=None)

In [46]:
train.head()

Unnamed: 0_level_0,age,sex,bmi,children,smoker,region,charges
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,26,male,32.665465,3,no,southeast,0
1,41,male,29.798725,1,no,southwest,0
2,28,male,32.722029,0,yes,northwest,1
3,20,female,38.429831,2,no,southeast,0
4,45,female,29.641854,1,no,northwest,0


In [47]:
test.head()

Unnamed: 0_level_0,age,sex,bmi,children,smoker,region
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
13,19,male,24.365178,1,no,northeast
23,59,male,33.997763,0,yes,northeast
27,42,female,29.28345,0,no,southwest
28,30,male,24.903725,1,no,northwest
29,41,female,29.644536,0,no,southwest


In [48]:
print(train.shape)
print(test.shape)

(1600, 7)
(400, 6)


In [49]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1600 entries, 0 to 1999
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1600 non-null   int64  
 1   sex       1600 non-null   object 
 2   bmi       1600 non-null   float64
 3   children  1600 non-null   int64  
 4   smoker    1600 non-null   object 
 5   region    1600 non-null   object 
 6   charges   1600 non-null   int64  
dtypes: float64(1), int64(3), object(3)
memory usage: 100.0+ KB


In [50]:
train['charges'].value_counts()

0    1256
1     198
2     146
Name: charges, dtype: int64

In [51]:
train.describe()

Unnamed: 0,age,bmi,children,charges
count,1600.0,1600.0,1600.0,1600.0
mean,38.985,32.424376,1.014375,0.30625
std,13.555012,5.766915,1.259031,0.628656
min,18.0,20.627626,0.0,0.0
25%,27.0,28.634267,0.0,0.0
50%,40.0,32.268786,1.0,0.0
75%,50.0,37.069581,2.0,0.0
max,64.0,47.290644,5.0,2.0


In [52]:
train[['sex','charges']].groupby(['sex']).mean()

Unnamed: 0_level_0,charges
sex,Unnamed: 1_level_1
female,0.227931
male,0.376932


In [53]:
train[['smoker','charges']].groupby(['smoker']).mean()

Unnamed: 0_level_0,charges
smoker,Unnamed: 1_level_1
no,0.070579
yes,1.182891


In [54]:
train[['region','charges']].groupby(['region']).mean()

Unnamed: 0_level_0,charges
region,Unnamed: 1_level_1
northeast,0.277778
northwest,0.230769
southeast,0.465686
southwest,0.245333


In [55]:
data = pd.concat([train,test], sort=False)

In [56]:
data['sex'].replace(['male', 'female'], [0, 1], inplace=True)
data['smoker'].replace(['no', 'yes'], [0, 1], inplace=True)
data['region'] = data['region'].map({'northeast': 0, 'northwest': 1, 'southeast': 2, 'southwest': 3}).astype(int)

In [57]:
data.head(10)

Unnamed: 0_level_0,age,sex,bmi,children,smoker,region,charges
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,26,0,32.665465,3,0,2,0.0
1,41,0,29.798725,1,0,3,0.0
2,28,0,32.722029,0,1,1,1.0
3,20,1,38.429831,2,0,2,0.0
4,45,1,29.641854,1,0,1,0.0
5,20,0,37.785937,2,1,2,1.0
6,37,0,43.393374,2,0,0,0.0
7,41,1,32.280407,1,0,0,0.0
8,40,0,28.865828,1,0,0,0.0
9,54,1,33.870887,2,0,0,0.0


In [58]:
data.shape

(2000, 7)

In [59]:
data.reset_index(drop=True, inplace=True)
train = data.loc[:(len(train)-1)]
test = data.loc[len(train):]

In [60]:
y_train = train['charges']
X_train = train.drop('charges', axis=1)
X_test = test.drop('charges', axis=1)

In [61]:
X_train.shape

(1600, 6)

In [62]:
X_test.shape

(400, 6)

In [63]:
from sklearn.model_selection import train_test_split


X_train, X_valid, y_train, y_valid = \
    train_test_split(X_train, y_train, test_size=0.3,
                                 random_state=0, stratify=y_train)

In [64]:
from sklearn.metrics import log_loss
import lightgbm as lgb
import numpy as np
import optuna
import tensorflow as tf

categorical_features = ['sex', 'smoker', 'region']

def objective(trial):
    params = {
        'objective': 'multiclass',
        'verbose': -1,
        'num_class': 3,
        'max_bin' : trial.suggest_int('max_bin',255,500),
        'learning_rate' : 0.05,
        'num_leaves' : trial.suggest_int('num_leaves',32,128),
        'metric' : 'multi_logloss',
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 100),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100)
    }
    
    lgb_train = lgb.Dataset(X_train, y_train, categorical_feature=categorical_features)
    lgb_eval = lgb.Dataset(X_valid, y_valid, reference=lgb_train, categorical_feature=categorical_features)
    
    model = lgb.train(params,lgb_train,
                      valid_sets=[lgb_train, lgb_eval],
                      verbose_eval=10,
                      num_boost_round=1000,
                      early_stopping_rounds=10)
    y_pred_valid = model.predict(X_valid, num_iteration=model.best_iteration)
    score = log_loss(y_valid,y_pred_valid)
    return score

In [65]:
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(seed=0))
study.optimize(objective,n_trials=40)

[I 2023-06-19 21:52:26,239] A new study created in memory with name: no-name-5fc864f7-4068-4643-80cb-dd58f2dfcd10
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:26,294] Trial 0 finished with value: 0.38348145448218673 and parameters: {'max_bin': 390, 'num_leaves': 101, 'lambda_l1': 0.0026599310838681858, 'lambda_l2': 0.0008015832747965142, 'feature_fraction': 0.6541928796033428, 'bagging_fraction': 0.7875364678399936, 'bagging_freq': 44, 'min_child_samples': 90}. Best is trial 0 with value: 0.38348145448218673.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction':

Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.490049	valid_1's multi_logloss: 0.50784
[20]	training's multi_logloss: 0.425426	valid_1's multi_logloss: 0.448819
[30]	training's multi_logloss: 0.379504	valid_1's multi_logloss: 0.412643
[40]	training's multi_logloss: 0.356326	valid_1's multi_logloss: 0.398345
[50]	training's multi_logloss: 0.339328	valid_1's multi_logloss: 0.390235
[60]	training's multi_logloss: 0.327231	valid_1's multi_logloss: 0.386616
[70]	training's multi_logloss: 0.317327	valid_1's multi_logloss: 0.384243
[80]	training's multi_logloss: 0.308475	valid_1's multi_logloss: 0.383741
Early stopping, best iteration is:
[72]	training's multi_logloss: 0.315772	valid_1's multi_logloss: 0.383481
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.436274	valid_1's multi_logloss: 0.501209
[20]	training's multi_logloss: 0.341583	valid_1's multi_logloss: 0.446205
[30]	training's multi_logloss:

  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:26,485] Trial 3 finished with value: 0.38845242585348544 and parameters: {'max_bin': 284, 'num_leaves': 94, 'lambda_l1': 1.9506510537765813e-07, 'lambda_l2': 3.177022228527139, 'feature_fraction': 0.7131089930500429, 'bagging_fraction': 0.6487971639943142, 'bagging_freq': 27, 'min_child_samples': 79}. Best is trial 0 with value: 0.38348145448218673.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:26,534] Trial 4 finished with value: 0.39339

  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),


Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.504584	valid_1's multi_logloss: 0.518332
[20]	training's multi_logloss: 0.442463	valid_1's multi_logloss: 0.460001
[30]	training's multi_logloss: 0.397771	valid_1's multi_logloss: 0.421388
[40]	training's multi_logloss: 0.375836	valid_1's multi_logloss: 0.402632
[50]	training's multi_logloss: 0.35957	valid_1's multi_logloss: 0.393559
[60]	training's multi_logloss: 0.347835	valid_1's multi_logloss: 0.390178
[70]	training's multi_logloss: 0.339402	valid_1's multi_logloss: 0.389021
[80]	training's multi_logloss: 0.3325	valid_1's multi_logloss: 0.389238
Early stopping, best iteration is:
[72]	training's multi_logloss: 0.338262	valid_1's multi_logloss: 0.388452
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.469811	valid_1's multi_logloss: 0.489096
[20]	training's multi_logloss: 0.394762	valid_1's multi_logloss: 0.429424
[30]	training's multi_logloss: 0

[I 2023-06-19 21:52:26,651] Trial 6 finished with value: 0.3975941467716012 and parameters: {'max_bin': 332, 'num_leaves': 67, 'lambda_l1': 0.0013544748136111338, 'lambda_l2': 8.859589936506331e-05, 'feature_fraction': 0.9930243028355357, 'bagging_fraction': 0.4612268864488169, 'bagging_freq': 21, 'min_child_samples': 20}. Best is trial 0 with value: 0.38348145448218673.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:26,739] Trial 7 finished with value: 0.39866967328122993 and parameters: {'max_bin': 415, 'num_leaves': 56, 'lambda_l1': 0.00015732471573476155, 'lambda_l2': 1.5842741206950525e-06, 'feature_fraction': 0.49538175018731184, 'bagging_fraction': 0.4662250846985831, 'bagging_freq': 66, 'min_child_samples': 18}. Best is trial 0

Early stopping, best iteration is:
[38]	training's multi_logloss: 0.306097	valid_1's multi_logloss: 0.397594
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.50311	valid_1's multi_logloss: 0.527776
[20]	training's multi_logloss: 0.419132	valid_1's multi_logloss: 0.46383
[30]	training's multi_logloss: 0.375684	valid_1's multi_logloss: 0.43482
[40]	training's multi_logloss: 0.34155	valid_1's multi_logloss: 0.416881
[50]	training's multi_logloss: 0.322887	valid_1's multi_logloss: 0.414437
[60]	training's multi_logloss: 0.305851	valid_1's multi_logloss: 0.410558
[70]	training's multi_logloss: 0.287607	valid_1's multi_logloss: 0.405564
[80]	training's multi_logloss: 0.270778	valid_1's multi_logloss: 0.3997
[90]	training's multi_logloss: 0.258952	valid_1's multi_logloss: 0.398797
Early stopping, best iteration is:
[81]	training's multi_logloss: 0.269076	valid_1's multi_logloss: 0.39867
Training until validation scores don't improve for 10 rounds
[

[I 2023-06-19 21:52:26,854] Trial 9 finished with value: 0.4060396526877144 and parameters: {'max_bin': 495, 'num_leaves': 90, 'lambda_l1': 0.0450165506544907, 'lambda_l2': 2.2526313830315164e-08, 'feature_fraction': 0.5696841775458458, 'bagging_fraction': 0.47211793672790137, 'bagging_freq': 30, 'min_child_samples': 16}. Best is trial 0 with value: 0.38348145448218673.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:26,920] Trial 10 finished with value: 0.4040842434244033 and parameters: {'max_bin': 333, 'num_leaves': 72, 'lambda_l1': 3.778569956294474e-08, 'lambda_l2': 0.017070611332063324, 'feature_fraction': 0.739960872523945, 'bagging_fraction': 0.5592336945636672, 'bagging_freq': 53, 'min_child_samples': 14}. Best is trial 0 with 

[50]	training's multi_logloss: 0.312218	valid_1's multi_logloss: 0.410101
[60]	training's multi_logloss: 0.295467	valid_1's multi_logloss: 0.406382
Early stopping, best iteration is:
[59]	training's multi_logloss: 0.296261	valid_1's multi_logloss: 0.40604
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.455972	valid_1's multi_logloss: 0.500421
[20]	training's multi_logloss: 0.373574	valid_1's multi_logloss: 0.444511
[30]	training's multi_logloss: 0.316615	valid_1's multi_logloss: 0.413614
[40]	training's multi_logloss: 0.284589	valid_1's multi_logloss: 0.406595
[50]	training's multi_logloss: 0.261551	valid_1's multi_logloss: 0.407464
Early stopping, best iteration is:
[45]	training's multi_logloss: 0.271568	valid_1's multi_logloss: 0.404084
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.490131	valid_1's multi_logloss: 0.521045
[20]	training's multi_logloss: 0.397328	valid_1's multi_logloss: 0.45

  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:27,077] Trial 12 finished with value: 0.38641960894696153 and parameters: {'max_bin': 399, 'num_leaves': 33, 'lambda_l1': 0.2887089604396595, 'lambda_l2': 1.1021971551176847e-08, 'feature_fraction': 0.8066899220777382, 'bagging_fraction': 0.562004783915299, 'bagging_freq': 74, 'min_child_samples': 97}. Best is trial 0 with value: 0.38348145448218673.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:27,161] Trial 13 finished with value: 0.389

Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.5095	valid_1's multi_logloss: 0.513922
[20]	training's multi_logloss: 0.43723	valid_1's multi_logloss: 0.449707
[30]	training's multi_logloss: 0.395777	valid_1's multi_logloss: 0.417259
[40]	training's multi_logloss: 0.372749	valid_1's multi_logloss: 0.402222
[50]	training's multi_logloss: 0.356564	valid_1's multi_logloss: 0.394652
[60]	training's multi_logloss: 0.345394	valid_1's multi_logloss: 0.390999
[70]	training's multi_logloss: 0.337327	valid_1's multi_logloss: 0.38876
[80]	training's multi_logloss: 0.329882	valid_1's multi_logloss: 0.38739
[90]	training's multi_logloss: 0.324764	valid_1's multi_logloss: 0.38642
[100]	training's multi_logloss: 0.320955	valid_1's multi_logloss: 0.387863
Early stopping, best iteration is:
[90]	training's multi_logloss: 0.324764	valid_1's multi_logloss: 0.38642
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.51

  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:27,313] Trial 15 finished with value: 0.3813251859830747 and parameters: {'max_bin': 433, 'num_leaves': 80, 'lambda_l1': 4.024877923200602, 'lambda_l2': 0.0062504573061377385, 'feature_fraction': 0.6543130291349077, 'bagging_fraction': 0.7638359284767546, 'bagging_freq': 2, 'min_child_samples': 33}. Best is trial 15 with value: 0.3813251859830747.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:27,370] Trial 16 finished with value: 0.390952

Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.496825	valid_1's multi_logloss: 0.517917
[20]	training's multi_logloss: 0.436305	valid_1's multi_logloss: 0.4647
[30]	training's multi_logloss: 0.391491	valid_1's multi_logloss: 0.426782
[40]	training's multi_logloss: 0.368034	valid_1's multi_logloss: 0.409893
[50]	training's multi_logloss: 0.353379	valid_1's multi_logloss: 0.400573
[60]	training's multi_logloss: 0.34305	valid_1's multi_logloss: 0.395446
[70]	training's multi_logloss: 0.335949	valid_1's multi_logloss: 0.39244
[80]	training's multi_logloss: 0.329967	valid_1's multi_logloss: 0.389517
[90]	training's multi_logloss: 0.324793	valid_1's multi_logloss: 0.386996
[100]	training's multi_logloss: 0.320073	valid_1's multi_logloss: 0.385974
[110]	training's multi_logloss: 0.316241	valid_1's multi_logloss: 0.384592
[120]	training's multi_logloss: 0.313231	valid_1's multi_logloss: 0.383547
[130]	training's multi_logloss: 0.310723	valid_1's m

  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:27,476] Trial 18 finished with value: 0.3890078653759553 and parameters: {'max_bin': 453, 'num_leaves': 100, 'lambda_l1': 7.980718378532859e-08, 'lambda_l2': 1.8851396313227162, 'feature_fraction': 0.8285447797294669, 'bagging_fraction': 0.99930820394072, 'bagging_freq': 15, 'min_child_samples': 88}. Best is trial 15 with value: 0.3813251859830747.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:27,547] Trial 19 finished with value: 0.41349

[I 2023-06-19 21:52:27,604] Trial 20 finished with value: 0.38594367238873173 and parameters: {'max_bin': 426, 'num_leaves': 75, 'lambda_l1': 0.031513772481390125, 'lambda_l2': 0.6272503803859496, 'feature_fraction': 0.9853129030017315, 'bagging_fraction': 0.9134820054355666, 'bagging_freq': 2, 'min_child_samples': 39}. Best is trial 15 with value: 0.3813251859830747.


Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.476637	valid_1's multi_logloss: 0.49664
[20]	training's multi_logloss: 0.399231	valid_1's multi_logloss: 0.434882
[30]	training's multi_logloss: 0.360843	valid_1's multi_logloss: 0.408077
[40]	training's multi_logloss: 0.33893	valid_1's multi_logloss: 0.397453
[50]	training's multi_logloss: 0.32343	valid_1's multi_logloss: 0.392836
[60]	training's multi_logloss: 0.3119	valid_1's multi_logloss: 0.390113
[70]	training's multi_logloss: 0.301275	valid_1's multi_logloss: 0.389044
Early stopping, best iteration is:
[69]	training's multi_logloss: 0.302545	valid_1's multi_logloss: 0.389008
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.432858	valid_1's multi_logloss: 0.495346
[20]	training's multi_logloss: 0.333696	valid_1's multi_logloss: 0.435469
[30]	training's multi_logloss: 0.276607	valid_1's multi_logloss: 0.418002
[40]	training's multi_logloss: 0.2

  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:27,655] Trial 21 finished with value: 0.414449797096688 and parameters: {'max_bin': 434, 'num_leaves': 48, 'lambda_l1': 0.0004890231917101806, 'lambda_l2': 3.083485390256928e-08, 'feature_fraction': 0.5199979149378401, 'bagging_fraction': 0.4111130766763684, 'bagging_freq': 80, 'min_child_samples': 26}. Best is trial 15 with value: 0.3813251859830747.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:27,744] Trial 22 finished with value: 0.39

Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.514311	valid_1's multi_logloss: 0.532127
[20]	training's multi_logloss: 0.435179	valid_1's multi_logloss: 0.468824
[30]	training's multi_logloss: 0.395324	valid_1's multi_logloss: 0.441444
[40]	training's multi_logloss: 0.366494	valid_1's multi_logloss: 0.425237
[50]	training's multi_logloss: 0.351236	valid_1's multi_logloss: 0.420407
[60]	training's multi_logloss: 0.336514	valid_1's multi_logloss: 0.415715
[70]	training's multi_logloss: 0.32759	valid_1's multi_logloss: 0.416461
Early stopping, best iteration is:
[64]	training's multi_logloss: 0.332315	valid_1's multi_logloss: 0.41445
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.500197	valid_1's multi_logloss: 0.525126
[20]	training's multi_logloss: 0.412179	valid_1's multi_logloss: 0.458983
[30]	training's multi_logloss: 0.363536	valid_1's multi_logloss: 0.428428
[40]	training's multi_logloss: 

  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:27,867] Trial 24 finished with value: 0.38465033255357406 and parameters: {'max_bin': 300, 'num_leaves': 123, 'lambda_l1': 0.04528529022952061, 'lambda_l2': 0.00025949516931228906, 'feature_fraction': 0.5364487767839939, 'bagging_fraction': 0.5526138890622357, 'bagging_freq': 6, 'min_child_samples': 46}. Best is trial 15 with value: 0.3813251859830747.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:27,923] Trial 25 finished with value: 0.3

Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.512606	valid_1's multi_logloss: 0.525146
[20]	training's multi_logloss: 0.43385	valid_1's multi_logloss: 0.461255
[30]	training's multi_logloss: 0.39183	valid_1's multi_logloss: 0.429615
[40]	training's multi_logloss: 0.361622	valid_1's multi_logloss: 0.4111
[50]	training's multi_logloss: 0.345767	valid_1's multi_logloss: 0.402443
[60]	training's multi_logloss: 0.328838	valid_1's multi_logloss: 0.394822
[70]	training's multi_logloss: 0.318128	valid_1's multi_logloss: 0.390229
[80]	training's multi_logloss: 0.307232	valid_1's multi_logloss: 0.386899
[90]	training's multi_logloss: 0.297074	valid_1's multi_logloss: 0.386563
[100]	training's multi_logloss: 0.289091	valid_1's multi_logloss: 0.385174
[110]	training's multi_logloss: 0.281246	valid_1's multi_logloss: 0.385546
Early stopping, best iteration is:
[104]	training's multi_logloss: 0.286222	valid_1's multi_logloss: 0.38465
Training until val

[I 2023-06-19 21:52:28,021] Trial 26 finished with value: 0.3876097930148041 and parameters: {'max_bin': 386, 'num_leaves': 118, 'lambda_l1': 8.185600152816129, 'lambda_l2': 8.955149893759669e-07, 'feature_fraction': 0.7978469218600606, 'bagging_fraction': 0.5579934260422904, 'bagging_freq': 3, 'min_child_samples': 77}. Best is trial 15 with value: 0.3813251859830747.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:28,071] Trial 27 finished with value: 0.3853899525110762 and parameters: {'max_bin': 333, 'num_leaves': 69, 'lambda_l1': 0.0019717598633691787, 'lambda_l2': 0.301603305596099, 'feature_fraction': 0.7773891061546893, 'bagging_fraction': 0.9235903932684372, 'bagging_freq': 28, 'min_child_samples': 81}. Best is trial 15 with val

  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),


Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.4707	valid_1's multi_logloss: 0.490939
[20]	training's multi_logloss: 0.394418	valid_1's multi_logloss: 0.429379
[30]	training's multi_logloss: 0.354576	valid_1's multi_logloss: 0.40579
[40]	training's multi_logloss: 0.332454	valid_1's multi_logloss: 0.393332
[50]	training's multi_logloss: 0.315965	valid_1's multi_logloss: 0.388011
[60]	training's multi_logloss: 0.302077	valid_1's multi_logloss: 0.386199
[70]	training's multi_logloss: 0.29023	valid_1's multi_logloss: 0.385881
Early stopping, best iteration is:
[65]	training's multi_logloss: 0.295933	valid_1's multi_logloss: 0.38539
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.419208	valid_1's multi_logloss: 0.460202
[20]	training's multi_logloss: 0.338405	valid_1's multi_logloss: 0.408311
[30]	training's multi_logloss: 0.293771	valid_1's multi_logloss: 0.390003
[40]	training's multi_logloss: 0.2

[I 2023-06-19 21:52:28,239] Trial 30 finished with value: 0.40043803163596803 and parameters: {'max_bin': 467, 'num_leaves': 43, 'lambda_l1': 0.0004533273946994952, 'lambda_l2': 1.5438779234885336e-07, 'feature_fraction': 0.8301158087155562, 'bagging_fraction': 0.6376358216843763, 'bagging_freq': 57, 'min_child_samples': 22}. Best is trial 15 with value: 0.3813251859830747.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:28,320] Trial 31 finished with value: 0.39239460395051523 and parameters: {'max_bin': 290, 'num_leaves': 79, 'lambda_l1': 1.586772410276577e-05, 'lambda_l2': 2.909963269906004, 'feature_fraction': 0.8591951522841792, 'bagging_fraction': 0.8491981719103283, 'bagging_freq': 91, 'min_child_samples': 13}. Best is trial 15 w

Early stopping, best iteration is:
[37]	training's multi_logloss: 0.287025	valid_1's multi_logloss: 0.400438
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.458235	valid_1's multi_logloss: 0.49686
[20]	training's multi_logloss: 0.366607	valid_1's multi_logloss: 0.431544
[30]	training's multi_logloss: 0.315999	valid_1's multi_logloss: 0.4067
[40]	training's multi_logloss: 0.283334	valid_1's multi_logloss: 0.397133
[50]	training's multi_logloss: 0.259117	valid_1's multi_logloss: 0.393696
[60]	training's multi_logloss: 0.240206	valid_1's multi_logloss: 0.394252
Early stopping, best iteration is:
[53]	training's multi_logloss: 0.253349	valid_1's multi_logloss: 0.392395
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.556518	valid_1's multi_logloss: 0.558184
[20]	training's multi_logloss: 0.490024	valid_1's multi_logloss: 0.494956
[30]	training's multi_logloss: 0.455995	valid_1's multi_logloss: 0.4641

[I 2023-06-19 21:52:28,498] Trial 33 finished with value: 0.3756202124969309 and parameters: {'max_bin': 419, 'num_leaves': 108, 'lambda_l1': 3.432187077228294e-06, 'lambda_l2': 0.0018953590604522356, 'feature_fraction': 0.4383731596725887, 'bagging_fraction': 0.6913765575607738, 'bagging_freq': 98, 'min_child_samples': 89}. Best is trial 33 with value: 0.3756202124969309.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:28,556] Trial 34 finished with value: 0.38550554274382554 and parameters: {'max_bin': 338, 'num_leaves': 125, 'lambda_l1': 1.2170674239379863e-06, 'lambda_l2': 3.4983994957691755, 'feature_fraction': 0.9648266228238992, 'bagging_fraction': 0.879521552411435, 'bagging_freq': 64, 'min_child_samples': 88}. Best is trial 33 

[90]	training's multi_logloss: 0.33346	valid_1's multi_logloss: 0.383397
[100]	training's multi_logloss: 0.326404	valid_1's multi_logloss: 0.381218
[110]	training's multi_logloss: 0.319815	valid_1's multi_logloss: 0.379365
[120]	training's multi_logloss: 0.313829	valid_1's multi_logloss: 0.37766
[130]	training's multi_logloss: 0.308214	valid_1's multi_logloss: 0.376721
[140]	training's multi_logloss: 0.303737	valid_1's multi_logloss: 0.375693
[150]	training's multi_logloss: 0.299892	valid_1's multi_logloss: 0.376115
Early stopping, best iteration is:
[143]	training's multi_logloss: 0.302327	valid_1's multi_logloss: 0.37562
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.463951	valid_1's multi_logloss: 0.482063
[20]	training's multi_logloss: 0.399772	valid_1's multi_logloss: 0.427731
[30]	training's multi_logloss: 0.369063	valid_1's multi_logloss: 0.405843
[40]	training's multi_logloss: 0.350475	valid_1's multi_logloss: 0.395573
[50]	trainin

  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:28,742] Trial 37 finished with value: 0.39651958778495505 and parameters: {'max_bin': 279, 'num_leaves': 115, 'lambda_l1': 5.70525947903341, 'lambda_l2': 4.441318538278994, 'feature_fraction': 0.9439332995327073, 'bagging_fraction': 0.8644283996191833, 'bagging_freq': 34, 'min_child_samples': 12}. Best is trial 33 with value: 0.3756202124969309.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2023-06-19 21:52:28,802] Trial 38 finished with value: 0.41424986

Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.472295	valid_1's multi_logloss: 0.498502
[20]	training's multi_logloss: 0.406429	valid_1's multi_logloss: 0.447621
[30]	training's multi_logloss: 0.374297	valid_1's multi_logloss: 0.423043
[40]	training's multi_logloss: 0.356592	valid_1's multi_logloss: 0.410435
[50]	training's multi_logloss: 0.345717	valid_1's multi_logloss: 0.403504
[60]	training's multi_logloss: 0.338599	valid_1's multi_logloss: 0.399707
[70]	training's multi_logloss: 0.333365	valid_1's multi_logloss: 0.397423
[80]	training's multi_logloss: 0.329347	valid_1's multi_logloss: 0.396557
[90]	training's multi_logloss: 0.326592	valid_1's multi_logloss: 0.396556
Early stopping, best iteration is:
[82]	training's multi_logloss: 0.328705	valid_1's multi_logloss: 0.39652
Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.462285	valid_1's multi_logloss: 0.496402
[20]	training's multi_logloss:

In [66]:
study.best_params

{'max_bin': 419,
 'num_leaves': 108,
 'lambda_l1': 3.432187077228294e-06,
 'lambda_l2': 0.0018953590604522356,
 'feature_fraction': 0.4383731596725887,
 'bagging_fraction': 0.6913765575607738,
 'bagging_freq': 98,
 'min_child_samples': 89}

In [67]:
params = {
    'objective': 'multiclass',
    'verbose': -1,
    'num_class': 3,
    'max_bin' : study.best_params['max_bin'],
    'learning_rate' : 0.05,
    'num_leaves' : study.best_params['num_leaves'],
    'metric' : 'multi_logloss',
    'lambda_l1': study.best_params['lambda_l1'],
    'lambda_l2': study.best_params['lambda_l2'],
    'feature_fraction': study.best_params['feature_fraction'],
    'bagging_fraction': study.best_params['bagging_fraction'],
    'bagging_freq': study.best_params['bagging_freq'],
    'min_child_samples': study.best_params['min_child_samples']
}

lgb_train = lgb.Dataset(X_train, y_train, categorical_feature=categorical_features)
lgb_eval = lgb.Dataset(X_valid, y_valid, reference=lgb_train, categorical_feature=categorical_features)

model = lgb.train(params,lgb_train,
                  valid_sets=[lgb_train, lgb_eval],
                  verbose_eval=10,
                  num_boost_round=1000,
                  early_stopping_rounds=10)
y_pred = model.predict(X_test, num_iteration=model.best_iteration)

Training until validation scores don't improve for 10 rounds
[10]	training's multi_logloss: 0.53889	valid_1's multi_logloss: 0.542051
[20]	training's multi_logloss: 0.464411	valid_1's multi_logloss: 0.473401
[30]	training's multi_logloss: 0.424115	valid_1's multi_logloss: 0.439256
[40]	training's multi_logloss: 0.396632	valid_1's multi_logloss: 0.416869
[50]	training's multi_logloss: 0.378813	valid_1's multi_logloss: 0.4062
[60]	training's multi_logloss: 0.362579	valid_1's multi_logloss: 0.39522
[70]	training's multi_logloss: 0.351304	valid_1's multi_logloss: 0.390167
[80]	training's multi_logloss: 0.342395	valid_1's multi_logloss: 0.387488
[90]	training's multi_logloss: 0.33346	valid_1's multi_logloss: 0.383397
[100]	training's multi_logloss: 0.326404	valid_1's multi_logloss: 0.381218
[110]	training's multi_logloss: 0.319815	valid_1's multi_logloss: 0.379365
[120]	training's multi_logloss: 0.313829	valid_1's multi_logloss: 0.37766
[130]	training's multi_logloss: 0.308214	valid_1's mul



In [68]:
y_pred.shape

(400, 3)

In [69]:
y_pred_max = np.argmax(y_pred, axis=1)

In [70]:
sample_submit = pd.read_csv("sample_submit.csv",header=None, names=['id','charges'])

In [71]:
sample_submit['charges'] = y_pred_max

In [72]:
pd.DataFrame(sample_submit).to_csv('hold-outOptuna3_.csv', index=False, header=False)