## Bayesian optimization of the LightGBM model using Optuna

In [1]:
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import lightgbm as lgb
import optuna
from optuna.visualization import plot_parallel_coordinate, plot_slice, plot_optimization_history, \
        plot_contour, plot_param_importances

In [2]:
# Load the CSV at regression/final_elem_SP.csv into a DataFrame and preview
# its first five rows.
data = pd.read_csv(filepath_or_buffer='regression/final_elem_SP.csv')
data.head(n=5)

Unnamed: 0,formula,S_p,Bandgap,mpe,PF,X_b,std_BL,mean_BL,vdw_r_b,Mean_NpValence,...,Mean_NfUnfilled,AM_b,MP_b,Mean_NsUnfilled,Mean_NsValence,MeanColumn,MEN_V_b,Mean_NdUnfilled,TC_b,D_b
0,ReO3,0.336845,0.0,0.311128,0.276212,3.44,1e-06,1.913169,1.52,3.0,...,0.0,15.9994,54.8,0.0,2.0,13.75,14.0,1.25,0.02658,0.001308
1,Yb3Mg,14.406792,0.0,0.74048,0.573238,1.31,0.0,3.690337,1.73,0.0,...,0.0,24.305,923.0,0.0,2.0,2.75,14.0,0.0,160.0,1.74
2,B9H11,638.974583,2.911795,0.101052,0.125865,2.2,0.059786,1.236931,1.1,0.45,...,0.0,1.00794,14.01,0.55,1.45,6.4,14.1,0.0,0.1805,8.2e-05
3,Li3Ga7,9.061345,0.0,0.542895,0.55513,1.81,0.073217,2.838474,1.87,0.7,...,0.0,69.723,302.91,0.3,1.7,9.4,11.8,0.0,29.0,5.91
4,Sc3Cd,22.887388,0.0,0.713049,0.719525,1.69,0.004315,3.206197,2.18,0.0,...,0.0,112.411,594.22,0.0,2.0,5.25,13.1,6.75,97.0,8.69


In [3]:
# Remove the listed columns so they are not part of the frame from which the
# features are sliced below.
data = data.drop(
    columns=[
        'TC_a', 'D_a', 'MEN_V_a', 'Mean_NfValence', 'EA_b', 'r_a',
        'Mean_NdValence', 'r_b', 'Mean_GSbandgap', 'Number', 'Mean_GSmagmom',
        'Mean_NfUnfilled', 'AM_b', 'MP_b', 'Mean_NsUnfilled', 'Mean_NsValence',
        'MeanColumn', 'MEN_V_b', 'Mean_NdUnfilled', 'TC_b', 'D_b',
    ],
)

In [4]:
# Preview the first five rows of the reduced frame.
data.head(n=5)

Unnamed: 0,formula,S_p,Bandgap,mpe,PF,X_b,std_BL,mean_BL,vdw_r_b,Mean_NpValence,...,MeltingT,Mean_NValence,X_a,Mean_GSvolume_pa,Mean_SpaceGroupNumber,MeanCovalentRadius,Dp_a,Mean_NpUnfilled,AM_a,MP_a
0,ReO3,0.336845,0.0,0.311128,0.276212,3.44,1e-06,1.913169,1.52,3.0,...,905.85,9.75,1.9,10.4925,57.5,87.25,62.0,1.5,186.207,3459.0
1,Yb3Mg,14.406792,0.0,0.74048,0.573238,1.31,0.0,3.690337,1.73,0.0,...,1049.75,12.5,1.1,31.3125,217.25,175.5,139.0,0.0,173.04,1097.0
2,B9H11,638.974583,2.911795,0.101052,0.125865,2.2,0.059786,1.236931,1.1,0.45,...,1064.3055,1.9,2.04,6.865875,181.4,54.85,20.5,2.25,10.811,2349.0
3,Li3Ga7,9.061345,0.0,0.542895,0.55513,1.81,0.073217,2.838474,1.87,0.7,...,348.144,9.4,0.98,18.17825,113.5,123.8,164.1125,3.5,6.941,453.69
4,Sc3Cd,22.887388,0.0,0.713049,0.719525,1.69,0.004315,3.206197,2.18,0.0,...,1509.055,5.25,1.36,21.55,194.0,163.5,97.0,0.0,44.955912,1814.0


In [5]:
# Positional split of the frame: the target is the column at position 1, and
# the features are the columns from position 2 up to, but not including, the
# last column.
# NOTE(review): the `-1` stop leaves the final column out of X -- confirm that
# excluding it is intended.
Y = data.iloc[:, 1]
X = data.iloc[:, 2:-1]

In [6]:
# =============================================================================
#        Define the Optuna objective function
# =============================================================================

def objective(trial, seed=42):
    """Train one LightGBM regressor with trial-suggested hyperparameters.

    Reads the module-level feature frame ``X`` and target ``Y``, holds out
    30% of the rows for validation, fits a LightGBM model on the remaining
    rows, and scores it on the held-out rows.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.
    seed : int, default 42
        Random state for the train/validation split. It is fixed so that every
        trial is scored on the same held-out rows; with an unseeded split the
        trial values would differ because of the split as well as the
        hyperparameters, and could not be reproduced.

    Returns
    -------
    float
        Root-mean-squared error of the predictions on the validation rows
        (lower is better).
    """
    train_x, valid_x, train_y, valid_y = train_test_split(
        X, Y, test_size=0.3, random_state=seed
    )
    dtrain = lgb.Dataset(train_x, label=train_y)

    param = {
        "objective": "regression",
        "metric": "root_mean_squared_error",
        "verbosity": -1,
        "boosting_type": "gbdt",
        # Regularisation strengths are sampled on a log scale.
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 50),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1),
    }

    gbm = lgb.train(param, dtrain)
    preds = gbm.predict(valid_x)

    # The value handed back to Optuna is an error (RMSE), not an accuracy.
    rmse = np.sqrt(mean_squared_error(valid_y, preds))
    return rmse

In [8]:
# =============================================================================
#                    Perform bayesian optimization
# =============================================================================
# The final hyperparameters change every time the code is run, so run the
# optimization at least 3-4 times and select the best result.

if __name__ == "__main__":
    # Minimise the value returned by `objective` over 500 trials.
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=500)

    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    # Open the results file only after the optimization has finished, and use
    # a context manager so the handle is closed even if a write fails.
    # Append mode keeps the results of earlier runs in the same file.
    with open('regression/best-paramters_lgb_optuna_TPE_plot1.txt', 'a') as f:
        print("  Value: {}".format(trial.value))
        f.write("  Value: {}\n".format(trial.value))

        print("  Params: ")
        for key, value in trial.params.items():
            print("    {}: {}".format(key, value))
            f.write("    {}: {}\n".format(key, value))

        # A blank line separates this run's entry from the next one.
        f.write("\n")

[32m[I 2023-01-08 21:39:46,888][0m A new study created in memory with name: no-name-71b4633b-ec9b-45a5-9549-a046543f6732[0m
[32m[I 2023-01-08 21:39:47,030][0m Trial 0 finished with value: 48.59433143065533 and parameters: {'lambda_l1': 2.658418799713775, 'lambda_l2': 3.090436805940092e-05, 'num_leaves': 47, 'feature_fraction': 0.9807412938747926, 'bagging_fraction': 0.5845058542871782, 'bagging_freq': 7, 'min_child_samples': 58, 'learning_rate': 0.04219314335777799}. Best is trial 0 with value: 48.59433143065533.[0m
[32m[I 2023-01-08 21:39:47,110][0m Trial 1 finished with value: 46.00950056825774 and parameters: {'lambda_l1': 1.741434959352254, 'lambda_l2': 0.002857333770330985, 'num_leaves': 12, 'feature_fraction': 0.9846769237191453, 'bagging_fraction': 0.9285185872048476, 'bagging_freq': 3, 'min_child_samples': 68, 'learning_rate': 0.09903824087007704}. Best is trial 1 with value: 46.00950056825774.[0m
[32m[I 2023-01-08 21:39:47,255][0m Trial 2 finished with value: 47.449

Number of finished trials: 500
Best trial:
  Value: 40.77371195316632
  Params: 
    lambda_l1: 2.7519222236869456e-05
    lambda_l2: 4.295059234532295e-08
    num_leaves: 41
    feature_fraction: 0.9699317353516802
    bagging_fraction: 0.9528809862071511
    bagging_freq: 5
    min_child_samples: 49
    learning_rate: 0.07445682930952059


In [6]:
# Parallel-coordinate plot of the study: build the figure, save it as a PNG,
# then display it.
p1 = plot_parallel_coordinate(study=study)
p1.write_image(file='images/parallel_coordinate_plot_lgb_reg.png')
p1.show()

In [7]:
# Contour plot restricted to three hyperparameters: build the figure, save it
# as a PNG, then display it.
p2 = plot_contour(
    study,
    params=["min_child_samples", "learning_rate", "feature_fraction"],
)
p2.write_image(file='images/contour_plot_imp_lgb_reg.png')
p2.show()

In [8]:
# Slice plot of the study: build the figure, save it as a PNG, then display it.
p3 = plot_slice(study=study)
p3.write_image(file='images/slice_plot_lgb_reg.png')
p3.show()

In [9]:
# Hyperparameter-importance plot of the study: build the figure, save it as a
# PNG, then display it.
p4 = plot_param_importances(study=study)
p4.write_image(file='images/param_imp_plot_lgb_reg.png')
p4.show()