In [2]:
from src.model.boost.algo import Lgbm
# Build the model inputs once, outside the tuning objective, so every trial reuses them;
# the objective unpacks this mapping as keyword arguments to `import_data`.
input_data = Lgbm.df_input()

In [10]:
import optuna

def objective(trial: optuna.Trial):
    """Fit one `Lgbm` model with trial-sampled hyper-parameters and score it.

    Args:
        trial: Optuna trial used to sample every hyper-parameter below.

    Returns:
        The mean of `validation_pred().rankic()`, converted with `.item()`.

    Reads the module-level `input_data` mapping and forwards it to
    `Lgbm.import_data` as keyword arguments.
    """
    # Disabled search dimensions, kept for reference:
    #   'seed'            - fixed random seed, searched over 2023..2023
    #   'num_boost_round' - number of boosting rounds, integer in 50..300
    metric = trial.suggest_categorical('metric', [None, 'rmse'])
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 0.1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 10)  # integer-valued
    linear_tree = trial.suggest_categorical('linear_tree', [True, False])
    lambda_l2 = trial.suggest_float('lambda_l2', 1e-6, 0.1, log=True)
    # NOTE(review): in LightGBM's native parameters `alpha` belongs to the
    # Huber/quantile losses and L1 regularisation is `lambda_l1` - confirm how
    # `Lgbm` interprets this key.
    alpha = trial.suggest_float('alpha', 1e-7, 0.1, log=True)

    sampled_params = dict(
        metric=metric,
        learning_rate=learning_rate,
        max_depth=max_depth,
        linear_tree=linear_tree,
        lambda_l2=lambda_l2,
        alpha=alpha,
    )

    booster = Lgbm(sampled_params)
    booster.import_data(**input_data)
    booster.fit(silence=True)

    validation_rankic = booster.validation_pred().rankic()
    return validation_rankic.mean().item()

# Tune the Lgbm hyper-parameters: maximise the value returned by `objective`
# over 20 trials, persisting every trial to the SQLite storage below.
# The sampler is given a fixed seed so the sequence of suggested parameters is
# repeatable after a kernel restart. TPESampler is Optuna's default
# single-objective sampler, so only the random state changes.
study = optuna.create_study(
    storage='sqlite:///visualization/optuna.sqlite3',
    sampler=optuna.samplers.TPESampler(seed=2023),
    direction='maximize',
)
study.optimize(objective, n_trials=20)

print(study.best_params)

[I 2024-08-30 14:42:32,877] A new study created in RDB with name: no-name-914bd3ea-a268-48b5-9c92-05f4db0ef9fd
[I 2024-08-30 14:42:52,470] Trial 0 finished with value: 0.151337668299675 and parameters: {'metric': 'rmse', 'learning_rate': 0.00031837529395438565, 'max_depth': 4, 'linear_tree': False, 'lambda_l2': 1.0179933591421158e-06, 'alpha': 0.009673046188434384}. Best is trial 0 with value: 0.151337668299675.
[I 2024-08-30 14:43:27,251] Trial 1 finished with value: 0.16787059605121613 and parameters: {'metric': None, 'learning_rate': 0.00011333949272537665, 'max_depth': 7, 'linear_tree': False, 'lambda_l2': 0.017456843726619483, 'alpha': 4.546637576134971e-05}. Best is trial 1 with value: 0.16787059605121613.
[I 2024-08-30 14:43:42,888] Trial 2 finished with value: 0.0786234587430954 and parameters: {'metric': 'rmse', 'learning_rate': 0.0003831127692671432, 'max_depth': 3, 'linear_tree': True, 'lambda_l2': 0.0010205913746380764, 'alpha': 0.0014691819601486266}. Best is trial 1 with 

{'metric': 'rmse', 'learning_rate': 0.00015730705586057832, 'max_depth': 10, 'linear_tree': False, 'lambda_l2': 2.7592667501022596e-06, 'alpha': 0.009765892479356657}


In [6]:
# Show the best hyper-parameters found by the current `study`.
# NOTE(review): the output recorded below lists 'seed' and 'num_boost_round',
# which the visible tuning objective does not sample, and this cell's execution
# count (6) is lower than the tuning cell's (10) - the output appears to come
# from an earlier study; re-run this cell after tuning.
study.best_params

{'metric': 'rmse',
 'seed': 2023,
 'num_boost_round': 52,
 'learning_rate': 0.03998632904310606,
 'max_depth': 5}

In [5]:
# Plot the hyper-parameter importances of `study` and render the figure.
# NOTE(review): depends on `optuna` and `study` created in other cells, and this
# cell's execution count (5) is lower than theirs in the saved notebook -
# re-run the notebook top to bottom before trusting the figure.
fig = optuna.visualization.plot_param_importances(study)
fig.show()

In [8]:
import optuna
 
# Function to optimise.
def objective(trial: optuna.Trial):
    """Toy objective: sample `x` from [-10, 10] and return `(x - 2) ** 2`."""
    sampled_x = trial.suggest_float('x', -10, 10)
    offset = sampled_x - 2
    return offset ** 2
 
# Optimise through the study object's `optimize`, passing the objective defined
# above and the number of trials.
# The TPE sampler is given a fixed seed so repeated runs suggest the same
# sequence of `x` values; unseeded runs return a different best `x` each time.
study = optuna.create_study(
    sampler=optuna.samplers.TPESampler(seed=2023),
    storage='sqlite:///db.sqlite3',
)
study.optimize(objective, n_trials=20)

print(study.best_params)
# Example recorded from an earlier, unseeded run: {'x': 2.128194654190586}


[I 2024-08-30 10:53:33,759] A new study created in RDB with name: no-name-7d176243-549d-4201-a625-942da14c9392
[I 2024-08-30 10:53:33,846] Trial 0 finished with value: 2.3420550272871314 and parameters: {'x': 0.46962258665153733}. Best is trial 0 with value: 2.3420550272871314.
[I 2024-08-30 10:53:33,886] Trial 1 finished with value: 68.29174508615235 and parameters: {'x': -6.2638819622591635}. Best is trial 0 with value: 2.3420550272871314.
[I 2024-08-30 10:53:33,932] Trial 2 finished with value: 29.824745983593704 and parameters: {'x': 7.461203711966228}. Best is trial 0 with value: 2.3420550272871314.
[I 2024-08-30 10:53:33,988] Trial 3 finished with value: 34.331908215632694 and parameters: {'x': -3.859343667650217}. Best is trial 0 with value: 2.3420550272871314.
[I 2024-08-30 10:53:34,030] Trial 4 finished with value: 82.74381624973537 and parameters: {'x': -7.096362803326139}. Best is trial 0 with value: 2.3420550272871314.
[I 2024-08-30 10:53:34,068] Trial 5 finished with value

{'x': 1.4626236263272538}


In [62]:
# Call `predict()`; the recorded output is a BoosterOutput with the fields
# pred, secid, date, finite and label.
# NOTE(review): `self` is not defined in any cell visible here - presumably a
# model instance left in the kernel from a debugging session. TODO: bind it
# explicitly or remove this cell, otherwise it raises NameError on a fresh kernel.
self.predict()

BoosterOutput(pred=tensor([-0.0034, -0.0074,  0.0023,  ..., -0.0149, -0.0149,  0.0158],
       dtype=torch.float64), secid=array([     1,      2,      4, ..., 873576, 873593, 873665]), date=array([20231031]), finite=tensor([[True],
        [True],
        [True],
        ...,
        [True],
        [True],
        [True]]), label=tensor([[-0.0746],
        [ 0.0097],
        [ 0.0000],
        ...,
        [ 0.4355],
        [ 0.8805],
        [ 0.5677]], dtype=torch.float64))

In [21]:
# Convert the object with `to_dict()` and list the resulting top-level keys
# (the recorded output is a `dict_keys` view).
# NOTE(review): `self` is not defined in any cell visible here - presumably a
# model instance left over in the kernel; TODO: bind it explicitly.
model_dict = self.to_dict()
model_dict.keys()

dict_keys(['class_name', 'train_param', 'weight_param', 'model'])