-
Notifications
You must be signed in to change notification settings - Fork 389
/
lightgbm.py
159 lines (139 loc) · 5.45 KB
/
lightgbm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import numpy as np
import lightgbm as lgb
import optuna
from supervised.utils.metric import Metric
from supervised.utils.metric import (
lightgbm_eval_metric_r2,
lightgbm_eval_metric_spearman,
lightgbm_eval_metric_pearson,
lightgbm_eval_metric_f1,
lightgbm_eval_metric_average_precision,
lightgbm_eval_metric_accuracy,
)
from supervised.algorithms.registry import BINARY_CLASSIFICATION
from supervised.algorithms.registry import MULTICLASS_CLASSIFICATION
from supervised.algorithms.registry import REGRESSION
from supervised.algorithms.lightgbm import (
lightgbm_objective,
lightgbm_eval_metric,
)
# Tiny epsilon: used below both as a strictly-positive lower bound for
# suggest_float ranges and to push an upper bound just past 1.0 so the
# endpoint is reachable (the sampled value is clamped back with min(..., 1.0)).
EPS = 1e-8
class LightgbmObjective:
    """Optuna objective for tuning LightGBM hyperparameters.

    Wraps a fixed train/validation split into a callable that Optuna invokes
    once per trial. Each call samples a hyperparameter set, trains a booster
    with early stopping and Optuna pruning, and returns the validation score
    (negated when the metric is maximized, so Optuna can always minimize).
    """

    def __init__(
        self,
        ml_task,
        X_train,
        y_train,
        sample_weight,
        X_validation,
        y_validation,
        sample_weight_validation,
        eval_metric,
        cat_features_indices,
        n_jobs,
        random_state,
    ):
        """Store the data split and resolve objective/metric configuration.

        Args:
            ml_task: One of BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION,
                REGRESSION (from the algorithms registry).
            X_train, y_train, sample_weight: Training data, targets, weights.
            X_validation, y_validation, sample_weight_validation: Holdout split
                used for early stopping, pruning, and the final trial score.
            eval_metric: Project ``Metric`` instance; ``eval_metric.name``
                selects the LightGBM metric and any custom feval.
            cat_features_indices: Column indices of categorical features.
            n_jobs: Thread count; -1 maps to 0 (LightGBM's "use default").
            random_state: Seed passed to LightGBM for reproducibility.
        """
        self.X_train = X_train
        self.y_train = y_train
        self.sample_weight = sample_weight
        self.X_validation = X_validation
        self.y_validation = y_validation
        self.sample_weight_validation = sample_weight_validation
        # Datasets are built once and reused across all trials.
        self.dtrain = lgb.Dataset(
            self.X_train, label=self.y_train, weight=self.sample_weight
        )
        self.dvalid = lgb.Dataset(
            self.X_validation,
            label=self.y_validation,
            weight=self.sample_weight_validation,
        )
        self.cat_features_indices = cat_features_indices
        self.eval_metric = eval_metric
        # Fixed training-budget knobs (learning_rate here is only a default;
        # each trial samples its own from a categorical grid).
        self.learning_rate = 0.025
        self.rounds = 1000
        self.early_stopping_rounds = 50
        self.seed = random_state
        # LightGBM interprets num_threads=0 as "use default number of threads".
        self.n_jobs = 0 if n_jobs == -1 else n_jobs

        # Map the project metric onto LightGBM's built-in metric name; when the
        # metric is not built in, eval_metric_name is "custom" and
        # custom_eval_metric_name carries the feval's reported name.
        self.eval_metric_name, self.custom_eval_metric_name = lightgbm_eval_metric(
            ml_task, eval_metric.name
        )
        # Metrics LightGBM cannot compute natively are supplied via feval.
        _custom_metrics = {
            "r2": lightgbm_eval_metric_r2,
            "spearman": lightgbm_eval_metric_spearman,
            "pearson": lightgbm_eval_metric_pearson,
            "f1": lightgbm_eval_metric_f1,
            "average_precision": lightgbm_eval_metric_average_precision,
            "accuracy": lightgbm_eval_metric_accuracy,
        }
        self.custom_eval_metric = _custom_metrics.get(self.eval_metric.name)

        # num_class is required by LightGBM only for multiclass objectives.
        self.num_class = (
            len(np.unique(y_train)) if ml_task == MULTICLASS_CLASSIFICATION else None
        )
        self.objective = lightgbm_objective(ml_task, eval_metric.name)

    def __call__(self, trial):
        """Train one trial's booster and return its (sign-adjusted) score.

        Returns:
            float score to minimize, or None when training failed for a
            non-pruning reason (Optuna records the trial as failed).

        Raises:
            optuna.exceptions.TrialPruned: propagated so Optuna prunes the trial.
        """
        param = {
            "objective": self.objective,
            "metric": self.eval_metric_name,
            "verbosity": -1,
            "boosting_type": "gbdt",
            "learning_rate": trial.suggest_categorical(
                "learning_rate", [0.0125, 0.025, 0.05, 0.1]
            ),
            "num_leaves": trial.suggest_int("num_leaves", 2, 2048),
            "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
            "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
            # Upper bound is nudged past 1.0 so 1.0 itself is reachable, then
            # clamped back — LightGBM rejects fractions above 1.0.
            "feature_fraction": min(
                trial.suggest_float("feature_fraction", 0.3, 1.0 + EPS), 1.0
            ),
            "bagging_fraction": min(
                trial.suggest_float("bagging_fraction", 0.3, 1.0 + EPS), 1.0
            ),
            "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
            "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
            # Dataset params (e.g. min_data_in_leaf) vary per trial; disable
            # feature pre-filtering so LightGBM does not cache-reject them.
            "feature_pre_filter": False,
            "seed": self.seed,
            "num_threads": self.n_jobs,
            "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
        }
        if self.cat_features_indices:
            param["cat_feature"] = self.cat_features_indices
            param["cat_l2"] = trial.suggest_float("cat_l2", EPS, 100.0)
            param["cat_smooth"] = trial.suggest_float("cat_smooth", EPS, 100.0)
        if self.num_class is not None:
            param["num_class"] = self.num_class

        try:
            # The pruning callback must watch the metric name LightGBM reports:
            # the feval's own name when the metric is custom.
            metric_name = self.eval_metric_name
            if metric_name == "custom":
                metric_name = self.custom_eval_metric_name
            pruning_callback = optuna.integration.LightGBMPruningCallback(
                trial, metric_name, "validation"
            )
            gbm = lgb.train(
                param,
                self.dtrain,
                valid_sets=[self.dvalid],
                valid_names=["validation"],
                num_boost_round=self.rounds,
                # LightGBM >= 4.0 removed the `early_stopping_rounds` and
                # `verbose_eval` keyword arguments from lgb.train; the
                # equivalent callbacks below also work on LightGBM 3.x.
                callbacks=[
                    pruning_callback,
                    lgb.early_stopping(self.early_stopping_rounds, verbose=False),
                    lgb.log_evaluation(period=0),
                ],
                feval=self.custom_eval_metric,
            )
            preds = gbm.predict(self.X_validation)
            score = self.eval_metric(self.y_validation, preds)
            # Optuna minimizes; flip the sign for metrics that are maximized.
            if Metric.optimize_negative(self.eval_metric.name):
                score *= -1.0
        except optuna.exceptions.TrialPruned as e:
            # Must propagate so Optuna marks the trial as pruned, not failed.
            raise e
        except Exception as e:
            # Best-effort: a failed trial is reported (score None) rather than
            # aborting the whole study.
            print("Exception in LightgbmObjective", str(e))
            return None

        return score