Commit: fixed conflict
StrikerRUS committed Mar 18, 2019
2 parents b73ad13 + 95246cd commit 1c804f7
Showing 10 changed files with 93 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
@@ -53,5 +53,5 @@ test_script:
if (!$?) { $host.SetShouldExit(-1) }
} # run all examples
- cd %APPVEYOR_BUILD_FOLDER%\examples\python-guide\notebooks
- conda install -q -y -n test-env ipywidgets notebook
- conda install -q -y -n test-env ipywidgets notebook "tornado=5.1.1"
- jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb # run all notebooks
2 changes: 1 addition & 1 deletion .ci/test.sh
@@ -139,6 +139,6 @@ matplotlib.use\(\"Agg\"\)\
sed -i'.bak' 's/graph.render(view=True)/graph.render(view=False)/' plot_example.py
for f in *.py; do python $f || exit -1; done # run all examples
cd $BUILD_DIRECTORY/examples/python-guide/notebooks
conda install -q -y -n $CONDA_ENV ipywidgets notebook
conda install -q -y -n $CONDA_ENV ipywidgets notebook "tornado=5.1.1"
jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb || exit -1 # run all notebooks
fi
2 changes: 1 addition & 1 deletion .ci/test_windows.ps1
@@ -38,6 +38,6 @@ if ($env:TASK -eq "regular") {
python $file ; Check-Output $?
} # run all examples
cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide/notebooks
conda install -q -y -n $env:CONDA_ENV ipywidgets notebook
conda install -q -y -n $env:CONDA_ENV ipywidgets notebook "tornado=5.1.1"
jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb ; Check-Output $? # run all notebooks
}
4 changes: 4 additions & 0 deletions .gitignore
@@ -393,3 +393,7 @@ lightgbm.Rcheck/

# Files generated by aspell
**/*.bak

# GraphViz artifacts
*.gv
*.gv.*
21 changes: 21 additions & 0 deletions examples/python-guide/advanced_example.py
@@ -163,6 +163,27 @@ def binary_error(preds, train_data):

print('Finished 40 - 50 rounds with self-defined objective function and eval metric...')


# another self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, eval_result: float, is_higher_better: bool
# accuracy
def accuracy(preds, train_data):
labels = train_data.get_label()
return 'accuracy', np.mean(labels == (preds > 0.5)), True


gbm = lgb.train(params,
lgb_train,
num_boost_round=10,
init_model=gbm,
fobj=loglikelihood,
feval=lambda preds, train_data: [binary_error(preds, train_data),
accuracy(preds, train_data)],
valid_sets=lgb_eval)

print('Finished 50 - 60 rounds with self-defined objective function '
'and multiple self-defined eval metrics...')

print('Starting a new training job...')


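The lambda above is one way to supply several custom metrics at once; as a minimal sketch (not part of this commit, reusing params, lgb_train, lgb_eval, gbm, loglikelihood, binary_error and accuracy from advanced_example.py), the same thing can be written as a single named callable that returns a list of (eval_name, eval_result, is_higher_better) tuples:

# Sketch only: a feval callable may return a list of metric tuples,
# which is exactly what the lambda in the example above does.
def multiple_metrics(preds, train_data):
    return [binary_error(preds, train_data),
            accuracy(preds, train_data)]

gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                fobj=loglikelihood,
                feval=multiple_metrics,
                valid_sets=lgb_eval)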
8 changes: 4 additions & 4 deletions examples/python-guide/plot_example.py
@@ -50,10 +50,10 @@
ax = lgb.plot_importance(gbm, max_num_features=10)
plt.show()

print('Plotting 84th tree...') # one tree that uses a categorical feature to split
ax = lgb.plot_tree(gbm, tree_index=83, figsize=(20, 8), show_info=['split_gain'])
print('Plotting 54th tree...') # one tree that uses a categorical feature to split
ax = lgb.plot_tree(gbm, tree_index=53, figsize=(15, 15), show_info=['split_gain'])
plt.show()

print('Plotting 84th tree with graphviz...')
graph = lgb.create_tree_digraph(gbm, tree_index=83, name='Tree84')
print('Plotting 54th tree with graphviz...')
graph = lgb.create_tree_digraph(gbm, tree_index=53, name='Tree54')
graph.render(view=True)
16 changes: 16 additions & 0 deletions examples/python-guide/sklearn_example.py
@@ -51,11 +51,27 @@ def rmsle(y_true, y_pred):
eval_metric=rmsle,
early_stopping_rounds=5)


# another self-defined eval metric
# f(y_true: array, y_pred: array) -> name: string, eval_result: float, is_higher_better: bool
# Relative Absolute Error (RAE)
def rae(y_true, y_pred):
return 'RAE', np.sum(np.abs(y_pred - y_true)) / np.sum(np.abs(np.mean(y_true) - y_true)), False


print('Starting training with multiple custom eval functions...')
# train
gbm.fit(X_train, y_train,
eval_set=[(X_test, y_test)],
eval_metric=lambda y_true, y_pred: [rmsle(y_true, y_pred), rae(y_true, y_pred)],
early_stopping_rounds=5)

print('Starting predicting...')
# predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
# eval
print('The rmsle of prediction is:', rmsle(y_test, y_pred)[1])
print('The rae of prediction is:', rae(y_test, y_pred)[1])

# other scikit-learn modules
estimator = lgb.LGBMRegressor(num_leaves=31)
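As an optional follow-up (a sketch, not part of the diff; it assumes the gbm estimator fitted above and the scikit-learn wrapper's evals_result_ attribute), the recorded evaluation history should now contain entries for both custom metrics:

# Sketch only: inspect the evaluation history recorded during gbm.fit(...).
for valid_name, metric_history in gbm.evals_result_.items():
    # keys are expected to include the custom 'rmsle' and 'RAE' metrics
    # alongside any built-in metric
    print(valid_name, sorted(metric_history.keys()))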
10 changes: 9 additions & 1 deletion python-package/lightgbm/basic.py
@@ -2437,6 +2437,11 @@ def get_split_value_histogram(self, feature, bins=None, xgboost_style=False):
The feature name or index the histogram is calculated for.
If int, interpreted as index.
If string, interpreted as name.
Note
----
Categorical features are not supported.
bins : int, string or None, optional (default=None)
The maximum number of bins.
If None, or int and > number of unique split values and ``xgboost_style=True``,
@@ -2464,7 +2469,10 @@ def add(root):
else:
split_feature = root['split_feature']
if split_feature == feature:
values.append(root['threshold'])
if isinstance(root['threshold'], string_type):
raise LightGBMError('Cannot compute split value histogram for the categorical feature')
else:
values.append(root['threshold'])
add(root['left_child'])
add(root['right_child'])

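A short usage sketch of the behavior introduced here (it mirrors the setup of the updated test further below: Boston data with column 2 declared categorical, and lgb.basic.LightGBMError as the raised exception); nothing in it goes beyond what the diff and the test already show:

import lightgbm as lgb
from sklearn.datasets import load_boston

X, y = load_boston(True)
lgb_train = lgb.Dataset(X, y, categorical_feature=[2])
gbm = lgb.train({'verbose': -1}, lgb_train, num_boost_round=20)

gbm.get_split_value_histogram(0)       # numerical feature: histogram as before
try:
    gbm.get_split_value_histogram(2)   # categorical feature: now raises
except lgb.basic.LightGBMError as err:
    print('Expected error:', err)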
31 changes: 27 additions & 4 deletions swig/lightgbmlib.i
@@ -1,10 +1,12 @@
/* lightgbmlib.i */
%module lightgbmlib
%ignore LGBM_BoosterSaveModelToString;
%ignore LGBM_BoosterGetEvalNames;
%{
/* Includes the header in the wrapper code */
#include "../include/LightGBM/export.h"
#include "../include/LightGBM/utils/log.h"
#include "../include/LightGBM/utils/common.h"
#include "../include/LightGBM/c_api.h"
%}

@@ -16,12 +18,32 @@

%inline %{
char * LGBM_BoosterSaveModelToStringSWIG(BoosterHandle handle,
int start_iteration,
int num_iteration,
int64_t buffer_len,
int64_t* out_len) {
int start_iteration,
int num_iteration,
int64_t buffer_len,
int64_t* out_len) {
char* dst = new char[buffer_len];
int result = LGBM_BoosterSaveModelToString(handle, start_iteration, num_iteration, buffer_len, out_len, dst);
// Reallocate to use larger length
if (*out_len > buffer_len) {
delete [] dst;
int64_t realloc_len = *out_len;
dst = new char[realloc_len];
result = LGBM_BoosterSaveModelToString(handle, start_iteration, num_iteration, realloc_len, out_len, dst);
}
if (result != 0) {
return nullptr;
}
return dst;
}

char ** LGBM_BoosterGetEvalNamesSWIG(BoosterHandle handle,
int eval_counts) {
char** dst = new char*[eval_counts];
for (int i = 0; i < eval_counts; ++i) {
dst[i] = new char[128];
}
int result = LGBM_BoosterGetEvalNames(handle, &eval_counts, dst);
if (result != 0) {
return nullptr;
}
@@ -49,6 +71,7 @@
%array_functions(float, floatArray)
%array_functions(int, intArray)
%array_functions(long, longArray)
%array_functions(char *, stringArray)

/* Custom pointer manipulation template */
%define %pointer_manipulation(TYPE,NAME)
15 changes: 9 additions & 6 deletions tests/python_package_test/test_engine.py
@@ -1246,17 +1246,17 @@ def test_model_size(self):

def test_get_split_value_histogram(self):
X, y = load_boston(True)
lgb_train = lgb.Dataset(X, y)
lgb_train = lgb.Dataset(X, y, categorical_feature=[2])
gbm = lgb.train({'verbose': -1}, lgb_train, num_boost_round=20)
# test XGBoost-style return value
params = {'feature': 0, 'xgboost_style': True}
self.assertTupleEqual(gbm.get_split_value_histogram(**params).shape, (10, 2))
self.assertTupleEqual(gbm.get_split_value_histogram(bins=999, **params).shape, (10, 2))
self.assertTupleEqual(gbm.get_split_value_histogram(**params).shape, (9, 2))
self.assertTupleEqual(gbm.get_split_value_histogram(bins=999, **params).shape, (9, 2))
self.assertTupleEqual(gbm.get_split_value_histogram(bins=-1, **params).shape, (1, 2))
self.assertTupleEqual(gbm.get_split_value_histogram(bins=0, **params).shape, (1, 2))
self.assertTupleEqual(gbm.get_split_value_histogram(bins=1, **params).shape, (1, 2))
self.assertTupleEqual(gbm.get_split_value_histogram(bins=2, **params).shape, (2, 2))
self.assertTupleEqual(gbm.get_split_value_histogram(bins=6, **params).shape, (6, 2))
self.assertTupleEqual(gbm.get_split_value_histogram(bins=6, **params).shape, (5, 2))
self.assertTupleEqual(gbm.get_split_value_histogram(bins=7, **params).shape, (6, 2))
if lgb.compat.PANDAS_INSTALLED:
np.testing.assert_almost_equal(
@@ -1278,8 +1278,8 @@ def test_get_split_value_histogram(self):
)
# test numpy-style return value
hist, bins = gbm.get_split_value_histogram(0)
self.assertEqual(len(hist), 22)
self.assertEqual(len(bins), 23)
self.assertEqual(len(hist), 23)
self.assertEqual(len(bins), 24)
hist, bins = gbm.get_split_value_histogram(0, bins=999)
self.assertEqual(len(hist), 999)
self.assertEqual(len(bins), 1000)
@@ -1317,6 +1317,9 @@ def test_get_split_value_histogram(self):
mask = hist_vals > 0
np.testing.assert_array_equal(hist_vals[mask], hist[:, 1])
np.testing.assert_almost_equal(bin_edges[1:][mask], hist[:, 0])
# test histogram is disabled for categorical features
self.assertRaises(lgb.basic.LightGBMError, gbm.get_split_value_histogram, 2)


def test_early_stopping_for_only_first_metric(self):
X, y = load_boston(True)
