Skip to content

Commit

Permalink
[ci][python] fixes according to scikit-learn 0.20 release (#1707)
Browse files Browse the repository at this point in the history
* fixed FutureWarning about cv default value

* fixed according to new check_estimator API

* fixed joblib warning
  • Loading branch information
StrikerRUS authored and guolinke committed Sep 28, 2018
1 parent 7825084 commit f53116a
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 12 deletions.
3 changes: 2 additions & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@ install:
default {$env:MINICONDA = """C:\Miniconda36-x64"""}
}
- set PATH=%MINICONDA%;%MINICONDA%\Scripts;%PATH%
- set SKLEARN_SITE_JOBLIB=true # temp fix for joblib warning in examples
- ps: $env:LGB_VER = (Get-Content VERSION.txt).trim()
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
- conda create -q -n test-env python=%PYTHON_VERSION% numpy nose scipy scikit-learn pandas matplotlib python-graphviz pytest
- conda create -q -n test-env python=%PYTHON_VERSION% numpy nose scipy scikit-learn pandas matplotlib python-graphviz pytest joblib
- activate test-env

build_script:
Expand Down
2 changes: 1 addition & 1 deletion examples/python-guide/sklearn_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def rmsle(y_true, y_pred):
'n_estimators': [20, 40]
}

gbm = GridSearchCV(estimator, param_grid)
gbm = GridSearchCV(estimator, param_grid, cv=3)

gbm.fit(X_train, y_train)

Expand Down
5 changes: 4 additions & 1 deletion python-package/lightgbm/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ class DataFrame(object):
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_X_y, check_array, check_consistent_length
from sklearn.utils.validation import (assert_all_finite, check_X_y,
check_array, check_consistent_length)
try:
from sklearn.model_selection import StratifiedKFold, GroupKFold
from sklearn.exceptions import NotFittedError
Expand All @@ -108,6 +109,7 @@ class DataFrame(object):
_LGBMCheckXY = check_X_y
_LGBMCheckArray = check_array
_LGBMCheckConsistentLength = check_consistent_length
_LGBMAssertAllFinite = assert_all_finite
_LGBMCheckClassificationTargets = check_classification_targets
_LGBMComputeSampleWeight = compute_sample_weight
except ImportError:
Expand All @@ -122,6 +124,7 @@ class DataFrame(object):
_LGBMCheckXY = None
_LGBMCheckArray = None
_LGBMCheckConsistentLength = None
_LGBMAssertAllFinite = None
_LGBMCheckClassificationTargets = None
_LGBMComputeSampleWeight = None

Expand Down
3 changes: 2 additions & 1 deletion python-package/lightgbm/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckConsistentLength,
_LGBMCheckClassificationTargets, _LGBMComputeSampleWeight,
_LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight,
argc_, range_, string_type, DataFrame, LGBMDeprecationWarning)
from .engine import train

Expand Down Expand Up @@ -656,6 +656,7 @@ def fit(self, X, y,
eval_class_weight=None, eval_init_score=None, eval_metric=None,
early_stopping_rounds=None, verbose=True,
feature_name='auto', categorical_feature='auto', callbacks=None):
_LGBMAssertAllFinite(y)
_LGBMCheckClassificationTargets(y)
self._le = _LGBMLabelEncoder().fit(y)
_y = self._le.transform(y)
Expand Down
13 changes: 5 additions & 8 deletions tests/python_package_test/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,16 @@

import lightgbm as lgb
import numpy as np
from sklearn import __version__ as sk_version
from sklearn.base import clone
from sklearn.datasets import (load_boston, load_breast_cancer, load_digits,
load_iris, load_svmlight_file)
from sklearn.exceptions import SkipTestWarning
from sklearn.externals import joblib
from sklearn.metrics import log_loss, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.utils.estimator_checks import (_yield_all_checks, SkipTest,
check_parameters_default_constructible)
try:
from sklearn.utils.estimator_checks import check_no_fit_attributes_set_in_init
sklearn_at_least_019 = True
except ImportError:
sklearn_at_least_019 = False


def multi_error(y_true, y_pred):
Expand Down Expand Up @@ -180,17 +177,17 @@ def test_feature_importances_type(self):
self.assertNotEqual(importance_split_top1, importance_gain_top1)

# sklearn < 0.19 cannot accept an estimator instance, but many checks can pass only with min_data=1 and min_data_in_bin=1
@unittest.skipIf(not sklearn_at_least_019, 'scikit-learn version is less than 0.19')
@unittest.skipIf(sk_version < '0.19.0', 'scikit-learn version is less than 0.19')
def test_sklearn_integration(self):
# we cannot use `check_estimator` directly since it has no mechanism for skipping individual checks
for name, estimator in ((lgb.sklearn.LGBMClassifier.__name__, lgb.sklearn.LGBMClassifier),
(lgb.sklearn.LGBMRegressor.__name__, lgb.sklearn.LGBMRegressor)):
check_parameters_default_constructible(name, estimator)
check_no_fit_attributes_set_in_init(name, estimator)
# we cannot leave default params (see https://github.com/Microsoft/LightGBM/issues/833)
estimator = estimator(min_child_samples=1, min_data_in_bin=1)
for check in _yield_all_checks(name, estimator):
if check.__name__ == 'check_estimators_nan_inf':
check_name = check.func.__name__ if hasattr(check, 'func') else check.__name__
if check_name == 'check_estimators_nan_inf':
continue # skip test because LightGBM deals with nan
try:
check(name, estimator)
Expand Down

0 comments on commit f53116a

Please sign in to comment.