microsoft · guolinke · Mar 14, 2019 · Mar 10, 2019 · Mar 10, 2019 · Mar 10, 2019
@@ -2437,6 +2437,11 @@ def get_split_value_histogram(self, feature, bins=None, xgboost_style=False):
             The feature name or index the histogram is calculated for.
             If int, interpreted as index.
             If string, interpreted as name.
+
+            Note
+            ----
+            Categorical features are not supported: ``LightGBMError`` is raised if a categorical split on the feature is found.
+
         bins : int, string or None, optional (default=None)
             The maximum number of bins.
             If None, or int and > number of unique split values and ``xgboost_style=True``,
@@ -2464,7 +2469,10 @@ def add(root):
                 else:
                     split_feature = root['split_feature']
                 if split_feature == feature:
-                    values.append(root['threshold'])
+                    if isinstance(root['threshold'], string_type):
+                        raise LightGBMError('Cannot compute split value histogram for the categorical feature')
+                    else:
+                        values.append(root['threshold'])
                 add(root['left_child'])
                 add(root['right_child'])
 

@@ -1245,17 +1245,17 @@ def test_model_size(self):
 
     def test_get_split_value_histogram(self):
         X, y = load_boston(True)
-        lgb_train = lgb.Dataset(X, y)
+        lgb_train = lgb.Dataset(X, y, categorical_feature=[2])
         gbm = lgb.train({'verbose': -1}, lgb_train, num_boost_round=20)
         # test XGBoost-style return value
         params = {'feature': 0, 'xgboost_style': True}
-        self.assertTupleEqual(gbm.get_split_value_histogram(**params).shape, (10, 2))
-        self.assertTupleEqual(gbm.get_split_value_histogram(bins=999, **params).shape, (10, 2))
+        self.assertTupleEqual(gbm.get_split_value_histogram(**params).shape, (9, 2))
+        self.assertTupleEqual(gbm.get_split_value_histogram(bins=999, **params).shape, (9, 2))
         self.assertTupleEqual(gbm.get_split_value_histogram(bins=-1, **params).shape, (1, 2))
         self.assertTupleEqual(gbm.get_split_value_histogram(bins=0, **params).shape, (1, 2))
         self.assertTupleEqual(gbm.get_split_value_histogram(bins=1, **params).shape, (1, 2))
         self.assertTupleEqual(gbm.get_split_value_histogram(bins=2, **params).shape, (2, 2))
-        self.assertTupleEqual(gbm.get_split_value_histogram(bins=6, **params).shape, (6, 2))
+        self.assertTupleEqual(gbm.get_split_value_histogram(bins=6, **params).shape, (5, 2))
         self.assertTupleEqual(gbm.get_split_value_histogram(bins=7, **params).shape, (6, 2))
         if lgb.compat.PANDAS_INSTALLED:
             np.testing.assert_almost_equal(
@@ -1277,8 +1277,8 @@ def test_get_split_value_histogram(self):
             )
         # test numpy-style return value
         hist, bins = gbm.get_split_value_histogram(0)
-        self.assertEqual(len(hist), 22)
-        self.assertEqual(len(bins), 23)
+        self.assertEqual(len(hist), 23)
+        self.assertEqual(len(bins), 24)
         hist, bins = gbm.get_split_value_histogram(0, bins=999)
         self.assertEqual(len(hist), 999)
         self.assertEqual(len(bins), 1000)
@@ -1316,3 +1316,5 @@ def test_get_split_value_histogram(self):
                 mask = hist_vals > 0
                 np.testing.assert_array_equal(hist_vals[mask], hist[:, 1])
                 np.testing.assert_almost_equal(bin_edges[1:][mask], hist[:, 0])
+        # test that requesting the histogram for a categorical feature raises LightGBMError
+        self.assertRaises(lgb.basic.LightGBMError, gbm.get_split_value_histogram, 2)