Skip to content

Commit

Permalink
In tree, min_samples_split and min_samples_leaf now accept float numb…
Browse files Browse the repository at this point in the history
…er as percentage.
  • Loading branch information
yelite committed Jul 10, 2014
1 parent 3acda36 commit 8f715dd
Showing 1 changed file with 38 additions and 13 deletions.
51 changes: 38 additions & 13 deletions sklearn/tree/tree.py
Expand Up @@ -244,8 +244,17 @@ def fit(self, X, y, sample_mask=None, X_argsorted=None, check_input=True,
else:
min_weight_leaf = 0.

if isinstance(self.min_samples_leaf, float):
min_samples_leaf = int(np.ceil(self.min_samples_leaf * n_samples))
else:
min_samples_leaf = self.min_samples_leaf

# Set min_samples_split sensibly
min_samples_split = max(self.min_samples_split,
if isinstance(self.min_samples_split, float):
min_samples_split = int(np.ceil(self.min_samples_split * n_samples))
else:
min_samples_split = self.min_samples_split
min_samples_split = max(min_samples_split,
2 * self.min_samples_leaf)

# Build tree
Expand All @@ -261,7 +270,7 @@ def fit(self, X, y, sample_mask=None, X_argsorted=None, check_input=True,
if not isinstance(self.splitter, Splitter):
splitter = SPLITTERS[self.splitter](criterion,
self.max_features_,
self.min_samples_leaf,
min_samples_leaf,
min_weight_leaf,
random_state)

Expand All @@ -270,12 +279,12 @@ def fit(self, X, y, sample_mask=None, X_argsorted=None, check_input=True,
# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
builder = DepthFirstTreeBuilder(splitter, min_samples_split,
self.min_samples_leaf,
min_samples_leaf,
min_weight_leaf,
max_depth)
else:
builder = BestFirstTreeBuilder(splitter, min_samples_split,
self.min_samples_leaf,
min_samples_leaf,
min_weight_leaf,
max_depth,
max_leaf_nodes)
Expand Down Expand Up @@ -402,11 +411,19 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin):
min_samples_split samples.
Ignored if ``max_leaf_nodes`` is not None.
min_samples_split : int, optional (default=2)
The minimum number of samples required to split an internal node.
min_samples_split : int, float, optional (default=2)
The minimum number of samples required to split an internal node:
- If int, then consider `min_samples_split` as the minimum number.
- If float, then `min_samples_split` is a fraction of the samples and
`ceil(min_samples_split * n_samples)` is the minimum
number of samples required for each split.
min_samples_leaf : int, optional (default=1)
The minimum number of samples required to be at a leaf node.
min_samples_leaf : int, float, optional (default=1)
The minimum number of samples required to be at a leaf node:
- If int, then consider `min_samples_leaf` as the minimum number.
- If float, then `min_samples_leaf` is a fraction of the samples and
`ceil(min_samples_leaf * n_samples)` is the minimum
number of samples required at each leaf node.
min_weight_fraction_leaf : float, optional (default=0.)
The minimum weighted fraction of the input samples required to be at a
Expand Down Expand Up @@ -625,11 +642,19 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin):
min_samples_split samples.
Ignored if ``max_leaf_nodes`` is not None.
min_samples_split : int, optional (default=2)
The minimum number of samples required to split an internal node.
min_samples_leaf : int, optional (default=1)
The minimum number of samples required to be at a leaf node.
min_samples_split : int, float, optional (default=2)
The minimum number of samples required to split an internal node:
- If int, then consider `min_samples_split` as the minimum number.
- If float, then `min_samples_split` is a fraction of the samples and
`ceil(min_samples_split * n_samples)` is the minimum
number of samples required for each split.
min_samples_leaf : int, float, optional (default=1)
The minimum number of samples required to be at a leaf node:
- If int, then consider `min_samples_leaf` as the minimum number.
- If float, then `min_samples_leaf` is a fraction of the samples and
`ceil(min_samples_leaf * n_samples)` is the minimum
number of samples required at each leaf node.
min_weight_fraction_leaf : float, optional (default=0.)
The minimum weighted fraction of the input samples required to be at a
Expand Down

0 comments on commit 8f715dd

Please sign in to comment.