From 52b199178fe316715d0e798ba3457c83f3a6fa1e Mon Sep 17 00:00:00 2001 From: Marie Douriez Date: Tue, 19 Nov 2019 06:54:41 -0800 Subject: [PATCH] [MRG] documentation for random_state in forest.py (#15516) * documentation for random_state in forests * move note to parameter * same for RandomForestRegressor * add doc for ExtraTreesRegressor and ExtraTreesClassifier * skip line * lint * move note back to where it was * add Glossary in RandomForestRegressor * adding description for RandomTreesEmbedding * small fix * correct description for RandomTreesEmbedding --- sklearn/ensemble/_forest.py | 53 ++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index 2dd600dc8f984..e7a0b8e56dde8 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -962,10 +962,11 @@ class RandomForestClassifier(ForestClassifier): <n_jobs>` for more details. random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + Controls both the randomness of the bootstrapping of the samples used + when building trees (if ``bootstrap=True``) and the sampling of the + features to consider when looking for the best split at each node + (if ``max_features < n_features``). + See :term:`Glossary <random_state>` for details. verbose : int, optional (default=0) Controls the verbosity when fitting and predicting. @@ -1278,10 +1279,11 @@ class RandomForestRegressor(ForestRegressor): <n_jobs>` for more details.
random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + Controls both the randomness of the bootstrapping of the samples used + when building trees (if ``bootstrap=True``) and the sampling of the + features to consider when looking for the best split at each node + (if ``max_features < n_features``). + See :term:`Glossary <random_state>` for details. verbose : int, optional (default=0) Controls the verbosity when fitting and predicting. @@ -1540,7 +1542,7 @@ class ExtraTreesClassifier(ForestClassifier): bootstrap : boolean, optional (default=False) Whether bootstrap samples are used when building trees. If False, the - whole datset is used to build each tree. + whole dataset is used to build each tree. oob_score : bool, optional (default=False) Whether to use out-of-bag samples to estimate @@ -1554,10 +1556,14 @@ class ExtraTreesClassifier(ForestClassifier): <n_jobs>` for more details. random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + Controls 3 sources of randomness: + + - the bootstrapping of the samples used when building trees + (if ``bootstrap=True``) + - the sampling of the features to consider when looking for the best + split at each node (if ``max_features < n_features``) + - the draw of the splits for each of the `max_features` + See :term:`Glossary <random_state>` for details. verbose : int, optional (default=0) Controls the verbosity when fitting and predicting.
@@ -1845,7 +1851,7 @@ class ExtraTreesRegressor(ForestRegressor): bootstrap : boolean, optional (default=False) Whether bootstrap samples are used when building trees. If False, the - whole datset is used to build each tree. + whole dataset is used to build each tree. oob_score : bool, optional (default=False) Whether to use out-of-bag samples to estimate the R^2 on unseen data. @@ -1858,10 +1864,14 @@ class ExtraTreesRegressor(ForestRegressor): <n_jobs>` for more details. random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + Controls 3 sources of randomness: + + - the bootstrapping of the samples used when building trees + (if ``bootstrap=True``) + - the sampling of the features to consider when looking for the best + split at each node (if ``max_features < n_features``) + - the draw of the splits for each of the `max_features` + See :term:`Glossary <random_state>` for details. verbose : int, optional (default=0) Controls the verbosity when fitting and predicting. @@ -2088,10 +2098,9 @@ class RandomTreesEmbedding(BaseForest): <n_jobs>` for more details. random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. + Controls the generation of the random `y` used to fit the trees + and the draw of the splits for each feature at the trees' nodes. + See :term:`Glossary <random_state>` for details. verbose : int, optional (default=0) Controls the verbosity when fitting and predicting.