
Commit

DOC: documentation tweaks and remove global param setting from plot example
chang committed Dec 5, 2017
1 parent 234d69d commit e67e5f6
Showing 3 changed files with 25 additions and 18 deletions.
2 changes: 1 addition & 1 deletion doc/modules/preprocessing.rst
@@ -308,7 +308,7 @@ Mapping to a Gaussian distribution
In many modeling scenarios, normality of the features in a dataset is desirable.
Power transforms are a family of parametric, monotonic transformations that aim
to map data from any distribution to as close to a Gaussian distribution as
-possible in order to minimize skewness.
+possible in order to stabilize variance and minimize skewness.

:class:`PowerTransformer` currently provides one such power transformation,
the Box-Cox transform. The Box-Cox transform is given by:
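The Box-Cox mapping discussed in this hunk can be sketched with SciPy's implementation, `scipy.stats.boxcox`, which also estimates the parameter by maximum likelihood. This snippet is illustrative and not part of the commit:

```python
import numpy as np
from scipy import stats

rng = np.random.RandomState(304)
x = rng.lognormal(size=1000)  # strictly positive, heavily right-skewed

# With lmbda unspecified, boxcox estimates it by maximum likelihood
x_trans, lmbda = stats.boxcox(x)

# Skewness should shrink markedly after the transform
print(stats.skew(x), stats.skew(x_trans))
```

For lognormal data the estimated lambda comes out close to 0, where Box-Cox reduces to a plain log transform.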
23 changes: 12 additions & 11 deletions examples/preprocessing/plot_power_transformer.py
@@ -14,7 +14,7 @@
Note that the transformation successfully maps the data to a normal
distribution when applied to certain datasets, but is ineffective with others.
-This highlights the importance ofvisualizing the data before and after
+This highlights the importance of visualizing the data before and after
transformation.
"""

@@ -31,6 +31,9 @@


N_SAMPLES = 3000
+FONT_SIZE = 6
+BINS = 100


pt = PowerTransformer(method='box-cox')
rng = np.random.RandomState(304)
@@ -74,17 +77,12 @@
colors = ['firebrick', 'darkorange', 'goldenrod',
'seagreen', 'royalblue', 'darkorchid']

-params = {
-    'font.size': 6,
-    'hist.bins': 150
-}
-matplotlib.rcParams.update(params)

fig, axes = plt.subplots(nrows=4, ncols=3)
axes = axes.flatten()
axes_idxs = [(0, 3), (1, 4), (2, 5), (6, 9), (7, 10), (8, 11)]
axes_list = [(axes[i], axes[j]) for i, j in axes_idxs]


for distribution, color, axes in zip(distributions, colors, axes_list):
name, X = distribution
# scale all distributions to the range [0, 10]
Expand All @@ -96,11 +94,14 @@

ax_original, ax_trans = axes

-    ax_original.hist(X, color=color)
-    ax_original.set_title(name)
+    ax_original.hist(X, color=color, bins=BINS)
+    ax_original.set_title(name, fontsize=FONT_SIZE)
+    ax_original.tick_params(axis='both', which='major', labelsize=FONT_SIZE)

-    ax_trans.hist(X_trans, color=color)
-    ax_trans.set_title('{} after Box-Cox, $\lambda$ = {}'.format(name, lmbda))
+    ax_trans.hist(X_trans, color=color, bins=BINS)
+    ax_trans.set_title('{} after Box-Cox, $\lambda$ = {}'.format(name, lmbda),
+                       fontsize=FONT_SIZE)
+    ax_trans.tick_params(axis='both', which='major', labelsize=FONT_SIZE)


plt.tight_layout()
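The commit drops the global `matplotlib.rcParams.update(...)` in favor of explicit per-call keyword arguments (`bins=BINS`, `fontsize=FONT_SIZE`), so the example no longer leaks settings into other plots. When many settings are needed at once, a scoped alternative (my sketch, not used by this commit) is `matplotlib.rc_context`, which restores the previous settings on exit:

```python
import matplotlib
matplotlib.use('Agg')  # headless backend for this sketch
import matplotlib.pyplot as plt

# rc_context applies settings only inside the with-block and
# restores the previous rcParams on exit -- no global leakage
with matplotlib.rc_context({'font.size': 6, 'hist.bins': 150}):
    fig, ax = plt.subplots()
    ax.hist([1.0, 2.0, 2.0, 3.0])
    inside = matplotlib.rcParams['font.size']

outside = matplotlib.rcParams['font.size']
print(inside, outside)  # 6.0 inside the block, the previous value outside
```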
18 changes: 12 additions & 6 deletions sklearn/preprocessing/data.py
@@ -2593,11 +2593,13 @@ class PowerTransformer(BaseEstimator, TransformerMixin):
that are applied to make data more Gaussian-like. This is useful for
modeling issues related to heteroscedasticity (non-constant variance),
or other situations where normality is desired. Note that power
-    transforms do not result in standard normal distributions.
+    transforms do not result in standard normal distributions (i.e. the
+    transformed data could be far from zero-mean, unit-variance).
Currently, PowerTransformer supports the Box-Cox transform. Box-Cox
requires input data to be strictly positive. The optimal parameter
-    for minimizing skewness is estimated through maximum likelihood.
+    for stabilizing variance and minimizing skewness is estimated through
+    maximum likelihood.
Read more in the :ref:`User Guide <preprocessing_transformer>`.
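The caveat added in this hunk (that the transformed data need not be zero-mean, unit-variance) can be checked quickly with `scipy.stats.boxcox`, the same maximum-likelihood transform; a sketch, not code from the commit:

```python
import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
x = rng.exponential(size=1000)  # strictly positive, as Box-Cox requires

x_trans, lmbda = stats.boxcox(x)

# More Gaussian-like, but not standardized: the mean is clearly nonzero
print(x_trans.mean(), x_trans.std())
```

To obtain zero-mean, unit-variance output one would follow the transform with a `StandardScaler` step.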
@@ -2635,7 +2637,7 @@ class PowerTransformer(BaseEstimator, TransformerMixin):
power_transform : Equivalent function without the estimator API.
QuantileTransformer : Maps data to a standard normal distribution with
-        the parameter output_distribution='normal'.
+        the parameter `output_distribution='normal'`.
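Unlike Box-Cox, the `QuantileTransformer` referenced in this "See Also" entry does target a standard normal when `output_distribution='normal'`. A minimal sketch, not part of this commit and assuming a scikit-learn release that ships `QuantileTransformer`:

```python
import numpy as np
from sklearn.preprocessing import QuantileTransformer

rng = np.random.RandomState(0)
X = rng.exponential(size=(2000, 1))  # skewed input

qt = QuantileTransformer(output_distribution='normal', random_state=0)
X_norm = qt.fit_transform(X)

# Output is approximately standard normal (extreme tails are clipped)
print(X_norm.mean(), X_norm.std())
```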
Notes
-----
@@ -2770,11 +2772,15 @@ def power_transform(X, method='box-cox', copy=True):
that are applied to make data more Gaussian-like. This is useful for
modeling issues related to heteroscedasticity (non-constant variance),
or other situations where normality is desired. Note that power
-    transforms do not result in standard normal distributions.
+    transforms do not result in standard normal distributions (i.e. the
+    transformed data could be far from zero-mean, unit-variance).
Currently, power_transform() supports the Box-Cox transform. Box-Cox
requires input data to be strictly positive. The optimal parameter
-    for minimizing skewness is estimated through maximum likelihood.
+    for stabilizing variance and minimizing skewness is estimated
+    through maximum likelihood.
Read more in the :ref:`User Guide <preprocessing_transformer>`.
@@ -2806,7 +2812,7 @@ def power_transform(X, method='box-cox', copy=True):
API (as part of a preprocessing :class:`sklearn.pipeline.Pipeline`).
quantile_transform : Maps data to a standard normal distribution with
-        the parameter output_distribution='normal'.
+        the parameter `output_distribution='normal'`.
Notes
-----
