fix NORMALIZE_WHITESPACE issues in doctests

scikit-learn · Sep 14, 2010 · 34c2758 · 34c2758
1 parent e882140
commit 34c2758
Show file tree

Hide file tree

Showing 4 changed files with 36 additions and 27 deletions.
diff --git a/scikits/learn/datasets/base.py b/scikits/learn/datasets/base.py
@@ -26,12 +26,12 @@ def __init__(self, **kwargs):
 
 def load_iris():
     """load the iris dataset and returns it.
-    
+
     Returns
     -------
     data : Bunch
         Dictionnary-like object, the interesting attributes are:
-        'data', the data to learn, 'target', the classification labels, 
+        'data', the data to learn, 'target', the classification labels,
         'target_names', the meaning of the labels, and 'DESCR', the
         full description of the dataset.
 
@@ -45,14 +45,15 @@ def load_iris():
     >>> data.target[[10, 25, 50]]
     array([0, 0, 1])
     >>> data.target_names
+    ... #doctest: +NORMALIZE_WHITESPACE
     array(['setosa', 'versicolor', 'virginica'],
           dtype='|S10')
 
     """
-    
+
-    data_file = csv.reader(open(os.path.dirname(__file__) 
+    data_file = csv.reader(open(os.path.dirname(__file__)
                         + '/data/iris.csv'))
-    fdescr = open(os.path.dirname(__file__) 
+    fdescr = open(os.path.dirname(__file__)
                         + '/descr/iris.rst')
     temp = data_file.next()
     n_samples = int(temp[0])
@@ -63,20 +64,20 @@ def load_iris():
     for i, ir in enumerate(data_file):
         data[i] = np.asanyarray(ir[:-1], dtype=np.float)
         target[i] = np.asanyarray(ir[-1], dtype=np.int)
-    return Bunch(data=data, target=target, target_names=target_names, 
+    return Bunch(data=data, target=target, target_names=target_names,
                  DESCR=fdescr.read())
 
 
 def load_digits():
     """load the digits dataset and returns it.
-    
+
     Returns
     -------
     data : Bunch
         Dictionnary-like object, the interesting attributes are:
         'data', the data to learn, `images`, the images corresponding
         to each sample, 'target', the classification labels for each
-        sample, 'target_names', the meaning of the labels, and 'DESCR', 
+        sample, 'target_names', the meaning of the labels, and 'DESCR',
         the full description of the dataset.
 
     Example
@@ -90,17 +91,17 @@ def load_digits():
         pl.matshow(digits.raw_data[0])
 
     """
-    
+
-    data = np.loadtxt(os.path.join(os.path.dirname(__file__) 
+    data = np.loadtxt(os.path.join(os.path.dirname(__file__)
                         + '/data/digits.csv.gz'), delimiter=',')
-    fdescr = open(os.path.join(os.path.dirname(__file__) 
+    fdescr = open(os.path.join(os.path.dirname(__file__)
                         + '/descr/digits.rst'))
     target = data[:, -1]
     flat_data = data[:, :-1]
     images = flat_data.view()
     images.shape = (-1, 8, 8)
-    return Bunch(data=flat_data, target=target.astype(np.int), 
+    return Bunch(data=flat_data, target=target.astype(np.int),
-                 target_names=np.arange(10), 
+                 target_names=np.arange(10),
                  images=images,
                  DESCR=fdescr.read())
 

diff --git a/scikits/learn/gmm.py b/scikits/learn/gmm.py
@@ -194,19 +194,20 @@ class GMM(BaseEstimator):
     array([[ 0.],
            [ 0.]])
     >>> np.round(g.covars, 2)
+    ... #doctest: +NORMALIZE_WHITESPACE
     array([[[ 1.]],
-    <BLANKLINE>
+           [[ 1.]]])
-            [[ 1.]]])
 
     >>> # Generate random observations with two modes centered on 0
     >>> # and 10 to use for training.
     >>> np.random.seed(0)
     >>> obs = np.concatenate((np.random.randn(100, 1),
     ...                       10 + np.random.randn(300, 1)))
     >>> g.fit(obs)
+    ... #doctest: +NORMALIZE_WHITESPACE
     GMM(n_dim=1, cvtype='diag',
         means=array([[ 9.94199],
-            [ 0.05981]]),
+                     [ 0.05981]]),
         covars=[array([[ 0.96081]]), array([[ 1.01683]])], n_states=2,
         weights=array([ 0.75,  0.25]))
 
@@ -216,8 +217,8 @@ class GMM(BaseEstimator):
     array([[ 9.94],
            [ 0.06]])
     >>> np.round(g.covars, 2)
+    ... #doctest: +NORMALIZE_WHITESPACE
     array([[[ 0.96]],
-    <BLANKLINE>
            [[ 1.02]]])
     >>> g.predict([[0], [2], [9], [10]])
     array([1, 1, 0, 0])
@@ -227,9 +228,10 @@ class GMM(BaseEstimator):
     >>> # Refit the model on new data (initial parameters remain the
     >>> #same), this time with an even split between the two modes.
     >>> g.fit(20 * [[0]] +  20 * [[10]])
+    ... #doctest: +NORMALIZE_WHITESPACE
     GMM(n_dim=1, cvtype='diag',
         means=array([[ 10.],
-            [  0.]]),
+                     [  0.]]),
         covars=[array([[ 0.001]]), array([[ 0.001]])], n_states=2,
         weights=array([ 0.5,  0.5]))
 

diff --git a/scikits/learn/hmm.py b/scikits/learn/hmm.py
@@ -78,7 +78,7 @@ def __init__(self, n_states=1, startprob=None, transmat=None,
         if startprob_prior is None:
             startprob_prior = 1.0
         self.startprob_prior = startprob_prior
-        
+
         if transmat is None:
             transmat = np.tile(1.0 / n_states, (n_states, n_states))
         self.transmat = transmat
@@ -486,7 +486,7 @@ def _init(self, obs, params, **kwargs):
             self.transmat[:] = 1.0 / self._n_states
 
     # Methods used by self.fit()
-    
+
     def _initialize_sufficient_statistics(self):
         stats = {'nobs': 0,
                  'start': np.zeros(self._n_states),
@@ -586,7 +586,7 @@ class GaussianHMM(_BaseHMM):
     --------
     GMM : Gaussian mixture model
     """
-    
+
     def __init__(self, n_states=1, n_dim=1, cvtype='diag', startprob=None,
                  transmat=None, labels=None, means=None, covars=None,
                  startprob_prior=None, transmat_prior=None,
@@ -821,6 +821,7 @@ class MultinomialHMM(_BaseHMM):
     --------
     >>> from scikits.learn.hmm import MultinomialHMM
     >>> MultinomialHMM(n_states=2, nsymbols=3)
+    ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
     MultinomialHMM(n_states=2,
                 emissionprob=array([[ 0.3663 ,  0.12783,  0.50587],
                [ 0.35851,  0.21559,  0.42589]]),
@@ -829,7 +830,7 @@ class MultinomialHMM(_BaseHMM):
                 transmat=array([[ 0.5,  0.5],
                [ 0.5,  0.5]]), nsymbols=3,
                 transmat_prior=1.0)
-    
+
     See Also
     --------
     GaussianHMM : HMM with Gaussian emissions
@@ -952,7 +953,8 @@ class GMMHMM(_BaseHMM):
     Examples
     --------
     >>> from scikits.learn.hmm import GMMHMM
-    >>> GMMHMM(n_states=2, n_mix=10, n_dim=3) # doctest: +ELLIPSIS
+    >>> GMMHMM(n_states=2, n_mix=10, n_dim=3)
+    ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
     GMMHMM(n_dim=3, n_mix=10, n_states=2, cvtype=None, labels=[None, None], ...)
 
     See Also
@@ -1025,7 +1027,7 @@ def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
         super(GMMHMM, self)._accumulate_sufficient_statistics(
             stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice,
             params)
-        
+
         for state,g in enumerate(self.gmms):
             gmm_logprob, gmm_posteriors = g.eval(obs)
             gmm_posteriors *= posteriors[:,state][:,np.newaxis]

diff --git a/scikits/learn/pipeline.py b/scikits/learn/pipeline.py
@@ -62,9 +62,13 @@ class Pipeline(BaseEstimator):
         >>> # You can set the parameters using the names issued
         >>> # For instance, fit using a k of 10 in the SelectKBest
         >>> # and a parameter 'C' of the svn
-        >>> anova_svm.fit(X, y, anova__k=10, svc__C=.1) #doctest: +ELLIPSIS
+        >>> anova_svm.fit(X, y, anova__k=10, svc__C=.1)
-        Pipeline(steps=[('anova', SelectKBest(k=10, score_func=<function f_regression at ...>)), ('svc', SVC(kernel='linear', C=0.1, probability=False, degree=3, coef0=0.0, eps=0.001,
+        ... #doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-        cache_size=100.0, shrinking=True, gamma=0.01))])
+        Pipeline(steps=[('anova', SelectKBest(k=10,
+          score_func=<function f_regression at ...>)),
+          ('svc', SVC(kernel='linear', C=0.1, probability=False, degree=3,
+           coef0=0.0, eps=0.001, cache_size=100.0, shrinking=True,
+           gamma=0.01))])
 
         >>> prediction = anova_svm.predict(X)
         >>> score = anova_svm.score(X)