Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

fix NORMALIZE_WHITESPACE issues in doctests

  • Loading branch information...
commit 34c2758c6a90aab30b7ec33a25a4d2dd9e152b01 1 parent e882140
@ogrisel ogrisel authored
View
27 scikits/learn/datasets/base.py
@@ -26,12 +26,12 @@ def __init__(self, **kwargs):
def load_iris():
"""Load the iris dataset and return it.
-
+
Returns
-------
data : Bunch
Dictionary-like object, the interesting attributes are:
- 'data', the data to learn, 'target', the classification labels,
+ 'data', the data to learn, 'target', the classification labels,
'target_names', the meaning of the labels, and 'DESCR', the
full description of the dataset.
@@ -45,14 +45,15 @@ def load_iris():
>>> data.target[[10, 25, 50]]
array([0, 0, 1])
>>> data.target_names
+ ... #doctest: +NORMALIZE_WHITESPACE
array(['setosa', 'versicolor', 'virginica'],
dtype='|S10')
"""
-
- data_file = csv.reader(open(os.path.dirname(__file__)
+
+ data_file = csv.reader(open(os.path.dirname(__file__)
+ '/data/iris.csv'))
- fdescr = open(os.path.dirname(__file__)
+ fdescr = open(os.path.dirname(__file__)
+ '/descr/iris.rst')
temp = data_file.next()
n_samples = int(temp[0])
@@ -63,20 +64,20 @@ def load_iris():
for i, ir in enumerate(data_file):
data[i] = np.asanyarray(ir[:-1], dtype=np.float)
target[i] = np.asanyarray(ir[-1], dtype=np.int)
- return Bunch(data=data, target=target, target_names=target_names,
+ return Bunch(data=data, target=target, target_names=target_names,
DESCR=fdescr.read())
def load_digits():
"""Load the digits dataset and return it.
-
+
Returns
-------
data : Bunch
Dictionary-like object, the interesting attributes are:
'data', the data to learn, `images`, the images corresponding
to each sample, 'target', the classification labels for each
- sample, 'target_names', the meaning of the labels, and 'DESCR',
+ sample, 'target_names', the meaning of the labels, and 'DESCR',
the full description of the dataset.
Example
@@ -90,17 +91,17 @@ def load_digits():
pl.matshow(digits.raw_data[0])
"""
-
- data = np.loadtxt(os.path.join(os.path.dirname(__file__)
+
+ data = np.loadtxt(os.path.join(os.path.dirname(__file__)
+ '/data/digits.csv.gz'), delimiter=',')
- fdescr = open(os.path.join(os.path.dirname(__file__)
+ fdescr = open(os.path.join(os.path.dirname(__file__)
+ '/descr/digits.rst'))
target = data[:, -1]
flat_data = data[:, :-1]
images = flat_data.view()
images.shape = (-1, 8, 8)
- return Bunch(data=flat_data, target=target.astype(np.int),
- target_names=np.arange(10),
+ return Bunch(data=flat_data, target=target.astype(np.int),
+ target_names=np.arange(10),
images=images,
DESCR=fdescr.read())
View
12 scikits/learn/gmm.py
@@ -194,9 +194,9 @@ class GMM(BaseEstimator):
array([[ 0.],
[ 0.]])
>>> np.round(g.covars, 2)
+ ... #doctest: +NORMALIZE_WHITESPACE
array([[[ 1.]],
- <BLANKLINE>
- [[ 1.]]])
+ [[ 1.]]])
>>> # Generate random observations with two modes centered on 0
>>> # and 10 to use for training.
@@ -204,9 +204,10 @@ class GMM(BaseEstimator):
>>> obs = np.concatenate((np.random.randn(100, 1),
... 10 + np.random.randn(300, 1)))
>>> g.fit(obs)
+ ... #doctest: +NORMALIZE_WHITESPACE
GMM(n_dim=1, cvtype='diag',
means=array([[ 9.94199],
- [ 0.05981]]),
+ [ 0.05981]]),
covars=[array([[ 0.96081]]), array([[ 1.01683]])], n_states=2,
weights=array([ 0.75, 0.25]))
@@ -216,8 +217,8 @@ class GMM(BaseEstimator):
array([[ 9.94],
[ 0.06]])
>>> np.round(g.covars, 2)
+ ... #doctest: +NORMALIZE_WHITESPACE
array([[[ 0.96]],
- <BLANKLINE>
[[ 1.02]]])
>>> g.predict([[0], [2], [9], [10]])
array([1, 1, 0, 0])
@@ -227,9 +228,10 @@ class GMM(BaseEstimator):
>>> # Refit the model on new data (initial parameters remain the
>>> #same), this time with an even split between the two modes.
>>> g.fit(20 * [[0]] + 20 * [[10]])
+ ... #doctest: +NORMALIZE_WHITESPACE
GMM(n_dim=1, cvtype='diag',
means=array([[ 10.],
- [ 0.]]),
+ [ 0.]]),
covars=[array([[ 0.001]]), array([[ 0.001]])], n_states=2,
weights=array([ 0.5, 0.5]))
View
14 scikits/learn/hmm.py
@@ -78,7 +78,7 @@ def __init__(self, n_states=1, startprob=None, transmat=None,
if startprob_prior is None:
startprob_prior = 1.0
self.startprob_prior = startprob_prior
-
+
if transmat is None:
transmat = np.tile(1.0 / n_states, (n_states, n_states))
self.transmat = transmat
@@ -486,7 +486,7 @@ def _init(self, obs, params, **kwargs):
self.transmat[:] = 1.0 / self._n_states
# Methods used by self.fit()
-
+
def _initialize_sufficient_statistics(self):
stats = {'nobs': 0,
'start': np.zeros(self._n_states),
@@ -586,7 +586,7 @@ class GaussianHMM(_BaseHMM):
--------
GMM : Gaussian mixture model
"""
-
+
def __init__(self, n_states=1, n_dim=1, cvtype='diag', startprob=None,
transmat=None, labels=None, means=None, covars=None,
startprob_prior=None, transmat_prior=None,
@@ -821,6 +821,7 @@ class MultinomialHMM(_BaseHMM):
--------
>>> from scikits.learn.hmm import MultinomialHMM
>>> MultinomialHMM(n_states=2, nsymbols=3)
+ ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
MultinomialHMM(n_states=2,
emissionprob=array([[ 0.3663 , 0.12783, 0.50587],
[ 0.35851, 0.21559, 0.42589]]),
@@ -829,7 +830,7 @@ class MultinomialHMM(_BaseHMM):
transmat=array([[ 0.5, 0.5],
[ 0.5, 0.5]]), nsymbols=3,
transmat_prior=1.0)
-
+
See Also
--------
GaussianHMM : HMM with Gaussian emissions
@@ -952,7 +953,8 @@ class GMMHMM(_BaseHMM):
Examples
--------
>>> from scikits.learn.hmm import GMMHMM
- >>> GMMHMM(n_states=2, n_mix=10, n_dim=3) # doctest: +ELLIPSIS
+ >>> GMMHMM(n_states=2, n_mix=10, n_dim=3)
+ ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
GMMHMM(n_dim=3, n_mix=10, n_states=2, cvtype=None, labels=[None, None], ...)
See Also
@@ -1025,7 +1027,7 @@ def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
super(GMMHMM, self)._accumulate_sufficient_statistics(
stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice,
params)
-
+
for state,g in enumerate(self.gmms):
gmm_logprob, gmm_posteriors = g.eval(obs)
gmm_posteriors *= posteriors[:,state][:,np.newaxis]
View
10 scikits/learn/pipeline.py
@@ -62,9 +62,13 @@ class Pipeline(BaseEstimator):
>>> # You can set the parameters using the names issued
>>> # For instance, fit using a k of 10 in the SelectKBest
>>> # and a parameter 'C' of the SVM
- >>> anova_svm.fit(X, y, anova__k=10, svc__C=.1) #doctest: +ELLIPSIS
- Pipeline(steps=[('anova', SelectKBest(k=10, score_func=<function f_regression at ...>)), ('svc', SVC(kernel='linear', C=0.1, probability=False, degree=3, coef0=0.0, eps=0.001,
- cache_size=100.0, shrinking=True, gamma=0.01))])
+ >>> anova_svm.fit(X, y, anova__k=10, svc__C=.1)
+ ... #doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+ Pipeline(steps=[('anova', SelectKBest(k=10,
+ score_func=<function f_regression at ...>)),
+ ('svc', SVC(kernel='linear', C=0.1, probability=False, degree=3,
+ coef0=0.0, eps=0.001, cache_size=100.0, shrinking=True,
+ gamma=0.01))])
>>> prediction = anova_svm.predict(X)
>>> score = anova_svm.score(X)
Please sign in to comment.
Something went wrong with that request. Please try again.