Navigation Menu

Skip to content

Commit

Permalink
fix NORMALIZE_WHITESPACE issues in doctests
Browse files Browse the repository at this point in the history
  • Loading branch information
ogrisel committed Sep 14, 2010
1 parent e882140 commit 34c2758
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 27 deletions.
27 changes: 14 additions & 13 deletions scikits/learn/datasets/base.py
Expand Up @@ -26,12 +26,12 @@ def __init__(self, **kwargs):


def load_iris(): def load_iris():
"""Load the iris dataset and return it. """Load the iris dataset and return it.
Returns Returns
------- -------
data : Bunch data : Bunch
Dictionary-like object, the interesting attributes are: Dictionary-like object, the interesting attributes are:
'data', the data to learn, 'target', the classification labels, 'data', the data to learn, 'target', the classification labels,
'target_names', the meaning of the labels, and 'DESCR', the 'target_names', the meaning of the labels, and 'DESCR', the
full description of the dataset. full description of the dataset.
Expand All @@ -45,14 +45,15 @@ def load_iris():
>>> data.target[[10, 25, 50]] >>> data.target[[10, 25, 50]]
array([0, 0, 1]) array([0, 0, 1])
>>> data.target_names >>> data.target_names
... #doctest: +NORMALIZE_WHITESPACE
array(['setosa', 'versicolor', 'virginica'], array(['setosa', 'versicolor', 'virginica'],
dtype='|S10') dtype='|S10')
""" """

data_file = csv.reader(open(os.path.dirname(__file__) data_file = csv.reader(open(os.path.dirname(__file__)
+ '/data/iris.csv')) + '/data/iris.csv'))
fdescr = open(os.path.dirname(__file__) fdescr = open(os.path.dirname(__file__)
+ '/descr/iris.rst') + '/descr/iris.rst')
temp = data_file.next() temp = data_file.next()
n_samples = int(temp[0]) n_samples = int(temp[0])
Expand All @@ -63,20 +64,20 @@ def load_iris():
for i, ir in enumerate(data_file): for i, ir in enumerate(data_file):
data[i] = np.asanyarray(ir[:-1], dtype=np.float) data[i] = np.asanyarray(ir[:-1], dtype=np.float)
target[i] = np.asanyarray(ir[-1], dtype=np.int) target[i] = np.asanyarray(ir[-1], dtype=np.int)
return Bunch(data=data, target=target, target_names=target_names, return Bunch(data=data, target=target, target_names=target_names,
DESCR=fdescr.read()) DESCR=fdescr.read())




def load_digits(): def load_digits():
"""Load the digits dataset and return it. """Load the digits dataset and return it.
Returns Returns
------- -------
data : Bunch data : Bunch
Dictionary-like object, the interesting attributes are: Dictionary-like object, the interesting attributes are:
'data', the data to learn, `images`, the images corresponding 'data', the data to learn, `images`, the images corresponding
to each sample, 'target', the classification labels for each to each sample, 'target', the classification labels for each
sample, 'target_names', the meaning of the labels, and 'DESCR', sample, 'target_names', the meaning of the labels, and 'DESCR',
the full description of the dataset. the full description of the dataset.
Example Example
Expand All @@ -90,17 +91,17 @@ def load_digits():
pl.matshow(digits.raw_data[0]) pl.matshow(digits.raw_data[0])
""" """

data = np.loadtxt(os.path.join(os.path.dirname(__file__) data = np.loadtxt(os.path.join(os.path.dirname(__file__)
+ '/data/digits.csv.gz'), delimiter=',') + '/data/digits.csv.gz'), delimiter=',')
fdescr = open(os.path.join(os.path.dirname(__file__) fdescr = open(os.path.join(os.path.dirname(__file__)
+ '/descr/digits.rst')) + '/descr/digits.rst'))
target = data[:, -1] target = data[:, -1]
flat_data = data[:, :-1] flat_data = data[:, :-1]
images = flat_data.view() images = flat_data.view()
images.shape = (-1, 8, 8) images.shape = (-1, 8, 8)
return Bunch(data=flat_data, target=target.astype(np.int), return Bunch(data=flat_data, target=target.astype(np.int),
target_names=np.arange(10), target_names=np.arange(10),
images=images, images=images,
DESCR=fdescr.read()) DESCR=fdescr.read())


Expand Down
12 changes: 7 additions & 5 deletions scikits/learn/gmm.py
Expand Up @@ -194,19 +194,20 @@ class GMM(BaseEstimator):
array([[ 0.], array([[ 0.],
[ 0.]]) [ 0.]])
>>> np.round(g.covars, 2) >>> np.round(g.covars, 2)
... #doctest: +NORMALIZE_WHITESPACE
array([[[ 1.]], array([[[ 1.]],
<BLANKLINE> [[ 1.]]])
[[ 1.]]])
>>> # Generate random observations with two modes centered on 0 >>> # Generate random observations with two modes centered on 0
>>> # and 10 to use for training. >>> # and 10 to use for training.
>>> np.random.seed(0) >>> np.random.seed(0)
>>> obs = np.concatenate((np.random.randn(100, 1), >>> obs = np.concatenate((np.random.randn(100, 1),
... 10 + np.random.randn(300, 1))) ... 10 + np.random.randn(300, 1)))
>>> g.fit(obs) >>> g.fit(obs)
... #doctest: +NORMALIZE_WHITESPACE
GMM(n_dim=1, cvtype='diag', GMM(n_dim=1, cvtype='diag',
means=array([[ 9.94199], means=array([[ 9.94199],
[ 0.05981]]), [ 0.05981]]),
covars=[array([[ 0.96081]]), array([[ 1.01683]])], n_states=2, covars=[array([[ 0.96081]]), array([[ 1.01683]])], n_states=2,
weights=array([ 0.75, 0.25])) weights=array([ 0.75, 0.25]))
Expand All @@ -216,8 +217,8 @@ class GMM(BaseEstimator):
array([[ 9.94], array([[ 9.94],
[ 0.06]]) [ 0.06]])
>>> np.round(g.covars, 2) >>> np.round(g.covars, 2)
... #doctest: +NORMALIZE_WHITESPACE
array([[[ 0.96]], array([[[ 0.96]],
<BLANKLINE>
[[ 1.02]]]) [[ 1.02]]])
>>> g.predict([[0], [2], [9], [10]]) >>> g.predict([[0], [2], [9], [10]])
array([1, 1, 0, 0]) array([1, 1, 0, 0])
Expand All @@ -227,9 +228,10 @@ class GMM(BaseEstimator):
>>> # Refit the model on new data (initial parameters remain the >>> # Refit the model on new data (initial parameters remain the
>>> #same), this time with an even split between the two modes. >>> #same), this time with an even split between the two modes.
>>> g.fit(20 * [[0]] + 20 * [[10]]) >>> g.fit(20 * [[0]] + 20 * [[10]])
... #doctest: +NORMALIZE_WHITESPACE
GMM(n_dim=1, cvtype='diag', GMM(n_dim=1, cvtype='diag',
means=array([[ 10.], means=array([[ 10.],
[ 0.]]), [ 0.]]),
covars=[array([[ 0.001]]), array([[ 0.001]])], n_states=2, covars=[array([[ 0.001]]), array([[ 0.001]])], n_states=2,
weights=array([ 0.5, 0.5])) weights=array([ 0.5, 0.5]))
Expand Down
14 changes: 8 additions & 6 deletions scikits/learn/hmm.py
Expand Up @@ -78,7 +78,7 @@ def __init__(self, n_states=1, startprob=None, transmat=None,
if startprob_prior is None: if startprob_prior is None:
startprob_prior = 1.0 startprob_prior = 1.0
self.startprob_prior = startprob_prior self.startprob_prior = startprob_prior

if transmat is None: if transmat is None:
transmat = np.tile(1.0 / n_states, (n_states, n_states)) transmat = np.tile(1.0 / n_states, (n_states, n_states))
self.transmat = transmat self.transmat = transmat
Expand Down Expand Up @@ -486,7 +486,7 @@ def _init(self, obs, params, **kwargs):
self.transmat[:] = 1.0 / self._n_states self.transmat[:] = 1.0 / self._n_states


# Methods used by self.fit() # Methods used by self.fit()

def _initialize_sufficient_statistics(self): def _initialize_sufficient_statistics(self):
stats = {'nobs': 0, stats = {'nobs': 0,
'start': np.zeros(self._n_states), 'start': np.zeros(self._n_states),
Expand Down Expand Up @@ -586,7 +586,7 @@ class GaussianHMM(_BaseHMM):
-------- --------
GMM : Gaussian mixture model GMM : Gaussian mixture model
""" """

def __init__(self, n_states=1, n_dim=1, cvtype='diag', startprob=None, def __init__(self, n_states=1, n_dim=1, cvtype='diag', startprob=None,
transmat=None, labels=None, means=None, covars=None, transmat=None, labels=None, means=None, covars=None,
startprob_prior=None, transmat_prior=None, startprob_prior=None, transmat_prior=None,
Expand Down Expand Up @@ -821,6 +821,7 @@ class MultinomialHMM(_BaseHMM):
-------- --------
>>> from scikits.learn.hmm import MultinomialHMM >>> from scikits.learn.hmm import MultinomialHMM
>>> MultinomialHMM(n_states=2, nsymbols=3) >>> MultinomialHMM(n_states=2, nsymbols=3)
... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
MultinomialHMM(n_states=2, MultinomialHMM(n_states=2,
emissionprob=array([[ 0.3663 , 0.12783, 0.50587], emissionprob=array([[ 0.3663 , 0.12783, 0.50587],
[ 0.35851, 0.21559, 0.42589]]), [ 0.35851, 0.21559, 0.42589]]),
Expand All @@ -829,7 +830,7 @@ class MultinomialHMM(_BaseHMM):
transmat=array([[ 0.5, 0.5], transmat=array([[ 0.5, 0.5],
[ 0.5, 0.5]]), nsymbols=3, [ 0.5, 0.5]]), nsymbols=3,
transmat_prior=1.0) transmat_prior=1.0)
See Also See Also
-------- --------
GaussianHMM : HMM with Gaussian emissions GaussianHMM : HMM with Gaussian emissions
Expand Down Expand Up @@ -952,7 +953,8 @@ class GMMHMM(_BaseHMM):
Examples Examples
-------- --------
>>> from scikits.learn.hmm import GMMHMM >>> from scikits.learn.hmm import GMMHMM
>>> GMMHMM(n_states=2, n_mix=10, n_dim=3) # doctest: +ELLIPSIS >>> GMMHMM(n_states=2, n_mix=10, n_dim=3)
... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
GMMHMM(n_dim=3, n_mix=10, n_states=2, cvtype=None, labels=[None, None], ...) GMMHMM(n_dim=3, n_mix=10, n_states=2, cvtype=None, labels=[None, None], ...)
See Also See Also
Expand Down Expand Up @@ -1025,7 +1027,7 @@ def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
super(GMMHMM, self)._accumulate_sufficient_statistics( super(GMMHMM, self)._accumulate_sufficient_statistics(
stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice, stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice,
params) params)

for state,g in enumerate(self.gmms): for state,g in enumerate(self.gmms):
gmm_logprob, gmm_posteriors = g.eval(obs) gmm_logprob, gmm_posteriors = g.eval(obs)
gmm_posteriors *= posteriors[:,state][:,np.newaxis] gmm_posteriors *= posteriors[:,state][:,np.newaxis]
Expand Down
10 changes: 7 additions & 3 deletions scikits/learn/pipeline.py
Expand Up @@ -62,9 +62,13 @@ class Pipeline(BaseEstimator):
>>> # You can set the parameters using the names issued >>> # You can set the parameters using the names issued
>>> # For instance, fit using a k of 10 in the SelectKBest >>> # For instance, fit using a k of 10 in the SelectKBest
>>> # and a parameter 'C' of the svm >>> # and a parameter 'C' of the svm
>>> anova_svm.fit(X, y, anova__k=10, svc__C=.1) #doctest: +ELLIPSIS >>> anova_svm.fit(X, y, anova__k=10, svc__C=.1)
Pipeline(steps=[('anova', SelectKBest(k=10, score_func=<function f_regression at ...>)), ('svc', SVC(kernel='linear', C=0.1, probability=False, degree=3, coef0=0.0, eps=0.001, ... #doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
cache_size=100.0, shrinking=True, gamma=0.01))]) Pipeline(steps=[('anova', SelectKBest(k=10,
score_func=<function f_regression at ...>)),
('svc', SVC(kernel='linear', C=0.1, probability=False, degree=3,
coef0=0.0, eps=0.001, cache_size=100.0, shrinking=True,
gamma=0.01))])
>>> prediction = anova_svm.predict(X) >>> prediction = anova_svm.predict(X)
>>> score = anova_svm.score(X) >>> score = anova_svm.score(X)
Expand Down

0 comments on commit 34c2758

Please sign in to comment.