
Merge pull request #410 from larsmans/accept-matrix-input

ENH accept matrix input throughout
2 parents 59f1970 + 1f2549d · commit 0c9cb49f4d2ec9a060a4337e72bb84cf74c7c248 · @larsmans committed Oct 25, 2011
Showing with 2,669 additions and 2,441 deletions.
  1. +10 −13 doc/developers/index.rst
  2. +5 −2 sklearn/cluster/_feature_agglomeration.py
  3. +2 −5 sklearn/cluster/affinity_propagation_.py
  4. +1 −1 sklearn/cluster/hierarchical.py
  5. +2 −2 sklearn/cluster/k_means_.py
  6. +3 −2 sklearn/covariance/empirical_covariance_.py
  7. +1 −1 sklearn/covariance/robust_covariance.py
  8. +4 −3 sklearn/covariance/shrunk_covariance_.py
  9. +1 −6 sklearn/covariance/tests/test_covariance.py
  10. +1 −1 sklearn/cross_validation.py
  11. +4 −4 sklearn/datasets/base.py
  12. +2 −2 sklearn/datasets/samples_generator.py
  13. +7 −7 sklearn/decomposition/dict_learning.py
  14. +3 −2 sklearn/decomposition/fastica_.py
  15. +5 −7 sklearn/decomposition/pca.py
  16. +2 −2 sklearn/decomposition/sparse_pca.py
  17. +2 −2 sklearn/feature_extraction/image.py
  18. +7 −10 sklearn/feature_selection/univariate_selection.py
  19. +12 −12 sklearn/gaussian_process/correlation_models.py
  20. +17 −16 sklearn/gaussian_process/gaussian_process.py
  21. +3 −3 sklearn/gaussian_process/regression_models.py
  22. +9 −9 sklearn/hmm.py
  23. +4 −4 sklearn/lda.py
  24. +16 −24 sklearn/linear_model/base.py
  25. +4 −4 sklearn/linear_model/bayes.py
  26. +2 −3 sklearn/linear_model/coordinate_descent.py
  27. +6 −8 sklearn/linear_model/least_angle.py
  28. +1 −1 sklearn/linear_model/logistic.py
  29. +8 −5 sklearn/linear_model/omp.py
  30. +6 −6 sklearn/linear_model/ridge.py
  31. +1 −1 sklearn/linear_model/sparse/coordinate_descent.py
  32. +1 −1 sklearn/linear_model/sparse/logistic.py
  33. +6 −6 sklearn/linear_model/sparse/stochastic_gradient.py
  34. +3 −3 sklearn/linear_model/stochastic_gradient.py
  35. +5 −2 sklearn/manifold/locally_linear.py
  36. +2 −2 sklearn/metrics/cluster/supervised.py
  37. +4 −4 sklearn/metrics/pairwise.py
  38. +3 −3 sklearn/mixture/dpgmm.py
  39. +3 −3 sklearn/mixture/gmm.py
  40. +5 −5 sklearn/naive_bayes.py
  41. +2,356 −2,163 sklearn/neighbors/ball_tree.c
  42. +4 −4 sklearn/neighbors/ball_tree.pyx
  43. +6 −6 sklearn/neighbors/base.py
  44. +14 −17 sklearn/pls.py
  45. +4 −4 sklearn/preprocessing/__init__.py
  46. +3 −3 sklearn/qda.py
  47. +14 −15 sklearn/svm/base.py
  48. +2 −2 sklearn/svm/bounds.py
  49. +10 −10 sklearn/svm/sparse/base.py
  50. +4 −4 sklearn/svm/tests/test_bounds.py
  51. +1 −1 sklearn/tree/tests/test_tree.py
  52. +5 −7 sklearn/tree/tree.py
  53. +20 −6 sklearn/utils/__init__.py
  54. +1 −1 sklearn/utils/fixes.py
  55. +42 −1 sklearn/utils/tests/test___init__.py
@@ -275,10 +275,16 @@ Input validation
----------------
The module ``sklearn.utils`` contains various functions for doing input
-validation/conversion. Sometimes, ``np.atleast_2d`` suffices for validation;
-in other cases, be sure to call ``safe_asanyarray``, ``atleast2d_or_csr`` or
-``as_float_array`` on any array-like argument passed to a scikit-learn API
-function.
+validation/conversion. Sometimes, ``np.asarray`` suffices for validation;
+do `not` use ``np.asanyarray`` or ``np.atleast_2d``, since those let NumPy's
+``np.matrix`` through, which has a different API
+(e.g., ``*`` means dot product on ``np.matrix``,
+but Hadamard product on ``np.ndarray``).
+
+In other cases, be sure to call ``safe_asarray``, ``atleast2d_or_csr``,
+``as_float_array`` or ``array2d`` on any array-like argument passed to a
+scikit-learn API function. The exact function to use depends mainly on whether
+``scipy.sparse`` matrices must be accepted.
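
A minimal sketch of the pitfall described above, using the ``array2d`` helper this patch adds to ``sklearn.utils`` (the array values are made up):

    import numpy as np
    from sklearn.utils import array2d

    A = np.array([[1., 2.], [3., 4.]])
    M = np.matrix(A)
    print(A * A)   # Hadamard product: [[ 1.  4.] [ 9. 16.]]
    print(M * M)   # dot product:      [[ 7. 10.] [15. 22.]]
    # array2d should normalize both to a plain 2-d ndarray, so
    # downstream code can rely on ndarray semantics for *.
    print(type(array2d(M)))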
APIs of scikit-learn objects
@@ -430,15 +436,6 @@ you call ``fit`` a second time without taking any previous value into
account: **fit should be idempotent**.
-Python tuples
-^^^^^^^^^^^^^
-
-In addition to numpy arrays, all methods should be able to accept
-Python tuples as arguments. In practice, this means you should call
-``numpy.asanyarray`` at the beginning at each public method that accepts
-arrays.
-
-
Optional Arguments
^^^^^^^^^^^^^^^^^^
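
A short illustration of the idempotent-``fit`` guideline above; the estimator choice and data are arbitrary (Ridge is just a concrete example):

    import numpy as np
    from sklearn.linear_model import Ridge

    X_old, y_old = np.random.randn(10, 3), np.random.randn(10)
    X_new, y_new = np.random.randn(10, 3), np.random.randn(10)

    est = Ridge()
    est.fit(X_old, y_old)
    est.fit(X_new, y_new)
    # After the second call, est must be indistinguishable from an
    # estimator fitted on (X_new, y_new) alone.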
@@ -6,7 +6,9 @@
# License: BSD 3 clause
import numpy as np
+
from ..base import TransformerMixin
+from ..utils import array2d
###############################################################################
@@ -31,6 +33,7 @@ def transform(self, X, pooling_func=np.mean):
return an array of values of size M.
Default is np.mean
"""
+ X = np.asarray(X)
nX = []
for l in np.unique(self.labels_):
nX.append(pooling_func(X[:, self.labels_ == l], axis=1))
@@ -63,6 +66,6 @@ def inverse_transform(self, Xred):
X[self.labels_ == unil[i]] = Xred[i]
else:
ncol = np.sum(self.labels_ == unil[i])
- X[:, self.labels_ == unil[i]] = np.tile(np.atleast_2d(Xred
- [:, i]).T, ncol)
+ X[:, self.labels_ == unil[i]] = np.tile(array2d(Xred[:, i]).T,
+ ncol)
return X
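
A standalone sketch of the pooling that ``transform`` above performs (data and labels are invented; the real ``labels_`` come from the fitted agglomeration):

    import numpy as np

    X = np.arange(12.).reshape(3, 4)    # 3 samples, 4 features
    labels = np.array([0, 0, 1, 1])     # two feature clusters
    # one pooled column per cluster, as in the loop above:
    pooled = np.array([np.mean(X[:, labels == l], axis=1)
                       for l in np.unique(labels)]).T
    print(pooled.shape)                 # (3, 2)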
@@ -10,6 +10,7 @@
import numpy as np
from ..base import BaseEstimator
+from ..utils import as_float_array
def affinity_propagation(S, p=None, convit=30, max_iter=200, damping=0.5,
@@ -49,11 +50,7 @@ def affinity_propagation(S, p=None, convit=30, max_iter=200, damping=0.5,
Between Data Points", Science Feb. 2007
"""
- if copy:
- # Copy the affinity matrix to avoid modifying it inplace
- S = np.array(S, copy=True, dtype=np.float)
- else:
- S = np.asanyarray(S, dtype=np.float)
+ S = as_float_array(S, copy=copy)
n_points = S.shape[0]
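
The replaced if/else branch is folded into ``as_float_array``, whose contract (as implied by the code it replaces) is roughly:

    import numpy as np
    from sklearn.utils import as_float_array

    S = np.array([[0., -1.], [-1., 0.]])
    S = as_float_array(S, copy=True)   # independent float copy: safe to damp in place
    S[0, 0] = 42.                      # the caller's affinity matrix stays untouched
    # with copy=False, an already-float array may be passed through uncopied.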
@@ -65,7 +65,7 @@ def ward_tree(X, connectivity=None, n_components=None, copy=True):
n_leaves : int
The number of leaves in the tree
"""
- X = np.asanyarray(X)
+ X = np.asarray(X)
n_samples, n_features = X.shape
if X.ndim == 1:
X = np.reshape(X, (-1, 1))
@@ -499,8 +499,8 @@ def __init__(self, k=8, init='k-means++', n_init=10, max_iter=300,
def _check_data(self, X):
"""Verify that the number of samples given is larger than k"""
if sp.issparse(X):
- raise ValueError("K-Means does not support sparse input matrices.")
- X = np.asanyarray(X)
+ raise TypeError("K-Means does not support sparse input matrices.")
+ X = np.asarray(X)
if X.shape[0] < self.k:
raise ValueError("n_samples=%d should be >= k=%d" % (
X.shape[0], self.k))
@@ -15,6 +15,7 @@
from scipy import linalg
from ..base import BaseEstimator
+from ..utils import array2d
from ..utils.extmath import fast_logdet as exact_logdet
@@ -52,7 +53,7 @@ def empirical_covariance(X, assume_centered=False):
Empirical covariance (Maximum Likelihood Estimator)
"""
- X = np.asanyarray(X)
+ X = np.asarray(X)
if X.ndim == 1:
X = np.atleast_2d(X).T
@@ -98,7 +99,7 @@ def _set_estimates(self, covariance):
is computed.
"""
- covariance = np.atleast_2d(covariance)
+ covariance = array2d(covariance)
# set covariance
self.covariance_ = covariance
# set precision
@@ -271,7 +271,7 @@ def fast_mcd(X, correction="empirical", reweight="rousseeuw"):
the robust location and covariance estimates of the data set
"""
- X = np.asanyarray(X)
+ X = np.asarray(X)
if X.ndim <= 1:
X = X.reshape((-1, 1))
n_samples, n_features = X.shape
@@ -18,6 +18,7 @@
import numpy as np
from .empirical_covariance_ import empirical_covariance, EmpiricalCovariance
+from ..utils import array2d
###############################################################################
@@ -50,7 +51,7 @@ def shrunk_covariance(emp_cov, shrinkage=0.1):
where mu = trace(cov) / n_features
"""
- emp_cov = np.atleast_2d(emp_cov)
+ emp_cov = array2d(emp_cov)
n_features = emp_cov.shape[0]
mu = np.trace(emp_cov) / n_features
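
For reference, a sketch of the shrinkage-toward-scaled-identity update that the quoted ``mu`` feeds into, reconstructed from the docstring's definition (a convex combination, not necessarily the exact implementation):

    import numpy as np

    def shrunk_covariance_sketch(emp_cov, shrinkage=0.1):
        emp_cov = np.atleast_2d(emp_cov)
        n_features = emp_cov.shape[0]
        mu = np.trace(emp_cov) / n_features          # as in the hunk above
        # convex combination with a scaled identity target:
        return (1. - shrinkage) * emp_cov + shrinkage * mu * np.eye(n_features)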
@@ -165,7 +166,7 @@ def ledoit_wolf(X, assume_centered=False):
where mu = trace(cov) / n_features
"""
- X = np.asanyarray(X)
+ X = np.asarray(X)
# for only one feature, the result is the same whatever the shrinkage
if X.ndim == 1:
if not assume_centered:
@@ -303,7 +304,7 @@ def oas(X, assume_centered=False):
where mu = trace(cov) / n_features
"""
- X = np.asanyarray(X)
+ X = np.asarray(X)
# for only one feature, the result is the same whatever the shrinkage
if X.ndim == 1:
if not assume_centered:
@@ -14,6 +14,7 @@
fast_mcd, MCD
X = datasets.load_iris().data
+X_1d = X[:, 0]
n_samples, n_features = X.shape
@@ -40,7 +41,6 @@ def test_covariance():
assert(np.amin(mahal_dist) > 50)
# test with n_features = 1
- X_1d = X[:, 0]
cov = EmpiricalCovariance()
cov.fit(X_1d)
assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)
@@ -78,7 +78,6 @@ def test_shrunk_covariance():
assert_array_almost_equal(empirical_covariance(X), cov.covariance_, 4)
# test with n_features = 1
- X_1d = X[:, 0]
cov = ShrunkCovariance(shrinkage=0.3)
cov.fit(X_1d)
assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4)
@@ -109,7 +108,6 @@ def test_ledoit_wolf():
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
- X_1d = X[:, 0]
lw = LedoitWolf()
lw.fit(X_1d, assume_centered=True)
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d,
@@ -140,7 +138,6 @@ def test_ledoit_wolf():
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
- X_1d = X[:, 0]
lw = LedoitWolf()
lw.fit(X_1d)
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d)
@@ -174,7 +171,6 @@ def test_oas():
assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
# test with n_features = 1
- X_1d = X[:, 0]
oa = OAS()
oa.fit(X_1d, assume_centered=True)
oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
@@ -204,7 +200,6 @@ def test_oas():
assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
# test with n_features = 1
- X_1d = X[:, 0]
oa = OAS()
oa.fit(X_1d)
oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
@@ -306,7 +306,7 @@ class StratifiedKFold(object):
"""
def __init__(self, y, k, indices=False):
- y = np.asanyarray(y)
+ y = np.asarray(y)
n = y.shape[0]
assert k > 0, ValueError('Cannot have number of folds k below 1.')
assert k <= n, ValueError('Cannot have number of folds k=%d, '
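
Usage under the constructor signature shown above (toy labels; with the default ``indices=False`` the folds should be boolean masks):

    import numpy as np
    from sklearn.cross_validation import StratifiedKFold

    y = np.array([0, 0, 0, 1, 1, 1])
    for train, test in StratifiedKFold(y, k=2):
        print(y[train], y[test])   # each fold keeps the class balance of y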
@@ -211,8 +211,8 @@ def load_iris():
target = np.empty((n_samples,), dtype=np.int)
for i, ir in enumerate(data_file):
- data[i] = np.asanyarray(ir[:-1], dtype=np.float)
- target[i] = np.asanyarray(ir[-1], dtype=np.int)
+ data[i] = np.asarray(ir[:-1], dtype=np.float)
+ target[i] = np.asarray(ir[-1], dtype=np.int)
return Bunch(data=data, target=target,
target_names=target_names,
@@ -350,8 +350,8 @@ def load_boston():
feature_names = np.array(temp)
for i, d in enumerate(data_file):
- data[i] = np.asanyarray(d[:-1], dtype=np.float)
- target[i] = np.asanyarray(d[-1], dtype=np.float)
+ data[i] = np.asarray(d[:-1], dtype=np.float)
+ target[i] = np.asarray(d[-1], dtype=np.float)
return Bunch(data=data,
target=target,
@@ -9,7 +9,7 @@
import numpy as np
from scipy import linalg
-from ..utils import check_random_state
+from ..utils import array2d, check_random_state
def make_classification(n_samples=100, n_features=20, n_informative=2,
@@ -395,7 +395,7 @@ def make_blobs(n_samples=100, n_features=2, centers=3, cluster_std=1.0,
centers = generator.uniform(center_box[0], center_box[1],
size=(centers, n_features))
else:
- centers = np.atleast_2d(centers)
+ centers = array2d(centers)
n_features = centers.shape[1]
X = []
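
A hedged usage sketch of the explicit-``centers`` path that now goes through ``array2d`` (values are arbitrary):

    import numpy as np
    from sklearn.datasets.samples_generator import make_blobs

    centers = [[0., 0.], [10., 10.]]   # list-of-lists is fine: array2d converts it
    X, y = make_blobs(n_samples=20, centers=centers, cluster_std=0.5)
    print(X.shape, set(y))             # (20, 2) and labels {0, 1}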
@@ -15,8 +15,7 @@
from ..base import BaseEstimator, TransformerMixin
from ..externals.joblib import Parallel, delayed, cpu_count
-from ..utils import check_random_state
-from ..utils import gen_even_slices
+from ..utils import array2d, check_random_state, gen_even_slices
from ..utils.extmath import fast_svd
from ..linear_model import Lasso, orthogonal_mp_gram, lars_path
@@ -90,7 +89,8 @@ def sparse_encode(X, Y, gram=None, cov=None, algorithm='lasso_lars',
linear_model.Lasso
"""
alpha = float(alpha) if alpha is not None else None
- X, Y = map(np.asanyarray, (X, Y))
+ X = np.asarray(X)
+ Y = np.asarray(Y)
if Y.ndim == 1:
Y = Y[:, np.newaxis]
n_features = Y.shape[1]
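
The 1-d promotion above in one line (plain NumPy, no sklearn helpers involved):

    import numpy as np

    y = np.array([1., 2., 3.])   # shape (3,)
    Y = y[:, np.newaxis]         # shape (3, 1); same as y.reshape(-1, 1)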
@@ -688,7 +688,7 @@ def transform(self, X, y=None):
Transformed data
"""
# XXX : kwargs is not documented
- X = np.atleast_2d(X)
+ X = array2d(X)
n_samples, n_features = X.shape
code = sparse_encode_parallel(
@@ -832,7 +832,7 @@ def fit(self, X, y=None):
Returns the object itself
"""
self.random_state = check_random_state(self.random_state)
- X = np.asanyarray(X)
+ X = np.asarray(X)
V, U, E = dict_learning(X, self.n_atoms, self.alpha,
tol=self.tol, max_iter=self.max_iter,
method=self.fit_algorithm,
@@ -968,7 +968,7 @@ def fit(self, X, y=None):
Returns the instance itself.
"""
self.random_state = check_random_state(self.random_state)
- X = np.asanyarray(X)
+ X = np.asarray(X)
U = dict_learning_online(X, self.n_atoms, self.alpha,
n_iter=self.n_iter, return_code=False,
method=self.fit_algorithm,
@@ -995,7 +995,7 @@ def partial_fit(self, X, y=None, iter_offset=0):
Returns the instance itself.
"""
self.random_state = check_random_state(self.random_state)
- X = np.atleast_2d(X)
+ X = array2d(X)
if hasattr(self, 'components_'):
dict_init = self.components_
else:
@@ -14,6 +14,7 @@
from scipy import linalg
from ..base import BaseEstimator
+from ..utils import array2d
__all__ = ['fastica', 'FastICA']
@@ -121,7 +122,7 @@ def fastica(X, n_components=None, algorithm="parallel", whiten=True,
Parameters
----------
- X : (n, p) array of shape = [n_samples, n_features], optional
+ X : array-like, shape = [n_samples, n_features]
Training vector, where n_samples is the number of samples and
n_features is the number of features.
n_components : int, optional
@@ -197,7 +198,7 @@ def fastica(X, n_components=None, algorithm="parallel", whiten=True,
# make interface compatible with other decompositions
warnings.warn("Please note: the interface of fastica has changed: "
"X is now assumed to be of shape [n_samples, n_features]")
- X = X.T
+ X = array2d(X).T
algorithm_funcs = {'parallel': _ica_par,
'deflation': _ica_def}