diff --git a/README.rst b/README.rst index b87c004d5893e..6ad2b3726f9d5 100644 --- a/README.rst +++ b/README.rst @@ -37,7 +37,7 @@ Dependencies scikit-learn is tested to work under Python 2.6+ and Python 3.3+ (using the same codebase thanks to an embedded copy of `six `_). -The required dependencies to build the software Numpy >= 1.3, SciPy >= 0.7 +The required dependencies to build the software NumPy >= 1.6.1, SciPy >= 0.9 and a working C/C++ compiler. For running the examples Matplotlib >= 0.99.1 is required and for running the diff --git a/benchmarks/bench_sparsify.py b/benchmarks/bench_sparsify.py index f1de8e263bc46..379d1a79bac42 100644 --- a/benchmarks/bench_sparsify.py +++ b/benchmarks/bench_sparsify.py @@ -45,7 +45,6 @@ from scipy.sparse.csr import csr_matrix import numpy as np -from sklearn.utils.fixes import count_nonzero from sklearn.linear_model.stochastic_gradient import SGDRegressor from sklearn.metrics import r2_score @@ -53,7 +52,7 @@ def sparsity_ratio(X): - return count_nonzero(X) / float(n_samples * n_features) + return np.count_nonzero(X) / float(n_samples * n_features) n_samples, n_features = 5000, 300 X = np.random.randn(n_samples, n_features) diff --git a/doc/developers/index.rst b/doc/developers/index.rst index a84ac19386e70..797fdab86080e 100644 --- a/doc/developers/index.rst +++ b/doc/developers/index.rst @@ -764,10 +764,7 @@ E.g., here's a custom classifier:: ... return self ... def predict(self, X): ... return np.repeat(self.classes_[self.majority_], len(X)) - ... # doctest: +SKIP -.. We don't run the above "doctest" because it requires a recent NumPy and we - don't want users to import from sklearn.utils.fixes. get_params and set_params ------------------------- @@ -860,12 +857,11 @@ should match the order in which ``predict_proba``, ``predict_log_proba`` and ``decision_function`` return their values. The easiest way to achieve this is to put:: - self.classes_, y = unique(y, return_inverse=True) + self.classes_, y = np.unique(y, return_inverse=True) in ``fit``. This return a new ``y`` that contains class indexes, rather than labels, in the range [0, ``n_classes``). -``unique`` is available in ``sklearn.utils.fixes``. A classifier's ``predict`` method should return arrays containing class labels from ``classes_``. diff --git a/doc/developers/utilities.rst b/doc/developers/utilities.rst index ab31578fa7f86..9af5156b1f076 100644 --- a/doc/developers/utilities.rst +++ b/doc/developers/utilities.rst @@ -181,26 +181,13 @@ Graph Routines Backports ========= -- :func:`fixes.unique`: (backport of ``np.unique`` from numpy 1.4). Find the - unique entries in an array. In numpy versions < 1.4, ``np.unique`` is less - flexible. Used in :mod:`sklearn.cross_validation`. - -- :func:`fixes.copysign`: (backport of ``np.copysign`` from numpy 1.4). - Change the sign of ``x1`` to that of ``x2``, element-wise. - -- :func:`fixes.in1d`: (backport of ``np.in1d`` from numpy 1.4). - Test whether each element of an array is in a second array. Used in - ``sklearn.datasets.twenty_newsgroups`` and - ``sklearn.feature_extraction.image``. +- :func:`fixes.expit`: Logistic sigmoid function. Replacement for SciPy 0.10's + ``scipy.special.expit``. - :func:`fixes.savemat` (backport of ``scipy.io.savemat`` from scipy 0.7.2). Save an array in MATLAB-format. In earlier versions, the keyword ``oned_as`` is not available. -- :func:`fixes.count_nonzero` (backport of ``np.count_nonzero`` from - numpy 1.6). Count the nonzero elements of a matrix. Used in - tests of :mod:`sklearn.linear_model`. - - :func:`arrayfuncs.solve_triangular` (Back-ported from scipy v0.9) Used in ``sklearn.linear_model.omp``, independent back-ports in ``sklearn.mixture.gmm`` and diff --git a/doc/install.rst b/doc/install.rst index b93ca87de1f3c..f646edef7dabe 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -35,7 +35,7 @@ Getting the dependencies ------------------------ Installing from source requires you to have installed Python (>= 2.6), -NumPy (>= 1.3), SciPy (>= 0.7), setuptools, Python development headers +NumPy (>= 1.6.1), SciPy (>= 0.9), setuptools, Python development headers and a working C++ compiler. Under Debian-based operating systems, which include Ubuntu, you can install all these requirements by issuing:: diff --git a/doc/modules/computational_performance.rst b/doc/modules/computational_performance.rst index 3222879705843..9b2a925f53c70 100644 --- a/doc/modules/computational_performance.rst +++ b/doc/modules/computational_performance.rst @@ -109,10 +109,8 @@ max, to be checked depending on the hardware) for the sparse input representation to be faster than the dense input representation on a machine with many CPUs and an optimized BLAS implementation. -Here is sample code to test the sparsity of your input -(requires a relatively recent NumPy for the ``count_nonzero`` function):: +Here is sample code to test the sparsity of your input:: - from sklearn.utils.fixes import count_nonzero def sparsity_ratio(X): return 1.0 - np.count_nonzero(X) / float(X.shape[0] * X.shape[1]) print("input sparsity ratio:", sparsity_ratio(X)) diff --git a/examples/applications/plot_model_complexity_influence.py b/examples/applications/plot_model_complexity_influence.py index 19ed816cc09d4..39cf64fd66a52 100644 --- a/examples/applications/plot_model_complexity_influence.py +++ b/examples/applications/plot_model_complexity_influence.py @@ -33,7 +33,6 @@ from sklearn.ensemble.gradient_boosting import GradientBoostingRegressor from sklearn.linear_model.stochastic_gradient import SGDClassifier from sklearn.metrics.metrics import hamming_loss -from sklearn.utils.fixes import count_nonzero ############################################################################### # Routines @@ -121,7 +120,7 @@ def plot_influence(conf, mse_values, prediction_times, complexities): def _count_nonzero_coefficients(estimator): a = estimator.coef_.todense() - return count_nonzero(a) + return np.count_nonzero(a) ############################################################################### # main code diff --git a/examples/applications/plot_prediction_latency.py b/examples/applications/plot_prediction_latency.py index 5cbff2299cc94..ad2334a8669f7 100644 --- a/examples/applications/plot_prediction_latency.py +++ b/examples/applications/plot_prediction_latency.py @@ -30,7 +30,6 @@ from sklearn.linear_model.ridge import Ridge from sklearn.linear_model.stochastic_gradient import SGDRegressor from sklearn.svm.classes import SVR -from sklearn.utils.fixes import count_nonzero def _not_in_sphinx(): @@ -287,7 +286,7 @@ def plot_benchmark_throughput(throughputs, configuration): 'instance': SGDRegressor(penalty='elasticnet', alpha=0.01, l1_ratio=0.25, fit_intercept=True), 'complexity_label': 'non-zero coefficients', - 'complexity_computer': lambda clf: count_nonzero(clf.coef_)}, + 'complexity_computer': lambda clf: np.count_nonzero(clf.coef_)}, {'name': 'RandomForest', 'instance': RandomForestRegressor(), 'complexity_label': 'estimators', diff --git a/examples/ensemble/plot_gradient_boosting_regularization.py b/examples/ensemble/plot_gradient_boosting_regularization.py index b55a862d5d2c5..e5a01240ccdb0 100644 --- a/examples/ensemble/plot_gradient_boosting_regularization.py +++ b/examples/ensemble/plot_gradient_boosting_regularization.py @@ -30,14 +30,13 @@ from sklearn import ensemble from sklearn import datasets -from sklearn.utils.fixes import unique X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) X = X.astype(np.float32) # map labels from {-1, 1} to {0, 1} -labels, y = unique(y, return_inverse=True) +labels, y = np.unique(y, return_inverse=True) X_train, X_test = X[:2000], X[2000:] y_train, y_test = y[:2000], y[2000:] diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index e9a972dec0b6e..3b0b1e44ffd6b 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -9,7 +9,6 @@ from ..base import TransformerMixin from ..utils import array2d -from ..utils.fixes import unique ############################################################################### @@ -67,5 +66,5 @@ def inverse_transform(self, Xred): A vector of size n_samples with the values of Xred assigned to each of the cluster of samples. """ - unil, inverse = unique(self.labels_, return_inverse=True) + unil, inverse = np.unique(self.labels_, return_inverse=True) return Xred[..., inverse] diff --git a/sklearn/cluster/_k_means.c b/sklearn/cluster/_k_means.c index 398fd0f4b18d8..dcc205a62a42a 100644 --- a/sklearn/cluster/_k_means.c +++ b/sklearn/cluster/_k_means.c @@ -1,4 +1,4 @@ -/* Generated by Cython 0.20 on Wed Feb 5 13:47:23 2014 */ +/* Generated by Cython 0.20.1 on Sun Mar 2 13:36:58 2014 */ #define PY_SSIZE_T_CLEAN #ifndef CYTHON_USE_PYLONG_INTERNALS @@ -19,7 +19,7 @@ #elif PY_VERSION_HEX < 0x02040000 #error Cython requires Python 2.4+. #else -#define CYTHON_ABI "0_20" +#define CYTHON_ABI "0_20_1" #include /* For offsetof */ #ifndef offsetof #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) @@ -116,7 +116,7 @@ #if PY_MAJOR_VERSION < 3 #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \ - PyCode_New(a, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) #define __Pyx_DefaultClassType PyClass_Type #else #define __Pyx_BUILTIN_MODULE_NAME "builtins" @@ -161,10 +161,16 @@ #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) #endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif #define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) #define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) -#define __Pyx_PyUnicode_Concat(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \ - PyNumber_Add(a, b) : PyUnicode_Concat(a, b)) #if PY_MAJOR_VERSION >= 3 #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) #else @@ -231,7 +237,7 @@ #if PY_MAJOR_VERSION >= 3 #define PyBoolObject PyLongObject #endif -#if PY_VERSION_HEX < 0x03020000 +#if PY_VERSION_HEX < 0x030200A4 typedef long Py_hash_t; #define __Pyx_PyInt_FromHash_t PyInt_FromLong #define __Pyx_PyInt_AsHash_t PyInt_AsLong @@ -1018,6 +1024,12 @@ static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info); static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); /*proto*/ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); /*proto*/ +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/ #define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + i0 * s0 + i1 * s1) @@ -1393,7 +1405,6 @@ static char __pyx_k_old_center[] = "old_center"; static char __pyx_k_sample_idx[] = "sample_idx"; static char __pyx_k_center_diff[] = "center_diff"; static char __pyx_k_feature_idx[] = "feature_idx"; -static char __pyx_k_utils_fixes[] = "utils.fixes"; static char __pyx_k_RuntimeError[] = "RuntimeError"; static char __pyx_k_scipy_sparse[] = "scipy.sparse"; static char __pyx_k_squared_diff[] = "squared_diff"; @@ -1411,7 +1422,7 @@ static char __pyx_k_n_samples_in_cluster[] = "n_samples_in_cluster"; static char __pyx_k_mini_batch_update_csr[] = "_mini_batch_update_csr"; static char __pyx_k_sklearn_cluster__k_means[] = "sklearn.cluster._k_means"; static char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; -static char __pyx_k_home_felipe_nlp_contrib_scikit[] = "/home/felipe/nlp/contrib/scikit-learn-eltermann/sklearn/cluster/_k_means.pyx"; +static char __pyx_k_home_larsb_src_scikit_learn_skl[] = "/home/larsb/src/scikit-learn/sklearn/cluster/_k_means.pyx"; static char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; static char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd"; static char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte order not supported"; @@ -1446,7 +1457,7 @@ static PyObject *__pyx_n_s_enumerate; static PyObject *__pyx_n_s_far_from_centers; static PyObject *__pyx_n_s_feature_idx; static PyObject *__pyx_n_s_float64; -static PyObject *__pyx_kp_s_home_felipe_nlp_contrib_scikit; +static PyObject *__pyx_kp_s_home_larsb_src_scikit_learn_skl; static PyObject *__pyx_n_s_i; static PyObject *__pyx_n_s_import; static PyObject *__pyx_n_s_indices; @@ -1484,7 +1495,6 @@ static PyObject *__pyx_n_s_squared_diff; static PyObject *__pyx_n_s_test; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; static PyObject *__pyx_n_s_utils_extmath; -static PyObject *__pyx_n_s_utils_fixes; static PyObject *__pyx_n_s_where; static PyObject *__pyx_n_s_x_squared_norms; static PyObject *__pyx_n_s_zeros; @@ -1730,7 +1740,7 @@ static __pyx_t_7sklearn_7cluster_8_k_means_DOUBLE __pyx_f_7sklearn_7cluster_8_k_ * n_clusters, dtype=np.float64) * */ - __pyx_t_5 = PyObject_Call(__pyx_t_2, __pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -2537,7 +2547,7 @@ static __pyx_t_7sklearn_7cluster_8_k_means_DOUBLE __pyx_f_7sklearn_7cluster_8_k_ * n_clusters, dtype=np.float64) * */ - __pyx_t_9 = PyObject_Call(__pyx_t_1, __pyx_t_7, __pyx_t_5); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_7, __pyx_t_5); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; @@ -3923,7 +3933,7 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_6_centers_dense(CYTHON_UNU * n_features = X.shape[1] * cdef int i, j, c * cdef np.ndarray[DOUBLE, ndim=2] centers = np.zeros((n_clusters, n_features)) # <<<<<<<<<<<<<< - * n_samples_in_cluster = bincount(labels, minlength=n_clusters) + * n_samples_in_cluster = np.bincount(labels, minlength=n_clusters) * empty_clusters = np.where(n_samples_in_cluster == 0)[0] */ __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3948,7 +3958,7 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_6_centers_dense(CYTHON_UNU PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(__pyx_t_2, __pyx_t_3, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_3, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -3969,16 +3979,19 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_6_centers_dense(CYTHON_UNU /* "sklearn/cluster/_k_means.pyx":275 * cdef int i, j, c * cdef np.ndarray[DOUBLE, ndim=2] centers = np.zeros((n_clusters, n_features)) - * n_samples_in_cluster = bincount(labels, minlength=n_clusters) # <<<<<<<<<<<<<< + * n_samples_in_cluster = np.bincount(labels, minlength=n_clusters) # <<<<<<<<<<<<<< * empty_clusters = np.where(n_samples_in_cluster == 0)[0] * # maybe also relocate small clusters? */ - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_bincount); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_bincount); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(((PyObject *)__pyx_v_labels)); - PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_v_labels)); + PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_v_labels)); __Pyx_GIVEREF(((PyObject *)__pyx_v_labels)); __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); @@ -3986,17 +3999,17 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_6_centers_dense(CYTHON_UNU __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_minlength, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_4, __pyx_t_3, __pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_4, __pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_n_samples_in_cluster = __pyx_t_1; __pyx_t_1 = 0; /* "sklearn/cluster/_k_means.pyx":276 * cdef np.ndarray[DOUBLE, ndim=2] centers = np.zeros((n_clusters, n_features)) - * n_samples_in_cluster = bincount(labels, minlength=n_clusters) + * n_samples_in_cluster = np.bincount(labels, minlength=n_clusters) * empty_clusters = np.where(n_samples_in_cluster == 0)[0] # <<<<<<<<<<<<<< * # maybe also relocate small clusters? * @@ -4007,20 +4020,20 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_6_centers_dense(CYTHON_UNU __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = PyObject_RichCompare(__pyx_v_n_samples_in_cluster, __pyx_int_0, Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_t_2, __pyx_t_3, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_1, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 0); if (unlikely(__pyx_t_3 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_t_1, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_empty_clusters = __pyx_t_3; - __pyx_t_3 = 0; + __pyx_v_empty_clusters = __pyx_t_4; + __pyx_t_4 = 0; /* "sklearn/cluster/_k_means.pyx":279 * # maybe also relocate small clusters? @@ -4040,16 +4053,16 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_6_centers_dense(CYTHON_UNU * * for i, cluster_id in enumerate(empty_clusters): */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_distances), __pyx_n_s_argsort); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyObject_Call(__pyx_t_3, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_distances), __pyx_n_s_argsort); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetItem(__pyx_t_1, __pyx_slice__2); if (unlikely(__pyx_t_3 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = PyObject_GetItem(__pyx_t_1, __pyx_slice__2); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_far_from_centers = __pyx_t_3; - __pyx_t_3 = 0; + __pyx_v_far_from_centers = __pyx_t_4; + __pyx_t_4 = 0; goto __pyx_L3; } __pyx_L3:; @@ -4063,30 +4076,30 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_6_centers_dense(CYTHON_UNU */ __pyx_t_8 = 0; if (PyList_CheckExact(__pyx_v_empty_clusters) || PyTuple_CheckExact(__pyx_v_empty_clusters)) { - __pyx_t_3 = __pyx_v_empty_clusters; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; + __pyx_t_4 = __pyx_v_empty_clusters; __Pyx_INCREF(__pyx_t_4); __pyx_t_6 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_empty_clusters); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_9 = Py_TYPE(__pyx_t_3)->tp_iternext; + __pyx_t_6 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_empty_clusters); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_9 = Py_TYPE(__pyx_t_4)->tp_iternext; } for (;;) { - if (!__pyx_t_9 && PyList_CheckExact(__pyx_t_3)) { - if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; + if (!__pyx_t_9 && PyList_CheckExact(__pyx_t_4)) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_4)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif - } else if (!__pyx_t_9 && PyTuple_CheckExact(__pyx_t_3)) { - if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; + } else if (!__pyx_t_9 && PyTuple_CheckExact(__pyx_t_4)) { + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_4)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { - __pyx_t_1 = __pyx_t_9(__pyx_t_3); + __pyx_t_1 = __pyx_t_9(__pyx_t_4); if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { @@ -4136,7 +4149,7 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_6_centers_dense(CYTHON_UNU */ if (unlikely(PyObject_SetItem(__pyx_v_n_samples_in_cluster, __pyx_v_cluster_id, __pyx_int_1) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; /* "sklearn/cluster/_k_means.pyx":289 * n_samples_in_cluster[cluster_id] = 1 @@ -4183,27 +4196,27 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_6_centers_dense(CYTHON_UNU * * return centers */ - __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_newaxis); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_newaxis); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(__pyx_slice__3); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_slice__3); + PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_slice__3); __Pyx_GIVEREF(__pyx_slice__3); - PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); + PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_GetItem(__pyx_v_n_samples_in_cluster, __pyx_t_3); if (unlikely(__pyx_t_2 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_2 = PyObject_GetItem(__pyx_v_n_samples_in_cluster, __pyx_t_4); if (unlikely(__pyx_t_2 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyNumber_InPlaceDivide(((PyObject *)__pyx_v_centers), __pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_PyNumber_InPlaceDivide(((PyObject *)__pyx_v_centers), __pyx_t_2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_5 = ((PyArrayObject *)__pyx_t_3); + if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = ((PyArrayObject *)__pyx_t_4); { __Pyx_BufFmt_StackElem __pyx_stack[1]; __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_centers.rcbuffer->pybuffer); @@ -4221,8 +4234,8 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_6_centers_dense(CYTHON_UNU if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_5 = 0; - __Pyx_DECREF_SET(__pyx_v_centers, ((PyArrayObject *)__pyx_t_3)); - __pyx_t_3 = 0; + __Pyx_DECREF_SET(__pyx_v_centers, ((PyArrayObject *)__pyx_t_4)); + __pyx_t_4 = 0; /* "sklearn/cluster/_k_means.pyx":295 * centers /= n_samples_in_cluster[:, np.newaxis] @@ -4589,7 +4602,7 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_1, __pyx_t_6, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_6, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; @@ -4610,24 +4623,27 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN /* "sklearn/cluster/_k_means.pyx":334 * cdef np.ndarray[np.npy_intp, ndim=1] far_from_centers * cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] n_samples_in_cluster = \ - * bincount(labels, minlength=n_clusters) # <<<<<<<<<<<<<< + * np.bincount(labels, minlength=n_clusters) # <<<<<<<<<<<<<< * cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] empty_clusters = \ * np.where(n_samples_in_cluster == 0)[0] */ - __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_bincount); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_bincount); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)__pyx_v_labels)); - PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_v_labels)); + PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_v_labels)); __Pyx_GIVEREF(((PyObject *)__pyx_v_labels)); __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_minlength, __pyx_v_n_clusters) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_8 = PyObject_Call(__pyx_t_2, __pyx_t_6, __pyx_t_1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_2, __pyx_t_1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (!(likely(((__pyx_t_8) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_8, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = ((PyArrayObject *)__pyx_t_8); @@ -4644,7 +4660,7 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN __pyx_t_8 = 0; /* "sklearn/cluster/_k_means.pyx":336 - * bincount(labels, minlength=n_clusters) + * np.bincount(labels, minlength=n_clusters) * cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] empty_clusters = \ * np.where(n_samples_in_cluster == 0)[0] # <<<<<<<<<<<<<< * @@ -4656,20 +4672,20 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_t_8 = PyObject_RichCompare(((PyObject *)__pyx_v_n_samples_in_cluster), __pyx_int_0, Py_EQ); __Pyx_XGOTREF(__pyx_t_8); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_8); + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyObject_Call(__pyx_t_1, __pyx_t_6, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetItemInt(__pyx_t_8, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_8, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(__pyx_t_2 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_10 = ((PyArrayObject *)__pyx_t_6); + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = ((PyArrayObject *)__pyx_t_2); { __Pyx_BufFmt_StackElem __pyx_stack[1]; if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_empty_clusters.rcbuffer->pybuffer, (PyObject*)__pyx_t_10, &__Pyx_TypeInfo_nn_npy_intp, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) { @@ -4679,8 +4695,8 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN } } __pyx_t_10 = 0; - __pyx_v_empty_clusters = ((PyArrayObject *)__pyx_t_6); - __pyx_t_6 = 0; + __pyx_v_empty_clusters = ((PyArrayObject *)__pyx_t_2); + __pyx_t_2 = 0; /* "sklearn/cluster/_k_means.pyx":340 * # maybe also relocate small clusters? @@ -4699,16 +4715,16 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN * * for i in range(empty_clusters.shape[0]): */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_distances), __pyx_n_s_argsort); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_8 = PyObject_Call(__pyx_t_6, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_distances), __pyx_n_s_argsort); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyObject_GetItem(__pyx_t_8, __pyx_slice__4); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = PyObject_GetItem(__pyx_t_8, __pyx_slice__4); if (unlikely(__pyx_t_2 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_12 = ((PyArrayObject *)__pyx_t_6); + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = ((PyArrayObject *)__pyx_t_2); { __Pyx_BufFmt_StackElem __pyx_stack[1]; __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_far_from_centers.rcbuffer->pybuffer); @@ -4726,8 +4742,8 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN if (unlikely(__pyx_t_13 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_12 = 0; - __pyx_v_far_from_centers = ((PyArrayObject *)__pyx_t_6); - __pyx_t_6 = 0; + __pyx_v_far_from_centers = ((PyArrayObject *)__pyx_t_2); + __pyx_t_2 = 0; goto __pyx_L3; } __pyx_L3:; @@ -4796,9 +4812,9 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN * n_samples_in_cluster[cluster_id] = 1 * */ - __pyx_t_6 = __Pyx_GetItemInt(((PyObject *)__pyx_v_centers), __pyx_v_cluster_id, npy_intp, 1, __Pyx_PyInt_From_Py_intptr_t, 0, 1, 1); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 350; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 350; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(((PyObject *)__pyx_v_centers), __pyx_v_cluster_id, npy_intp, 1, __Pyx_PyInt_From_Py_intptr_t, 0, 1, 1); if (unlikely(__pyx_t_2 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 350; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_2); + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 350; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "sklearn/cluster/_k_means.pyx":349 * # XXX two relocated clusters could be close to each other @@ -4807,8 +4823,8 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN * centers[cluster_id]) * n_samples_in_cluster[cluster_id] = 1 */ - __pyx_f_7sklearn_5utils_11sparsefuncs_add_row_csr(((PyArrayObject *)__pyx_v_data), ((PyArrayObject *)__pyx_v_indices), ((PyArrayObject *)__pyx_v_indptr), (*__Pyx_BufPtrStrided1d(npy_intp *, __pyx_pybuffernd_far_from_centers.rcbuffer->pybuffer.buf, __pyx_t_20, __pyx_pybuffernd_far_from_centers.diminfo[0].strides)), ((PyArrayObject *)__pyx_t_6)); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_f_7sklearn_5utils_11sparsefuncs_add_row_csr(((PyArrayObject *)__pyx_v_data), ((PyArrayObject *)__pyx_v_indices), ((PyArrayObject *)__pyx_v_indptr), (*__Pyx_BufPtrStrided1d(npy_intp *, __pyx_pybuffernd_far_from_centers.rcbuffer->pybuffer.buf, __pyx_t_20, __pyx_pybuffernd_far_from_centers.diminfo[0].strides)), ((PyArrayObject *)__pyx_t_2)); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "sklearn/cluster/_k_means.pyx":351 * add_row_csr(data, indices, indptr, far_from_centers[i], @@ -4859,11 +4875,11 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_23 = (*__Pyx_BufPtrStrided1d(__pyx_t_7sklearn_7cluster_8_k_means_INT *, __pyx_pybuffernd_labels.rcbuffer->pybuffer.buf, __pyx_t_22, __pyx_pybuffernd_labels.diminfo[0].strides)); - __pyx_t_6 = __Pyx_GetItemInt(((PyObject *)__pyx_v_centers), __pyx_t_23, __pyx_t_7sklearn_7cluster_8_k_means_INT, 1, __Pyx_PyInt_From_npy_int32, 0, 1, 1); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_f_7sklearn_5utils_11sparsefuncs_add_row_csr(((PyArrayObject *)__pyx_v_data), ((PyArrayObject *)__pyx_v_indices), ((PyArrayObject *)__pyx_v_indptr), __pyx_v_i, ((PyArrayObject *)__pyx_t_6)); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_2 = __Pyx_GetItemInt(((PyObject *)__pyx_v_centers), __pyx_t_23, __pyx_t_7sklearn_7cluster_8_k_means_INT, 1, __Pyx_PyInt_From_npy_int32, 0, 1, 1); if (unlikely(__pyx_t_2 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_2); + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_f_7sklearn_5utils_11sparsefuncs_add_row_csr(((PyArrayObject *)__pyx_v_data), ((PyArrayObject *)__pyx_v_indices), ((PyArrayObject *)__pyx_v_indptr), __pyx_v_i, ((PyArrayObject *)__pyx_t_2)); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } /* "sklearn/cluster/_k_means.pyx":356 @@ -4873,27 +4889,27 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN * * return centers */ - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_newaxis); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_newaxis); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(__pyx_slice__5); - PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_slice__5); + PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_slice__5); __Pyx_GIVEREF(__pyx_slice__5); - PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_t_8); + PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyObject_GetItem(((PyObject *)__pyx_v_n_samples_in_cluster), __pyx_t_6); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_8 = PyObject_GetItem(((PyObject *)__pyx_v_n_samples_in_cluster), __pyx_t_2); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyNumber_InPlaceDivide(((PyObject *)__pyx_v_centers), __pyx_t_8); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyNumber_InPlaceDivide(((PyObject *)__pyx_v_centers), __pyx_t_8); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_7 = ((PyArrayObject *)__pyx_t_6); + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = ((PyArrayObject *)__pyx_t_2); { __Pyx_BufFmt_StackElem __pyx_stack[1]; __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_centers.rcbuffer->pybuffer); @@ -4911,8 +4927,8 @@ static PyObject *__pyx_pf_7sklearn_7cluster_8_k_means_8_centers_sparse(CYTHON_UN if (unlikely(__pyx_t_13 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_7 = 0; - __Pyx_DECREF_SET(__pyx_v_centers, ((PyArrayObject *)__pyx_t_6)); - __pyx_t_6 = 0; + __Pyx_DECREF_SET(__pyx_v_centers, ((PyArrayObject *)__pyx_t_2)); + __pyx_t_2 = 0; /* "sklearn/cluster/_k_means.pyx":358 * centers /= n_samples_in_cluster[:, np.newaxis] @@ -5136,7 +5152,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) */ - __pyx_t_4 = PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 215; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 215; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_Raise(__pyx_t_4, 0, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -5174,7 +5190,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P * * info.buf = PyArray_DATA(self) */ - __pyx_t_4 = PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_Raise(__pyx_t_4, 0, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -5446,7 +5462,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" */ - __pyx_t_4 = PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__8, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 257; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__8, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 257; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_Raise(__pyx_t_4, 0, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -5667,7 +5683,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyObject_Call(__pyx_builtin_ValueError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_8, 0, 0, 0); @@ -6277,7 +6293,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx * * if ((child.byteorder == c'>' and little_endian) or */ - __pyx_t_3 = PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -6326,7 +6342,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx * # One could encode it in the format string and have Cython * # complain instead, BUT: < and > in format strings also imply */ - __pyx_t_3 = PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 803; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 803; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -6427,7 +6443,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx * * # Until ticket #99 is fixed, use integers to avoid warnings */ - __pyx_t_4 = PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 823; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 823; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_Raise(__pyx_t_4, 0, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -6761,7 +6777,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_builtin_ValueError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_3, 0, 0, 0); @@ -7044,7 +7060,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_far_from_centers, __pyx_k_far_from_centers, sizeof(__pyx_k_far_from_centers), 0, 0, 1, 1}, {&__pyx_n_s_feature_idx, __pyx_k_feature_idx, sizeof(__pyx_k_feature_idx), 0, 0, 1, 1}, {&__pyx_n_s_float64, __pyx_k_float64, sizeof(__pyx_k_float64), 0, 0, 1, 1}, - {&__pyx_kp_s_home_felipe_nlp_contrib_scikit, __pyx_k_home_felipe_nlp_contrib_scikit, sizeof(__pyx_k_home_felipe_nlp_contrib_scikit), 0, 0, 1, 0}, + {&__pyx_kp_s_home_larsb_src_scikit_learn_skl, __pyx_k_home_larsb_src_scikit_learn_skl, sizeof(__pyx_k_home_larsb_src_scikit_learn_skl), 0, 0, 1, 0}, {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, {&__pyx_n_s_indices, __pyx_k_indices, sizeof(__pyx_k_indices), 0, 0, 1, 1}, @@ -7082,7 +7098,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, {&__pyx_n_s_utils_extmath, __pyx_k_utils_extmath, sizeof(__pyx_k_utils_extmath), 0, 0, 1, 1}, - {&__pyx_n_s_utils_fixes, __pyx_k_utils_fixes, sizeof(__pyx_k_utils_fixes), 0, 0, 1, 1}, {&__pyx_n_s_where, __pyx_k_where, sizeof(__pyx_k_where), 0, 0, 1, 1}, {&__pyx_n_s_x_squared_norms, __pyx_k_x_squared_norms, sizeof(__pyx_k_x_squared_norms), 0, 0, 1, 1}, {&__pyx_n_s_zeros, __pyx_k_zeros, sizeof(__pyx_k_zeros), 0, 0, 1, 1}, @@ -7233,7 +7248,7 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__13 = PyTuple_Pack(21, __pyx_n_s_X, __pyx_n_s_x_squared_norms, __pyx_n_s_centers, __pyx_n_s_counts, __pyx_n_s_nearest_center, __pyx_n_s_old_center, __pyx_n_s_compute_squared_diff, __pyx_n_s_X_data, __pyx_n_s_X_indices, __pyx_n_s_X_indptr, __pyx_n_s_n_samples, __pyx_n_s_n_clusters, __pyx_n_s_n_features, __pyx_n_s_sample_idx, __pyx_n_s_center_idx, __pyx_n_s_feature_idx, __pyx_n_s_k, __pyx_n_s_old_count, __pyx_n_s_new_count, __pyx_n_s_center_diff, __pyx_n_s_squared_diff); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(7, 0, 21, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_felipe_nlp_contrib_scikit, __pyx_n_s_mini_batch_update_csr, 141, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(7, 0, 21, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_larsb_src_scikit_learn_skl, __pyx_n_s_mini_batch_update_csr, 141, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "sklearn/cluster/_k_means.pyx":244 * @cython.wraparound(False) @@ -7245,7 +7260,7 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__15 = PyTuple_Pack(15, __pyx_n_s_X, __pyx_n_s_labels, __pyx_n_s_n_clusters, __pyx_n_s_distances, __pyx_n_s_n_samples, __pyx_n_s_n_features, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_c, __pyx_n_s_centers, __pyx_n_s_n_samples_in_cluster, __pyx_n_s_empty_clusters, __pyx_n_s_far_from_centers, __pyx_n_s_cluster_id, __pyx_n_s_new_center); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(4, 0, 15, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_felipe_nlp_contrib_scikit, __pyx_n_s_centers_dense, 244, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(4, 0, 15, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_larsb_src_scikit_learn_skl, __pyx_n_s_centers_dense, 244, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "sklearn/cluster/_k_means.pyx":298 * @@ -7257,7 +7272,7 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__17 = PyTuple_Pack(14, __pyx_n_s_X, __pyx_n_s_labels, __pyx_n_s_n_clusters, __pyx_n_s_distances, __pyx_n_s_n_features, __pyx_n_s_cluster_id, __pyx_n_s_data, __pyx_n_s_indices, __pyx_n_s_indptr, __pyx_n_s_centers, __pyx_n_s_far_from_centers, __pyx_n_s_n_samples_in_cluster, __pyx_n_s_empty_clusters, __pyx_n_s_i); if (unlikely(!__pyx_tuple__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__17); __Pyx_GIVEREF(__pyx_tuple__17); - __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(4, 0, 14, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_felipe_nlp_contrib_scikit, __pyx_n_s_centers_sparse, 298, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(4, 0, 14, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_larsb_src_scikit_learn_skl, __pyx_n_s_centers_sparse, 298, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -7378,78 +7393,57 @@ PyMODINIT_FUNC PyInit__k_means(void) Py_DECREF(__pyx_t_1); __pyx_t_1 = 0; /*--- Execution code ---*/ - /* "sklearn/cluster/_k_means.pyx":11 + /* "sklearn/cluster/_k_means.pyx":12 * * from libc.math cimport sqrt * import numpy as np # <<<<<<<<<<<<<< * import scipy.sparse as sp * cimport numpy as np */ - __pyx_t_2 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "sklearn/cluster/_k_means.pyx":12 + /* "sklearn/cluster/_k_means.pyx":13 * from libc.math cimport sqrt * import numpy as np * import scipy.sparse as sp # <<<<<<<<<<<<<< * cimport numpy as np * cimport cython */ - __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(__pyx_n_s__12); PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s__12); __Pyx_GIVEREF(__pyx_n_s__12); - __pyx_t_3 = __Pyx_Import(__pyx_n_s_scipy_sparse, __pyx_t_2, -1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_Import(__pyx_n_s_scipy_sparse, __pyx_t_2, -1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_sp, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_sp, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "sklearn/cluster/_k_means.pyx":16 + /* "sklearn/cluster/_k_means.pyx":17 * cimport cython * * from ..utils.extmath import norm # <<<<<<<<<<<<<< - * from ..utils.fixes import bincount * from sklearn.utils.sparsefuncs cimport add_row_csr + * */ - __pyx_t_3 = PyList_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyList_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_n_s_norm); PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_norm); __Pyx_GIVEREF(__pyx_n_s_norm); - __pyx_t_2 = __Pyx_Import(__pyx_n_s_utils_extmath, __pyx_t_3, 2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_Import(__pyx_n_s_utils_extmath, __pyx_t_3, 2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_norm); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_norm); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_norm, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_norm, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "sklearn/cluster/_k_means.pyx":17 - * - * from ..utils.extmath import norm - * from ..utils.fixes import bincount # <<<<<<<<<<<<<< - * from sklearn.utils.sparsefuncs cimport add_row_csr - * - */ - __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(__pyx_n_s_bincount); - PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_bincount); - __Pyx_GIVEREF(__pyx_n_s_bincount); - __pyx_t_3 = __Pyx_Import(__pyx_n_s_utils_fixes, __pyx_t_2, 2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_bincount); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_bincount, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "sklearn/cluster/_k_means.pyx":26 * double ddot "cblas_ddot"(int N, double *X, int incX, double *Y, int incY) * @@ -7466,10 +7460,10 @@ PyMODINIT_FUNC PyInit__k_means(void) * np.ndarray[DOUBLE, ndim=2] centers, * np.ndarray[INT, ndim=1] counts, */ - __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_7sklearn_7cluster_8_k_means_5_mini_batch_update_csr, NULL, __pyx_n_s_sklearn_cluster__k_means); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_mini_batch_update_csr, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_7sklearn_7cluster_8_k_means_5_mini_batch_update_csr, NULL, __pyx_n_s_sklearn_cluster__k_means); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_mini_batch_update_csr, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "sklearn/cluster/_k_means.pyx":244 * @cython.wraparound(False) @@ -7478,10 +7472,10 @@ PyMODINIT_FUNC PyInit__k_means(void) * np.ndarray[INT, ndim=1] labels, int n_clusters, * np.ndarray[DOUBLE, ndim=1] distances): */ - __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_7sklearn_7cluster_8_k_means_7_centers_dense, NULL, __pyx_n_s_sklearn_cluster__k_means); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_centers_dense, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_7sklearn_7cluster_8_k_means_7_centers_dense, NULL, __pyx_n_s_sklearn_cluster__k_means); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_centers_dense, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "sklearn/cluster/_k_means.pyx":298 * @@ -7490,20 +7484,20 @@ PyMODINIT_FUNC PyInit__k_means(void) * np.ndarray[DOUBLE, ndim=1] distances): * """M step of the K-means EM algorithm */ - __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_7sklearn_7cluster_8_k_means_9_centers_sparse, NULL, __pyx_n_s_sklearn_cluster__k_means); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_centers_sparse, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_7sklearn_7cluster_8_k_means_9_centers_sparse, NULL, __pyx_n_s_sklearn_cluster__k_means); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_centers_sparse, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "sklearn/cluster/_k_means.pyx":1 * # cython: profile=True # <<<<<<<<<<<<<< * # Profiling is enabled by default as the overhead does not seem to be measurable * # on this specific use case. */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "/usr/local/lib/python2.7/dist-packages/Cython/Includes/numpy/__init__.pxd":976 * arr.base = baseptr @@ -7983,8 +7977,10 @@ __pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp) } if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; while (*ts && *ts != ')') { - if (isspace(*ts)) - continue; + switch (*ts) { + case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue; + default: break; /* not a 'break' in the loop */ + } number = __Pyx_BufFmt_ExpectNumber(&ts); if (number == -1) return NULL; if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i]) @@ -8203,6 +8199,29 @@ static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) { return result; } +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = func->ob_type->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); +#if PY_VERSION_HEX >= 0x02060000 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; +#endif + result = (*call)(func, arg, kw); +#if PY_VERSION_HEX >= 0x02060000 + Py_LeaveRecursiveCall(); +#endif + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { if (unlikely(!type)) { PyErr_SetString(PyExc_SystemError, "Missing type object"); @@ -10200,11 +10219,13 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_ #endif /* PY_VERSION_HEX < 0x03030000 */ } else #endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */ +#if !CYTHON_COMPILING_IN_PYPY #if PY_VERSION_HEX >= 0x02060000 if (PyByteArray_Check(o)) { *length = PyByteArray_GET_SIZE(o); return PyByteArray_AS_STRING(o); } else +#endif #endif { char* result; diff --git a/sklearn/cluster/_k_means.pyx b/sklearn/cluster/_k_means.pyx index 3332056a50f0b..0e064128f1656 100644 --- a/sklearn/cluster/_k_means.pyx +++ b/sklearn/cluster/_k_means.pyx @@ -4,6 +4,7 @@ # Author: Peter Prettenhofer # Olivier Grisel +# Lars Buitinck # # Licence: BSD 3 clause @@ -14,7 +15,6 @@ cimport numpy as np cimport cython from ..utils.extmath import norm -from ..utils.fixes import bincount from sklearn.utils.sparsefuncs cimport add_row_csr ctypedef np.float64_t DOUBLE @@ -272,7 +272,7 @@ def _centers_dense(np.ndarray[DOUBLE, ndim=2] X, n_features = X.shape[1] cdef int i, j, c cdef np.ndarray[DOUBLE, ndim=2] centers = np.zeros((n_clusters, n_features)) - n_samples_in_cluster = bincount(labels, minlength=n_clusters) + n_samples_in_cluster = np.bincount(labels, minlength=n_clusters) empty_clusters = np.where(n_samples_in_cluster == 0)[0] # maybe also relocate small clusters? @@ -331,7 +331,7 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters, np.zeros((n_clusters, n_features)) cdef np.ndarray[np.npy_intp, ndim=1] far_from_centers cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] n_samples_in_cluster = \ - bincount(labels, minlength=n_clusters) + np.bincount(labels, minlength=n_clusters) cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] empty_clusters = \ np.where(n_samples_in_cluster == 0)[0] diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 67ebb2ac4c7df..10c6a1f2a01de 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -16,7 +16,6 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.extmath import row_norms -from sklearn.utils.fixes import unique from sklearn.metrics.cluster import v_measure_score from sklearn.cluster import KMeans, k_means from sklearn.cluster import MiniBatchKMeans @@ -192,7 +191,7 @@ def test_k_means_new_centers(): this_labels = km.labels_ # Reorder the labels so that the first instance is in cluster 0, # the second in cluster 1, ... - this_labels = unique(this_labels, return_index=True)[1][this_labels] + this_labels = np.unique(this_labels, return_index=True)[1][this_labels] np.testing.assert_array_equal(this_labels, labels) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index c679ea9caf6cb..d0dd63aa2e422 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -24,7 +24,6 @@ from .base import is_classifier, clone from .utils import check_arrays, check_random_state, safe_mask from .utils.validation import _num_samples -from .utils.fixes import unique from .externals.joblib import Parallel, delayed, logger from .externals.six import with_metaclass from .metrics.scorer import check_scoring @@ -379,7 +378,7 @@ def __init__(self, y, n_folds=3, indices=None): super(StratifiedKFold, self).__init__(len(y), n_folds, indices) y = np.asarray(y) n_samples = y.shape[0] - unique_labels, y_inversed = unique(y, return_inverse=True) + unique_labels, y_inversed = np.unique(y, return_inverse=True) label_counts = np.bincount(y_inversed) min_labels = np.min(label_counts) if self.n_folds > min_labels: @@ -474,7 +473,7 @@ def __init__(self, labels, indices=None): super(LeaveOneLabelOut, self).__init__(len(labels), indices) # We make a copy of labels to avoid side-effects during iteration self.labels = np.array(labels, copy=True) - self.unique_labels = unique(labels) + self.unique_labels = np.unique(labels) self.n_unique_labels = len(self.unique_labels) def _iter_test_masks(self): @@ -547,7 +546,7 @@ def __init__(self, labels, p, indices=None): # We make a copy of labels to avoid side-effects during iteration super(LeavePLabelOut, self).__init__(len(labels), indices) self.labels = np.array(labels, copy=True) - self.unique_labels = unique(labels) + self.unique_labels = np.unique(labels) self.n_unique_labels = len(self.unique_labels) self.p = p @@ -967,7 +966,7 @@ def __init__(self, y, n_iter=10, test_size=0.1, train_size=None, len(y), n_iter, test_size, train_size, indices, random_state, n_iterations) self.y = np.array(y) - self.classes, self.y_indices = unique(y, return_inverse=True) + self.classes, self.y_indices = np.unique(y, return_inverse=True) n_cls = self.classes.shape[0] if np.min(np.bincount(self.y_indices)) < 2: @@ -1274,7 +1273,7 @@ def _shuffle(y, labels, random_state): ind = random_state.permutation(len(y)) else: ind = np.arange(len(labels)) - for label in unique(labels): + for label in np.unique(labels): this_mask = (labels == label) ind[this_mask] = random_state.permutation(ind[this_mask]) return y[ind] diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index 4db0807f379ae..a2f9099015975 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -50,7 +50,6 @@ from .base import Bunch from .base import load_files from ..utils import check_random_state -from ..utils.fixes import in1d from ..feature_extraction.text import CountVectorizer from ..preprocessing import normalize from ..externals import joblib, six @@ -246,7 +245,7 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, # Sort the categories to have the ordering of the labels labels.sort() labels, categories = zip(*labels) - mask = in1d(data.target, labels) + mask = np.in1d(data.target, labels) data.filenames = data.filenames[mask] data.target = data.target[mask] # searchsorted to have continuous labels diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 30ffc8882d7ae..0b574e95139e9 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -1,4 +1,3 @@ - # Author: Mathieu Blondel # Arnaud Joly # License: BSD 3 clause @@ -8,7 +7,6 @@ from .base import BaseEstimator, ClassifierMixin, RegressorMixin from .externals.six.moves import xrange from .utils import check_random_state -from .utils.fixes import unique from .utils.validation import safe_asarray @@ -107,7 +105,7 @@ def fit(self, X, y): "shape (%d, 1)." % self.n_outputs_) for k in xrange(self.n_outputs_): - classes, y_k = unique(y[:, k], return_inverse=True) + classes, y_k = np.unique(y[:, k], return_inverse=True) self.classes_.append(classes) self.n_classes_.append(classes.shape[0]) self.class_prior_.append(np.bincount(y_k) / float(y_k.shape[0])) diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py index 1e589b9913743..327b32bd2e14a 100644 --- a/sklearn/ensemble/bagging.py +++ b/sklearn/ensemble/bagging.py @@ -19,7 +19,6 @@ from ..metrics import r2_score, accuracy_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor from ..utils import check_random_state, check_arrays, column_or_1d -from ..utils.fixes import bincount, unique, logaddexp from ..utils.random import sample_without_replacement from .base import BaseEnsemble, _partition_estimators @@ -86,7 +85,7 @@ def _parallel_build_estimators(n_estimators, ensemble, X, y, sample_weight, if bootstrap: indices = random_state.randint(0, n_samples, max_samples) - sample_counts = bincount(indices, minlength=n_samples) + sample_counts = np.bincount(indices, minlength=n_samples) curr_sample_weight *= sample_counts else: @@ -109,7 +108,7 @@ def _parallel_build_estimators(n_estimators, ensemble, X, y, sample_weight, max_samples, random_state=random_state) - sample_counts = bincount(indices, minlength=n_samples) + sample_counts = np.bincount(indices, minlength=n_samples) estimator.fit((X[indices])[:, features], y[indices]) samples = sample_counts > 0. @@ -158,16 +157,16 @@ def _parallel_predict_log_proba(estimators, estimators_features, X, n_classes): log_proba_estimator = estimator.predict_log_proba(X[:, features]) if n_classes == len(estimator.classes_): - log_proba = logaddexp(log_proba, log_proba_estimator) + log_proba = np.logaddexp(log_proba, log_proba_estimator) else: - log_proba[:, estimator.classes_] = logaddexp( + log_proba[:, estimator.classes_] = np.logaddexp( log_proba[:, estimator.classes_], log_proba_estimator[:, range(len(estimator.classes_))]) missing = np.setdiff1d(all_classes, estimator.classes_) - log_proba[:, missing] = logaddexp(log_proba[:, missing], - -np.inf) + log_proba[:, missing] = np.logaddexp(log_proba[:, missing], + -np.inf) return log_proba @@ -491,7 +490,7 @@ def _set_oob_score(self, X, y): def _validate_y(self, y): y = column_or_1d(y, warn=True) - self.classes_, y = unique(y, return_inverse=True) + self.classes_, y = np.unique(y, return_inverse=True) self.n_classes_ = len(self.classes_) return y @@ -605,7 +604,7 @@ def predict_log_proba(self, X): log_proba = all_log_proba[0] for j in range(1, len(all_log_proba)): - log_proba = logaddexp(log_proba, all_log_proba[j]) + log_proba = np.logaddexp(log_proba, all_log_proba[j]) log_proba -= np.log(self.n_estimators) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 6f2a55e319a07..1f18ccfe4e5b1 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -55,7 +55,6 @@ class calls the ``fit`` method of each sub-estimator on random samples from ..tree._tree import DTYPE, DOUBLE from ..utils import array2d, check_random_state, check_arrays, safe_asarray from ..utils.validation import DataConversionWarning -from ..utils.fixes import bincount, unique from .base import BaseEnsemble, _partition_estimators @@ -82,7 +81,7 @@ def _parallel_build_trees(trees, forest, X, y, sample_weight, verbose): random_state = check_random_state(tree.random_state) indices = random_state.randint(0, n_samples, n_samples) - sample_counts = bincount(indices, minlength=n_samples) + sample_counts = np.bincount(indices, minlength=n_samples) curr_sample_weight *= sample_counts tree.fit(X, y, @@ -395,7 +394,7 @@ def _validate_y(self, y): self.n_classes_ = [] for k in xrange(self.n_outputs_): - classes_k, y[:, k] = unique(y[:, k], return_inverse=True) + classes_k, y[:, k] = np.unique(y[:, k], return_inverse=True) self.classes_.append(classes_k) self.n_classes_.append(classes_k.shape[0]) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index e6a9187dc96f2..69a3adca76f21 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -37,7 +37,6 @@ from ..base import RegressorMixin from ..utils import check_random_state, array2d, check_arrays, column_or_1d from ..utils.extmath import logsumexp -from ..utils.fixes import unique from ..externals import six from ..tree.tree import DecisionTreeRegressor @@ -1119,7 +1118,7 @@ def fit(self, X, y, monitor=None): Returns self. """ y = column_or_1d(y, warn=True) - self.classes_, y = unique(y, return_inverse=True) + self.classes_, y = np.unique(y, return_inverse=True) self.n_classes_ = len(self.classes_) return super(GradientBoostingClassifier, self).fit(X, y, monitor) diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py index a9136cb2e1fdf..dc3ce563a61da 100644 --- a/sklearn/feature_extraction/image.py +++ b/sklearn/feature_extraction/image.py @@ -15,7 +15,6 @@ from scipy import sparse from numpy.lib.stride_tricks import as_strided -from ..utils.fixes import in1d from ..utils import array2d, check_random_state from ..base import BaseEstimator @@ -68,8 +67,8 @@ def _mask_edges_weights(mask, edges, weights=None): """Apply a mask to edges (weighted or not)""" inds = np.arange(mask.size) inds = inds[mask.ravel()] - ind_mask = np.logical_and(in1d(edges[0], inds), - in1d(edges[1], inds)) + ind_mask = np.logical_and(np.in1d(edges[0], inds), + np.in1d(edges[1], inds)) edges = edges[:, ind_mask] if weights is not None: weights = weights[ind_mask] diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 2035b509bc416..291f01735fb5a 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -25,7 +25,6 @@ import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin -from ..utils.fixes import bincount from ..externals.six.moves import xrange from ..preprocessing import normalize from .hashing import FeatureHasher @@ -458,7 +457,7 @@ def _get_hasher(self): def _document_frequency(X): """Count the number of non-zero values for each feature in sparse X.""" if sp.isspmatrix_csr(X): - return bincount(X.indices, minlength=X.shape[1]) + return np.bincount(X.indices, minlength=X.shape[1]) else: return np.diff(sp.csc_matrix(X, copy=False).indptr) diff --git a/sklearn/lda.py b/sklearn/lda.py index 9771b5c80e315..d648088e60585 100644 --- a/sklearn/lda.py +++ b/sklearn/lda.py @@ -12,7 +12,6 @@ from .base import BaseEstimator, ClassifierMixin, TransformerMixin from .utils.extmath import logsumexp -from .utils.fixes import unique from .utils import check_arrays, array2d, column_or_1d __all__ = ['LDA'] @@ -113,7 +112,7 @@ def fit(self, X, y, store_covariance=False, tol=1.0e-4): """ X, y = check_arrays(X, y, sparse_format='dense') y = column_or_1d(y, warn=True) - self.classes_, y = unique(y, return_inverse=True) + self.classes_, y = np.unique(y, return_inverse=True) n_samples, n_features = X.shape n_classes = len(self.classes_) if n_classes < 2: diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index fcae65a0dbafd..22e83a5c8cb46 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ -16,7 +16,6 @@ OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV, LinearRegression) -from sklearn.utils.fixes import count_nonzero from sklearn.utils import check_random_state from sklearn.datasets import make_sparse_coded_signal @@ -43,10 +42,10 @@ def test_correct_shapes_gram(): def test_n_nonzero_coefs(): - assert_true(count_nonzero(orthogonal_mp(X, y[:, 0], - n_nonzero_coefs=5)) <= 5) - assert_true(count_nonzero(orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5, - precompute=True)) <= 5) + assert_true(np.count_nonzero(orthogonal_mp(X, y[:, 0], + n_nonzero_coefs=5)) <= 5) + assert_true(np.count_nonzero(orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5, + precompute=True)) <= 5) def test_tol(): @@ -107,24 +106,24 @@ def test_estimator(): omp.fit(X, y[:, 0]) assert_equal(omp.coef_.shape, (n_features,)) assert_equal(omp.intercept_.shape, ()) - assert_true(count_nonzero(omp.coef_) <= n_nonzero_coefs) + assert_true(np.count_nonzero(omp.coef_) <= n_nonzero_coefs) omp.fit(X, y) assert_equal(omp.coef_.shape, (n_targets, n_features)) assert_equal(omp.intercept_.shape, (n_targets,)) - assert_true(count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs) + assert_true(np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs) omp.set_params(fit_intercept=False, normalize=False) assert_warns(DeprecationWarning, omp.fit, X, y[:, 0], Gram=G, Xy=Xy[:, 0]) assert_equal(omp.coef_.shape, (n_features,)) assert_equal(omp.intercept_, 0) - assert_true(count_nonzero(omp.coef_) <= n_nonzero_coefs) + assert_true(np.count_nonzero(omp.coef_) <= n_nonzero_coefs) assert_warns(DeprecationWarning, omp.fit, X, y, Gram=G, Xy=Xy) assert_equal(omp.coef_.shape, (n_targets, n_features)) assert_equal(omp.intercept_, 0) - assert_true(count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs) + assert_true(np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs) def test_scaling_with_gram(): diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index 99add6c7fa06f..aa0887a0edfdc 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -15,7 +15,6 @@ from scipy.sparse import coo_matrix import numpy as np -from ...utils.fixes import unique from .expected_mutual_info_fast import expected_mutual_information @@ -68,8 +67,8 @@ def contingency_matrix(labels_true, labels_pred, eps=None): ``eps is None``, the dtype of this array will be integer. If ``eps`` is given, the dtype will be float. """ - classes, class_idx = unique(labels_true, return_inverse=True) - clusters, cluster_idx = unique(labels_pred, return_inverse=True) + classes, class_idx = np.unique(labels_true, return_inverse=True) + clusters, cluster_idx = np.unique(labels_pred, return_inverse=True) n_classes = classes.shape[0] n_clusters = clusters.shape[0] # Using coo_matrix to accelerate simple histogram calculation, @@ -738,7 +737,7 @@ def entropy(labels): """Calculates the entropy for a labeling.""" if len(labels) == 0: return 1.0 - label_idx = unique(labels, return_inverse=True)[1] + label_idx = np.unique(labels, return_inverse=True)[1] pi = np.bincount(label_idx).astype(np.float) pi = pi[pi > 0] pi_sum = np.sum(pi) diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py index ccf0d96d2785e..fcd790305a3ba 100644 --- a/sklearn/metrics/metrics.py +++ b/sklearn/metrics/metrics.py @@ -34,7 +34,6 @@ from ..utils import column_or_1d from ..utils.multiclass import unique_labels from ..utils.multiclass import type_of_target -from ..utils.fixes import bincount ############################################################################### @@ -1619,14 +1618,14 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None, # labels are now from 0 to len(labels) - 1 -> use bincount tp_bins = y_true[y_true == y_pred] if len(tp_bins): - tp_sum = bincount(tp_bins, minlength=len(labels)) + tp_sum = np.bincount(tp_bins, minlength=len(labels)) else: # Pathological case true_sum = pred_sum = tp_sum = np.zeros(len(labels)) if len(y_pred): - pred_sum = bincount(y_pred, minlength=len(labels)) + pred_sum = np.bincount(y_pred, minlength=len(labels)) if len(y_true): - true_sum = bincount(y_true, minlength=len(labels)) + true_sum = np.bincount(y_true, minlength=len(labels)) ### Select labels to keep ### diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index 053d6fd867cac..de348b3a96d00 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -19,7 +19,6 @@ from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS from ..utils import safe_asarray, atleast2d_or_csr, check_arrays from ..utils.validation import DataConversionWarning -from ..utils.fixes import unique from ..externals import six @@ -620,7 +619,7 @@ def fit(self, X, y): self.classes_ = [] self._y = np.empty(y.shape, dtype=np.int) for k in range(self._y.shape[1]): - classes, self._y[:, k] = unique(y[:, k], return_inverse=True) + classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True) self.classes_.append(classes) if not self.outputs_2d_: diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py index c799f5acf336a..a99ed15973238 100644 --- a/sklearn/preprocessing/label.py +++ b/sklearn/preprocessing/label.py @@ -8,7 +8,7 @@ from ..base import BaseEstimator, TransformerMixin -from ..utils.fixes import unique, np_version +from ..utils.fixes import np_version from ..utils import deprecated, column_or_1d from ..utils.multiclass import unique_labels @@ -113,7 +113,7 @@ def fit_transform(self, y): """ y = column_or_1d(y, warn=True) _check_numpy_unicode_bug(y) - self.classes_, y = unique(y, return_inverse=True) + self.classes_, y = np.unique(y, return_inverse=True) return y def transform(self, y): diff --git a/sklearn/qda.py b/sklearn/qda.py index 4aa2b6c4b0096..ab67616d5d29e 100644 --- a/sklearn/qda.py +++ b/sklearn/qda.py @@ -12,7 +12,6 @@ from .base import BaseEstimator, ClassifierMixin from .externals.six.moves import xrange -from .utils.fixes import unique from .utils import check_arrays, array2d, column_or_1d __all__ = ['QDA'] @@ -97,7 +96,7 @@ def fit(self, X, y, store_covariances=False, tol=1.0e-4): """ X, y = check_arrays(X, y) y = column_or_1d(y, warn=True) - self.classes_, y = unique(y, return_inverse=True) + self.classes_, y = np.unique(y, return_inverse=True) n_samples, n_features = X.shape n_classes = len(self.classes_) if n_classes < 2: diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 1e4f0af73f06f..248564fe602e8 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -11,7 +11,6 @@ from ..preprocessing import LabelEncoder from ..utils import atleast2d_or_csr, array2d, check_random_state, column_or_1d from ..utils import ConvergenceWarning, compute_class_weight -from ..utils.fixes import unique from ..utils.extmath import safe_sparse_dot from ..externals import six @@ -437,7 +436,7 @@ class BaseSVC(BaseLibSVM, ClassifierMixin): def _validate_targets(self, y): y_ = column_or_1d(y, warn=True) - cls, y = unique(y_, return_inverse=True) + cls, y = np.unique(y_, return_inverse=True) self.class_weight_ = compute_class_weight(self.class_weight, cls, y_) if len(cls) < 2: raise ValueError( diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 5de7c9257a541..ba13ea818538c 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -15,7 +15,6 @@ from sklearn.metrics import f1_score from sklearn.utils import check_random_state from sklearn.utils import ConvergenceWarning -from sklearn.utils.fixes import unique from sklearn.utils.testing import assert_greater, assert_in, assert_less from sklearn.utils.testing import assert_warns @@ -358,7 +357,7 @@ def test_auto_weight(): X, y = iris.data[:, :2], iris.target + 1 unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2]) - classes = unique(y[unbalanced]) + classes = np.unique(y[unbalanced]) class_weights = compute_class_weight('auto', classes, y[unbalanced]) assert_true(np.argmax(class_weights) == 2) diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py index 4dda4293a6143..0e375a6dd1365 100644 --- a/sklearn/tests/test_cross_validation.py +++ b/sklearn/tests/test_cross_validation.py @@ -18,8 +18,6 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings -from sklearn.utils.fixes import unique - from sklearn import cross_validation as cval from sklearn.base import BaseEstimator from sklearn.datasets import make_regression @@ -374,12 +372,12 @@ def test_stratified_shuffle_split_iter(): sss = cval.StratifiedShuffleSplit(y, 6, test_size=0.33, random_state=0) for train, test in sss: - assert_array_equal(unique(y[train]), unique(y[test])) + assert_array_equal(np.unique(y[train]), np.unique(y[test])) # Checks if folds keep classes proportions - p_train = (np.bincount(unique(y[train], return_inverse=True)[1]) / - float(len(y[train]))) - p_test = (np.bincount(unique(y[test], return_inverse=True)[1]) / - float(len(y[test]))) + p_train = (np.bincount(np.unique(y[train], return_inverse=True)[1]) + / float(len(y[train]))) + p_test = (np.bincount(np.unique(y[test], return_inverse=True)[1]) + / float(len(y[test]))) assert_array_almost_equal(p_train, p_test, 1) assert_equal(y[train].size + y[test].size, y.size) assert_array_equal(np.lib.arraysetops.intersect1d(train, test), []) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index d9c46a6a266f1..d14ee3dfa0c85 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -29,7 +29,6 @@ from sklearn import tree from sklearn import datasets -from sklearn.utils.fixes import bincount from sklearn.preprocessing._weights import _balance_weights @@ -624,7 +623,7 @@ def test_sample_weight(): clf = DecisionTreeClassifier(random_state=1) clf.fit(X[duplicates], y[duplicates]) - sample_weight = bincount(duplicates, minlength=X.shape[0]) + sample_weight = np.bincount(duplicates, minlength=X.shape[0]) clf2 = DecisionTreeClassifier(random_state=1) clf2.fit(X, y, sample_weight=sample_weight) diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 323847c81d333..2250cc614d419 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -23,7 +23,6 @@ from ..externals.six.moves import xrange from ..feature_selection.from_model import _LearntSelectorMixin from ..utils import array2d, check_random_state -from ..utils.fixes import unique from ..utils.validation import check_arrays from ._tree import Criterion @@ -158,7 +157,7 @@ def fit(self, X, y, sample_mask=None, X_argsorted=None, check_input=True, self.n_classes_ = [] for k in xrange(self.n_outputs_): - classes_k, y[:, k] = unique(y[:, k], return_inverse=True) + classes_k, y[:, k] = np.unique(y[:, k], return_inverse=True) self.classes_.append(classes_k) self.n_classes_.append(classes_k.shape[0]) diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py index 45c31721199c7..50d3b8f0f70e5 100644 --- a/sklearn/utils/class_weight.py +++ b/sklearn/utils/class_weight.py @@ -4,8 +4,6 @@ import numpy as np -from .fixes import bincount - def compute_class_weight(class_weight, classes, y): """Estimate class weights for unbalanced datasets. @@ -45,7 +43,7 @@ def compute_class_weight(class_weight, classes, y): raise ValueError("classes should have valid labels that are in y") # inversely proportional to the number of samples in the class - recip_freq = 1. / bincount(y_ind) + recip_freq = 1. / np.bincount(y_ind) weight = recip_freq[le.transform(classes)] / np.mean(recip_freq) else: # user-defined dictionary diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index bec7567794c14..2010b3b633a16 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -12,10 +12,9 @@ import numpy as np from scipy import linalg from scipy.sparse import issparse -from distutils.version import LooseVersion from . import check_random_state, deprecated -from .fixes import qr_economic +from .fixes import np_version, qr_economic from ._logistic_sigmoid import _log_logistic_sigmoid from ..externals.six.moves import xrange from .sparsefuncs import csr_row_norms @@ -33,50 +32,25 @@ def norm(x): return nrm2(x) -_have_einsum = hasattr(np, "einsum") - - def row_norms(X, squared=False): """Row-wise (squared) Euclidean norm of X. - Equivalent to (X * X).sum(axis=1), but also supports CSR sparse matrices. - With newer NumPy versions, prevents an X.shape-sized temporary. + Equivalent to (X * X).sum(axis=1), but also supports CSR sparse matrices + and does not create an X.shape-sized temporary. Performs no input validation. """ if issparse(X): norms = csr_row_norms(X) - elif _have_einsum: - # einsum avoids the creation of a temporary the size of X, - # but it's only available in NumPy >= 1.6. - norms = np.einsum('ij,ij->i', X, X) else: - norms = (X * X).sum(axis=1) + norms = np.einsum('ij,ij->i', X, X) if not squared: np.sqrt(norms, norms) return norms -def _fast_logdet(A): - """Compute log(det(A)) for A symmetric - - Equivalent to : np.log(np.linalg.det(A)) but more robust. - It returns -Inf if det(A) is non positive or is not defined. - """ - # XXX: Should be implemented as in numpy, using ATLAS - # http://projects.scipy.org/numpy/browser/ \ - # trunk/numpy/linalg/linalg.py#L1559 - ld = np.sum(np.log(np.diag(A))) - a = np.exp(ld / A.shape[0]) - d = np.linalg.det(A / a) - ld += np.log(d) - if not np.isfinite(ld): - return -np.inf - return ld - - -def _fast_logdet_numpy(A): +def fast_logdet(A): """Compute log(det(A)) for A symmetric Equivalent to : np.log(nl.det(A)) but more robust. @@ -88,13 +62,6 @@ def _fast_logdet_numpy(A): return ld -# Numpy >= 1.5 provides a fast logdet -if hasattr(np.linalg, 'slogdet'): - fast_logdet = _fast_logdet_numpy -else: - fast_logdet = _fast_logdet - - def _impose_f_order(X): """Helper Function""" # important to access flags instead of calling np.isfortran, @@ -136,7 +103,7 @@ def _have_blas_gemm(): # Only use fast_dot for older NumPy; newer ones have tackled the speed issue. -if LooseVersion(np.__version__) < '1.7.2' and _have_blas_gemm(): +if np_version < (1, 7, 2) and _have_blas_gemm(): def fast_dot(A, B): """Compute fast dot products directly calling BLAS. diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 89ed12fe1287c..0f0ea673f06ce 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -36,48 +36,6 @@ def lsqr(X, y, tol=1e-3): return coef, None, None, residues -def _unique(ar, return_index=False, return_inverse=False): - """A replacement for the np.unique that appeared in numpy 1.4. - - While np.unique existed long before, keyword return_inverse was - only added in 1.4. - """ - try: - ar = ar.flatten() - except AttributeError: - if not return_inverse and not return_index: - items = sorted(set(ar)) - return np.asarray(items) - else: - ar = np.asarray(ar).flatten() - - if ar.size == 0: - if return_inverse and return_index: - return ar, np.empty(0, np.bool), np.empty(0, np.bool) - elif return_inverse or return_index: - return ar, np.empty(0, np.bool) - else: - return ar - - if return_inverse or return_index: - perm = ar.argsort() - aux = ar[perm] - flag = np.concatenate(([True], aux[1:] != aux[:-1])) - if return_inverse: - iflag = np.cumsum(flag) - 1 - iperm = perm.argsort() - if return_index: - return aux[flag], perm[flag], iflag[iperm] - else: - return aux[flag], iflag[iperm] - else: - return aux[flag], perm[flag] - - else: - ar.sort() - flag = np.concatenate(([True], ar[1:] != ar[:-1])) - return ar[flag] - np_version = [] for x in np.__version__.split('.'): try: @@ -87,28 +45,6 @@ def _unique(ar, return_index=False, return_inverse=False): np_version.append(x) np_version = tuple(np_version) -if np_version[:2] < (1, 5): - unique = _unique -else: - unique = np.unique - - -def _logaddexp(x1, x2, out=None): - """Fix np.logaddexp in numpy < 1.4 when x1 == x2 == -np.inf.""" - if out is not None: - result = np.logaddexp(x1, x2, out=out) - else: - result = np.logaddexp(x1, x2) - - result[np.logical_and(x1 == -np.inf, x2 == -np.inf)] = -np.inf - - return result - -if np_version[:2] < (1, 4): - logaddexp = _logaddexp -else: - logaddexp = np.logaddexp - try: from scipy.special import expit # SciPy >= 0.10 @@ -131,57 +67,6 @@ def expit(x, out=None): return out -def _bincount(X, weights=None, minlength=None): - """Replacing np.bincount in numpy < 1.6 to provide minlength.""" - result = np.bincount(X, weights) - if len(result) >= minlength: - return result - out = np.zeros(minlength, np.int) - out[:len(result)] = result - return out - -if np_version[:2] < (1, 6): - bincount = _bincount -else: - bincount = np.bincount - - -def _copysign(x1, x2): - """Slow replacement for np.copysign, which was introduced in numpy 1.4""" - return np.abs(x1) * np.sign(x2) - -if not hasattr(np, 'copysign'): - copysign = _copysign -else: - copysign = np.copysign - - -def _in1d(ar1, ar2, assume_unique=False): - """Replacement for in1d that is provided for numpy >= 1.4""" - if not assume_unique: - ar1, rev_idx = unique(ar1, return_inverse=True) - ar2 = np.unique(ar2) - ar = np.concatenate((ar1, ar2)) - # We need this to be a stable sort, so always use 'mergesort' - # here. The values from the first array should always come before - # the values from the second array. - order = ar.argsort(kind='mergesort') - sar = ar[order] - equal_adj = (sar[1:] == sar[:-1]) - flag = np.concatenate((equal_adj, [False])) - indx = order.argsort(kind='mergesort')[:len(ar1)] - - if assume_unique: - return flag[indx] - else: - return flag[indx][rev_idx] - -if not hasattr(np, 'in1d'): - in1d = _in1d -else: - in1d = np.in1d - - def qr_economic(A, **kwargs): """Compat function for the QR-decomposition in economic mode @@ -211,12 +96,6 @@ def savemat(file_name, mdict, oned_as="column", **kwargs): except TypeError: return scipy.io.savemat(file_name, mdict, **kwargs) -try: - from numpy import count_nonzero -except ImportError: - def count_nonzero(X): - return len(np.flatnonzero(X)) - # little danse to see if np.copy has an 'order' keyword argument if 'order' in inspect.getargspec(np.copy)[0]: def safe_copy(X): diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py index 1ef86dffc72a4..b95b573dc847c 100644 --- a/sklearn/utils/tests/test_class_weight.py +++ b/sklearn/utils/tests/test_class_weight.py @@ -1,7 +1,6 @@ import numpy as np from sklearn.utils.class_weight import compute_class_weight -from sklearn.utils.fixes import unique from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal @@ -13,7 +12,7 @@ def test_compute_class_weight(): """Test (and demo) compute_class_weight.""" y = np.asarray([2, 2, 2, 3, 3, 4]) - classes = unique(y) + classes = np.unique(y) cw = compute_class_weight("auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_true(cw[0] < cw[1] < cw[2]) diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py index 937765f12d94e..7d1d882614580 100644 --- a/sklearn/utils/tests/test_fixes.py +++ b/sklearn/utils/tests/test_fixes.py @@ -1,5 +1,6 @@ -# Authors: Emmanuelle Gouillart -# Gael Varoquaux +# Authors: Gael Varoquaux +# Justin Vincent +# Lars Buitinck # License: BSD 3 clause import numpy as np @@ -7,13 +8,7 @@ from nose.tools import assert_equal from numpy.testing import assert_almost_equal, assert_array_equal -from ..fixes import _in1d, _copysign, divide, expit - - -def test_in1d(): - a = np.arange(10) - b = a[a % 2 == 0] - assert_equal(_in1d(a, b).sum(), 5) +from ..fixes import divide, expit def test_expit(): @@ -28,11 +23,3 @@ def test_expit(): def test_divide(): assert_equal(divide(.6, 1), .600000000000) - - -def test_copysign(): - a = np.array([-1, 1, -1]) - b = np.array([1, -1, 1]) - - assert_array_equal(_copysign(a, b), b) - assert_array_equal(_copysign(b, a), a)