Skip to content

Commit

Permalink
[MRG] Changed name n_components to n_connected_components in Agglomer…
Browse files Browse the repository at this point in the history
…ativeClustering base class (#13427)

* Changed name n_components to n_connected_components in base class

* Fixed line which exceeded PEP8 max of 79 chars

* Fixed line 818 which exceeded PEP8 max of 79 chars

* Added try and except to provide deprecation warning if `n_components` is passed

* Updated deprecation and removal version numbers

* Added deprecation of n_components using the `@property` decorator

* Makes FeatureAgglomeration class inherit n_connected_components_ attribute from AgglomerativeClustering class

* Added test for DeprecationWarning when trying to access n_components

* Removed the `@property` decorator causing linting error

* Fixed typo in test

* Fixed flake8 error due to single line between 2 functions

* Test fix attempt

* Edited test function docstring

* Corrected n_components deprecation test docstring

* Fixed line continuation issue in AgglomerativeClustering base class

* Added deprecation message as part of the @deprecated decorator

* Added `n_components_` attribute deprecation information in the Attributes section of the AgglomerativeClustering base class docstring

* Added test for deprecation warning message

* Added `n_components_` attribute deprecation information in the Attributes section of the FeatureAgglomeration base class docstring

* Fixed test issue and added longer match string

* Edited n_components_ deprecation message to add double backticks

* Fixed match string to reflect deprecation message change in test

* Added name to list of contributors

* Documented information in v0.21 changelog

* Added cluster parent folder to documentation in v0.21 changelog

* Removed myself from list of core contributors

* Moved |API| subsection to the end of the list, and changed reference to Github username

* Removed n_components deprecation documentation from FeatureAgglomeration and AgglomerativeClustering class docstrings

* Fix indentation on _fix_connectivity function call
  • Loading branch information
Stéphane Couvreur authored and agramfort committed Apr 4, 2019
1 parent 22ee7b2 commit 93e09aa
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 27 deletions.
5 changes: 5 additions & 0 deletions doc/whats_new/v0.21.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ Support for Python 3.4 and below has been officially dropped.
to set and that scales better, by :user:`Shane <espg>`,
:user:`Adrin Jalali <adrinjalali>`, and :user:`Erich Schubert <kno10>`.

- |API| The ``n_components_`` attribute in :class:`cluster.AgglomerativeClustering`
and :class:`cluster.FeatureAgglomeration` has been renamed to
``n_connected_components_``.
:issue:`13427` by :user:`Stephane Couvreur <scouvreur>`.

:mod:`sklearn.datasets`
.......................

Expand Down
64 changes: 37 additions & 27 deletions sklearn/cluster/hierarchical.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@
from ._feature_agglomeration import AgglomerationTransform
from ..utils.fast_dict import IntFloatDict
from ..utils.fixes import _astype_copy_false
from ..utils import deprecated

###############################################################################
# For non fully-connected graphs


def _fix_connectivity(X, connectivity, affinity):
"""
Fixes the connectivity matrix
Expand All @@ -54,15 +54,15 @@ def _fix_connectivity(X, connectivity, affinity):
connectivity = connectivity.tolil()

# Compute the number of nodes
n_components, labels = connected_components(connectivity)
n_connected_components, labels = connected_components(connectivity)

if n_components > 1:
if n_connected_components > 1:
warnings.warn("the number of connected components of the "
"connectivity matrix is %d > 1. Completing it to avoid "
"stopping the tree early." % n_components,
"stopping the tree early." % n_connected_components,
stacklevel=2)
# XXX: Can we do without completing the matrix?
for i in range(n_components):
for i in range(n_connected_components):
idx_i = np.where(labels == i)[0]
Xi = X[idx_i]
for j in range(i):
Expand All @@ -75,11 +75,11 @@ def _fix_connectivity(X, connectivity, affinity):
connectivity[idx_i[ii], idx_j[jj]] = True
connectivity[idx_j[jj], idx_i[ii]] = True

return connectivity, n_components
return connectivity, n_connected_components


def _single_linkage_tree(connectivity, n_samples, n_nodes, n_clusters,
n_components, return_distance):
n_connected_components, return_distance):
"""
Perform single linkage clustering on sparse data via the minimum
spanning tree from scipy.sparse.csgraph, then using union-find to label.
Expand Down Expand Up @@ -125,8 +125,8 @@ def _single_linkage_tree(connectivity, n_samples, n_nodes, n_clusters,

if return_distance:
distances = single_linkage_tree[:, 2]
return children_, n_components, n_samples, parent, distances
return children_, n_components, n_samples, parent
return children_, n_connected_components, n_samples, parent, distances
return children_, n_connected_components, n_samples, parent


###############################################################################
Expand Down Expand Up @@ -177,7 +177,7 @@ def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False):
at the i-th iteration, children[i][0] and children[i][1]
are merged to form node `n_samples + i`
n_components : int
n_connected_components : int
The number of connected components in the graph.
n_leaves : int
Expand Down Expand Up @@ -239,8 +239,9 @@ def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False):
else:
return children_, 1, n_samples, None

connectivity, n_components = _fix_connectivity(X, connectivity,
affinity='euclidean')
connectivity, n_connected_components = _fix_connectivity(
X, connectivity,
affinity='euclidean')
if n_clusters is None:
n_nodes = 2 * n_samples - 1
else:
Expand Down Expand Up @@ -333,9 +334,9 @@ def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False):
if return_distance:
# 2 is scaling factor to compare w/ unstructured version
distances = np.sqrt(2. * distances)
return children, n_components, n_leaves, parent, distances
return children, n_connected_components, n_leaves, parent, distances
else:
return children, n_components, n_leaves, parent
return children, n_connected_components, n_leaves, parent


# single average and complete linkage
Expand Down Expand Up @@ -396,7 +397,7 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',
at the i-th iteration, children[i][0] and children[i][1]
are merged to form node `n_samples + i`
n_components : int
n_connected_components : int
The number of connected components in the graph.
n_leaves : int
Expand Down Expand Up @@ -467,9 +468,9 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',
return children_, 1, n_samples, None, distances
return children_, 1, n_samples, None

connectivity, n_components = _fix_connectivity(X, connectivity,
affinity=affinity)

connectivity, n_connected_components = _fix_connectivity(
X, connectivity,
affinity=affinity)
connectivity = connectivity.tocoo()
# Put the diagonal to zero
diag_mask = (connectivity.row != connectivity.col)
Expand Down Expand Up @@ -497,7 +498,8 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',

if linkage == 'single':
return _single_linkage_tree(connectivity, n_samples, n_nodes,
n_clusters, n_components, return_distance)
n_clusters, n_connected_components,
return_distance)

if return_distance:
distances = np.empty(n_nodes - n_samples)
Expand Down Expand Up @@ -567,8 +569,8 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',
children = np.array(children)[:, ::-1]

if return_distance:
return children, n_components, n_leaves, parent, distances
return children, n_components, n_leaves, parent
return children, n_connected_components, n_leaves, parent, distances
return children, n_connected_components, n_leaves, parent


# Matching names to tree-building strategies
Expand Down Expand Up @@ -717,7 +719,7 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin):
n_leaves_ : int
Number of leaves in the hierarchical tree.
n_components_ : int
n_connected_components_ : int
The estimated number of connected components in the graph.
children_ : array-like, shape (n_samples-1, 2)
Expand Down Expand Up @@ -756,6 +758,13 @@ def __init__(self, n_clusters=2, affinity="euclidean",
self.affinity = affinity
self.pooling_func = pooling_func

# Deprecated read-only alias for ``n_connected_components_``, kept so that
# code written against the old attribute name keeps working until removal.
# ``@property`` is the outermost decorator: it wraps the function that has
# already been decorated by ``@deprecated`` with the message below.
@property
@deprecated("The ``n_components_`` attribute was deprecated "
"in favor of ``n_connected_components_`` in 0.21 "
"and will be removed in 0.23.")
def n_components_(self):
# Forward to the renamed attribute.
return self.n_connected_components_

def fit(self, X, y=None):
"""Fit the hierarchical clustering on the data
Expand Down Expand Up @@ -819,10 +828,11 @@ def fit(self, X, y=None):
if self.linkage != 'ward':
kwargs['linkage'] = self.linkage
kwargs['affinity'] = self.affinity
self.children_, self.n_components_, self.n_leaves_, parents = \
memory.cache(tree_builder)(X, connectivity,
n_clusters=n_clusters,
**kwargs)
(self.children_, self.n_connected_components_, self.n_leaves_,
parents) = memory.cache(tree_builder)(X, connectivity,
n_clusters=n_clusters,
**kwargs)

# Cut the tree
if compute_full_tree:
self.labels_ = _hc_cut(self.n_clusters, self.children_,
Expand Down Expand Up @@ -902,7 +912,7 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
n_leaves_ : int
Number of leaves in the hierarchical tree.
n_components_ : int
n_connected_components_ : int
The estimated number of connected components in the graph.
children_ : array-like, shape (n_nodes-1, 2)
Expand Down
14 changes: 14 additions & 0 deletions sklearn/cluster/tests/test_hierarchical.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,3 +598,17 @@ def increment(self, *args, **kwargs):
linkage_tree(X, connectivity=connectivity, affinity=fa.increment)

assert_equal(fa.counter, 3)


def test_n_components_deprecation():
    """Check that reading ``n_components_`` raises a DeprecationWarning.

    The deprecated attribute must also return the same value as
    ``n_connected_components_``.
    """
    samples = np.array([[1, 2], [1, 4], [1, 0], [4, 2]])
    model = AgglomerativeClustering()
    model.fit(samples)

    expected_message = ("``n_components_`` attribute was deprecated "
                        "in favor of ``n_connected_components_``")
    with pytest.warns(DeprecationWarning, match=expected_message):
        legacy_value = model.n_components_
    assert legacy_value == model.n_connected_components_

0 comments on commit 93e09aa

Please sign in to comment.