Skip to content

Commit

Permalink
Merge pull request #939 from CarloLepelaars/get_feature_names_out
Browse files Browse the repository at this point in the history
`get_feature_names_out` for `UMAP` + tests
  • Loading branch information
lmcinnes authored Nov 11, 2022
2 parents cb1848d + 8b5601b commit 615cb1a
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 0 deletions.
53 changes: 53 additions & 0 deletions umap/tests/test_umap_get_feature_names_out.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.pipeline import Pipeline, FeatureUnion

from ..umap_ import UMAP


def test_get_feature_names_out_passthrough():
    """Names supplied by the caller must be handed back unchanged."""
    supplied_names = ['feature1', 'feature2']
    returned_names = UMAP().get_feature_names_out(feature_names_out=supplied_names)
    assert supplied_names == returned_names


def test_get_feature_names_out_default():
    """With no names supplied, names follow ``umap_component_<k>``."""
    # This test expects a default-constructed UMAP to report two components.
    generated_names = UMAP().get_feature_names_out()
    assert generated_names == ["umap_component_1", "umap_component_2"]


def test_get_feature_names_out_multicomponent():
    """One name is generated per requested embedding component."""
    n_dims = 10
    names = UMAP(n_components=n_dims).get_feature_names_out()

    # Check the count first so a length mismatch is reported plainly,
    # then check the exact, 1-based naming.
    assert len(names) == n_dims
    assert names == [f"umap_component_{k}" for k in range(1, n_dims + 1)]


def test_get_feature_names_out_featureunion():
    """UMAP names inside a FeatureUnion are expected to carry the step prefix."""
    features, _ = make_classification(n_samples=10)

    # Two UMAP steps with different output sizes (default and 3 components),
    # so both the prefixes and the per-step counts are exercised.
    union = FeatureUnion(
        [
            ("umap1", UMAP()),
            ("umap2", UMAP(n_components=3)),
        ]
    )
    pipeline = Pipeline([("umap_pipeline", union)])
    pipeline.fit(features)

    expected_feature_names = np.array(
        [
            "umap1__umap_component_1",
            "umap1__umap_component_2",
            "umap2__umap_component_1",
            "umap2__umap_component_2",
            "umap2__umap_component_3",
        ]
    )
    np.testing.assert_array_equal(
        pipeline.get_feature_names_out(), expected_feature_names
    )
11 changes: 11 additions & 0 deletions umap/umap_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3454,6 +3454,17 @@ def update(self, X):
self.rad_orig_ = aux_data["rad_orig"]
self.rad_emb_ = aux_data["rad_emb"]

def get_feature_names_out(self, feature_names_out=None):
    """
    Return a descriptive name for each output of the estimator.

    :param feature_names_out: Optional names to use as-is. When this is not
        None, the very same object is returned without modification.
    :return: ``feature_names_out`` if it was given; otherwise a list
        ``["umap_component_1", ..., "umap_component_<n_components>"]`` built
        from ``self.n_components``.
    """
    # NOTE(review): composite estimators may pass the *input* feature names
    # through this argument -- confirm that passthrough is intended there.
    if feature_names_out is not None:
        return feature_names_out

    # Component numbering is 1-based.
    return [
        f"umap_component_{index}"
        for index in range(1, self.n_components + 1)
    ]

def __repr__(self):
from sklearn.utils._pprint import _EstimatorPrettyPrinter
import re
Expand Down

0 comments on commit 615cb1a

Please sign in to comment.