Skip to content
This repository has been archived by the owner on Dec 29, 2023. It is now read-only.

BUG: Raise exception when samples in table not present in metadata #64

Merged
merged 2 commits into from
Aug 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion q2_gneiss/cluster/_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def correlation_clustering(table: pd.DataFrame, pseudocount: float = 0.5

def gradient_clustering(table: pd.DataFrame,
gradient: NumericMetadataColumn,
ignore_missing_samples: bool = False,
weighted: bool = True) -> skbio.TreeNode:
""" Builds a tree for features based on a gradient.

Expand All @@ -78,6 +79,9 @@ def gradient_clustering(table: pd.DataFrame,
Contingency table where rows are samples and columns are features.
gradient : qiime2.NumericMetadataColumn
Continuous vector of measurements corresponding to samples.
ignore_missing_samples: bool
If False (the default), raise a KeyError when the table contains samples
that are not present in the gradient metadata; if True, ignore them.
weighted : bool
Specifies if abundance or presence/absence information
should be used to perform the clustering.
Expand All @@ -88,6 +92,14 @@ def gradient_clustering(table: pd.DataFrame,
Represents the partitioning of features with respect to the gradient.
"""
c = gradient.to_series()
if not ignore_missing_samples:
difference = set(table.index) - set(c.index)
if difference:
raise KeyError("There are samples present in the table not "
"present in the gradient metadata column. Override "
"this error by using the `ignore_missing_samples` "
"argument. Offending samples: %r"
% ', '.join(sorted([str(i) for i in difference])))
if not weighted:
table = (table > 0).astype(np.float)
table, c = match(table, c)
Expand All @@ -109,7 +121,8 @@ def gradient_clustering(table: pd.DataFrame,
'table': ('The feature table containing the samples in which '
'the columns will be clustered.'),
},
parameters={'gradient': MetadataColumn[Numeric], 'weighted': Bool},
parameters={'gradient': MetadataColumn[Numeric], 'weighted': Bool,
'ignore_missing_samples': Bool},
parameter_descriptions={
'gradient': ('Contains gradient values to sort the '
'features and samples.'),
Expand Down
20 changes: 20 additions & 0 deletions q2_gneiss/cluster/tests/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,26 @@ def test_gradient_artifact_weighted(self):

self.assertNotEqual(str(res_clust_uw), str(res_clust_w))

def test_gradient_missing_samples(self):
    """Check that table samples missing from the gradient raise KeyError.

    Builds a two-sample table ("a" and "s1") and runs the
    ``gradient_clustering`` plugin method against column "x" of
    ``test_metadata.txt`` without ``ignore_missing_samples``. The call
    must fail with a KeyError whose message lists sample "a" among the
    offending samples.

    NOTE(review): assumes sample "a" is absent from test_metadata.txt --
    the fixture is not visible here; confirm against the data file.
    """
    from qiime2.plugins.gneiss.methods import gradient_clustering
    table = pd.DataFrame({"x": 1, "y": 2}, index=["a", "s1"])
    table = qiime2.Artifact.import_data("FeatureTable[Frequency]", table)
    metadata = qiime2.Metadata.load(get_data_path("test_metadata.txt"))

    # Anchor on the "Offending samples:" suffix of the message. A bare
    # "not present.*a" is satisfied by any later "a" (for instance the one
    # in "gradient metadata"), so it would not prove that the sample id
    # itself was reported. The ids are sorted and rendered with %r, hence
    # the opening quote directly before "a"; \b keeps the match valid
    # whether or not further ids follow.
    with self.assertRaisesRegex(
            KeyError, r"not present.*Offending samples: 'a\b"):
        gradient_clustering(table, metadata.get_column("x"))

def test_gradient_ignore_missing_samples(self):
    """Smoke-test ``gradient_clustering`` with ``ignore_missing_samples``.

    Uses a two-sample table ("a" and "s1") and column "x" of
    ``test_metadata.txt``. With ``ignore_missing_samples=True`` the call
    is expected to complete without raising; the result itself is not
    inspected.
    """
    from qiime2.plugins.gneiss.methods import gradient_clustering

    frame = pd.DataFrame({"x": 1, "y": 2}, index=["a", "s1"])
    table_artifact = qiime2.Artifact.import_data(
        "FeatureTable[Frequency]", frame)
    metadata_path = get_data_path("test_metadata.txt")
    gradient_column = qiime2.Metadata.load(metadata_path).get_column("x")

    gradient_clustering(table_artifact, gradient_column,
                        ignore_missing_samples=True)
    # Checkpoint assertion: reaching this line means no exception was
    # raised by the call above.
    self.assertTrue(True)

Oddant1 marked this conversation as resolved.
Show resolved Hide resolved
def test_assign_ids(self):
from qiime2.plugins.gneiss.methods import assign_ids
tree_f = get_data_path("tree.qza")
Expand Down