Skip to content

Commit

Permalink
Merge 7c70944 into 98d2e37
Browse files Browse the repository at this point in the history
  • Loading branch information
mortonjt committed Jun 19, 2019
2 parents 98d2e37 + 7c70944 commit d2071e9
Show file tree
Hide file tree
Showing 12 changed files with 289 additions and 13 deletions.
10 changes: 6 additions & 4 deletions q2_types/feature_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,23 @@
HeaderlessTSVTaxonomyDirectoryFormat, TSVTaxonomyFormat,
TSVTaxonomyDirectoryFormat, DNAFASTAFormat, DNASequencesDirectoryFormat,
PairedDNASequencesDirectoryFormat, AlignedDNAFASTAFormat,
AlignedDNASequencesDirectoryFormat)
AlignedDNASequencesDirectoryFormat, DifferentialFormat,
DifferentialDirectoryFormat)
from ._type import (
FeatureData, Taxonomy, Sequence, PairedEndSequence, AlignedSequence)
FeatureData, Taxonomy, Sequence, PairedEndSequence, AlignedSequence,
Differential)

# TODO remove these imports when tests are rewritten. Remove from __all__ too
from ._transformer import DNAIterator, PairedDNAIterator, AlignedDNAIterator

__all__ = [
'TaxonomyFormat', 'TaxonomyDirectoryFormat', 'HeaderlessTSVTaxonomyFormat',
'HeaderlessTSVTaxonomyDirectoryFormat', 'TSVTaxonomyFormat',
'TSVTaxonomyDirectoryFormat', 'DNAFASTAFormat',
'TSVTaxonomyDirectoryFormat', 'DNAFASTAFormat', 'DifferentialFormat',
'DNASequencesDirectoryFormat', 'PairedDNASequencesDirectoryFormat',
'AlignedDNAFASTAFormat', 'AlignedDNASequencesDirectoryFormat',
'FeatureData', 'Taxonomy', 'Sequence', 'PairedEndSequence',
'AlignedSequence', 'DNAIterator', 'PairedDNAIterator',
'AlignedDNAIterator']
'AlignedDNAIterator', 'Differential', 'DifferentialDirectoryFormat']

importlib.import_module('q2_types.feature_data._transformer')
25 changes: 23 additions & 2 deletions q2_types/feature_data/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

import skbio.io
import qiime2.plugin.model as model

import qiime2
from qiime2.plugin import ValidationError
from ..plugin_setup import plugin


Expand Down Expand Up @@ -185,10 +186,30 @@ def sniff(self):
AlignedDNAFASTAFormat)


class DifferentialFormat(model.TextFileFormat):
    """Text file format for differentials.

    A file is considered valid when it loads as QIIME 2 metadata, has at
    least one column, and every column is numeric.
    """

    def validate(self, *args):
        """Check that the file is loadable, non-empty, all-numeric metadata.

        Any positional arguments are accepted and ignored.

        Raises
        ------
        ValidationError
            If the file cannot be loaded as metadata, if it has no columns,
            or if any column is not numeric.
        """
        try:
            metadata = qiime2.Metadata.load(str(self))
        except qiime2.metadata.MetadataFileError as load_error:
            # Re-raise as a ValidationError, keeping the original as cause.
            raise ValidationError(load_error) from load_error

        total_columns = metadata.column_count
        if total_columns == 0:
            raise ValidationError('Format must contain at least 1 column')

        # If filtering down to numeric columns drops anything, the file
        # contained at least one non-numeric column.
        numeric_only = metadata.filter_columns(column_type='numeric')
        if numeric_only.column_count != total_columns:
            raise ValidationError('Must only contain numeric values.')


# Directory format wrapping a single DifferentialFormat file; the file inside
# the directory is named 'differentials.tsv'.
DifferentialDirectoryFormat = model.SingleFileDirectoryFormat(
'DifferentialDirectoryFormat', 'differentials.tsv', DifferentialFormat)


plugin.register_formats(
TSVTaxonomyFormat, TSVTaxonomyDirectoryFormat,
HeaderlessTSVTaxonomyFormat, HeaderlessTSVTaxonomyDirectoryFormat,
TaxonomyFormat, TaxonomyDirectoryFormat, DNAFASTAFormat,
DNASequencesDirectoryFormat, PairedDNASequencesDirectoryFormat,
AlignedDNAFASTAFormat, AlignedDNASequencesDirectoryFormat
AlignedDNAFASTAFormat, AlignedDNASequencesDirectoryFormat,
DifferentialFormat, DifferentialDirectoryFormat
)
20 changes: 19 additions & 1 deletion q2_types/feature_data/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from ..feature_table import BIOMV210Format
from . import (TaxonomyFormat, HeaderlessTSVTaxonomyFormat, TSVTaxonomyFormat,
DNAFASTAFormat, PairedDNASequencesDirectoryFormat,
AlignedDNAFASTAFormat)
AlignedDNAFASTAFormat, DifferentialFormat)


# Taxonomy format transformers
Expand Down Expand Up @@ -359,3 +359,21 @@ def _19(data: AlignedDNAIterator) -> AlignedDNAFASTAFormat:
@plugin.register_transformer
def _33(ff: AlignedDNAFASTAFormat) -> qiime2.Metadata:
return _dnafastaformats_to_metadata(ff)


# differential types
@plugin.register_transformer
def _222(ff: DifferentialFormat) -> pd.DataFrame:
    # Load the file as metadata first, then expose it as a DataFrame.
    metadata = qiime2.Metadata.load(str(ff))
    return metadata.to_dataframe()


@plugin.register_transformer
def _223(ff: DifferentialFormat) -> qiime2.Metadata:
    # The format object stringifies to the path that Metadata.load reads.
    path = str(ff)
    return qiime2.Metadata.load(path)


@plugin.register_transformer
def _224(data: pd.DataFrame) -> DifferentialFormat:
    # Create the output format first, then wrap the DataFrame as metadata
    # and write it to the format's path.
    out_fmt = DifferentialFormat()
    metadata = qiime2.Metadata(data)
    metadata.save(str(out_fmt))
    return out_fmt
12 changes: 10 additions & 2 deletions q2_types/feature_data/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from ..plugin_setup import plugin
from . import (TSVTaxonomyDirectoryFormat, DNASequencesDirectoryFormat,
PairedDNASequencesDirectoryFormat,
AlignedDNASequencesDirectoryFormat)
AlignedDNASequencesDirectoryFormat,
DifferentialDirectoryFormat)


FeatureData = SemanticType('FeatureData', field_names='type')
Expand All @@ -26,8 +27,13 @@
AlignedSequence = SemanticType('AlignedSequence',
variant_of=FeatureData.field['type'])

# Semantic type for differentials, declared as a variant of FeatureData's
# 'type' field so it can be used as FeatureData[Differential].
Differential = SemanticType('Differential',
variant_of=FeatureData.field['type'])

plugin.register_semantic_types(FeatureData, Taxonomy, Sequence,
PairedEndSequence, AlignedSequence)
PairedEndSequence, AlignedSequence,
Differential)


plugin.register_semantic_type_to_format(
FeatureData[Taxonomy],
Expand All @@ -41,3 +47,5 @@
plugin.register_semantic_type_to_format(
FeatureData[AlignedSequence],
artifact_format=AlignedDNASequencesDirectoryFormat)
# Associate FeatureData[Differential] with DifferentialDirectoryFormat.
plugin.register_semantic_type_to_format(
FeatureData[Differential], DifferentialDirectoryFormat)
32 changes: 32 additions & 0 deletions q2_types/feature_data/tests/data/bad_differential.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
featureid effect bad_effect
F0 -0.910182258821735 a
F1 1.01418002973925 0
F2 1.02456128258909 0
F3 -0.74363992043225 0
F4 1.29823896534823 0
F5 -1.12965055281585 0
F6 -0.522401797448688 0
F7 0.327560711072239 0
F8 -1.3738693465664802 0
F9 -0.7847891526325621 0
F10 -0.280063201878434 0
F11 -0.251269847578052 0
F12 1.24602780723028 0
F13 0.665734866338239 0
F14 -0.889042985114811 0
F15 -0.811956802515126 0
F16 0.11591764582945001 0
F17 -0.156195990858492 0
F18 -0.965770633683909 0
F19 0.8035240337800391 0
F20 0.680306950765235 0
F21 -0.688388077896823 0
F22 0.7949653982850671 0
F23 -1.11912925367142 0
F24 -1.1059356352774599 0
F25 0.678934047810573 0
F26 -0.937189288219405 0
F27 -0.9997301931164799 0
F28 -0.31799033232181 0
F29 -0.66141741897569 0
F30 0.550511528315366 0
32 changes: 32 additions & 0 deletions q2_types/feature_data/tests/data/differentials.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
featureid effect
F0 -0.910182258821735
F1 1.01418002973925
F2 1.02456128258909
F3 -0.74363992043225
F4 1.29823896534823
F5 -1.12965055281585
F6 -0.522401797448688
F7 0.327560711072239
F8 -1.3738693465664802
F9 -0.7847891526325621
F10 -0.280063201878434
F11 -0.251269847578052
F12 1.24602780723028
F13 0.665734866338239
F14 -0.889042985114811
F15 -0.811956802515126
F16 0.11591764582945001
F17 -0.156195990858492
F18 -0.965770633683909
F19 0.8035240337800391
F20 0.680306950765235
F21 -0.688388077896823
F22 0.7949653982850671
F23 -1.11912925367142
F24 -1.1059356352774599
F25 0.678934047810573
F26 -0.937189288219405
F27 -0.9997301931164799
F28 -0.31799033232181
F29 -0.66141741897569
F30 0.550511528315366
32 changes: 32 additions & 0 deletions q2_types/feature_data/tests/data/empty_differential.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
featureid
F0
F1
F2
F3
F4
F5
F6
F7
F8
F9
F10
F11
F12
F13
F14
F15
F16
F17
F18
F19
F20
F21
F22
F23
F24
F25
F26
F27
F28
F29
F30
32 changes: 32 additions & 0 deletions q2_types/feature_data/tests/data/inf_differential.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
featureid effect
F0 inf
F1 1.01418002973925
F2 1.02456128258909
F3 -0.74363992043225
F4 1.29823896534823
F5 -1.12965055281585
F6 -0.522401797448688
F7 0.327560711072239
F8 -1.3738693465664802
F9 -0.7847891526325621
F10 -0.280063201878434
F11 -0.251269847578052
F12 1.24602780723028
F13 0.665734866338239
F14 -0.889042985114811
F15 -0.811956802515126
F16 0.11591764582945
F17 -0.156195990858492
F18 -0.965770633683909
F19 0.8035240337800391
F20 0.680306950765235
F21 -0.688388077896823
F22 0.7949653982850671
F23 -1.11912925367142
F24 -1.10593563527746
F25 0.678934047810573
F26 -0.937189288219405
F27 -0.99973019311648
F28 -0.31799033232181
F29 -0.66141741897569
F30 0.550511528315366
1 change: 1 addition & 0 deletions q2_types/feature_data/tests/data/not_differential.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
asdfasdfasdfasdf asdfasdfa asdfasdf asdf asdfas dfa
55 changes: 54 additions & 1 deletion q2_types/feature_data/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
HeaderlessTSVTaxonomyDirectoryFormat, TSVTaxonomyFormat,
TSVTaxonomyDirectoryFormat, DNAFASTAFormat, DNASequencesDirectoryFormat,
PairedDNASequencesDirectoryFormat, AlignedDNAFASTAFormat,
AlignedDNASequencesDirectoryFormat
AlignedDNASequencesDirectoryFormat, DifferentialDirectoryFormat
)
from qiime2.plugin.testing import TestPluginBase
from qiime2.plugin import ValidationError
Expand Down Expand Up @@ -203,5 +203,58 @@ def test_aligned_dna_sequences_directory_format(self):
format.validate()


class TestDifferentialFormat(TestPluginBase):
    """Validation tests for ``DifferentialDirectoryFormat``."""

    package = 'q2_types.feature_data.tests'

    def _stage_differential(self, data_filename):
        """Copy a test data file into the temp dir as ``differentials.tsv``.

        Parameters
        ----------
        data_filename : str
            Name of the file under this package's test data directory.

        Returns
        -------
        str
            Path of the temporary directory now holding the copied file.
        """
        temp_dir = self.temp_dir.name
        shutil.copy(self.get_data_path(data_filename),
                    os.path.join(temp_dir, 'differentials.tsv'))
        return temp_dir

    def test_differential_format(self):
        temp_dir = self._stage_differential('differentials.tsv')
        dir_fmt = DifferentialDirectoryFormat(temp_dir, mode='r')
        # The test passes as long as validation raises nothing.
        dir_fmt.validate()

    def test_differential_format_empty(self):
        temp_dir = self._stage_differential('empty_differential.tsv')

        with self.assertRaisesRegex(ValidationError, 'least 1 column'):
            DifferentialDirectoryFormat(temp_dir, mode='r').validate()

    def test_differential_format_not(self):
        temp_dir = self._stage_differential('not_differential.tsv')

        with self.assertRaises(ValidationError):
            DifferentialDirectoryFormat(temp_dir, mode='r').validate()

    def test_differential_format_inf(self):
        temp_dir = self._stage_differential('inf_differential.tsv')

        with self.assertRaisesRegex(ValidationError, 'numeric'):
            DifferentialDirectoryFormat(temp_dir, mode='r').validate()

    def test_differential_format_bad_type(self):
        temp_dir = self._stage_differential('bad_differential.tsv')

        with self.assertRaisesRegex(ValidationError, 'numeric'):
            DifferentialDirectoryFormat(temp_dir, mode='r').validate()


if __name__ == '__main__':
unittest.main()
39 changes: 38 additions & 1 deletion q2_types/feature_data/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
TaxonomyFormat, HeaderlessTSVTaxonomyFormat, TSVTaxonomyFormat,
DNAFASTAFormat, DNAIterator, PairedDNAIterator,
PairedDNASequencesDirectoryFormat, AlignedDNAFASTAFormat,
AlignedDNAIterator
DifferentialFormat, AlignedDNAIterator
)
from q2_types.feature_data._transformer import (
_taxonomy_formats_to_dataframe, _dataframe_to_tsv_taxonomy_format)
Expand Down Expand Up @@ -656,5 +656,42 @@ def test_aligned_dnafasta_format_to_metadata(self):
self.assertEqual(exp, obs)


class TestDifferentialTransformer(TestPluginBase):
    """Transformer tests for ``DifferentialFormat``."""

    package = 'q2_types.feature_data.tests'

    def test_differential_to_df(self):
        _, df = self.transform_format(
            DifferentialFormat, pd.DataFrame, filename='differentials.tsv')

        # Spot-check only the first four feature ids.
        expected_ids = ['F0', 'F1', 'F2', 'F3']
        observed_ids = list(df.index[:4])
        self.assertListEqual(expected_ids, observed_ids)

    def test_differential_to_md(self):
        _, md = self.transform_format(
            DifferentialFormat, qiime2.Metadata, filename='differentials.tsv')
        df = md.to_dataframe()

        # Spot-check only the first four feature ids.
        expected_ids = ['F0', 'F1', 'F2', 'F3']
        observed_ids = list(df.index[:4])
        self.assertListEqual(expected_ids, observed_ids)

    def test_df_to_differential(self):
        transformer = self.get_transformer(pd.DataFrame, DifferentialFormat)

        feature_ids = pd.Index(['SEQUENCE1', 'SEQUENCE2', 'SEQUENCE3'],
                               name='featureid')
        differentials = pd.DataFrame(
            {'differential': [-1.3, 0.1, 1.2]}, index=feature_ids,
            dtype=float)

        result = transformer(differentials)

        self.assertIsInstance(result, DifferentialFormat)


if __name__ == '__main__':
unittest.main()
Loading

0 comments on commit d2071e9

Please sign in to comment.