Skip to content

Commit

Permalink
MAINT: Remove BIOMV100 support (#223)
Browse files (browse the repository at this point in the history)
  • Loading branch information
thermokarst committed Jul 30, 2019
1 parent 6735805 commit b63d2f5
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 181 deletions.
9 changes: 4 additions & 5 deletions q2_types/feature_table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@

import importlib

from ._format import (BIOMV100Format, BIOMV210Format, BIOMV100DirFmt,
BIOMV210DirFmt)
from ._format import (BIOMV210Format, BIOMV210DirFmt)
from ._type import (FeatureTable, Frequency, RelativeFrequency,
PresenceAbsence, Composition, Balance,
PercentileNormalized)

__all__ = ['BIOMV100Format', 'BIOMV100DirFmt', 'FeatureTable', 'Frequency',
'RelativeFrequency', 'PresenceAbsence', 'BIOMV210Format',
'BIOMV210DirFmt', 'Composition', 'Balance', 'PercentileNormalized']
__all__ = ['FeatureTable', 'Frequency', 'RelativeFrequency', 'PresenceAbsence',
'BIOMV210Format', 'BIOMV210DirFmt', 'Composition', 'Balance',
'PercentileNormalized']

importlib.import_module('q2_types.feature_table._transformer')
31 changes: 2 additions & 29 deletions q2_types/feature_table/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import ijson

import h5py
import biom

Expand All @@ -14,29 +14,6 @@
from ..plugin_setup import plugin, citations


class BIOMV100Format(model.TextFileFormat):
    """Text file format for BIOM 1.0 (JSON) feature tables.

    ``sniff`` walks the events produced by ``ijson.parse`` and decides from
    the top-level keys alone, returning as soon as a verdict is reached.
    """

    # Top-level keys that ``sniff`` tolerates; encountering any other
    # top-level key makes the file fail the check.
    top_level_keys = {
        'id', 'format', 'format_url', 'type', 'generated_by',
        'date', 'rows', 'columns', 'matrix_type', 'matrix_element_type',
        'shape', 'data', 'comment'
    }

    def sniff(self):
        """Return True if the file looks like BIOM 1.0, otherwise False.

        The file is accepted as soon as a ``format_url`` key is seen and
        rejected as soon as a key outside ``top_level_keys`` is seen. A
        JSON parse error, a decoding error, or running out of events
        without seeing ``format_url`` all yield False.
        """
        with self.open() as fh:
            try:
                for prefix, event, value in ijson.parse(fh):
                    # Only `map_key` events with an empty prefix are
                    # inspected (presumably the keys of the outermost JSON
                    # object -- confirm against the ijson docs).
                    if (prefix, event) != ('', 'map_key'):
                        continue
                    # `format_url` seems pretty unique to BIOM 1.0.
                    if value == 'format_url':
                        return True
                    if value not in self.top_level_keys:
                        return False
            except (ijson.JSONError, UnicodeDecodeError):
                # Unparseable input simply is not BIOM 1.0.
                pass
        return False


class BIOMV210Format(model.BinaryFileFormat):
# minimum requirements as described by
# http://biom-format.org/documentation/format_versions/biom-2.1.html
Expand Down Expand Up @@ -85,13 +62,9 @@ def sniff(self):
return False


BIOMV100DirFmt = model.SingleFileDirectoryFormat('BIOMV100DirFmt',
'feature-table.biom',
BIOMV100Format)
BIOMV210DirFmt = model.SingleFileDirectoryFormat('BIOMV210DirFmt',
'feature-table.biom',
BIOMV210Format)

plugin.register_views(BIOMV100Format, BIOMV210Format, BIOMV100DirFmt,
BIOMV210DirFmt, biom.Table,
plugin.register_views(BIOMV210Format, BIOMV210DirFmt, biom.Table,
citations=[citations['mcdonald2012biological']])
82 changes: 10 additions & 72 deletions q2_types/feature_table/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,22 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import json

import biom
import pandas as pd
import qiime2

from . import BIOMV100Format, BIOMV210Format
from . import BIOMV210Format
from ..plugin_setup import plugin

# NOTE: In the readers and writers for BIOM v1 and v2 below, metadata must be
# ignored on both axes because BIOM v1 and v2 are incompatible with certain
# types of metadata. We need to support both versions of the format and
# converting between them (e.g. to support existing QIIME 1 data). We can
# ignore metadata because it is represented as different types in QIIME 2, and
# thus is stored in separate artifacts. `biom.Table` does not have an API to
# delete/unset metadata on its axes, so we construct a new `biom.Table` object
# from the existing table's matrix data and axis IDs (see `_drop_axis_metadata`
# below). This workaround should be fairly efficient because the matrix data
# and axis IDs aren't copied; only a new `biom.Table` reference is created and
# some ID indexing operations are performed.
# NOTE: In the readers and writers for BIOM v2 below, metadata must be ignored
# on both axes because BIOM v2 is incompatible with certain types of metadata.
# We can ignore metadata because it is represented as different types in QIIME
# 2, and thus is stored in separate artifacts. `biom.Table` does not have an
# API to delete/unset metadata on its axes, so we construct a new `biom.Table`
# object from the existing table's matrix data and axis IDs (see
# `_drop_axis_metadata` below). This workaround should be fairly efficient
# because the matrix data and axis IDs aren't copied; only a new `biom.Table`
# reference is created and some ID indexing operations are performed.
#
# TODO: Revisit this workaround when `biom.Table` supports deletion of
# metadata: https://github.com/biocore/biom-format/issues/708
Expand All @@ -41,12 +37,6 @@ def _get_generated_by():
return 'qiime2 %s' % qiime2.__version__


def _parse_biom_table_v100(ff):
    """Load a BIOM 1.0 (JSON) file format object as a ``biom.Table``.

    The file is read with ``json.load`` and handed to
    ``biom.Table.from_json``; the result is passed through
    ``_drop_axis_metadata`` before being returned.
    """
    with ff.open() as handle:
        parsed = json.load(handle)
    return _drop_axis_metadata(biom.Table.from_json(parsed))


def _parse_biom_table_v210(ff):
with ff.open() as fh:
table = biom.Table.from_hdf5(fh)
Expand All @@ -73,47 +63,13 @@ def _table_to_v210(data):
return ff


def _table_to_v100(data):
    """Serialize ``data`` into a newly created ``BIOMV100Format`` file.

    The JSON is written directly to the open file handle through the
    ``direct_io`` argument of ``to_json``; the ``generated_by`` value comes
    from ``_get_generated_by()``.
    """
    result = BIOMV100Format()
    with result.open() as out:
        generated_by = _get_generated_by()
        data.to_json(generated_by=generated_by, direct_io=out)
    return result


def _dataframe_to_table(df):
    """Build a ``biom.Table`` from a DataFrame.

    The frame's values are transposed before being handed to ``biom.Table``;
    the frame's columns become the observation IDs and its index the sample
    IDs.

    Raises:
        TypeError: if ``df.index.inferred_type`` is not ``'string'``.
    """
    index_kind = df.index.inferred_type
    if index_kind != 'string':
        raise TypeError("Please provide a DataFrame with a string-based Index")
    matrix = df.T.values
    return biom.Table(matrix, observation_ids=df.columns, sample_ids=df.index)


@plugin.register_transformer
def _1(data: biom.Table) -> BIOMV100Format:
    """Write a ``biom.Table`` as BIOM 1.0 JSON after dropping axis metadata."""
    table = _drop_axis_metadata(data)

    result = BIOMV100Format()
    with result.open() as out:
        serialized = table.to_json(generated_by=_get_generated_by())
        out.write(serialized)
    return result


@plugin.register_transformer
def _2(ff: BIOMV100Format) -> biom.Table:
    """Read a BIOM 1.0 file as a ``biom.Table``."""
    table = _parse_biom_table_v100(ff)
    return table


# Note: this is an old TODO and should be revisited with the new view system.
# TODO: this always returns a pd.DataFrame of floats due to how biom loads
# tables, and we don't know what the dtype of the DataFrame should be. It would
# be nice to have support for a semantic-type override that specifies further
# transformations (e.g. converting from floats to ints or bools as
# appropriate).
@plugin.register_transformer
def _3(ff: BIOMV100Format) -> pd.DataFrame:
    """Read a BIOM 1.0 file as a DataFrame, going through ``biom.Table``."""
    return _table_to_dataframe(_parse_biom_table_v100(ff))


@plugin.register_transformer
def _4(ff: BIOMV210Format) -> pd.DataFrame:
table = _parse_biom_table_v210(ff)
Expand All @@ -136,12 +92,6 @@ def _7(data: biom.Table) -> pd.DataFrame:
return _table_to_dataframe(data)


@plugin.register_transformer
def _8(ff: BIOMV100Format) -> BIOMV210Format:
    """Convert a BIOM 1.0 file into a ``BIOMV210Format`` file."""
    return _table_to_v210(_parse_biom_table_v100(ff))


@plugin.register_transformer
def _9(df: pd.DataFrame) -> biom.Table:
    """Convert a DataFrame to a ``biom.Table`` via ``_dataframe_to_table``."""
    table = _dataframe_to_table(df)
    return table
Expand All @@ -152,23 +102,11 @@ def _10(df: pd.DataFrame) -> BIOMV210Format:
return _table_to_v210(_dataframe_to_table(df))


@plugin.register_transformer
def _11(ff: BIOMV210Format) -> BIOMV100Format:
    """Convert a BIOM 2.1.0 file into a ``BIOMV100Format`` file."""
    return _table_to_v100(_parse_biom_table_v210(ff))


@plugin.register_transformer
def _12(data: biom.Table) -> qiime2.Metadata:
    """Convert a ``biom.Table`` to ``qiime2.Metadata``."""
    metadata = _table_to_metadata(data)
    return metadata


@plugin.register_transformer
def _13(ff: BIOMV100Format) -> qiime2.Metadata:
    """Read a BIOM 1.0 file as ``qiime2.Metadata``, via ``biom.Table``."""
    return _table_to_metadata(_parse_biom_table_v100(ff))


@plugin.register_transformer
def _14(ff: BIOMV210Format) -> qiime2.Metadata:
table = _parse_biom_table_v210(ff)
Expand Down
24 changes: 1 addition & 23 deletions q2_types/feature_table/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,14 @@
import os
import unittest

from q2_types.feature_table import (BIOMV100Format, BIOMV210Format,
BIOMV100DirFmt, BIOMV210DirFmt)
from q2_types.feature_table import (BIOMV210Format, BIOMV210DirFmt)
from qiime2.plugin.testing import TestPluginBase
from qiime2.plugin import ValidationError


class TestFormats(TestPluginBase):
package = 'q2_types.feature_table.tests'

def test_biomv100_format_validate_positive(self):
    """The v100 fixture must validate as ``BIOMV100Format``."""
    path = self.get_data_path('feature-table_v100.biom')
    fmt = BIOMV100Format(path, mode='r')

    # No assertion needed: the test passes as long as validate() does not
    # raise.
    fmt.validate()

def test_biomv100_format_validate_negative(self):
    """The v210 fixture must be rejected by ``BIOMV100Format``."""
    path = self.get_data_path('feature-table_v210.biom')
    fmt = BIOMV100Format(path, mode='r')

    # The error message is expected to mention the rejecting format.
    with self.assertRaisesRegex(ValidationError, 'BIOMV100Format'):
        fmt.validate()

def test_biomv210_format_validate_positive(self):
filepath = self.get_data_path('feature-table_v210.biom')
format = BIOMV210Format(filepath, mode='r')
Expand All @@ -45,14 +31,6 @@ def test_biomv210_format_validate_negative(self):
with self.assertRaisesRegex(ValidationError, 'BIOMV210Format'):
format.validate()

def test_biomv100_dir_format_validate_positive(self):
    """A directory holding the v100 fixture must validate."""
    source = self.get_data_path('feature-table_v100.biom')
    # Copy the fixture into the temp dir under the file name used here for
    # the directory format's single member.
    target = os.path.join(self.temp_dir.name, 'feature-table.biom')
    shutil.copy(source, target)
    dir_fmt = BIOMV100DirFmt(self.temp_dir.name, mode='r')

    dir_fmt.validate()

def test_biomv210_dir_format_validate_positive(self):
filepath = self.get_data_path('feature-table_v210.biom')
shutil.copy(filepath,
Expand Down
64 changes: 12 additions & 52 deletions q2_types/feature_table/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,36 +13,16 @@
import qiime2

from pandas.util.testing import assert_frame_equal
from q2_types.feature_table import BIOMV100Format, BIOMV210Format
from q2_types.feature_table import BIOMV210Format
from qiime2.plugin.testing import TestPluginBase
from q2_types.feature_table._transformer import (_parse_biom_table_v100,
_parse_biom_table_v210,
from q2_types.feature_table._transformer import (_parse_biom_table_v210,
_table_to_dataframe,
_table_to_metadata)


class TestTransformers(TestPluginBase):
package = 'q2_types.feature_table.tests'

def test_biom_v100_format_to_biom_table(self):
    """Transforming BIOM 1.0 to ``biom.Table`` must keep both ID axes.

    The expected table is loaded from the same file with
    ``biom.load_table``.
    """
    ff, obs = self.transform_format(BIOMV100Format, biom.Table,
                                    filename='feature-table_v100.biom')
    exp = biom.load_table(str(ff))

    # Compare the complete ID sequences. Calling `.all()` on each side
    # first would reduce every ID array to a single value, so tables with
    # different IDs could still compare equal.
    for axis in ('observation', 'sample'):
        self.assertEqual(list(obs.ids(axis=axis)), list(exp.ids(axis=axis)))

def test_biom_v100_format_to_pandas_data_frame(self):
    """The transformer must match parsing and converting by hand."""
    ff, obs = self.transform_format(BIOMV100Format, pd.DataFrame,
                                    filename='feature-table_v100.biom')

    # Expected frame: the same two helpers, called directly.
    exp = _table_to_dataframe(_parse_biom_table_v100(ff))

    assert_frame_equal(exp, obs)

def test_biom_v210_format_to_pandas_data_frame(self):
input, obs = self.transform_format(BIOMV210Format, pd.DataFrame,
filename='feature-table_v210.biom')
Expand Down Expand Up @@ -78,29 +58,18 @@ def test_biom_table_to_biom_v210_format(self):
exp.ids(axis='sample').all())

def test_biom_table_to_pandas_data_frame(self):
filepath = self.get_data_path('feature-table_v100.biom')
filepath = self.get_data_path('feature-table_v210.biom')
transformer = self.get_transformer(biom.Table, pd.DataFrame)
input = biom.load_table(filepath)

obs = transformer(input)

self.assertIsInstance(obs, pd.DataFrame)

def test_biom_v100_format_to_biom_v210_format(self):
    """Converting BIOM 1.0 to BIOM 2.1.0 must keep both ID axes.

    Both the input file and the transformed file are loaded with
    ``biom.load_table`` and their IDs compared.
    """
    src, dst = self.transform_format(BIOMV100Format, BIOMV210Format,
                                     filename='feature-table_v100.biom')
    exp = biom.load_table(str(src))
    obs = biom.load_table(str(dst))

    # Compare the complete ID sequences. Calling `.all()` on each side
    # first would reduce every ID array to a single value, so tables with
    # different IDs could still compare equal.
    for axis in ('observation', 'sample'):
        self.assertEqual(list(obs.ids(axis=axis)), list(exp.ids(axis=axis)))

def test_to_pandas_data_frame_to_biom_v210_format(self):
filepath = self.get_data_path('feature-table_v100.biom')
transformer1 = self.get_transformer(BIOMV100Format, pd.DataFrame)
input = BIOMV100Format(filepath, mode='r')
def test_v210_to_pandas_data_frame_to_v210_format(self):
filepath = self.get_data_path('feature-table_v210.biom')
transformer1 = self.get_transformer(BIOMV210Format, pd.DataFrame)
input = BIOMV210Format(filepath, mode='r')
df = transformer1(input)

transformer2 = self.get_transformer(pd.DataFrame, BIOMV210Format)
Expand All @@ -118,34 +87,25 @@ def test_to_pandas_dataframe_bad_index(self):
with self.assertRaisesRegex(TypeError, 'string-based'):
transformer(df)

def test_to_pandas_data_frame_to_biom_table(self):
filepath = self.get_data_path('feature-table_v100.biom')
transformer1 = self.get_transformer(BIOMV100Format, pd.DataFrame)
input = BIOMV100Format(filepath, mode='r')
def test_v210_to_pandas_data_frame_to_biom_table(self):
filepath = self.get_data_path('feature-table_v210.biom')
transformer1 = self.get_transformer(BIOMV210Format, pd.DataFrame)
input = BIOMV210Format(filepath, mode='r')
df = transformer1(input)

transformer2 = self.get_transformer(pd.DataFrame, biom.Table)
obs = transformer2(df)
self.assertIsInstance(obs, biom.Table)

def test_biom_table_to_metadata(self):
filepath = self.get_data_path('feature-table_v100.biom')
filepath = self.get_data_path('feature-table_v210.biom')
transformer = self.get_transformer(biom.Table, qiime2.Metadata)
input = biom.load_table(filepath)

obs = transformer(input)

self.assertIsInstance(obs, qiime2.Metadata)

def test_biom_v100_format_to_metadata(self):
    """The transformer must match parsing and converting by hand."""
    ff, obs = self.transform_format(BIOMV100Format, qiime2.Metadata,
                                    filename='feature-table_v100.biom')

    # Expected metadata: the same two helpers, called directly.
    exp = _table_to_metadata(_parse_biom_table_v100(ff))

    self.assertEqual(exp, obs)

def test_biom_v210_format_to_metadata(self):
input, obs = self.transform_format(BIOMV210Format, qiime2.Metadata,
filename='feature-table_v210.biom')
Expand Down

0 comments on commit b63d2f5

Please sign in to comment.