Skip to content

Commit

Permalink
More changes to merge
Browse files Browse the repository at this point in the history
* Additional docstrings
* Enforce oc_model subclasses OneCodexBase
* Update CircleCI
  • Loading branch information
polyatail committed Jan 11, 2019
1 parent f346e38 commit 1e934f6
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 29 deletions.
51 changes: 38 additions & 13 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,53 @@ workflows:
version: 2
all-tests:
jobs:
- test-py3
- test-py2
- test-minimal
- test-simplejson
- test-py37
- test-py35
- test-py27
- test-minimal-py34
- test-simplejson-py35
- coverage
- lint

version: 2
jobs:
test-py3:
test-py37:
docker:
- image: circleci/python:3.6-stretch
- image: circleci/python:3.7-stretch
steps:
- checkout
- run:
name: Install Python deps in virtual environment
command: |
python3 -m venv venv
. venv/bin/activate
pip install -q -U pip
pip install --progress-bar=off numpy
pip install --progress-bar=off .[all,testing]
- run:
name: Run tests
command: |
. venv/bin/activate
py.test -v tests/
test-py2:
test-py35:
docker:
- image: circleci/python:3.5-stretch
steps:
- checkout
- run:
name: Install Python deps in virtual environment
command: |
python3 -m venv venv
. venv/bin/activate
pip install -q -U pip
pip install --progress-bar=off numpy
pip install --progress-bar=off .[all,testing]
- run:
name: Run tests
command: |
. venv/bin/activate
py.test -v tests/
test-py27:
docker:
- image: circleci/python:2.7-stretch
steps:
Expand All @@ -38,39 +58,42 @@ jobs:
command: |
virtualenv venv
. venv/bin/activate
pip install -q -U pip
pip install --progress-bar=off numpy
pip install --progress-bar=off .[all,testing]
- run:
name: Run tests
command: |
. venv/bin/activate
py.test -v tests/
test-minimal:
test-minimal-py34:
docker:
- image: circleci/python:3.6-stretch
- image: circleci/python:3.4-stretch
steps:
- checkout
- run:
name: Install Python deps in virtual environment
command: |
virtualenv venv
. venv/bin/activate
pip install -q -U pip
pip install --progress-bar=off .[testing]
- run:
name: Run tests
command: |
. venv/bin/activate
py.test -v tests/
test-simplejson:
test-simplejson-py35:
docker:
- image: circleci/python:3.6-stretch
- image: circleci/python:3.5-stretch
steps:
- checkout
- run:
name: Install Python deps in virtual environment
command: |
virtualenv venv
. venv/bin/activate
pip install -q -U pip
pip install --progress-bar=off numpy
pip install --progress-bar=off simplejson
pip install --progress-bar=off .[all,testing]
Expand All @@ -81,14 +104,15 @@ jobs:
py.test -v tests/
lint:
docker:
- image: circleci/python:3.6-stretch
- image: circleci/python:3.5-stretch
steps:
- checkout
- run:
name: Install Python deps in virtual environment
command: |
virtualenv venv
. venv/bin/activate
pip install -q -U pip
pip install --progress-bar=off .[testing]
- run:
name: Run flake8
Expand All @@ -98,14 +122,15 @@ jobs:
flake8 --ignore E501 --exclude onecodex/schemas/* tests/
coverage:
docker:
- image: circleci/python:3.6-stretch
- image: circleci/python:3.5-stretch
steps:
- checkout
- run:
name: Install Python deps in virtual environment
command: |
virtualenv venv
. venv/bin/activate
pip install -q -U pip
pip install --progress-bar=off numpy
pip install --progress-bar=off .[all,testing]
- run:
Expand Down
100 changes: 93 additions & 7 deletions onecodex/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,35 @@

# force persistence of our additional taxonomy and metadata dataframe properties
class ClassificationsDataFrame(pd.DataFrame):
"""A subclassed `pandas.DataFrame` containing additional metadata pertinent to analysis of
One Codex Classifications results. These fields, once part of the DataFrame, will no longer be
updated when the contents of the associated `SampleCollection` change. In comparison, the
corresponding attributes `_rank`, `_field`, `taxonomy` and `metadata` in a `SampleCollection`
are re-generated whenever members of the `SampleCollection` are added or removed.
Methods from `AnalysisMixin`, such as `to_df`, are available via the `ocx` namespace. For
example, `ClassificationsDataFrame().ocx.to_df()`.
Parameters
----------
ocx_rank : {'auto', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'}, optional
Analysis was restricted to abundances of taxa at the specified level.
ocx_field : {'readcount_w_children', 'readcount', 'abundance'}
Which field was used for the abundance/count of a particular taxon in a sample.
- 'readcount_w_children': total reads of this taxon and all its descendants
- 'readcount': total reads of this taxon
- 'abundance': genome size-normalized relative abundances, from shotgun sequencing
ocx_metadata : `pandas.DataFrame`
A DataFrame containing collated metadata fields for all samples in this analysis.
ocx_taxonomy : `pandas.DataFrame`
A DataFrame containing taxonomy information (i.e., id, name, rank, parent) for all taxa
referenced in this analysis.
"""

_metadata = ['ocx_rank', 'ocx_field', 'ocx_taxonomy', 'ocx_metadata']

def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False, ocx_rank=None,
Expand Down Expand Up @@ -34,6 +63,10 @@ def _constructor_sliced(self):


class ClassificationsSeries(pd.Series):
"""A subclassed `pandas.Series` containing additional metadata pertinent to analysis of
One Codex Classifications results. See the docstring for `ClassificationsDataFrame`.
"""

# 'name' is a piece of metadata specified by pd.Series--it's not ours
_metadata = ['name', 'ocx_rank', 'ocx_field', 'ocx_taxonomy', 'ocx_metadata']

Expand Down Expand Up @@ -62,8 +95,18 @@ def _constructor_expanddim(self):


class AnalysisMixin(VizPCAMixin, VizHeatmapMixin, VizMetadataMixin, VizDistanceMixin):
"""Contains methods for analyzing Classifications results.
Notes
-----
Three DataFrames are required by most methods: collated counts, collated metadata, and taxonomy.
This data is obtained from either a `ClassificationsDataFrame` or a `SampleCollection`. Both
classes use this mixin. `AnalysisMixin` pulls additional methods in from `onecodex.distance`,
`onecodex.taxonomy`, and `onecodex.viz`.
"""

def _get_auto_rank(self, rank):
"""Tries to figure out what rank we should use for analyses, mainly called by results()"""
"""Tries to figure out what rank we should use for analyses"""

if rank == 'auto':
# if we're an accessor for a ClassificationsDataFrame, use its _rank property
Expand All @@ -78,14 +121,45 @@ def _get_auto_rank(self, rank):
return rank

def _guess_normalized(self):
# it's possible that the _results df has already been normalized, which can cause some
# methods to fail. we must guess whether this is the case
"""Returns true if the collated counts in `self._results` appear to be normalized.
Notes
-----
It's possible that the _results df has already been normalized, which can cause some
methods to fail. This method lets us guess whether that's true and act accordingly.
"""
return bool((self._results.sum(axis=1).round(4) == 1.0).all())

def _metadata_fetch(self, metadata_fields):
"""Takes a list of metadata fields, some of which can contain taxon names or taxon IDs, and
returns a DataFrame with magically transformed data that can be used for plotting.
returns a DataFrame with transformed data that can be used for plotting.
Notes
-----
Taxon names and IDs are transformed into the relative abundances of those taxa within their
own rank. For example, 'Bacteroides' will return the relative abundances of 'Bacteroides'
among all taxa of rank genus. Taxon IDs are stored as strings in `ClassificationsDataFrame`
and are coerced to strings if integers are given.
Metadata fields are returned as is, from the `self.metadata` DataFrame. If multiple metadata
fields are specified in a tuple, their values are joined as strings separated by underscore.
Multiple metadata fields in a tuple must all be categorical. That is, a numerical field and a
boolean cannot be joined, or the result would be something like '87.4_True'.
The 'Label' field name is transformed to '_display_name'. This lets us label points in plots
by the name generated for each sample in `SampleCollection._collate_metadata`.
Returns
-------
`pandas.DataFrame`
Columns are renamed (if applicable) metadata fields and rows are `Classifications.id`.
Elements are transformed values. Not all metadata fields will have been renamed, but will
be present in the below `dict` nonetheless.
`dict`
Keys are metadata fields and values are renamed metadata fields. This can be used to map
metadata fields which were passed to this function, to prettier names. For example, if
'bacteroid' is passed, it will be matched with the Bacteroides genus and renamed to
'Bacteroides (816)', which includes its taxon ID.
"""
help_metadata = ', '.join(self.metadata.keys())
magic_metadata = pd.DataFrame({'classification_id': self._results.index}) \
Expand Down Expand Up @@ -173,8 +247,8 @@ def _metadata_fetch(self, metadata_fields):

def to_df(self, rank='auto', top_n=None, threshold=None, remove_zeros=True, normalize='auto',
table_format='wide'):
"""Takes the ClassificationsDataFrame associated with these samples, or SampleCollection, does some
filtering, and returns a ClassificationsDataFrame.
"""Takes the ClassificationsDataFrame associated with these samples, or SampleCollection,
does some filtering, and returns a ClassificationsDataFrame copy.
Parameters
----------
Expand All @@ -194,7 +268,7 @@ def to_df(self, rank='auto', top_n=None, threshold=None, remove_zeros=True, norm
Returns
-------
ClassificationsDataFrame
`ClassificationsDataFrame`
"""

rank = self._get_auto_rank(rank)
Expand Down Expand Up @@ -266,6 +340,18 @@ def to_df(self, rank='auto', top_n=None, threshold=None, remove_zeros=True, norm

@pd.api.extensions.register_dataframe_accessor('ocx')
class OneCodexAccessor(AnalysisMixin):
"""Accessor object allowing access of `AnalysisMixin` methods from the 'ocx' namespace of a
`ClassificationsDataFrame`.
Notes
-----
When instantiated, the accessor will prune the taxonomic tree back to contain only taxa
referenced in the classification results (i.e., self._results). Similarly, metadata is sliced
such that it contains only those `Classifications.id` in the results. This is because users may
filter or modify the classification results to remove classification results (i.e., rows) or
taxa (i.e., cols) from the `ClassificationsDataFrame` before accessing this namespace.
"""

def __init__(self, pandas_obj):
# copy data from the ClassificationsDataFrame to a new instance of AnalysisMethods
self.metadata = pandas_obj.ocx_metadata
Expand Down
36 changes: 30 additions & 6 deletions onecodex/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,22 @@ class AnalysisMixin(object):


class ResourceList(object):
"""
In OneCodexBase, when attributes are lists, actions performed on the returned lists are not
passed through to the underlying resource list. This class passes those actions through, and
will generally act like a list.
"""Wrapper around lists of onecodex-wrapped potion objects.
Parameters
----------
_resource : `list`
A list of potion objects, which are generally stored in `OneCodexBase._resource`.
oc_model : `OneCodexBase`
A class which inherits from `OneCodexBase`, for example, `models.Tags`.
Notes
-----
In OneCodexBase, when attributes are lists (e.g., `Samples.tags`), actions performed on the
returned lists are not passed through to the underlying potion object's list. This class passes
those actions through, and will generally act like a list.
See https://github.com/onecodex/onecodex/issues/40
"""

def _update(self):
Expand Down Expand Up @@ -59,6 +71,10 @@ def _check_valid_resource(self, other, check_for_dupes=True):
raise OneCodexException('{} cannot contain duplicate objects'.format(self.__class__.__name__))

def __init__(self, _resource, oc_model):
if not issubclass(oc_model, OneCodexBase):
raise ValueError("Expected object of type '{}', got '{}'"
.format(OneCodexBase.__name__, oc_model.__name__))

# turn potion Resource objects into OneCodex objects
self._resource = _resource
self._oc_model = oc_model
Expand Down Expand Up @@ -162,6 +178,15 @@ def remove(self, x):


class SampleCollection(ResourceList, AnalysisMixin):
"""A collection of `Samples` or `Classifications` objects with many methods for analysis of
classifications results.
Notes
-----
Inherits from `ResourceList` to provide a list-like API, and `AnalysisMixin` to provide relevant
analysis methods.
"""

def __init__(self, _resource, oc_model, skip_missing=True, label=None, field='auto'):
self._kwargs = {'skip_missing': skip_missing,
'label': label,
Expand Down Expand Up @@ -376,8 +401,7 @@ def taxonomy(self):
return self._cached['taxonomy']

def to_otu(self, biom_id=None):
"""
Converts a list of objects associated with a classification result into a `dict` resembling
"""Converts a list of objects associated with a classification result into a `dict` resembling
an OTU table.
Parameters
Expand Down
8 changes: 7 additions & 1 deletion onecodex/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

class TaxonomyMixin(object):
def tree_build(self):
"""Build a tree from the taxonomy data present in this ResultsDataFrame or SampleCollection.
"""Build a tree from the taxonomy data present in this `ClassificationsDataFrame` or
`SampleCollection`.
Returns
-------
Expand Down Expand Up @@ -79,6 +80,11 @@ def tree_prune_rank(self, tree, rank='species'):
-------
`skbio.tree.TreeNode`, the root of the tree where all tips are at the given rank, and all
tips have a path back to the root node.
Examples
--------
tree_prune_rank(tree, 'species') will remove all subspecies/strain nodes and return a tree
whose tips are species-level nodes, retaining all of their ancestors (genus and higher).
"""
if rank is None:
return tree.copy()
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tox]
envlist = py27,py36,coverage,lint,minimal,simplejson
envlist = py27,py35,py37,coverage,lint,minimal,simplejson

[testenv]
commands =
Expand Down
2 changes: 1 addition & 1 deletion tox.setup.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tox]
envlist = py27,py36
envlist = py27,py35,py37

[testenv]
deps = numpy
Expand Down

0 comments on commit 1e934f6

Please sign in to comment.