Skip to content
This repository has been archived by the owner on Aug 19, 2020. It is now read-only.

Commit

Permalink
Merge 282a4f0 into 9156b42
Browse files Browse the repository at this point in the history
  • Loading branch information
dangunter committed Aug 12, 2015
2 parents 9156b42 + 282a4f0 commit 3a732b6
Show file tree
Hide file tree
Showing 13 changed files with 249 additions and 188 deletions.
6 changes: 1 addition & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
language: python
python:
- "2.7"
- "3.4"

install:
- cd lib
- pip install coveralls
- pip install -r requirements.txt
- cd ..
- "pip install -r requirements-travis.txt"

script:
- cd lib
Expand Down
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinxcontrib.napoleon',
'sphinx.ext.doctest',
'sphinx.ext.todo',
'sphinx.ext.coverage',
Expand Down
70 changes: 65 additions & 5 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,76 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to lib's documentation!
===============================
KBase Data API documentation
============================
The Data API provides a unified entry point to retrieve and, eventually,
store KBase data objects.

Contents:

On this page
------------

.. contents::

:depth: 1

API Reference
-------------
.. toctree::
:maxdepth: 4
:maxdepth: 2

biokbase.data_api

Using the Data API
==================

There are two primary modes of using the Data API: interactively and programmatically.
Interactively, the API can be imported into the IPython/Jupyter notebook or
Narrative and used to explore and examine data objects.
Results will be automatically displayed as HTML and inline plots. Programmatically,
the same API can be imported into any Python code and used like a standard
library.

.. note::

The integration of the Data API into the KBase Narrative is not quite
done yet. For now, you need to try it out in a recent (August 2015+)
version of the Jupyter notebook.

In both cases, the :ref:`core-api` functions are used to access the objects.
For interactive use, and some programmatic use-cases, the :ref:`highlevel-api`
will be more convenient.

.. _highlevel-api:

High-level API
--------------
This section covers how to :ref:`initialize the high-level API <highlevel-api-conf>`
to access the KBase data as an authenticated user,
then how to :ref:`use the provided functions <highlevel-api-func>`.

.. _highlevel-api-conf:

Configuration and Authorization
+++++++++++++++++++++++++++++++

.. _highlevel-api-func:

Functions
+++++++++

.. _core-api:

Core API
--------
This section covers how to initialize the core API to access the KBase
data as an authenticated user, then how to use the provided functions.

biokbase
Configuration and Authorization
+++++++++++++++++++++++++++++++

Functions
+++++++++

Indices and tables
==================
Expand Down
1 change: 1 addition & 0 deletions docs/matplotlibrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
backend : PDF
74 changes: 32 additions & 42 deletions lib/biokbase/data_api/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,11 @@
TYPES = _CONTIGSET_TYPES + _ASSEMBLY_TYPES

class AssemblyAPI(ObjectAPI):
"""
API for the assembled sequences associated with a Genome Annotation.
"""API for the assembled sequences associated with a Genome Annotation.
"""

def __init__(self, services, ref):
"""
Defines which types and type versions that are legal.
"""Defines which types and type versions that are legal.
"""
super(AssemblyAPI, self).__init__(services, ref)

Expand All @@ -41,8 +39,7 @@ def __init__(self, services, ref):
raise TypeError("Invalid type! Expected one of {0}, received {1}".format(TYPES, self._typestring))

def get_assembly_id(self):
"""
Retrieve the id for an Assembly.
"""Retrieve the id for an Assembly.
Returns:
id: string identifier for the Assembly"""
Expand All @@ -53,11 +50,11 @@ def get_assembly_id(self):
return self.get_data_subset(path_list=["assembly_id"])["assembly_id"]

def get_genome_annotations(self):
"""
Retrieve the GenomeAnnotations that refer to this Assembly.
"""Retrieve the GenomeAnnotations that refer to this Assembly.
Returns:
list<GenomeAnnotationAPI>"""
list<GenomeAnnotationAPI>
"""

import biokbase.data_api.genome_annotation

Expand All @@ -67,16 +64,17 @@ def get_genome_annotations(self):
for object_type in referrers:
if object_type.split('-')[0] in biokbase.data_api.genome_annotation.TYPES:
for x in referrers[object_type]:
annotations.append(GenomeAnnotationAPI(self.services, ref=x))
annotations.append(
biokbase.data_api.genome_annotation.GenomeAnnotationAPI(
self.services, ref=x))

if len(annotations) == 0:
return None
else:
return annotations

def get_external_source_info(self):
"""
Retrieve the external source information associated with this Assembly.
"""Retrieve the external source information associated with this Assembly.
Returns:
id: string identifier for the Assembly"""
Expand All @@ -95,8 +93,7 @@ def get_external_source_info(self):
return output

def get_stats(self):
"""
Retrieve the derived statistical information about this Assembly.
"""Retrieve the derived statistical information about this Assembly.
Returns:
gc_content: total guanine and cytosine content, counting all G and C only
Expand All @@ -112,7 +109,7 @@ def get_stats(self):
for c in contigs:
total_gc += len([s for s in re.finditer(pattern, c["sequence"])])

total_length = sum([x.length in contigs])
total_length = sum([x.length for x in contigs])

data = dict()
data["gc_content"] = total_gc/(total_length*1.0)
Expand All @@ -123,8 +120,7 @@ def get_stats(self):
return self.get_data_subset(path_list=["gc_content","dna_size","num_contigs"])

def get_number_contigs(self):
"""
Retrieve the number of contiguous sequences in this Assembly.
"""Retrieve the number of contiguous sequences in this Assembly.
Returns:
int"""
Expand All @@ -135,8 +131,7 @@ def get_number_contigs(self):
return self.get_data_subset(path_list=["num_contigs"])["num_contigs"]

def get_gc_content(self):
"""
Retrieve the total GC content for this Assembly.
"""Retrieve the total GC content for this Assembly.
Returns:
float"""
Expand All @@ -157,8 +152,7 @@ def get_gc_content(self):
return self.get_data_subset(path_list=["gc_content"])["gc_content"]

def get_dna_size(self):
"""
Retrieve the total DNA size for this Assembly.
"""Retrieve the total DNA size for this Assembly.
Returns:
int"""
Expand All @@ -170,8 +164,7 @@ def get_dna_size(self):
return self.get_data_subset(path_list=["dna_size"])["dna_size"]

def get_contig_lengths(self, contig_id_list=None):
"""
Retrieve the ids for every contiguous sequence in this Assembly.
"""Retrieve the ids for every contiguous sequence in this Assembly.
Returns:
dict<str>: <int>"""
Expand All @@ -191,8 +184,7 @@ def get_contig_lengths(self, contig_id_list=None):
return result

def get_contig_gc_content(self, contig_id_list=None):
"""
Retrieve the total GC content for each contiguous sequence of this Assembly.
"""Retrieve the total GC content for each contiguous sequence of this Assembly.
Returns:
dict<str>: float"""
Expand Down Expand Up @@ -222,8 +214,7 @@ def get_contig_gc_content(self, contig_id_list=None):
return contigs_gc

def get_contig_ids(self):
"""
Retrieve the ids for every contiguous sequence in this Assembly.
"""Retrieve the ids for every contiguous sequence in this Assembly.
Returns:
list<str>"""
Expand All @@ -237,29 +228,28 @@ def get_contig_ids(self):
return result

def get_contigs_by_id(self, contig_id_list=None):
"""
Retrieve contiguous sequences from this Assembly by id.
"""Retrieve contiguous sequences from this Assembly by id.
Args:
contig_id_list: list<str>
Returns:
dict
dictionary of contigs, with contig id as key
contig value structure
{
'contig_id': string,
'length': integer,
'md5': string,
'name': string,
'description': string,
'is_complete': 0 or 1,
'is_circular': 0 or 1,
'sequence': string
}
contig value structure::
{
'contig_id': string,
'length': integer,
'md5': string,
'name': string,
'description': string,
'is_complete': 0 or 1,
'is_circular': 0 or 1,
'sequence': string
}
"""

if contig_id_list == None:
if contig_id_list is None:
contig_id_list = self.get_data()["contigs"]

if self._is_contigset_type:
Expand Down
25 changes: 19 additions & 6 deletions lib/biokbase/data_api/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,33 @@ def get_token():
"Missing authentication token! Set KB_AUTH_TOKEN environment variable.")

class ObjectAPI(object):
"""
Generic Object API for basic properties and actions of a KBase Data Object.
"""Generic Object API for basic properties and actions
of a KBase Data Object.
"""

def __init__(self, services=None, ref=None):
if services == None or type(services) != type({}):
"""Create new object.
Args:
services (dict): Service configuration dictionary. Required keys:
* workspace_service_url: URL for Workspace, such as
`https://ci.kbase.us/services/ws/`
ref (str): Object reference, which can be the name of the object
(although this is not unique), or a numeric identifier in the
format `A/B[/C]` where A is the number of the workspace, B is the
number identifying the object, and C is the "version" number of
the object.
"""
if services is None or type(services) != type({}):
raise TypeError("You must provide a service configuration dictionary! Found {0}".format(type(services)))
elif not services.has_key("workspace_service_url"):
raise KeyError("Expecting workspace_service_url key!")

if ref == None:
if ref is None:
raise TypeError("Missing object reference!")
elif type(ref) != type("") and type(ref) != type(unicode()):
raise TypeError("Invalid reference given, expected string! Found {0}".format(type(ref)))
elif re.match(REF_PATTERN, ref) == None:
elif re.match(REF_PATTERN, ref) is None:
raise TypeError("Invalid workspace reference string! Found {0}".format(ref))

self.services = services
Expand Down Expand Up @@ -97,7 +109,8 @@ def get_info(self):
Retrieve basic properties about this object.
Returns:
dict"""
dict
"""

return self._info

Expand Down
24 changes: 12 additions & 12 deletions lib/biokbase/data_api/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
def basic_suite():
import biokbase.data_api.tests.test_suite_basic
biokbase.data_api.tests.test_suite_basic.test_assembly_api()
biokbase.data_api.tests.test_suite_basic.test_genome_annotation_api()
biokbase.data_api.tests.test_suite_basic.test_taxon_api()

def extended_suite():
import biokbase.data_api.tests.test_suite_extended
biokbase.data_api.tests.test_suite_extended.test_assembly_api()
biokbase.data_api.tests.test_suite_extended.test_genome_annotation_api()
biokbase.data_api.tests.test_suite_extended.test_taxon_api()
# def basic_suite():
# import biokbase.data_api.tests.test_suite_basic
# biokbase.data_api.tests.test_suite_basic.test_assembly_api()
# biokbase.data_api.tests.test_suite_basic.test_genome_annotation_api()
# biokbase.data_api.tests.test_suite_basic.test_taxon_api()
#
# def extended_suite():
# import biokbase.data_api.tests.test_suite_extended
# biokbase.data_api.tests.test_suite_extended.test_assembly_api()
# biokbase.data_api.tests.test_suite_extended.test_genome_annotation_api()
# biokbase.data_api.tests.test_suite_extended.test_taxon_api()
#
28 changes: 28 additions & 0 deletions lib/biokbase/data_api/tests/shared.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""
Shared by test code
"""
__author__ = 'Dan Gunter <dkgunter@lbl.gov>'
__date__ = '8/11/15'

import logging

from biokbase.data_api.assembly import AssemblyAPI

# Module-level flag; setup() flips it to True once an AssemblyAPI object
# has been constructed successfully against the configured services.
can_connect = False
# Service configuration dictionary handed to the API constructors.
# "workspace_service_url" is the key ObjectAPI.__init__ requires.
# NOTE(review): "shock_service_url" is presumably consumed by code not
# visible here -- confirm before relying on it.
services = {
"workspace_service_url": "https://ci.kbase.us/services/ws/",
"shock_service_url": "https://ci.kbase.us/services/shock-api/",
}
# Base object reference used by the tests; setup() appends '_assembly'
# to it to build the reference it probes.
genome = 'PrototypeReferenceGenomes/kb|g.3157'

def setup():
    """Probe the workspace service and record whether it is reachable.

    Configures basic logging, then tries to construct an ``AssemblyAPI``
    for the reference ``genome + '_assembly'`` using the module-level
    ``services`` configuration. On success the module-level flag
    ``can_connect`` is set to ``True``; on failure a warning is logged and
    the flag is left unchanged.

    NOTE(review): presumably invoked by the test runner as a shared
    fixture before the tests that import this module -- confirm.
    """
    global can_connect

    logging.basicConfig()
    _log = logging.getLogger()

    try:
        AssemblyAPI(services=services, ref=genome + '_assembly')
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate and the test run
        # can be aborted while the connection attempt is in flight.
        _log.warning('Cannot connect to workspace! Most tests will be skipped')
    else:
        can_connect = True
Loading

0 comments on commit 3a732b6

Please sign in to comment.