In [21]:
from doekbase.data_api.sequence.assembly.api import AssemblyAPI
from doekbase.data_api.annotation.genome_annotation.api import GenomeAnnotationAPI
from doekbase.data_api.taxonomy.taxon.api import TaxonAPI
from doekbase.workspace.client import Workspace
from doekbase.data_api.core import ObjectAPI
from doekbase.handle.Client import AbstractHandle as handleClient

services = {"workspace_service_url": "https://ci.kbase.us/services/ws/",
            "shock_service_url": "https://ci.kbase.us/services/shock-api/",
            "handle_service_url": "https://ci.kbase.us/services/handle_service/"}

import os

In [56]:
token = os.environ["KB_AUTH_TOKEN"]

genome_ref = '6838/146'#ReferenceGenomeAnnotations/kb|g.166819

GenomAnnotation_type = 'KBaseGenomeAnnotations.GenomeAnnotation-2.1'

ga_api = GenomeAnnotationAPI(services, token=token, ref=genome_ref)

ga_object = ObjectAPI(services, token=token, ref=genome_ref)
asm_object = ObjectAPI(services, token=token, ref=genome_ref)

tax_api = ga_api.get_taxon()

asm_api = ga_api.get_assembly()

In [30]:
#from Gavin's file converter
def upload_workspace_data(cs, ws_url, source_ref, target_ws, obj_name):
    ws = Workspace(ws_url, token=token)
    type_ = GenomAnnotation_type
    ws.save_objects(
        {'workspace': target_ws,
         'objects': [{'name': obj_name,
                      'type': type_,
                      'data': cs,
                      'provenance': [{'script': 'fakey',
                                      'script_ver': '1.2.3',
                                      'input_ws_objects': [source_ref],
                                      }]
                      }
                     ]
         }
    )
    

In [33]:
genome_dict = dict()

genome_dict['id'] = asm_object.get_data_subset(["external_source"])["external_source"]

genome_dict['scientific_name'] = tax_api.get_scientific_name()
genome_dict['domain'] = tax_api.get_domain()
genome_dict['genetic_code'] = tax_api.get_genetic_code()
genome_dict['dna_size'] = asm_api.get_dna_size()
genome_dict['num_contigs'] = asm_api.get_number_contigs()

genome_dict['contig_lengths'] = asm_api.get_contig_lengths()
genome_dict['contig_ids'] = asm_api.get_contig_ids()
genome_dict['source'] = asm_object.get_data_subset(["external_source_id"])["external_source_id"]
genome_dict['source_id'] = asm_object.get_data_subset(["external_source"])["external_source"]
genome_dict['md5'] = asm_object.get_data_subset(["md5"])["md5"]
genome_dict['taxonomy'] = tax_api.get_scientific_lineage()
genome_dict['gc_content'] = asm_api.get_gc_content()
genome_dict['complete'] = 1

#not loaded in GenomeAnnotation?
#genome_dict['publications'] = 

628


In [57]:
#the Contig list is duplicated in both Genome and ContigSet, 
#per convention of the Genome uploader only the ContigSet field is populated

feature_list = []
proteins = ga_api.get_proteins()
print "proteins"

feature_ids = ga_api.get_feature_ids(filters={"type_list":['CDS']})
if 'CDS' in feature_ids['by_type']:
    features_CDS = ga_api.get_features(feature_ids['by_type']['CDS'])
    print "CDS"

for p in proteins:
    print p
    #print proteins[p].keys()
    feature_dict = dict()
    feature_dict['id'] = p
    
    location = {}
    if features_cds[p] != null:
        location = features_cds[p]['feature_locations']        
    feature_dict['location'] = location
    feature_dict['type'] = 'CDS'
                
    feature_dict['function'] = function = proteins[p]['function']
    
    location = {}
    if 'CDS' in feature_ids['by_type']:
        location = features_cds[p]['feature_locations']
    feature_dict['md5'] = features_cds[f]['feature_md5']
    feature_dict['protein_translation'] = proteins[p]['protein_amino_acid_sequence']
    
    location = {}
    if 'CDS' in feature_ids['by_type']:
        location = features_cds[p]['feature_locations']
    feature_dict['dna_sequence'] = features_cds[f]['feature_dna_sequence'] 
    
    location = {}
    if 'CDS' in feature_ids['by_type']:
        location = features_cds[p]['feature_locations']        
    feature_dict['dna_sequence_length'] = features_cds[p]['feature_dna_sequence_length']
    feature_dict['aliases'] = proteins[p]['protein_aliases']
    feature_list.append(feature_dict)
    

feature_ids_gene = ga_api.get_feature_ids(filters={"type_list":['gene']})
print "ids gene"
if 'gene' in feature_ids_gene['by_type']:
    features = ga_api.get_features(feature_ids_gene['by_type']['gene'])
    print"gene"

    for f in features:
        print f
        #print features[f]
        feature_dict = dict()
        feature_dict['id'] = features[f]['feature_id'] 
        feature_dict['location'] = features[f]['feature_locations']
        feature_dict['type'] = 'gene'
        feature_dict['function'] = features[f]['feature_function']
        feature_dict['md5'] = features[f]['feature_md5']
        feature_dict['protein_translation'] = ""
        feature_dict['dna_sequence'] = features[f]['feature_dna_sequence'] 
        feature_dict['dna_sequence_length'] = features[f]['feature_dna_sequence_length']
        feature_dict['aliases'] = features[f]['feature_aliases']
        feature_list.append(feature_dict)

    
    
#RNA
feature_ids_RNA = ga_api.get_feature_ids(filters={"type_list":['RNA']})
print "ids RNA"

if 'RNA' in feature_ids_RNA['by_type']:
    features = ga_api.get_features(feature_ids_RNA['by_type']['RNA'])
    print"gene"

    #obj.get_features(obj.get_feature_ids({'region_list':filters})['by_type']['gene'])

    for f in features:
        print f
        #print features[f]
        feature_dict = dict()
        feature_dict['id'] = features[f]['feature_id'] 
        feature_dict['location'] = features[f]['feature_locations']
        feature_dict['type'] = 'RNA'
        feature_dict['function'] = features[f]['feature_function']
        feature_dict['md5'] = features[f]['feature_md5']
        feature_dict['protein_translation'] = ""
        feature_dict['dna_sequence'] = features[f]['feature_dna_sequence'] 
        feature_dict['dna_sequence_length'] = features[f]['feature_dna_sequence_length']
        feature_dict['aliases'] = features[f]['feature_aliases']
        feature_list.append(feature_dict)



genome_dict['features'] = feature_list


KeyError: u'protein_2734'

In [15]:
#fill with proper ref
genome_dict['contigset_ref'] = str(0)+'/'+str(0)+'/'+str(0)

In [16]:
import json
g_data = json.dumps(genome_dict)

In [60]:
#ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
#fill with proper ref
ref = str(0)+'/'+str(0)+'/'+str(0)

print genome_dict['id']

name = genome_dict['id']
if isinstance( genome_dict['id'], ( int, long ) ):
    name = str(genome_dict['id']) + "_Genome"

upload_workspace_data(
            g_data, services["workspace_service_url"], ref,
            "marcin:1458173367710", name)#6865 #ConvertAssemblytoContigSet

628


ServerError: JSONRPCError: -32500. Object #1, 628_Genome failed type checking:
instance type (string) does not match any allowed primitive type (allowed: ["object"]), at 
us.kbase.typedobj.exceptions.TypedObjectValidationException: Object #1, 628_Genome failed type checking:
instance type (string) does not match any allowed primitive type (allowed: ["object"]), at 
	at us.kbase.workspace.database.Workspace.validate(Workspace.java:848)
	at us.kbase.workspace.database.Workspace.validateObjectsAndExtractReferences(Workspace.java:719)
	at us.kbase.workspace.database.Workspace.saveObjects(Workspace.java:621)
	at us.kbase.workspace.kbase.WorkspaceServerMethods.saveObjects(WorkspaceServerMethods.java:255)
	at us.kbase.workspace.WorkspaceServer.saveObjects(WorkspaceServer.java:613)
	at sun.reflect.GeneratedMethodAccessor176.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at us.kbase.common.service.JsonServerServlet.processRpcCall(JsonServerServlet.java:559)
	at us.kbase.common.service.JsonServerServlet.doPost(JsonServerServlet.java:414)
	at javax.servlet.http.HttpServlet.service(HttpServlet.java:688)
	at javax.servlet.http.HttpServlet.service(HttpServlet.java:770)
	at org.apache.catalina.core.StandardWrapper.service(StandardWrapper.java:1550)
	at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:281)
	at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:175)
	at org.apache.catalina.core.StandardPipeline.doInvoke(StandardPipeline.java:655)
	at org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:595)
	at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:161)
	at org.apache.catalina.connector.CoyoteAdapter.doService(CoyoteAdapter.java:331)
	at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:231)
	at com.sun.enterprise.v3.services.impl.ContainerMapper$AdapterCallable.call(ContainerMapper.java:317)
	at com.sun.enterprise.v3.services.impl.ContainerMapper.service(ContainerMapper.java:195)
	at com.sun.grizzly.http.ProcessorTask.invokeAdapter(ProcessorTask.java:860)
	at com.sun.grizzly.http.ProcessorTask.doProcess(ProcessorTask.java:757)
	at com.sun.grizzly.http.ProcessorTask.process(ProcessorTask.java:1056)
	at com.sun.grizzly.http.DefaultProtocolFilter.execute(DefaultProtocolFilter.java:229)
	at com.sun.grizzly.DefaultProtocolChain.executeProtocolFilter(DefaultProtocolChain.java:137)
	at com.sun.grizzly.DefaultProtocolChain.execute(DefaultProtocolChain.java:104)
	at com.sun.grizzly.DefaultProtocolChain.execute(DefaultProtocolChain.java:90)
	at com.sun.grizzly.http.HttpProtocolChain.execute(HttpProtocolChain.java:79)
	at com.sun.grizzly.ProtocolChainContextTask.doCall(ProtocolChainContextTask.java:54)
	at com.sun.grizzly.SelectionKeyContextTask.call(SelectionKeyContextTask.java:59)
	at com.sun.grizzly.ContextTask.run(ContextTask.java:71)
	at com.sun.grizzly.util.AbstractThreadPool$Worker.doWork(AbstractThreadPool.java:532)
	at com.sun.grizzly.util.AbstractThreadPool$Worker.run(AbstractThreadPool.java:513)
	at java.lang.Thread.run(Thread.java:744)


In [58]:
ws = Workspace(services["workspace_service_url"], token=token)
print ws.list_workspace_info({"owners": ["b"]})#[]['narrative_nice_name']

[]


In [None]:
ws.get_type_info("KBaseGenomeAnnotations.GenomeAnnotation")


