In [None]:
import sevenbridges as sbg
from sevenbridges.errors import SbgError
from sevenbridges.http.error_handlers import rate_limit_sleeper, maintenance_sleeper
import sys
import re
import pdb
import concurrent.futures
from requests import request
# Seven Bridges client configuration built from the 'turbo' profile
# (presumably a profile in the local sevenbridges credentials file — confirm).
config = sbg.Config(profile='turbo')
# Shared API client used by the helper functions in this notebook. The two
# error handlers come from sevenbridges.http.error_handlers; going by their
# names they wait out rate limiting and platform maintenance — confirm in the
# sevenbridges-python docs.
api = sbg.Api(config=config, error_handlers=[rate_limit_sleeper, maintenance_sleeper])
# Project identifier that every file query in this notebook is run against.
project = 'd3b-bixu/dev-wgsa'

In [None]:
def get_VEP_refs(db_key_list, db_run_bool):
    """Assemble the reference inputs for a VEP99 run.

    Args:
        db_key_list: Keys of the optional databases to attach. Each key must be
            one of 'cadd_indels', 'cadd_snvs', 'dbnsfp', 'dbscsnv' or 'phylop'.
        db_run_bool: Value stored unchanged under 'run_cache_dbs'.

    Returns:
        dict: 'run_cache_dbs', 'reference', 'cache' and 'tool' entries, followed
        by one entry per requested key. File entries are the first result of a
        by-name query against the module-level ``project``.

    Raises:
        KeyError: If a key in ``db_key_list`` is not a known optional database.

    Note:
        There is no explicit handling for a query that finds nothing; the
        ``[0]`` index presumably raises in that case.
    """
    def first_match(file_name):
        # First result of querying the project for exactly this file name.
        return api.files.query(project=project, names=[file_name])[0]

    # Optional database key -> file name expected in the project.
    optional_db_files = {
        'cadd_indels': 'CADDv1.5-38-InDels.tsv.gz',
        'cadd_snvs': 'CADDv1.5-38-whole_genome_SNVs.tsv.gz',
        'dbnsfp': 'dbNSFP4.0a.gz',
        'dbscsnv': 'dbscSNV1.1_GRCh38.txt.gz',
        'phylop': 'hg38.phyloP100way.bw',
    }

    # Always-present entries; the literal is evaluated top to bottom, so the
    # reference is queried before the cache.
    refs = {
        'run_cache_dbs': db_run_bool,
        'reference': first_match('Homo_sapiens.GRCh38.dna.toplevel.fa.gz'),
        'cache': first_match('homo_sapiens_merged_vep_99_GRCh38.tar.gz'),
        'tool': 'VEP99',
    }

    # Attach whichever optional databases the caller asked for.
    for db_key in db_key_list:
        refs[db_key] = first_match(optional_db_files[db_key])
    return refs
    

In [None]:
def get_ANNOVAR_refs(db_list, db_run_bool, protocol_name):
    """Assemble the reference inputs for an ANNOVAR run.

    Args:
        db_list: File names of optional databases to look up in the
            module-level ``project``. May be empty.
        db_run_bool: Value stored unchanged under 'run_dbs'.
        protocol_name: Value stored unchanged under 'protocol_name'.

    Returns:
        dict: 'run_dbs', 'reference_name' (the first query result for
        'annovar_2019Oct24.tgz') and 'protocol_name'. When ``db_list`` is
        non-empty, 'additional_dbs' is added as a list holding the first
        query result for each name, in the order given.
    """
    annovar_bundle = api.files.query(project=project, names=['annovar_2019Oct24.tgz'])[0]
    refs = {
        'run_dbs': db_run_bool,
        'reference_name': annovar_bundle,
        'protocol_name': protocol_name,
    }

    # No optional databases requested: leave the 'additional_dbs' key out.
    if len(db_list) == 0:
        return refs

    refs['additional_dbs'] = [
        api.files.query(project=project, names=[db_name])[0]
        for db_name in db_list
    ]
    return refs


In [None]:
def get_snpEff_refs(vcf_list, reference_name, gwas_bool, dbnsfp_txt_bool, protocol_name=None):
    """Assemble the reference inputs for a snpEff run.

    Args:
        vcf_list: File names of optional database VCFs to look up in the
            module-level ``project``. May be empty or None.
        reference_name: Value stored unchanged under 'reference_name'.
        gwas_bool: When true, attach the GWAS catalog file under
            'gwas_catalog_txt'.
        dbnsfp_txt_bool: When true, attach the dbNSFP file under 'dbnsfp_txt'.
        protocol_name: Optional value stored under 'protocol_name'. The key is
            left out when this is None.

    Returns:
        dict: 'reference_name' and 'ref_tar_gz' always; 'protocol_name',
        'db_vcfs' (a list, in the order given), 'gwas_catalog_txt' and
        'dbnsfp_txt' only when requested. File entries are the first result of
        a by-name query against ``project``.
    """
    def first_match(file_name):
        # First result of querying the project for exactly this file name.
        return api.files.query(project=project, names=[file_name])[0]

    ref_dict = {
        'reference_name': reference_name,
        'ref_tar_gz': first_match('snpeff_hg38_grch38.tgz'),
    }

    # The original read an undefined ``protocol_name`` here (copied from the
    # ANNOVAR helper) and raised NameError; it is now an optional argument.
    if protocol_name is not None:
        ref_dict['protocol_name'] = protocol_name

    # vcf file name list has optional databases to run
    if vcf_list:
        ref_dict['db_vcfs'] = [first_match(vcf) for vcf in vcf_list]

    # The original called .append() on keys that were never created, which
    # raised KeyError. Each key now holds the file directly.
    # NOTE(review): assumes these workflow inputs take a single file rather
    # than a list — confirm against the snpEff app's input schema.
    if gwas_bool:
        ref_dict['gwas_catalog_txt'] = first_match('gwas_catalog_v1.0-associations_e98_r2020-03-08.tsv')
    if dbnsfp_txt_bool:
        ref_dict['dbnsfp_txt'] = first_match('dbNSFP4.0a.gz')
    return ref_dict
