In [1]:
import sevenbridges as sbg
import sys
from requests import request

# Build the Seven Bridges API client that the cells below pass around as `api`.
# NOTE(review): presumably profile='cavatica' selects a named entry in the local
# Seven Bridges credentials file, so no token lives in this notebook -- confirm.
config = sbg.Config(profile='cavatica')
api = sbg.Api(config=config)

In [None]:
def get_refs(api, project):
    """Collect the reference files the refinement workflow needs from ``project``.

    Each file is looked up by exact name with ``api.files.query`` and the first
    match is kept.

    Args:
        api: client object exposing ``files.query(project=..., names=[...])``.
        project: project identifier passed straight through to the query.

    Returns:
        dict with the keys ``'cache'``, ``'reference'`` and ``'snp_sites'``,
        each mapped to the first query hit for the corresponding file name.

    Raises:
        Whatever indexing ``[0]`` on the query result raises when there is no
        match (``IndexError`` for list-like results).
    """
    # (input key, file name) pairs, queried in this order.
    wanted = (
        ('cache', 'homo_sapiens_vep_93_GRCh38_convert_cache.tar.gz'),
        ('reference', 'Homo_sapiens_assembly38.fasta'),
        ('snp_sites', '1000G_phase3_v4_20130502.sites.hg38.vcf'),
    )
    found = {}
    for label, file_name in wanted:
        matches = api.files.query(project=project, names=[file_name])
        found[label] = matches[0]
    return found

In [None]:
def get_inputs_by_task(api, project):
    """Build per-family workflow inputs from the project's completed tasks.

    Every task returned by ``api.tasks.query(project=project,
    status='COMPLETED')`` is considered; there is no filtering by task name.
    The family ID is the second ``'-'``-separated field of the task name.

    Args:
        api: client object exposing ``tasks.query(...).all()`` and
            ``files.query(project=..., names=[...])``.
        project: project identifier passed through to both queries.

    Returns:
        dict mapping family ID to ``{'vqsr_vcf': ..., 'ped': ...}``, where
        ``vqsr_vcf`` is the task's ``'finalgathervcf'`` output and ``ped`` is
        the first file in the project named ``<family ID>.ped``.

    Raises:
        SystemExit: with code 1 if two completed tasks share a family ID.
        IndexError: if a task name contains no ``'-'`` or the ``.ped`` query
            returns nothing (list-like results).
    """
    in_dict = {}
    tasks = api.tasks.query(project=project, status='COMPLETED').all()
    for task in tasks:
        fm_id = task.name.split('-')[1]
        if fm_id in in_dict:
            sys.stderr.write('ERROR: Repeat family ID ' + fm_id + '\n')
            # The bare `exit` builtin is an interactive helper and is not
            # guaranteed to raise SystemExit (e.g. under a notebook kernel),
            # which would let the loop carry on; sys.exit always raises.
            sys.exit(1)
        ped_fn = fm_id + '.ped'
        in_dict[fm_id] = {
            'vqsr_vcf': task.outputs['finalgathervcf'],
            'ped': api.files.query(project=project, names=[ped_fn])[0],
        }
    return in_dict

In [None]:
def stage_tasks(api, project, fm_id, in_dict, ref_dict):
    """Create (without running) one genotype-refinement task for a family.

    The task is created as a draft (``run=False``), then its
    ``output_basename`` input is set to the new task's own ID and the task is
    saved. The task name and ID are printed tab-separated.

    Args:
        api: client object exposing ``tasks.create(...)``.
        project: project identifier; also the prefix of the app ID.
        fm_id: family ID appended to the task name.
        in_dict: family-specific inputs for this task.
        ref_dict: reference inputs shared by every task. Not modified.

    Returns:
        None.
    """
    task_name = 'kf_genotype_refinement_vep-' + fm_id
    app_name = project + '/kf-genotype-refinement-workflow'
    # Work on a copy: ref_dict is shared across families, and writing the
    # per-family inputs into it would leak them into the caller's dict.
    inputs = dict(ref_dict)
    inputs.update(in_dict)
    task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=inputs, run=False)
    # The task ID only exists after creation, hence the second round-trip.
    task.inputs['output_basename'] = task.id
    task.save()
    print('\t'.join((task_name, task.id)))

In [None]:
# Project IDs this notebook has been pointed at; cur_proj picks the active one.
engle = 'kfdrc-harmonization/sd-dztb5hrr-01'
chung = 'kfdrc-harmonization/sd-46sk55a3-02'
schiffman = 'kfdrc-harmonization/sd-ygva0e1c-01'
cur_proj = engle

# Shared reference files are looked up once and reused for every family.
sys.stderr.write('Getting refs for {}\n'.format(cur_proj))
ref_dict = get_refs(api, cur_proj)

sys.stderr.write('Getting inputs by task\n')
in_dict = get_inputs_by_task(api, cur_proj)

# Stage one draft task per family found among the completed tasks.
for fm_id, family_inputs in in_dict.items():
    sys.stderr.write('Staging task for {}\n'.format(fm_id))
    stage_tasks(api, cur_proj, fm_id, family_inputs, ref_dict)



## Delete files associated with certain tasks

In [23]:
# NOTE(review): absolute, machine-specific path -- the cell only runs where this file exists.
fm_fn = '/Users/brownm28/Documents/2018-Oct-9_genotype_refinement_bonanza/ENGLE/fm_to_rpt.txt'
# One family ID per line; the context manager closes the handle once read.
with open(fm_fn) as fm_file:
    fm_list = [line.rstrip('\n') for line in fm_file]
# Set for constant-time membership tests in the task loop below.
fm_ids = set(fm_list)

engle = 'kfdrc-harmonization/sd-dztb5hrr-01'
tasks = api.tasks.query(project=engle, status='COMPLETED').all()

for task in tasks:
    check = task.name.split('-')
    # Only refinement tasks whose family ID is listed in the file are touched;
    # the length guard skips names without a '-' instead of raising IndexError.
    if len(check) < 2 or check[0] != 'kf_genotype_refinement_vep' or check[1] not in fm_ids:
        continue
    sys.stderr.write('Found task ' + task.name + ' with outputs to delete\n')

    for out in task.outputs.keys():
        try:
            # Index by key (not .items()) so the client's own item lookup is used.
            out_file = task.outputs[out]
            if out_file is None:
                # Presumably an optional output the task never produced -- TODO confirm.
                sys.stderr.write('No file output for ' + out + '\n')
                continue
            print(out_file.name)
            out_file.delete()
        except Exception as err:
            # Best effort: report the real failure and move on. Unlike a bare
            # `except:`, this still lets KeyboardInterrupt stop the cell.
            sys.stderr.write('Could not delete output ' + out + ': ' + str(err) + '\n')

Found task kf_genotype_refinement_vep-FM_8WPPZVN3 with outputs to delete
No file output for vep_warn


8995faad-f0ae-45c1-9454-e579d2a68c16_stats.txt
8995faad-f0ae-45c1-9454-e579d2a68c16.CGP.filtered.deNovo.vep.vcf.gz
de52c8fa-88fb-4c25-b0bc-44ad7d4ed828_stats.txt
de52c8fa-88fb-4c25-b0bc-44ad7d4ed828.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_ZKK01N7M with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_H95TPB6A with outputs to delete
No file output for vep_warn


7707c336-3223-4ed5-a19c-92410187c3a7_stats.txt
7707c336-3223-4ed5-a19c-92410187c3a7.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_3Z42KBQ4 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_346Z2W1F with outputs to delete
No file output for vep_warn


cff07c2a-14a3-4a6b-a26a-f2174ff8ee80_stats.txt
cff07c2a-14a3-4a6b-a26a-f2174ff8ee80.CGP.filtered.deNovo.vep.vcf.gz
425a575e-ee34-489b-ba47-49c34ed6ce22_stats.txt
425a575e-ee34-489b-ba47-49c34ed6ce22.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_G342GZE1 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_1AX45XF6 with outputs to delete
No file output for vep_warn


4014586f-bf02-4527-8eb4-f83a807515d9_stats.txt
4014586f-bf02-4527-8eb4-f83a807515d9.CGP.filtered.deNovo.vep.vcf.gz
557204b9-1407-41cb-9b6c-d4f253194338_stats.txt
557204b9-1407-41cb-9b6c-d4f253194338.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_YXZQGKFS with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_ECATPBSB with outputs to delete
No file output for vep_warn


c349795f-9430-4b8d-a401-dc52c39ab61a_stats.txt
c349795f-9430-4b8d-a401-dc52c39ab61a.CGP.filtered.deNovo.vep.vcf.gz
5cf3e286-69f5-4948-a69e-6e2de491792a_stats.txt
5cf3e286-69f5-4948-a69e-6e2de491792a.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_E6Y9XB4Z with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_7DB1ZCP9 with outputs to delete
No file output for vep_warn


642b45a6-68f0-411f-a0f9-d3d13b0fe227_stats.txt
642b45a6-68f0-411f-a0f9-d3d13b0fe227.CGP.filtered.deNovo.vep.vcf.gz
db378e83-c0df-42a7-80f9-c06b242b1a1d_stats.txt
db378e83-c0df-42a7-80f9-c06b242b1a1d.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_YJTCZHTS with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_FYS2JR5B with outputs to delete
No file output for vep_warn


767f33f7-5f50-46e3-bd97-ebc2bb02f9da_stats.txt
767f33f7-5f50-46e3-bd97-ebc2bb02f9da.CGP.filtered.deNovo.vep.vcf.gz
6de57531-95ca-40f4-b972-37fedd078e57_stats.txt
6de57531-95ca-40f4-b972-37fedd078e57.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_F5RET3RA with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_VR0AE7M5 with outputs to delete
No file output for vep_warn


a85dc415-d7fc-4013-9e53-e9eec3dd03f4_stats.txt
a85dc415-d7fc-4013-9e53-e9eec3dd03f4.CGP.filtered.deNovo.vep.vcf.gz
c3817a21-9ee3-4729-ae6c-2c2dcb17d610_stats.txt
c3817a21-9ee3-4729-ae6c-2c2dcb17d610.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_2ZG2ZV0K with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_1JDG4XDQ with outputs to delete
No file output for vep_warn


7da771e6-1374-4ab8-b73a-026d9f446221_stats.txt
7da771e6-1374-4ab8-b73a-026d9f446221.CGP.filtered.deNovo.vep.vcf.gz
dc8ac158-1121-4cef-814f-bee4810e6db8_stats.txt
dc8ac158-1121-4cef-814f-bee4810e6db8.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_ZWMSZT40 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_9B5H8M71 with outputs to delete
No file output for vep_warn


f83afbba-82cf-4f59-976b-959395e6b232_stats.txt
f83afbba-82cf-4f59-976b-959395e6b232.CGP.filtered.deNovo.vep.vcf.gz
a38675d8-3045-4168-91c5-377b4b901e13_stats.txt
a38675d8-3045-4168-91c5-377b4b901e13.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_QKY17RNS with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_07GC0GW7 with outputs to delete
No file output for vep_warn


757fedea-9433-4374-8c5e-b9b2597071a5_stats.txt
757fedea-9433-4374-8c5e-b9b2597071a5.CGP.filtered.deNovo.vep.vcf.gz
9dfa0d1d-d6f3-4bb5-86e7-34f1cfe5c6fa_stats.txt
9dfa0d1d-d6f3-4bb5-86e7-34f1cfe5c6fa.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_TQ9N8MVA with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_AT2W3HKG with outputs to delete
No file output for vep_warn


8384132c-6bf4-4888-8cc4-bcee0d0d88fe_stats.txt
8384132c-6bf4-4888-8cc4-bcee0d0d88fe.CGP.filtered.deNovo.vep.vcf.gz
1bb7b58d-a451-4aef-9da7-a69d2ec9cfa1_stats.txt
1bb7b58d-a451-4aef-9da7-a69d2ec9cfa1.CGP.filtered.deNovo.vep.vcf.gz
