In [1]:
import sevenbridges as sbg
import sys
import pdb
from requests import request
import concurrent.futures
# Build the API client from the 'cavatica' profile; every later cell reuses `api`.
# NOTE(review): presumably the profile is resolved from the local sevenbridges
# credentials file -- confirm before running on another machine.
config = sbg.Config(profile='cavatica')
api = sbg.Api(config=config)

In [2]:
def get_refs(api, project):
    """Look up the reference files used as workflow inputs in a project.

    Args:
        api: API client exposing ``files.query(project=..., names=[...])``.
        project: Identifier of the project to search.

    Returns:
        dict mapping ``'cache'``, ``'reference'`` and ``'snp_sites'`` to the
        first file returned by the query for the corresponding file name.

    Raises:
        IndexError: If a query returns no file. The message names the missing
            file and the project instead of the bare "list index out of range".
    """
    # Workflow input name -> exact file name expected in the project.
    ref_names = {
        'cache': 'homo_sapiens_vep_93_GRCh38_convert_cache.tar.gz',
        'reference': 'Homo_sapiens_assembly38.fasta',
        'snp_sites': '1000G_phase3_v4_20130502.sites.hg38.vcf',
    }
    ref_dict = {}
    for key, file_name in ref_names.items():
        try:
            ref_dict[key] = api.files.query(project=project, names=[file_name])[0]
        except IndexError:
            # Same exception type as before, but say which file is missing.
            raise IndexError(
                'Reference file {} not found in project {}'.format(file_name, project)
            ) from None
    return ref_dict

In [3]:
def get_inputs_by_task(api, fm_run, project):
    """Collect per-family workflow inputs from completed joint-genotyping tasks.

    Task names are split on '-'; a task is considered when the first part is
    'jointgenotyping' and the second part (the family ID) is a key of
    ``fm_run``. For each such task the family's ``<fm_id>.ped`` file is looked
    up in the project and paired with the task's 'finalgathervcf' output.

    Args:
        api: API client exposing ``tasks.query(...).all()`` and
            ``files.query(project=..., names=[...])``.
        fm_run: dict keyed by family ID. Mutated in place: the value is set to
            1 for every family whose inputs were collected.
        project: Identifier of the project to scan.

    Returns:
        dict mapping family ID to ``{'ped': <file>, 'vqsr_vcf': <output>}``.

    Raises:
        SystemExit: With code 1 when a second 'jointgenotyping' task is seen
            for a family that already has inputs collected.
    """
    tasks = api.tasks.query(project=project, status='COMPLETED').all()
    inputs = {}
    report_every = 50  # progress message interval, in tasks
    for count, task in enumerate(tasks, start=1):
        parts = task.name.split('-')
        # Names without a '-' carry no family ID; indexing parts[1] on them
        # used to abort the whole scan with an IndexError.
        if len(parts) > 1 and parts[0] == 'jointgenotyping':
            fm_id = parts[1]
            if fm_id in inputs:
                sys.stderr.write('ERROR: Repeat family ID ' + fm_id + ' in task ' + task.name + '\n')
                # sys.exit always raises SystemExit; the bare exit() helper is
                # meant for the interactive shell and is not guaranteed to stop
                # execution inside a notebook kernel.
                sys.exit(1)
            if fm_id in fm_run:
                try:
                    sys.stderr.write('Found valid task for ' + fm_id + ' ' + task.name + '\n')
                    ped = api.files.query(project=project, names=[fm_id + '.ped'])[0]
                    vqsr_vcf = task.outputs['finalgathervcf']
                    # Only record the family once both inputs were resolved.
                    inputs[fm_id] = {'ped': ped, 'vqsr_vcf': vqsr_vcf}
                    fm_run[fm_id] = 1
                except Exception as e:
                    # Deliberate best-effort: report and move on to the next task.
                    print(e)
                    sys.stderr.write('Error handling ' + task.name + '. This is probably expected, continuing\n')
        if count % report_every == 0:
            sys.stderr.write('Processed ' + str(count) + ' tasks\n')
    return inputs

In [4]:
def stage_tasks(api, project, fm_id, in_dict, ref_dict, suffix):
    """Create (without running) one refinement task for a family.

    Args:
        api: API client exposing ``tasks.create(...)``.
        project: Project identifier; also used as the prefix of the app ID.
        fm_id: Family ID embedded in the task name.
        in_dict: Family-specific workflow inputs.
        ref_dict: Reference inputs shared by all families. Not modified.
        suffix: Text appended to the task name.

    Returns:
        A tab-separated, newline-terminated line ``"<task name>\\t<task id>\\n"``.
    """
    task_name = 'kf_genotype_refinement_vep-' + fm_id + suffix
    app_name = project + '/kf-genotype-refinement-workflow'
    # Build a fresh dict per call. Aliasing ref_dict here would write each
    # family's inputs into the shared dict, so concurrent callers could submit
    # a task carrying another family's files.
    inputs = dict(ref_dict)
    inputs.update(in_dict)
    task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=inputs, run=False)
    # The task ID is only known after creation, so the basename is set afterwards.
    task.inputs['output_basename'] = task.id
    task.save()
    return '\t'.join((task_name, task.id)) + '\n'

In [6]:
# Project IDs for each cohort; point cur_proj at the one to process.
engle = 'kfdrc-harmonization/sd-dztb5hrr-01'
chung = 'kfdrc-harmonization/sd-46sk55a3-02'
schiffman = 'kfdrc-harmonization/sd-ygva0e1c-01'
rios_wise = 'kfdrc-harmonization/sd-rm8afw0r-01'
seidman = 'kfdrc-harmonization/sd-preasa7s-02'
vilain = 'kfdrc-harmonization/sd-6fpyjqbr-02'
cur_proj = vilain
# Staging report: one "<task name>\t<task id>" line per created task.
out_fn = '/Users/brownm28/Documents/2018-Oct-9_genotype_refinement_bonanza/VILAIN/rpt/r3-task_stage.txt'
sys.stderr.write('Getting refs for ' + cur_proj + '\n')
ref_dict = get_refs(api, cur_proj)
sys.stderr.write('Getting inputs by task\n')
# Family IDs to (re)run, one per line; the value flips to 1 once inputs are found.
in_fm = '/Users/brownm28/Documents/2018-Oct-9_genotype_refinement_bonanza/VILAIN/rpt/r3_rpt.txt'
fm_run = {}
with open(in_fm) as fm_fh:
    for fm_id in fm_fh:
        fm_run[fm_id.rstrip('\n')] = 0

in_dict = get_inputs_by_task(api, fm_run, cur_proj)

suffix = '-rerun4'
# The report is opened only now, so a failure in the lookups above does not
# truncate a previous report. Both context managers guarantee the file is
# closed and the pool is shut down even if a worker raises.
with open(out_fn, 'w') as out_fh, concurrent.futures.ThreadPoolExecutor(8) as executor:
    # Each task gets its own copy of the reference inputs so worker threads
    # never share a mutable dict.
    results = {
        executor.submit(stage_tasks, api, cur_proj, fm_id, in_dict[fm_id], dict(ref_dict), suffix): fm_id
        for fm_id in in_dict
    }
    for result in concurrent.futures.as_completed(results):
        out_fh.write(result.result())

Getting refs for kfdrc-harmonization/sd-6fpyjqbr-02
Getting inputs by task
Found valid task for FM_08T1J7C6 jointgenotyping-FM_08T1J7C6
Found valid task for FM_11MX5F3A jointgenotyping-FM_11MX5F3A
Found valid task for FM_3CGWDWG9 jointgenotyping-FM_3CGWDWG9
Found valid task for FM_446XWPHN jointgenotyping-FM_446XWPHN
Found valid task for FM_6PCY22EF jointgenotyping-FM_6PCY22EF
Found valid task for FM_7ZVDBHTV jointgenotyping-FM_7ZVDBHTV
Found valid task for FM_81PS45A8 jointgenotyping-FM_81PS45A8
Found valid task for FM_9FPDCWY5 jointgenotyping-FM_9FPDCWY5
Found valid task for FM_ANSS9SWT jointgenotyping-FM_ANSS9SWT
Found valid task for FM_C7YQB73E jointgenotyping-FM_C7YQB73E
Found valid task for FM_CMVE09VR jointgenotyping-FM_CMVE09VR
Found valid task for FM_CVK21HHS jointgenotyping-FM_CVK21HHS
Found valid task for FM_E7GF64GB jointgenotyping-FM_E7GF64GB
Found valid task for FM_EWA2Q3D4 jointgenotyping-FM_EWA2Q3D4
Found valid task for FM_EYPG9H9S jointgenotyping-FM_EYPG9H9S
Found vali

In [8]:
task_list = '/Users/brownm28/Documents/2018-Oct-9_genotype_refinement_bonanza/VILAIN/rpt/r3-task_stage.txt'
# Each line of the staging report is "<task name>\t<task id>"; start every
# listed task. The with-block closes the report once all tasks are submitted.
with open(task_list) as task_fh:
    for line in task_fh:
        (tname, tid) = line.rstrip('\n').split('\t')
        task = api.tasks.get(id=tid)
        task.run()
        print('Running', tname, tid)

Running kf_genotype_refinement_vep-FM_08T1J7C6-rerun4 ee220ee1-1b2c-48f7-9e19-f2b2ebf670fc
Running kf_genotype_refinement_vep-FM_6PCY22EF-rerun4 d00051d0-cc48-4cbb-8a2f-e92752b3da5f
Running kf_genotype_refinement_vep-FM_9FPDCWY5-rerun4 745a9541-430c-4f19-8d74-f73c065a5d13
Running kf_genotype_refinement_vep-FM_81PS45A8-rerun4 528008ea-a397-4e6d-a57a-c5d66636e7b0
Running kf_genotype_refinement_vep-FM_446XWPHN-rerun4 d2d32b3e-7175-4494-96b4-19e1206326fd
Running kf_genotype_refinement_vep-FM_7ZVDBHTV-rerun4 59d26754-a550-4a16-9b35-e746497a08a4
Running kf_genotype_refinement_vep-FM_3CGWDWG9-rerun4 99331e3e-a6ae-490d-9682-fe89ca77166e
Running kf_genotype_refinement_vep-FM_11MX5F3A-rerun4 3c2c4e62-919c-41a0-b371-448ab4945b7e
Running kf_genotype_refinement_vep-FM_E7GF64GB-rerun4 d8805f64-b902-463b-9bd4-d3feee3fc7c6
Running kf_genotype_refinement_vep-FM_CVK21HHS-rerun4 ecc729d8-b07d-479d-87cc-43b11408695f
Running kf_genotype_refinement_vep-FM_C7YQB73E-rerun4 4933d13d-6560-4897-a6ad-2c86579d2aac

## Delete files associated with certain tasks

In [2]:
fm_fn = '/Users/brownm28/Documents/2018-Oct-9_genotype_refinement_bonanza/ENGLE/refine_rpt/FM_list.txt'
# Family IDs whose refinement outputs should be removed, one per line.
with open(fm_fn) as fm_fh:
    fm_list = [line.rstrip('\n') for line in fm_fh]

engle = 'kfdrc-harmonization/sd-dztb5hrr-01'
tasks = api.tasks.query(project=engle, status = 'COMPLETED').all()

for task in tasks:
    check = task.name.split('-')
    # The length guard keeps a task named exactly 'kf_genotype_refinement_vep'
    # (no family ID) from raising IndexError on check[1].
    if len(check) > 1 and check[0] == 'kf_genotype_refinement_vep' and check[1] in fm_list:
        sys.stderr.write('Found task ' + task.name + ' with outputs to delete\n')

        for out in task.outputs.keys():
            try:
                print (task.outputs[out].name)
                task.outputs[out].delete()
            except Exception:
                # Best-effort: an output with no file object (or a failed
                # delete) is reported and skipped. Catching Exception rather
                # than everything lets Ctrl-C stop this destructive loop.
                sys.stderr.write('No file output for ' + out + '\n')

Found task kf_genotype_refinement_vep-FM_ZK0JTWJJ with outputs to delete
No file output for vep_warn


af378737-a466-4eef-b868-32ae4611140b.CGP.filtered.deNovo.vep.vcf.gz
af378737-a466-4eef-b868-32ae4611140b_stats.txt
c19182cf-fa79-4512-be33-f02cb24ce09b.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_CTEBPSDB with outputs to delete
No file output for vep_warn


c19182cf-fa79-4512-be33-f02cb24ce09b_stats.txt


Found task kf_genotype_refinement_vep-FM_4972NB6B with outputs to delete
No file output for vep_warn


541ff799-1c30-4686-a045-d0109eccef1b.CGP.filtered.deNovo.vep.vcf.gz
541ff799-1c30-4686-a045-d0109eccef1b_stats.txt
abafbfb7-66cb-43b3-a696-e62c84449021.CGP.filtered.deNovo.vep.vcf.gz
abafbfb7-66cb-43b3-a696-e62c84449021_stats.txt
33dc9a7c-35a4-4124-ab7f-4ac4d1ce2daf.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_KN821VDX with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_4FK6R5GX with outputs to delete
No file output for vep_warn


33dc9a7c-35a4-4124-ab7f-4ac4d1ce2daf_stats.txt


Found task kf_genotype_refinement_vep-FM_JT5SA2GW with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_69F4WMBF with outputs to delete
No file output for vep_warn


9630d69a-3533-4829-88f2-d83b5cf363ab.CGP.filtered.deNovo.vep.vcf.gz
9630d69a-3533-4829-88f2-d83b5cf363ab_stats.txt
fe44ba9b-6ea9-44e8-ad7b-c1a7013008bf.CGP.filtered.deNovo.vep.vcf.gz
fe44ba9b-6ea9-44e8-ad7b-c1a7013008bf_stats.txt


Found task kf_genotype_refinement_vep-FM_S0WWNAW3 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_YXZQGKFS with outputs to delete
No file output for vep_warn
No file output for cgp_vep_annotated_vcf
No file output for vcf_summary_stats
Found task kf_genotype_refinement_vep-FM_E6Y9XB4Z with outputs to delete
No file output for vep_warn


0e6ce277-eec8-4672-8b49-be1b82d91fe9.CGP.filtered.deNovo.vep.vcf.gz
0e6ce277-eec8-4672-8b49-be1b82d91fe9_stats.txt


No file output for cgp_vep_annotated_vcf
No file output for vcf_summary_stats
Found task kf_genotype_refinement_vep-FM_XSMC1HHS with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_7DB1ZCP9 with outputs to delete
No file output for vep_warn
No file output for cgp_vep_annotated_vcf


d3192c79-73dd-4be9-a86b-fa5d2b9539f7.CGP.filtered.deNovo.vep.vcf.gz
d3192c79-73dd-4be9-a86b-fa5d2b9539f7_stats.txt


No file output for vcf_summary_stats
Found task kf_genotype_refinement_vep-FM_SMVT3DEP with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_RB2BJP7J with outputs to delete
No file output for vep_warn


1882532b-898d-4537-9214-90e78c374c1a.CGP.filtered.deNovo.vep.vcf.gz
1882532b-898d-4537-9214-90e78c374c1a_stats.txt
a94b8426-5d9a-47e3-b518-63c32ad8c3aa.CGP.filtered.deNovo.vep.vcf.gz
a94b8426-5d9a-47e3-b518-63c32ad8c3aa_stats.txt


Found task kf_genotype_refinement_vep-FM_9GNE20X1 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_H11S5KTZ with outputs to delete
No file output for vep_warn


b272721a-f4e9-4f6c-8b83-8a1a211aadfc.CGP.filtered.deNovo.vep.vcf.gz
b272721a-f4e9-4f6c-8b83-8a1a211aadfc_stats.txt
147983d7-1897-4917-8662-65df1ffd72d9.CGP.filtered.deNovo.vep.vcf.gz
147983d7-1897-4917-8662-65df1ffd72d9_stats.txt


Found task kf_genotype_refinement_vep-FM_PNNKNN19 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_Y8B9NACA with outputs to delete
No file output for vep_warn


be3e7480-978e-47c4-8bfc-5863a2fd6661.CGP.filtered.deNovo.vep.vcf.gz
be3e7480-978e-47c4-8bfc-5863a2fd6661_stats.txt
6ce7b95d-fe34-4e4f-9087-f0fb470a59a3.CGP.filtered.deNovo.vep.vcf.gz
6ce7b95d-fe34-4e4f-9087-f0fb470a59a3_stats.txt


Found task kf_genotype_refinement_vep-FM_QSJP99Q6 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_AE8KWY8T with outputs to delete
No file output for vep_warn


a2580fbd-74db-4979-a73c-63b9321f7dc3.CGP.filtered.deNovo.vep.vcf.gz
a2580fbd-74db-4979-a73c-63b9321f7dc3_stats.txt
41ffc0d1-46b6-4167-82c3-f3acb4a8b056.CGP.filtered.deNovo.vep.vcf.gz
41ffc0d1-46b6-4167-82c3-f3acb4a8b056_stats.txt


Found task kf_genotype_refinement_vep-FM_FZNKBREA with outputs to delete
No file output for vep_warn


12c91c8f-abac-4a11-b6e2-b6830bfef383.CGP.filtered.deNovo.vep.vcf.gz
12c91c8f-abac-4a11-b6e2-b6830bfef383_stats.txt


Found task kf_genotype_refinement_vep-FM_M0BCB6FK with outputs to delete
No file output for vep_warn


e909c59b-5fe5-471e-85b1-f3e0b3869171.CGP.filtered.deNovo.vep.vcf.gz
e909c59b-5fe5-471e-85b1-f3e0b3869171_stats.txt


Found task kf_genotype_refinement_vep-FM_YH2T3CD8 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_C0YWP4XR with outputs to delete
No file output for vep_warn


f1936787-3c2a-4dab-ac59-78fbe18b9efe.CGP.filtered.deNovo.vep.vcf.gz
f1936787-3c2a-4dab-ac59-78fbe18b9efe_stats.txt
48e94e64-b774-4d20-ab5f-358be66c09a4.CGP.filtered.deNovo.vep.vcf.gz
48e94e64-b774-4d20-ab5f-358be66c09a4_stats.txt


Found task kf_genotype_refinement_vep-FM_BRK65FGM with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_CZD00MB8 with outputs to delete
No file output for vep_warn


db9200c5-b33e-4144-9285-604277997053.CGP.filtered.deNovo.vep.vcf.gz
db9200c5-b33e-4144-9285-604277997053_stats.txt
4522b3c2-1e08-4437-943e-841cd7623deb.CGP.filtered.deNovo.vep.vcf.gz
4522b3c2-1e08-4437-943e-841cd7623deb_stats.txt


Found task kf_genotype_refinement_vep-FM_F5RET3RA with outputs to delete
No file output for vep_warn
No file output for cgp_vep_annotated_vcf
No file output for vcf_summary_stats
Found task kf_genotype_refinement_vep-FM_8RD001GM with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_4G33MP1H with outputs to delete
No file output for vep_warn


2af0e0c0-83e2-4855-8cc0-fa38c16bd5e0.CGP.filtered.deNovo.vep.vcf.gz
2af0e0c0-83e2-4855-8cc0-fa38c16bd5e0_stats.txt
080e015a-3427-48a9-b3f3-21a6b3d0a62a.CGP.filtered.deNovo.vep.vcf.gz
080e015a-3427-48a9-b3f3-21a6b3d0a62a_stats.txt


Found task kf_genotype_refinement_vep-FM_A4N22TGJ with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_1JDG4XDQ with outputs to delete
No file output for vep_warn
No file output for cgp_vep_annotated_vcf
No file output for vcf_summary_stats
Found task kf_genotype_refinement_vep-FM_PPP9C654 with outputs to delete
No file output for vep_warn


43720e99-a6e2-4681-9100-332f588a22ab.CGP.filtered.deNovo.vep.vcf.gz
43720e99-a6e2-4681-9100-332f588a22ab_stats.txt
e25df335-5d23-468b-8b9b-f60024eb5265.CGP.filtered.deNovo.vep.vcf.gz
e25df335-5d23-468b-8b9b-f60024eb5265_stats.txt
0b140e9a-fc5e-431d-a5f2-e43e2a5f8e02.CGP.filtered.deNovo.vep.vcf.gz
0b140e9a-fc5e-431d-a5f2-e43e2a5f8e02_stats.txt


Found task kf_genotype_refinement_vep-FM_1NXFY5EB with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_5S6V92HX with outputs to delete
No file output for vep_warn


043c0fa0-446f-40cc-9324-35cf1c512305.CGP.filtered.deNovo.vep.vcf.gz
043c0fa0-446f-40cc-9324-35cf1c512305_stats.txt


Found task kf_genotype_refinement_vep-FM_YTXFQAAF with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_X9CFDA8N with outputs to delete
No file output for vep_warn


c38f08bf-7653-40a6-a1c0-3eac4472e951.CGP.filtered.deNovo.vep.vcf.gz
c38f08bf-7653-40a6-a1c0-3eac4472e951_stats.txt
0d7bed7d-6b4f-4127-9bd3-243a8b365683.CGP.filtered.deNovo.vep.vcf.gz
0d7bed7d-6b4f-4127-9bd3-243a8b365683_stats.txt
4b8b2220-b429-4c19-ae45-3576b61b6850.CGP.filtered.deNovo.vep.vcf.gz
4b8b2220-b429-4c19-ae45-3576b61b6850_stats.txt


Found task kf_genotype_refinement_vep-FM_QVTZTFFY with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_NAG3ZGXC with outputs to delete
No file output for vep_warn


e3635a66-2a99-4272-851a-b5e79f3900a4.CGP.filtered.deNovo.vep.vcf.gz
e3635a66-2a99-4272-851a-b5e79f3900a4_stats.txt
7ba8f916-e580-4c82-909e-0d3bd54b884a.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_ZWMSZT40 with outputs to delete
No file output for vep_warn
No file output for cgp_vep_annotated_vcf
No file output for vcf_summary_stats
Found task kf_genotype_refinement_vep-FM_9B5H8M71 with outputs to delete
No file output for vep_warn
No file output for cgp_vep_annotated_vcf
No file output for vcf_summary_stats
Found task kf_genotype_refinement_vep-FM_HAY9WV81 with outputs to delete
No file output for vep_warn


7ba8f916-e580-4c82-909e-0d3bd54b884a_stats.txt
bd2d2fd3-3944-4ac4-b48c-485f55a6ae7f.CGP.filtered.deNovo.vep.vcf.gz
bd2d2fd3-3944-4ac4-b48c-485f55a6ae7f_stats.txt


Found task kf_genotype_refinement_vep-FM_07GC0GW7 with outputs to delete
No file output for vep_warn
No file output for cgp_vep_annotated_vcf
No file output for vcf_summary_stats
Found task kf_genotype_refinement_vep-FM_YXT8900R with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_TQ9N8MVA with outputs to delete
No file output for vep_warn
No file output for cgp_vep_annotated_vcf
No file output for vcf_summary_stats
Found task kf_genotype_refinement_vep-FM_VJ3Z7921 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_ACVHVDEG with outputs to delete
No file output for vep_warn


f765525a-adaa-4965-adf2-43105ccbe193.CGP.filtered.deNovo.vep.vcf.gz
f765525a-adaa-4965-adf2-43105ccbe193_stats.txt
a701dd0c-eb10-4852-b733-cb18b4ac5f85.CGP.filtered.deNovo.vep.vcf.gz
a701dd0c-eb10-4852-b733-cb18b4ac5f85_stats.txt


Found task kf_genotype_refinement_vep-FM_YXZQGKFS-rerun1 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_TQ9N8MVA-rerun1 with outputs to delete
No file output for vep_warn


786c1367-3713-48e0-bf57-363d7fbbae9f.CGP.filtered.deNovo.vep.vcf.gz
786c1367-3713-48e0-bf57-363d7fbbae9f_stats.txt
73e0019e-613f-450d-b310-884a1364d221.CGP.filtered.deNovo.vep.vcf.gz
73e0019e-613f-450d-b310-884a1364d221_stats.txt


Found task kf_genotype_refinement_vep-FM_E6Y9XB4Z-rerun1 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_9B5H8M71-rerun1 with outputs to delete
No file output for vep_warn


eb05d3bc-1db5-4e8b-bfe6-eb0bc0a59ab6.CGP.filtered.deNovo.vep.vcf.gz
eb05d3bc-1db5-4e8b-bfe6-eb0bc0a59ab6_stats.txt
86208e6f-dbc7-464c-bea7-45f3e24a1efd.CGP.filtered.deNovo.vep.vcf.gz
86208e6f-dbc7-464c-bea7-45f3e24a1efd_stats.txt


Found task kf_genotype_refinement_vep-FM_ZWMSZT40-rerun1 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_1JDG4XDQ-rerun1 with outputs to delete
No file output for vep_warn


4a3b5aac-4a27-478f-b892-7878541b3243.CGP.filtered.deNovo.vep.vcf.gz
4a3b5aac-4a27-478f-b892-7878541b3243_stats.txt
0d08a29e-10a9-449d-9070-6b04e03c095c.CGP.filtered.deNovo.vep.vcf.gz
0d08a29e-10a9-449d-9070-6b04e03c095c_stats.txt
f7bf0a06-a380-481e-a4fa-b336ad8b7914.CGP.filtered.deNovo.vep.vcf.gz
f7bf0a06-a380-481e-a4fa-b336ad8b7914_stats.txt
a7ec8804-0dd4-41a4-af5d-7fe248c5603f.CGP.filtered.deNovo.vep.vcf.gz


Found task kf_genotype_refinement_vep-FM_F5RET3RA-rerun1 with outputs to delete
No file output for vep_warn
Found task kf_genotype_refinement_vep-FM_7DB1ZCP9-rerun1 with outputs to delete
No file output for vep_warn


a7ec8804-0dd4-41a4-af5d-7fe248c5603f_stats.txt


Found task kf_genotype_refinement_vep-FM_07GC0GW7-rerun1 with outputs to delete
No file output for vep_warn


fff48624-03da-4e17-aa7b-1291d3278fe4.CGP.filtered.deNovo.vep.vcf.gz
fff48624-03da-4e17-aa7b-1291d3278fe4_stats.txt
