In [43]:
import sevenbridges as sbg
from sevenbridges.errors import SbgError
import sys
from requests import request
import concurrent.futures
# Build the shared API client; the cells in this notebook pass `api` to their
# helper functions (get_inputs, create_tasks, run_task, rm_outputs).
# NOTE(review): presumably 'cavatica' names a profile in the local sevenbridges
# credentials file — confirm before running on another machine.
config = sbg.Config(profile='cavatica')
api = sbg.Api(config=config)

In [44]:
def get_inputs(api, project):
    """Collect peddy inputs from completed joint-genotyping tasks of a project.

    Every COMPLETED task in ``project`` whose name looks like
    ``jointgenotyping-<fm_id>`` is inspected. For each one the function looks
    up the file named ``<fm_id>.ped`` in the project and the task's
    ``finalgathervcf`` output.

    Args:
        api: API client exposing ``tasks.query`` and ``files.query``.
        project: Project identifier passed to both queries.

    Returns:
        dict mapping ``fm_id`` to ``{'ped': <ped file>, 'vqsr_vcf': <vcf output>}``.
        Tasks for which either lookup fails are reported on stderr and left out,
        so an entry is never partially filled.
    """
    tasks = api.tasks.query(project=project, status='COMPLETED').all()
    inputs = {}
    for task in tasks:
        # Expected task name layout: jointgenotyping-FM_01DTQB3Y
        parts = task.name.split('-')
        if parts[0] != 'jointgenotyping':
            continue
        if len(parts) < 2:
            # A name without a '-' carries no family id; indexing parts[1]
            # would raise IndexError and abort the whole scan.
            sys.stderr.write(task.name + ' has no family id, skipping\n')
            continue
        sys.stderr.write('Found valid task: ' + task.name + '\n')
        fm_id = parts[1]
        try:
            ped = api.files.query(project=project, names=[(fm_id + '.ped')])[0]
            vqsr_vcf = task.outputs['finalgathervcf']
        except Exception:
            # Best-effort skip for any lookup failure (empty query result,
            # missing output key, API error). Catching Exception rather than
            # using a bare except lets KeyboardInterrupt/SystemExit propagate.
            sys.stderr.write(task.name + ' has missing files, skipping\n')
            continue
        inputs[fm_id] = {'ped': ped, 'vqsr_vcf': vqsr_vcf}
    return inputs
            

In [45]:
def create_tasks(api, project, inputs, fm_id, suffix):
    """Create (without running) a draft peddy task for one family.

    The task is named ``kfdrc-peddy-<fm_id><suffix>`` and uses the app
    ``<project>/kfdrc-peddy-tool``. Its ``output_basename`` input is set to the
    task's own id, which only exists after creation, hence the extra save.

    Args:
        api: API client exposing ``tasks.create``.
        project: Project identifier the task is created in.
        inputs: Mapping of family id to task inputs; ``inputs[fm_id]`` must
            contain a ``'vqsr_vcf'`` entry with a ``name`` attribute.
        fm_id: Family id selecting the entry of ``inputs`` to use.
        suffix: Text appended to the task name.

    Returns:
        ``'FM_ID:<fm_id> <vcf name> Task:<task id>'`` on success, or ``None``
        when an ``SbgError`` is raised (the error is reported on stderr).
    """
    cur_in = inputs[fm_id]
    task_name = 'kfdrc-peddy-' + fm_id + suffix
    app_name = project + '/kfdrc-peddy-tool'
    try:
        task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=cur_in, run=False)
        task.inputs['output_basename'] = task.id
        task.save()
        return 'FM_ID:' + fm_id + ' ' + cur_in['vqsr_vcf'].name + ' Task:' + task.id
    except SbgError as e:
        # The error's message may be None or absent; concatenating it directly
        # would raise TypeError inside the handler and hide the real failure.
        detail = getattr(e, 'message', None) or repr(e)
        sys.stderr.write(str(detail) + '; could not create task for ' + task_name + '!\n')
        return None

In [46]:
chung = 'kfdrc-harmonization/sd-46sk55a3-02'
schiff = 'kfdrc-harmonization/sd-ygva0e1c-01'
# cur_proj = schiff
# proj_dict = {'kfdrc-harmonization/sd-dypmehhf-01': 'Maris', 'kfdrc-harmonization/sd-9pyzahhe-08': 'Marazita_fy15_b1', 'kfdrc-harmonization/sd-9pyzahhe-09': 'Marazita_fy15_b2'}
# Maps project id -> label written at the start of every report line.
proj_dict = {'kfdrc-harmonization/sd-9pyzahhe-09': 'Marazita_fy15_b2'}
suffix = '-rerun2'
# Stage one draft task per family and record it in the report file, one line
# per task; the last whitespace-separated field of each line is 'Task:<id>'.
# The `with` block guarantees the report is flushed and closed even if a
# project fails part-way through.
with open('/Users/brownm28/Documents/2018-Oct-23_peddy_run/mara_rpt_b2_stage.txt', 'w') as out_fh:
    for cur_proj in proj_dict:
        inputs = get_inputs(api, cur_proj)
        sys.stderr.write('Processing project ' + cur_proj + ': ' + proj_dict[cur_proj] + '\n')
        with concurrent.futures.ThreadPoolExecutor(8) as executor:
            results = {executor.submit(create_tasks, api, cur_proj, inputs, fm_id, suffix): fm_id for fm_id in inputs}
            for result in concurrent.futures.as_completed(results):
                report = result.result()
                if report is None:
                    # create_tasks returns None when task creation failed;
                    # concatenating None would raise TypeError and abort the
                    # remaining families.
                    sys.stderr.write('No task staged for ' + results[result] + ', not recorded\n')
                    continue
                out_fh.write(proj_dict[cur_proj] + ' ' + cur_proj + ' ' + report + '\n')

Found valid task: jointgenotyping-FM_4P00M65H
Found valid task: jointgenotyping-FM_162NCRY3
Found valid task: jointgenotyping-FM_9E3WAF2Z
jointgenotyping-FM_9E3WAF2Z has missing files, skipping
Found valid task: jointgenotyping-FM_Q1JQJBB5
jointgenotyping-FM_Q1JQJBB5 has missing files, skipping
Found valid task: jointgenotyping-FM_MA2B7XP5
Found valid task: jointgenotyping-FM_34H49TGW
Found valid task: jointgenotyping-FM_3PMTR62F
Found valid task: jointgenotyping-FM_V7CNVFHM
Found valid task: jointgenotyping-FM_D5C8910K
Found valid task: jointgenotyping-FM_QGQ6QZ2X
Found valid task: jointgenotyping-FM_BWKVDB1G
Found valid task: jointgenotyping-FM_H69YN234
Found valid task: jointgenotyping-FM_ER8NTQ3P
Found valid task: jointgenotyping-FM_9WAHMGRV
Found valid task: jointgenotyping-FM_WR4R70NY
Found valid task: jointgenotyping-FM_JN9AW927
Found valid task: jointgenotyping-FM_3FJ746JF
Found valid task: jointgenotyping-FM_QEH6QM74
Found valid task: jointgenotyping-FM_PF276JVF
jointgenotypin

In [47]:
def run_task(api, entry):
    """Start the task referenced by one line of the staging report.

    The last whitespace-separated field of ``entry`` must have the form
    ``<tag>:<task id>``; only the id is used.

    Args:
        api: API client exposing ``tasks.get``.
        entry: One report line, with or without its trailing newline.
    """
    last_field = entry.rstrip('\n').split()[-1]
    # Unpacking into exactly two names keeps the ValueError for fields that do
    # not contain exactly one ':'.
    _tag, task_id = last_field.split(':')
    api.tasks.get(id=task_id).run()

In [48]:

task_fn = '/Users/brownm28/Documents/2018-Oct-23_peddy_run/mara_rpt_b2_stage.txt'
# Start every staged task listed in the report, eight at a time. The report is
# opened in a `with` block so the handle is closed once all lines are submitted
# and processed.
with open(task_fn) as task_fh, concurrent.futures.ThreadPoolExecutor(8) as executor:
    # Blank lines carry no task reference and would only raise inside run_task.
    results = {executor.submit(run_task, api, entry): entry for entry in task_fh if entry.strip()}
    for result in concurrent.futures.as_completed(results):
        # An exception raised in a worker is stored on its future; without this
        # check a task that failed to start would go unnoticed.
        error = result.exception()
        if error is not None:
            sys.stderr.write('Could not run task for entry "' + results[result].rstrip('\n') + '": ' + repr(error) + '\n')


In [30]:
def rm_outputs(api, project, prefix_list, fm_list):
    """Delete the output files of selected completed tasks in a project.

    A COMPLETED task is selected when its name has the form
    ``<prefix>-<fm_id>[-...]`` with ``<prefix>`` in ``prefix_list`` and
    ``<fm_id>`` in ``fm_list``. Every output of a selected task is deleted;
    an output may be a single object, a list of objects, or None.

    Args:
        api: API client exposing ``tasks.query``.
        project: Project identifier whose tasks are scanned.
        prefix_list: Accepted first name components (text before the first '-').
        fm_list: Accepted second name components (family ids).

    Returns:
        None. Progress is written to stderr. Deletion is destructive and is
        performed through each output object's ``delete()`` method.
    """
    tasks = api.tasks.query(project=project, status='COMPLETED').all()
    for task in tasks:
        # Expected task name layout: jointgenotyping-FM_01DTQB3Y
        parts = task.name.split('-')
        # The length check comes first: a name without '-' has no parts[1] and
        # would otherwise raise IndexError when its prefix matches.
        if len(parts) < 2 or parts[0] not in prefix_list or parts[1] not in fm_list:
            continue
        sys.stderr.write('Found task to remove outputs from: ' + task.name + '\n')
        for output in task.outputs:
            value = task.outputs[output]
            if value is None:
                sys.stderr.write('Skipping None object for ' + output + '\n')
            elif not isinstance(value, list):
                sys.stderr.write('Deleting output ' + value.name + '\n')
                value.delete()
            else:
                for obj in value:
                    if obj is None:
                        # Treat an empty slot inside a list output like a
                        # missing single output instead of failing on .name.
                        sys.stderr.write('Skipping None object for ' + output + '\n')
                        continue
                    sys.stderr.write('Deleting output ' + obj.name + '\n')
                    obj.delete()
    

In [31]:
# Task-name prefixes whose outputs are eligible for removal.
prefix_list = ['kf_genotype_refinement_vep', 'jointgenotyping']
project = 'kfdrc-harmonization/sd-9pyzahhe-08'
# Family ids to clean up, one per line of the input file.
fm_list = []
# `with` closes the file even if reading fails part-way through.
with open('/Users/brownm28/Documents/2018-Oct-23_peddy_run/FY15_Marazita/batch1/check_info/fm_to_rm.txt') as fm_file:
    for line in fm_file:
        line = line.rstrip('\n')
        # A blank line would add '' to the list and match any task whose name
        # has an empty second component; never feed that to a deletion.
        if line:
            fm_list.append(line)
rm_outputs(api, project, prefix_list, fm_list)

Found task to remove outputs from: jointgenotyping-FM_4G4QGRH1
Deleting output e28ec276-76c0-4eb6-84b2-3da5663d3f64.vcf.gz
Deleting output e28ec276-76c0-4eb6-84b2-3da5663d3f64.variant_calling_detail_metrics
Deleting output e28ec276-76c0-4eb6-84b2-3da5663d3f64.variant_calling_summary_metrics
Found task to remove outputs from: jointgenotyping-FM_8WNPNTBB
Deleting output ac55cfb3-92f6-4b3a-a974-11bf777e450a.vcf.gz
Deleting output ac55cfb3-92f6-4b3a-a974-11bf777e450a.variant_calling_detail_metrics
Deleting output ac55cfb3-92f6-4b3a-a974-11bf777e450a.variant_calling_summary_metrics
Found task to remove outputs from: jointgenotyping-FM_JT5GVN3H
Deleting output 8cac2f59-be46-4122-996c-e48f329c9425.vcf.gz
Deleting output 8cac2f59-be46-4122-996c-e48f329c9425.variant_calling_detail_metrics
Deleting output 8cac2f59-be46-4122-996c-e48f329c9425.variant_calling_summary_metrics
Found task to remove outputs from: jointgenotyping-FM_ZAT6PTE1
Deleting output 2356ea7b-d050-4864-821d-54d2202cbdc2.vcf.gz
D