In [1]:
import sevenbridges as sbg
from sevenbridges.errors import SbgError
import sys
from requests import request
import concurrent.futures
# Build the shared Seven Bridges API client from the 'cavatica' profile.
# The resulting `api` object is what the cells below pass to every helper.
# NOTE(review): presumably the profile is resolved from the local Seven Bridges
# credentials file -- confirm where it lives on the machine running this.
config = sbg.Config(profile='cavatica')
api = sbg.Api(config=config)

In [1]:
def get_inputs(api, project):
    """Collect peddy inputs for every completed joint-genotyping task.

    Queries the COMPLETED tasks of ``project`` and keeps those whose name has
    the form ``jointgenotyping-<FM_ID>``. For each one it looks up the file
    ``<FM_ID>.ped`` in the project and the task's ``finalgathervcf`` output.

    Args:
        api: API client exposing ``tasks.query(...)`` and ``files.query(...)``.
        project: Project identifier passed to both queries.

    Returns:
        dict mapping family id -> ``{'ped': <file>, 'vqsr_vcf': <file>}``.
        Tasks for which either file cannot be resolved are logged to stderr
        and left out.
    """
    tasks = api.tasks.query(project=project, status='COMPLETED').all()
    inputs = {}
    for task in tasks:
        # Expected task name layout: jointgenotyping-FM_01DTQB3Y
        parts = task.name.split('-')
        if parts[0] != 'jointgenotyping':
            continue
        sys.stderr.write('Found valid task: ' + task.name + '\n')
        try:
            # parts[1] is read inside the try so that a name without a family
            # id is skipped like any other incomplete task instead of raising.
            fm_id = parts[1]
            ped = api.files.query(project=project, names=[(fm_id + '.ped')])[0]
            vqsr_vcf = task.outputs['finalgathervcf']
            if vqsr_vcf is None:
                # An empty output would only fail later, when its name is read.
                raise KeyError('finalgathervcf')
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still stop
            # the cell.
            sys.stderr.write(task.name + ' has missing files, skipping\n')
            continue
        inputs[fm_id] = {'ped': ped, 'vqsr_vcf': vqsr_vcf}
    return inputs
            

In [3]:
def create_tasks(api, project, inputs, fm_id, suffix):
    """Create (but do not start) a peddy draft task for one family.

    The task is named ``kfdrc-peddy-<fm_id><suffix>``, uses the app
    ``<project>/kfdrc-peddy-tool`` and receives ``inputs[fm_id]`` as its
    inputs. After creation the task's ``output_basename`` input is set to the
    task's own id and the task is saved.

    Args:
        api: API client exposing ``tasks.create(...)``.
        project: Project identifier; also the prefix of the app name.
        inputs: dict of family id -> task inputs; each entry must hold a
            ``'vqsr_vcf'`` object with a ``name`` attribute.
        fm_id: Key into ``inputs`` selecting the family to stage.
        suffix: Text appended to the task name.

    Returns:
        ``'FM_ID:<fm_id> <vcf name> Task:<task id>'`` on success, or ``None``
        when the API raised ``SbgError`` (the error is written to stderr).
    """
    cur_in = inputs[fm_id]
    task_name = 'kfdrc-peddy-' + fm_id + suffix
    app_name = project + '/kfdrc-peddy-tool'
    try:
        task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=cur_in, run=False)
        task.inputs['output_basename'] = task.id
        task.save()
        return 'FM_ID:' + fm_id + ' ' + cur_in['vqsr_vcf'].name + ' Task:' + task.id
    except SbgError as e:
        # The message may be absent or not a string; coerce it so the error
        # handler itself cannot fail with a TypeError.
        reason = getattr(e, 'message', None) or str(e)
        sys.stderr.write(str(reason) + '; could not create task for ' + task_name + '!\n')
        return None

In [5]:
# Project identifiers for the cohorts this notebook has been pointed at.
chung = 'kfdrc-harmonization/sd-46sk55a3-02'
schiff = 'kfdrc-harmonization/sd-ygva0e1c-01'
rios_wise = 'kfdrc-harmonization/sd-rm8afw0r-01'
seidman = 'kfdrc-harmonization/sd-preasa7s-02'
vilain = 'kfdrc-harmonization/sd-6fpyjqbr-02'
cur_proj = vilain
# Maps project id -> label used in the output directory and in each stage line.
proj_dict = {vilain: 'FY15_Vilain'}

# Alternative multi-project configuration kept for reference:
# proj_dict = {'kfdrc-harmonization/sd-dypmehhf-01': 'Maris', 'kfdrc-harmonization/sd-9pyzahhe-08': 'Marazita_fy15_b1', 'kfdrc-harmonization/sd-9pyzahhe-09': 'Marazita_fy15_b2'}

suffix = ''
# The stage file location is derived from the project selected above, before
# the loop rebinds cur_proj.
stage_fn = '/Users/brownm28/Documents/2018-Oct-23_peddy_run/' + proj_dict[cur_proj] + '/stage.txt'
# `with` guarantees the stage file is flushed and closed even if a project fails.
with open(stage_fn, 'w') as out_fh:
    for cur_proj in proj_dict:
        inputs = get_inputs(api, cur_proj)
        sys.stderr.write('Processing project ' + cur_proj + ': ' + proj_dict[cur_proj] + '\n')
        with concurrent.futures.ThreadPoolExecutor(8) as executor:
            results = {executor.submit(create_tasks, api, cur_proj, inputs, fm_id, suffix): fm_id for fm_id in inputs}
            for result in concurrent.futures.as_completed(results):
                try:
                    staged = result.result()
                except Exception as e:
                    # One failing family must not abort staging of the others.
                    sys.stderr.write('Unexpected error staging ' + results[result] + ': ' + str(e) + '\n')
                    continue
                if staged is None:
                    # create_tasks already reported the failure on stderr.
                    continue
                out_fh.write(proj_dict[cur_proj] + ' ' + cur_proj + ' ' + staged + '\n')

Found valid task: jointgenotyping-FM_00VDFKN6
Found valid task: jointgenotyping-FM_0220YZA3
Found valid task: jointgenotyping-FM_08YQ1QE6
Found valid task: jointgenotyping-FM_0AC1P5X1
Found valid task: jointgenotyping-FM_0FNQR0DM
Found valid task: jointgenotyping-FM_0QG7E33X
Found valid task: jointgenotyping-FM_0V2GPXZB
Found valid task: jointgenotyping-FM_0V8F301D
Found valid task: jointgenotyping-FM_0X5GXJ1K
Found valid task: jointgenotyping-FM_0Y24T4AN
Found valid task: jointgenotyping-FM_0Y9B2QDM
Found valid task: jointgenotyping-FM_118PE9SV
Found valid task: jointgenotyping-FM_12ZX9AH2
Found valid task: jointgenotyping-FM_17AN45KQ
Found valid task: jointgenotyping-FM_19K8DZ4D
Found valid task: jointgenotyping-FM_1F4ZDKXC
Found valid task: jointgenotyping-FM_1ZMYWWE3
Found valid task: jointgenotyping-FM_23PJ0A6M
Found valid task: jointgenotyping-FM_246QAFHD
Found valid task: jointgenotyping-FM_25EEE3DR
Found valid task: jointgenotyping-FM_2BAY1ZMV
Found valid task: jointgenotyping-

Found valid task: jointgenotyping-FM_MZ4ENE59
Found valid task: jointgenotyping-FM_N4QSK9BP
Found valid task: jointgenotyping-FM_N88352R9
Found valid task: jointgenotyping-FM_NA1VH864
Found valid task: jointgenotyping-FM_NKHWTFZ4
Found valid task: jointgenotyping-FM_NSGRQGER
Found valid task: jointgenotyping-FM_NYPV7R76
Found valid task: jointgenotyping-FM_P0MBX21V
Found valid task: jointgenotyping-FM_P93F2XV0
Found valid task: jointgenotyping-FM_PB8EYGK1
Found valid task: jointgenotyping-FM_PF8XYXSP
Found valid task: jointgenotyping-FM_PFEV872C
Found valid task: jointgenotyping-FM_PH7DR86G
Found valid task: jointgenotyping-FM_PJ3EF8MB
Found valid task: jointgenotyping-FM_PPH0JPPV
Found valid task: jointgenotyping-FM_PVC7CYBM
Found valid task: jointgenotyping-FM_PVP8RFNR
Found valid task: jointgenotyping-FM_PYP6A4ED
Found valid task: jointgenotyping-FM_Q0578QA4
Found valid task: jointgenotyping-FM_Q214ADB3
Found valid task: jointgenotyping-FM_Q4GAJ5GV
Found valid task: jointgenotyping-

In [6]:
def run_task(api, entry):
    """Start the task referenced by one line of a stage file.

    The last whitespace-separated token of ``entry`` must have the form
    ``<tag>:<task id>``; the task with that id is fetched through
    ``api.tasks.get`` and started with ``run()``.

    Args:
        api: API client exposing ``tasks.get(id=...)``.
        entry: One line of the stage file (a trailing newline is allowed).

    Raises:
        ValueError: if the last token is not exactly ``<tag>:<task id>``.

    Blank lines are ignored so a trailing empty line in the stage file does
    not produce an error.
    """
    info = entry.split()
    if not info:
        return
    fields = info[-1].split(':')
    if len(fields) != 2:
        raise ValueError('Malformed stage entry, expected <tag>:<task id>: ' + info[-1])
    tid = fields[1]
    task = api.tasks.get(id=tid)
    task.run()

In [7]:

task_fn = '//Users/brownm28/Documents/2018-Oct-23_peddy_run/FY15_Vilain/stage.txt'
# Read all entries up front so the file handle is closed before any task starts.
with open(task_fn) as task_fh:
    entries = list(task_fh)
with concurrent.futures.ThreadPoolExecutor(8) as executor:
    results = {executor.submit(run_task, api, entry): entry for entry in entries}
    # Inspect every future: an exception raised inside a worker is otherwise
    # stored on the future and never surfaced.
    for result in concurrent.futures.as_completed(results):
        try:
            result.result()
        except Exception as e:
            sys.stderr.write('Could not run task for entry "' + results[result].strip() + '": ' + str(e) + '\n')


In [5]:
def _delete_output_file(output, obj):
    """Delete one output file object, logging instead of raising on failure."""
    if obj is None:
        sys.stderr.write('Skipping None object for ' + output + '\n')
        return
    try:
        # Reading the name sits inside the try as well: in the original code
        # it was part of the guarded region for single-file outputs.
        sys.stderr.write('Deleting output ' + obj.name + '\n')
        obj.delete()
    except Exception:
        sys.stderr.write('File for output ' + output + ' does not exist, cannot delete\n')


def rm_outputs(api, project, prefix_list, fm_list):
    """Delete the output files of selected completed tasks in a project.

    A COMPLETED task is selected when its name has the form
    ``<prefix>-<fm_id>...`` with ``<prefix>`` in ``prefix_list`` and
    ``<fm_id>`` in ``fm_list``. Every output of a selected task is deleted;
    an output may be a single file object, a list of file objects, or None.

    Args:
        api: API client exposing ``tasks.query(...)``.
        project: Project identifier passed to the task query.
        prefix_list: Task-name prefixes (text before the first '-') to match.
        fm_list: Family ids (text between the first and second '-') to match.

    Returns:
        None. Progress and failures are written to stderr; a file that cannot
        be deleted is reported and the remaining files are still processed.
    """
    tasks = api.tasks.query(project=project, status='COMPLETED').all()
    for task in tasks:
        # Expected task name layout: jointgenotyping-FM_01DTQB3Y
        parts = task.name.split('-')
        # Names without a '-' carry no family id and can never match.
        if len(parts) < 2 or parts[0] not in prefix_list or parts[1] not in fm_list:
            continue
        sys.stderr.write('Found task to remove outputs from: ' + task.name + '\n')
        for output in task.outputs:
            value = task.outputs[output]
            if isinstance(value, list):
                # Each list element gets the same protection as a single
                # file, so one missing file does not abort the clean-up.
                for obj in value:
                    _delete_output_file(output, obj)
            else:
                _delete_output_file(output, value)
    

In [6]:
# Remove the outputs of the listed families' tasks in one project.
# This cell deletes files through the API; check `project` and the family
# list before running it.
prefix_list = ['kf_genotype_refinement_vep']
project = 'kfdrc-harmonization/sd-9pyzahhe-08'
# One family id per line; `with` closes the file even if reading fails.
with open('/Users/brownm28/Documents/2018-Oct-9_genotype_refinement_bonanza/MARAZITA/batch1_fm_rpt.txt') as fm_file:
    fm_list = [line.rstrip('\n') for line in fm_file]
rm_outputs(api, project, prefix_list, fm_list)

Found task to remove outputs from: kf_genotype_refinement_vep-FM_3YSAERT6
Skipping None object for vep_warn
File for output cgp_vep_annotated_vcf does not exist, cannot delete
File for output vcf_summary_stats does not exist, cannot delete
Found task to remove outputs from: kf_genotype_refinement_vep-FM_4G4QGRH1
Skipping None object for vep_warn
File for output cgp_vep_annotated_vcf does not exist, cannot delete
File for output vcf_summary_stats does not exist, cannot delete
Found task to remove outputs from: kf_genotype_refinement_vep-FM_54AAR0KH
Skipping None object for vep_warn
Deleting output 6e1d023e-d9fe-4dcb-9504-edbe247b70b3.CGP.filtered.deNovo.vep.vcf.gz
Deleting output 6e1d023e-d9fe-4dcb-9504-edbe247b70b3_stats.txt
Found task to remove outputs from: kf_genotype_refinement_vep-FM_7Y0MZVCX
Skipping None object for vep_warn
Deleting output 725308b0-630d-4de9-baba-0f83687ed83a.CGP.filtered.deNovo.vep.vcf.gz
Deleting output 725308b0-630d-4de9-baba-0f83687ed83a_stats.txt
Found task

Deleting output 0918148a-5870-43c4-bc79-6883c7675cad_stats.txt
Found task to remove outputs from: kf_genotype_refinement_vep-FM_SA6Y47WQ
Skipping None object for vep_warn
Deleting output af19f476-f44b-4de5-b5d4-94619c14cd25.CGP.filtered.deNovo.vep.vcf.gz
Deleting output af19f476-f44b-4de5-b5d4-94619c14cd25_stats.txt
Found task to remove outputs from: kf_genotype_refinement_vep-FM_SN2BRZTM
Skipping None object for vep_warn
Deleting output 8d1ea1ae-ee52-44d8-a3e4-13511500b0a9.CGP.filtered.deNovo.vep.vcf.gz
Deleting output 8d1ea1ae-ee52-44d8-a3e4-13511500b0a9_stats.txt
Found task to remove outputs from: kf_genotype_refinement_vep-FM_1PEC3KD8
Skipping None object for vep_warn
Deleting output bd7925b9-b27e-4b85-a39e-cc4b11316848.CGP.filtered.deNovo.vep.vcf.gz
Deleting output bd7925b9-b27e-4b85-a39e-cc4b11316848_stats.txt
Found task to remove outputs from: kf_genotype_refinement_vep-FM_4Y21X6PP
Skipping None object for vep_warn
Deleting output 9256b345-e778-43a0-baae-de33efc83160.CGP.filtere