# General run section

In [28]:
#!/usr/bin/env python3
import sevenbridges as sbg
import sys
import os
import pdb
import concurrent.futures
from requests import request
from datetime import datetime
# API client built from the 'cavatica' configuration profile.
config = sbg.Config(profile='cavatica')
api = sbg.Api(config=config)

# Owner prefix shared by every project path below.
uname = 'kfdrc-harmonization'

# Short project ids.
batch1_id, batch2_id = 'sd-bhjxbdqk-07', 'sd-bhjxbdqk-08'
cnmc_id, pnoc_id = 'sd-bhjxbdqk-10', 'sd-bhjxbdqk-09'

# Full '<owner>/<project id>' paths, as passed to the API calls in later cells.
batch1 = '/'.join((uname, batch1_id))
batch2 = '/'.join((uname, batch2_id))
cnmc = '/'.join((uname, cnmc_id))
pnoc = '/'.join((uname, pnoc_id))

# Human-readable prefixes used in task names and output file names.
cnmc_friendly = 'cbttc-cnmc'
pnoc_friendly = 'cbttc-pnoc'

## Draft task setup section

In [11]:
def get_relevant_file_objs(project):
    """Look up the reference files in ``project`` and key them by app input name.

    Uses the module-level ``api`` client.

    Args:
        project: Project path ('<owner>/<project id>') to search.

    Returns:
        dict mapping an input name ('ref_bed', 'reference') to the matching
        file object returned by ``api.files.query``. A reference that is not
        found in the project is simply absent from the dict (a warning is
        written to stderr).
    """
    # File name in the project -> name of the app input it feeds.
    ref_dict = {'hg38_strelka.bed.gz': 'ref_bed', 'Homo_sapiens_assembly38.fasta': 'reference'}
    # ``names`` takes a flat list of file names; the original wrapped the list
    # in a second list.
    in_files = api.files.query(project=project, names=list(ref_dict))
    # Input name -> file object for every reference file that was found.
    ref_obj_dict = {ref_dict[in_file.name]: in_file for in_file in in_files}
    missing = [fname for fname, key in ref_dict.items() if key not in ref_obj_dict]
    if missing:
        sys.stderr.write('Reference file(s) not found in ' + project + ': ' + ', '.join(missing) + '\n')
    return ref_obj_dict

In [22]:
def create_tasks(project, pid, friendly):
    """Draft one task per COMPLETED task in ``project`` and record the drafts.

    Every completed task is handed to ``mt_create_tasks`` on a pool of 8
    threads. For each successful draft the returned line is appended to
    ``out_dir + friendly + '_tasks.txt'``; messages for skipped tasks and
    worker exceptions go to stderr.

    Relies on the module-level ``api``, ``out_dir`` and ``ref_obj_dict``.

    Args:
        project: Project path ('<owner>/<project id>') to query and draft in.
        pid: Unused; kept so existing callers keep working.
        friendly: Prefix for the output file name and for drafted task names.
    """
    out_fn = out_dir + friendly + '_tasks.txt'
    tasks = api.tasks.query(project=project, status='COMPLETED').all()
    report_every = 20
    # Context managers close the manifest even if a worker aborts the run.
    with open(out_fn, 'w') as out_fh, concurrent.futures.ThreadPoolExecutor(8) as executor:
        futures = {
            executor.submit(mt_create_tasks, ref_obj_dict, task, project, api, friendly): task
            for task in tasks
        }
        for done, future in enumerate(concurrent.futures.as_completed(futures), start=1):
            if done % report_every == 0:
                sys.stderr.write('Processed ' + str(done) + ' tasks\n')
                sys.stderr.flush()
            try:
                message, created = future.result()
            except Exception as e:
                # stderr.write() only accepts str; passing the exception object
                # itself raised TypeError from inside this handler.
                sys.stderr.write(str(e) + '\n')
                continue
            if created:
                out_fh.write(message)
            else:
                sys.stderr.write(message)


In [23]:
def mt_create_tasks(ref_obj_dict, source_task, project, api, friendly_name):
    """Draft (without running) a '<project>/kfdrc-manta-sv' task from one source task.

    A source task qualifies only if its outputs contain 'vep_annotated_maf'.
    The draft reuses the source task's tumor/normal CRAM files and is named
    '<friendly_name>-somatic-SV-<tumor_id>_<normal_id>'. After creation the
    draft's 'output_basename' input is set to the draft's own id and saved.

    Args:
        ref_obj_dict: Shared reference inputs (input name -> file object).
            It is copied, never modified.
        source_task: Completed task whose inputs/outputs are inspected.
        project: Project path in which the draft is created.
        api: API client used to fetch files and create the task.
        friendly_name: Prefix for the drafted task's name.

    Returns:
        ('<task name>\\t<task id>\\n', 1) when a draft was created, or
        ('Invalid info in task source task <name>\\n', 0) when the source task
        does not qualify.

    Raises:
        SystemExit: with status 1 if anything fails while drafting.
    """
    app_name = project + '/kfdrc-manta-sv'
    try:
        if 'vep_annotated_maf' not in source_task.outputs:
            return 'Invalid info in task source task ' + source_task.name + '\n', 0
        sys.stderr.write('Valid task found: ' + source_task.name + '\n')
        task_name = friendly_name + '-somatic-SV-' + source_task.inputs['tumor_id'] + '_' + source_task.inputs['normal_id']
        # Copy the shared references: this function runs on several threads at
        # once, and writing the per-task CRAMs into the caller's dict would let
        # one thread's inputs leak into another thread's task.
        input_dict = dict(ref_obj_dict)
        input_dict['input_tumor_cram'] = api.files.get(id=source_task.inputs['tumor_cram'].id)
        input_dict['input_normal_cram'] = api.files.get(id=source_task.inputs['normal_cram'].id)
        cur_task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=input_dict, run=False)
        # The task id only exists after creation, so the basename is set in a second step.
        cur_task.inputs['output_basename'] = cur_task.id
        cur_task.save()
        return '\t'.join((task_name, cur_task.id)) + '\n', 1
    except Exception as e:
        print(e)
        sys.stderr.write('Error processing ' + source_task.name)
        # sys.exit instead of the interactive-only ``exit`` helper; the
        # SystemExit is re-raised to whoever collects this worker's result.
        sys.exit(1)
            

In [29]:
# Directory that receives the '<friendly>_tasks.txt' manifests. It can be
# overridden with the MANTA_SV_OUT_DIR environment variable; the default is the
# original author's local path. Joining with '' guarantees the trailing
# separator that the plain string concatenation in later cells relies on.
out_dir = os.path.join(
    os.environ.get('MANTA_SV_OUT_DIR', '/Users/brownm28/Documents/2018-Dec-6_cbttc_manta_sv/'),
    '')
os.makedirs(out_dir, exist_ok=True)
# Reference inputs are looked up once and shared by every drafted task.
ref_obj_dict = get_relevant_file_objs(pnoc)
create_tasks(pnoc, pnoc_id, pnoc_friendly)

Valid task found: cbttc-dna-somatic-BS_6GS4XT7F_BS_CGXTFM67
Valid task found: cbttc-dna-somatic-BS_MVYA262V_BS_3Z40EZHD
Valid task found: cbttc-dna-somatic-BS_MVYA262V_BS_JRFVST47
Valid task found: cbttc-dna-somatic-BS_HJ7HYZ7N_BS_J8EK6RNF
Valid task found: cbttc-dna-somatic-BS_9H6Z0MEG_BS_M5FM63EB
Valid task found: cbttc-dna-somatic-BS_9H6Z0MEG_BS_M0B42FPR
Valid task found: cbttc-dna-somatic-BS_9H6Z0MEG_BS_9P4NDTKJ
Valid task found: cbttc-dna-somatic-BS_E5RKHG41_BS_YZD4SSMA
Invalid info in task source task alignment-BS_4PM2QG9C-rerun1Invalid info in task source task alignment-BS_X6H86RTAInvalid info in task source task alignment-BS_Z64NEPNEInvalid info in task source task alignment-BS_Y96RP1HJ-rerun1Invalid info in task source task alignment-BS_KQRAHH6YInvalid info in task source task alignment-BS_Y74XAFJX-rerun1Invalid info in task source task alignment-BS_0AK4F99XInvalid info in task source task alignment-BS_7GKF6M85-rerun1Invalid info in task source task alignment-BS_J8EH1N7VInvali

## Run this section only if ready to run tasks

In [30]:
def mt_run_task(task_info, api, project):
    """Start one drafted task described by a '<name>\\t<task id>' line.

    Args:
        task_info: One line in the '<name>\\t<task id>\\n' form.
        api: API client used to fetch the task.
        project: Unused; kept so existing callers keep working.

    Returns:
        ``task_info`` unchanged when the task was started, otherwise ``None``
        (the error is reported on stderr).
    """
    # Fallback label so the error message still works when the line cannot be parsed.
    label = repr(task_info)
    try:
        name, task_id = task_info.rstrip('\n').split('\t')
        label = name
        api.tasks.get(id=task_id).run()
        return task_info
    except Exception as e:
        # stderr.write() only accepts str; passing the exception object itself
        # raised TypeError and hid the real error.
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write('Task run failed for ' + label + '\n')
        return None

In [31]:
# Start every task listed in the manifest and copy the line of each task that
# was started into a sanity-check file, for comparison against the manifest.
in_fn = out_dir + pnoc_friendly + '_tasks.txt'
sanity_fn = out_dir + pnoc_friendly + '_run_sanity_check.txt'
x = 1
n = 50
# Context managers close both files (the input handle was never closed before)
# and shut the pool down even if something raises part-way through.
with open(in_fn) as in_fh, open(sanity_fn, 'w') as out_fh, \
        concurrent.futures.ThreadPoolExecutor(8) as executor:
    results = {executor.submit(mt_run_task, info, api, pnoc): info for info in in_fh}
    for result in concurrent.futures.as_completed(results):
        if x % n == 0:
            sys.stderr.write('Processed ' + str(x) + ' tasks\n')
            sys.stderr.flush()
        try:
            launched = result.result()
        except Exception as e:
            # Report the failing line and keep going so one bad task does not
            # abort the remaining launches.
            sys.stderr.write('Task run failed for ' + results[result].rstrip('\n') + ': ' + str(e) + '\n')
            launched = None
        # A worker that failed returns None; only started tasks are recorded.
        if launched:
            out_fh.write(launched)
        x += 1