# Imports

In [1]:
import sevenbridges as sbg
from sevenbridges.errors import SbgError
from sevenbridges.http.error_handlers import rate_limit_sleeper, maintenance_sleeper
import sys
from requests import request
import concurrent.futures
config = sbg.Config(profile='cavatica' )
import pdb
import re
api = sbg.Api(config=config, error_handlers=[rate_limit_sleeper, maintenance_sleeper])

In [9]:
def get_relevant_file_objs(api, project, adapt_flag, strand):
    ref_fn = {'STARgenome': 'STAR_GENCODE27.tar.gz',
            'RSEMgenome': 'RSEM_GENCODE27.tar.gz',
            'FusionGenome': 'GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz',
            'reference_fasta': 'GRCh38.primary_assembly.genome.fa',
            'RNAseQC_GTF': 'gencode.v27.primary_assembly.RNAseQC.gtf',
            'gtf_anno': 'gencode.v27.primary_assembly.annotation.gtf',
            'kallisto_idx': 'gencode.v27.kallisto.index',
            'pizzly_transcript_ref': 'gencode.v27.transcripts.pizzly.fa.gz'}
    ref_obj = {}
    for in_name in ref_fn:
        ref_obj[in_name] = api.files.query(project=project, names = [ref_fn[in_name]])[0]
    #pdb.set_trace()
    # set some static vars
    ref_obj['runThread'] = 36
    if adapt_flag == 1:
        ref_obj['r1_adapter'] = 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCA'
        ref_obj['r2_adapter'] = 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT'
    else:
        ref_obj['r1_adapter'] = None
        ref_obj['r2_adapter'] = None
    if strand is None:
        ref_obj['wf_strand_param'] = None
    else:
        ref_obj['wf_strand_param'] = strand
    return ref_obj


In [10]:
def setup_task(api, ref_dict, info, uproject):
    try:
        in_dict = {}
        for key in ref_dict:
            in_dict[key] = ref_dict[key]
        
        file_data = info.rstrip('\n').split('\t')
        # uproject = file_data[2]
        sname = file_data[8]
        aliquot = file_data[16]
        bs_id = file_data[12]
        task_name = 'RNAfusion-BAM_INPUT: ' + bs_id + ' ' + sname
        app_name = uproject + '/kfdrc-rnaseq-wf-bam-in'
        star_rg = 'ID:' + sname + '\tLB:' + aliquot + '\tPL:ILLUMINA\tSM:' + bs_id
        in_dict['input_bam'] = api.files.get(id=file_data[0])
        in_dict['STAR_outSAMattrRGline'] = star_rg
        task = api.tasks.create(name=task_name, project=uproject, app=app_name, inputs=in_dict, run=False)
        task.inputs['sample_name'] = task.id
        task.save()
        return '\t'.join((task_name, task.id)) + '\n'
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write('Failed to create task for ' + task_name + '\n')
        exit()
    

In [11]:
pname = 'kfdrc-harmonization/sd-bhjxbdqk-06'
strand = 'rf-stranded'
ref_obj = get_relevant_file_objs(api, pname, 0, strand)

out_fh = open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/2019-Mar-4_re-run/2019-Mar-4_nant_tasks.txt', 'w')
input_metadata = open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/2019-Mar-4_re-run/2019-Mar-4_nant_bam_in.txt')
head = next(input_metadata)
#for metadata in input_metadata:
#    setup_task(api, ref_obj, metadata)
i = 1
n = 50
with concurrent.futures.ThreadPoolExecutor(8) as executor:
    results = {executor.submit(setup_task, api, ref_obj, metadata, pname): metadata for metadata in input_metadata}
    for result in concurrent.futures.as_completed(results):
        if i % n == 0:
            sys.stderr.write(str(i) + 'tasks set up\n')
        i += 1
        out_fh.write(result.result())
out_fh.close()

50tasks set up
100tasks set up
150tasks set up
200tasks set up
250tasks set up
300tasks set up
350tasks set up
400tasks set up
450tasks set up
500tasks set up
550tasks set up
600tasks set up
650tasks set up
700tasks set up
750tasks set up
800tasks set up
850tasks set up
900tasks set up
950tasks set up
1000tasks set up


## Run tasks set up from above

In [6]:
def run_task_by_id(api, info):
    try:
        (task_id, tname) = info.rstrip('\n').split('\t')
        task = api.tasks.get(id=task_id)
        task.run()
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write('Could not start task ' + info)
        exit(1)

In [7]:
check = input()
if check == 'YASS':
    task_file = open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/fail-rerun/2019-Mar-11_fail_repeat_tasks.txt')
    with concurrent.futures.ThreadPoolExecutor(8) as executor:
        results = {executor.submit(run_task_by_id, api, task_info): task_info for task_info in task_file}

YASS


## Check running tasks

In [22]:
import pdb
pname = 'kfdrc-harmonization/sd-bhjxbdqk-06'
# pname = 'kfdrc-harmonization/sd-preasa7s'
tasks = api.tasks.query(project=pname, status='FAILED').all()
out_fn = '/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/status/failed_update.txt'
#out_fn = '/Users/brownm28/Documents/2019-Jan-30_reharmonization/seidman_fy15/completed_update.txt'
out_fh = open(out_fn, 'w')
out_fh.write('task id\ttask name\tdate_created\tcompleted steps\tmessage code\tmessage\n')
for task in tasks:
    step = task.execution_status.steps_completed
    if step is None:
        step = 'NA'
    else:
        step = str(step)
    ecode = task.execution_status.message_code
    if task.execution_status.message_code is None:
        ecode = 'NA'
    try:
        msg = task.execution_status.message
        msg = msg.replace('\n', ' ')
        out_fh.write('\t'.join((task.id, task.name, str(task.start_time), step, ecode, msg)) + '\n')
    except:
        pdb.set_trace()
        hold = 1
out_fh.close()

In [14]:
task = api.tasks.get(id='5c2b3aab-f993-4a3e-ad68-28ff85c736f2')
pdb.set_trace()
hold = 1

--Return--
> <ipython-input-14-788d70090019>(2)<module>()->None
-> pdb.set_trace()
(Pdb) p dir(task)
(Pdb) p task.create
<bound method Task.create of <class 'sevenbridges.models.task.Task'>>
(Pdb) p task.created_time
datetime.datetime(2019, 2, 4, 19, 45, 34)
(Pdb) p datetime.date(task.created_time)
*** NameError: name 'datetime' is not defined
(Pdb) import datetime
(Pdb) p datetime.date(task.created_time)
*** TypeError: an integer is required (got type datetime.datetime)
(Pdb) p dir(task.create_task)
*** AttributeError: 'Task' object has no attribute 'create_task'
(Pdb) p dir(task.create_time)
*** AttributeError: 'Task' object has no attribute 'create_time'
(Pdb) p dir(task.created_time)
['__add__', '__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__ne__', '__new__', '__radd__', '__reduce__', '__reduce_ex__', '__repr__', '__rsub__', '__setattr__', '__sizeof

BdbQuit: 

### delete old outputs!

In [8]:
def del_task_outputs(prefix, task):
    if re.search(prefix, task.name):
        sys.stderr.write('Deleting outputs from ' + task.name + '\n')
        for key in task.outputs:
            try:
                res = ''
                if task.outputs[key] is not None:
                    res += 'Deleting ' + task.outputs[key].name + ' from ' + task.name + '\n'
                    task.outputs[key].delete()
                else:
                    res += key + ' was null for ' + task.name + '\n'
                
            except Exception as e:
                sys.stderr.write(str(e) + '\n')
                sys.stderr.write('Could not delete output ' + key + ' from task ' + task.name + '\n')
        return res
                    

In [9]:
check = input()
if check != 'YASS':
    sys.stderr.write('Confirmation was not YASS, exiting\n')
else:
    sys.stderr.write('Delete process confirmed!\n')
    project = 'kfdrc-harmonization/sd-bhjxbdqk-06'
    tasks = api.tasks.query(project=project, status='FAILED').all()
    prefix = 'RNAfusion-'
    del_log = open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/2019-Mar-11_del_failed_outputs.log', 'a')
    # for task in tasks:
    #     del_task_outputs(prefix, task)
    with concurrent.futures.ThreadPoolExecutor(16) as executor:
        results = {executor.submit(del_task_outputs, prefix, task): task for task in tasks}
        for del_info in concurrent.futures.as_completed(results):
            try:
                if del_info.result() is not None:
                    del_log.write(del_info.result())
                    del_log.flush()
            except Exception as e:
                print (e)
    del_log.close()



YASS


Delete process confirmed!
Deleting outputs from RNAfusion-BAM_INPUT: BS_0448A413 7316-364
Deleting outputs from RNAfusion-BAM_INPUT: BS_06XH7EVF 7316-879
Deleting outputs from RNAfusion-BAM_INPUT: BS_07ANYSYQ 7316-2134
Deleting outputs from RNAfusion-BAM_INPUT: BS_08HWDBX5 7316-1112
Deleting outputs from RNAfusion-BAM_INPUT: BS_0HW7W7SD 7316-2658
Deleting outputs from RNAfusion-BAM_INPUT: BS_0QYS36NR 7316-455
Deleting outputs from RNAfusion-BAM_INPUT: BS_0RF0EZTV 7316-286
Deleting outputs from RNAfusion-BAM_INPUT: BS_0RQ4P069 7316-1746
Deleting outputs from RNAfusion-BAM_INPUT: BS_0X9EGHY2 7316-1744
Deleting outputs from RNAfusion-BAM_INPUT: BS_10APZRVT 7316-877
Deleting outputs from RNAfusion-BAM_INPUT: BS_17WYVEEC 7316-332
Deleting outputs from RNAfusion-BAM_INPUT: BS_1EBQ3T20 7316-913
Deleting outputs from RNAfusion-BAM_INPUT: BS_0C7VZC0A 7316-2176
Deleting outputs from RNAfusion-BAM_INPUT: BS_0RF0EZTV 7316-286
Deleting outputs from RNAfusion-BAM_INPUT: BS_3MS8WVNR 7316-519
Deleting

## re-run fail task

In [5]:
check = input()
if check != 'YASS':
    exit(1)
project = 'kfdrc-harmonization/sd-bhjxbdqk-06'
task_rpt = open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/fail-rerun/2019-Mar-11_fail_rerun.txt')
app_name = project + '/kfdrc-rnaseq-wf-bam-in'
out = open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/fail-rerun/2019-Mar-11_fail_repeat_tasks.txt', 'w')
for fail in task_rpt:
    (tid, tname) = fail.rstrip('\n').split('\t')
    fail_task = api.tasks.get(id=tid)
    inputs = {}
    for key in fail_task.inputs:
        inputs[key] = fail_task.inputs[key]
    task_name = fail_task.name.replace('RERUN', 'RERUN1')
    task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=inputs, run=False)
    task.inputs['sample_name'] = task.id
    task.save()
    out.write(task.id + '\t' + task.name + '\n')
out.close()


YASS


### inventory completed

In [2]:
def task_summary(task):
    if re.search('RNAfusion-BAM_INPUT', task.name) or re.search('RNAfusion-FQ_INPUT', task.name):
        (title, bs_id, sample) = task.name.split()
        # print(task.name)
        return '\t'.join((task.id, task.name, bs_id, sample)) + '\n'

In [3]:
project = 'kfdrc-harmonization/sd-bhjxbdqk-06'
tasks =api.tasks.query(status='COMPLETED', project=project).all()
out = open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/status/completed.txt', 'w')
out.write('Task ID\tTask Name\tBS ID\tSample name\n')
i = 1
n = 100
with concurrent.futures.ThreadPoolExecutor(16) as executor:
    results = {executor.submit(task_summary, task): task for task in tasks}
    for info in concurrent.futures.as_completed(results):
        if i % n == 0:
            sys.stderr.write('Processed ' + str(i) + ' tasks\n')
        try:
            if info.result() is not None:
                out.write(info.result())
        except Exception as e:
            print (e)
        i += 1
out.close()

Processed 100 tasks
Processed 200 tasks
Processed 300 tasks
Processed 400 tasks
Processed 500 tasks
Processed 600 tasks
Processed 700 tasks
Processed 800 tasks
Processed 900 tasks
Processed 1000 tasks
Processed 1100 tasks
Processed 1200 tasks
Processed 1300 tasks
Processed 1400 tasks
Processed 1500 tasks
Processed 1600 tasks
Processed 1700 tasks
Processed 1800 tasks
Processed 1900 tasks
Processed 2000 tasks
Processed 2100 tasks
Processed 2200 tasks
Processed 2300 tasks
Processed 2400 tasks
Processed 2500 tasks
Processed 2600 tasks
Processed 2700 tasks
Processed 2800 tasks
Processed 2900 tasks
Processed 3000 tasks
