In [1]:
#!/usr/bin/env python3
import sevenbridges as sbg
from sevenbridges.errors import SbgError
from sevenbridges.http.error_handlers import rate_limit_sleeper, maintenance_sleeper
import sys
import re
import concurrent.futures
import pdb
# Build the client configuration from the profile named 'turbo'
# (presumably a section of the local Seven Bridges credentials file — confirm).
config = sbg.Config(profile='turbo')

# Shared API client used by every cell below. The two imported handlers are
# passed as error handlers; by their names they wait out rate limiting and
# platform maintenance — confirm against the client library docs.
api = sbg.Api(
    config=config,
    error_handlers=[rate_limit_sleeper, maintenance_sleeper],
)

In [2]:
def get_refs(api, project):
    """Collect the reference file(s) that every new task receives as input.

    Args:
        api: client object exposing ``files.query``.
        project: project identifier forwarded to the file query.

    Returns:
        dict with the single key ``'RNAseQC_GTF'`` mapped to the first result
        of a by-name query for the GENCODE v27 RNAseQC GTF file.

    Note:
        The first result is taken by index, so an empty query result fails
        with whatever error indexing that result raises.
    """
    gtf_name = 'gencode.v27.primary_assembly.RNAseQC.gtf'
    matches = api.files.query(project=project, names=[gtf_name])
    return {'RNAseQC_GTF': matches[0]}

In [3]:
def setup_task(ref_obj, task, prefix):
    """Create a draft follow-up task from a task whose name matches ``prefix``.

    Besides its arguments, this function reads the module-level names ``api``,
    ``project``, ``app_name``, ``in_keys`` and ``out_keys``; they must be
    defined before it is called.

    Args:
        ref_obj: dict of input name -> reference file, copied into the new
            task's inputs.
        task: source task; its ``name``, ``id``, ``inputs`` and ``outputs``
            are read. The name is expected to split on whitespace into at
            least three parts (parts 1 and 2 are reused in the new name).
        prefix: regular expression searched for in ``task.name``.

    Returns:
        ``'<new task name>\\t<new task id>\\n'`` when a task was created, or
        ``None`` when ``task.name`` does not match ``prefix``.

    Raises:
        SystemExit: with code 1 after logging to stderr when anything in the
            set-up fails. When run through an executor the exit is stored on
            the future and re-raised when the caller reads its result.
    """
    if not re.search(prefix, task.name):
        return None

    try:
        # Start from the shared reference inputs.
        inputs = {}
        for key in ref_obj:
            inputs[key] = ref_obj[key]

        parts = task.name.split()
        task_name = 'CBTTC_RNASeQC_COUNTS: ' + parts[1] + ' ' + parts[2]

        # Wire each selected output of the source task to the paired input.
        for j in range(len(out_keys)):
            inputs[in_keys[j]] = task.outputs[out_keys[j]]
        inputs['wf_strand_param'] = task.inputs['wf_strand_param']

        # Created as a draft (run=False) so the sample name can be set to the
        # new task's own id before saving.
        rnaseqc_task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=inputs, run=False)
        rnaseqc_task.inputs['sample_name'] = rnaseqc_task.id
        rnaseqc_task.save()
        return rnaseqc_task.name + '\t' + rnaseqc_task.id + '\n'
    except Exception as e:
        # str(e) is required: concatenating the exception object itself raises
        # TypeError, which hid the real error and skipped the exit below.
        sys.stderr.write('Got error ' + str(e) + ' for task ' + task.id + ' ' + task.name + '\n')
        sys.exit(1)

In [4]:
# --- Run configuration -------------------------------------------------------
project = 'kfdrc-harmonization/sd-bhjxbdqk-06'
app_name = project + '/temp-rnaseqc-only-wf'
prefix = 'RNAfusion-'
# in_keys[j] of the new task is fed from out_keys[j] of the source task.
in_keys = ['genome_aligned_bam']
out_keys = ['STAR_sorted_genomic_bam']

# All completed tasks in the project; setup_task filters them by name prefix.
# For a trial run, replace this with a list of tasks fetched individually
# via api.tasks.get(<task id>).
tasks  = api.tasks.query(project=project, status='COMPLETED').all()
ref_obj = get_refs(api, project)

i = 1   # 1-based count of tasks created so far
n = 50  # progress is reported every n created tasks

# The report file is managed by `with` so it is closed even if a worker fails
# and its error is re-raised by result() below.
with open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/rnaseqc_rpt/rnaseqc_tasks.txt', 'w') as out_fh:
    with concurrent.futures.ThreadPoolExecutor(8) as executor:
        results = {executor.submit(setup_task, ref_obj, task, prefix): task for task in tasks}
        for result in concurrent.futures.as_completed(results):
            # None means the task name did not match the prefix.
            task_line = result.result()
            if task_line is not None:
                if i % n == 0:
                    sys.stderr.write(str(i) + 'tasks set up\n')
                i += 1
                out_fh.write(task_line)

50tasks set up
100tasks set up
150tasks set up
200tasks set up
250tasks set up
300tasks set up
350tasks set up
400tasks set up
450tasks set up
500tasks set up
550tasks set up
600tasks set up
650tasks set up
700tasks set up
750tasks set up
800tasks set up
850tasks set up
900tasks set up
950tasks set up
1000tasks set up
1050tasks set up


In [None]:
## Copy metadata from old to new

In [2]:
def cp_metadata(line):
    """Stage metadata updates for the output files of one task.

    Reads the module-level names ``api``, ``project`` and ``in_metadata``, and
    appends to the module-level list ``bulk_up``.

    Args:
        line: one record of the task report, ``'<task name>\\t<task id>'``
            with an optional trailing newline. The task name must split on
            whitespace into exactly three fields; the middle one is used as
            the key into ``in_metadata``.

    Returns:
        None. Each file returned by the origin-task query has every key of
        the matching ``in_metadata`` entry copied onto its ``metadata`` and is
        then appended to ``bulk_up``; nothing is saved here.

    Raises:
        SystemExit: with code 1, after writing the error text to stderr, when
            any step fails (malformed line, unknown key, API error).
    """
    try:
        task_name, task_id = line.rstrip('\n').split('\t')
        # Only the middle field is used; unpacking still enforces three fields.
        _app, bs_id, _samp_id = task_name.split()
        task_outputs = api.files.query(project=project, origin={"task": task_id})
        for out_file in task_outputs:
            # Looked up per file, so a task without outputs never touches
            # in_metadata.
            source_meta = in_metadata[bs_id]
            for meta_key in source_meta:
                out_file.metadata[meta_key] = source_meta[meta_key]
            bulk_up.append(out_file)
    except Exception as err:
        sys.stderr.write(str(err) + '\n')
        sys.exit(1)

In [3]:
project = 'kfdrc-harmonization/sd-bhjxbdqk-06'

# --- 1. Collect the source file ids from the old manifest --------------------
# The first CSV column of every non-header row is used as a file id.
bulk_in = []
with open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/rnaseqc_rpt/1557340089978-manifest.csv') as old_manifest:
    # Skip the header row; the default keeps an empty file from raising.
    head = next(old_manifest, None)
    for line in old_manifest:
        info = line.rstrip('\n').split(',')
        bulk_in.append(info[0])

# --- 2. Fetch metadata of the source files, keyed by biospecimen id ----------
in_metadata = {}
max_j = 100  # batch size used for the bulk API calls
total = len(bulk_in)
for i in range(0, total, max_j):
    uset = i + max_j
    # The logged value is the unclamped batch end, so the last message may
    # exceed the number of files.
    sys.stderr.write('Processing ' + str(uset) + ' set\n')
    if uset > total:
        uset = total
    subset = api.files.bulk_get(files=bulk_in[i:uset])
    for obj in subset:
        cur_meta = obj.resource.metadata
        in_metadata[cur_meta['Kids First Biospecimen ID']] = cur_meta
    sys.stderr.write(str(api.remaining) + ' calls left\n')
sys.stderr.write(str(api.remaining) + ' calls left\n')

# --- 3. Stage metadata on the outputs of every task in the report ------------
bulk_up = []  # filled by cp_metadata from the worker threads
x = 1         # 1-based count of report lines processed
n = 50        # progress is reported every n lines
with open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/rnaseqc_rpt/rnaseqc_tasks.txt') as task_manifest:
    with concurrent.futures.ThreadPoolExecutor(16) as executor:
        results = {executor.submit(cp_metadata, line): line for line in task_manifest}
        for result in concurrent.futures.as_completed(results):
            # Reading the result re-raises a worker failure here; without it
            # a failed line was ignored and the bulk update below ran on
            # incomplete data.
            result.result()
            if x % n == 0:
                sys.stderr.write(str(x) + ' outputs tagged, ' + str(api.remaining) + ' api calls left\n')
            x += 1

# --- 4. Push the staged metadata in batches ----------------------------------
sys.stderr.write('Bulk updates queued\n')
max_j = 100
total = len(bulk_up)
for i in range(0, total, max_j):
    uset = i + max_j
    if uset > total:
        uset = total
    api.files.bulk_update(files=bulk_up[i:uset])


Processing 100 set
9999 calls left
Processing 200 set
9998 calls left
Processing 300 set
9997 calls left
Processing 400 set
9996 calls left
Processing 500 set
9995 calls left
Processing 600 set
9994 calls left
Processing 700 set
9993 calls left
Processing 800 set
9992 calls left
Processing 900 set
9991 calls left
Processing 1000 set
9990 calls left
Processing 1100 set
9989 calls left
9989 calls left
50 outputs tagged, 9923 api calls left
100 outputs tagged, 9873 api calls left
150 outputs tagged, 9823 api calls left
200 outputs tagged, 9773 api calls left
250 outputs tagged, 9723 api calls left
300 outputs tagged, 9672 api calls left
350 outputs tagged, 9622 api calls left
400 outputs tagged, 9573 api calls left
450 outputs tagged, 9523 api calls left
500 outputs tagged, 9473 api calls left
550 outputs tagged, 9421 api calls left
600 outputs tagged, 9371 api calls left
650 outputs tagged, 9323 api calls left
700 outputs tagged, 9273 api calls left
750 outputs tagged, 9222 api calls lef

## Check strand inputs

In [2]:
# Write a tab-separated report of the strand parameter each matching
# completed task was run with.
project="kfdrc-harmonization/sd-bhjxbdqk-06"
prefix="RNAfusion-"
tasks = api.tasks.query(project=project, status="COMPLETED").all()

# `with` closes the report even if a task name fails to unpack or the API
# raises part-way through the loop.
with open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/strand_info/wf_strand_param_used.txt', 'w') as out:
    out.write("Task name\tBS_ID\tSample name\twf strand param\n")
    for task in tasks:
        if re.search(prefix, task.name):
            # A matching name must consist of exactly three fields separated
            # by single spaces; anything else raises ValueError here.
            (tool, bs_id, s_id) = task.name.split(" ")
            out.write(task.name + "\t" + bs_id + "\t" + s_id + "\t" + task.inputs['wf_strand_param'] + "\n")