# Imports

In [11]:
import sevenbridges as sbg
from sevenbridges.errors import SbgError
import sys
import concurrent.futures
import pdb
from requests import request
import re
config = sbg.Config(profile='cavatica')
api = sbg.Api(config=config)

In [2]:
def get_relevant_file_objs(api, project, adapt_flag):
    ref_fn = {'STARgenome': 'STAR_GENCODE27.tar.gz',
            'RSEMgenome': 'RSEM_GENCODE27.tar.gz',
            'FusionGenome': 'GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz',
            'reference_fasta': 'GRCh38.primary_assembly.genome.fa',
            'RNAseQC_GTF': 'gencode.v27.primary_assembly.RNAseQC.gtf',
            'gtf_anno': 'gencode.v27.primary_assembly.annotation.gtf',
            'kallisto_idx': 'gencode.v27.kallisto.index',
            'pizzly_transcript_ref': 'gencode.v27.transcripts.pizzly.fa.gz'}
    ref_obj = {}
    for in_name in ref_fn:
        ref_obj[in_name] = api.files.query(project=project, names = [ref_fn[in_name]])[0]
    # set some static vars
    ref_obj['runThread'] = 36
    if adapt_flag == 1:
        ref_obj['r1_adapter'] = 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCA'
        ref_obj['r2_adapter'] = 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT'
    else:
        ref_obj['r1_adapter'] = None
        ref_obj['r2_adapter'] = None
    return ref_obj


In [3]:
def setup_task(api, ref_dict, info, fh):
    try:
        in_dict = {}
        for key in ref_dict:
            in_dict[key] = ref_dict[key]
        #pdb.set_trace()
        fq1 = info.rstrip('\n').split('\t')
        strand = fq1[-1]
        if strand is None:
            in_dict['wf_strand_param'] = None
        else:
            in_dict['wf_strand_param'] = strand
        fq2_data = next(fh)
        fq2 = fq2_data.rstrip('\n').split('\t')
        # uproject = fq1[2]
        uproject = 'kfdrc-harmonization/sd-bhjxbdqk-06'
        sname = fq1[6]
        aliquot = fq1[4]
        bs_id = fq1[3]
        task_name = 'RNAfusion-FQ_INPUT: ' + bs_id + ' ' + sname
        app_name = uproject + '/kfdrc-rnaseq-wf'
        
        star_rg = 'ID:' + sname + '\tLB:' + aliquot + '\tPL:ILLUMINA\tSM:' + bs_id
        in_dict['reads1'] = api.files.get(id=fq1[0])
        in_dict['reads2'] = api.files.get(id=fq2[0])
        in_dict['STAR_outSAMattrRGline'] = star_rg
        # pdb.set_trace()
        task = api.tasks.create(name=task_name, project=uproject, app=app_name, inputs=in_dict, run=False)
        task.inputs['sample_name'] = task.id
        task.save()
        return '\t'.join((task_name, task.id)) + '\n'
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write('Failed to create task for ' + task_name + '\n')
        exit(1)
    

In [4]:
pname = 'kfdrc-harmonization/sd-bhjxbdqk-06'
ref_obj = {}
try:
    ref_obj = get_relevant_file_objs(api, pname, 1)
except Exception as e:
    sys.stderr.write(str(e) + '\n')
    pdb.set_trace()

input_metadata = open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/2019-Mar-4_re-run/2019-Mar-4_bgi_fq_w_strand.txt')
out_fh = open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/2019-Mar-4_re-run/2019-Mar-4_bgi_tasks_fq_in.txt', 'w')
head = next(input_metadata)
for metadata in input_metadata:
    task_info = setup_task(api, ref_obj, metadata, input_metadata)
    out_fh.write(task_info)
out_fh.close()

## Run tasks set up from above

In [9]:
def run_task_by_id(api, info):
    (tname, task_id) = info.rstrip('\n').split('\t')
    task = api.tasks.get(id=task_id)
    task.run()

In [10]:
check = input()
if check == 'YASS':
    task_file = open('/Users/brownm28/Documents/2019-Mar-29_DGD_adhoc_rnaseq/dgd_tasks.txt')
    with concurrent.futures.ThreadPoolExecutor(8) as executor:
        results = {executor.submit(run_task_by_id, api, task_info): task_info for task_info in task_file}

YASS


## Check running tasks

In [2]:
import pdb
pname = 'kfdrc-harmonization/sd-bhjxbdqk-06'
# pname = 'kfdrc-harmonization/sd-preasa7s'
tasks = api.tasks.query(project=pname).all()
out_fn = '/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/status/full.txt'
#out_fn = '/Users/brownm28/Documents/2019-Jan-30_reharmonization/seidman_fy15/completed_update.txt'
out_fh = open(out_fn, 'w')
out_fh.write('task id\ttask name\tcompleted steps\tmessage code\tmessage\n')
for task in tasks:
    if task.name[0:4] == 'RNAf':
        step = task.execution_status.steps_completed
        if step is None:
            step = 'NA'
        else:
            step = str(step)
        ecode = task.execution_status.message_code
        if task.execution_status.message_code is None:
            ecode = 'NA'
        try:
            out_fh.write('\t'.join((task.id, task.name, step, ecode, task.execution_status.message)) + '\n')
        except:
            pdb.set_trace()
            hold = 1
out_fh.close()

### Run PNOC

In [31]:
def get_fq_inputs(api, project):
    tasks = api.tasks.query(project=project).all()
    prefix = 'star-rsem-for-disease-expression'
    fq_list = []
    for task in tasks:
        if task.name[0:9] == 'star-rsem':
            if task.batch:
                children = list(task.get_batch_children().all())
                for child in children:
                    if child.status == 'COMPLETED':
                        fq1 = child.inputs['readFilesIn'][0]
                        fq2 = child.inputs['readFilesIn'][1]
                        fq_list.append(fq1)
                        fq_list.append(fq2)
                        # sys.stderr.write(fq1.name + '\t' + fq2.name + '\n')
            elif task.status == 'COMPLETED':
                fq1 = task.inputs['readFilesIn'][0]
                fq2 = task.inputs['readFilesIn'][1]
                fq_list.append(fq1)
                fq_list.append(fq2)
                # sys.stderr.write(fq1.name + '\t' + fq2.name + '\n')
    return fq_list
                    

In [35]:
def setup_pnoc_task(api, ref_dict, fq1, fq2, fh):
    try:
        in_dict = {}
        for key in ref_dict:
            in_dict[key] = ref_dict[key]
        #pdb.set_trace()
        strand = 'default'
        if strand is None:
            in_dict['wf_strand_param'] = None
        else:
            in_dict['wf_strand_param'] = strand
        # uproject = fq1[2]
        uproject = 'kfdrc-harmonization/sd-bhjxbdqk-11'
        sname = fq1.metadata['sample_id']
        aliquot = fq1.metadata['aliquot_id']
        bs_id = fq1.metadata['Kids First Biospecimen ID']
        task_name = 'RNAfusion-FQ_INPUT: ' + bs_id + ' ' + sname
        app_name = uproject + '/kfdrc-rnaseq-wf'
        
        star_rg = 'ID:' + sname + '\tLB:' + aliquot + '\tPL:ILLUMINA\tSM:' + bs_id
        in_dict['reads1'] = fq1
        in_dict['reads2'] = fq2
        in_dict['STAR_outSAMattrRGline'] = star_rg
        # pdb.set_trace()
        task = api.tasks.create(name=task_name, project=uproject, app=app_name, inputs=in_dict, run=False)
        task.inputs['sample_name'] = task.id
        task.save()
        fh.write('\t'.join((task_name, task.id)) + '\n')
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write('Failed to create task for ' + task_name + '\n')
        exit(1)


In [36]:
project = 'kfdrc-harmonization/sd-bhjxbdqk-11'
ref_obj = {}
try:
    ref_obj = get_relevant_file_objs(api, project, 1)
except Exception as e:
    sys.stderr.write(str(e) + '\n')
    pdb.set_trace()
fq_pairs = get_fq_inputs(api, project)
out_fh = open('/Users/brownm28/Documents/2019-Jan-31_rna_fusion/run_info/2019-Mar-4_re-run/pnoc_tasks.txt', 'w')
for i in range(0, len(fq_pairs), 2):
    setup_pnoc_task(api, ref_obj, fq_pairs[i], fq_pairs[(i + 1)], out_fh)
out_fh.close()
    

#### Run DGD

In [3]:
def setup_dgd_task(api, ref_dict, fq1, fq2, fh):
    try:
        in_dict = {}
        for key in ref_dict:
            in_dict[key] = ref_dict[key]
        #pdb.set_trace()
        strand = 'rf-stranded'
        if strand is None:
            in_dict['wf_strand_param'] = None
        else:
            in_dict['wf_strand_param'] = strand
        # uproject = fq1[2]
        parts = fq1.name.split('_')
        sname = parts[0]
        aliquot = sname
        bs_id = sname
        task_name = 'RNAfusion-FQ_INPUT: ' + sname
        app_name = project + '/kfdrc-rnaseq-wf'
        
        star_rg = 'ID:' + sname + '\tLB:' + aliquot + '\tPL:ILLUMINA\tSM:' + bs_id
        in_dict['reads1'] = fq1
        in_dict['reads2'] = fq2
        in_dict['STAR_outSAMattrRGline'] = star_rg
        # pdb.set_trace()
        task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=in_dict, run=False)
        task.inputs['sample_name'] = task.id
        task.save()
        fh.write('\t'.join((task_name, task.id)) + '\n')
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write('Failed to create task for ' + task_name + '\n')
        exit(1)

In [4]:
def get_dgd_fq_inputs(api):
    fq_dir = api.files.query(names=[fq_folder_name], project=project)[0]
    fq_dict = {}
    for file_obj in fq_dir.list_files():
        # pdb.set_trace()
        fname = file_obj.name
        fq_dict[fname] = file_obj
    fq_list = []
    for fastq in sorted(fq_dict.keys()):
        fq_list.append(fq_dict[fastq])
    return fq_list

In [8]:
project = 'zhangb1/cancerdxdgd'
fq_folder_name = 'dgd_rnaseq'
ref_obj = {}

try:
    ref_obj = get_relevant_file_objs(api, project, 1)
except Exception as e:
    sys.stderr.write(str(e) + '\n')
    pdb.set_trace()
fq_pairs = get_dgd_fq_inputs(api)
out_fh = open('/Users/brownm28/Documents/2019-Mar-29_DGD_adhoc_rnaseq/dgd_tasks.txt', 'w')
for i in range(0, len(fq_pairs), 2):
    setup_dgd_task(api, ref_obj, fq_pairs[i], fq_pairs[(i + 1)], out_fh)
out_fh.close()

In [3]:
project = 'zhangb1/cancerdxdgd'
fq_folder_name = 'dgd_rnaseq'
fq_dir = api.files.query(names=[fq_folder_name], project=project)[0]
pdb.set_trace()
hold = 1

--Return--
> <ipython-input-3-1073e89724a3>(4)<module>()->None
-> pdb.set_trace()
(Pdb) p dir(fq_dir)
['FOLDER_TYPE', '_API', '_URL', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_api', '_data', '_dirty', '_fields', '_modified_data', '_old', '_query', 'bulk_delete', 'bulk_edit', 'bulk_get', 'bulk_update', 'content', 'copy', 'copy_to_folder', 'create_folder', 'created_on', 'deepcopy', 'delete', 'download', 'download_info', 'equals', 'field', 'get', 'href', 'id', 'is_folder', 'list_files', 'metadata', 'modified_on', 'move_to_folder', 'name', 'origin', 'parent', 'project', 'query', 'reload', 'save', 'size', 'storage', 'stream', 'tags', 'type', 'upload']
(Pdb) fq_dir.list_files
<bound method Fil

BdbQuit: 

### quick tag dgd outputs

In [12]:
def parse_tag_outputs(task):
    print (task.name)
    if re.search('RNAfusion-FQ_INPUT', task.name):
        sys.stderr.write('Found valid task ' + task.name + '\n')
        parts = task.name.split()
        sname = parts[-1]
        for key in task.outputs:
            file_obj = api.files.get(id=task.outputs[key].id)
            file_obj.metadata['sample_id'] = sname
            file_obj.save()

In [13]:
project = 'zhangb1/cancerdxdgd'
tasks = api.tasks.query(project=project, status='COMPLETED').all()
with concurrent.futures.ThreadPoolExecutor(16) as executor:
    results = {executor.submit(parse_tag_outputs, task): task for task in tasks}


Bam-to-fastqgz run - 11-10-17 21:36:25
wgs-somatic-pipeline run 7492B  - 11-12-17 00:27:01
wgs-somatic-pipeline run 7609B  - 11-12-17 00:27:01
vcf2maf-gz-support-mem-expand run - 11-13-17 14:19:50
zcat-vcf run - 11-13-17 14:43:44
snpeff-v4.2 run - 11-13-17 14:50:02
vcf2maf-gz-support-mem-expand run - 11-13-17 15:11:51RNAfusion-FQ_INPUT: 16-3400

RNAfusion-FQ_INPUT: 16-3821
RNAfusion-FQ_INPUT: 17-1115
RNAfusion-FQ_INPUT: 17-1464
RNAfusion-FQ_INPUT: 17-4417
RNAfusion-FQ_INPUT: 17-4579
RNAfusion-FQ_INPUT: 17-4666
RNAfusion-FQ_INPUT: 17-4996
RNAfusion-FQ_INPUT: 17-5865
RNAfusion-FQ_INPUT: 17-683
RNAfusion-FQ_INPUT: 18-1341RNAfusion-FQ_INPUT: 18-2044

RNAfusion-FQ_INPUT: 18-2128
RNAfusion-FQ_INPUT: 18-2743RNAfusion-FQ_INPUT: 18-2749

RNAfusion-FQ_INPUT: 18-3724


Found valid task RNAfusion-FQ_INPUT: 16-3400
Found valid task RNAfusion-FQ_INPUT: 16-3821
Found valid task RNAfusion-FQ_INPUT: 17-1115
Found valid task RNAfusion-FQ_INPUT: 17-1464
Found valid task RNAfusion-FQ_INPUT: 17-4417
Found valid task RNAfusion-FQ_INPUT: 17-4579
Found valid task RNAfusion-FQ_INPUT: 17-4666
Found valid task RNAfusion-FQ_INPUT: 17-4996
Found valid task RNAfusion-FQ_INPUT: 17-5865
Found valid task RNAfusion-FQ_INPUT: 17-683
Found valid task RNAfusion-FQ_INPUT: 18-1341
Found valid task RNAfusion-FQ_INPUT: 18-2044
Found valid task RNAfusion-FQ_INPUT: 18-2128
Found valid task RNAfusion-FQ_INPUT: 18-2749
Found valid task RNAfusion-FQ_INPUT: 18-2743
Found valid task RNAfusion-FQ_INPUT: 18-3724


RNAfusion-FQ_INPUT: 18-3819


Found valid task RNAfusion-FQ_INPUT: 18-3819
Found valid task RNAfusion-FQ_INPUT: 18-3910
Found valid task RNAfusion-FQ_INPUT: 18-4372
Found valid task RNAfusion-FQ_INPUT: 18-5156


RNAfusion-FQ_INPUT: 18-3910
RNAfusion-FQ_INPUT: 18-4372
RNAfusion-FQ_INPUT: 18-5156
RNAfusion-FQ_INPUT: 18-5574
RNAfusion-FQ_INPUT: 18-5852


Found valid task RNAfusion-FQ_INPUT: 18-5574
Found valid task RNAfusion-FQ_INPUT: 18-5852
Found valid task RNAfusion-FQ_INPUT: 18-5996
Found valid task RNAfusion-FQ_INPUT: 18-800


RNAfusion-FQ_INPUT: 18-5996
RNAfusion-FQ_INPUT: 18-800


