In [1]:
import sevenbridges as sbg
from sevenbridges.errors import SbgError
from sevenbridges.http.error_handlers import rate_limit_sleeper, maintenance_sleeper
import sys
import re
import pdb
import concurrent.futures
from requests import request
import json
config = sbg.Config(profile='turbo')
api = sbg.Api(config=config, error_handlers=[rate_limit_sleeper, maintenance_sleeper])

## Copy metadata from bam to fastq
Will copy metadata from input bams to converted fastq, and add an rg tag:
format is: `ID:sample_name LB:aliquot_id PL:platform SM:BSID`
 for example `ID:7316-242 LB:750189 PL:ILLUMINA SM:BS_W72364MN`


In [None]:
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
phrase = 'bam2fastq run'
tasks = api.tasks.query(project=project, status="COMPLETED")
for task in tasks:
    if re.search(phrase, task.name):
        metadata = task.inputs['input_reads_1'].metadata
        rg = "\t".join(["ID:" + metadata['sample_id'], "LB:" 
                        + metadata['aliquot_id'], "PL:ILLUMINA", "SM:" + metadata['Kids First Biospecimen ID']])
        for out in task.outputs:
            obj = api.files.get(task.outputs[out].id)
            for key in metadata:
                obj.metadata[key] = metadata[key]
            obj.metadata['read_group'] = rg
            obj.save()

## Param dicts

In [2]:
# Scenario specific params to test
param_sets = {
    "gtex": {
        "alignInsertionFlush": "None",
        "alignIntronMax": 1000000,
        "alignMatesGapMax": 1000000,
        "alignSJDBoverhangMin": 1,
        "alignSJoverhangMin": 8,
        "alignSoftClipAtReferenceEnds": "Yes",
        "chimJunctionOverhangMin": 15,
        "chimMainSegmentMultNmax": 1,
        "chimSegmentMin": 15,
        "limitSjdbInsertNsj": 1200000,
        "outFilterIntronMotifs": "None",
        "outFilterMatchNminOverLread": 0.33,
        "outFilterMismatchNmax": 999,
        "outFilterMismatchNoverLmax": 0.1,
        "outFilterMultimapNmax": 20,
        "outFilterScoreMinOverLread": 0.33,
        "outFilterType": "BySJout",
        "twopassMode": "Basic"
    },
    "min_fusion": {
        "alignInsertionFlush": "None",
        "alignIntronMax": 1000000,
        "alignMatesGapMax": 1000000,
        "alignSJDBoverhangMin": 1,
        "alignSJoverhangMin": 8,
        "alignSoftClipAtReferenceEnds": "Yes",
        "chimJunctionOverhangMin": 12,
        "chimMainSegmentMultNmax": 1,
        "chimSegmentMin": 15,
        "limitSjdbInsertNsj": 1200000,
        "outFilterIntronMotifs": "None",
        "outFilterMatchNminOverLread": 0.33,
        "outFilterMismatchNmax": 999,
        "outFilterMismatchNoverLmax": 0.1,
        "outFilterMultimapNmax": 20,
        "outFilterScoreMinOverLread": 0.33,
        "outFilterType": "BySJout",
        "twopassMode": "Basic",
        "alignSJstitchMismatchNmax": "5 -1 5 5",
        "chimOutJunctionFormat": 1
    },
    "star_fusion_heavy": {
        "alignInsertionFlush": "Right",
        "alignIntronMax": 100000,
        "alignMatesGapMax": 100000,
        "alignSJDBoverhangMin": 10,
        "alignSJstitchMismatchNmax": "5 -1 5 5",
        "alignSplicedMateMapLmin": 30,
        "alignSplicedMateMapLminOverLmate": 0,
        "chimJunctionOverhangMin": 8,
        "chimMultimapNmax": 20,
        "chimOutType": "Junctions WithinBAM SoftClip", # if chimMultimapNmax value > 0, must drop SeparateSAMold
        "chimMultimapScoreRange": 3,
        "chimNonchimScoreDropMin": 10,
        "chimOutJunctionFormat": 1,
        "chimScoreJunctionNonGTAG": -4,
        "chimSegmentMin": 12,
        "peOverlapMMp": 0,
        "peOverlapNbasesMin": 12,
        "twopassMode": "Basic"
    },
    "arriba_heavy": {
        "alignInsertionFlush": "None",
        "alignSJstitchMismatchNmax": "5 -1 5 5",
        "alignSplicedMateMapLminOverLmate": 0.5,
        "chimJunctionOverhangMin": 10,
        "chimMultimapNmax": 50,
        "chimOutType": "Junctions WithinBAM SoftClip", # if chimMultimapNmax value > 0, must drop SeparateSAMold
        "chimScoreDropMax": 30,
        "chimScoreJunctionNonGTAG": -1,
        "chimScoreSeparation": 1,
        "chimSegmentMin": 10,
        "chimSegmentReadGapMax": 3,
        "outFilterMultimapNmax": 50,
        "peOverlapNbasesMin": 10,
        "twopassMode": "Basic"
    }
}


## Set up tasks

In [None]:
def get_required_files(api, project):
    ref_dict = {}
    ref_dict['genomeDir'] = api.files.query(project=project, names=['STAR_2.7.10a_GENCODE38.tar.gz'])[0]
    return ref_dict

In [None]:
def draft_tasks(param_key, ref_files, bs_id):
    in_dict = {**param_sets[param_key], **ref_files}
    for fq in fastq_file_dict[bs_id]:
        if fq.name.endswith('_1.fastq.gz'):
            in_dict['readFilesIn1'] = fq
            in_dict['outSAMattrRGline'] = fq.metadata['read_group']
        else:
            in_dict['readFilesIn2'] = fq
    task_name = "STAR 2.7.10a " +  param_key + ": " + bs_id
    task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=in_dict, run=False)
    task.inputs['outFileNamePrefix'] = task.id
    task.save()


### Prep inputs

In [None]:
# manifest should be small enough to load directly
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
app_name = project + "/star_2-7-10a_alignReads"
in_fastq_manifest = api.files.get('6219027166ee4b4fd5f46ec6').content().split('\n')
head = in_fastq_manifest[0].split(',')
b_idx = head.index('Kids First Biospecimen ID')
fastq_file_dict = {}
for i in range(1, len(in_fastq_manifest) - 1, 1):
    # pdb.set_trace()
    info = in_fastq_manifest[i].split(',')
    bs_id = info[b_idx]
    if bs_id not in fastq_file_dict:
        fastq_file_dict[bs_id] = []
    fastq_file_dict[bs_id].append(api.files.get(info[0]))
param_keys = ["gtex", "min_fusion", "star_fusion_heavy", "arriba_heavy"]
ref_files = get_required_files(api, project)


### Draft tasks

In [None]:
for param in param_keys:
    for bs_id in fastq_file_dict:
        draft_tasks(param, ref_files, bs_id)

In [None]:
param_keys = ["arriba_heavy", "star_fusion_heavy"]

## Add metadata to outputs
Example task `STAR 2.7.10a star_fusion_heavy: BS_N6NE6B3W`

In [None]:
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
phrase = 'STAR 2.7.10a'
tasks = api.tasks.query(project=project, status="COMPLETED").all()
for task in tasks:
    if re.search(phrase, task.name):
        name_bits = task.name.split()
        metadata = task.inputs['readFilesIn1'].metadata
        for out in task.outputs:
            try:
                obj = api.files.get(task.outputs[out].id)
                for key in metadata:
                    obj.metadata[key] = metadata[key]
                obj.metadata['param_set'] = name_bits[2][:-1]
                obj.save()
            except Exception as e:
                print(e)

## Set up RSEM Tasks

In [8]:
def get_rsem_ref(api, project):
    ref_dict = {}
    ref_dict['genomeDir'] = api.files.query(project=project, names=['RSEM_GENCODE38.tar.gz'])[0]
    ref_dict['strandedness'] = 'reverse'
    ref_dict['estimate_rspd'] = True # added after refactor and review of GTeX params
    return ref_dict

In [9]:
def draft_rsem_tasks(ref_files, tx_bam):
    in_dict = {**ref_files}
    in_dict['bam'] = tx_bam
    param_key = tx_bam.metadata['param_set']
    bs_id = tx_bam.metadata['Kids First Biospecimen ID']
    # it was a good idea at the time, in a typical wf, giving read2 fq turns on paired-end param for bam, so using a dummy input
    # in_dict['input_reads_2'] = ref_files['genomeDir'] # this has been refactored out
    task_name = "RSEM RSPD " +  param_key + " " + bs_id
    task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=in_dict, run=False)
    task.inputs['outFileNamePrefix'] = task.id
    task.save()

In [10]:
# manifest should be small enough to load directly
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
app_name = project + "/rsem-calculate-expression"
in_tx_bam_manifest = api.files.get('621cf74d66ee4b4fd5f7f0a1').content().split('\n')
tx_bam_list = []
for i in range(1, len(in_tx_bam_manifest) - 1, 1):
    # pdb.set_trace()
    info = in_tx_bam_manifest[i].split(',')
    tx_bam_list.append(api.files.get(info[0]))
ref_files = get_rsem_ref(api, project)


In [12]:
for tx_bam in tx_bam_list:
    draft_rsem_tasks(ref_files, tx_bam)

## Copy metadata for RSEM outputs

In [2]:
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
# phrase = 'RSEM '
phrase = 'RSEM RSPD'
tasks = api.tasks.query(project=project, status="COMPLETED").all()
for task in tasks:
    if re.search(phrase, task.name):
        bam = api.files.get(task.inputs['bam'].id)
        metadata = bam.metadata
        for out in task.outputs:
            try:
                obj = api.files.get(task.outputs[out].id)
                for key in metadata:
#                     if key == "param_set":
#                         pdb.set_trace()
                    obj.metadata[key] = metadata[key]
                obj.save()
            except Exception as e:
                print(e)

# Test Fusion callers

## Arriba v 1.1.0

In [2]:
def get_arriba_ref(api, project):
    ref_dict = {}
    ref_dict['gtf_anno'] = api.files.query(project=project, names=['gencode.v38.primary_assembly.annotation.gtf'])[0]
    ref_dict['reference_fasta'] = api.files.query(project=project, names=['GRCh38.primary_assembly.genome.fa'])[0]
    return ref_dict

In [3]:
def draft_arriba_tasks(ref_files, gbam):
    in_dict = {**ref_files}
    in_dict['genome_aligned_bam'] = gbam
    param_key = gbam.metadata['param_set']
    bs_id = gbam.metadata['Kids First Biospecimen ID']
    task_name = "arriba 1.1.0 " +  param_key + ": " + bs_id
    task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=in_dict, run=False)
    task.inputs['outFileNamePrefix'] = task.id
    task.save()

In [4]:
# manifest should be small enough to load directly
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
app_name = project + "/arriba_fusion"
in_gbam_manifest = api.files.get('62238cfb36ccef54e3afc1f1').content().split('\n')
gbam_list = []
for i in range(1, len(in_gbam_manifest) - 1, 1):
    # pdb.set_trace()
    info = in_gbam_manifest[i].split(',')
    gbam_list.append(api.files.get(info[0]))
ref_files = get_arriba_ref(api, project)


In [5]:
for gbam in gbam_list:
    draft_arriba_tasks(ref_files, gbam)

## Arriba v2.2.1

In [3]:
def draft_arriba2_tasks(ref_files, gbam):
    in_dict = {**ref_files}
    in_dict['genome_aligned_bam'] = gbam
    param_key = gbam.metadata['param_set']
    bs_id = gbam.metadata['Kids First Biospecimen ID']
    task_name = "arriba 2.2.1 " +  param_key + ": " + bs_id
    task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=in_dict, run=False)
    task.inputs['outFileNamePrefix'] = task.id
    task.save()

In [4]:
# manifest should be small enough to load directly
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
app_name = project + "/arriba_fusion_2-2-1"
in_gbam_manifest = api.files.get('62238cfb36ccef54e3afc1f1').content().split('\n')
gbam_list = []
for i in range(1, len(in_gbam_manifest) - 1, 1):
    # pdb.set_trace()
    info = in_gbam_manifest[i].split(',')
    gbam_list.append(api.files.get(info[0]))
ref_files = get_arriba_ref(api, project)


In [5]:
for gbam in gbam_list:
    draft_arriba2_tasks(ref_files, gbam)

### Copy metadata for arriba

In [4]:
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
phrase = 'arriba '
tasks = api.tasks.query(project=project, status="COMPLETED").all()
for task in tasks:
    if re.search(phrase, task.name):
        bam = api.files.get(task.inputs['genome_aligned_bam'].id)
        metadata = bam.metadata
        for out in task.outputs:
            try:
                obj = api.files.get(task.outputs[out].id)
                for key in metadata:
                    obj.metadata[key] = metadata[key]
                obj.save()
            except Exception as e:
                print(e)

'NoneType' object has no attribute 'id'
'NoneType' object has no attribute 'id'


## STAR Fusion 1.10.1

In [5]:
def get_star_fusion_ref(api, project):
    ref_dict = {}
    ref_dict['genome_tar'] = api.files.query(project=project, names=['GRCh38_v38_CTAT_lib_Mar072022.CUSTOM.tgz'])[0]
    ref_dict['genome_untar_path'] = "GRCh38_v38_CTAT_lib_Mar072022.CUSTOM"
    return ref_dict

In [9]:
def draft_sf_tasks(ref_files, chim_junc):
    in_dict = {**ref_files}
    in_dict['Chimeric_junction'] = chim_junc
    param_key = chim_junc.metadata['param_set']
    bs_id = chim_junc.metadata['Kids First Biospecimen ID']
    task_name = "STAR Fusion 1.10.1 RERUN " +  param_key + ": " + bs_id
    task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=in_dict, run=False)
    task.inputs['output_basename'] = task.id
    task.save()

In [10]:
# manifest should be small enough to load directly
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
app_name = project + "/star_fusion_1-10-1_call"
in_chim_junc_manifest = api.files.get('6224e5dd36ccef54e3b2aa3b').content().split('\n')
chim_junc_list = []
for i in range(1, len(in_chim_junc_manifest) - 1, 1):
    # pdb.set_trace()
    info = in_chim_junc_manifest[i].split(',')
    chim_junc_list.append(api.files.get(info[0]))
ref_files = get_star_fusion_ref(api, project)


In [11]:
for chim_junc in chim_junc_list:
    draft_sf_tasks(ref_files, chim_junc)

### Copy metadata to STAR Fusion outputs

In [12]:
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
phrase = 'STAR Fusion 1.10.1 RERUN'
tasks = api.tasks.query(project=project, status="COMPLETED").all()
for task in tasks:
    if re.search(phrase, task.name):
        bam = api.files.get(task.inputs['Chimeric_junction'].id)
        metadata = bam.metadata
        for out in task.outputs:
            try:
                obj = api.files.get(task.outputs[out].id)
                for key in metadata:
                    obj.metadata[key] = metadata[key]
                obj.save()
            except Exception as e:
                print(e)

## Run annofuse

In [14]:
def get_annofuse_ref(api, project):
    ref_dict = {}
    ref_dict['FusionGenome'] = api.files.query(project=project, names=['GRCh38_v38_CTAT_lib_Mar072022.CUSTOM.tgz'])[0]
    ref_dict['genome_untar_path'] = "GRCh38_v38_CTAT_lib_Mar072022.CUSTOM"
    return ref_dict

In [19]:
def draft_af_tasks(ref_files, in_file_dict):
    in_dict = {**ref_files}
    param_key = in_file_dict['rsem_expr_file'].metadata['param_set']
    bs_id = in_file_dict['rsem_expr_file'].metadata['Kids First Biospecimen ID']
    in_dict['rsem_expr_file'] = in_file_dict['rsem_expr_file']
    in_dict['star_fusion_output_file'] = in_file_dict['star_fusion_output_file']
    in_dict['sample_name'] = bs_id
    # Weird situation where we draft two task, one for each arriba version
    for arriba_file in in_file_dict['arriba_output_file']:    
        task_name = "annoFuse " +  param_key + ": " + bs_id
        if '2.2.1' in arriba_file.name:
            task_name = "annoFuse arriba 2.2.1 " +  param_key + ": " + bs_id
        in_dict['arriba_output_file'] = arriba_file
        task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=in_dict, run=False)
        task.inputs['output_basename'] = task.id
        task.save()

In [17]:
# manifest should be small enough to load directly
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
app_name = project + "/kfdrc-annofuse-wf"
in_files_manifest = api.files.get('6227ace336ccef54e3c658ae').content().split('\n')
head = in_files_manifest[0].split(',')
p_idx = head.index('param_set')
b_idx = head.index('Kids First Biospecimen ID')
in_files_dict = {}
for i in range(1, len(in_files_manifest) - 1, 1):
    info = in_files_manifest[i].split(',')
    bs_id, param_set = info[b_idx], info[p_idx]
    # initialize bs id if not there
    if bs_id not in in_files_dict:
        in_files_dict[bs_id] = {}
    # same for param set
    if param_set not in in_files_dict[bs_id]:
        in_files_dict[bs_id][param_set] = {}
    if "rsem" in info[1]:
        in_files_dict[bs_id][param_set]['rsem_expr_file'] = api.files.get(info[0])
    elif "abridged" in info[1]:
        in_files_dict[bs_id][param_set]['star_fusion_output_file'] = api.files.get(info[0])
    else:
        # special case where two different versions of arriba run
        if 'arriba_output_file' not in in_files_dict[bs_id][param_set]:
            in_files_dict[bs_id][param_set]['arriba_output_file'] = []
        in_files_dict[bs_id][param_set]['arriba_output_file'].append(api.files.get(info[0]))
        
ref_files = get_annofuse_ref(api, project)

In [20]:
for bs_id in in_files_dict:
    for param_set in in_files_dict[bs_id]:
        draft_af_tasks(ref_files, in_files_dict[bs_id][param_set])

### Add metadata to outputs

In [23]:
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
phrase = 'annoFuse'
tasks = api.tasks.query(project=project, status="COMPLETED").all()
for task in tasks:
    if re.search(phrase, task.name):
        bam = api.files.get(task.inputs['rsem_expr_file'].id)
        metadata = bam.metadata
        for out in task.outputs:
            try:
                obj = api.files.get(task.outputs[out].id)
                for key in metadata:
                    obj.metadata[key] = metadata[key]
                if "2.2.1" in task.name:
                    obj.metadata['param_set'] += " using arriba 2.2.1"
                obj.save()
            except Exception as e:
                print(e)

# TEST NEW WORKFLOW
Leverage Param Sets dict at start

In [3]:
def get_wf_refs(api, project):
    ref_dict = {}
    ref_dict['STARgenome'] = api.files.query(project=project, names=['STAR_2.7.10a_GENCODE39.tar.gz'])[0]
    ref_dict['FusionGenome'] = api.files.query(project=project, names=['GRCh38_v39_CTAT_lib_Mar242022.CUSTOM.tar.gz'])[0]
    ref_dict['RSEMgenome'] = api.files.query(project=project, names=['RSEM_GENCODE39.tar.gz'])[0]
    ref_dict['gtf_anno'] = api.files.query(project=project, names=['gencode.v39.primary_assembly.annotation.gtf'])[0]
    ref_dict['RNAseQC_GTF'] = api.files.query(project=project, names=['gencode.v39.primary_assembly.rnaseqc.stranded.gtf'])[0]
    ref_dict['kallisto_idx'] = api.files.query(project=project, names=['RSEM_GENCODE39.transcripts.kallisto.idx'])[0]
    ref_dict['reference_fasta'] = api.files.query(project=project, names=['GRCh38.primary_assembly.genome.fa'])[0]
    ref_dict['wf_strand_param'] = 'rf-stranded'
    ref_dict['rmats_read_length'] = 151
    ref_dict['rmats_allow_clipping'] = True
    return ref_dict

In [4]:
def draft_fq_tasks(param_key, ref_files, bs_id):
    in_dict = {**param_sets[param_key], **ref_files}
    for fq in fastq_file_dict[bs_id]:
        if fq.name.endswith('_1.fastq.gz'):
            in_dict['reads1'] = fq
            in_dict['outSAMattrRGline'] = fq.metadata['read_group']
        else:
            in_dict['reads2'] = fq
    in_dict['sample_name'] = bs_id
    in_dict['input_type'] = 'FASTQ'
    task_name = "KF Workflow V4 TEST FQ " +  param_key + ": " + bs_id
    task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=in_dict, run=False)
    task.inputs['output_basename'] = task.id
    task.save()

### Set up fastq input run

In [7]:
# manifest should be small enough to load directly
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
app_name = project + "/kf-rnaseqv4-wf"
in_fastq_manifest = api.files.get('62432da85638100a011b3ed0').content().split('\n')
head = in_fastq_manifest[0].split(',')
b_idx = head.index('Kids First Biospecimen ID')
fastq_file_dict = {}
for i in range(1, len(in_fastq_manifest) - 1, 1):
    # pdb.set_trace()
    info = in_fastq_manifest[i].split(',')
    bs_id = info[b_idx]
    if bs_id not in fastq_file_dict:
        fastq_file_dict[bs_id] = []
    fastq_file_dict[bs_id].append(api.files.get(info[0]))
param_keys = ["gtex"]
ref_files = get_wf_refs(api, project)


In [17]:
for param in param_keys:
    for bs_id in fastq_file_dict:
        draft_fq_tasks(param, ref_files, bs_id)

### Set up a bam input run

In [14]:
def draft_bam_tasks(param_key, ref_files, bs_id):
    in_dict = {**param_sets[param_key], **ref_files}
    in_dict['reads1'] = bam_file_dict[bs_id][0]
    in_dict['outSAMattrRGline'] = bam_file_dict[bs_id][1]
    in_dict['sample_name'] = bs_id
    in_dict['input_type'] = 'PEBAM'
    task_name = "KF Workflow V4 TEST BAM " +  param_key + ": " + bs_id
    task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=in_dict, run=False)
    task.inputs['output_basename'] = task.id
    task.save()

In [12]:
# manifest should be small enough to load directly
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
app_name = project + "/kf-rnaseqv4-wf"
in_bam_manifest = api.files.get('62432a9603fdd27daa1a724e').content().split('\n')
head = in_bam_manifest[0].split('\t')
# use additional info to create rg IDs
b_idx = head.index('Kids First Biospecimen ID')
a_idx = head.index('aliquot_id')
s_idx = head.index('sample_id')
# bam dict, indexed by bs id, links to list of file object and read group
bam_file_dict = {}
# ID:7316-470 LB:549594 PL:ILLUMINA SM:BS_ZRM0W01M
for i in range(1, len(in_bam_manifest) - 1, 1):
    # pdb.set_trace()
    info = in_bam_manifest[i].split('\t')
    bs_id = info[b_idx]
    a_id = info[a_idx]
    s_id = info[s_idx]
    rg = "\t".join(["ID:" + s_id, "LB:" + a_id, "PL:ILLUMINA", "SM:" + bs_id])
    bam_file_dict[bs_id] = []
    bam_file_dict[bs_id].append(api.files.get(info[0]))
    bam_file_dict[bs_id].append(rg)
param_keys = ["arriba_heavy"]
ref_files = get_wf_refs(api, project)


In [16]:
for param in param_keys:
    for bs_id in bam_file_dict:
        draft_bam_tasks(param, ref_files, bs_id)

### Add metadata pipe outputs

In [2]:
project = 'kids-first-drc/kids-first-drc-rnaseq-workflow'
phrase = 'KF Workflow V4 TEST'
tasks = api.tasks.query(project=project, status="COMPLETED").all()
for task in tasks:
    if re.search(phrase, task.name):
        name_bits = task.name.split()
        metadata = task.inputs['reads1'].metadata
        for out in task.outputs:
            try:
                obj = api.files.get(task.outputs[out].id)
                for key in metadata:
                    obj.metadata[key] = metadata[key]
                obj.metadata['param_set'] = name_bits[5][:-1]
                obj.save()
            except Exception as e:
                print(e)

'NoneType' object has no attribute 'id'
'NoneType' object has no attribute 'id'
'NoneType' object has no attribute 'id'
'NoneType' object has no attribute 'id'
'NoneType' object has no attribute 'id'
