## Set up API imports

In [1]:
import sevenbridges as sbg
from sevenbridges.errors import SbgError
from sevenbridges.http.error_handlers import rate_limit_sleeper, maintenance_sleeper
import sys
import re
import concurrent.futures
import pdb
# Assumes an sbg credentials file exists; use your own profile name here.
config = sbg.Config(profile='turbo')
# Register the library's rate-limit and maintenance handlers on the client.
api = sbg.Api(
    config=config,
    error_handlers=[rate_limit_sleeper, maintenance_sleeper],
)

### Read in the "master" PED file and note the proband flag and family relationship flag

In [None]:
# Master PED file: no header, tab-separated.
# Builds `trios`: family ID (column 0) -> the three IDs in columns 1-3.
# NOTE(review): presumably columns 1-3 are proband / father / mother, as in
# the usual PED layout — confirm against the file.
trios = {}
# `with` guarantees the handle is closed even if a malformed row raises.
with open("/Users/brownm28/Documents/2021-Jan-4_sobreira_strelka2_run/families.ped") as ped_file:
    for line in ped_file:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        info = line.rstrip('\n').split('\t')
        # Rows whose third column is '-9' are skipped; only the remaining
        # rows define a family's entry (a later row for the same family
        # overwrites an earlier one).
        if info[2] != '-9':
            fam_id = info[0]
            trios[fam_id] = info[1:4]

### Create CRAM dict from Cavatica file manifest

In [None]:
# Builds `crams`: second manifest column -> first manifest column.
# The task-creation code looks entries up by "<sample id>.cram" and passes
# the value to api.files.get, so keys are file names and values are file IDs.
crams = {}
# `with` guarantees the handle is closed even if a malformed row raises.
with open("/Users/brownm28/Documents/2021-Jan-4_sobreira_strelka2_run/cram-manifest.csv") as manifest:
    # Skip the header row; the default avoids StopIteration on an empty file.
    head = next(manifest, None)
    for line in manifest:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        info = line.rstrip('\n').split(',')
        crams[info[1]] = info[0]

## Create tasks

In [None]:
def get_refs_defaults():
    """Return the task inputs that are the same for every family.

    The dict holds the ``cores`` and ``ram`` settings plus the
    ``call_regions`` and ``reference`` file objects fetched through the
    module-level ``api`` client.
    """
    # Files are fetched by ID rather than by a query (a query would normally
    # be more portable between projects).
    return {
        'cores': 16,
        'ram': 32,
        'call_regions': api.files.get('5fdcf7e9e4b0a6d396628ee9'),
        'reference': api.files.get('5fdcf782e4b0370307f394ce'),
    }
    

In [None]:
def create_task(fam_id):
    """Create a draft Strelka2 joint-calling task for one family.

    The task inputs are a copy of the module-level ``ref_dict`` plus
    ``input_crams``: one file object per sample ID in ``trios[fam_id]``,
    resolved through ``crams["<sample id>.cram"]``. The task is created with
    ``run=False``; its ``output_basename`` input is then set to the task's
    own ID and the task is saved.

    Args:
        fam_id: Family ID; must be a key of ``trios``.

    Raises:
        SystemExit: With status 1 if any step fails (the error is written to
            stderr first).
    """
    try:
        task_name = "STRELKA2 JOINT CALL: " + fam_id
        # Copy so the shared defaults are never mutated by a single task.
        inputs = dict(ref_dict)
        inputs['input_crams'] = [
            api.files.get(crams[samp_id + ".cram"]) for samp_id in trios[fam_id]
        ]
        task = api.tasks.create(name=task_name, project=project, app=app_name, inputs=inputs, run=False)
        # The task ID only exists after creation, so the basename is set in
        # a second step and saved.
        task.inputs['output_basename'] = task.id
        task.save()
    except Exception as e:
        sys.stderr.write(str(e) + "\nFailed to create task, exiting\n")
        # sys.exit instead of the site-provided exit(): that helper is meant
        # for interactive use, may be absent, and closes stdin as a side
        # effect.
        sys.exit(1)

    

In [None]:
# Target project, and the app inside that project that every task will run.
project = 'kids-first-drc/kfdrc-sobreira-strelka2-collab'
app_name = '{}/strelka2-germline'.format(project)
# Fetch the shared inputs once and reuse them for every family.
ref_dict = get_refs_defaults()
# One task per family collected in `trios`.
for fam_id in trios:
    create_task(fam_id)


## Add metadata to task outputs

In [7]:
def _save_metadata(file_obj):
    """Save *file_obj*, reporting a failure on stderr instead of aborting."""
    try:
        file_obj.save()
    except Exception as e:
        sys.stderr.write(str(e) + "\nGot while saving metadata for " + file_obj.name + "\n")


# Tag the outputs of every completed joint-calling task with its family ID
# (and, for the per-sample genome VCFs, the sample ID).
project = 'kids-first-drc/kfdrc-sobreira-strelka2-collab'
tasks = api.tasks.query(project=project, status="COMPLETED").all()
prefix = "STRELKA2 JOINT CALL"
for task in tasks:
    # Task names look like "<prefix>: <family id>"; everything after the
    # separator is the family ID. Tasks without that marker are skipped.
    _, matched, fam_id = task.name.partition(prefix + ": ")
    if not matched:
        continue
    for gvcf in task.outputs['genome_vcf_gzs']:
        file_obj = api.files.get(gvcf.id)
        # The sample ID is the second dot-delimited field of the file name.
        parts = file_obj.name.split('.')
        metadata = {"sample_id": parts[1], "Family ID": fam_id}
        # Per-sample outputs get their metadata replaced wholesale.
        file_obj.metadata = metadata
        _save_metadata(file_obj)
        # The first secondary file gets the same metadata as its parent.
        sf = api.files.get(gvcf.secondary_files[0].id)
        sf.metadata = metadata
        _save_metadata(sf)
    # The joint-call VCF and its first secondary file only gain the family
    # ID; their existing metadata keys are left as they are.
    file_obj = api.files.get(task.outputs['variants_vcf_gz'].id)
    file_obj.metadata["Family ID"] = fam_id
    _save_metadata(file_obj)
    sf = api.files.get(task.outputs['variants_vcf_gz'].secondary_files[0].id)
    sf.metadata["Family ID"] = fam_id
    _save_metadata(sf)

