# cram2gvcf Run for SD_R0EPRSGS, Marazita FY16

## Imports

In [6]:
#!/usr/bin/env python3
import sevenbridges as sbg
import sys
import re
import os
from requests import request

# Build the Seven Bridges API client from the named 'cavatica' profile.
# No credentials appear in this notebook; presumably the sevenbridges library
# resolves them from the user's local profile configuration -- TODO confirm.
# `api` is the handle every later cell uses for file queries and task creation.
config = sbg.Config(profile='cavatica')
api = sbg.Api(config=config)

### Get refs

In [7]:
def get_refs(api, project=None):
    """Look up the five reference files used as inputs to every cram2gvcf task.

    Args:
        api: Client exposing ``files.query(project=..., names=[...])``
            (in this notebook, the sevenbridges ``Api`` instance).
        project: ID of the project to search. When omitted, the notebook-level
            ``project`` variable is used, so the original ``get_refs(api)``
            call keeps working.

    Returns:
        A 5-tuple ``(dbsnp_vcf, indexed_reference_fasta, reference_dict,
        wgs_calling_interval_list, wgs_evaluation_interval_list)`` holding the
        first query match for each file name.

    Raises:
        NameError: ``project`` was not passed and no global ``project`` exists.
        IndexError: one of the reference files was not found in the project.
    """
    if project is None:
        # Backward compatibility: the original implementation silently read the
        # notebook-global `project`. Prefer passing it explicitly.
        try:
            project = globals()['project']
        except KeyError:
            raise NameError(
                "name 'project' is not defined; pass project=... to get_refs "
                "or define it before calling"
            ) from None

    # Order matters: it is the order of the returned tuple.
    ref_names = (
        'Homo_sapiens_assembly38.dbsnp138.vcf',
        'Homo_sapiens_assembly38.fasta',
        'Homo_sapiens_assembly38.dict',
        'wgs_calling_regions.hg38.interval_list',
        'wgs_evaluation_regions.hg38.interval_list',
    )

    refs = []
    for ref_name in ref_names:
        matches = api.files.query(project=project, names=[ref_name])
        try:
            refs.append(matches[0])
        except IndexError:
            # Same exception type as before, but now says which file is missing.
            raise IndexError(
                "Reference file '%s' not found in project '%s'" % (ref_name, project)
            ) from None
    return tuple(refs)

### Set up tasks

In [8]:
def setup_task(project, api, bs_id, cram, contam, dbsnp_vcf, indexed_reference_fasta, reference_dict, wgs_calling_interval_list, wgs_evaluation_interval_list):
    """Create one cram2gvcf task for a biospecimen without starting it.

    The task is created with ``run=False``. Its ``output_basename`` input is
    then set to the task's own ID and the task is saved, so two API writes
    happen per call. A one-line summary is printed to stdout.

    Args:
        project: Project ID; also used as the prefix of the app ID
            (``<project>/kf-cram2gvcf-bam-input``).
        api: Client exposing ``files.query`` and ``tasks.create``.
        bs_id: Biospecimen ID (str); used in the task name and passed as the
            ``biospecimen_name`` input.
        cram: Name of the CRAM file to look up in ``project``.
        contam: Value passed through as the ``contamination`` input.
        dbsnp_vcf, indexed_reference_fasta, reference_dict,
        wgs_calling_interval_list, wgs_evaluation_interval_list:
            Reference file objects passed through unchanged as task inputs.

    Returns:
        None.

    Raises:
        IndexError: ``cram`` was not found in ``project``.
    """
    # Resolve the CRAM up front so a missing file fails with a clear message
    # before any task is created.
    cram_matches = api.files.query(project=project, names=[cram])
    try:
        input_cram = cram_matches[0]
    except IndexError:
        raise IndexError(
            "CRAM file '%s' (biospecimen %s) not found in project '%s'"
            % (cram, bs_id, project)
        ) from None

    inputs = {
        'dbsnp_vcf': dbsnp_vcf,
        'indexed_reference_fasta': indexed_reference_fasta,
        'reference_dict': reference_dict,
        'wgs_calling_interval_list': wgs_calling_interval_list,
        'wgs_evaluation_interval_list': wgs_evaluation_interval_list,
        'biospecimen_name': bs_id,
        'contamination': contam,
        'input_cram': input_cram,
    }

    task = api.tasks.create(
        name='cram2gvcf-' + bs_id,
        project=project,
        app=project + '/kf-cram2gvcf-bam-input',
        inputs=inputs,
        run=False,
    )
    # The task ID only exists after creation, hence the second write.
    task.inputs['output_basename'] = task.id
    task.save()
    print('Task name:' + task.name, 'Task ID:' + task.id)

## Main

In [9]:
# Driver cell: resolve the shared reference files, load per-CRAM contamination
# values, then create one task per (biospecimen, CRAM) pair.
project = 'kfdrc-harmonization/sd-r0eprsgs'
(dbsnp_vcf, indexed_reference_fasta, reference_dict, wgs_calling_interval_list, wgs_evaluation_interval_list) = get_refs(api)

# NOTE(review): machine-specific absolute path; edit here when running elsewhere.
data_dir = '/Users/brownm28/Documents/2018-Sep-19_Marazita_FY16'

# Map CRAM base file name -> contamination value (kept as a string here and
# converted with float() at the point of use).
cram_info_dict = {}
with open(os.path.join(data_dir, 'fname_contam_only.txt')) as broad_contam:
    next(broad_contam)  # skip the first line (presumably a header row)
    for line in broad_contam:
        info = line.rstrip('\n').split('\t')
        cram_info_dict[os.path.basename(info[0])] = info[1]

# Test-mode cap: the loop stops when the counter reaches n, so at most n - 1
# tasks are created.
x = 1
n = 5
# `with` closes the pairs file even if task setup raises part-way through.
with open(os.path.join(data_dir, 'ds_bsid_cram_pairs.txt')) as ds_load_file:
    next(ds_load_file)  # skip the first line (presumably a header row)
    for pair in ds_load_file:
        if x == n:
            sys.stderr.write('Test mode. Stopping\n')
            break
        x += 1
        (bs_id, cram) = pair.rstrip('\n').split('\t')
        setup_task(project, api, bs_id, cram, float(cram_info_dict[cram]), dbsnp_vcf, indexed_reference_fasta, reference_dict, wgs_calling_interval_list, wgs_evaluation_interval_list)


Task name:cram2gvcf-BS_W7PW914H Task ID:a8be25a9-4c60-483f-9f8d-e49c5dab63c0
Task name:cram2gvcf-BS_CMTQKKJN Task ID:2edae207-679b-4ee0-8c23-59bad69918b6
Task name:cram2gvcf-BS_H2FCSCK3 Task ID:847b5634-66b3-4d9b-9dcd-049aa49f0660
Task name:cram2gvcf-BS_5PAP7GRQ Task ID:3e3e9897-926f-4450-9b47-5fa3b366251f


Test mode. Stopping
