# cram2gvcf Run for SD_RM8AFW0R, Rios-Wise

## Imports

In [15]:
#!/usr/bin/env python3
import sevenbridges as sbg
import sys
import re
import os
from requests import request

# Build the Seven Bridges API client from the named profile 'cavatica' so that
# no token is hard-coded in the notebook.
# NOTE(review): the profile is presumably read from the local sevenbridges
# credentials file -- confirm it exists on the machine running this notebook.
config = sbg.Config(profile='cavatica')
api = sbg.Api(config=config)

### Get Crams

In [24]:
def get_crams(api, project):
    """Collect the CRAM output of the completed tasks in a project.

    Queries every task in ``project`` whose status is ``COMPLETED`` and keys
    the result by the task name with the text ``'alignment-'`` removed.

    Tasks that expose no ``'cram'`` output (for instance tasks of another
    workflow that live in the same project) are skipped with a warning on
    stderr instead of being recorded with a missing CRAM.

    Args:
        api: API client exposing ``tasks.query(project=..., status=...)``.
        project: Project identifier passed through to the task query.

    Returns:
        dict mapping each derived ID to ``{'tid': <task id>, 'obj': <cram>}``.
        If two tasks yield the same ID a warning is written to stderr and the
        later task replaces the earlier one.
    """
    proj_tasks = api.tasks.query(project=project, status='COMPLETED').all()
    cram_dict = {}
    for task in proj_tasks:
        # A missing output may surface either as an exception or as None,
        # depending on the mapping type the client returns; treat both alike.
        try:
            cram = task.outputs['cram']
        except (KeyError, TypeError):
            cram = None
        if cram is None:
            sys.stderr.write('WARN:' + task.name + ' has no cram output, skipping\n')
            continue

        bs_id = task.name.replace('alignment-', '')
        if bs_id in cram_dict:
            sys.stderr.write('WARN:' + bs_id + ' listed twice!\n')
        cram_dict[bs_id] = {'tid': task.id, 'obj': cram}
    return cram_dict
        

### Get reference files

In [25]:
def get_refs(api, project=None):
    """Look up the reference files needed by the cram2gvcf workflow.

    Each file is fetched by exact name with its own
    ``api.files.query(project=..., names=[name])`` call and the first match
    is used.

    Args:
        api: API client exposing ``files.query(project=..., names=[...])``.
        project: Project to search. When omitted, the module-level variable
            ``project`` is used, which keeps existing ``get_refs(api)`` calls
            working.

    Returns:
        Tuple of eight file objects, in this order: dbsnp_vcf,
        indexed_reference_fasta, reference_dict, contamination_sites_bed,
        contamination_sites_mu, contamination_sites_ud,
        wgs_calling_interval_list, wgs_evaluation_interval_list.

    Raises:
        NameError: ``project`` was not given and no global ``project`` exists.
        IndexError: a reference file was not found in the project; the
            message names the missing file.
    """
    if project is None:
        # Backward compatibility: the function used to read the notebook-level
        # ``project`` variable implicitly.
        try:
            project = globals()['project']
        except KeyError:
            raise NameError("name 'project' is not defined") from None

    # Order matters: callers unpack the returned tuple positionally.
    ref_names = (
        'Homo_sapiens_assembly38.dbsnp138.vcf',
        'Homo_sapiens_assembly38.fasta',
        'Homo_sapiens_assembly38.dict',
        'Homo_sapiens_assembly38.contam.bed',
        'Homo_sapiens_assembly38.contam.mu',
        'Homo_sapiens_assembly38.contam.UD',
        'wgs_calling_regions.hg38.interval_list',
        'wgs_evaluation_regions.hg38.interval_list',
    )

    refs = []
    for name in ref_names:
        matches = api.files.query(project=project, names=[name])
        try:
            refs.append(matches[0])
        except IndexError:
            raise IndexError(
                'reference file ' + name + ' not found in project ' + str(project)
            ) from None
    return tuple(refs)

### Set up tasks

In [26]:
def setup_task(project, api, bs_id, cram, cram_tid, dbsnp_vcf, indexed_reference_fasta, reference_dict, contamination_sites_bed, contamination_sites_mu, contamination_sites_ud, wgs_calling_interval_list, wgs_evaluation_interval_list):
    """Create one cram2gvcf task for a sample and report its ID.

    The task is named ``'cram2gvcf-' + bs_id``, uses the app
    ``project + '/kf-cram2gvcf-calc-contam'`` and is created with
    ``run=False``, so this call does not start it. After creation the task's
    own ID is stored as the ``output_basename`` input and the task is saved.

    Args:
        project: Project that owns both the app and the new task.
        api: API client exposing ``tasks.create(...)``.
        bs_id: Sample identifier used in the task name and the printed row.
        cram: CRAM file passed as the ``input_cram`` input.
        cram_tid: ID of the task that produced the CRAM (only printed).
        dbsnp_vcf, indexed_reference_fasta, reference_dict,
        contamination_sites_bed, contamination_sites_mu,
        contamination_sites_ud, wgs_calling_interval_list,
        wgs_evaluation_interval_list: Reference files, each passed as the
            input of the same name.

    Returns:
        None. Prints one tab-separated line: sample ID, source task ID and
        the new task ID.
    """
    job_inputs = {
        'dbsnp_vcf': dbsnp_vcf,
        'indexed_reference_fasta': indexed_reference_fasta,
        'reference_dict': reference_dict,
        'contamination_sites_bed': contamination_sites_bed,
        'contamination_sites_mu': contamination_sites_mu,
        'contamination_sites_ud': contamination_sites_ud,
        'wgs_calling_interval_list': wgs_calling_interval_list,
        'wgs_evaluation_interval_list': wgs_evaluation_interval_list,
        'input_cram': cram,
    }

    draft = api.tasks.create(
        name='cram2gvcf-' + bs_id,
        project=project,
        app=project + '/kf-cram2gvcf-calc-contam',
        inputs=job_inputs,
        run=False,
    )

    # The task ID only exists once the task has been created, so the output
    # basename has to be filled in afterwards and saved back.
    draft.inputs['output_basename'] = draft.id
    draft.save()

    print('\t'.join([bs_id, cram_tid, draft.id]))

## Main

In [27]:
# Driver cell: find the aligned CRAMs, resolve the shared reference files and
# create one cram2gvcf task per sample. This performs remote API calls and
# creates tasks in the project every time it is run.
project = 'kfdrc-harmonization/sd-rm8afw0r'
cram_dict = get_crams(api, project)
(dbsnp_vcf, indexed_reference_fasta, reference_dict, contamination_sites_bed, contamination_sites_mu, contamination_sites_ud, wgs_calling_interval_list, wgs_evaluation_interval_list) = get_refs(api)
print ('BS ID\tcram task ID\tgvcf_task ID')

# TEST MODE: stop when the n-th sample is reached, i.e. at most n - 1 tasks
# are created. Raise n (or remove the check) for a full run.
n = 5
for x, bs_id in enumerate(cram_dict, start=1):
    if x == n:
        sys.stderr.write('Quitting, TEST MODE!\n')
        break
    setup_task(project, api, bs_id, cram_dict[bs_id]['obj'], cram_dict[bs_id]['tid'], dbsnp_vcf, indexed_reference_fasta, reference_dict, contamination_sites_bed, contamination_sites_mu, contamination_sites_ud, wgs_calling_interval_list, wgs_evaluation_interval_list)
    

BS ID	cram task ID	gvcf_task ID
BS_082CXWXG	d887e679-e06f-4fc0-bf59-f6dbf8f5e6f8	e4710b97-8d9a-4417-a2f6-30e7a8728d40
BS_CXT1NEB5	de9fd84a-38bf-451c-aaf9-0391dca204c4	334d0f24-f774-4f98-8591-a0ada4e1782f
BS_T87T4WYC	ebe5d9ea-f10f-4425-bc84-84c78e9e5679	898114d1-2b54-4b27-a0d7-20d97c0837bc
BS_T3ZTQY6D	f50336f3-1043-4c47-a0be-ad75cfd4531b	462effe5-dbb9-48f1-9935-206574bed65e


Quitting, TEST MODE!
