# seqr_loader.toml
[workflow]
name = "seqr_loader"
dataset_gcp_project = "seqr-308602"
dataset = "seqr"
status_reporter = "metamist"

# Use GnarlyGenotyper in place of GenotypeGVCFs.
use_gnarly = false

# Use allele-specific annotations for VQSR.
use_as_vqsr = true

# Version of VEP to run. Currently operational: 105, 110.
vep_version = "110"

# Realign from CRAM when one is available, instead of using FASTQ.
# The value should correspond to the CRAM version
# (e.g. v0 in gs://cpg-fewgenomes-main/cram/v0/CPGaaa.cram).
#realign_from_cram_version = "v0"

# Calling intervals (defaults to whole-genome intervals).
#intervals_path =

# Create Seqr ElasticSearch indices for these datasets. If not specified,
# indices are created for all input datasets.
#create_es_index_for_datasets = []

# NOTE(review): presumably the datasets for which a VCF is also written;
# confirm against the pipeline code that reads this key.
write_vcf = ["udn-aus"]
[resource_overrides]
# Override default resource requirements for unusually large sequencing data
# without demanding higher resources for all operations as standard. Every key
# below is a commented-out example, so this table is empty as written.
# Picard MarkDuplicates overrides for unreasonably large sequencing groups
#picard_mem_gb = 100
#picard_storage_gb = 350
# HaplotypeCaller overrides, see production-pipelines PR#381
# defaults in code are 40 for genomes, none for exomes
#haplotypecaller_storage = 80
[vqsr]
# VQSR, when applying the model, targets the indel_filter_level and
# snp_filter_level sensitivities. The tool matches them internally to a VQSLOD
# score cutoff based on the model's estimated sensitivity to a set of true
# variants.
# NOTE(review): the values look like percentages on a 0-100 scale; confirm.
snp_filter_level = 99.7
indel_filter_level = 99.0
[cramqc]
# NOTE(review): presumably tells the QC tooling that input CRAMs are already
# sorted; confirm against the consumer.
assume_sorted = true
# NOTE(review): presumably a number of principal components; verify which
# stage reads this key and whether it belongs under [cramqc].
num_pcs = 4
[qc_thresholds.genome.min]
# Lower bounds for genome QC metrics, keyed by metric name.
# NOTE(review): PCT_PF_READS_ALIGNED is presumably a fraction (0-1); confirm.
MEDIAN_COVERAGE = 10
PCT_PF_READS_ALIGNED = 0.80
[qc_thresholds.genome.max]
# Upper bounds for genome QC metrics, keyed by metric name.
# NOTE(review): PERCENT_DUPLICATION is presumably on a 0-100 scale while
# FREEMIX is a fraction; confirm the units the consumer compares against.
FREEMIX = 0.04
PERCENT_DUPLICATION = 25
[hail]
# NOTE(review): presumably the billing project and worker pool label used for
# Hail jobs; confirm against the consumer.
billing_project = "seqr"
pool_label = "seqr"
[slack]
channel = "workflows-qc"
# NOTE(review): presumably the secret (ID and owning project) that holds the
# Slack token; confirm against the consumer.
token_project_id = "seqr-308602"
token_secret_id = "slack-seqr-loader-token"
[elasticsearch]
# Configure access to the ElasticSearch server.
host = "elasticsearch.es.australia-southeast1.gcp.elastic-cloud.com"
# NOTE(review): port is a string, not an integer; confirm the type the
# consumer expects before changing it.
port = "9243"
username = "seqr"
# The ElasticSearch password is loaded from this secret, unless
# SEQR_ES_PASSWORD is set.
password_secret_id = "seqr-es-password"
password_project_id = "seqr-308602"
# Temporary overrides until we rename images/config.
[images]
# Only the VEP 105 image is overridden here.
vep_105 = "australia-southeast1-docker.pkg.dev/cpg-common/images/vep:105.0"
[references]
# VEP mount locations in GCS, one key per VEP version (105 and 110).
# NOTE(review): presumably selected according to workflow.vep_version; confirm.
vep_105_mount = "gs://cpg-common-main/references/vep/105.0/mount"
vep_110_mount = "gs://cpg-common-main/references/vep/110/mount"
[references.hg38_telomeres_and_centromeres_intervals]
# Derived from hg38.telomeresAndMergedCentromeres.bed, as used in gnomAD v3.
interval_list = "gs://cpg-common-main/references/hg38/v0/hg38.telomeresAndMergedCentromeres.interval_list"