# gisaid.yaml
#### Parameters which define which builds to produce via this config ####
# Mapping of subtype -> list of time ranges to build for that subtype.
builds:
  h5nx:
    - all-time
    - 2y
  h5n1:
    - all-time
    - 2y
  h7n9:
    - all-time
  h9n2:
    - all-time
# Genome segments; these names match the `{segment}` placeholder and the
# per-segment keys used elsewhere in this config.
segments:
  - pb2
  - pb1
  - pa
  - ha
  - np
  - na
  - mp
  - ns
#### Parameters which define the input source ####
s3_src:
  name: gisaid
  metadata: s3://nextstrain-data-private/files/workflows/avian-flu/metadata.tsv.zst
  # `{segment}` is a placeholder within the path template.
  sequences: s3://nextstrain-data-private/files/workflows/avian-flu/{segment}/sequences.fasta.zst
local_ingest: false
# P.S. To use local ingest files, comment out s3_src and change the line
# above to `local_ingest: fauna`
#### Parameters which control large, overarching aspects of the build ####
target_sequences_per_tree: 3000
same_strains_per_segment: false
#### Config files ####
# Paths are templates: `{subtype}`, `{segment}` and `{time}` are placeholders.
# Values are quoted so the braces are always read as literal string content.
reference: 'config/{subtype}/reference_{subtype}_{segment}.gb'
auspice_config: 'config/{subtype}/auspice_config_{subtype}.json'
colors: 'config/{subtype}/colors_{subtype}.tsv'
lat_longs: 'config/{subtype}/lat_longs_{subtype}.tsv'
include_strains: 'config/{subtype}/include_strains_{subtype}_{time}.txt'
dropped_strains: 'config/{subtype}/dropped_strains_{subtype}.txt'
clades_file: 'clade-labeling/{subtype}-clades.tsv'
description: 'config/description_gisaid.md'
#### Rule-specific parameters ####
filter:
  # Rule parameters are typically defined for each build, e.g.
  # min_length.h5nx.2y = ... The "FALLBACK" key (e.g. min_length.FALLBACK)
  # is used as a fallback value if nothing is specifically defined for the
  # subtype/time combination.
  # Some parameters have an extra level of hierarchy keyed by segment,
  # some don't.
  min_length:
    FALLBACK:
      pb2: 2100
      pb1: 2100
      pa: 2000
      ha: 1600
      np: 1400
      na: 1270
      mp: 900
      ns: 800

  min_date:
    h5nx:
      all-time: 1996
      2y: 2Y
    h5n1:
      all-time: 1996
      2y: 2Y
    h7n9:
      all-time: 2013
    h9n2:
      all-time: 1966

  # Each value is a space-separated list of column names.
  group_by:
    h5nx:
      all-time: subtype country year
      2y: subtype region month host
    h5n1:
      all-time: region country year
      2y: subtype region month host
    h7n9:
      all-time: division year
    h9n2:
      all-time: country year

  # Space-separated list of `column=value` conditions.
  exclude_where:
    FALLBACK: host=laboratoryderived host=ferret host=unknown host=other host=host country=? region=? gisaid_clade=3C.2
refine:
  coalescent: const
  date_inference: marginal

  clock_filter_iqd:
    FALLBACK: 4

  root:
    FALLBACK: false

  # YAML anchor so we can reference this value from the clock_rates entries.
  __clock_std_dev: &clock_std_dev 0.00211

  clock_rates:
    # Anything not specified by a subtype/time combination falls back to
    # FALLBACK, and the empty string means no supplied clock rate,
    # i.e. infer the clock.
    FALLBACK:
      pb2: ''
      pb1: ''
      pa: ''
      ha: ''
      np: ''
      na: ''
      mp: ''
      ns: ''
    # Explicit entries are two-element lists whose second element is the
    # shared clock_std_dev anchor value; the first is presumably the clock
    # rate itself (TODO confirm against the consuming rule).
    h5nx:
      2y:
        pb2: [0.00287, *clock_std_dev]
        pb1: [0.00267, *clock_std_dev]
        pa: [0.00238, *clock_std_dev]
        ha: [0.0048, *clock_std_dev]
        np: [0.0022, *clock_std_dev]
        na: [0.0028, *clock_std_dev]
        mp: [0.0017, *clock_std_dev]
        ns: [0.0017, *clock_std_dev]
    h5n1:
      2y:
        pb2: [0.00287, *clock_std_dev]
        pb1: [0.00264, *clock_std_dev]
        pa: [0.00248, *clock_std_dev]
        ha: [0.00455, *clock_std_dev]
        np: [0.00252, *clock_std_dev]
        na: [0.00349, *clock_std_dev]
        mp: [0.00191, *clock_std_dev]
        ns: [0.00249, *clock_std_dev]
ancestral:
  inference: joint
  # Uses the same FALLBACK convention as the other rule parameters.
  root_seq:
    FALLBACK: false
traits:
  # Space-separated column names, defined per subtype/time combination.
  columns:
    h5nx:
      all-time: region
      2y: region
    h5n1:
      all-time: region country
      2y: region country
    h7n9:
      all-time: country division
    h9n2:
      all-time: region country

  sampling_bias_correction:
    FALLBACK: false

  confidence:
    FALLBACK: true
export:
  title:
    # false => use the title in the auspice JSON
    FALLBACK: false
    # TODO - if we parameterise this here we can use the subtype wildcard
    # to customise the title