/
builds.yaml
183 lines (168 loc) · 6.55 KB
/
builds.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# Define the locations for which builds should be created.
# Each build names the subsampling scheme it uses via an explicit
# `subsampling_scheme` key; the schemes are defined under `subsampling`.
builds:
  # First we'll define a country-level build, focused on Switzerland.
  # Its sampling scheme is defined under `subsampling` below.
  switzerland:
    subsampling_scheme: switzerland  # <- use the 'switzerland' sampling scheme
    region: Europe
    country: Switzerland
    # If we want to have specific files for colors or the auspice_config file
    # *just* for this build, we can specify them here.
    colors: "my_profiles/example_advanced_customization/switzerland_build_colors.tsv"
    auspice_config: "my_profiles/example_advanced_customization/auspice_config_switzerland_build.json"
    # We can also set a title for *just* this build.
    title: "SARS-CoV-2 Sequences in Switzerland"

  # Now we'll have 2 division-level builds, for 'cantons' (states) in
  # Switzerland. Both share the same 'canton' sampling scheme.
  basel-stadt:
    subsampling_scheme: canton  # <- use the 'canton' sampling scheme
    region: Europe
    country: Switzerland
    division: Basel-Stadt
  geneva:
    subsampling_scheme: canton
    region: Europe
    country: Switzerland
    division: Geneva

  # Finally we'll have a build that involves multiple cantons (divisions) in
  # a region around a lake in Switzerland.
  lac-leman:
    subsampling_scheme: lac-leman  # <- this is a specialised build (see below)
    region: Europe
    country: Switzerland
    division: Lac Leman
# Subsampling schemes, keyed by the name that builds reference through
# `subsampling_scheme`. Each scheme is a set of named sample rules; every rule
# gives a grouping, a size limit and an `exclude` filter, and contextual rules
# may also set `priorities`.
subsampling:
  # Here we use the default subsampling logic for countries
  switzerland:
    # Focal samples for country
    country:
      group_by: "division year month"
      max_sequences: 1500
      exclude: "--exclude-where 'country!={country}'"
    # Contextual samples from country's region
    region:
      group_by: "country year month"
      seq_per_group: 20
      exclude: "--exclude-where 'country={country}' 'region!={region}'"
      priorities:
        type: "proximity"
        focus: "country"
    # Contextual samples from the rest of the world,
    # excluding the current region to avoid resampling.
    global:
      group_by: "country year month"
      seq_per_group: 10
      exclude: "--exclude-where 'region={region}'"
      priorities:
        type: "proximity"
        focus: "country"

  # Here we use the default subsampling logic for divisions
  # This is used for both canton builds: Basel-Stadt & Geneva
  canton:
    # Focal samples for division
    division:
      group_by: "year month"
      seq_per_group: 300
      exclude: "--exclude-where 'region!={region}' 'country!={country}' 'division!={division}'"
    # Contextual samples from division's country
    country:
      group_by: "division year month"
      seq_per_group: 20
      exclude: "--exclude-where 'region!={region}' 'country!={country}' 'division={division}'"
      priorities:
        type: "proximity"
        focus: "division"
    # Contextual samples from division's region
    region:
      group_by: "country year month"
      seq_per_group: 10
      exclude: "--exclude-where 'region!={region}' 'country={country}'"
      priorities:
        type: "proximity"
        focus: "division"
    # Contextual samples from the rest of the world, excluding the current
    # region to avoid resampling.
    global:
      group_by: "country year month"
      seq_per_group: 5
      exclude: "--exclude-where 'region={region}'"
      priorities:
        type: "proximity"
        focus: "division"

  # This build will take from 3 cantons - we have a sample rule for each,
  # rather than a single focal division. The filters below use literal
  # values instead of the {region}/{country}/{division} placeholders.
  lac-leman:
    # Focal samples, one rule per canton
    geneva:
      group_by: "year month"
      seq_per_group: 300
      exclude: "--exclude-where 'division!=geneva'"
    vaud:
      group_by: "year month"
      seq_per_group: 300
      exclude: "--exclude-where 'division!=vaud'"
    valais:
      group_by: "year month"
      seq_per_group: 300
      exclude: "--exclude-where 'division!=valais'"
    # Contextual samples from the country
    # NOTE(review): unlike the 'canton' scheme, this rule does not exclude the
    # focal cantons and sets no priorities - confirm this is intended.
    country:
      group_by: "division year month"
      seq_per_group: 20
      exclude: "--exclude-where 'country!=switzerland'"
    # Contextual samples from the region
    # NOTE(review): unlike the 'canton' scheme, this rule does not exclude the
    # focal country, so Swiss sequences can be picked again - confirm.
    region:
      group_by: "country year month"
      seq_per_group: 10
      exclude: "--exclude-where 'region!=europe'"
      priorities:
        type: "proximity"
        focus: "country"
    # Contextual samples from the rest of the world, excluding Europe
    # to avoid resampling.
    global:
      group_by: "country year month"
      seq_per_group: 5
      exclude: "--exclude-where 'region=europe'"
      priorities:
        type: "proximity"
        focus: "country"
# Here we can specify the colors file & auspice_config to be used for all of
# the above builds - unless a build specifies its own above!
# If you don't specify these, the files in the `default_config` folder will
# be used.
files:
  colors: "my_profiles/example_advanced_customization/colors.tsv"
  auspice_config: "my_profiles/example_advanced_customization/auspice_config_swiss.json"
## You could also specify a title which will be used for all builds
## where you haven't already specified a title above:
## (Uncomment this to use!):
# title: "SARS-CoV-2 Builds Focused on Regions of Switzerland"
#########
## Finally, you can specify what traits you want to reconstruct, per build.
## If you have exposure information for a trait, specify it as well, so that
## this is incorporated into the final run. (Otherwise, it will simply be excluded)
## First, specify the traits you want to reconstruct, per build:
## (uncomment to use!)
#traits:
#  switzerland: # for the switzerland build
#    columns: ["location", "division_exposure"]
## ^ Note that the swiss samples do not have 'location' (typically, city)
## information, so this command would fail as-is - but for many other places
## there are samples with 'location' information.
#exposure:
#  switzerland: # for the same build, incorporate division exposure information
#    trait: "division"
#    exposure: "division_exposure"
##########
## If you want to modify parameters for the steps in the build - augur commands -
## you can copy these from the 'defaults/parameters.yaml' file:
## Here are some examples below - uncomment if you want to use them:
## Modify filter settings. By default we filter out sequences less than 27000 bases
## If you wanted to include slightly shorter sequences, change this:
#filter:
#  min_length: 25000
## TreeTime settings for the 'refine' rule. By default we exclude sequences more than
## 4 IQD from the root-to-tip vs clock regression. You can change this if you want to include
## more divergent sequences:
#refine:
#  clock_filter_iqd: 6