generated from opensafely/research-template
/
project.yaml
268 lines (235 loc) · 12.7 KB
/
project.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# Pipeline definition. Every entry under `actions` declares the command it
# runs (`run`), the actions it depends on (`needs`), and the files it writes
# (`outputs`, grouped by sensitivity level). Nesting matters: `run`, `needs`
# and `outputs` must be scoped to their own action, otherwise they collide as
# duplicate keys in a single flat mapping.
version: '3.0'

expectations:
  population_size: 200000

actions:

  # --- Cohort extraction -----------------------------------------------------

  generate_study_population:
    run: cohortextractor:latest generate_cohort --study-definition study_definition --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/input.csv.gz

  # One extract per consultation year, index date 1 March 2019 to 2023, all
  # written to output/measures.
  generate_study_population_consults_2019:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_consults_year --index-date-range "2019-03-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_consults_year_2019-03-01.csv.gz

  generate_study_population_consults_2020:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_consults_year --index-date-range "2020-03-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_consults_year_2020-03-01.csv.gz

  generate_study_population_consults_2021:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_consults_year --index-date-range "2021-03-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_consults_year_2021-03-01.csv.gz

  generate_study_population_consults_2022:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_consults_year --index-date-range "2022-03-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_consults_year_2022-03-01.csv.gz

  generate_study_population_consults_2023:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_consults_year --index-date-range "2023-03-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_consults_year_2023-03-01.csv.gz

  generate_study_population_count:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_count --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/input_count.csv.gz

  summary_counts:
    run: stata-mp:latest analysis/002_summary_counts.do
    needs: [generate_study_population_count]
    outputs:
      # NOTE(review): this log is declared highly_sensitive, unlike the logs of
      # the table/figure actions below - confirm that is intentional.
      highly_sensitive:
        log1: logs/summary_counts.log

  generate_study_population_allpts:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_allpts --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/input_allpts.csv.gz

  # One extract per year, index date 1 September 2015 to 2023, all written to
  # output/measures; these feed generate_measures and create_admission_counts.
  generate_study_population_2015:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_year --index-date-range "2015-09-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2015-09-01.csv.gz

  generate_study_population_2016:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_year --index-date-range "2016-09-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2016-09-01.csv.gz

  generate_study_population_2017:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_year --index-date-range "2017-09-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2017-09-01.csv.gz

  generate_study_population_2018:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_year --index-date-range "2018-09-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2018-09-01.csv.gz

  generate_study_population_2019:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_year --index-date-range "2019-09-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2019-09-01.csv.gz

  generate_study_population_2020:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_year --index-date-range "2020-09-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2020-09-01.csv.gz

  generate_study_population_2021:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_year --index-date-range "2021-09-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2021-09-01.csv.gz

  generate_study_population_2022:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_year --index-date-range "2022-09-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2022-09-01.csv.gz

  generate_study_population_2023:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_year --index-date-range "2023-09-01" --output-dir=output/measures --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2023-09-01.csv.gz

  # --- Measures --------------------------------------------------------------

  # Depends on all nine yearly extracts and uses the same study definition and
  # output directory as they do.
  generate_measures:
    run: cohortextractor:latest generate_measures --study-definition study_definition_year --output-dir=output/measures
    needs:
      - generate_study_population_2015
      - generate_study_population_2016
      - generate_study_population_2017
      - generate_study_population_2018
      - generate_study_population_2019
      - generate_study_population_2020
      - generate_study_population_2021
      - generate_study_population_2022
      - generate_study_population_2023
    outputs:
      moderately_sensitive:
        measure_csv: output/measures/measure_*.csv

  # --- Stata data preparation ------------------------------------------------

  create_cohorts_allpts:
    run: stata-mp:latest analysis/001_define_covariates_allpts.do
    needs: [generate_study_population_allpts]
    outputs:
      highly_sensitive:
        log1: logs/cleaning_dataset_allpts.log
        data1: output/data/file_gout_allpts.dta

  create_cohorts:
    run: stata-mp:latest analysis/000_define_covariates.do
    needs: [generate_study_population, generate_measures]
    outputs:
      highly_sensitive:
        log1: logs/cleaning_dataset.log
        data1: output/data/file_gout_all.dta
        data2: output/data/gout_prevalence_sex_long.dta
        data3: output/data/gout_incidence_sex_long.dta
        data4: output/data/gout_admissions_sex_long.dta

  create_cohorts_consults_year:
    run: stata-mp:latest analysis/004_define_covariates_consults_year.do
    needs:
      - generate_study_population_consults_2019
      - generate_study_population_consults_2020
      - generate_study_population_consults_2021
      - generate_study_population_consults_2022
      - generate_study_population_consults_2023
    outputs:
      highly_sensitive:
        log1: logs/cleaning_dataset_consults_year.log
        data1: output/data/input_consults_year_2019-03-01.dta
        data2: output/data/input_consults_year_2020-03-01.dta
        data3: output/data/input_consults_year_2021-03-01.dta
        data4: output/data/input_consults_year_2022-03-01.dta
        data5: output/data/input_consults_year_2023-03-01.dta

  create_admission_counts:
    run: stata-mp:latest analysis/003_define_admissions.do
    needs:
      - generate_study_population_2015
      - generate_study_population_2016
      - generate_study_population_2017
      - generate_study_population_2018
      - generate_study_population_2019
      - generate_study_population_2020
      - generate_study_population_2021
      - generate_study_population_2022
      - generate_study_population_2023
    outputs:
      highly_sensitive:
        data1: output/measures/gout_admissions.dta

  # --- Tables, figures and models --------------------------------------------

  run_baseline_tables_allpts:
    run: stata-mp:latest analysis/101_baseline_characteristics_allpts.do
    needs: [create_cohorts_allpts]
    outputs:
      moderately_sensitive:
        log1: logs/descriptive_tables_allpts.log
        doc1: output/tables/baseline_allpts.csv

  run_baseline_tables:
    run: stata-mp:latest analysis/100_baseline_characteristics.do
    needs: [create_cohorts, create_admission_counts]
    outputs:
      moderately_sensitive:
        log1: logs/descriptive_tables.log
        doc1: output/tables/incidence_year_rounded.csv
        doc2: output/tables/incidence_month_rounded.csv
        # NOTE(review): "prevalance" (here and in figure3) presumably matches
        # the filename the .do file writes - confirm before correcting it.
        doc3: output/tables/prevalance_year_rounded.csv
        doc4: output/tables/incidence_admission_year_rounded.csv
        doc5: output/tables/admission_month_rounded.csv
        doc6: output/tables/baseline_bydiagnosis.csv
        doc7: output/tables/baseline_byyear.csv
        doc8: output/tables/ult6m_byyear.csv
        doc9: output/tables/ult6m_byregion.csv
        doc10: output/tables/ult12m_byyear.csv
        doc11: output/tables/ult12m_byregion.csv
        doc12: output/tables/urate6m*.csv
        doc13: output/tables/urate12m*.csv
        # NOTE(review): the three globs below also match files declared by
        # run_redacted_tables (ult_byyearandregion_rounded.csv,
        # urate_6m_ult_byyearandregion_rounded.csv,
        # urate_12m_ult_byyearandregion_rounded.csv) - confirm the overlap is
        # intended, or narrow the patterns.
        doc14: output/tables/ult_byyear*.csv
        doc15: output/tables/urate_6m*.csv
        doc16: output/tables/urate_12m*.csv
        figure1: output/figures/incidence_year_rounded.svg
        figure2: output/figures/incidence_month_rounded.svg
        figure3: output/figures/prevalance_year_rounded.svg
        figure4: output/figures/incidence_admission_year_rounded.svg
        figure5: output/figures/admission_month_rounded.svg

  run_itsa_models:
    run: stata-mp:latest analysis/200_itsa_models.do
    needs: [create_cohorts]
    outputs:
      moderately_sensitive:
        log1: logs/itsa_models.log
        figure1: output/figures/ITSA*.svg
        doc1: output/tables/ITSA*.csv

  run_box_plots:
    run: stata-mp:latest analysis/300_box_plots.do
    needs: [create_cohorts]
    outputs:
      moderately_sensitive:
        log1: logs/box_plots.log
        # Output names follow the `figureN` form used by the other actions
        # (no embedded space); the file paths are unchanged.
        figure1: output/figures/regional_ult_overall_6m.svg
        figure2: output/figures/regional_ult_merged_6m.svg
        figure3: output/figures/regional_ult_overall_12m.svg
        figure4: output/figures/regional_ult_merged_12m.svg
        figure5: output/figures/regional_urate_overall_6m.svg
        figure6: output/figures/regional_urate_merged_6m.svg
        figure7: output/figures/regional_urate_overall_6m_test.svg
        figure8: output/figures/regional_urate_merged_6m_test.svg
        figure9: output/figures/regional_urate_overall_6m_ult.svg
        figure10: output/figures/regional_urate_merged_6m_ult.svg
        figure11: output/figures/reg_urate_overall_6m_ult_test.svg
        figure12: output/figures/reg_urate_merged_6m_ult_test.svg
        figure13: output/figures/regional_urate_overall_12m.svg
        figure14: output/figures/regional_urate_merged_12m.svg
        figure15: output/figures/regional_urate_overall_12m_test.svg
        figure16: output/figures/regional_urate_merged_12m_test.svg
        figure17: output/figures/regional_urate_overall_12m_ult.svg
        figure18: output/figures/regional_urate_merged_12m_ult.svg
        figure19: output/figures/reg_urate_overall_12m_ult_test.svg
        figure20: output/figures/reg_urate_merged_12m_ult_test.svg

  run_redacted_tables:
    run: stata-mp:latest analysis/400_redacted_tables.do
    needs: [create_cohorts]
    outputs:
      moderately_sensitive:
        log1: logs/redacted_tables.log
        doc1: output/tables/table_1_rounded_bydiag.csv
        doc2: output/tables/ult_byyearandregion_rounded.csv
        doc3: output/tables/urate_6m_ult_byyearandregion_rounded.csv
        doc4: output/tables/urate_12m_ult_byyearandregion_rounded.csv
        doc5: output/tables/table_mean_rounded.csv
        doc6: output/tables/urate_monitoring_rounded_all.csv

  run_redacted_tables_allpts:
    run: stata-mp:latest analysis/401_redacted_tables_allpts.do
    needs: [create_cohorts_allpts]
    outputs:
      moderately_sensitive:
        log1: logs/redacted_tables_allpts.log
        doc1: output/tables/table_1_rounded_allpts.csv
        doc2: output/tables/table_mean_rounded_allpts.csv

  # --- Disabled actions (kept commented out, as in the original) -------------

  # convert_image_formats:
  #   run: python:latest python analysis/convert_images.py --input_dir output/figures --output_dir output/figures
  #   needs: [run_baseline_tables, run_itsa_models, run_box_plots, run_redacted_tables]
  #   outputs:
  #     moderately_sensitive:
  #       figures: output/figures/*.png

  # generate_notebook:
  #   run: jupyter:latest jupyter nbconvert /workspace/analysis/report.ipynb --execute --to html --template basic --output-dir=/workspace/output --ExecutePreprocessor.timeout=86400 --no-input
  #   needs: [convert_image_formats, run_baseline_tables, run_itsa_models, run_box_plots, run_redacted_tables]
  #   outputs:
  #     moderately_sensitive:
  #       notebook: output/report.html