# project.yaml
# Repository generated from the opensafely/research-template template.
# Pipeline definition for this study.
#
# Layout of every action:
#   run      command executed for the action (image:tag followed by arguments)
#   needs    actions whose outputs must exist before this one runs
#   outputs  files produced, grouped by sensitivity level and keyed by a
#            short label
#
# Per the OpenSAFELY pipeline documentation, `highly_sensitive` marks
# patient-level files and `moderately_sensitive` marks files that may be
# reviewed for release.
version: '3.0'

expectations:
  # Population size used for the `expectations` (dummy data) configuration.
  population_size: 10000

actions:

  # --------------------------------------------------------------------------
  # 1. Cohort extraction: one `generate_cohort` run per study definition,
  #    each writing output/cohorts/input_<cohort>.csv.
  # --------------------------------------------------------------------------
  generate_study_population_covid_2020:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_covid_2020 --output-dir=output/cohorts
    outputs:
      highly_sensitive:
        cohort: output/cohorts/input_covid_2020.csv

  generate_study_population_general_2019:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_general_2019 --output-dir=output/cohorts
    outputs:
      highly_sensitive:
        cohort: output/cohorts/input_general_2019.csv

  generate_study_population_general_2020:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_general_2020 --output-dir=output/cohorts
    outputs:
      highly_sensitive:
        cohort: output/cohorts/input_general_2020.csv

  generate_study_population_pneumonia_2019:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_pneumonia_2019 --output-dir=output/cohorts
    outputs:
      highly_sensitive:
        cohort: output/cohorts/input_pneumonia_2019.csv

  # --------------------------------------------------------------------------
  # 2. Matching: analysis/match_running.py is run once per general-population
  #    year; each run needs the covid 2020 cohort and that year's general
  #    cohort, and emits a text report plus a matched CSV.
  # --------------------------------------------------------------------------
  matching_2019:
    run: python:latest python analysis/match_running.py "2019" --output-dir=output/cohorts
    needs:
      - generate_study_population_covid_2020
      - generate_study_population_general_2019
    outputs:
      moderately_sensitive:
        matching_report: output/cohorts/matching_report_general_2019.txt
      highly_sensitive:
        matched_cohort: output/cohorts/matched_matches_general_2019.csv

  matching_2020:
    run: python:latest python analysis/match_running.py "2020" --output-dir=output/cohorts
    needs:
      - generate_study_population_covid_2020
      - generate_study_population_general_2020
    outputs:
      moderately_sensitive:
        matching_report: output/cohorts/matching_report_general_2020.txt
      highly_sensitive:
        matched_cohort: output/cohorts/matched_matches_general_2020.csv

  # --------------------------------------------------------------------------
  # 3. Sick-note spell reconciliation: analysis/reconcile_sick_note_spells.py
  #    is run per cohort and writes a *_with_duration.csv file.
  #    The two positional arguments look like a cohort suffix and an
  #    input-file prefix ("input" or "matched_matches") - confirm against the
  #    script.
  # --------------------------------------------------------------------------
  reconcile_sick_note_spells_covid_2020:
    run: python:latest python analysis/reconcile_sick_note_spells.py "_covid_2020" "input"
    needs:
      - generate_study_population_covid_2020
    outputs:
      highly_sensitive:
        cohort_with_duration: output/cohorts/input_covid_2020_with_duration.csv

  # NOTE(review): invoked with "matched_matches" but the declared output is
  # named input_general_2019_with_duration.csv - confirm the script really
  # writes this file name.
  reconcile_sick_note_spells_general_2019:
    run: python:latest python analysis/reconcile_sick_note_spells.py "_general_2019" "matched_matches"
    needs:
      - matching_2019
    outputs:
      highly_sensitive:
        cohort_with_duration: output/cohorts/input_general_2019_with_duration.csv

  # NOTE(review): invoked with "matched_matches" but the declared output is
  # named input_general_2020_with_duration.csv - confirm the script really
  # writes this file name.
  reconcile_sick_note_spells_general_2020:
    run: python:latest python analysis/reconcile_sick_note_spells.py "_general_2020" "matched_matches"
    needs:
      - matching_2020
    outputs:
      highly_sensitive:
        cohort_with_duration: output/cohorts/input_general_2020_with_duration.csv

  reconcile_sick_note_spells_pneumonia_2019:
    run: python:latest python analysis/reconcile_sick_note_spells.py "_pneumonia_2019" "input"
    needs:
      - generate_study_population_pneumonia_2019
    outputs:
      highly_sensitive:
        cohort_with_duration: output/cohorts/input_pneumonia_2019_with_duration.csv

  # --------------------------------------------------------------------------
  # 4. Rates cohorts: the Stata do-file
  #    analysis/000_cr_define_covariates_simple_rates.do is run per cohort on
  #    the reconciled data and writes cohort_rates_<cohort>.dta.
  # --------------------------------------------------------------------------
  covid_2020_rates_cohort:
    run: stata-mp:latest analysis/000_cr_define_covariates_simple_rates.do "covid_2020" --output-dir=output/cohorts
    needs:
      - reconcile_sick_note_spells_covid_2020
    outputs:
      highly_sensitive:
        analysis_dataset: output/cohorts/cohort_rates_covid_2020.dta

  general_2019_rates_cohort:
    run: stata-mp:latest analysis/000_cr_define_covariates_simple_rates.do "general_2019" --output-dir=output/cohorts
    needs:
      - reconcile_sick_note_spells_general_2019
    outputs:
      highly_sensitive:
        analysis_dataset: output/cohorts/cohort_rates_general_2019.dta

  general_2020_rates_cohort:
    run: stata-mp:latest analysis/000_cr_define_covariates_simple_rates.do "general_2020" --output-dir=output/cohorts
    needs:
      - reconcile_sick_note_spells_general_2020
    outputs:
      highly_sensitive:
        analysis_dataset: output/cohorts/cohort_rates_general_2020.dta

  pneumonia_2019_rates_cohort:
    run: stata-mp:latest analysis/000_cr_define_covariates_simple_rates.do "pneumonia_2019" --output-dir=output/cohorts
    needs:
      - reconcile_sick_note_spells_pneumonia_2019
    outputs:
      highly_sensitive:
        analysis_dataset: output/cohorts/cohort_rates_pneumonia_2019.dta

  # --------------------------------------------------------------------------
  # 5. Rate summaries: analysis/100_cr_simple_rates.do is run per cohort and
  #    writes output/tabfig/rates_summary_<cohort>.csv.
  # --------------------------------------------------------------------------
  covid_2020_rates:
    run: stata-mp:latest analysis/100_cr_simple_rates.do "covid_2020" --output-dir=output/tabfig
    needs:
      - covid_2020_rates_cohort
    outputs:
      moderately_sensitive:
        rates: output/tabfig/rates_summary_covid_2020.csv

  general_2019_rates:
    run: stata-mp:latest analysis/100_cr_simple_rates.do "general_2019" --output-dir=output/tabfig
    needs:
      - general_2019_rates_cohort
    outputs:
      moderately_sensitive:
        rates: output/tabfig/rates_summary_general_2019.csv

  general_2020_rates:
    run: stata-mp:latest analysis/100_cr_simple_rates.do "general_2020" --output-dir=output/tabfig
    needs:
      - general_2020_rates_cohort
    outputs:
      moderately_sensitive:
        rates: output/tabfig/rates_summary_general_2020.csv

  pneumonia_2019_rates:
    run: stata-mp:latest analysis/100_cr_simple_rates.do "pneumonia_2019" --output-dir=output/tabfig
    needs:
      - pneumonia_2019_rates_cohort
    outputs:
      moderately_sensitive:
        rates: output/tabfig/rates_summary_pneumonia_2019.csv

  # --------------------------------------------------------------------------
  # 6. Combined datasets and Cox models.
  #    append_cohorts needs all four rates cohorts and writes three combined
  #    .dta files plus a log; cox_models consumes those outputs.
  # --------------------------------------------------------------------------
  append_cohorts:
    run: stata-mp:latest analysis/200_cr_data_management_matching.do --output-dir=output/cohorts
    needs:
      - covid_2020_rates_cohort
      - pneumonia_2019_rates_cohort
      - general_2020_rates_cohort
      - general_2019_rates_cohort
    outputs:
      moderately_sensitive:
        log: output/cohorts/append_cohorts.txt
      highly_sensitive:
        dataset: output/cohorts/combined_covid_pneumonia.dta
        dataset2: output/cohorts/combined_covid_general_2019.dta
        dataset3: output/cohorts/combined_covid_general_2020.dta

  cox_models:
    run: stata-mp:latest analysis/201_cox_models.do
    needs:
      - append_cohorts
    outputs:
      moderately_sensitive:
        log: output/cohorts/cox_models.txt
        dataset: output/tabfig/cox_model_summary.csv

  # --------------------------------------------------------------------------
  # 7. Notebooks, executed with nbconvert and rendered to HTML.
  #    The commands are written as folded scalars (`>-`): the lines are
  #    joined with single spaces, so the parsed command is one line.
  # --------------------------------------------------------------------------
  describe_duration:
    run: >-
      jupyter:latest jupyter nbconvert
      /workspace/notebooks/describe_duration.ipynb
      --execute
      --to html
      --template basic
      --output-dir=/workspace/output
      --ExecutePreprocessor.timeout=86400
      --no-input
    needs:
      - covid_2020_rates_cohort
      - general_2019_rates_cohort
      - general_2020_rates_cohort
      - pneumonia_2019_rates_cohort
    outputs:
      moderately_sensitive:
        notebook: output/describe_duration.html
        table: output/tabfig/med_iqr_overall.csv
        table2: output/tabfig/med_iqr_age_group.csv
        table3: output/tabfig/med_iqr_sex.csv
        table4: output/tabfig/med_iqr_ethnicity.csv
        table5: output/tabfig/med_iqr_imd.csv
        table6: output/tabfig/med_iqr_region.csv

  rates_over_time:
    run: >-
      jupyter:latest jupyter nbconvert
      /workspace/notebooks/rates_over_time.ipynb
      --execute
      --to html
      --template basic
      --output-dir=/workspace/output
      --ExecutePreprocessor.timeout=86400
      --no-input
    needs:
      - generate_study_population_covid_2020
      - generate_study_population_general_2019
      - generate_study_population_general_2020
      - generate_study_population_pneumonia_2019
    outputs:
      moderately_sensitive:
        notebook: output/rates_over_time.html