generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 1
/
project.yaml
204 lines (178 loc) · 8.23 KB
/
project.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# OpenSAFELY pipeline definition: describes how each step in the analysis
# should be run. Each entry under `actions` names one step, the command that
# runs it (`run`), the actions it depends on (`needs`), and the files it is
# expected to produce (`outputs`, grouped by sensitivity level).
version: '3.0'

expectations:
  population_size: 1000

actions:
  # --- Data extraction ------------------------------------------------------
  # Three cohortextractor extracts, one per study definition, plus one
  # databuilder dataset. All four feed `combine_wide_data`.

  generate_wide_non_health:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_non_health --with-end-date-fix
    outputs:
      highly_sensitive:
        cohort: output/input_non_health.csv

  generate_wide_health_mh:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_health_mh --with-end-date-fix
    outputs:
      highly_sensitive:
        cohort: output/input_health_mh.csv

  generate_wide_health_non_mh:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_health_non_mh --with-end-date-fix
    outputs:
      highly_sensitive:
        cohort: output/input_health_non_mh.csv

  generate_wide_ons_cis_new:
    # Folded scalar: the lines below are joined with single spaces into one
    # command line.
    run: >
      databuilder:v0
      generate-dataset
      --output output/dataset_ons_cis_new.csv
      analysis/dataset_definition_ons_cis_new.py
    outputs:
      highly_sensitive:
        dataset: output/dataset_ons_cis_new.csv

  # --- Data preparation -----------------------------------------------------
  # A linear chain of R scripts: combine -> wide-to-long -> reconcile.

  combine_wide_data:
    run: r:latest analysis/1_combine_wide_data.R
    needs:
      - generate_wide_non_health
      - generate_wide_health_mh
      - generate_wide_health_non_mh
      - generate_wide_ons_cis_new
    outputs:
      highly_sensitive:
        cohort: output/input_cis_wide.csv

  transform_cis_wide_to_long:
    run: r:latest analysis/2_cis_wide_to_long.R
    needs: [combine_wide_data]
    outputs:
      highly_sensitive:
        cohort: output/input_cis_long.csv

  reconcile_snomed_ctv3:
    run: r:latest analysis/3_reconcile_snomed_ctv3.R
    needs: [transform_cis_wide_to_long]
    outputs:
      highly_sensitive:
        cohort: output/input_reconciled.csv
        # Disabled output, kept for reference:
        # bmi: output/bmi_summary_table_info_before_cap.csv

  # --- Exposed / control populations and matching ---------------------------
  # `derive_exposed` and `derive_controls` both branch from the reconciled
  # data and are joined again by `perform_matching`.

  derive_exposed:
    run: r:latest analysis/4_exposed_population.R
    needs: [reconcile_snomed_ctv3]
    outputs:
      highly_sensitive:
        cohort: output/cis_exposed.csv

  derive_controls:
    run: r:latest analysis/5_control_population.R
    needs: [reconcile_snomed_ctv3]
    outputs:
      highly_sensitive:
        cohort: output/cis_control.csv

  perform_matching:
    run: r:latest analysis/6_create_controls.R
    needs: [derive_exposed, derive_controls]
    outputs:
      highly_sensitive:
        incidence: output/incidence_group.csv
        prevalence: output/prevalence_group.csv
        # Disabled output, kept for reference:
        # exacerbated: output/exacerbated_group.csv

  testing_exposed_post_pre_matching:
    run: r:latest analysis/6_6_pre_post_match_testing.R
    needs: [derive_exposed, derive_controls, perform_matching]
    outputs:
      moderately_sensitive:
        t1: output/dates_order_control.csv
        t2: output/dates_order_exposed.csv

  adjust_matched_groups:
    run: r:latest analysis/7_adjust_groups.R
    needs: [perform_matching]
    outputs:
      highly_sensitive:
        incidence: output/adjusted_incidence_group.csv
        prevalence: output/adjusted_prevalence_group.csv

  # --- Analyses on the adjusted matched groups ------------------------------

  descriptive_stats:
    run: r:latest analysis/8_descriptive_statistics.R
    needs: [adjust_matched_groups]
    outputs:
      moderately_sensitive:
        incidence_cat_stats: output/1_descriptives_incidence_cat.csv
        incidence_con_stats: output/2_descriptives_incidence_con.csv
        prevalence_cat_stats: output/3_descriptives_prevalence_cat.csv
        prevalence_con_stats: output/4_descriptives_prevalence_con.csv

  # Every action below this one depends on `cumulative_incidence`.
  cumulative_incidence:
    run: r:latest analysis/9_cumulative_incidence_curves.R
    needs: [adjust_matched_groups]
    outputs:
      highly_sensitive:
        inc_t: output/incidence_t.csv
        prev_t: output/prevalence_t.csv
      moderately_sensitive:
        inc_surv: output/incidence_surv.jpg
        prev_surv: output/prevalence_surv.jpg

  hazard_ratios:
    run: r:latest analysis/10_new_hazard_ratio_code.R
    needs: [cumulative_incidence]
    outputs:
      moderately_sensitive:
        temp: output/5_cox_hazard_ratio_incidence_table.csv
        temp2: output/6_cox_hazard_ratio_prevalence_table.csv
        surv1: output/1_survfit_plot_incidence_noadj.jpg
        surv2: output/2_survfit_plot_prevalence_noadj.jpg
        surv3: output/3_survfit_plot_incidence_min.jpg
        surv4: output/4_survfit_plot_prevalence_min.jpg
        surv5: output/5_survfit_plot_incidence_full.jpg
        surv6: output/6_survfit_plot_prevalence_full.jpg
        shoen_1: output/inc_no_adj_schoenfeld_res.csv
        shoen_2: output/inc_min_adj_schoenfeld_res.csv
        shoen_3: output/inc_full_adj_schoenfeld_res.csv
        shoen_4: output/prev_no_adj_schoenfeld_res.csv
        shoen_5: output/prev_min_adj_schoenfeld_res.csv
        shoen_6: output/prev_full_adj_schoenfeld_res.csv
        shoen_jp1: output/inc_no_adj_schoenfeld_res.jpg
        shoen_jp2: output/inc_min_adj_schoenfeld_res.jpg
        shoen_jp3: output/inc_full_adj_schoenfeld_res.jpg
        shoen_jp4: output/prev_no_adj_schoenfeld_res.jpg
        shoen_jp5: output/prev_min_adj_schoenfeld_res.jpg
        shoen_jp6: output/prev_full_adj_schoenfeld_res.jpg

  distribution_of_follow_up_time:
    run: r:latest analysis/11_distribution_of_follow_up_time.R
    needs: [cumulative_incidence]
    outputs:
      moderately_sensitive:
        distribution_inc: output/distribution_of_follow_up_time_incidence.jpg
        distribution_prev: output/distribution_of_follow_up_time_prevalence.jpg
        density1: output/distribution_of_follow_up_time_incidence_density.jpg
        density2: output/distribution_of_follow_up_time_prevalence_density.jpg

  event_rates_incidence:
    run: r:latest analysis/12_event_rates_incidence.R
    needs: [cumulative_incidence]
    outputs:
      moderately_sensitive:
        incidence_event_rates: output/event_counts_and_rates_incidence.csv

  event_rates_prevalence:
    run: r:latest analysis/13_event_rates_prevalence.R
    needs: [cumulative_incidence]
    outputs:
      moderately_sensitive:
        # Label previously read `incidence_event_rates` (copied from the
        # action above); renamed to match the prevalence file it points at.
        prevalence_event_rates: output/event_counts_and_rates_prevalence.csv

  temporary_test_effects_of_time:
    run: r:latest analysis/14_testing_waves.R
    needs: [cumulative_incidence]
    outputs:
      moderately_sensitive:
        d1: output/99_coeff_exposed_spline_time_fulladj_incidence.csv
        d2: output/99_anova_exposed_spline_time_interactions.csv
        d3: output/99_coeff_exposed_spline_time_fulladj_prev.csv
        d4: output/99_anova_exposed_spline_time_fulladj_prev.csv
        d5: output/99_coefficients_for_waves_incidence.csv
        d6: output/99_anova_waves_time_interaction.csv

  emmeans:
    run: r:latest analysis/15_emmeans.R
    needs: [cumulative_incidence]
    outputs:
      moderately_sensitive:
        d1: output/99_emmeans_incidence.csv
        d2: output/99_emmeans_incidence.jpg
        d3: output/99_emmeans_3df_incidence.csv
        d4: output/99_emmeans_3df_incidence.jpg
        d5: output/99_emmeans_4df_incidence.csv
        d6: output/99_emmeans_4df_incidence.jpg
        # NOTE(review): d7 and d8 use the extension `.cvs` (not `.csv`) and d8
        # spells `avova` (not `anova`). Left unchanged because these paths
        # must match the file names analysis/15_emmeans.R actually writes -
        # confirm against the script and fix both places together.
        d7: output/BIC_all_3models.cvs
        d8: output/different_degrees_of_freedom_avova_all_adjustments.cvs

  hr_for_interaction_with_spline:
    run: r:latest analysis/16_hr_for_interaction_with_spline.R
    needs: [cumulative_incidence]
    outputs:
      moderately_sensitive:
        d1: output/100_hazard_ratios_by_modifier_incidence.csv
        d2: output/100_hazard_ratios_by_modifier_variable.jpg