generated from opensafely/research-template
/
project.yaml
144 lines (127 loc) · 4.69 KB
/
project.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# OpenSAFELY project pipeline (project.yaml).

# Pipeline format version. Kept as a quoted string so YAML does not read it
# as the float 3.0.
version: '3.0'

# `population_size` is set to 500.
# NOTE(review): per the OpenSAFELY pipeline documentation this is the number
# of dummy patients generated for local runs - confirm before relying on it.
expectations:
  population_size: 500
# Pipeline actions. For every action:
#   run     - the command line that is executed
#   needs   - actions that must have produced their outputs first
#   outputs - files the action writes, grouped by sensitivity level
#
# Multi-line commands use folded block scalars, so the lines below are joined
# with single spaces into one command. Actions that originally used `>` keep
# it (rather than `>-`) so the command string, including its trailing
# newline, is unchanged.
actions:
  # ---- Current cohort: dataset extraction ----

  # Runs analysis/dataset_definition_unmatched_exp_lc.py and writes
  # output/dataset_exp_lc_unmatched.csv.
  generate_long_covid_exposure_dataset:
    run: >-
      databuilder:v0 generate-dataset
      analysis/dataset_definition_unmatched_exp_lc.py
      --output output/dataset_exp_lc_unmatched.csv
    outputs:
      highly_sensitive:
        cohort: output/dataset_exp_lc_unmatched.csv

  # Runs analysis/dataset_definition_lc_gp_list.py and writes
  # output/dataset_lc_gp_list.csv.
  generate_list_gp_use_long_covid_dx:
    run: >-
      databuilder:v0 generate-dataset
      analysis/dataset_definition_lc_gp_list.py
      --output output/dataset_lc_gp_list.csv
    outputs:
      highly_sensitive:
        cohort: output/dataset_lc_gp_list.csv

  # Runs analysis/dataset_definition_unmatched_comparator.py once the GP
  # list action has finished.
  # NOTE(review): presumably the dataset definition reads
  # output/dataset_lc_gp_list.csv - confirm in the .py file.
  generate_dataset_comparator_exclude_gp_no_long_covid:
    run: >-
      databuilder:v0 generate-dataset
      analysis/dataset_definition_unmatched_comparator.py
      --output output/dataset_comparator_unmatched.csv
    needs:
      - generate_list_gp_use_long_covid_dx
    outputs:
      highly_sensitive:
        cohort: output/dataset_comparator_unmatched.csv

  # ---- Current cohort: matching ----

  # Runs analysis/match_test.py after both unmatched datasets exist. Writes
  # three matched CSVs (highly sensitive) and a text report (moderately
  # sensitive).
  test_matching:
    run: python:latest python analysis/match_test.py
    needs:
      - generate_dataset_comparator_exclude_gp_no_long_covid
      - generate_long_covid_exposure_dataset
    outputs:
      highly_sensitive:
        matched_cases: output/matched_cases_stp.csv
        matched_matches: output/matched_matches_stp.csv
        matched_all: output/matched_combined_stp.csv
      moderately_sensitive:
        matching_report: output/matching_report_stp.txt

  # Runs analysis/dataset_definition_matched_cases.py after matching and
  # writes output/matched_cases_with_ehr.csv.
  import_matched_exposure:
    run: >
      databuilder:v0 generate-dataset
      analysis/dataset_definition_matched_cases.py
      --output output/matched_cases_with_ehr.csv
    needs:
      - test_matching
    outputs:
      highly_sensitive:
        cohort: output/matched_cases_with_ehr.csv

  # Runs analysis/dataset_definition_matched_control.py after matching and
  # writes output/matched_control_with_ehr.csv.
  import_matched_controls:
    run: >
      databuilder:v0 generate-dataset
      analysis/dataset_definition_matched_control.py
      --output output/matched_control_with_ehr.csv
    needs:
      - test_matching
    outputs:
      highly_sensitive:
        cohort: output/matched_control_with_ehr.csv

  # ---- Historical cohort: dataset extraction ----

  # Runs analysis/dataset_definition_hx_unmatched_exp_lc.py and writes
  # output/hx_unmatched_exp.csv.
  generate_historical_exp_data:
    run: >-
      databuilder:v0 generate-dataset
      analysis/dataset_definition_hx_unmatched_exp_lc.py
      --output output/hx_unmatched_exp.csv
    outputs:
      highly_sensitive:
        hx_cohort: output/hx_unmatched_exp.csv

  # Runs analysis/dataset_definition_hx_unmatched_com_no_lc.py once the GP
  # list action has finished; writes output/hx_dataset_comp_unmatched.csv.
  generate_historical_comp_data_exclude_gp_no_long_covid:
    run: >-
      databuilder:v0 generate-dataset
      analysis/dataset_definition_hx_unmatched_com_no_lc.py
      --output output/hx_dataset_comp_unmatched.csv
    needs:
      - generate_list_gp_use_long_covid_dx
    outputs:
      highly_sensitive:
        hx_cohort: output/hx_dataset_comp_unmatched.csv

  # ---- Historical cohort: matching ----

  # Runs analysis/match_historical.py after both historical datasets exist.
  # Output layout mirrors test_matching, with "_historical" file names.
  historical_matching:
    run: python:latest python analysis/match_historical.py
    needs:
      - generate_historical_exp_data
      - generate_historical_comp_data_exclude_gp_no_long_covid
    outputs:
      highly_sensitive:
        matched_cases: output/matched_cases_historical.csv
        matched_matches: output/matched_matches_historical.csv
        matched_all: output/matched_combined_historical.csv
      moderately_sensitive:
        matching_report: output/matching_report_historical.txt

  # Runs analysis/dataset_definition_hx_matched_exp_lc.py after historical
  # matching and writes output/hx_matched_cases_with_ehr.csv.
  import_matched_historical_exposure:
    run: >
      databuilder:v0 generate-dataset
      analysis/dataset_definition_hx_matched_exp_lc.py
      --output output/hx_matched_cases_with_ehr.csv
    needs:
      - historical_matching
    outputs:
      highly_sensitive:
        cohort: output/hx_matched_cases_with_ehr.csv

  # Runs analysis/dataset_definition_hx_matched_comp.py after historical
  # matching and writes output/hx_matched_control_with_ehr.csv.
  import_matched_historical_controls:
    run: >
      databuilder:v0 generate-dataset
      analysis/dataset_definition_hx_matched_comp.py
      --output output/hx_matched_control_with_ehr.csv
    needs:
      - historical_matching
    outputs:
      highly_sensitive:
        cohort: output/hx_matched_control_with_ehr.csv

  # ---- Reporting ----

  # Runs analysis/st01_report_matched.R on the matched exposure and control
  # datasets; writes two CSV tables and two PNG figures.
  report01_matched_datasets:
    run: r:latest analysis/st01_report_matched.R
    needs:
      - import_matched_exposure
      - import_matched_controls
    outputs:
      moderately_sensitive:
        matched_table: output/st01_matched_numbers_table.csv
        # NOTE(review): "exporing" looks like a typo for "exploring", but the
        # path must match the file the R script writes, so it is kept as-is.
        explore_vax_fig: output/st1_exporing_vax_index_date.png
        missing_table: output/missing_distribution_table.csv
        missing_pattern: output/missing_pattern_current.png

  # Runs analysis/st02_report_matched_historical.R on the matched historical
  # datasets; writes output/hx_matched_numbers_table.csv.
  report02_hx_matched_datasets:
    run: r:latest analysis/st02_report_matched_historical.R
    needs:
      - import_matched_historical_exposure
      - import_matched_historical_controls
    outputs:
      moderately_sensitive:
        matched_table: output/hx_matched_numbers_table.csv

  # Runs analysis/st03_hurdle_model.R on the matched exposure and control
  # datasets; writes output/st03_monthly_visits_crude_hurdle.csv.
  report_03_hurdle_model:
    run: r:latest analysis/st03_hurdle_model.R
    needs:
      - import_matched_exposure
      - import_matched_controls
    outputs:
      moderately_sensitive:
        model_table: output/st03_monthly_visits_crude_hurdle.csv