# project.yaml
# Generated from opensafely/covid-vaccine-research-template.
# Pipeline file format version. Quoted so it is read as the string '3.0'
# rather than as a float.
version: '3.0'

# Settings for locally generated dummy data (per the OpenSAFELY pipeline
# docs, population_size is the number of dummy patients to generate).
expectations:
  population_size: 1000
# Pipeline actions. Each action gives the command to run (`run`), the actions
# whose outputs it depends on (`needs`), and the files it writes (`outputs`),
# grouped by sensitivity level.
#
# Long cohortextractor commands use a folded block scalar (`>-`): the lines
# are joined with single spaces and no trailing newline is added, so the
# resulting command string is the same as a one-line value.
actions:

  # Extract the treated cohort from study_definition_treated as a feather file.
  generate_study_treated:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_treated
      --output-format feather
    outputs:
      highly_sensitive:
        cohort: output/input_treated.feather

  # Run analysis/data_process_treated.R on the treated extract; writes the
  # eligible treated dataset and a flowchart CSV.
  # NOTE(review): `over12` is presumably a cohort label — confirm in the script.
  data_process_treated:
    run: r:latest analysis/data_process_treated.R over12
    needs:
      - generate_study_treated
    outputs:
      highly_sensitive:
        rds: output/data/data_treated_eligible.rds
      moderately_sensitive:
        flowchart: output/data/flowchart_treated_eligible.csv

  ## matching round 1

  # Extract potential controls for the single index date 2021-09-20. The date
  # in the output filename must match the --index-date-range value.
  generate_study_control_potential1:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_control_potential
      --output-format feather
      --index-date-range "2021-09-20 to 2021-09-20 by week"
    outputs:
      highly_sensitive:
        cohort: output/input_control_potential_2021-09-20.feather

  # Run analysis/data_process_control.R on the potential-control extract.
  # NOTE(review): the trailing `1` is presumably the matching round — confirm.
  data_process_control_potential1:
    run: r:latest analysis/data_process_control.R over12 1
    needs:
      - generate_study_control_potential1
    outputs:
      highly_sensitive:
        rds: output/data/data_control_potential1.rds

  # Run analysis/matching.R using the processed treated and potential-control
  # data; writes match-status and matched datasets plus a gzipped CSV of
  # potential matched controls.
  matching1:
    run: r:latest analysis/matching.R over12 1
    needs:
      - data_process_treated
      - data_process_control_potential1
    outputs:
      highly_sensitive:
        rds1: output/match/data_potential_matchstatus1.rds
        rds2: output/match/data_potential_matched1.rds
        csv: output/match/potential_matched_controls1.csv.gz

  # Second extraction, run after matching1, using
  # study_definition_control_match1.
  generate_study_match_control1:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_control_match1
      --output-format feather
    needs:
      - matching1
    outputs:
      highly_sensitive:
        cohort: output/input_control_match1.feather

  # Run analysis/matching_filter.R on the matching1 outputs and the
  # matched-control extract; writes the all-rounds match status and the
  # actual matches for round 1.
  matching_filter1:
    run: r:latest analysis/matching_filter.R over12 1
    needs:
      - matching1
      - generate_study_match_control1
    outputs:
      highly_sensitive:
        rds1: output/match/data_matchstatus_allrounds1.rds
        rds2: output/match/data_match_actual1.rds
# ## matching round 2
#   generate_study_control_potential2:
#     run: cohortextractor:latest generate_cohort --study-definition study_definition_control_potential --output-format feather --index-date-range "2021-10-04 to 2021-10-04 by week"
#     outputs:
#       highly_sensitive:
#         cohort: output/input_control_potential_2021-10-04.feather
#   data_process_control_potential2:
#     run: r:latest analysis/data_process_control.R over12 2
#     needs:
#       - generate_study_control_potential2
#     outputs:
#       highly_sensitive:
#         rds: output/data/data_control_potential2.rds
#   matching2:
#     run: r:latest analysis/matching.R over12 2
#     needs:
#       - matching_filter1
#       - data_process_treated
#       - data_process_control_potential2
#     outputs:
#       highly_sensitive:
#         rds1: output/match/data_potential_matchstatus2.rds
#         rds2: output/match/data_potential_matched2.rds
#         csv: output/match/potential_matched_controls2.csv.gz
#   generate_study_match_control2:
#     run: cohortextractor:latest generate_cohort --study-definition study_definition_control_match2 --output-format feather
#     needs:
#       - matching2
#     outputs:
#       highly_sensitive:
#         cohort: output/input_control_match2.feather
#   matching_filter2:
#     run: r:latest analysis/matching_filter.R over12 2
#     needs:
#       - matching_filter1
#       - matching2
#       - generate_study_match_control2
#     outputs:
#       highly_sensitive:
#         rds1: output/match/data_matchstatus_allrounds2.rds
#         rds2: output/match/data_match_actual2.rds
# ## combine together
#   matching_combine:
#     run: r:latest analysis/matching_combine.R over12
#     needs:
#       - matching_filter1
#       - matching_filter2
#     outputs:
#       highly_sensitive:
#         rds: output/match/data_match_all.rds