generated from opensafely/research-template
/
project.yaml
129 lines (110 loc) · 4.94 KB
/
project.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# OpenSAFELY pipeline configuration.
# The version is quoted so it is read as the string '3.0', not a float.
version: '3.0'

# Settings for generated (dummy) data: number of patients to simulate.
expectations:
  population_size: 1000

# Each key nested under `actions` defines one pipeline action.
actions:
# Extract the cohort defined by the `study_definition_ori` study definition.
generate_cohorts_main:
  # Folded scalar: the lines below are joined with single spaces into one
  # command string.
  run: >-
    cohortextractor:latest generate_cohort
    --study-definition study_definition_ori
  outputs:
    highly_sensitive:
      cohort: output/input_ori.csv
# Run analysis/01-createSDtables.R once generate_cohorts_main has finished.
run_model:
  run: r:latest analysis/01-createSDtables.R
  needs:
    - generate_cohorts_main
  outputs:
    moderately_sensitive:
      log: logs/log-01-createSDtables.txt
      # Disabled outputs: PNG versions of the two tables.
      # gtpng1: output/tables/gt_ocpop.png
      # gtpng2: output/tables/gt_gpcpop.png
      rdata1: output/tables/gt_ocpop.RData
      rdata2: output/tables/gt_gpcpop.RData
      rdata1_discl: output/tables/gt_ocpop_unformatted.csv
      rdata2_discl: output/tables/gt_gpcpop_unformatted.csv
      # Disabled outputs: guidance says not to output identifiable regions.
      # tb01: output/tables/tb01_gpcr_region.csv
      # tb02: output/tables/tb02_gpcr_stp.csv
      tb04: output/tables/tb04_gpcr_agesex.csv
      tb05: output/tables/tb05_gpcr_ethnicity.csv
      tb06: output/tables/tb06_gpcr_ruc.csv
      tb07: output/tables/tb07_gpcr_care.csv
      tb08: output/tables/tb08_gpcr_dis.csv
      tb09: output/tables/tb09_gpcr_imd.csv
# https://docs.opensafely.org/en/latest/measures/
# Extract one cohort per month (2019-01-01 to 2020-12-01) for the
# `study_definition_measures_bycode` study definition into output/measures.
generate_cohorts_long:
  # Folded scalar: the lines below are joined with single spaces into one
  # command string; the double quotes are part of the command.
  run: >-
    cohortextractor:latest generate_cohort
    --study-definition study_definition_measures_bycode
    --index-date-range "2019-01-01 to 2020-12-01 by month"
    --output-dir=output/measures
  outputs:
    highly_sensitive:
      cohort: output/measures/input_measures_bycode_*.csv
# Build measure files for `study_definition_measures_bycode` from the monthly
# extracts in output/measures; runs after generate_cohorts_long.
generate_measures:
  # Folded scalar: the lines below are joined with single spaces.
  run: >-
    cohortextractor:latest generate_measures
    --study-definition study_definition_measures_bycode
    --output-dir=output/measures
  needs:
    - generate_cohorts_long
  outputs:
    moderately_sensitive:
      measure_csv: output/measures/measure_*.csv
# Run analysis/03-createnattrends_codes.R after generate_cohorts_long.
run_model_long:
  run: r:latest analysis/03-createnattrends_codes.R
  needs:
    - generate_cohorts_long
  outputs:
    moderately_sensitive:
      # NOTE(review): the log name has no `_codes` suffix although the script
      # name does; confirm it matches the file the script actually writes.
      log: logs/log-03-createnattrends.txt
      tb01: output/tables/sc03_tb01_nattrends.csv
      fig01: output/plots/sc03_fig01_nattrends.svg
      fig02: output/plots/sc03_fig02_nattrends.svg
      fig03: output/plots/sc03_fig03_pracnatcoverage.svg
      fig04: output/plots/sc03_fig04_pracbyregcoverage.svg
# Run analysis/02-createtemporal.R once both the monthly extracts and the
# measure files are available.
run_model_measures:
  run: r:latest analysis/02-createtemporal.R
  needs:
    - generate_cohorts_long
    - generate_measures
  outputs:
    moderately_sensitive:
      log: logs/log-02-createtemporal.txt
      red_measures: output/tables/redacted_*.csv
      # Disabled output: redundant info, so it is omitted.
      # tb01: output/measures_gpc_pop.csv
      fig01: output/plots/plot_overall_gpc_pop.svg
      # Disabled output:
      # figall: output/plots/plot_each_*_practice.svg
      figquant: output/plots/plot_quantiles_*_practice.svg
# run_model_measuresDEBUG:
# run: r:latest analysis/02-createtemporal_debug.R
# needs: [generate_cohorts_long,generate_measures]
# outputs:
# moderately_sensitive:
# log: logs/log-02-createtemporal-debug.txt
# #tb01: output/measures_gpc_pop_debug.csv
# #fig01: output/plots/plot_overall_gpc_pop_debug.svg
# #figall: output/plots/plot_each_debug_*_practice.svg
# #figquant: output/plots/plot_quantiles_debug_*_practice.svg
# #figquant2: output/plots/plot_quantiles2_debug_*_practice.svg
#run_model_redactedmeasures:
# run: r:latest analysis/02a-createredactedmeasure.R
# needs: [generate_cohorts_long,generate_measures]
# outputs:
# moderately_sensitive:
# log: logs/log-02a-createredactedmeasure.txt
# red_measures: output/tables/redacted2a_*.csv
### SRO template pipeline
# Extract one cohort per month (2019-01-01 to 2020-12-01) for the
# `study_definition` study definition into output/.
SROtem_generate_study_population:
  # Folded scalar: the lines below are joined with single spaces into one
  # command string; the double quotes are part of the command.
  run: >-
    cohortextractor:latest generate_cohort
    --study-definition study_definition
    --index-date-range "2019-01-01 to 2020-12-01 by month"
    --output-dir=output
  outputs:
    highly_sensitive:
      # NOTE(review): this glob also matches output/input_ori.csv and
      # output/input_practice_count.csv, which other actions in this file
      # declare as their outputs; confirm the overlap is harmless.
      cohort: output/input_*.csv
# Extract the cohort for the `study_definition_practice_count` study
# definition into output/.
SROtem_generate_study_population_practice_count:
  # Folded scalar: the lines below are joined with single spaces.
  run: >-
    cohortextractor:latest generate_cohort
    --study-definition study_definition_practice_count
    --output-dir=output
  outputs:
    highly_sensitive:
      cohort: output/input_practice_count.csv
# Build measure files for `study_definition` from the extracts in output/;
# runs after SROtem_generate_study_population.
SROtem_generate_measures:
  # Folded scalar: the lines below are joined with single spaces.
  run: >-
    cohortextractor:latest generate_measures
    --study-definition study_definition
    --output-dir=output
  needs:
    - SROtem_generate_study_population
  outputs:
    moderately_sensitive:
      measure_csv: output/measure_*.csv
# Run analysis/SROtem_get_patients_counts.py after the monthly extracts exist;
# the declared output is output/patient_count.json.
SROtem_get_patient_count:
  run: python:latest python analysis/SROtem_get_patients_counts.py
  needs:
    - SROtem_generate_study_population
  outputs:
    moderately_sensitive:
      text: output/patient_count.json
# Execute notebooks/SRO-Notebook.ipynb with nbconvert and save it as HTML with
# the input cells hidden (--no-input).
# NOTE(review): SROtem_get_patient_count is not listed in `needs`; confirm the
# notebook does not read output/patient_count.json.
generate_notebook:
  # Folded scalar: the lines below are joined with single spaces into one
  # command string.
  run: >-
    jupyter:latest jupyter nbconvert
    /workspace/notebooks/SRO-Notebook.ipynb
    --execute
    --to html
    --output-dir=/workspace/output
    --ExecutePreprocessor.timeout=86400
    --no-input
  needs:
    - SROtem_generate_measures
    - SROtem_generate_study_population_practice_count
  outputs:
    moderately_sensitive:
      notebook: output/SRO-Notebook.html