generated from opensafely/research-template
/
project.yaml
115 lines (97 loc) · 3.8 KB
/
project.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
######################################
# This script defines the project pipeline - it specifies the execution order for all the code in this
# repo using a series of actions.
######################################
# Pipeline schema version. Kept quoted so it is read as the string '3.0'
# rather than being coerced to the float 3.0.
version: '3.0'

# Settings for locally generated dummy data. `population_size` must be nested
# under `expectations`; per the OpenSAFELY pipeline documentation it sets how
# many dummy patients are generated.
expectations:
  population_size: 1000000
# Each key under `actions` is one pipeline step:
#   run     - the command to execute (image:tag followed by its arguments)
#   needs   - the actions that must have run before this one
#   outputs - files the step produces, grouped by sensitivity level
actions:

  # Extract data for study population flow chart
  # (uses the `study_definition_flow_chart` study definition).
  generate_study_population_flow_chart_data:
    run: cohortextractor:latest generate_cohort --study-definition study_definition_flow_chart
    outputs:
      highly_sensitive:
        cohort: output/input_flow_chart.csv

  # Calculate numbers for study population flow chart
  # (renders the R Markdown report into /workspace/output).
  flow_chart:
    run: r:latest -e 'rmarkdown::render("analysis/R/Markdown/Study_definition_flow_chart.Rmd", knit_root_dir = "/workspace", output_dir="/workspace/output")'
    needs: [generate_study_population_flow_chart_data]
    outputs:
      moderately_sensitive:
        html: output/Study_definition_flow_chart.html

  # Extract study data (uses the main `study_definition`).
  generate_study_population:
    run: cohortextractor:latest generate_cohort --study-definition study_definition
    outputs:
      highly_sensitive:
        cohort: output/input.csv

  # Process data: produces the two .rds datasets that later actions depend on.
  data_process:
    run: r:latest analysis/R/Scripts/00_process_data.R
    needs: [generate_study_population]
    outputs:
      highly_sensitive:
        data1: output/data/data_all.rds
        data2: output/data/data_modelling.rds

  # Summarise data
  # NOTE(review): the two positional arguments look like the input dataset and
  # the output directory for the summaries - confirm against the script.
  data_properties:
    run: r:latest analysis/R/Scripts/01_data_properties.R output/data/data_all.rds output/data_properties
    needs: [data_process]
    outputs:
      moderately_sensitive:
        datasummary: output/data_properties/data_all*.txt

  # More data summaries (rendered R Markdown report).
  data_summaries:
    run: r:latest -e 'rmarkdown::render("analysis/R/Markdown/Data_summaries.Rmd", knit_root_dir = "/workspace", output_dir="/workspace/output")'
    needs: [generate_study_population, data_process]
    outputs:
      moderately_sensitive:
        html: output/Data_summaries.html

  # --- Disabled actions (kept for reference; uncomment to re-enable) ---
  #
  # # Coxph models
  # cox_models:
  #   run: r:latest analysis/R/Scripts/02_Models.R
  #   needs: [generate_study_population, data_process]
  #   outputs:
  #     highly_sensitive:
  #       models: output/models/testing/mod*.rds
  #
  # # Coxme model
  # cox_models_sub:
  #   run: r:latest analysis/R/Scripts/02_Models_sub_test.R
  #   needs: [generate_study_population, data_process]
  #   outputs:
  #     highly_sensitive:
  #       models: output/models/testing/mod_test*.rds
  #
  # # Model summaries
  # cox_models_summaries:
  #   run: r:latest -e 'rmarkdown::render("analysis/R/Markdown/Model_comparisons.Rmd", knit_root_dir = "/workspace", output_dir="/workspace/output")'
  #   needs: [generate_study_population, data_process, cox_models, cox_models_sub]
  #   outputs:
  #     moderately_sensitive:
  #       html: output/Model_comparisons.html

  # Stratified cox model
  cox_model_final:
    run: r:latest analysis/R/Scripts/03_Final_model.R
    needs: [generate_study_population, data_process]
    outputs:
      highly_sensitive:
        models: output/models/final/mod*.rds
      # Disabled outputs (kept for reference):
      # moderately_sensitive:
      #   tables: output/models/final/tab*.html
      #   data: output/models/final/tab*.csv
      #   plots: output/models/final/plot*.svg

  # Strata summaries: runs after the final model and produces SVG plots.
  strata_summary:
    run: r:latest analysis/R/Scripts/04_Strata_Summary.R
    needs: [data_process, cox_model_final]
    outputs:
      moderately_sensitive:
        plots: output/models/final/plot_strata*.svg

  # Results summary (rendered R Markdown report).
  results_summary:
    run: r:latest -e 'rmarkdown::render("analysis/R/Markdown/Results_summary.Rmd", knit_root_dir = "/workspace", output_dir="/workspace/output")'
    needs: [generate_study_population, data_process, cox_model_final]
    outputs:
      moderately_sensitive:
        html: output/Results_summary.html