generated from opensafely/research-template
/
01_cox_pipeline.R
97 lines (69 loc) · 4.33 KB
/
01_cox_pipeline.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
## =============================================================================
## Pipeline (1): Control center, calls relevant analysis scripts, sets working
## and saving directories, parallelises processes
##
## Based on scripts written by Samantha Ip; see the following repos for the
## original scripts: https://github.com/BHFDSC/CCU002_01 & https://github.com/BHFDSC/CCU002_03
## =============================================================================
# libraries
library(data.table); library(dplyr); library(survival); library(broom); library(DBI); library(ggplot2);
library(nlme); library(tidyverse); library(lubridate); library(purrr); library(parallel); library(stats);
library(utils); library(stringr); library(rms); library(readr)
# Event name: taken from the first trailing command-line argument, falling
# back to "t2dm" when the script is run without any arguments.
args <- commandArgs(trailingOnly = TRUE)
event_name <- if (length(args) > 0) args[[1]] else "t2dm"
# Specify directories ----------------------------------------------------------
# Create both output folders in a single call; the paths are resolved with
# here::here().
fs::dir_create(c(
  here::here("output", "not-for-review"),
  here::here("output", "review", "model")
))

# NOTE(review): these two paths are relative, whereas the folders above are
# created under here::here() -- they only agree when the working directory is
# the project root. Confirm that is always the case.
output_dir <- "output/review/model"
scripts_dir <- "analysis/model"
# Source relevant files --------------------------------------------------------
# The scripts are sourced in the order listed. source() is left at its default
# (local = FALSE), so each file is still evaluated in the global environment
# even though the call sits inside lapply().
invisible(lapply(
  c(
    "02_01_cox_analyses_to_run.R",
    "02_02_cox_load_data.R", # Prepare dataset for model
    "06_cox_extra_functions.R"
  ),
  function(script_file) source(file.path(scripts_dir, script_file))
))
# Add time point parameter to analyses to run ----------------------------
# Sourced before get_timepoint() is called below.
# NOTE(review): get_timepoint() is presumably defined by this script -- confirm.
source(file.path(scripts_dir, "02_03_cox_timepoint_param.R"))

# The time point is worked out on the "mdl_max_adj" rows only and then joined
# back onto every row of analyses_to_run by subgroup.
# NOTE(review): the join assumes at most one "mdl_max_adj" row per subgroup;
# duplicates would multiply rows in analyses_to_run -- confirm.
analyses_to_run_timepoints <- analyses_to_run %>%
  filter(mdl == "mdl_max_adj")

# Add reduced time point column: one get_timepoint() call per row.
# seq_len() (rather than seq()) gives an empty split when the table has no
# rows, instead of the two bogus groups produced by seq(0) == c(1, 0).
# The result is stored as a list column, exactly as lapply() returns it.
analyses_to_run_timepoints$reduced_timepoint <- lapply(
  split(
    analyses_to_run_timepoints,
    seq_len(nrow(analyses_to_run_timepoints))
  ),
  function(analysis_row) {
    get_timepoint(
      event = analysis_row$event,
      subgroup = analysis_row$subgroup,
      stratify_by_subgroup = analysis_row$stratify_by_subgroup,
      stratify_by = analysis_row$strata,
      mdl = analysis_row$mdl,
      input, cuts_days_since_expo, cuts_days_since_expo_reduced, covar_names
    )
  }
)

analyses_to_run_timepoints <- analyses_to_run_timepoints %>%
  select(subgroup, reduced_timepoint)

# Attach the time point to every analysis and drop those flagged "remove"
# (filter() also drops rows where the comparison is NA).
analyses_to_run <- analyses_to_run %>%
  left_join(analyses_to_run_timepoints, by = "subgroup") %>%
  filter(reduced_timepoint != "remove")

# If one subgroup category is "reduced" then make sure all of the subgroup
# categories are "reduced" for comparison purposes.
# ungroup() afterwards so the grouping by subgroup_cat does not leak into the
# code that uses analyses_to_run later on.
analyses_to_run <- analyses_to_run %>%
  group_by(subgroup_cat) %>%
  dplyr::mutate(reduced_timepoint = case_when(
    any(reduced_timepoint == "reduced") ~ "reduced",
    TRUE ~ as.character(reduced_timepoint)
  )) %>%
  ungroup()
# Source remainder of relevant files --------------------------------------------------------
source(file.path(scripts_dir, "03_01_cox_subgrouping.R")) # Model specification

# ------------------------------------ LAUNCH JOBS -----------------------------
# Call get_vacc_res() once per row of analyses_to_run, sequentially via
# lapply(). The guard skips the loop entirely when there is nothing to run.
# The row count is compared to 0 (the previous form, nrow(analyses_to_run>0),
# compared every cell of the table to 0 and took the row count of that result).
if (nrow(analyses_to_run) > 0) {
  lapply(
    split(analyses_to_run, seq_len(nrow(analyses_to_run))),
    function(analysis_row) {
      get_vacc_res(
        event = analysis_row$event,
        subgroup = analysis_row$subgroup,
        stratify_by_subgroup = analysis_row$stratify_by_subgroup,
        stratify_by = analysis_row$strata,
        mdl = analysis_row$mdl,
        time_point = analysis_row$reduced_timepoint,
        input, cuts_days_since_expo, cuts_days_since_expo_reduced, covar_names
      )
    }
  )
}
# Save csv of analyses not run
# NOTE(review): analyses_not_run is not created in this file; presumably one of
# the sourced scripts defines it -- confirm.
# NOTE(review): the file name ends in "_.csv" (underscore before the
# extension); kept unchanged in case later steps expect exactly that name.
write.csv(
  analyses_not_run,
  paste0(output_dir, "/analyses_not_run_", event_name, "_", ".csv"),
  row.names = TRUE # spelled out: T is an ordinary variable and can be reassigned
)

# Combine all results into one .csv
source(file.path(scripts_dir, "05_cox_format_tbls_HRs.R"))