generated from opensafely/research-template
/
cox_models.R
251 lines (235 loc) · 8.23 KB
/
cox_models.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
######################################
# Cox models (propensity score analysis)
######################################
## Import libraries
library(tidyverse)
library(lubridate)
library(survival)
library(survminer)
library(gridExtra)
library(splines)
library(survey)
library(here)
## Load functions
source(here("lib", "functions", "safely_n_quietly.R"))
## Create figures directory
fs::dir_create(here::here("output", "figs"))
## Create tables directory
fs::dir_create(here::here("output", "tables"))
## Import command-line arguments
args <- commandArgs(trailingOnly=TRUE)
## Set input and output pathways for matched/unmatched data - default is unmatched
if (length(args) == 0){
data_label = "day5"
} else if (args[[1]] == "day0") {
data_label = "day0"
} else if (args[[1]] == "day5") {
data_label = "day5"
} else {
# Print error if no argument specified
stop("No outcome specified")
}
## Import data
if (data_label == "day5") {
data_cohort <-
read_rds(here::here("output", "data", "data_processed_day5.rds"))
} else if (data_label == "day0") {
data_cohort <-
read_rds(here::here("output", "data", "data_processed_day0.rds"))
}
# create data.frame 'estimates' where output is saved
# 'estimates' has 5 columns with the HR and upper and lower limit of CI and
# n for the number of pt in utilised dataset
# 'estimates' has 6 columns:
# primary outcome x 3 (mol + sot vs none; sot vs none; mol vs none) plus
# secondary outcome x 3
estimates <- matrix(nrow = 6, ncol = 6) %>% as.data.frame()
# give column names
colnames(estimates) <-
c("comparison", "outcome", "HR", "LowerCI", "UpperCI", "n")
# create data.frame 'log' where errors and warnings are saved
# 'log' has 3 columns: comparison, warning and error
# 'log' has 6 rows like 'estimates'
log <- matrix(nrow = 6, ncol = 4) %>% as.data.frame()
# give column names
colnames(log) <-
c("comparison", "outcome", "warning", "error")
# Specify treated group for comparison (Treated vs Untreated)
# used to loop trough different analyses
trt_grp <- c("All", "Sotrovimab", "Molnupiravir")
# Specify outcomes
# uesd to loop through different analyses
outcomes <- c("primary", "secondary")
## Loop over analysis for each treatment comparison
for(i in seq_along(trt_grp)) {
# used later in 'esimates' data.frame to save which analysis is done
t <- trt_grp[i]
# Select data used in analysis ---
# (mol/sot are excluded in single comparison analyses)
if (i == 1) {
print("All Treated versus Untreated Comparison")
data_cohort_sub <-
data_cohort
estimates[c(1, 4), "n"] <- (nrow(data_cohort_sub))
} else {
print(paste0(trt_grp[i], " versus Untreated Comparison"))
# Drop patients treated with molnupiravir
data_cohort_sub <-
data_cohort %>%
filter(treatment_strategy_cat != trt_grp[i])
estimates[c(1 + (i - 1), 4 + (i - 1)), "n"] <- (nrow(data_cohort_sub))
}
# Fit Propensity Score Model ---
# Vector of variables for PS model
# Note: age modelled with cubic spline with 3 knots
vars <-
c(
"ns(age, df=4)",
"sex",
"ethnicity",
"imdQ5" ,
"region_nhs",
"rural_urban",
"huntingtons_disease_nhsd" ,
"myasthenia_gravis_nhsd" ,
"motor_neurone_disease_nhsd" ,
"multiple_sclerosis_nhsd" ,
"solid_organ_transplant_nhsd",
"hiv_aids_nhsd" ,
"immunosupression_nhsd" ,
"imid_nhsd" ,
"liver_disease_nhsd",
"ckd_stage_5_nhsd",
"haematological_disease_nhsd" ,
"cancer_opensafely_snomed" ,
"downs_syndrome_nhsd",
"diabetes",
"bmi_group",
"smoking_status",
"copd",
"dialysis",
"cancer",
"lung_cancer",
"haem_cancer",
"vaccination_status",
"pfizer_most_recent_cov_vac",
"az_most_recent_cov_vac",
"moderna_most_recent_cov_vac"
)
# Specify model
psModelFunction <- as.formula(
paste("treatment",
paste(vars, collapse = " + "),
sep = " ~ "))
# Check your model
print(psModelFunction)
# Fit PS model
psModel <- glm(psModelFunction,
family = binomial(link = "logit"),
data = data_cohort_sub)
# Append patient-level predicted probability of being assigned to cohort
data_cohort_sub$pscore <- predict(psModel, type = "response")
# Overlap plot
overlapPlot <- data_cohort_sub %>%
mutate(trtlabel = ifelse(treatment == "Treated",
yes = 'Treated',
no = 'Untreated')) %>%
ggplot(aes(x = pscore, linetype = trtlabel)) +
scale_linetype_manual(values=c("solid", "dotted")) +
geom_density(alpha = 0.5) +
xlab('Probability of receiving treatment') +
ylab('Density') +
scale_fill_discrete('') +
scale_color_discrete('') +
scale_x_continuous(breaks=seq(0, 1, 0.1)) +
theme(strip.text = element_text(colour ='black')) +
theme_bw() +
theme(legend.title = element_blank()) +
theme(legend.position = c(0.82,.8),
legend.direction = 'vertical',
panel.background = element_rect(fill = "white", colour = "white"),
axis.line = element_line(colour = "black"),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
# Save plot
ggsave(overlapPlot,
filename =
here("output", "figs",
paste0(trt_grp[i], "_overlap_plot_day5.png")),
width=20, height=14, units="cm")
# Derive inverse probability of treatment weights (IPTW)
data_cohort_sub$weights <-
ifelse(data_cohort_sub$treatment == "Treated",
1 / data_cohort_sub$pscore,
1 / (1 - data_cohort_sub$pscore))
# Check extremes
quantile(data_cohort_sub$weights[data_cohort_sub$treatment=="Treated"],
c(0,0.01,0.05,0.95,0.99,1))
quantile(data_cohort_sub$weights[data_cohort_sub$treatment=="Untreated"],
c(0,0.01,0.05,0.95,0.99,1))
# Fit outcome model ---
## Define svy design for IPTW
iptw <- svydesign(ids = ~ 1, data = data_cohort_sub, weights = ~ weights)
for (j in seq_along(outcomes)){
outcome_event <- outcomes[j]
print(outcome_event)
# because we need to know where in 'estimates' and 'log' output should be
# saved
k <- i + ((j - 1) * 3)
estimates[k, "comparison"] <- t
estimates[k, "outcome"] <- outcome_event
log[k, "comparison"] <- t
log[k, "outcome"] <- outcome_event
# create formula for primary and secondary analysis
if (outcome_event == "primary"){
formula <-
as.formula(
Surv(fu_primary, status_primary == "covid_hosp_death") ~
treatment)
} else if (outcome_event == "secondary"){
formula <-
as.formula(
Surv(fu_secondary, status_secondary == "allcause_hosp_death") ~
treatment)
}
# Cox regression
# returns function model_PSw() with components result, output, messages,
# warnings and error
model_PSw <-
safely_n_quietly(
.f = ~ svycoxph(formula,
design = iptw,
data = data_cohort_sub)
)
# save results from model_PSw and save warnings or errors to log file
if (is.null(model_PSw()$error)){
# no error
log[k,"error"] <- NA_character_
if (length(model_PSw()$warnings) != 0){
# save warning in log
log[k, "warning"] <- paste(model_PSw()$warnings, collapse = '; ')
} else log[k, "warning"] <- NA_character_
# select main effects from 'model_PSw'
selection <-
model_PSw()$result$coefficients %>% names %>% startsWith("treatment")
# save coefficients of model and CIs in estimates
estimates[k, "HR"] <- model_PSw()$result$coefficients[selection] %>% exp()
estimates[k, c("LowerCI", "UpperCI")] <-
confint(model_PSw()$result)[selection,] %>% exp()
} else log[k, "error"] <- model_PSw()$messages
}
}
## Save output
write_csv(estimates,
here("output",
"tables",
paste0("cox_models_", data_label, ".csv")))
write_rds(estimates,
here("output",
"tables",
paste0("cox_models_", data_label, ".rds")))
write_csv(log,
here("output",
"tables",
paste0("log_cox_models_", data_label, ".csv")))