generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 1
/
16_hr_for_interaction_with_spline.R
149 lines (119 loc) · 4.67 KB
/
16_hr_for_interaction_with_spline.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
library(splines)
library(ggplot2)
library(survival)
library(survminer)
library(data.table)
library(tidyverse)
#rm(list=ls())
#setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#setwd('../')
options(datatable.fread.datatable=FALSE)
dat <- fread('output/incidence_t.csv')
### creating exposure, modifier and outcome variables
### for simplicity, I'm creating a binary outcome rather than a time-to-event outcome
### in this example, I'm exploring whether the relationship between gender (exposure) and
### diabetes (outcome) varies by continuous age (modifier)
#dat$exposure <- ifelse(dat$exposed=="M", 1, 0) #don't need that as "exposed" is already binary
#dat$modifier <- dat$index_time_to_start_date
#dat$outcome <- ifelse(!is.na(dat$mh_outcome) & dat$mh_outcome== 1, 1, 0)
### fit the model
### I don't have covariates besides the exposure and modifier variables, but you'll have
### all the covariates in the fully adjusted model
### I'm using a logistic regression model rather than Cox given the binary outcome, but
### exactly the same idea applies
#mod <- glm(
# outcome ~ exposure * ns(modifier,
# df=2,
# Boundary.knots=quantile(modifier, c(0.1,0.9))) ,
# family = binomial,
# data = dat)
# Chnage index_dates to numeric
start_date <- as.Date("2020/01/24")
start_date_numeric <- as.numeric(start_date) #this is converted into days
dat$index_numeric <- as.numeric(dat$date_positive)
dat$index_time_to_start_date <- dat$index_numeric - start_date_numeric
mod_cox <- coxph(
Surv(t, mh_outcome) ~ exposed*ns(index_time_to_start_date,
df = 2,
Boundary.knots = c(quantile(index_time_to_start_date, 0.1),
quantile(index_time_to_start_date, 0.9))) +
ns(age, df = 2, Boundary.knots = c(quantile(age,0.1), quantile(age, 0.9))) +
cluster(patient_id) +
alcohol +
obese_binary_flag +
cancer +
digestive_disorder +
hiv_aids +
kidney_disorder +
respiratory_disorder +
metabolic_disorder +
sex +
ethnicity +
region +
hhsize +
work_status_new +
CVD +
musculoskeletal +
neurological +
mental_behavioural_disorder +
imd +
rural_urban,
data = dat)
### extract coefficients from the fitted model
coeffs <- coef(mod_cox)
NROW(coeffs)
### drop the intercept (you won't need to do this as no intercept in a Cox model)
#coeffs <- coeffs[-1]
### pick out the coefficients for the exposure main effect and the two modifier terms
b1 <- coeffs[1]
b4 <- coeffs[46]
b5 <- coeffs[47]
### derive the spline-transformed values of the modifier(index_time_to_start_date) variable (same as used in the model)
spline_matrix <- as.data.frame(
ns(dat$index_time_to_start_date,
df=2,
Boundary.knots=quantile(dat$index_time_to_start_date, c(0.1,0.9)))
)
### pick out each spline-transformed term
s1 <- spline_matrix[,1]
s2 <- spline_matrix[,2]
### calculate change in the linear predictor
lp_change <- b1 + b4*s1 + b5*s2
### convert the change in the linear predictor to an odds ratio (you'll have a hazard ratio
### rather than an odds ratio)
hr <- exp(lp_change)
### extract variance-covariance matrix of the model
mod_vcov <- as.data.frame(vcov(mod_cox))
### drop the intercept from the vcov (you won't need to do this as no intercept)
#mod_vcov <- mod_vcov[-1,-1]
### extract variance for each coefficient (i.e. diagonal elements of the vcov)
var_b1 <- mod_vcov[1,1]
var_b4 <- mod_vcov[4,4]
var_b5 <- mod_vcov[5,5]
### extract pairs of covariances between the coefficients (i.e. off-diagonal elements)
cov_b1_b4 <- mod_vcov[1,4]
cov_b1_b5 <- mod_vcov[1,5]
cov_b4_b5 <- mod_vcov[4,5]
### calculate the variance of the change in the linear predictor
lp_change_var <- var_b1 + (s1^2)*var_b4 + (s2^2)*var_b5 +
2*s1*cov_b1_b4 + 2*s2*cov_b1_b5 + 2*s1*s2*cov_b4_b5
### calculate the standard error of the change in the linear predictor
lp_change_se <- sqrt(lp_change_var)
### calculate 95% CI around the OR
hr_lcl <- exp(lp_change - 1.96*lp_change_se)
hr_ucl <- exp(lp_change + 1.96*lp_change_se)
### bring results together
hr_df <- data.frame(
index_time_to_start_date = dat$index_time_to_start_date,
lp_change,
lp_change_se,
hr,
hr_lcl,
hr_ucl)
write_csv(hr_df, 'output/100_hazard_ratios_by_modifier_incidence.csv')
### plots the odds ratio by the modifier variable
p<- ggplot(data = hr_df, aes(x = index_time_to_start_date, y=hr)) +
geom_line(linewidth = 0.8 , colour="red") +
geom_ribbon(aes(ymin=hr_lcl, ymax=hr_ucl), fill="red", colour=NA, alpha=0.2) +
geom_hline(yintercept=1, linetype=2)
ggsave("output/100_hazard_ratios_by_modifier_variable.jpg",p)