-
Notifications
You must be signed in to change notification settings - Fork 0
/
08_eol_WP3_MAIHDA_opapp.R
363 lines (273 loc) · 11.6 KB
/
08_eol_WP3_MAIHDA_opapp.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
#######################################
# MAIHDA analysis
# Authors: Sophie and Miranda
# Date: 18/03/24
# Aim: run MAIHDA analysis for outcome outpatient appointments
######################################
# Set up------------------
# Load packages
library(tidyverse)
library(dplyr)
library(insight)
library(parameters)
library(performance)
library(glmmTMB)
library(broom)
library(data.table)
library(lubridate)
# Create folder structure
fs::dir_create("output", "os_reports", "WP3")
# Code settings
startdate <- dmy("01-01-2022")
enddate <- dmy("31-12-2023")
# Prepare data------------
df <- read_csv(file = here::here("output", "os_reports", "input_os_reports.csv.gz")) %>%
mutate(dod_ons = as_date(dod_ons)
, Ethnicity_2 = case_when(ethnicity_Combined == "White" ~ "White"
, ethnicity_Combined == "Asian or Asian British" | ethnicity_Combined == "Black or Black British" | ethnicity_Combined == "Mixed" | ethnicity_Combined == "Chinese or Other Ethnic Groups" | ethnicity_Combined == "Not stated" ~ "All other ethnic groups")
, study_month = floor_date(dod_ons, unit = "month")
, pod_ons_new = case_when(pod_ons == "Elsewhere"
| pod_ons == "Other communal establishment" ~ "Elsewhere/other"
, TRUE ~ as.character(pod_ons))
, cod_ons_3 = str_sub(cod_ons, 1, 3)
, cod_ons_4 = str_sub(cod_ons, 1, 5)
, codgrp = case_when(cod_ons_4 %in% c("U071", "U072") ~ "Covid-19"
, cod_ons_3 >= "J09" & cod_ons_3 <= "J18" ~ "Flu and pneumonia"
, (cod_ons_3 >= "J00" & cod_ons_3 <= "J08") | (cod_ons_3 >= "J19" & cod_ons_3 <= "J99") ~ "Other respiratory diseases"
, cod_ons_3 %in% c("F01", "F03", "G30") ~ "Dementia and Alzheimer's disease"
, cod_ons_3 >= "I00" & cod_ons_3 <= "I99" ~ "Circulatory diseases"
, cod_ons_3 >= "C00" & cod_ons_3 <= "C99" ~ "Cancer"
, TRUE ~ "All other causes")) %>%
filter(codgrp == "Cancer", pod_ons_new == "Home", age_band != "0-24", imd_quintile >=1)
#produce means and SD for each group ----------------
cols_of_interest <- c("count");
GLM_sex <- df %>%
group_by(sex) %>%
summarise(count = n(),
mean = mean(opapp_1m, na.rm = TRUE)
, sd = sd(opapp_1m, na.rm = TRUE)) %>%
dplyr::mutate(across(.cols = all_of(cols_of_interest), .fns = ~ .x %>% `/`(5) %>% round()*5));
fwrite(GLM_sex, here::here("output", "os_reports", "WP3", "OP_GLM_sex.csv"))
GLM_Ethnicity_2 <- df %>%
group_by(Ethnicity_2) %>%
summarise(count = n(),
mean = mean(opapp_1m, na.rm = TRUE)
, sd = sd(opapp_1m, na.rm = TRUE)) %>%
dplyr::mutate(across(.cols = all_of(cols_of_interest), .fns = ~ .x %>% `/`(5) %>% round()*5));
fwrite(GLM_Ethnicity_2, here::here("output", "os_reports", "WP3", "OP_GLM_ethnicity.csv"))
GLM_imd_quintile <- df %>%
group_by(imd_quintile) %>%
summarise(count = n(),
mean = mean(opapp_1m, na.rm = TRUE)
, sd = sd(opapp_1m, na.rm = TRUE)) %>%
dplyr::mutate(across(.cols = all_of(cols_of_interest), .fns = ~ .x %>% `/`(5) %>% round()*5));
fwrite(GLM_imd_quintile, here::here("output", "os_reports", "WP3", "OP_GLM_IMD.csv"))
GLM_age_band <- df %>%
group_by(age_band) %>%
summarise(count = n(),
mean = mean(opapp_1m, na.rm = TRUE)
, sd = sd(opapp_1m, na.rm = TRUE)) %>%
dplyr::mutate(across(.cols = all_of(cols_of_interest), .fns = ~ .x %>% `/`(5) %>% round()*5));
fwrite(GLM_age_band, here::here("output", "os_reports", "WP3", "OP_GLM_age.csv"))
#previous analysis ----------
#define strata-
df$strata <- ifelse(
is.na(df$sex) | is.na(df$age_band) | is.na(df$Ethnicity_2) | is.na(df$imd_quintile),
NA_character_,
paste0(df$sex,",",df$age_band,",",df$Ethnicity_2,",",df$imd_quintile)
)
df$strata <- factor(df$strata)
#Null model
Model_null <- glmmTMB(opapp_1m ~ 1 +(1 | strata), data = df)
model_parameters(Model_null)
icc(Model_null)
fwrite(print(icc(Model_null)), here::here("output", "os_reports", "WP3", "OPnull.csv"))
# Partially adjusted intersectional model
model_sex <- glmmTMB(opapp_1m ~ sex + (1|strata), data = df)
model_age <- glmmTMB(opapp_1m ~ age_band + (1|strata), data = df)
model_ethnicity <- glmmTMB(opapp_1m ~ Ethnicity_2 + (1|strata), data = df)
model_IMD <- glmmTMB(opapp_1m ~ imd_quintile + (1|strata), data = df)
compare_parameters(model_sex, model_age, model_ethnicity, model_IMD)
icc(model_sex)$ICC_adjusted
icc(model_age)$ICC_adjusted #singularity
icc(model_ethnicity)$ICC_adjusted
icc(model_IMD)$ICC_adjusted
#PCV -
# random effects
variance_null <- get_variance(Model_null)
variance_sex <- get_variance(model_sex)
variance_age <- get_variance(model_age) #singularity again
variance_ethnicity <- get_variance(model_ethnicity)
variance_imd <- get_variance(model_IMD)
#new analysis------------
#define strata----
# Form intersectional strata (80 strata in total)
OP_MAIHDA <-df %>%
group_by(sex, age_band, Ethnicity_2, imd_quintile) %>%
mutate(strata = cur_group_id())
# Model 1 - includes a strata random intercept to account for clustering by strata #
# Calculate simple intersectional model (Null model - Captures variation without considering the impact of any specific predictor)
# Poisson model = model for count data
fm1 <- glmmTMB(opapp_1m ~ 1 + (1|strata), data = OP_MAIHDA, family = poisson)
summary(fm1)
# Intercept (mean no. of outpatient attendances when all predictors = 0)
str(summary(fm1))
beta0 <- summary(fm1)$coefficients$cond[1,1]
beta0
write.csv (beta0, file = 'beta0.csv')
beta0 <- read_csv(file = "beta0.csv")
fwrite(beta0, here::here("output", "os_reports", "WP3", "OPbeta0.csv"))
# Cluster variance
str(summary(fm1))
sigma2u <- summary(fm1)$varcor$cond$strata[1,1]
sigma2u
write.csv(sigma2u, file = 'sigma2u.csv')
sigma2u.csv <-read_csv(file = "sigma2.csv")
fwrite(sigma2u, here::here("output", "os_reports", "WP3", "OPsigma2u.csv"))
# Marginal expectation (approximately = mean no. of outpatient attendances)
expectation <- exp(beta0 + sigma2u/2)
expectation
write.csv(expectation, file = 'expectation.csv')
expectation.csv <-read_csv(file = "expectation.csv")
fwrite(expectation, here::here("output", "os_reports", "WP3", "OPexpectation.csv"))
# Marginal variance
variance <- expectation + expectation^2*(exp(sigma2u) - 1)
variance
write.csv(variance, file = 'variance.csv')
variance.csv <-read_csv(file = "variance.csv")
fwrite(variance, here::here("output", "os_reports", "WP3", "OPvariance.csv"))
# Marginal variance: Level-2 component (Variance between clusters - based on age groups, sex, etc)
variance2 <- expectation^2*(exp(sigma2u) - 1)
variance2
write.csv(variance2, file = 'variance2.csv')
variance2.csv <-read_csv(file = "variance2.csv")
fwrite(variance2, here::here("output", "os_reports", "WP3", "OPvariance2.csv"))
# Marginal variance: Level-1 component (variance within clusters)
variance1 <- expectation
variance1
write.csv(variance1, file = 'variance1.csv')
variance1.csv <-read_csv(file = "variance1.csv")
fwrite(variance1, here::here("output", "os_reports", "WP3", "OPvariance1.csv"))
# Level-2 VPC (variance partition coefficient)
# VPC = proportion of the total variance located at the strata level - the global measure of intersectionality.
# VPC = same as ICC
# A high VPC means the strata are useful for understanding differences in outpatient attendances.
# Where VPC = 0, strata = random sample from the population not relevant to understanding outpatient attendances at end of life.
vpc2 <- variance2/(variance2 + variance1)
vpc2
write.csv(vpc2, file = 'vpc2.csv')
vpc2.csv <-read_csv(file = "vpc2.csv")
fwrite(vpc2, here::here("output", "os_reports", "WP3", "OPvpc2.csv"))
# Level-1 VPC (variance partition coefficient - variance at the individual level)
vpc1 <- variance1/(variance2 + variance1)
vpc1
write.csv(vpc1, file = 'vpc1.csv')
vpc1.csv <-read_csv(file = "vpc1.csv")
fwrite(vpc1, here::here("output", "os_reports", "WP3", "OPvpc1.csv"))
##################################################################################################################################
# Model 2: Two-level variance-components negative binomial model
# Regression coefficients, not just the intercept, are now allowed to vary across clusters
# Negative binomial models account for the variability caused by overdispersion
# Fit model
fm2 <- glmmTMB(opapp_1m ~ 1 + (1|strata), data = OP_MAIHDA, family = nbinom2)
summary(fm2)
# Intercept
str(summary(fm2))
beta0 <- summary(fm2)$coefficients$cond[1,1]
beta0
# Cluster variance
str(summary(fm2))
sigma2u <- summary(fm2)$varcor$cond$strata[1,1]
sigma2u
# Overdispersion parameter
str(summary(fm2))
alpha <- 1/(summary(fm2)$sigma)
alpha
# Marginal expectation
expectation <- exp(beta0 + sigma2u/2)
expectation
# Marginal variance
variance <- expectation + expectation^2*(exp(sigma2u)*(1 + alpha) - 1)
variance
# Marginal variance: Level-2 component
variance2 <- expectation^2*(exp(sigma2u) - 1)
variance2
# Marginal variance: Level-1 component
variance1 <- expectation + expectation^2*exp(sigma2u)*alpha
variance1
# Level-2 VPC
vpc2 <- variance2/(variance2 + variance1)
vpc2
# Level-1 VPC
vpc1 <- variance1/(variance2 + variance1)
vpc1
# Predict cluster random intercept effects
fm2u <- ranef(fm2)
fm2u
############################################################################
# Five-level variance-components negative binomial model
# Variance-component models quantify the proportion of variation in the response due to systematic differences between clusters.
# Fit model
fm3 <- glmmTMB(opapp_1m ~ 1 + (1|sex) + (1|age_band) + (1|Ethnicity_2) + (1|imd_quintile),
data = OP_MAIHDA, family = nbinom2)
summary(fm3)
# Intercept
str(summary(fm3))
beta0 <- summary(fm3)$coefficients$cond[1,1]
beta0
# Cluster variance - sex
str(summary(fm3))
sigma2u <- summary(fm3)$varcor$cond$sex[1,1]
sigma2u
# Cluster variance - imd_quintile
str(summary(fm3))
sigma2v <- summary(fm3)$varcor$cond$imd_quintile[1,1]
sigma2v
str(summary(fm3))
sigma2w <- summary(fm3)$varcor$cond$Ethnicity_2[1,1]
sigma2w
str(summary(fm3))
sigma2x <- summary(fm3)$varcor$cond$age_band[1,1]
sigma2x
# Overdispersion parameter
str(summary(fm3))
alpha <- 1/(summary(fm3)$sigma)
alpha
# Marginal expectation
expectation <- exp(beta0 + sigma2u/2 + sigma2v/2 + sigma2w/2 + sigma2x/2)
expectation
# Marginal variance
variance <- expectation +
expectation^2*(exp(sigma2u + sigma2v + sigma2w + sigma2x)*(1 + alpha) - 1)
variance
# Marginal variance: Level-5 component
variance5 <- (expectation^2*(exp(sigma2u) - 1))
variance5
# Marginal variance: Level-4 component
variance4 <- expectation^2*exp(sigma2u)*(exp(sigma2v) - 1)
variance4
# Marginal variance: Level-3 component
variance3 <- expectation^2*exp(sigma2u)*(exp(sigma2v)*(exp(sigma2w) - 1))
variance3
# Marginal variance: Level-2 component
variance2 <- expectation^2*exp(sigma2u)*(exp(sigma2v)*(exp(sigma2w)*(exp(sigma2x) - 1)))
variance2
# Marginal variance: Level-1 component
variance1 <- expectation + expectation^2*exp(sigma2u + sigma2v + sigma2w + sigma2x)*alpha
variance1
# Should the different level VPC be interpreted as the proportion of variance in outcome explained by each characteristic?
# Level-5 VPC
vpc5 <- variance5/(variance5 + variance4 + variance3 + variance2 + variance1)
vpc5
# Level-4 VPC
vpc4 <- variance4/(variance5 + variance4 + variance3 + variance2 + variance1)
vpc4
# Level-3 VPC
vpc3 <- variance3/(variance5 + variance4 + variance3 + variance2 + variance1)
vpc3
# Level-2 VPC
vpc2 <- variance2/(variance5 + variance4 + variance3 + variance2 + variance1)
vpc2
# Level-1 VPC
vpc1 <- variance1/(variance5 + variance4 + variance3 + variance2 + variance1)
vpc1