generated from opensafely/research-template
/
cumulative_incidence.R
115 lines (98 loc) · 4.82 KB
/
cumulative_incidence.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
######################################
# This script
# - produces a cumulative incidence plot of follow-up-time
# - saves plot as svg
######################################
# Preliminaries ----
## Import libraries
library('tidyverse')
library('lubridate')
library('reshape2')
library('here')
library('survival')
library('survminer')
## Custom function
fct_case_when <- function(...) {
# uses dplyr::case_when but converts the output to a factor,
# with factors ordered as they appear in the case_when's ... argument
args <- as.list(match.call())
levels <- sapply(args[-1], function(f) f[[3]]) # extract RHS of formula
levels <- levels[!is.na(levels)]
factor(dplyr::case_when(...), levels=levels)
}
## Import data
data_processed <- read_rds(here::here("output", "data", "data_processed.rds"))
## Format groups
data_processed <- data_processed %>%
mutate(group = ifelse(care_home_65plus == 1, 1, NA),
group = ifelse(is.na(group) & ageband == 3, 2, group),
group = ifelse(is.na(group) & hscworker == 1, 3, group),
group = ifelse(is.na(group) & ageband == 2, 4, group),
group = ifelse(is.na(group) & shielded == 1, 5, group),
group = ifelse(is.na(group) & age >=50 & age <70, 6, group),
group = ifelse(is.na(group), 7, group),
group = fct_case_when(
group == "1" ~ "Care home (priority group 1)",
group == "2" ~ "80+ (priority group 2)",
group == "3" ~ "Health/care workers (priority groups 1-2)",
group == "4" ~ "70-79 (priority groups 3-4)",
group == "5" ~ "Shielding (age 16-69) (priority group 4)",
group == "6" ~ "50-69 (priority groups 5-9)",
group == "7" ~ "Others not in the above groups (under 50)",
#TRUE ~ "Unknown"
TRUE ~ NA_character_)) %>%
filter(time_to_positive_test >= 0)
# Plot ----
threshold <- 7
## Data
surv_data_all <- survfit(Surv(time = time_to_positive_test, event = covid_positive_test) ~ 1,
data = data_processed) %>%
broom::tidy() %>%
filter(estimate > 0) %>%
mutate(
estimate = pmin(1,plyr::round_any(estimate, threshold/max(n.risk)), na.rm=TRUE),
conf.low = pmin(1, plyr::round_any(conf.low, threshold/max(n.risk)), na.rm=TRUE),
conf.high = pmin(1, plyr::round_any(conf.high, threshold/max(n.risk)), na.rm=TRUE),
cum.in = 1 - estimate,
lci = 1- conf.high,
uci = 1 - conf.low,
group = "All"
) %>%
select(group, time, cum.in, lci, uci)
surv_data_groups <- survfit(Surv(time = time_to_positive_test, event = covid_positive_test) ~ group,
data = data_processed) %>%
broom::tidy() %>%
filter(estimate > 0) %>%
mutate(
estimate = pmin(1,plyr::round_any(estimate, threshold/max(n.risk)), na.rm=TRUE),
conf.low = pmin(1, plyr::round_any(conf.low, threshold/max(n.risk)), na.rm=TRUE),
conf.high = pmin(1, plyr::round_any(conf.high, threshold/max(n.risk)), na.rm=TRUE),
cum.in = 1 - estimate,
lci = 1- conf.high,
uci = 1 - conf.low
) %>%
mutate(group = gsub(".*=","", strata),
group = factor(group, levels = c("All",
"Care home (priority group 1)",
"80+ (priority group 2)",
"Health/care workers (priority groups 1-2)",
"70-79 (priority groups 3-4)",
"Shielding (age 16-69) (priority group 4)",
"50-69 (priority groups 5-9)",
"Others not in the above groups (under 50)"))) %>%
select(group, time, cum.in, lci, uci)
surv_data_risk_table <- ggsurvplot(survfit(Surv(time = time_to_positive_test, event = covid_positive_test) ~ group,
data = data_processed), risk.table = TRUE, break.time.by = 25)$data.survtable %>%
select(group, time, n.risk) %>%
mutate(`n.risk` = ifelse(`n.risk` < 8, "<8", `n.risk`),
group = factor(group, levels = c("Care home (priority group 1)",
"80+ (priority group 2)",
"Health/care workers (priority groups 1-2)",
"70-79 (priority groups 3-4)",
"Shielding (age 16-69) (priority group 4)",
"50-69 (priority groups 5-9)",
"Others not in the above groups (under 50)")))
## Save data behind plots ----
write_csv(surv_data_all, here::here("output", "data", "surv_data_all.csv"))
write_csv(surv_data_groups, here::here("output", "data", "surv_data_groups.csv"))
write_csv(surv_data_risk_table, here::here("output", "data", "surv_data_risk_table.csv"))