generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_2nd_vax_dates.R
189 lines (159 loc) · 6.08 KB
/
data_2nd_vax_dates.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
######################################
# This script:
# - reads data_eligible_b.rds and data_vax_wide.rds
# - saves data for plotting the distribution of 2nd vax dates across dates
# - identifies the second vaccination period
# - saves second_vax_period_dates.csv (the elig_date:region:brand specific dates)
# - saves start_dates.csv and end_dates.csv (the elig_date:region specific dates to pass to study_definition_covs.py)
######################################
## setup
library(tidyverse)
library(lubridate)
library(glue)
# create folder for data
fs::dir_create(here::here("output", "second_vax_period", "data"))
fs::dir_create(here::here("output", "second_vax_period", "tables"))
study_parameters <- readr::read_rds(here::here("output", "lib", "study_parameters.rds"))
elig_dates <- readr::read_csv(here::here("output", "lib", "elig_dates.csv"))
# individuals who are eligible based on criteria in box b of Figure 3 on protocol
data_eligible_b <- readr::read_rds(
here::here("output", "data", "data_eligible_b.rds")
)
data_vax_wide <- readr::read_rds(
here::here("output", "data", "data_wide_vax_dates.rds")
)
# second dose and brand for eligible individuals
data_2nd_dose <- data_eligible_b %>%
left_join(data_vax_wide, by = "patient_id") %>%
select(patient_id, jcvi_group, elig_date, region,
dose_2 = covid_vax_2_date, brand = covid_vax_2_brand) %>%
group_split(jcvi_group, elig_date)
# function for creating plot data
generate_plot_data <- function(.data) {
group <- unique(.data$jcvi_group)
plot_date <- unique(.data$elig_date)
if (nrow(.data)==0) stop(".data is an empty tibble.")
# sequence of dates for plot
dates_seq <- seq(as.Date(plot_date) + weeks(6),
as.Date(plot_date) + weeks(20) - days(1),
1)
# ensure full sequence of dates for each region:brand combo
# so that the moving averages calculated in 'plot_2nd_vax_dates.R' include the zero counts
expanded_data <- tibble(
region = character(),
brand = character(),
dose_2 = Date()
)
for (r in unique(.data$region)) {
for (v in unique(.data$brand)) {
expanded_data <- expanded_data %>%
bind_rows(tibble(
region = rep(r, each = length(dates_seq)),
brand = rep(v, each = length(dates_seq)),
dose_2 = dates_seq
))
}
}
# number of patients with 2nd dose on each date
count_data <- .data %>%
group_by(region, brand, dose_2) %>%
count() %>%
ungroup()
# join expanded and count data
out <- expanded_data %>%
left_join(count_data, by = c("region", "brand", "dose_2")) %>%
mutate(across(n, ~if_else(is.na(.x), 0L, .x))) %>%
mutate(
jcvi_group = group,
elig_date = as.Date(plot_date, format = "%Y-%m-%d"))
return(out)
}
# create list of data for each elig_date
data_vax_plot_list <- lapply(
data_2nd_dose,
function(x)
try(generate_plot_data(x))
)
# bind list into one tibble
data_vax_plot <- bind_rows(
data_vax_plot_list[sapply(data_vax_plot_list, is_tibble)]
) %>%
# change labels for plots
mutate(across(brand,
~factor(brand,
levels = c("az", "pfizer"),
labels = c("ChAdOx", "BNT162b2")))) %>%
rename(n_brand = n) %>%
group_by(jcvi_group, elig_date, region, dose_2) %>%
mutate(n = sum(n_brand)) %>%
ungroup()
readr::write_rds(data_vax_plot,
here::here("output", "second_vax_period", "data", "data_vax_plot.rds"),
compress = "gz")
# second vaccination periods
# number of days in cumulative sum
l <- 28
second_vax_period_dates <- data_vax_plot %>%
distinct(jcvi_group, elig_date, region, dose_2, n) %>%
# calculate moving 28-day total number of individuals vaccinated for each elig_date:region:brand
group_by(jcvi_group, elig_date, region) %>%
arrange(dose_2, .by_group = TRUE) %>%
mutate(
cumulative_sum = stats::filter(
x = n,
filter = rep(1, l),
method = "convolution",
sides = 1),
end_of_period = if_else(
cumulative_sum == max(cumulative_sum, na.rm = TRUE),
dose_2,
as.Date(NA_character_)),
start_of_period = end_of_period - days(27)
) %>%
# only keep rows where cumulative_sum == max(cumulative_sum, na.rm = TRUE)
filter(!is.na(end_of_period)) %>%
# in case there are multiple dates with max(cumulative_sum),
# take the first date with max(cumulative_sum)
summarise(across(c(cumulative_sum, end_of_period, start_of_period),
min, na.rm = TRUE),
.groups = "keep") %>%
ungroup() %>%
mutate(
# time between start of first comparison and last date of available data
days_of_data = as.integer(as.Date(study_parameters$end_date) - start_of_period) + 14,
# set n_comparisons based on days of available data
n_comparisons = pmin(ceiling(days_of_data/28), study_parameters$max_comparisons)
) %>%
select(-days_of_data)
brand_counts <- second_vax_period_dates %>%
left_join(data_vax_plot,
by = c("jcvi_group", "elig_date", "region")) %>%
filter(
start_of_period <= dose_2,
dose_2 <= end_of_period
) %>%
group_by(jcvi_group, elig_date, region, brand) %>%
summarise(n = sum(n_brand), .groups = "keep") %>%
ungroup() %>%
pivot_wider(
names_from = brand, values_from = n, names_prefix = "n_"
)
second_vax_period_dates <- second_vax_period_dates %>%
left_join(brand_counts,
by = c("jcvi_group", "elig_date", "region")) %>%
select(jcvi_group, elig_date, region, n_ChAdOx, n_BNT162b2, cumulative_sum,
start_of_period, end_of_period, n_comparisons)
# save for plotting
readr::write_rds(
second_vax_period_dates,
here::here("output", "second_vax_period", "data", "second_vax_period_dates.rds"),
compress = "gz")
# save to review and release, with cumulative sum rounded to nearest 10
capture.output(
second_vax_period_dates %>%
arrange(jcvi_group, elig_date, region) %>%
mutate(across(cumulative_sum, ~ round(.x, -1))) %>%
kableExtra::kable("pipe"),
file = here::here("output", "second_vax_period", "tables", "second_vax_period_dates.txt"),
append=FALSE
)