generated from opensafely/research-template
/
data_vax_plot.R
98 lines (79 loc) · 2.94 KB
/
data_vax_plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
######################################
# This script:
# - reads data_eligible_b.rds and data_vax_wide.rds
# - generates and saves data_vax_plot.rds for plotting the distribution of 2nd vax dates across dates
######################################
## setup
library(tidyverse)
library(lubridate)
library(glue)
# create folder for data
images_dir <- here::here("output", "second_vax_period", "data")
dir.create(images_dir, showWarnings = FALSE, recursive=TRUE)
# individuals who are eligible based on criteria in box b of Figure 3 on protocol
data_eligible_b <- readr::read_rds(
here::here("output", "vax", "data", "data_eligible_b.rds")
)
data_vax_wide <- readr::read_rds(
here::here("output", "vax", "data", "data_wide_vax_dates.rds")
)
# second dose and brand for eligible individuals
data_2nd_dose <- data_eligible_b %>%
left_join(data_vax_wide, by = "patient_id") %>%
select(patient_id, elig_date, region_0,
dose_2 = covid_vax_2_date, brand = covid_vax_2_brand)
# function for creating plot data
generate_plot_data <- function(data = data_2nd_dose, plot_date) {
# check date in correct format
d <- try(as.Date(plot_date, format="%Y-%m-%d"))
if("try-error" %in% class(d) || is.na(d)) stop("plot_date is not in %Y-%m-%d format.")
data <- data %>%
filter(elig_date %in% as.Date(plot_date))
if (nrow(data)==0) stop("No samples for the given plot_date.")
# sequence of dates for plot
dates_seq <- seq(as.Date(plot_date) + weeks(6),
as.Date(plot_date) + weeks(16) - days(1),
1)
# ensure full sequence of dates for each region:brand combo
# so that the moving averages calculated in 'plot_2nd_vax_dates.R' include the zero counts
expanded_data <- tibble(
region_0 = character(),
brand = character(),
dose_2 = Date()
)
for (r in unique(data$region_0)) {
for (v in unique(data$brand)) {
expanded_data <- expanded_data %>%
bind_rows(tibble(
region_0 = rep(r, each = length(dates_seq)),
brand = rep(v, each = length(dates_seq)),
dose_2 = dates_seq
))
}
}
# number of patients with 2nd dose on each date
count_data <- data %>%
group_by(region_0, brand, dose_2) %>%
count() %>%
ungroup()
# join expanded and count data
out <- expanded_data %>%
left_join(count_data, by = c("region_0", "brand", "dose_2")) %>%
mutate(across(n, ~if_else(is.na(.x), 0L, .x))) %>%
mutate(elig_date = as.Date(plot_date, format = "%Y-%m-%d"))
return(out)
}
# create list of data for each elig_date
data_vax_plot_list <- lapply(
as.character(sort(unique(data_2nd_dose$elig_date))),
function(x)
try(generate_plot_data(plot_date = x))
)
# bind list into one tibble
data_vax_plot <- bind_rows(data_vax_plot_list[sapply(data_vax_plot_list, is_tibble)])
# save data for plotting
readr::write_rds(
data_vax_plot,
here::here("output", "second_vax_period", "data", "data_vax_plot.rds"),
compress = "gz"
)