generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_eligible_cde.R
144 lines (120 loc) · 4.99 KB
/
data_eligible_cde.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
################################################################################
# This script:
# - reads the processed data
# - applies eligibility criteria from boxes c and d of Figure 3 in protocol
# - saves processed data from eligible individuals
################################################################################
# import libraries ----
library(tidyverse)
library(lubridate)
library(glue)
# read study parameters
study_parameters <- readr::read_rds(
here::here("output", "lib", "study_parameters.rds"))
# individuals who are eligible based on criteria in box a of Figure 3 on protocol
data_eligible_a <- readr::read_rds(
here::here("output", "data", "data_eligible_a.rds"))
# individuals who are eligible based on criteria in box b of Figure 3 on protocol
data_eligible_b <- readr::read_rds(
here::here("output","data", "data_eligible_b.rds"))
# read wide vaccine dates data
data_vax_wide <- readr::read_rds(
here::here("output", "data", "data_wide_vax_dates.rds"))
# read second vax period dates
second_vax_period_dates <- readr::read_rds(
here::here("output", "second_vax_period", "data", "second_vax_period_dates.rds"))
# covariate data
data_processed <- readr::read_rds(
here::here("output", "data", "data_processed.rds")) %>%
select(patient_id, endoflife_date, midazolam_date, covid_any_date, longres_date)
################################################################################
# apply eligibility criteria in box c ----
data_eligible_c <- data_eligible_b %>%
left_join(data_vax_wide,
by = "patient_id") %>%
# keep brand of interest
# (already applied condition that 1st and 2nd doses are the same)
mutate(brand = case_when(covid_vax_2_brand %in% "pfizer" ~ "BNT162b2",
covid_vax_2_brand %in% "az" ~ "ChAdOx",
TRUE ~ NA_character_)) %>%
select(-ends_with("_brand")) %>%
# right join to keep only the jcvi_group:elig_date:region:brands
# with > n_threshold individuals vaccinated during the period
right_join(second_vax_period_dates,
by = c("jcvi_group", "elig_date", "region")) %>%
filter(
# second dose during second vax period
start_of_period <= covid_vax_2_date,
covid_vax_2_date <= end_of_period) %>%
select(patient_id, jcvi_group, elig_date, region, ethnicity,
covid_vax_2_date, covid_vax_3_date, brand,
start_of_period, end_of_period) %>%
droplevels()
################################################################################
# apply eligibility criteria in box d ----
# set seed so that 50:50 split reproducible
set.seed(study_parameters$seed)
data_eligible_d <- data_eligible_a %>%
# randomly split the unvax 50:50
# one group for odd comparisons, one for even
# so that no overlap in follow-up time across comparisons
mutate(split = factor(
rbernoulli(nrow(.), p=0.5),
labels = c("odd", "even"))) %>%
left_join(data_vax_wide %>%
select(-ends_with("_brand")),
by = "patient_id") %>%
left_join(second_vax_period_dates,
by = c("jcvi_group", "elig_date", "region")) %>%
# remove individuals who had received any vaccination before the start of the second vax period
filter(
is.na(covid_vax_1_date) | covid_vax_1_date >= start_of_period
) %>%
select(patient_id, jcvi_group, elig_date, region, ethnicity,
covid_vax_1_date, start_of_period, end_of_period, split) %>%
droplevels()
################################################################################
# apply eligibility criteria in box e ----
exclusion_e <- function(.data) {
# function to be applied in dplyr::filter
no_evidence_of <- function(cov_date, index_date) {
is.na(cov_date) | index_date < cov_date
}
.data %>%
left_join(data_processed, by = "patient_id") %>%
filter(
no_evidence_of(covid_any_date, start_of_period + weeks(2)),
no_evidence_of(longres_date, start_of_period),
no_evidence_of(endoflife_date, start_of_period),
no_evidence_of(midazolam_date, start_of_period)
) %>%
select(-all_of(names(data_processed)[!names(data_processed) %in% "patient_id"]))
}
data_eligible_e_vax <- data_eligible_c %>% exclusion_e()
data_eligible_e_unvax <- data_eligible_d %>% exclusion_e()
readr::write_rds(
data_eligible_e_vax,
here::here("output", "data", "data_eligible_e_vax.rds"),
compress = "gz")
readr::write_rds(
data_eligible_e_unvax,
here::here("output", "data", "data_eligible_e_unvax.rds"),
compress = "gz")
# for reading into study_definition_tests
data_eligible_e <- bind_rows(
data_eligible_e_vax %>%
transmute(patient_id,
elig_date,
start_1_date = covid_vax_2_date + days(14),
arm = "vax"),
data_eligible_e_unvax %>%
transmute(patient_id,
elig_date,
start_1_date = start_of_period + days(14),
arm = "unvax")
) %>%
mutate(across(ends_with("_date"), as.POSIXct))
readr::write_csv(
data_eligible_e,
here::here("output", "data", "data_eligible_e.csv")
)