generated from opensafely/covid-vaccine-research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_define_cohorts.R
48 lines (35 loc) · 1.74 KB
/
data_define_cohorts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# # # # # # # # # # # # # # # # # # # # #
# This script:
# imports processed data
# creates an indicator variable for each cohort of interest
# creates a metadata df that describes each cohort/outcome combination
# saves the indicators and the metadata to output/modeldata
# # # # # # # # # # # # # # # # # # # # #
# Preliminaries ----
## Import libraries ----
library('tidyverse')
## Create output directories ----
# Make sure output/modeldata exists before anything is written to it: missing
# parent folders are created as well, and no warning is raised when the
# directory is already there.
dir.create(
  path = here::here("output", "modeldata"),
  recursive = TRUE,
  showWarnings = FALSE
)

## Import processed data ----
# Load the processed dataset stored under output/data.
data_all <- readr::read_rds(
  here::here("output", "data", "data_all.rds")
)
## Define cohort membership indicators ----
# Result: one row per row of `data_all`, holding `patient_id` plus one logical
# column per cohort (TRUE = the patient belongs to that cohort).
#
# Criteria shared by every cohort:
#   * `care_home_type` is missing (labelled "non-carehome" in the metadata)
#   * `prior_positive_test_date` is missing (no prior positive test)
#   * `region` is not missing
# Cohort-specific criterion: age >= 80 (over80s) or age <= 64 (under65s).
#
# A comparison on a missing `age` yields NA rather than FALSE. Such patients
# cannot be confirmed as eligible, so `coalesce(..., FALSE)` excludes them and
# keeps the indicators strictly TRUE/FALSE; otherwise a single NA would make
# `sum()` over the column return NA.
data_cohorts <- data_all %>%
  mutate(
    # temporary helper column; dropped again by transmute() below
    meets_common_criteria =
      is.na(care_home_type) &
      is.na(prior_positive_test_date) &
      !is.na(region)
  ) %>%
  transmute(
    patient_id,
    over80s = coalesce(age >= 80 & meets_common_criteria, FALSE),
    under65s = coalesce(age <= 64 & meets_common_criteria, FALSE)
  )
## Define different cohorts ----
# One row per cohort/outcome pairing:
#   cohort        - name of the matching indicator column in `data_cohorts`
#   cohort_descr  - human-readable description of the cohort
#   outcome       - name of the outcome variable (presumably a date column in
#                   the processed data -- confirm against the import script)
#   outcome_descr - human-readable description of the outcome
#   cohort_size   - number of patients flagged TRUE for the cohort
#
# NOTE(review): the "under65s" description does not mention the care-home
# exclusion, and neither description mentions the "region is recorded"
# requirement, although both are applied when the indicators are built.
# Confirm whether the wording or the definition is the intended one.
metadata_cohorts <- tribble(
  ~cohort,    ~cohort_descr,                                    ~outcome,               ~outcome_descr, # ~postvax_cuts, ~knots,
  "over80s",  "Aged 80+, non-carehome, no prior positive test", "positive_test_1_date", "Positive test",
  "under65s", "Aged <=64, no prior positive test",              "positive_test_1_date", "Positive test"
) %>%
  mutate(
    cohort_size = map_int(
      cohort,
      function(cohort_name) sum(data_cohorts[[cohort_name]])
    )
  )

# Log the cohort sizes.
metadata_cohorts %>%
  select(cohort, cohort_size) %>%
  print(n = 100)

## Sanity checks ----
# `identical()` compares the two name vectors as a whole (length, order and
# content). An elementwise `==` would silently recycle the shorter vector and
# would pass vacuously if both sides were empty.
stopifnot(
  "cohort names should match" =
    identical(names(data_cohorts)[-1], metadata_cohorts$cohort)
)
# A missing value in an indicator column makes `sum()` return NA; report that
# explicitly instead of letting the size check below fail with an unrelated
# message.
stopifnot(
  "cohort indicators should not contain missing values" =
    !anyNA(metadata_cohorts$cohort_size)
)
stopifnot(
  "all cohorts should contain at least 1 patient" =
    all(metadata_cohorts$cohort_size > 0)
)
## Save cohort indicators and metadata ----
# Patient-level cohort indicators.
write_rds(
  data_cohorts,
  here::here("output", "modeldata", "data_cohorts.rds")
)

# Cohort metadata, once as rds and once as csv.
write_rds(
  metadata_cohorts,
  here::here("output", "modeldata", "metadata_cohorts.rds")
)
write_csv(
  metadata_cohorts,
  here::here("output", "modeldata", "metadata_cohorts.csv")
)