generated from opensafely/research-template
/
flow_chart.R
73 lines (62 loc) · 2.38 KB
/
flow_chart.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# # # # # # # # # # # # # # # # # # # # #
# This script:
# imports processed data
# creates indicator variables for each cohort inclusion criterion
# counts the patients remaining after each criterion is applied in turn
# saves the resulting flowchart table as a .csv file
# # # # # # # # # # # # # # # # # # # # #
# Preliminaries ----
## Import libraries
library('tidyverse')
library('here')
library('glue')
## Command-line arguments (only those supplied after the script name)
# NOTE(review): `args` is not referenced again in the visible script.
args <- commandArgs(trailingOnly = TRUE)

## Make sure output/data exists ----
# Warnings are silenced so an already-existing directory is not reported.
dir.create(
  here("output", "data"),
  recursive = TRUE,
  showWarnings = FALSE
)

## Load the processed datasets
# `data_processed` is the input to the exclusion-criteria step below.
data_processed <- here("output", "data", "data_all.rds") %>%
  read_rds()

# NOTE(review): `data_processed_final` is loaded but not referenced again in
# the visible script.
data_processed_final <- here("output", "data", "data_processed.rds") %>%
  read_rds()
# Exclusion criteria ----
# Keep rows whose `follow_up_time_vax2` is at least 14 (presumably days after
# the second vaccine dose -- confirm), then add one logical flag per
# inclusion criterion plus an overall `include` flag.
data_criteria <- data_processed %>%
  filter(follow_up_time_vax2 >= 14) %>%
  mutate(
    # Unnamed expression: dplyr names the result after the expression itself,
    # so this re-assigns `patient_id` to its own values (and errors if the
    # column does not exist).
    patient_id,

    # Age in [16, 110); NA when `age` is missing.
    has_age = 16 <= age & age < 110,

    # Sex recorded and not one of the codes "I" / "U".
    has_sex = !(is.na(sex) | sex %in% c("I", "U")),

    # One flag per outcome indicator: TRUE when the indicator equals 0.
    no_outcomes_within_2_weeks_post_vax2_1 =
      covid_positive_test_within_2_weeks_post_vax2 == 0,
    no_outcomes_within_2_weeks_post_vax2_2 =
      covid_hospitalisation_within_2_weeks_post_vax2 == 0,
    no_outcomes_within_2_weeks_post_vax2_3 =
      covid_death_within_2_weeks_post_vax2 == 0,

    # Overall inclusion: every criterion above must hold.
    include =
      has_age &
        has_sex &
        no_outcomes_within_2_weeks_post_vax2_1 &
        no_outcomes_within_2_weeks_post_vax2_2 &
        no_outcomes_within_2_weeks_post_vax2_3
  )
# Flowchart data ----
# Builds a one-row-per-criterion table of how many patients remain after each
# inclusion criterion is applied in turn.
#
# Criteria are cumulative: each step is the previous step AND one further
# flag, so a patient is counted at step k only if they passed steps 0..k-1.
# Flags that are NA are not counted (they are dropped by `na.rm = TRUE`), so
# patients with an undetermined criterion are treated as excluded.
data_flowchart <- data_criteria %>%
  transmute(
    c0_all = TRUE,
    c1_notmissing = c0_all & has_age & has_sex,
    c2_no_outcomes_within_2_weeks_post_vax2_1 =
      c1_notmissing & no_outcomes_within_2_weeks_post_vax2_1,
    c3_no_outcomes_within_2_weeks_post_vax2_2 =
      c2_no_outcomes_within_2_weeks_post_vax2_1 &
        no_outcomes_within_2_weeks_post_vax2_2,
    c4_no_outcomes_within_2_weeks_post_vax2_3 =
      c3_no_outcomes_within_2_weeks_post_vax2_2 &
        no_outcomes_within_2_weeks_post_vax2_3
  ) %>%
  summarise(
    # Explicit `.cols` and a lambda instead of passing `na.rm` through `...`:
    # both the empty-`.cols` form and `...` forwarding are deprecated in
    # dplyr 1.1.0.
    across(everything(), ~ sum(.x, na.rm = TRUE))
  ) %>%
  pivot_longer(
    cols = everything(),
    names_to = "criteria",
    values_to = "n"
  ) %>%
  mutate(
    # Patients dropped by this step relative to the previous one; NA on the
    # first row, which has no previous step.
    n_exclude = lag(n) - n,
    # Proportion (not x100) of the previous step's patients dropped here.
    pct_exclude = n_exclude / lag(n),
    # Proportion of the starting population still remaining.
    pct_all = n / first(n),
    # Proportion of the previous step's patients retained by this step.
    pct_step = n / lag(n)
  )
# Write the flowchart table to output/data/flowchart.csv ----
write_csv(
  data_flowchart,
  here("output", "data", "flowchart.csv")
)