generated from opensafely/research-template
/
table_one.R
155 lines (150 loc) · 5.31 KB
/
table_one.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
## ###########################################################
## This script:
## - Imports data of the three waves
## - Makes 'table 1' (description of demographics / comorbidities)
## linda.nab@thedatalab.com - 20220304
## ###########################################################
# Load libraries & functions ---
library(here)
library(readr)
library(purrr)
library(dplyr)
library(gtsummary)
library(gt)
library(jsonlite)
## Load json file listing demographics and comorbidities
config <- fromJSON(here("analysis", "config.json"))
# Import data extracts of waves ---
input_files_processed <-
Sys.glob(here("output", "processed", "input_wave*.rds"))
data_processed <-
map(.x = input_files_processed,
.f = ~ readRDS(.x))
# labels in table
labels <- list(
agegroup ~ "Age Group",
sex ~ "Sex",
bmi ~ "Body Mass Index",
ethnicity ~ "Ethnicity",
smoking_status_comb ~ "Smoking status",
imd ~ "IMD quintile",
region ~ "Region",
hypertension ~ "Hypertension",
chronic_respiratory_disease ~ "Chronic respiratory disease",
asthma ~ "Asthma",
bp ~ "Blood pressure",
chronic_cardiac_disease ~ "Chronic cardiac disease",
diabetes_controlled ~ "Diabetes",
cancer ~ "Cancer (non haematological)",
haem_cancer ~ "Haematological malignancy",
dialysis_kidney_transplant ~ "Dialysis",
ckd ~ "Chronic kidney disease",
chronic_liver_disease ~ "Chronic liver disease",
stroke ~ "Stroke",
dementia ~ "Dementia",
other_neuro ~ "Other neurological disease",
organ_kidney_transplant ~ "Organ transplant",
asplenia ~ "Asplenia",
ra_sle_psoriasis ~ "Rheumatoid arthritis/ lupus/ psoriasis",
immunosuppression ~ "Immunosuppressive condition",
learning_disability ~ "Learning disability",
sev_mental_ill ~ "Severe mental illness"
)
# make in long format
data_waves <-
data_processed %>%
bind_rows(.id = "wave") %>%
mutate(wave = wave %>% as.factor())
# select data needed in table
data_waves_list <-
map(
.x = c("1", "2", "3"),
.f = ~ data_waves %>%
select(
wave,
agegroup,
sex,
config$demographics,
config$comorbidities,
died_ons_covid_flag_any
) %>%
filter(wave == .x) %>%
select(-wave)
)
# table has a column 'number of covid-19 related deaths (stratum %)
# make a list of tables stratified by death --> results in a
# FALSE and TRUE column with row percentages (percent = "row")
table_deaths_list <-
map(
.x = data_waves_list,
.f = ~ tbl_summary(
.x,
by = died_ons_covid_flag_any,
label = labels,
percent = "row",
digits = list(everything() ~ c(0, 3))
)
)
# table has a column 'number of individuals (column %) with the overall
# number of individuals in each wave --> needs column percent so therefore
# this is not added to the former tabel with add_overall() as add_overall
# would then take row percentages and we'd like column percentages here
table_overall_list <-
map(
.x = data_waves_list,
.f = ~ tbl_summary(
.x,
label = labels,
percent = "column",
include = c(-"died_ons_covid_flag_any"),
digits = list(everything() ~ c(0, 1))
)
)
# merge two list of tables --> output is a list with 3 tables with a overall
# column and TRUE FALSE for death column
table1_list <-
map2(.x = table_deaths_list,
.y = table_overall_list,
.f = ~ tbl_merge(list(.y, .x)) %>%
modify_spanning_header(everything() ~ NA))
# merge all 3 tables (now 1 table with for each wave 3 colmns)
table1 <-
tbl_merge(table1_list,
tab_spanner = c(
paste0("**Wave 1**, N = ", table_overall_list[[1]]$N),
paste0("**Wave 2**, N = ", table_overall_list[[2]]$N),
paste0("**Wave 3**, N = ", table_overall_list[[3]]$N)
))
# number of deaths in waves --> used in table header for overall column
n_deaths <- map(.x = data_processed,
.f = ~ .x %>%
filter(died_ons_covid_flag_any == TRUE) %>%
nrow())
## Reference in multicategorical variables are omitted
## + 'FALSE' column of deaths is hidden
table1 <-
table1 %>%
modify_table_body(
filter,
!(variable == "asthma" &
label == "No asthma") &
!(variable == "diabetes_controlled" &
label == "No diabetes") &
!(variable == "dialysis_kidney_transplant" &
label == "No dialysis") &
!(variable == "ckd" &
label == "No CKD") &
!(variable == "organ_kidney_transplant" &
label == "No transplant")) %>%
modify_column_hide(column = c(stat_1_2_1, stat_1_2_2, stat_1_2_3)) %>%
modify_header(stat_2_2_1 = paste0("**Number of COVID-19 related deaths (stratum %)**, N = ", n_deaths[[1]]),
stat_2_2_2 = paste0("**Number of COVID-19 related deaths (stratum %)**, N = ", n_deaths[[2]]),
stat_2_2_3 = paste0("**Number of COVID-19 related deaths (stratum %)**, N = ", n_deaths[[3]]),
stat_0_1_1 = paste0("**Number of individuals (column %)**"),
stat_0_1_2 = paste0("**Number of individuals (column %)**"),
stat_0_1_3 = paste0("**Number of individuals (column %)**")) %>%
modify_footnote(everything() ~ NA)
# Save output --
output_dir <- here("output", "tables")
ifelse(!dir.exists(output_dir), dir.create(output_dir), FALSE)
gtsave(table1 %>% as_gt(), paste0(output_dir, "/table1.html"))