generated from opensafely/research-template
/
table_1.R
130 lines (106 loc) · 4.22 KB
/
table_1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
######################################
# This script:
# - Produces counts of patients prescribed antipsychotics, by demographic characteristics, prior to April 2021.
# - saves data summaries (as table)
######################################
# Preliminaries ----
## Import libraries
library('tidyverse')
library('lubridate')
library('reshape2')
library('here')
library('gt')
library('gtsummary')
## Make sure the folder for the output tables exists
## (no warning if it is already there; missing parent folders are created)
dir.create(
  path = here::here("output", "tables"),
  showWarnings = FALSE,
  recursive = TRUE
)

## Load the custom functions used by this script
source(file = here("analysis", "lib", "custom_functions.R"))

## Read in the cohort data
data_cohort <- arrow::read_feather(
  file = here::here("output", "data", "input_2021-04-01.feather")
)
## Table 1 ----
# Recode the demographic variables into labelled categories, keep only the
# columns needed for the table, then summarise them by antipsychotic status
# with an additional overall column.
# NOTE(review): fct_case_when() is not defined in this script; presumably it
# comes from analysis/lib/custom_functions.R -- confirm.
counts_table1 <- data_cohort %>%
  mutate(
    # Sex: values other than "F" / "M" become NA
    sex = as.character(sex),
    sex = fct_case_when(
      sex == "F" ~ "Female",
      sex == "M" ~ "Male",
      TRUE ~ NA_character_
    ),

    # Ethnicity: codes "1"-"5" from eth2001 are labelled; a missing eth2001 is
    # kept as its own "Unknown" category; any other code becomes NA
    ethnicity = as.character(eth2001),
    ethnicity = ifelse(is.na(eth2001), "Missing", ethnicity),
    ethnicity = fct_case_when(
      ethnicity == "1" ~ "White",
      ethnicity == "2" ~ "Mixed",
      ethnicity == "3" ~ "Asian or Asian British",
      ethnicity == "4" ~ "Black or Black British",
      ethnicity == "5" ~ "Other ethnic groups",
      ethnicity == "Missing" ~ "Unknown",
      #TRUE ~ "Unknown"
      TRUE ~ NA_character_
    ),

    # IMD: "0" is treated as missing; 1-5 are labelled, anything else is NA
    # NOTE(review): na_if() compares against the string "0" while the
    # conditions below compare against numbers -- confirm the stored type of
    # imd is compatible with both.
    imd = na_if(imd, "0"),
    imd = fct_case_when(
      imd == 1 ~ "1 most deprived",
      imd == 2 ~ "2",
      imd == 3 ~ "3",
      imd == 4 ~ "4",
      imd == 5 ~ "5 least deprived",
      #TRUE ~ "Unknown",
      TRUE ~ NA_character_
    ),

    # Region: relabel to display names; values not listed become NA
    region = fct_case_when(
      region == "London" ~ "London",
      region == "East" ~ "East of England",
      region == "East Midlands" ~ "East Midlands",
      region == "North East" ~ "North East",
      region == "North West" ~ "North West",
      region == "South East" ~ "South East",
      region == "South West" ~ "South West",
      region == "West Midlands" ~ "West Midlands",
      region == "Yorkshire and The Humber" ~ "Yorkshire and the Humber",
      #TRUE ~ "Unknown",
      TRUE ~ NA_character_
    ),

    # Age: left-closed intervals (right = FALSE), so the breaks must be the
    # first age of each band for the labels to be correct:
    # [0, 18) -> "0-17", [18, 25) -> "18-24", ..., [80, Inf) -> "80+".
    # (Breaks of 17, 24, ... would put ages 17, 24, ... in the next band up.)
    ageband = cut(
      age,
      breaks = c(0, 18, 25, 35, 45, 55, 70, 80, Inf),
      labels = c(
        "0-17", "18-24", "25-34", "35-44", "45-54", "55-69", "70-79", "80+"
      ),
      right = FALSE
    )
  ) %>%
  select(
    antipsychotic = antipsychotic_any,
    ageband,
    sex,
    region,
    imd,
    ethnicity
  ) %>%
  tbl_summary(by = antipsychotic) %>%
  add_overall()
# Redaction ----
## Suppress counts that are below `threshold`.
## If either subgroup count in a row is suppressed, the other subgroup count is
## suppressed as well, so that the hidden value cannot be recovered by
## subtracting the visible subgroup from the row total. If the row total itself
## is below the threshold, the whole row is suppressed.
threshold <- 8

table1_redacted <- counts_table1$table_body %>%
  # stat_0 is the overall column; stat_1 / stat_2 are the two `by` groups.
  # NOTE(review): assumes the first level of `antipsychotic` is the
  # non-antipsychotic group -- confirm against the data.
  select(
    group = variable,
    variable = label,
    total = stat_0,
    nonantipsychotic = stat_1,
    antipsychotic = stat_2
  ) %>%
  # Cells are expected to look like "n (p%)": keep the part before "(" as the
  # count and discard the percentage part afterwards
  separate(total, c("total", "perc"), sep = "([(])") %>%
  separate(nonantipsychotic, c("nonantipsychotic", "perc2"), sep = "([(])") %>%
  separate(antipsychotic, c("antipsychotic", "perc3"), sep = "([(])") %>%
  # Strip thousands separators before converting to numbers
  mutate(
    total = as.numeric(gsub(",", "", total)),
    nonantipsychotic = as.numeric(gsub(",", "", nonantipsychotic)),
    antipsychotic = as.numeric(gsub(",", "", antipsychotic))
  ) %>%
  # Drop rows that carry no total count (e.g. variable header rows)
  filter(!(is.na(total))) %>%
  select(-perc, -perc2, -perc3) %>%
  mutate(
    total = ifelse(total < threshold, NA, total),
    # nonantipsychotic is evaluated first and still sees the unredacted
    # antipsychotic count, so a small count on either side hides both
    nonantipsychotic = ifelse(
      is.na(total) |
        nonantipsychotic < threshold |
        antipsychotic < threshold,
      NA,
      nonantipsychotic
    ),
    antipsychotic = ifelse(
      antipsychotic < threshold | is.na(nonantipsychotic),
      NA,
      antipsychotic
    )
  )
## Round every count column to the nearest multiple of 5
table1_redacted <- mutate(
  table1_redacted,
  across(
    c(total, nonantipsychotic, antipsychotic),
    ~ plyr::round_any(.x, 5)
  )
)
## Show suppressed (NA) cells as the text [REDACTED]
table1_redacted <- mutate(
  table1_redacted,
  across(
    c(total, nonantipsychotic, antipsychotic),
    ~ ifelse(is.na(.x), "[REDACTED]", .x)
  )
)
# Save table 1 ----
## Write the redacted table to output/tables as a CSV file
table1_redacted %>%
  write_csv(here::here("output", "tables", "table1_redacted.csv"))