generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
demographics_byage.R
104 lines (81 loc) · 2.81 KB
/
demographics_byage.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
###################################################################
# This script:
# - Calculates the frequency distribution of sex, IMD, region and
#   ethnicity by age in 3-month bands, using baseline data
###################################################################
# For running locally only #
# setwd("C:/Users/aschaffer/OneDrive - Nexus365/Documents/GitHub/vax-fourth-dose-RD")
# getwd()
# Import libraries #
library('tidyverse')
library('lubridate')
library('arrow')
library('here')
library('reshape2')
library('dplyr')
library('fs')
library('ggplot2')
library('RColorBrewer')
## Create directories
# NOTE: `showWarnings` is an argument of base::dir.create(), not of
# fs::dir_create(). fs::dir_create() forwards its extra arguments to
# fs::path(), so passing `showWarnings = FALSE` appended a stray "FALSE"
# component to the directory being created. Only real path components and
# `recurse` are passed here; `recurse = TRUE` creates missing parents.
dir_create(here::here("output", "cohort"), recurse = TRUE)
dir_create(here::here("output", "descriptive"), recurse = TRUE)
# Load functions
# (the rounding() and redact() helpers used further down are not defined in
# this script, so they are presumably provided by this file)
source(here::here("analysis", "custom_functions.R"))
##########################################
# Read in and prep data
##########################################
# Cohort restricted to ages 45-54, with age expressed in whole months and in
# 3-month bands relative to 2022-09-03, plus the size of each 3-month band.
demographics <- read_csv(here::here("output", "cohort", "cohort_final_sep.csv")) %>%
  dplyr::select(age, dob, imd, region, ethnicity, sex) %>%
  # Keep ages 45 to 54 inclusive; rows with a missing age are dropped
  dplyr::filter(age >= 45 & age < 55) %>%
  # Whole months between date of birth and 2022-09-03, then collapsed into
  # 3-month bands (band = floor(months / 3))
  mutate(
    age_mos = (dob %--% "2022-09-03") %/% months(1),
    age_3mos = floor(age_mos / 3)
  ) %>%
  # Denominator: number of people in each 3-month age band. add_count()
  # attaches the count as a column without leaving the data grouped.
  add_count(age_3mos, name = "total_age_3mos")
##########################################
# Function for summarising frequency
# distribution by age in 3-month bands
##########################################
# Tabulate one demographic variable within each 3-month age band.
#
# Arguments:
#   var  - unquoted name of the column to tabulate (e.g. sex, imd)
#   data - data frame containing `age_3mos`, `total_age_3mos` and `var`;
#          defaults to the `demographics` data prepared earlier in this script
#
# Returns an ungrouped tibble with one row per (age_3mos, var) combination:
#   n              - count in that combination, after rounding() and redact()
#   total_age_3mos - band denominator, after rounding() and redact()
#   pcent          - n / total_age_3mos * 100, computed from the processed
#                    (not the raw) counts
freq <- function(var, data = demographics) {
  data %>%
    # Count people in each category of `var` within each age band, carrying
    # the band denominator along as a grouping column so it survives tally()
    group_by(age_3mos, {{ var }}, total_age_3mos) %>%
    tally() %>%
    # rounding() and redact() are not defined in this script (presumably they
    # come from analysis/custom_functions.R); rounding is applied first, then
    # redaction, and the percentage uses the resulting values
    mutate(
      across(c(n, total_age_3mos), rounding),
      across(c(n, total_age_3mos), redact),
      pcent = n / total_age_3mos * 100
    ) %>%
    # tally() only drops the last grouping level, so the result would still be
    # grouped by (age_3mos, var). Callers rename and overwrite the `var`
    # column, which dplyr does not allow on a grouping column, so the
    # grouping is removed before returning.
    ungroup()
}
## Sex
# Frequency of sex by 3-month age band. The codes "M" and "F" are spelled out
# through a lookup vector; any other (or missing) value becomes NA.
sex <- freq(sex) %>%
  rename(category = sex) %>%
  mutate(
    variable = "Sex",
    category = unname(
      c(M = "Male", F = "Female")[as.character(category)]
    )
  )
## IMD
# Frequency of IMD category by 3-month age band. Categories 1 and 5 get
# descriptive labels, 2-4 are kept as they are, and any other (or missing)
# value becomes NA. The lookup is done on the character form of the code.
imd <- freq(imd) %>%
  rename(category = imd) %>%
  mutate(
    variable = "IMD",
    category = unname(
      c(
        "1" = "1 (most deprived)",
        "2" = "2",
        "3" = "3",
        "4" = "4",
        "5" = "5 (least deprived)"
      )[as.character(category)]
    )
  )
## Region
# Frequency of region by 3-month age band; category values are kept exactly
# as they appear in the data.
region <- mutate(
  rename(freq(region), category = region),
  variable = "Region"
)
## Ethnicity
# Frequency of ethnicity by 3-month age band; category values are kept
# exactly as they appear in the data.
ethnicity <- mutate(
  rename(freq(ethnicity), category = ethnicity),
  variable = "Ethnicity"
)
## Combine into one file ##
# Stack the four per-variable tables (IMD, sex, region, ethnicity, in that
# order); the `variable` column records which table each row came from.
demographics_by_age <- rbind(imd, sex, region, ethnicity)
############ Save ########################
demographics_by_age %>%
  write_csv(here::here("output", "descriptive", "demographics_by_age.csv"))