generated from opensafely/research-template
/
process_sus.R
89 lines (74 loc) · 2.63 KB
/
process_sus.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
######################################
# This script:
# imports data extracted by the cohort extractor (or dummy data)
# standardises some variables (eg convert to factor) and derives some new ones
# Stacks admission dates (one row per admission per patient)
######################################
# Import libraries ----
library('tidyverse')
library('lubridate')
library('arrow')
library('here')
library('glue')
# Import custom user functions from lib
source(here("analysis", "lib", "utility.R"))
# create the output directory for the processed admissions data ----
fs::dir_create(here("output", "admissions"))
# process ----
# Load a single admission extract and label it with its origin.
#
# Args:
#   method: admission-method label, interpolated into the file names.
#   n:      admission number, interpolated into the file names.
#
# Returns the imported data without `previous_admiss_date`, with
# `admiss_date` renamed to `admission_date`, and with `admission_method`
# and `admission_number` inserted as the first columns.
get_admissions <- function(method, n) {
  dummy_file <- here("dummy-output", glue("sus_method{method}_admission{n}.feather"))
  extract_file <- here("output", "admissions", glue("sus_method{method}_admission{n}.csv.gz"))

  # import_extract() comes from analysis/lib/utility.R (not shown here);
  # presumably it chooses between the dummy file and the real extract --
  # TODO confirm against that helper.
  admissions <- import_extract(dummy_file, extract_file)

  admissions <- select(admissions, -previous_admiss_date)
  admissions <- rename(admissions, admission_date = admiss_date)

  # Identify which extract each row came from; place the labels up front.
  add_column(
    admissions,
    admission_method = method,
    admission_number = n,
    .before = 1
  )
}
# Stack the numbered admission extracts for one admission method (one row per
# admission per patient) and standardise the demographic variables.
#
# Args:
#   method:       admission-method label (e.g. "A"), passed to get_admissions().
#   n_admissions: how many numbered admission extracts (1..n_admissions) to
#                 stack. Defaults to 5, the number that used to be hard-coded.
#
# Returns the row-bound admissions with:
#   ageband - `age` cut into left-closed bands ("under 18", ..., "75+");
#   sex     - "F"/"M" recoded to "Female"/"Male", anything else NA;
#   region  - regions collapsed into broader groupings.
process_admissions <- function(method, n_admissions = 5) {
  stopifnot(
    is.numeric(n_admissions),
    length(n_admissions) == 1,
    !is.na(n_admissions),
    n_admissions >= 1,
    n_admissions %% 1 == 0
  )

  # as.numeric() keeps `admission_number` a double, exactly as when the calls
  # were written out with the literals 1..5.
  admission_numbers <- as.numeric(seq_len(n_admissions))

  admission_numbers %>%
    lapply(function(i) get_admissions(method, i)) %>%
    bind_rows() %>%
    mutate(
      # right = FALSE makes each band include its lower bound: [18, 40) etc.
      ageband = cut(
        age,
        breaks = c(-Inf, 18, 40, 55, 65, 75, Inf),
        labels = c("under 18", "18-39", "40-54", "55-64", "65-74", "75+"),
        right = FALSE
      ),
      # fct_case_when() is not defined in this file; presumably it comes from
      # analysis/lib/utility.R -- TODO confirm.
      sex = fct_case_when(
        sex == "F" ~ "Female",
        sex == "M" ~ "Male",
        #sex == "I" ~ "Inter-sex",
        #sex == "U" ~ "Unknown",
        TRUE ~ NA_character_
      ),
      region = fct_collapse(
        region,
        `East of England` = "East",
        `London` = "London",
        `Midlands` = c("West Midlands", "East Midlands"),
        `North East and Yorkshire` = c("Yorkshire and The Humber", "North East"),
        `North West` = "North West",
        `South East` = "South East",
        `South West` = "South West"
      )
    )
}
# Build the stacked, standardised admissions table for each admission method.
# All methods are processed before anything is written to disk.
admissions_A_processed <- process_admissions("A")
admissions_B_processed <- process_admissions("B")
admissions_C_processed <- process_admissions("C")
# Methods D and E are currently disabled.
# admissions_D_processed <- process_admissions("D")
# admissions_E_processed <- process_admissions("E")

# Save each processed table as a gzip-compressed .rds file in output/admissions.
admissions_A_processed %>%
  write_rds(here("output", "admissions", "processed_sus_A.rds"), compress = "gz")
admissions_B_processed %>%
  write_rds(here("output", "admissions", "processed_sus_B.rds"), compress = "gz")
admissions_C_processed %>%
  write_rds(here("output", "admissions", "processed_sus_C.rds"), compress = "gz")
# admissions_D_processed %>%
#   write_rds(here("output", "admissions", "processed_sus_D.rds"), compress = "gz")
# admissions_E_processed %>%
#   write_rds(here("output", "admissions", "processed_sus_E.rds"), compress = "gz")