generated from opensafely/research-template
/
join_measures.R
61 lines (50 loc) · 2.17 KB
/
join_measures.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# This script loads all measure files and
# (1) joins them together
# (2) rounds counts to the nearest 10
# Note that the ungrouped measure (population) and grouped measures
# differ in the number of their variables
# Load packages ----
library(magrittr)
library(dplyr)
library(tidyr)
library(here)
library(readr)
library(fs)
library(purrr)
library(stringr)
# Get file names and path ----
# List every hyp001 measure csv file in the joined indicators directory.
# `glob` takes a wildcard pattern rather than a regular expression, so the
# pattern ends at ".csv" without a regex "$" anchor.
dir_hyp_001_measures <- fs::dir_ls(
  path = "output/indicators/joined",
  glob = "*hyp001*.csv"
)
# Split dir paths because file structure differs
## Grouped measures: every path that mentions neither "population" nor
## "practice" (the pattern is matched against the whole path)
dir_hyp_001_measures_groups <- dir_hyp_001_measures[
  !stringr::str_detect(dir_hyp_001_measures, "population|practice")
]
## Population measure: every path that mentions "population"
dir_hyp_001_measures_pop <- dir_hyp_001_measures[
  stringr::str_detect(dir_hyp_001_measures, "population")
]
# Load files ----
## Grouped measures: read each file and reshape it in the same step.
## The first column of every file is pivoted so that its name lands in
## `group` and its values (coerced to character) land in `category`; this
## gives all grouped files identical variable names before they are stacked
## into a single data frame.
df_hyp_001_measures_groups <- purrr::map_dfr(
  dir_hyp_001_measures_groups,
  function(measure_file) {
    tidyr::pivot_longer(
      readr::read_csv(measure_file),
      cols = 1,
      names_to = "group",
      values_to = "category",
      values_transform = list(category = as.character)
    )
  }
)
# Load population measure ---
# The population file lacks the `group` and `category` variables that the
# grouped measures carry, so both are filled with the constant "population"
df_hyp_001_measures_pop <- dplyr::mutate(
  readr::read_csv(here::here(dir_hyp_001_measures_pop)),
  group = "population",
  category = "population"
)
# Join all measures into one object ---
# Stack the grouped measure rows first, followed by the population rows
df_hyp_001_measures <- dplyr::bind_rows(
  df_hyp_001_measures_groups,
  df_hyp_001_measures_pop
)
# Round counts to the nearest 10
# round(digits = -1) rounds to the tens place. The rounding function is given
# to across() as an explicit function instead of passing extra arguments
# through `...`, which dplyr 1.1.0 deprecated. all_of() makes the column
# selection by name explicit.
df_hyp_001_measures <- df_hyp_001_measures %>%
  dplyr::mutate(
    dplyr::across(
      dplyr::all_of(c("hypertension_register", "population")),
      function(count) round(count, digits = -1)
    )
  )
# Write hyp001 csv file
## First create subdirectory (if it doesn't exist)
fs::dir_create(here::here("output", "measures"))
## Next, write csv file
readr::write_csv(
  df_hyp_001_measures,
  here::here("output", "measures", "measures_hyp001.csv")
)