generated from opensafely/research-template
/
process_ts_demo.R
88 lines (71 loc) · 3.32 KB
/
process_ts_demo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
######################################################
# This script:
# - imports measures data for prescribing stratified
# by demographics
# - cleans up data
# - applies rounding and redaction
# - saves processed dataset(s)
#######################################################
# For running locally only #
#setwd("C:/Users/aschaffer/OneDrive - Nexus365/Documents/GitHub/opioids-covid-research")
#getwd()
# Import libraries #
library('tidyverse')
library('lubridate')
library('arrow')
library('here')
library('reshape2')
library('dplyr')
library('fs')
## Custom functions
# Load the project helpers once; sourcing the same file a second time further
# down only re-ran it and has been dropped.
source(here("analysis", "lib", "custom_functions.R"))

# Create output directories
# NOTE: fs::dir_create() has no `showWarnings` argument (that belongs to base
# dir.create()). Extra arguments in `...` are forwarded to fs::path() and
# appended as path components, so `showWarnings = FALSE` ended up creating a
# stray "FALSE" sub-directory inside each output folder. fs::dir_create()
# leaves an existing directory unchanged, so no suppression is needed.
dir_create(here::here("output", "timeseries"), recurse = TRUE)
dir_create(here::here("output", "measures"), recurse = TRUE)
###############################
# Clean up measures datasets #
###############################
# By demographics
## Prevalent
# Reads the prevalent-prescribing measures file and reshapes it to one row per
# month / category / variable with the count, the population and the rate per
# 1000 population.
demo_prev <- read_csv(
  here::here("output", "measures", "measures_demo_prev.csv")
) %>%
  mutate(
    month = as.Date(interval_start, format = "%Y-%m-%d"),
    # Three periods, split at 2020-03-01 and 2021-04-01
    period = ifelse(
      month >= as.Date("2021-04-01"), "Recovery",
      ifelse(month < as.Date("2020-03-01"), "Pre-COVID", "Lockdown")
    ),
    # First non-missing value across the stratifier columns
    cat = coalesce(age_group, sex, region, imd, ethnicity6),
    # `var` is the measure name without its "opioid_any_" prefix; it has to be
    # derived before `measure` itself is truncated on the next line
    var = gsub("opioid_any_", "", measure, fixed = TRUE),
    # Keep only the first 10 characters ("opioid_any") so the pivot below
    # yields a single numerator and a single denominator column
    measure = substring(measure, 1, 10)
  ) %>%
  dplyr::select(measure, month, cat, var, numerator, denominator, period) %>%
  pivot_wider(
    names_from = measure,
    values_from = c(numerator, denominator)
  ) %>%
  rename(
    opioid_any = numerator_opioid_any,
    pop_total = denominator_opioid_any
  ) %>%
  mutate(rate_opioid_any = opioid_any / pop_total * 1000)
## New
# Reads the new-prescribing measures file and reshapes it to one row per
# month / category / variable with the count, the population used as the
# denominator and the rate per 1000 population.
demo_new <- read_csv(
  here::here("output", "measures", "measures_demo_new.csv")
) %>%
  mutate(
    month = as.Date(interval_start, format = "%Y-%m-%d"),
    # First non-missing value across the stratifier columns
    cat = coalesce(age_group, sex, region, imd, ethnicity6),
    # `var` is the measure name without its "opioid_new_" prefix; it has to be
    # derived before `measure` itself is truncated on the next line
    var = gsub("opioid_new_", "", measure, fixed = TRUE),
    # Keep only the first 10 characters ("opioid_new") so the pivot below
    # yields a single numerator and a single denominator column
    measure = substring(measure, 1, 10)
  ) %>%
  dplyr::select(measure, month, cat, var, numerator, denominator) %>%
  pivot_wider(
    names_from = measure,
    values_from = c(numerator, denominator)
  ) %>%
  rename(
    opioid_new = numerator_opioid_new,
    pop_naive = denominator_opioid_new
  ) %>%
  mutate(rate_opioid_new = opioid_new / pop_naive * 1000)
# Combine the new and prevalent series on month / category / variable.
# merge() with its default `all = FALSE` keeps only keys present in both
# tables. The combined series is then saved without row names.
demo <- merge(demo_new, demo_prev, by = c("month", "cat", "var"))
write.csv(
  demo,
  file = here::here("output", "timeseries", "ts_demo.csv"),
  row.names = FALSE
)
###########################
# Rounding and redaction #
###########################
# By demographics
# Reads the combined series back from ts_demo.csv, passes every count and
# population through rounding() (presumably defined in
# analysis/lib/custom_functions.R, which is sourced earlier -- not shown
# here), recomputes the rates per 1000 from the rounded values, and drops the
# unrounded columns before saving.
demo_round <- read_csv(
  here::here("output", "timeseries", "ts_demo.csv")
) %>%
  mutate(
    opioid_any_round = rounding(opioid_any),
    opioid_new_round = rounding(opioid_new),
    pop_total_round = rounding(pop_total),
    pop_naive_round = rounding(pop_naive)
  ) %>%
  # Rates are derived from the rounded numerators and denominators only
  mutate(
    rate_opioid_any_round = opioid_any_round / pop_total_round * 1000,
    rate_opioid_new_round = opioid_new_round / pop_naive_round * 1000
  ) %>%
  dplyr::select(
    -c(
      opioid_any, opioid_new, pop_total, pop_naive,
      rate_opioid_any, rate_opioid_new
    )
  )
write.csv(
  demo_round,
  file = here::here("output", "timeseries", "ts_demo_rounded.csv"),
  row.names = FALSE
)