# process_data.R
# Repository generated from opensafely/research-template
######################################
# This script:
# - imports data extracted by the cohort extractor
# - combines ethnicity columns
# - calculates absolute number of antipsychotics issued in each group
# - standardises some variables (e.g. convert to factor) and derives some new ones
# - redacts counts below the disclosure threshold and recalculates totals
# - saves processed dataset(s)
######################################
# Preliminaries ----
## Import libraries
library('tidyverse')
library('lubridate')
library('arrow')
library('here')
## Custom functions
source(here("analysis", "lib", "custom_functions.R"))
# Process data ----
## Prevalence datasets
# Build the "All"-population prevalence table: one row per date, one column per
# antipsychotic category. The per-measure CSVs are joined on `date`, then the
# subgroup tables returned by combine_measures() are appended row-wise.
# NOTE(review): combine_measures() is not defined in this file (presumably it
# comes from analysis/lib/custom_functions.R) - confirm it returns the same
# columns and parses `date` with the same format as below.
data_prevalence <- full_join(
  read.csv(here::here("output", "data", "measure_antipsychotic_all_any.csv"))[, c("antipsychotic_any", "population", "value", "date")],
  read.csv(here::here("output", "data", "measure_antipsychotic_all_first_gen.csv"))[, c("antipsychotics_first_gen", "date")],
  by = "date"
) %>%
  full_join(
    read.csv(here::here("output", "data", "measure_antipsychotic_all_second_gen.csv"))[, c("antipsychotics_second_gen", "date")],
    by = "date"
  ) %>%
  full_join(
    read.csv(here::here("output", "data", "measure_antipsychotic_all_injectable_and_depot.csv"))[, c("antipsychotics_injectable_and_depot", "date")],
    by = "date"
  ) %>%
  full_join(
    read.csv(here::here("output", "data", "measure_antipsychotic_all_prochlorperazine.csv"))[, c("prochlorperazine", "date")],
    by = "date"
  ) %>%
  # `value` is read from the first file but deliberately not carried forward.
  select(
    date,
    antipsychotic_any,
    antipsychotics_first_gen,
    antipsychotics_second_gen,
    antipsychotics_injectable_and_depot,
    prochlorperazine,
    population
  ) %>%
  mutate(
    # `%m` is the month; the previous `%M` means minutes, which left the month
    # unspecified so every date was parsed into the current calendar month.
    date = as.Date(as.character(date), format = "%Y-%m-%d"),
    group = "All"
  ) %>%
  rbind(
    combine_measures(group = "dementia", incident = FALSE),
    combine_measures(group = "care_home", incident = FALSE),
    combine_measures(group = "learning_disability", incident = FALSE),
    combine_measures(group = "autism", incident = FALSE),
    combine_measures(group = "serious_mental_illness", incident = FALSE)
  )
## Incident datasets
# Build the "All"-population incident table: one row per date, one column per
# antipsychotic category. The per-measure CSVs are joined on `date`, then the
# subgroup tables returned by combine_measures(incident = TRUE) are appended.
# NOTE(review): combine_measures() is not defined in this file (presumably it
# comes from analysis/lib/custom_functions.R) - confirm it returns the same
# columns and parses `date` with the same format as below.
data_incident <- full_join(
  read.csv(here::here("output", "data", "measure_antipsychotic_all_any_incident.csv"))[, c("antipsychotic_any_incident", "population", "value", "date")],
  read.csv(here::here("output", "data", "measure_antipsychotic_all_first_gen_incident.csv"))[, c("antipsychotics_first_gen_incident", "date")],
  by = "date"
) %>%
  full_join(
    read.csv(here::here("output", "data", "measure_antipsychotic_all_second_gen_incident.csv"))[, c("antipsychotics_second_gen_incident", "date")],
    by = "date"
  ) %>%
  full_join(
    read.csv(here::here("output", "data", "measure_antipsychotic_all_injectable_and_depot_incident.csv"))[, c("antipsychotics_injectable_and_depot_incident", "date")],
    by = "date"
  ) %>%
  full_join(
    read.csv(here::here("output", "data", "measure_antipsychotic_all_prochlorperazine_incident.csv"))[, c("prochlorperazine_incident", "date")],
    by = "date"
  ) %>%
  # `value` is read from the first file but deliberately not carried forward.
  select(
    date,
    antipsychotic_any_incident,
    antipsychotics_first_gen_incident,
    antipsychotics_second_gen_incident,
    antipsychotics_injectable_and_depot_incident,
    prochlorperazine_incident,
    population
  ) %>%
  mutate(
    # `%m` is the month; the previous `%M` means minutes, which left the month
    # unspecified so every date was parsed into the current calendar month.
    date = as.Date(as.character(date), format = "%Y-%m-%d"),
    group = "All"
  ) %>%
  rbind(
    combine_measures(group = "dementia", incident = TRUE),
    combine_measures(group = "care_home", incident = TRUE),
    combine_measures(group = "learning_disability", incident = TRUE),
    combine_measures(group = "autism", incident = TRUE),
    combine_measures(group = "serious_mental_illness", incident = TRUE)
  )
# Redaction ----
## Redact values < 8
# Counts strictly below this value are suppressed (replaced with NA).
threshold <- 8

# Apply the same suppression rule to every prevalence count column; `date`,
# `population` and `group` are left untouched.
data_prevalence_redacted <- data_prevalence %>%
  mutate(
    across(
      c(
        antipsychotic_any,
        antipsychotics_first_gen,
        antipsychotics_second_gen,
        antipsychotics_injectable_and_depot,
        prochlorperazine
      ),
      ~ ifelse(.x < threshold, NA, as.numeric(.x))
    )
  )

# Same rule for the incident count columns.
data_incident_redacted <- data_incident %>%
  mutate(
    across(
      c(
        antipsychotic_any_incident,
        antipsychotics_first_gen_incident,
        antipsychotics_second_gen_incident,
        antipsychotics_injectable_and_depot_incident,
        prochlorperazine_incident
      ),
      ~ ifelse(.x < threshold, NA, as.numeric(.x))
    )
  )
## Recalculate totals
# Rebuild the overall prevalence count as the row-wise sum of the redacted
# category columns. Redacted (NA) categories are skipped, so a row whose
# categories are all NA gets a total of 0.
data_prevalence_redacted[["antipsychotic_any"]] <- rowSums(
  data_prevalence_redacted[c(
    "antipsychotics_first_gen",
    "antipsychotics_second_gen",
    "antipsychotics_injectable_and_depot",
    "prochlorperazine"
  )],
  na.rm = TRUE
)
# Rebuild the overall incident count as the row-wise sum of the redacted
# category columns. Redacted (NA) categories are skipped, so a row whose
# categories are all NA gets a total of 0.
# The incident table's total column is `antipsychotic_any_incident`; previously
# the sum was written to a new `antipsychotic_any` column, leaving the real
# total column with its un-recalculated value.
data_incident_redacted$antipsychotic_any_incident <- rowSums(
  data_incident_redacted[, c(
    "antipsychotics_first_gen_incident",
    "antipsychotics_second_gen_incident",
    "antipsychotics_injectable_and_depot_incident",
    "prochlorperazine_incident"
  )],
  na.rm = TRUE
)
# Legacy alias: earlier versions of this script emitted the recalculated total
# under this name, so it is kept in case downstream code reads it.
# NOTE(review): drop once no consumer relies on `antipsychotic_any` here.
data_incident_redacted$antipsychotic_any <-
  data_incident_redacted$antipsychotic_any_incident
# Save datasets ----
## Save as .csv
# Write both redacted tables to output/data. Row names are written as well
# (write.csv default), so each file has a leading index column.
write.csv(
  x = data_prevalence_redacted,
  file = here::here("output", "data", "data_prevalence_redacted.csv")
)
write.csv(
  x = data_incident_redacted,
  file = here::here("output", "data", "data_incident_redacted.csv")
)