# checks_cohort_clockstops.R
# Repository generated from opensafely/research-template
###############################################################
# This script checks for incorrect values in dates
# for all people with a closed RTT pathway (May21-May22)
###############################################################
# For running locally only #
#setwd("C:/Users/aschaffer/OneDrive - Nexus365/Documents/GitHub/waiting-list")
#getwd()
## Import libraries
library('tidyverse')
library('lubridate')
library('here')
library('dplyr')
library('ggplot2')
library('zoo')
library('reshape2')
library('fs')
## Load project helper functions (rounding function)
source(here::here("analysis", "custom_functions.R"))

## Create output directories if needed
# fs::dir_create() does nothing when the directory already exists.
# It forwards any extra arguments to fs::path(), so base dir.create()'s
# `showWarnings = FALSE` must NOT be passed here: it would be appended as a
# path component and create a stray "FALSE" sub-directory.
dir_create(here::here("output", "clockstops"), recurse = TRUE)
dir_create(here::here("output", "measures"), recurse = TRUE)
## Load data ##
# Read the clock-stops extract; the five date columns are parsed explicitly
# as YYYY-MM-DD, every other column uses readr's default column handling.
clockstops <- read_csv(
  file = here::here("output", "data", "dataset_clockstops.csv.gz"),
  col_types = cols(
    rtt_start_date = col_date(format = "%Y-%m-%d"),
    rtt_end_date   = col_date(format = "%Y-%m-%d"),
    reg_end_date   = col_date(format = "%Y-%m-%d"),
    dod            = col_date(format = "%Y-%m-%d"),
    end_date       = col_date(format = "%Y-%m-%d")
  )
)
#### Check dates for problems ##
# One-row summary counting pathways with implausible or missing dates.
# All counts except max_wait_time should be 0.
#
# Comparisons involving a missing date evaluate to NA; na.rm = TRUE keeps a
# single missing date from turning a whole count into NA. Missing RTT dates
# are reported separately through rtt_end_missing / rtt_start_missing.
check_dates <- clockstops %>%
  summarise(
    rtt_end_before_start = sum(rtt_end_date < rtt_start_date, na.rm = TRUE),
    rtt_end_after_dod = sum(!is.na(dod) & (rtt_end_date > dod), na.rm = TRUE),
    dereg_before_rtt_start = sum(
      !is.na(reg_end_date) & (reg_end_date < rtt_start_date),
      na.rm = TRUE
    ),
    rtt_end_missing = sum(is.na(rtt_end_date)),
    rtt_start_missing = sum(is.na(rtt_start_date)),
    end_before_start = sum(end_date < rtt_start_date, na.rm = TRUE),
    # Check max wait time for developing code
    max_wait_time = max(wait_time, na.rm = TRUE)
  )

write.csv(
  check_dates,
  file = here::here("output", "clockstops", "check_dates.csv"),
  row.names = FALSE
)
# Check time between death/ RTT end date
# Among pathways whose RTT end date falls after the date of death, tabulate
# the gap in days, with gaps of 30 days or more pooled into the value 30.
dod_end_time <- clockstops %>%
  filter(!is.na(dod) & (rtt_end_date > dod)) %>%
  mutate(
    time_dod_end = as.numeric(rtt_end_date - dod),
    time_dod_end_gp = pmin(time_dod_end, 30)
  ) %>%
  count(time_dod_end_gp, name = "count")

write.csv(
  dod_end_time,
  file = here::here("output", "clockstops", "check_end_dod_time.csv"),
  row.names = FALSE
)
# Number of rows/pathways/etc.
# Cap each count column at 10 so that values of 10 or more share one bucket.
num_per_person <- clockstops %>%
  mutate(
    count_rtt_rows = pmin(count_rtt_rows, 10),
    count_rtt_start_date = pmin(count_rtt_start_date, 10),
    count_patient_id = pmin(count_patient_id, 10),
    count_organisation_id = pmin(count_organisation_id, 10),
    # Capped on its own value; the earlier version tested count_rtt_rows
    # here, so referral counts were capped (or not) based on the wrong column.
    count_referral_id = pmin(count_referral_id, 10)
  )
# Tabulate the number of rows at each value of one count column.
#
# Args:
#   variable: unquoted name of the column in `data` to tabulate.
#   name:     label stored in the `var` column of the result.
#   data:     data frame to summarise; defaults to the script-level
#             `num_per_person`, so existing two-argument calls are unchanged.
#
# Returns:
#   An ungrouped tibble with columns `num_per_person` (the value of
#   `variable`), `total_count` (sum of `variable` over all rows of `data`),
#   `count` (number of rows with that value) and `var` (`name`).
group_summ <- function(variable, name, data = num_per_person) {
  data %>%
    mutate(total_count = sum({{ variable }})) %>%
    group_by({{ variable }}, total_count) %>%
    summarise(count = n()) %>%
    # summarise() only peels off the last grouping level; drop the rest so
    # the result does not stay grouped for downstream code.
    ungroup() %>%
    mutate(var = name) %>%
    rename(num_per_person = {{ variable }})
}
# Stack the per-variable summaries into one table and write it to CSV.
all <- do.call(
  rbind,
  list(
    group_summ(count_rtt_rows, "RTT rows"),
    group_summ(count_rtt_start_date, "RTT start dates"),
    group_summ(count_patient_id, "Patient IDs"),
    group_summ(count_organisation_id, "Organisation IDs"),
    group_summ(count_referral_id, "Referral IDs")
  )
)

write.csv(
  all,
  file = here::here("output", "clockstops", "check_num_per_person.csv"),
  row.names = FALSE
)