-
Notifications
You must be signed in to change notification settings - Fork 7
/
check_oe_bcva_4m_late_early_tot.R
220 lines (207 loc) · 9.17 KB
/
check_oe_bcva_4m_late_early_tot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#' @title Check if 4m BCVA test stops too late, too early and has correct total
#'
#' @description This ophthalmology check is for BCVA 4m test. It checks three conditions: <1> BCVA test stops too late,
#' meaning that lines were read after number of correct letters is <= 3. <2> BCVA test stops too early, meaning that
#' further lines were not read when all numbers of correct letters is > 3. <3> BCVA total score is not correct, meaning
#' that the sum of the number of correct at 4 meters doesn't match with what has been recorded in eCRF
#' (BCVA Scores eCRF Page - A. Total number correct at 4m). Please note that this check only works with USUBJID, VISIT,
#' VISITNUM, OELOC, OELAT combination has unique dates (OEDTC). If your datasets are having situations like 1) unscheduled
#' visits happening on different dates or 2) BCVA TOTAL happens on a different date from BCVA row tests, such combinations
#' will be removed from check.
#' Please note that this check excludes forms BCVA Low Vision Test (BCV5), BCVA Scores (BCV7),
#' BCVA Low Luminance Scores (BCVLL5), BCVA Combined Assessments (BCVAC), BCVA Low Luminance Combined Assessments (BCVACLL)
#' before running check as these forms do not include Row numbers.
#'
#' @param OE Ophtho Dataset with variables USUBJID, OESPID, OECAT, OESCAT, OETSTDTL, OESTRESN, OESTAT, OELOC, OELAT,
#' OERESCAT, VISIT, VISITNUM, OEDTC, OEDY
#'
#' @return boolean value if check failed or passed with 'msg' attribute if the
#' test failed
#'
#' @importFrom dplyr %>% filter mutate select lag lead rename arrange summarise group_by ungroup
#'
#' @family OPHTH
#'
#' @keywords OPHTH
#'
#' @export
#'
#' @author Rosemary Li (HackR 2021 Team Eye)
#'
#' @examples
#' OE_too_late <- data.frame(
#' USUBJID = "1",
#' OESPID = "FORMNAME-R:2/L:2XXXX",
#' OECAT = "BEST CORRECTED VISUAL ACUITY",
#' OETSTDTL = "TESTING DISTANCE: 4M",
#' OESCAT = c(rep("", 6), "TOTAL"),
#' OESTAT = "",
#' OERESCAT = c("ROW 1 - SNELLEN 20/200",
#' "ROW 2 - SNELLEN 20/160",
#' "ROW 4 - SNELLEN 20/100",
#' "ROW 3 - SNELLEN 20/125",
#' "ROW 5 - SNELLEN 20/80",
#' "ROW 6 - SNELLEN 20/63",
#' ""),
#' VISIT = "WEEK 1",
#' VISITNUM = 5,
#' OEDTC = "2020-06-01",
#' OEDY = 8,
#' OELOC = "EYE",
#' OELAT = "LEFT",
#' OESTRESN = c(5, 5, 5, 4, 3, 2, 24)
#' )
#'check_oe_bcva_4m_late_early_tot(OE_too_late)
#'
#' OE_too_early <- data.frame(
#' USUBJID = "1",
#' OESPID = "FORMNAME-R:2/L:2XXXX",
#' OECAT = "BEST CORRECTED VISUAL ACUITY",
#' OETSTDTL = "TESTING DISTANCE: 4M",
#' OESCAT = c(rep("", 6), "TOTAL"),
#' OESTAT = "",
#' OERESCAT = c("ROW 1 - SNELLEN 20/200",
#' "ROW 2 - SNELLEN 20/160",
#' "ROW 4 - SNELLEN 20/100",
#' "ROW 3 - SNELLEN 20/125",
#' "ROW 5 - SNELLEN 20/80",
#' "ROW 6 - SNELLEN 20/63",
#' ""),
#' VISIT = "WEEK 1",
#' VISITNUM = 5,
#' OEDTC = "2020-06-01",
#' OEDY = 8,
#' OELOC = "EYE",
#' OELAT = "LEFT",
#' OESTRESN = c(5, 5, 5, 4, 4, 5, 28)
#' )
#' check_oe_bcva_4m_late_early_tot(OE_too_early)
#'
#' OE_total_incorrect <- data.frame(
#' USUBJID = "1",
#' OESPID = "FORMNAME-R:2/L:2XXXX",
#' OECAT = "BEST CORRECTED VISUAL ACUITY",
#' OETSTDTL = "TESTING DISTANCE: 4M",
#' OESCAT = c(rep("", 6), "TOTAL"),
#' OESTAT = "",
#' OERESCAT = c("ROW 1 - SNELLEN 20/200",
#' "ROW 2 - SNELLEN 20/160",
#' "ROW 4 - SNELLEN 20/100",
#' "ROW 3 - SNELLEN 20/125",
#' "ROW 5 - SNELLEN 20/80",
#' "ROW 6 - SNELLEN 20/63",
#' ""),
#' VISIT = "WEEK 1",
#' VISITNUM = 5,
#' OEDTC = "2020-06-01",
#' OEDY = 8,
#' OELOC = "EYE",
#' OELAT = "LEFT",
#' OESTRESN = c(5, 5, 5, 4, 4, 2, 28)
#' )
#' check_oe_bcva_4m_late_early_tot(OE_total_incorrect)
#'
#'
#'
#'
check_oe_bcva_4m_late_early_tot <- function(OE) {
required_variables <- c(
"USUBJID", "OECAT", "OESCAT", "OETSTDTL", "OESTAT", "OERESCAT", "VISIT", "VISITNUM", "OEDTC", "OEDY",
"OELOC", "OELAT", "OESTRESN"
)
output_variables <- c( "USUBJID", "OETSTDTL", "VISIT", "OEDTC", "OELAT", "OESTRESN")
str_match <- function(x, pattern) {
m <- regexpr(pattern, x)
regmatches(x, m)
}
to_upper_variables <- function(variables) {
if(is.character(variables)) return(toupper(variables))
else return(variables)
}
if(OE %lacks_any% c("OESPID")){
fail(lacks_msg(OE, c("OESPID")))
} else if(OE %>% filter(!grepl("BCV5|BCV7|BCVLL5|BCVAC|BCVACLL", OESPID) &
OECAT == "BEST CORRECTED VISUAL ACUITY" & OETSTDTL == "TESTING DISTANCE: 4M") %>% nrow() == 0){
pass()
} else if (OE %lacks_any% required_variables) {
fail(lacks_msg(OE, required_variables))
} else {
# preprocessing OE dataset to get the relavant BCVA 4m test data
### select required variables
BCVA_4m <- OE %>%
select(all_of(required_variables))
### change all character variables to uppercase for the OE subset dataset
# BCVA_4m <- data.frame(lapply(BCVA_4m, to_upper_variables))
for(var in required_variables){
BCVA_4m[[var]]=to_upper_variables(BCVA_4m[[var]])
}
### filter on BCVA 4m criteria
BCVA_4m <- BCVA_4m %>%
filter(
OECAT == "BEST CORRECTED VISUAL ACUITY" & OETSTDTL == "TESTING DISTANCE: 4M" &
(!OESTAT %in% c("NOT DONE", "ND")) & OESCAT != "LOW LUMINANCE"
)
# check if dates are unique for each USUBJID, VISITNUM, OELAT combination, if not, such a combination will be
# removed from the BCVA check including situations like 1) unscheduled visits on different dates, or 2) TOTAL
# date happens on a dfferent date from the BCVA test date
nonunique_dates_for_each_sub_visit <- BCVA_4m %>%
group_by(USUBJID, VISIT, VISITNUM, OELAT) %>%
summarise(num_of_dates = length(unique(OEDTC)), .groups = "drop") %>%
filter(num_of_dates > 1) %>%
ungroup()
BCVA_4m <- BCVA_4m %>% left_join(
nonunique_dates_for_each_sub_visit,
by = c("USUBJID", "VISIT", "VISITNUM", "OELAT")
) %>%
filter(is.na(num_of_dates)) %>%
select( - num_of_dates)
# subcheck 1: if BCVA 4m test stops too late
anl1 <- BCVA_4m %>% filter(grepl("ROW \\d+ - SNELLEN \\d+/\\d+", OERESCAT)) %>%
mutate(ROW = str_match(OERESCAT, "ROW \\d+"), ROWNUM = as.integer(str_match(ROW, "\\d+"))) %>%
arrange(USUBJID, VISITNUM, OEDTC, OELAT, ROWNUM)
late_rows <- anl1 %>% group_by(USUBJID, VISITNUM, OEDTC, OELOC, OELAT) %>%
mutate(LAG_OESTRESN = lag(OESTRESN)) %>%
filter(LAG_OESTRESN <= 3) %>%
mutate(MAX_ROW = ifelse(is.na(ROWNUM), NA, min(ROWNUM))) %>%
filter(ROWNUM == MAX_ROW) %>%
ungroup() %>%
select(USUBJID, VISITNUM, OEDTC, OELOC, OELAT, MAX_ROW)
late4m <- anl1 %>% left_join(late_rows, by = c("USUBJID", "VISITNUM", "OEDTC", "OELOC", "OELAT")) %>%
mutate(issue = ifelse(ROWNUM >= MAX_ROW, "BCVA 4m check stops too late", NA)) %>%
filter(!is.na(issue) & OESTRESN != 0) %>%
mutate(TOTAL = NA) %>%
select(all_of(output_variables), TOTAL, issue) %>%
unique() #### remove possible duplicates in the result
# subcheck 2: if BCVA 4m test stops too early
## anl2 is the same as anl1
early4m <- anl1 %>% group_by(USUBJID, VISITNUM, OEDTC, OELOC, OELAT) %>%
mutate(LEAD_OESTRESN = lead(OESTRESN)) %>%
filter(is.na(LEAD_OESTRESN) & OESTRESN > 3 & ROWNUM != 14) %>%
ungroup() %>%
mutate(
TOTAL = NA,
issue = "BCVA 4m check stops too early"
) %>%
select(all_of(output_variables), TOTAL, issue)
# subcheck 3: if BCVA 4m test has the correct total in eCRF
## compare total and eCRF, output will include 1) totals in eCRF and calculation don't match
## 2) total not recorded in eCRF 3) only total in eCRF, not test row/line read
anl_total <- BCVA_4m %>% filter(OESCAT %in% c("TOTAL", "NORMAL LIGHTING SCORE"))
anl_sum <- anl1 %>%
group_by(USUBJID, VISIT, VISITNUM, OEDTC, OELOC, OELAT) %>%
summarise(TOTAL = sum(OESTRESN), .groups = "drop") %>%
ungroup()
incorrect4m <- anl_total %>% full_join(anl_sum, by = c("USUBJID", "VISIT", "VISITNUM", "OEDTC", "OELOC", "OELAT")) %>%
filter(is.na(TOTAL) | is.na(OESTRESN) | OESTRESN != TOTAL) %>%
filter(!(OESTRESN == 0 & is.na(TOTAL))) %>% #### remove cases when 4m total is 0 and 4m is not done
mutate(issue = "BCVA 4m score incorrect") %>%
select(all_of(output_variables), TOTAL, issue)
## rbind all three subchecks
df <- rbind(late4m, early4m, incorrect4m)
if (nrow(df) != 0) {
fail(paste0(nrow(df), " BCVA 4m test record(s) failed with 1) test stops too late 2) test stops too early 3) total incorrect. "), df)
} else {
pass()
}
}
}