-
Notifications
You must be signed in to change notification settings - Fork 7
/
check_ae_dup.R
executable file
·80 lines (64 loc) · 2.47 KB
/
check_ae_dup.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#' @title Check for duplicate AE entries
#'
#' @description Flags AE records that appear more than once when compared on
#' USUBJID, AETERM, AEDECOD, AESTDTC, AEENDTC, AEMODIFY (if present),
#' AELAT (if present) and AETOXGR or AESEV (whichever one is present).
#'
#' @param AE AE SDTM dataset with variables USUBJID, AETERM, AEDECOD,
#' AESTDTC, AEENDTC, and exactly one of AETOXGR or AESEV. AEMODIFY and AELAT
#' are optional and take part in the comparison when they are available.
#'
#' @return Boolean value indicating whether the check passed or failed, with a
#' 'msg' attribute if the check failed
#'
#' @export
#'
#' @importFrom dplyr %>% group_by_all filter select n
#' @importFrom tidyselect any_of
#'
#' @author Edgar Manukyan
#'
#' @examples
#'
#' AE <- data.frame(USUBJID = c(1), AESTDTC = c("2020-01-01","2020-01-01","2020-02-01","2020-03-01"),
#'                  AEENDTC = rep("2020-02-01",4), AEDECOD = letters[c(1,1:3)],
#'                  AETERM = letters[c(1,1:3)], AETOXGR = c(1,1:3),
#'                  AESPID="FORMNAME-R:5/L:5XXXX",
#'                  stringsAsFactors=FALSE)
#'
#' check_ae_dup(AE)
#'
check_ae_dup <- function(AE) {
  required_vars <- c("USUBJID", "AEDECOD", "AESTDTC", "AEENDTC", "AETERM")
  grade_vars <- c("AETOXGR", "AESEV")

  # Guard clauses: stop early when the dataset cannot be checked.
  if (AE %lacks_any% required_vars) {
    return(fail(lacks_msg(AE, required_vars)))
  }
  if (AE %has_all% grade_vars) {
    return(fail("AE has both variables: AETOXGR and AESEV."))
  }
  if (AE %lacks_all% grade_vars) {
    return(fail("AE is missing both the AETOXGR and AESEV variable."))
  }

  # After the guards above, AE is expected to hold exactly one grade variable.
  grade_var <- if (AE %has_all% "AETOXGR") "AETOXGR" else "AESEV"

  # Optional comparison keys resolve to NULL when absent, so c() drops them.
  modify_var <- if (AE %lacks_any% c("AEMODIFY")) NULL else "AEMODIFY"
  side_var <- if (AE %has_all% "AELAT") "AELAT" else NULL
  optional_keys <- c(modify_var, grade_var, side_var)

  # Keep only the comparison keys, then retain rows whose full key combination
  # occurs more than once (every copy of a duplicate is kept).
  dup_rows <- AE %>%
    select(
      USUBJID, AETERM, AEDECOD, AESTDTC, AEENDTC,
      any_of(optional_keys)
    ) %>%
    group_by_all() %>%
    filter(n() > 1)

  # Report the duplicated rows, or pass when there are none.
  if (nrow(dup_rows) == 0) {
    pass()
  } else {
    fail("AE has duplicated entries. ", dup_rows)
  }
}