Skip to content

Commit

Permalink
Added baseline tests for adult growthcleanr, refs carriedaymont#49
Browse files Browse the repository at this point in the history
  • Loading branch information
dchud committed Jun 23, 2021
1 parent 73d2360 commit 2a8bd51
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 17 deletions.
29 changes: 16 additions & 13 deletions R/growth.R
Original file line number Diff line number Diff line change
Expand Up @@ -656,20 +656,23 @@ cleangrowth <- function(subjid,
}
}

if (any(nrow(data.all) > 0, nrow(data.adult) > 0)) {
# join with pediatric data
full_out <- data.table(
line = c(ret.df$line, res$line),
exclude = c(as.character(ret.df$exclude), res$result),
mean_sde = c(rep(NA, nrow(ret.df)), res$mean_sde)
)
full_out[, exclude := factor(exclude, levels = unique(c(exclude.levels,
unique(exclude))))]
full_out <- full_out[order(line),]
# remove column added for keeping track
full_out[, line := NULL]

# join with pediatric data
full_out <- data.table(
line = c(ret.df$line, res$line),
exclude = c(as.character(ret.df$exclude), res$result),
mean_sde = c(rep(NA, nrow(ret.df)), res$mean_sde)
)
full_out[, exclude := factor(exclude, levels = unique(c(exclude.levels,
unique(exclude))))]
full_out <- full_out[order(line),]
# remove column added for keeping track
full_out[, line := NULL]

return(full_out$exclude)
return(full_out$exclude)
} else {
return(c())
}

}

Expand Down
107 changes: 103 additions & 4 deletions tests/testthat/test-cleangrowth.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
test_that("growthcleanr works as expected on synthetic data", {
test_that("growthcleanr works as expected on pediatric synthetic data", {

# Run cleangrowth() on syngrowth data
data <- as.data.table(syngrowth)
Expand Down Expand Up @@ -89,11 +89,98 @@ test_that("growthcleanr works as expected on synthetic data", {

})

test_that("growthcleanr works as expected on adult synthetic data", {

# Run cleangrowth() on syngrowth data
data <- as.data.table(syngrowth)

# syngrowth hasn't changed in length
expect_equal(77721, data[, .N])
setkey(data, subjid, param, agedays)

# subset to adult data
data_adult <- copy(data[agedays >= 18 * 365.25, ])

# Create small sample
d500 <- as.data.table(data_adult)[subjid %in% unique(data_adult[, subjid])[1:500], ]
expect_equal(12447, d500[, .N])

# Clean sample
cd500 <-
d500[, gcr_result := cleangrowth(
subjid,
param,
agedays,
sex,
measurement
)]

# Clean again with lower cutpoint
cd500cp <-
copy(d500)[, gcr_result := cleangrowth(
subjid,
param,
agedays,
sex,
measurement,
adult_cutpoint = 18
)]


# Spot check individual results
gcr_result <- function (dt, rowid) {
return(as.character(dt[id == rowid]$gcr_result))
}

# These results should not change with cutpoint
expect_equal("Include", gcr_result(cd500, 27166))
expect_equal("Include", gcr_result(cd500cp, 27166))

expect_equal("Exclude-Adult-Identical-Same-Day", gcr_result(cd500, 47596))
expect_equal("Exclude-Adult-Identical-Same-Day", gcr_result(cd500cp, 47596))

expect_equal("Exclude-Adult-BIV", gcr_result(cd500, 41872))
expect_equal("Exclude-Adult-BIV", gcr_result(cd500cp, 41872))

# Results for these records should change due to younger cutpoint
expect_equal("Exclude-Extraneous-Same-Day", gcr_result(cd500, 38722))
expect_equal("Exclude-Adult-Extraneous-Same-Day", gcr_result(cd500cp, 38722))

expect_equal("Exclude-Carried-Forward", gcr_result(cd500, 12923))
expect_equal("Include", gcr_result(cd500cp, 12923))

expect_equal("Exclude-Extraneous-Same-Day", gcr_result(cd500, 25259))
expect_equal("Exclude-Adult-Distinct-3-Or-More", gcr_result(cd500cp, 25259))

# Check counts of exclusions by category
catcount <- function (df, category) {
return(as.numeric(df %>% filter(gcr_result == category) %>% select(n)))
}

d500_exclusions <-
cd500 %>% group_by(gcr_result) %>% tally(sort = TRUE)
expect_equal(9745, catcount(d500_exclusions, "Include"))
expect_equal(2090, catcount(d500_exclusions, "Exclude-Adult-Extraneous-Same-Day"))
expect_equal(59, catcount(d500_exclusions, "Exclude-Adult-Distinct-3-Or-More"))
expect_equal(43, catcount(d500_exclusions, "Exclude-Carried-Forward"))
expect_equal(2, catcount(d500_exclusions, "Exclude-Adult-Transpositions"))

d500cp_exclusions <-
cd500cp %>% group_by(gcr_result) %>% tally(sort = TRUE)
expect_equal(9774, catcount(d500cp_exclusions, "Include"))
expect_equal(2200, catcount(d500cp_exclusions, "Exclude-Adult-Extraneous-Same-Day"))
expect_equal(62, catcount(d500cp_exclusions, "Exclude-Adult-Distinct-3-Or-More"))
expect_true(is.na(catcount(d500cp_exclusions, "Exclude-Carried-Forward")))
expect_equal(2, catcount(d500cp_exclusions, "Exclude-Adult-Transpositions"))

})

test_that("growthcleanr works without either adult or pediatric data", {
# creating small only adult and only pediatric data
# using default cutpoint -- 20
only_peds <- syngrowth[syngrowth$agedays < 20*365.25,][1:50,]
only_adult <- syngrowth[syngrowth$agedays >= 20*365.25,][1:50,]
nobody <- syngrowth[syngrowth$agedays > 120*365.25,]

# testing cleangrowth works without adult data
peds_res <- cleangrowth(
Expand All @@ -102,7 +189,7 @@ test_that("growthcleanr works without either adult or pediatric data", {
only_peds$agedays,
only_peds$sex,
only_peds$measurement,
quietly = T
quietly = TRUE
)

expect_equal(length(peds_res), nrow(only_peds))
Expand All @@ -114,9 +201,21 @@ test_that("growthcleanr works without either adult or pediatric data", {
only_adult$agedays,
only_adult$sex,
only_adult$measurement,
quietly = T
quietly = TRUE
)

expect_equal(length(adult_res), nrow(only_adult))

})
# testing cleangrowth works with no data
no_res <- cleangrowth(
nobody$subjid,
nobody$param,
nobody$agedays,
nobody$sex,
nobody$measurement,
quietly = TRUE
)

expect_equal(length(no_res), nrow(nobody))

})

0 comments on commit 2a8bd51

Please sign in to comment.