-
Notifications
You must be signed in to change notification settings - Fork 7
/
test-data_integrity.R
128 lines (116 loc) · 6.33 KB
/
test-data_integrity.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
context('data integrity')
describe("safe columns", {
db_test_that('it can expand a table if a new column pops up in later entries', {
expect_cached({
df_ref <- cbind(batch_data(1:5, model_version, type),
data.frame(new_col = rep(NA_real_, 5)))
df_ref <- rbind(df_ref, batch_data(6:10, model_version, type, add_column = TRUE))
cached_fcn(key = 5:1, model_version, type)
df_cached <- without_rownames(cached_fcn(
key = 1:10, model_version, type, add_column = TRUE))
expect_almost_equal(without_rownames(df_ref),
without_rownames(cached_fcn(key = 1:10, model_version, type)))
}, no_check = TRUE)
})
describe("when safe_column is TRUE", {
db_test_that('it crashes when trying to expand a table on new column', {
cached_fcn <- cache(batch_data, key = c(key = "id"), c("model_version", "type"), con = test_con, prefix = prefix, safe_columns = TRUE)
cached_fcn(key = 5:1, model_version, type)
expect_error(cached_fcn(key = 1:10, model_version, type, add_column = TRUE))
})
})
describe("when safe_column is a custom function", {
db_test_that('it calls a custom function and returns without error', {
called <- FALSE
caller <- function(...) { called <<- TRUE; message(as.list(...)); TRUE }
cached_fcn <- cache(batch_data, key = c(key = "id"), c("model_version", "type"), con = test_con, prefix = prefix, safe_columns = caller)
cached_fcn(key = 5:1, model_version, type)
expect_false(called)
cached_fcn(key = 1:10, model_version, type, add_column = TRUE)
expect_true(called)
})
db_test_that('it calls a custom function when safe_columns is is a function', {
called <- FALSE
error_msg <- "Safe Columns Error: Customer function detected error."
caller <- function(...) { called <<- TRUE; message(as.list(...)); stop(error_msg) }
cached_fcn <- cache(batch_data, key = c(key = "id"), c("model_version", "type"), con = test_con, prefix = prefix, safe_columns = caller)
cached_fcn(key = 5:1, model_version, type)
expect_false(called)
expect_error(cached_fcn(key = 1:10, model_version, type, add_column = TRUE), error_msg)
expect_true(called)
})
})
})
describe("conditional caching", {
db_test_that("it will cache NA if NA is not on the blacklist", {
expect_cached({
cached_fcn <- cache(return_nas, key = c(key = "id"), salt = c("model_version", "type"), con = test_con, prefix = prefix, blacklist = list("pizza"))
df_ref <- return_nas(1:5)
expect_almost_equal(without_rownames(df_ref),
without_rownames(cached_fcn(key = 1:5, model_version, type)))
}, fn = return_nas)
})
db_test_that("it won't cache NA if NA is on the blacklist", {
expect_cached({
lapply(dbListTables(test_con), function(t) dbRemoveTable(test_con, t))
cached_fcn <- cache(return_nas, key = c(key = "id"), salt = c("model_version", "type"), con = test_con, prefix = prefix, blacklist = list(NA))
# Will cache to a strange 0x2 df...
df_ref <- data.frame(id = 1, data = "a", stringsAsFactors = FALSE)[FALSE, ]
expect_almost_equal(without_rownames(return_nas(1:5)), # ...But will return the normal data
without_rownames(cached_fcn(key = 1:5, model_version, type)))
}, fn = return_nas, no_check = TRUE)
})
db_test_that("NA blacklist still works on a 1x1 df", {
expect_cached({
lapply(dbListTables(test_con), function(t) dbRemoveTable(test_con, t))
cached_fcn <- cache(return_ids, key = c(key = "id"), salt = c("model_version", "type"), con = test_con, prefix = prefix, blacklist = list(NA))
expect_almost_equal(without_rownames(return_ids(1)),
without_rownames(cached_fcn(key = 1, model_version, type)))
}, fn = return_ids, no_check = TRUE)
})
db_test_that("it won't cache FALSE if FALSE is on the blacklist", {
expect_cached({
lapply(dbListTables(test_con), function(t) dbRemoveTable(test_con, t))
cached_fcn <- cache(return_falses, key = c(key = "id"), salt = c("model_version", "type"), con = test_con, prefix = prefix, blacklist = list("FALSE"))
# Will cache to a strange 0x2 df...
df_ref <- data.frame(id = 1, data = "a", stringsAsFactors = FALSE)[FALSE, ]
expect_almost_equal(without_rownames(return_falses(1:5)), # ...But will return the normal data
without_rownames(cached_fcn(key = 1:5, model_version, type)))
}, fn = return_falses, no_check = TRUE)
})
db_test_that("it won't cache 'pizza' or 'potato' if they're on the blacklist", {
expect_cached({
lapply(dbListTables(test_con), function(t) dbRemoveTable(test_con, t))
cached_fcn <- cache(return_foods, key = c(key = "id"), salt = c("model_version", "type"), con = test_con, prefix = prefix, blacklist = list("pizza", "potato"))
df_ref <- data.frame(id = c(3, 4), data = c("apple", "banana"), stringsAsFactors = FALSE)
expect_almost_equal(without_rownames(return_foods(seq(5))),
without_rownames(cached_fcn(key = 1:5, model_version, type)))
}, fn = return_foods, no_check = TRUE)
})
db_test_that("it will cache what is not on the blacklist", {
expect_cached({
cached_fcn <- cache(return_mix_na, key = c(key = "id"),
salt = c("model_version", "type"),
con = test_con, prefix = prefix, blacklist = list(NA))
df_ref <- data.frame(id = c(2L, 4L), data = rep("value", 2),
stringsAsFactors = FALSE)
df_actual <- data.frame(id = seq(5), data = ifelse(seq(5) %% 2 == 0, "value", NA),
stringsAsFactors = FALSE)
expect_equal(without_rownames(df_actual),
without_rownames(cached_fcn(key = seq(5), model_version, type)))
}, fn = return_mix_na)
})
db_test_that("it will cache what is not on the blacklist", {
expect_cached({
cached_fcn <- cache(return_mix_false, key = c(key = "id"),
salt = c("model_version", "type"),
con = test_con, prefix = prefix, blacklist = list(FALSE))
df_ref <- data.frame(id = c(2L, 4L), data = rep("value", 2),
stringsAsFactors = FALSE)
df_actual <- data.frame(id = seq(5), data = ifelse(seq(5) %% 2 == 0, "value", FALSE),
stringsAsFactors = FALSE)
expect_equal(without_rownames(df_actual),
without_rownames(cached_fcn(key = seq(5), model_version, type)))
}, fn = return_mix_false)
})
})