/
util.R
490 lines (448 loc) · 20.7 KB
/
util.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
## Accumulate messages for later display
## Internal function, not exported
## severity: 1 = critical
## 2 = informative, may lead to misinterpretation of data
## 3 = minor, esp. those that might have resulted from selective post-processing of combo codes
collect_messages <- function(msgs, msg_text, line_nums, raw_lines, severity, fatal = FALSE, xraw) {
if (missing(line_nums)) line_nums <- NULL
if (missing(severity)) severity <- NULL
if (is.data.frame(msg_text)) {
if (is.null(line_nums) || all(is.na(line_nums))) line_nums <- msg_text$line_number
if (is.null(severity) || all(is.na(severity))) severity <- msg_text$severity
msg_text <- msg_text$message
}
n <- length(msg_text)
if (is.null(line_nums)) line_nums <- rep(NA_integer_, n)
if (missing(raw_lines)) {
raw_lines <- rep(NA_character_, n)
if (!missing(xraw)) {
raw_lines[!is.na(line_nums)] <- xraw[line_nums[!is.na(line_nums)]]
}
}
if (is.null(severity)) severity <- NA_integer_
if (length(severity) == 1 & n > 1) severity <- rep(severity, n)
vt <- rep(NA_real_, n)
if (!all(is.na(raw_lines))) vt <- video_time_from_raw(raw_lines)
raw_lines[is.na(raw_lines)] <- "[unknown]"
if (fatal) {
lnt <- as.character(line_nums)
lnt[is.na(lnt)] <- "[unknown]"
txt <- paste0("line ", lnt, ": ", msg_text, " (line in file is: \"", raw_lines, "\")")
if (fatal) stop(paste(txt, collapse = " / "))
} else {
msgs[[length(msgs)+1]] <- list(file_line_number = line_nums, video_time = vt, message = msg_text, file_line = raw_lines, severity = severity)
}
msgs
}
## messages stored as attributes of an object
get_dvmsg <- function(x) attr(x, "dvmessages", exact = TRUE)
set_dvmsg <- function(x, msg) {
attr(x, "dvmessages") <- msg
x
}
has_dvmsg <- function(x) !is.null(get_dvmsg(x)) && nrow(get_dvmsg(x)) > 0
clear_dvmsg <- function(x) set_dvmsg(x, NULL)
## video time from raw line, for datavolley format
raw_vt_dv <- function(z) {
tryCatch({
if (!is.null(z) && is.character(z) && nzchar(z) && !is.na(z)) {
as.numeric(strsplit(z, ";")[[1]][13])
} else {
NA_integer_
}},
error = function(e) NA_integer_)
}
## video time from raw line, for peranavolley format
pv_pparse <- function(z, df = TRUE) {
temp <- sub("^[A-Z]+~", "", z)
if (grepl("^\\(?null\\)?", temp, ignore.case = TRUE)) {
if (df) tibble() else NULL
} else {
jsonlite::fromJSON(temp)
}
}
raw_vt_pv <- function(z) {
tryCatch({
if (!is.null(z) && is.character(z) && nzchar(z) && !is.na(z)) {
as.numeric(pv_pparse(z)$videoDuration)
} else {
NA_integer_
}
}, error = function(e) NA_integer_)
}
## video time from raw line, vsm format
## can't guarantee that "lines" in vsm are valid json, because we somewhat-arbitrarily split the single-line input file
raw_vt_vsm <- function(z) {
tryCatch({
temp <- str_match_all(z, "\"time\"[[:space:]]*:[[:space:]]*([^,\\}\\]]+)")[[1]][, 2]
if (length(temp) == 1) round(as.numeric(temp) / 10) else NA_integer_
}, error = function(e) NA_integer_)
}
raw_vt_xml <- function(z) {
tryCatch({
temp <- str_match_all(z, "<start>([[:digit:]\\.]+)</start>")[[1]][, 2]
if (length(temp) == 1) round(as.numeric(temp)) else NA_integer_
}, error = function(e) NA_integer_)
}
video_time_from_raw <- function(raw_lines) {
out <- tryCatch(vapply(raw_lines, function(z) {
if (grepl("~{", z, fixed = TRUE)) raw_vt_pv(z) else if (grepl("\"_id\":", z, fixed = TRUE)) raw_vt_vsm(z) else if (grepl("<start>[[:digit:]\\.]+</start>", z)) raw_vt_xml(z) else raw_vt_dv(z)
}, FUN.VALUE = 1.0, USE.NAMES = FALSE), error = function(e) rep(NA_real_, length(raw_lines)))
if (length(out) < 1) out <- NA_real_
out
}
join_messages <- function(msgs1,msgs2) {
if (length(msgs2)>0) {
msgs1 <- c(msgs1,msgs2)
}
msgs1
}
# Extract text chunks from datavolley file. Internal function, not exported for users.
#
# @param txt: dv text
# @param token1: starting token, e.g. "[3SET]"
# @param token2: ending token
#
# @return string
text_chunk <- function(txt,token1,token2) {
idx1 <- grep(token1,txt,fixed=TRUE)
if (length(idx1)<1) return("")
if (missing(token2)) {
## find next section starting with "["
idx2=grep("^\\[",txt)
idx2=idx2[idx2>idx1][1]
} else {
idx2=grep(token2,txt,fixed=TRUE)
}
if (idx2==(idx1+1)) {
""
} else {
out <- txt[(idx1+1):(idx2-1)]
out <- out[sapply(out,is.notempty.string)]##!is.na(out)]
paste(out,collapse="\n")
}
}
is.notempty.string <- function(x) {
(is.character(x) && length(x)==1) && !is.na(x) && nchar(x)>0
}
#' Find each entry in y that follows each entry in x
#'
#' @param x numeric: vector
#' @param y numeric: vector
#'
#' @return vector, each entry is the value in y that is next-largest to each corresponding entry in x
#'
#' @examples
#' findnext(c(1,5,10),c(1,2,3,7,8,9))
#'
#' @export
findnext <- function(x,y) {
## return the number in y that comes after each of x
sapply(x,function(z){
temp <- y-z
temp <- temp[temp>0]
if (length(temp)<1)
NA
else
min(temp)+z
})
}
##findnext <- function(these,after) {
## ## return the number in after that comes after each of these
## sapply(these,function(z){
## temp <- after-z
## temp <- temp[temp>0]
## if (length(temp)<1)
## NA
## else
## min(temp)+z
## })
##}
#' Find each entry in y that precedes each entry in x
#'
#' @param x numeric: vector
#' @param y numeric: vector
#'
#' @return vector, each entry is the value in y that is next-smallest to each corresponding entry in x
#'
#' @examples
#' findprev(c(1,5,10),c(1,2,3,7,8,9))
#'
#' @export
findprev <- function(x,y) {
## return the number in y that comes before each of x
sapply(x,function(z){
temp <- z-y
temp <- temp[temp>0]
if (length(temp)<1)
NA
else
z-min(temp)
})
}
##findprev <- function(these,prev) {
## ## return the number in prev that comes before each of these
## sapply(these,function(z){
## temp <- z-prev
## temp <- temp[temp>0]
## if (length(temp)<1)
## NA
## else
## z-min(temp)
## })
##}
## equality with NAs considered false
`%eq%` <- function(x,y) x==y & !is.na(x) & !is.na(y)
## convenience function to replace NAs
na.replace <- function(x,replace_with) {x[is.na(x)] <- replace_with; x}
#' Find a particular match in a list of datavolley objects
#'
#' @param match_id string: match_id to find
#' @param x list: list of datavolley objects as returned by \code{dv_read}
#'
#' @return numeric index of the match in the list
#'
#' @seealso \code{\link{dv_read}}
#'
#' @export
find_match <- function(match_id,x) {
which(sapply(x,function(z)z$meta$match_id==match_id))
}
most_common_value <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
## preferred can have more than one entry, treated in order of preference
## parse datetimes and dates, removing any that resolve to future
parse_dt <- function(dstr, format, allow_future_dates = FALSE) {
out <- unique(na.omit(c(lubridate::parse_date_time(dstr, format))))
if (!allow_future_dates) out <- out[difftime(Sys.time(), out, units = "days") > -5] ## not more than 5 days into the future
out
}
parse_d <- function(dstr, pfun, allow_future_dates = FALSE) {
out <- pfun(dstr)
if (!allow_future_dates) out <- out[difftime(Sys.time(), out, units = "days") > -5] ## not more than 5 days into the future
out
}
manydates <- function(z, preferred = NULL) {
z <- z[!is.na(z) & nzchar(z)]
if (length(z) < 1) return(as.Date(integer(), origin = "1970-01-01"))
suppressWarnings(
tries <- list(ymd = unique(as.Date(na.omit(c(parse_d(z, lubridate::ymd), parse_dt(z, "Ymd HMS"))))),
dmy = unique(as.Date(na.omit(c(parse_d(z, lubridate::dmy), parse_dt(z, "dmY HMS"))))),
mdy = unique(as.Date(na.omit(c(parse_d(z, lubridate::mdy), parse_dt(z, "mdY HMS"))))))
)
if (!is.null(preferred)) {
preferred <- tolower(preferred)
for (pref in preferred) {
if (length(tries[[pref]]) > 0) return(tries[[pref]])
}
}
if (length(tries$ymd) < 1 && length(tries$dmy) < 1 && length(tries$mdy) < 1) return(as.Date(integer(), origin = "1970-01-01"))
unique(c(tries$ymd, tries$dmy, tries$mdy))
}
manydatetimes <- function(z, preferred = NULL) {
z <- z[!is.na(z) & nzchar(z)]
if (length(z) < 1) return(as.POSIXct(integer(), origin = "1970-01-01"))
## don't use lubridate::ymd_hms etc here, because they will fall foul of e.g.
## https://github.com/tidyverse/lubridate/issues/552
suppressWarnings(tries <- list(ymd = parse_dt(z, "Ymd HMS"), dmy = parse_dt(z, "dmY HMS"), mdy = parse_dt(z, "mdY HMS")))
if (!is.null(preferred)) {
preferred <- tolower(preferred)
for (pref in preferred) {
if (length(tries[[pref]]) > 0) return(tries[[pref]])
}
}
if (length(tries$ymd) < 1 && length(tries$dmy) < 1 && length(tries$mdy) < 1) return(as.POSIXct(integer(), origin = "1970-01-01"))
unique(c(tries$ymd, tries$dmy, tries$mdy))
}
unambiguous_datetime <- function(z) {
suppressWarnings(tries <- list(ymd = parse_dt(z, format = "Ymd HMS"), dmy = parse_dt(z, format = "dmY HMS"), mdy = parse_dt(z, format = "mdY HMS")))
## do we have an unambiguous date? i.e. only one format gives a valid date
unambiguous <- Filter(length, tries)
unambiguous <- unique(data.frame(format = names(unambiguous), date = as.Date(as.numeric(unambiguous), origin = "1970-01-01"), stringsAsFactors = FALSE))
if (nrow(unambiguous) == 1) unambiguous$format else NULL
}
#' Generate a short, human-readable text summary of one or more actions
#'
#' @param x data.frame or tibble: one or more rows from a datavolleyplays object as returned by \code{\link{dv_read}}
#' @param verbosity integer: 1 = least verbose, 2 = more verbose. Currently ignored
#'
#' @return character vector
#'
#' @examples
#' x <- dv_read(dv_example_file())
#' dv_action2text(plays(x)[27:30, ])
#'
#' @export
dv_action2text <- function(x, verbosity = 1) {
vapply(seq_len(nrow(x)), function(i) do_action2text(x[i, ], verbosity = verbosity), FUN.VALUE = "", USE.NAMES = FALSE)
}
do_action2text <- function(x, verbosity) {
if (x$skill %in% c("Serve", "Reception", "Set", "Attack", "Block", "Dig", "Freeball")) {
out <- paste0(skill_extra(x), " by ", x$player_name, " (",
action_extra(x),
x$evaluation, ")")
} else if (x$skill %in% c("Timeout")) {
paste0("Timeout (", x$team, ")")
} else if (x$skill %in% c("Technical timeout")) {
"Technical timeout"
} else {
NA_character_
}
}
skill_extra <- function(x) {
if (x$skill %in% c("Attack", "Serve") && !is.na(x$skill_type)) x$skill_type else x$skill
}
action_extra <- function(x) {
switch(x$skill,
"Attack" = if (!is.na(x$attack_code)) paste0(x$attack_code, " - ") else "",
"")
}
## replacement for enc::read_lines_enc now that enc has been archived Feb 2024
read_lines_enc <- function(path, file_encoding = "UTF-8") {
con <- file(path, encoding = file_encoding)
on.exit(close(con), add = TRUE)
lines <- readLines(con, warn = FALSE, n = -1L, ok = TRUE, skipNul = FALSE, encoding = "UTF-8")
Encoding(lines) <- "UTF-8"
stringi::stri_enc_toutf8(lines)
}
get_best_encodings <- function(encodings_to_test, filename, read_from = 10, read_to = 90, expect_tildes = TRUE) {
if (read_to < read_from) stop("read_to cannot be less than read_from")
## badchars/badwords indicate characters/words that we don't expect to see, so the presence of any of these indicates that we've got the wrong file encoding
badchars <- c(1328:7499, utf8ToInt("\ub3\ua3\u008a\u008e\u009a\u00b3"), 960) ## armenian through to music, then some isolated ones
## allow 1025:1327 - cyrillic
## may need to consider removing penalty on armenian/arabic chars too
## 0x2000 to 0x206f (general punctuation) likely wrong, 0x01-0x07 are control characters we don't expect to see
badchars <- c(badchars,0x2000:0x206f, 0x00:0x07)
badchars <- c(badchars, utf8ToInt("\u253c\ud7\u3ad")) ##?? \u44d\u42d
badchars <- c(badchars, 0x2500:0x25ff) ## box-drawing characters (seen with Japanese misidentified as Korean)
badwords <- tolower(c("S\u159RENSEN","S\u159gaard","S\u159ren","M\u159LLER","Ish\u159j","Vestsj\u107lland","KJ\u107R","M\u159rk","Hj\u159rn","\u139rhus")) ## these from windows-1252 (or ISO-8859-1) wrongly guessed as windows-1250
badwords <- c(badwords,tolower(c("\ud9ukas","Pawe\uf9","\ud9omacz",paste0("Mo\ufd","d\ufdonek"),"W\uf9odarczyk"))) ## these from windows-1257/ISO-8859-13 wrongly guessed as windows-1252
badwords <- c(badwords,tolower(c("\u3a9ukas","Pawe\u3c9","\u3a9omacz",paste0("Mo\u3cd","d\u3cdonek"),"W\u3c9odarczyk"))) ## these from windows-1257/ISO-8859-13 wrongly guessed as windows-1253
badwords <- c(badwords,tolower(c("\uc4\u15a","\u139\u2dd"))) ## utf-8 wrongly guessed as windows-1250
badwords <- c(badwords, tolower(c("\uc4\u8d", "\uc5\ubd", "\uc4\u152"))) ## utf-8 wrongly guessed as windows-1252
badwords <- c(badwords,tolower(c("Nicol\u148"))) ## windows-1252/iso-8859-1 wrongly guessed as windows-1250
badwords <- c(badwords, c("\u6c\u2116", "\u6a\u2116", "\u77\u2116", "\u64\u2116\u62", "\u45a\u77", "\u45a\u6c", "\u63\u7a\u43a")) ## windows-1250 wrongly guessed as windows-1251
badwords <- c(badwords, tolower(c("\u6a\u446\u72", "\u68\u446\u68", "\u76\u446\u6c", "\u6d\u44c\u6c", "\u72\u44c\u67", "\u70\u446\u68", "\u6b\u44c\u68"))) ## 1250 as 1251
badwords <- c(badwords, tolower(c("\uc2\ue4\u77", "\uf1\u7b", "\ue5\ue4", "\ue5\ue3"))) ## japanese SHIFT-JIS wrongly guessed as macintosh
badwords <- c(badwords, tolower("\u102\u104\u7a"), tolower("\u102\u104\u73"), tolower("\u102\u2c7\u7a"), tolower("\u102\u2c7\u73"))
badwords <- c(badwords, tolower(c(intToUtf8(c(8222, 162)))))
badwords <- c(badwords, tolower(c("\u192\u56", "\u192\u2021", "\u192\u67", "\u192\u6f", "\u192\u62", "\u192\u4e", "\u2018\u4f", "\u192\u70", "\u192\u43", "\u192\u76")))
badwords <- c(badwords, tolower(c("\uf7\u119\uee", "\u2d9\u119\uee", "\uf7\u155\u111", "\uf7\u10d\ued", "\uc2\ueb\u155\ue4\u10d", "\u10f\u10d\uee", "\u10f\u13a\u111", "\u155\u148\uf7"))) ## russian 1251 wrongly detected as 1250
badwords <- c(badwords, tolower(c("\ue8\ueb\ueb", "\ueb\uea", "\ue4\ue0", "\ue0\ue2", "\ue0\ue5", "\ue5\ue2", "\ued\uee", "\uee\uef", "\udf\uea", "\u1fb\uef\ue8", "\uee\ued\ue0", "\uee\uf1\uf1", "\uf1\uf1\ue8"))) ## russian 1251 wrongly detected as 1258
badwords <- c(badwords, tolower(c("\u101\ue5", "\ue5\u101", "\u107\u10d", "\ud7\ue5", "\u122\u10d"))) ## russian 1251 wrongly detected as 1257
##badwords <- c(badwords, tolower(c("\u10f\u17c\u2dd", "\u43f\u457\u405", "\u3bf\u38f\ubd", "\u5df\ubf\ubd"))) ## this is the unicode replacement char EF BF BD in windows-1250, 1251, 1253 (the encoding should be UTF-8). Note that 1252, 1254 seem to represent this as EF BF BD, so we can't detect that?? Doesn't seem to be reliable anyway
badwords <- c(badwords, tolower(c("\u56\u49\u444", "\u56\u49\uc6"))) ## windows-1252 wrongly detected as KOI8-R
badwords <- c(badwords, tolower(c("\u171\ufc", "\u8e\u52"))) ## cp932 wrongly detected as ISO-8859-2 (there are heaps here)
badwords <- c(badwords, tolower(c("\uc9\u57\uc9", "\ue2\u2122"))) ## cp932 wrongly detected as macintosh
badwords <- c(badwords, c("\ufd\u79")) ## windows-1254 wrongly detected as 1250
badwords <- c(badwords, c("\u6e\u434\u45a\u69\u434\u45a", "\u76\u69\u434\u45a")) ## "ncic" and "vic" but c with caron (Serbian/Czech/etc) wrongly detected as cyrillic
badwords_trans <- c("oooo", "ouuoo", "oouoo", "uuou", "uuoo") ## badwords after transliteration, e.g. wrongly-detected cyrillic
## get the \uxx numbers from sprintf("%x",utf8ToInt(tolower(dodgy_string_or_char))) or paste0("\\u", sprintf("%x", utf8ToInt(tolower("dodgy"))), collapse = "")
read_with_enc <- function(filename, enc_to_test) {
## read with specified encoding and convert to UTF8
tryCatch(read_lines_enc(filename, file_encoding = enc_to_test), warning = function(w) NA_character_, error = function(e) NA_character_)
}
testtxt <- lapply(encodings_to_test, read_with_enc, filename = filename)
suppressWarnings({
encerrors <- sapply(testtxt, function(z) {
z <- paste(z[read_from:read_to], collapse = "")
if (is.na(z)) Inf else sum(utf8ToInt(z) %in% badchars) + 10*sum(sapply(badwords, grepl, tolower(z), fixed = TRUE)) + 10*(sum(sapply(badwords_trans, grepl, stri_trans_general(tolower(z), "latin-ascii"), fixed = TRUE)) - sum(sapply(badwords_trans, grepl, tolower(z), fixed = TRUE)))
## the latter term penalizes text that matches badwords_trans when transliterated to ASCII but doesn't match when not transliterated. Which will generally be streams of gibberish accented characters (e.g. Cyrillic processed as something else)
})
})
if (expect_tildes) {
## also check whether we get ~'s in the 3SCOUT section (will not get any if CP932 incorrectly detected as SHIFT-JIS, for example)
tilde_count <- sapply(testtxt, function(z) if (all(is.na(z))) 0L else sum(stringi::stri_count(z, fixed = "~")))
encerrors[which(tilde_count < 1)] <- encerrors[which(tilde_count < 1)] + 20L
}
## what badwords are matching a given encoding?
##cat("bw:\n")
##print(lapply(testtxt[encodings_to_test == "CP1251"], function(z) if (all(is.na(z))) Inf else { z <- paste(z[read_from:read_to], collapse = ""); print(z); which(sapply(badwords, grepl, tolower(z), fixed = TRUE))}))
## errors per encodings_to_test
##cat("encerrors:\n"); print(sort(setNames(encerrors, encodings_to_test)))
idx <- encerrors==min(encerrors)
if (!any(idx)) {
list(encodings = character(), error_score = NA_integer_)
} else {
list(encodings = encodings_to_test[idx], error_score = min(encerrors))
}
}
#' Convert integer colour to RGB
#'
#' DataVolley files use an integer representation of colours. These functions convert to and from hex colour strings as used in R.
#'
#' @param z integer: vector of integers
#' @param x integer: vector of hex colour strings
#'
#' @return Character vector of hex RGB colour strings
#'
#' @examples
#' dv_int2rgb(c(255, 16711680))
#'
#' @export
dv_int2rgb <- function(z) {
r <- floor(z / 256^2)
g <- floor((z - r * (256^2)) / 256)
b <- z - floor(r * (256^2) + g * 256)
out <- apply(cbind(r, g, b), 1, function(z) sprintf("#%02X%02X%02X", z[1], z[2], z[3]))
out[z < 0 | z > 16777215 | is.na(z)] <- NA_character_
out
}
#' @export
#' @rdname dv_int2rgb
dv_rgb2int <- function(x) {
out <- grDevices::col2rgb(x)
out <- as.integer(apply(out, 2, function(z) z[1] * 256 * 256 + z[2] * 256 + z[3]))
out[is.na(x)] <- NA_integer_
out
}
camel_to_underscore <- function(x) {
s <- gsub("([a-z0-9])([A-Z])", "\\1_\\L\\2", x, perl = TRUE)
sub("^(.[a-z])", "\\L\\1", s, perl = TRUE) # make 1st char lower case if second is lower
}
lead0 <- function(x, width = 2, pad = "0", na = NULL) {
out <- character(length(x))
naidx <- rep(FALSE, length(x))
if (!is.null(na)) {
naidx <- is.na(x) | !nzchar(x)
out[naidx] <- as.character(na)
}
out[!naidx] <- stringr::str_pad(as.numeric(x[!naidx]), width = width, pad = pad)
out
}
rotpos <- function(p, by = 1L, n = 6L) (p - 1L - by) %% n + 1L
rot_lup <- function(z, by = 1L, n = 6L) {
## z is a lineup vector
z[rotpos(seq_along(z), by = by, n = n)]
}
## mapvalues from plyr, MIT license
mapvalues <- function (x, from, to, warn_missing = TRUE) {
if (length(from) != length(to)) {
stop("`from` and `to` vectors are not the same length.")
}
if (!is.atomic(x) && !is.null(x)) {
stop("`x` must be an atomic vector or NULL.")
}
if (is.factor(x)) {
levels(x) <- mapvalues(levels(x), from, to, warn_missing)
return(x)
}
mapidx <- match(x, from)
mapidxNA <- is.na(mapidx)
from_found <- sort(unique(mapidx))
if (warn_missing && length(from_found) != length(from)) {
message("The following `from` values were not present in `x`: ",
paste(from[!(1:length(from) %in% from_found)], collapse = ", "))
}
x[!mapidxNA] <- to[mapidx[!mapidxNA]]
x
}
single_unique_value_or_na_int <- function(x) {
u <- unique(na.omit(x))
if (length(u) == 1) u else NA_integer_
}
mean_nna <- function(...) mean(..., na.rm = TRUE)
qmax <- function(...) suppressWarnings(max(..., na.rm = TRUE))
qmin <- function(...) suppressWarnings(min(..., na.rm = TRUE))