-
Notifications
You must be signed in to change notification settings - Fork 60
/
vroom_lines.R
60 lines (52 loc) · 1.91 KB
/
vroom_lines.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#' Read lines from a file
#'
#' `vroom_lines()` plays the same role as `readLines()`, but it reads the
#' lines lazily in the same way as [vroom()]. As a result `length()`,
#' `head()`, `tail()` and `sample()` can be done much more efficiently,
#' without reading all the data into R.
#'
#' @inheritParams vroom
#' @return A character vector holding the lines that were read. An empty
#'   character vector is returned when `file` resolves to zero inputs, when
#'   `n_max` is `0`, or when the reader produces no columns.
#' @examples
#' lines <- vroom_lines(vroom_example("mtcars.csv"))
#'
#' length(lines)
#' head(lines, n = 2)
#' tail(lines, n = 2)
#' sample(lines, size = 2)
#' @export
vroom_lines <- function(file, n_max = Inf, skip = 0,
                        na = character(), skip_empty_rows = FALSE,
                        locale = default_locale(), altrep = TRUE,
                        altrep_opts = deprecated(),
                        num_threads = vroom_threads(),
                        progress = vroom_progress()) {
  # `altrep_opts` is the deprecated spelling of `altrep`; when supplied it
  # still takes precedence, after warning the caller.
  if (!is_missing(altrep_opts)) {
    deprecate_warn(
      "1.1.0",
      "vroom_lines(altrep_opts = )",
      "vroom_lines(altrep = )"
    )
    altrep <- altrep_opts
  }

  inputs <- standardise_path(file)

  # Inputs in an encoding that is not ASCII compatible are re-encoded first,
  # and the locale is switched to UTF-8 to match.
  if (!is_ascii_compatible(locale$encoding)) {
    inputs <- reencode_file(inputs, locale$encoding)
    locale$encoding <- "UTF-8"
  }

  # Negative and infinite limits are both normalised to -1 before being
  # handed to the reader.
  read_everything <- n_max < 0 || is.infinite(n_max)
  if (read_everything) {
    n_max <- -1
  }

  # Nothing to read: no inputs at all ...
  if (length(inputs) == 0) {
    return(character())
  }
  # ... or the caller explicitly asked for zero lines.
  if (n_max == 0) {
    return(character())
  }

  no_selection <- quo(NULL)

  # The delimiter is "\1" (the start-of-header control character), which
  # should never appear in modern text, so in effect newlines are the only
  # record breaks. "\0" would be the ideal choice, but R does not allow nulls
  # in character vectors.
  parsed <- vroom_(
    inputs,
    delim = "\1",
    col_names = "V1",
    col_types = cols(col_character()),
    id = NULL,
    skip = skip,
    col_select = no_selection,
    name_repair = "minimal",
    na = na,
    quote = "",
    trim_ws = FALSE,
    escape_double = FALSE,
    escape_backslash = FALSE,
    comment = "",
    skip_empty_rows = skip_empty_rows,
    locale = locale,
    guess_max = 0,
    n_max = n_max,
    altrep = vroom_altrep(altrep),
    num_threads = num_threads,
    progress = progress
  )

  # The single column `V1` carries the lines; fall back to an empty vector
  # when the reader returned no columns.
  if (length(parsed) > 0) {
    parsed[[1]]
  } else {
    character()
  }
}