Skip to content

Commit

Permalink
Fix n_max for fwf and connections
Browse files Browse the repository at this point in the history
  • Loading branch information
jimhester committed Jun 30, 2021
1 parent 66f56e9 commit f7ca609
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 6 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
@@ -1,5 +1,7 @@
# vroom (development version)

* `vroom_fwf(n_max=)` now works as intended when the input is a connection.

* `vroom()` and `vroom_write()` now automatically detect the compression format regardless of the file extension for bzip2, xzip, gzip and zip files (#348)

* `vroom()` and `vroom_write()` now automatically support many more archive formats thanks to the archive package.
Expand Down
23 changes: 17 additions & 6 deletions src/fixed_width_index_connection.cc
Expand Up @@ -10,7 +10,6 @@
#include <future> // std::async, std::future
#include <utility>


#ifdef VROOM_LOG
#include "spdlog/sinks/basic_file_sink.h" // support for basic file logging
#include "spdlog/spdlog.h"
Expand Down Expand Up @@ -77,14 +76,22 @@ fixed_width_index_connection::fixed_width_index_connection(
std::future<void> parse_fut;
std::future<void> write_fut;
size_t lines_read = 0;
size_t lines_remaining = n_max;
std::unique_ptr<RProgress::RProgress> empty_pb = nullptr;

newlines_.push_back(start - 1);
if (n_max > 0) {
newlines_.push_back(start - 1);
}

while (sz > 0) {
if (parse_fut.valid()) {
parse_fut.wait();
}
if (lines_read >= lines_remaining) {
break;
}
lines_remaining -= lines_read;

parse_fut = std::async([&, i, start, total_read, sz] {
lines_read = index_region(
buf[i],
Expand All @@ -94,7 +101,7 @@ fixed_width_index_connection::fixed_width_index_connection(
total_read,
comment,
skip_empty_rows,
n_max,
lines_remaining,
empty_pb);
});

Expand All @@ -120,12 +127,14 @@ fixed_width_index_connection::fixed_width_index_connection(

SPDLOG_DEBUG("first_nl_loc: {0} size: {1}", start, sz);
}

if (parse_fut.valid()) {
parse_fut.wait();
}
if (write_fut.valid()) {
write_fut.wait();
}

std::fclose(out);

if (progress) {
Expand All @@ -139,9 +148,11 @@ fixed_width_index_connection::fixed_width_index_connection(
}

std::error_code error;
mmap_ = make_mmap_source(filename_.c_str(), error);
if (error) {
cpp11::stop("%s", error.message().c_str());
if (n_max != 0) {
mmap_ = make_mmap_source(filename_.c_str(), error);
if (error) {
cpp11::stop("%s", error.message().c_str());
}
}

#ifdef VROOM_LOG
Expand Down
26 changes: 26 additions & 0 deletions tests/testthat/test-vroom_fwf.R
Expand Up @@ -303,6 +303,32 @@ test_that("vroom_fwf respects n_max (#334)", {
expect_equal(out[[2]], c(1, 2))
})

test_that("vroom_fwf respects n_max when reading from a connection", {
f <- tempfile()
on.exit(unlink(f))
writeLines(rep("00010002", 1000), f)

out1 <- vroom_fwf(file(f), col_positions = fwf_widths(c(4, 4)), col_types = "ii")

expect_equal(dim(out1), c(1000, 2))

out2 <- vroom_fwf(file(f), n_max = 900, col_positions = fwf_widths(c(4, 4)), col_types = "ii")

expect_equal(dim(out2), c(900, 2))

out3 <- vroom_fwf(file(f), n_max = 100, col_positions = fwf_widths(c(4, 4)), col_types = "ii")

expect_equal(dim(out3), c(100, 2))

out4 <- vroom_fwf(file(f), n_max = 10, col_positions = fwf_widths(c(4, 4)), col_types = "ii")

expect_equal(dim(out4), c(10, 2))

out5 <- vroom_fwf(file(f), n_max = 1, col_positions = fwf_widths(c(4, 4)), col_types = "ii")

expect_equal(dim(out5), c(1, 2))
})

test_that("vroom_fwf works when skip_empty_rows is false (https://github.com/tidyverse/readr/issues/1211)", {
f <- tempfile()
on.exit(unlink(f))
Expand Down

0 comments on commit f7ca609

Please sign in to comment.