From f7ca609e0586f2fd5fa4a11996f89a411c9018e5 Mon Sep 17 00:00:00 2001 From: Jim Hester Date: Wed, 30 Jun 2021 13:09:23 -0400 Subject: [PATCH] Fix n_max for fwf and connections --- NEWS.md | 2 ++ src/fixed_width_index_connection.cc | 23 +++++++++++++++++------ tests/testthat/test-vroom_fwf.R | 26 ++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index a6fc2960..c8de1cbc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # vroom (development version) +* `vroom_fwf(n_max=)` now works as intended when the input is a connection. + * `vroom()` and `vroom_write()` now automatically detect the compression format regardless of the file extension for bzip2, xzip, gzip and zip files (#348) * `vroom()` and `vroom_write()` now automatically support many more archive formats thanks to the archive package. diff --git a/src/fixed_width_index_connection.cc b/src/fixed_width_index_connection.cc index 36844375..0c6310c1 100644 --- a/src/fixed_width_index_connection.cc +++ b/src/fixed_width_index_connection.cc @@ -10,7 +10,6 @@ #include // std::async, std::future #include - #ifdef VROOM_LOG #include "spdlog/sinks/basic_file_sink.h" // support for basic file logging #include "spdlog/spdlog.h" @@ -77,14 +76,22 @@ fixed_width_index_connection::fixed_width_index_connection( std::future parse_fut; std::future write_fut; size_t lines_read = 0; + size_t lines_remaining = n_max; std::unique_ptr empty_pb = nullptr; - newlines_.push_back(start - 1); + if (n_max > 0) { + newlines_.push_back(start - 1); + } while (sz > 0) { if (parse_fut.valid()) { parse_fut.wait(); } + if (lines_read >= lines_remaining) { + break; + } + lines_remaining -= lines_read; + parse_fut = std::async([&, i, start, total_read, sz] { lines_read = index_region( buf[i], @@ -94,7 +101,7 @@ fixed_width_index_connection::fixed_width_index_connection( total_read, comment, skip_empty_rows, - n_max, + lines_remaining, empty_pb); }); @@ -120,12 +127,14 @@ fixed_width_index_connection::fixed_width_index_connection( SPDLOG_DEBUG("first_nl_loc: {0} size: {1}", start, sz); } + if (parse_fut.valid()) { parse_fut.wait(); } if (write_fut.valid()) { write_fut.wait(); } + std::fclose(out); if (progress) { @@ -139,9 +148,11 @@ fixed_width_index_connection::fixed_width_index_connection( } std::error_code error; - mmap_ = make_mmap_source(filename_.c_str(), error); - if (error) { - cpp11::stop("%s", error.message().c_str()); + if (n_max != 0) { + mmap_ = make_mmap_source(filename_.c_str(), error); + if (error) { + cpp11::stop("%s", error.message().c_str()); + } } #ifdef VROOM_LOG diff --git a/tests/testthat/test-vroom_fwf.R b/tests/testthat/test-vroom_fwf.R index 444cf267..c305d064 100644 --- a/tests/testthat/test-vroom_fwf.R +++ b/tests/testthat/test-vroom_fwf.R @@ -303,6 +303,32 @@ test_that("vroom_fwf respects n_max (#334)", { expect_equal(out[[2]], c(1, 2)) }) +test_that("vroom_fwf respects n_max when reading from a connection", { + f <- tempfile() + on.exit(unlink(f)) + writeLines(rep("00010002", 1000), f) + + out1 <- vroom_fwf(file(f), col_positions = fwf_widths(c(4, 4)), col_types = "ii") + + expect_equal(dim(out1), c(1000, 2)) + + out2 <- vroom_fwf(file(f), n_max = 900, col_positions = fwf_widths(c(4, 4)), col_types = "ii") + + expect_equal(dim(out2), c(900, 2)) + + out3 <- vroom_fwf(file(f), n_max = 100, col_positions = fwf_widths(c(4, 4)), col_types = "ii") + + expect_equal(dim(out3), c(100, 2)) + + out4 <- vroom_fwf(file(f), n_max = 10, col_positions = fwf_widths(c(4, 4)), col_types = "ii") + + expect_equal(dim(out4), c(10, 2)) + + out5 <- vroom_fwf(file(f), n_max = 1, col_positions = fwf_widths(c(4, 4)), col_types = "ii") + + expect_equal(dim(out5), c(1, 2)) +}) + test_that("vroom_fwf works when skip_empty_rows is false (https://github.com/tidyverse/readr/issues/1211)", { f <- tempfile() on.exit(unlink(f))