Skip to content

Commit

Permalink
Merge pull request #433 from jimhester/bugfix/issue309
Browse files Browse the repository at this point in the history
Support reading into long vectors
  • Loading branch information
jimhester committed Jun 15, 2016
2 parents 75b0b5f + 909d0a5 commit afb2a0d
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 10 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
@@ -1,5 +1,7 @@
# readr 0.2.2.9000

* Supports reading into long vectors (#309, @jimhester).

* Printing of double values now uses an
[implementation](https://github.com/juj/MathGeoLib/blob/master/src/Math/grisu3.c)
of the [grisu3
Expand Down
2 changes: 1 addition & 1 deletion src/SourceRaw.h
Expand Up @@ -14,7 +14,7 @@ class SourceRaw : public Source {
x_(x)
{
begin_ = (const char*) RAW(x);
end_ = (const char*) RAW(x) + Rf_length(x);
end_ = (const char*) RAW(x) + Rf_xlength(x);

// Skip lines, if needed
begin_ = skipLines(begin_, end_, skip, comment);
Expand Down
2 changes: 1 addition & 1 deletion src/SourceString.h
Expand Up @@ -15,7 +15,7 @@ class SourceString : public Source {
string_ = x[0];

begin_ = CHAR(string_);
end_ = begin_ + Rf_length(string_);
end_ = begin_ + Rf_xlength(string_);

// Skip lines, if needed
begin_ = skipLines(begin_, end_, skip, comment);
Expand Down
16 changes: 8 additions & 8 deletions src/read.cpp
Expand Up @@ -29,10 +29,10 @@ CharacterVector read_lines_(List sourceSpec, List locale_, int n_max = -1,
LocaleInfo locale(locale_);
Progress progressBar;

int n = (n_max < 0) ? 10000 : n_max;
R_len_t n = (n_max < 0) ? 10000 : n_max;
CharacterVector out(n);

int i = 0;
R_len_t i = 0;
for (Token t = tokenizer.nextToken(); t.type() != TOKEN_EOF; t = tokenizer.nextToken()) {
if (progress && (i + 1) % 25000 == 0)
progressBar.show(tokenizer.progress());
Expand All @@ -41,7 +41,7 @@ CharacterVector read_lines_(List sourceSpec, List locale_, int n_max = -1,
if (n_max < 0) {
// Estimate rows in full dataset
n = (i / tokenizer.progress().first) * 1.2;
out = Rf_lengthgets(out, n);
out = Rf_xlengthgets(out, n);
} else {
break;
}
Expand All @@ -54,7 +54,7 @@ CharacterVector read_lines_(List sourceSpec, List locale_, int n_max = -1,
}

if (i < n) {
out = Rf_lengthgets(out, i);
out = Rf_xlengthgets(out, i);
}

if (progress)
Expand All @@ -72,10 +72,10 @@ List read_lines_raw_(List sourceSpec, int n_max = -1, bool progress = false) {
tokenizer.tokenize(source->begin(), source->end());
Progress progressBar;

int n = (n_max < 0) ? 10000 : n_max;
R_len_t n = (n_max < 0) ? 10000 : n_max;
List out(n);

int i = 0;
R_len_t i = 0;
for (Token t = tokenizer.nextToken(); t.type() != TOKEN_EOF; t = tokenizer.nextToken()) {
if (progress && (i + 1) % 25000 == 0)
progressBar.show(tokenizer.progress());
Expand All @@ -84,7 +84,7 @@ List read_lines_raw_(List sourceSpec, int n_max = -1, bool progress = false) {
if (n_max < 0) {
// Estimate rows in full dataset
n = (i / tokenizer.progress().first) * 1.2;
out = Rf_lengthgets(out, n);
out = Rf_xlengthgets(out, n);
} else {
break;
}
Expand All @@ -97,7 +97,7 @@ List read_lines_raw_(List sourceSpec, int n_max = -1, bool progress = false) {
}

if (i < n) {
out = Rf_lengthgets(out, i);
out = Rf_xlengthgets(out, i);
}

if (progress)
Expand Down
15 changes: 15 additions & 0 deletions tests/testthat/test-read-lines.R
Expand Up @@ -8,3 +8,18 @@ test_that("read_lines respects encoding", {
test_that("read_lines returns an empty character vector on an empty file", {
expect_equal(read_lines("empty-file"), character())
})

# These tests are slow, so they are commented out
#test_that("long vectors are supported", {
#tmp <- tempfile(fileext = ".gz")
#on.exit(unlink(tmp))

#x <- rep(paste(rep("a", 2 ^ 16), collapse = ''), 2 ^ 15)
#con <- gzfile(tmp, open = "w", compression = 0)
#writeLines(x, con)
#close(con)

#expect_equal(length(read_lines(tmp)), 2^15)

#expect_equal(length(read_lines_raw(tmp)), 2^15)
#})

0 comments on commit afb2a0d

Please sign in to comment.