@@ -1,7 +1,7 @@
---
Language: Cpp
BasedOnStyle: LLVM
Standard: Cpp03
Standard: Cpp11
AlignAfterOpenBracket: AlwaysBreak
AllowShortBlocksOnASingleLine: false
BinPackArguments: false
@@ -1,13 +1,12 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/list.hpp"

#include "Collector.h"
#include "LocaleInfo.h"
#include "QiParsers.h"
#include "utils.h"

CollectorPtr Collector::create(List spec, LocaleInfo* pLocale) {
std::string subclass(as<CharacterVector>(spec.attr("class"))[0]);
CollectorPtr Collector::create(cpp11::list spec, LocaleInfo* pLocale) {
std::string subclass(cpp11::as_cpp<cpp11::strings>(spec.attr("class"))[0]);

if (subclass == "collector_skip")
return CollectorPtr(new CollectorSkip());
@@ -25,36 +24,36 @@ CollectorPtr Collector::create(List spec, LocaleInfo* pLocale) {
return CollectorPtr(new CollectorCharacter(&pLocale->encoder_));
if (subclass == "collector_date") {
SEXP format_ = spec["format"];
std::string format =
(Rf_isNull(format_)) ? pLocale->dateFormat_ : as<std::string>(format_);
std::string format = (Rf_isNull(format_))
? pLocale->dateFormat_
: cpp11::as_cpp<std::string>(format_);
return CollectorPtr(new CollectorDate(pLocale, format));
}
if (subclass == "collector_datetime") {
std::string format = as<std::string>(spec["format"]);
std::string format = cpp11::as_cpp<std::string>(spec["format"]);
return CollectorPtr(new CollectorDateTime(pLocale, format));
}
if (subclass == "collector_time") {
std::string format = as<std::string>(spec["format"]);
std::string format = cpp11::as_cpp<std::string>(spec["format"]);
return CollectorPtr(new CollectorTime(pLocale, format));
}
if (subclass == "collector_factor") {
Nullable<CharacterVector> levels =
as<Nullable<CharacterVector> >(spec["levels"]);
bool ordered = as<bool>(spec["ordered"]);
bool includeNa = as<bool>(spec["include_na"]);
cpp11::sexp levels(spec["levels"]);
bool ordered = cpp11::as_cpp<bool>(spec["ordered"]);
bool includeNa = cpp11::as_cpp<bool>(spec["include_na"]);
return CollectorPtr(
new CollectorFactor(&pLocale->encoder_, levels, ordered, includeNa));
}

Rcpp::stop("Unsupported column type");
cpp11::stop("Unsupported column type");
return CollectorPtr(new CollectorSkip());
}

std::vector<CollectorPtr>
collectorsCreate(ListOf<List> specs, LocaleInfo* pLocale) {
collectorsCreate(cpp11::list specs, LocaleInfo* pLocale) {
std::vector<CollectorPtr> collectors;
for (int j = 0; j < specs.size(); ++j) {
CollectorPtr col = Collector::create(specs[j], pLocale);
CollectorPtr col(Collector::create(SEXP(specs[j]), pLocale));
collectors.push_back(col);
}

@@ -85,7 +84,7 @@ void CollectorCharacter::setValue(int i, const Token& t) {
SET_STRING_ELT(column_, i, Rf_mkCharCE("", CE_UTF8));
break;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}
}

@@ -124,7 +123,7 @@ void CollectorDate::setValue(int i, const Token& t) {
REAL(column_)[i] = NA_REAL;
return;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}
}

@@ -160,7 +159,7 @@ void CollectorDateTime::setValue(int i, const Token& t) {
REAL(column_)[i] = NA_REAL;
return;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}

return;
@@ -195,14 +194,14 @@ void CollectorDouble::setValue(int i, const Token& t) {
REAL(column_)[i] = NA_REAL;
break;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}
}

// Writes the unsigned count `st` into slot `i` of the double column,
// converting it to double on the way in.
void CollectorDouble::setValue(int i, size_t st) {
  double* values = REAL(column_);
  values[i] = static_cast<double>(st);
}

void CollectorFactor::insert(int i, Rcpp::String str, const Token& t) {
std::map<Rcpp::String, int>::iterator it = levelset_.find(str);
void CollectorFactor::insert(int i, cpp11::string str, const Token& t) {
std::map<cpp11::string, int>::iterator it = levelset_.find(str);
if (it == levelset_.end()) {
if (implicitLevels_ || (includeNa_ && str == NA_STRING)) {
int n = levelset_.size();
@@ -226,8 +225,8 @@ void CollectorFactor::setValue(int i, const Token& t) {
boost::container::string buffer;
SourceIterators string = t.getString(&buffer);

Rcpp::String std_string =
pEncoder_->makeSEXP(string.first, string.second, t.hasNull());
cpp11::string std_string(
pEncoder_->makeSEXP(string.first, string.second, t.hasNull()));
insert(i, std_string, t);
return;
};
@@ -239,7 +238,7 @@ void CollectorFactor::setValue(int i, const Token& t) {
}
return;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}
}

@@ -272,7 +271,7 @@ void CollectorInteger::setValue(int i, const Token& t) {
INTEGER(column_)[i] = NA_INTEGER;
break;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}
}

@@ -306,7 +305,7 @@ void CollectorLogical::setValue(int i, const Token& t) {
return;
break;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}
}

@@ -333,7 +332,7 @@ void CollectorNumeric::setValue(int i, const Token& t) {
REAL(column_)[i] = NA_REAL;
break;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}
}

@@ -368,13 +367,13 @@ void CollectorTime::setValue(int i, const Token& t) {
REAL(column_)[i] = NA_REAL;
return;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}
}

void CollectorRaw::setValue(int i, const Token& t) {
if (t.type() == TOKEN_EOF) {
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}
SET_VECTOR_ELT(column_, i, t.asRaw());
return;
@@ -1,21 +1,25 @@
#ifndef FASTREAD_COLLECTOR_H_
#define FASTREAD_COLLECTOR_H_
#include "cpp11/doubles.hpp"
#include "cpp11/integers.hpp"
#include "cpp11/list.hpp"
#include "cpp11/logicals.hpp"
#include "cpp11/strings.hpp"

#include "DateTime.h"
#include "DateTimeParser.h"
#include "Iconv.h"
#include "LocaleInfo.h"
#include "Token.h"
#include "Warnings.h"
#include <Rcpp.h>
#include <boost/shared_ptr.hpp>

class Collector;
typedef boost::shared_ptr<Collector> CollectorPtr;

class Collector {
protected:
Rcpp::RObject column_;
cpp11::sexp column_;
Warnings* pWarnings_;

int n_;
@@ -30,14 +34,13 @@ class Collector {
virtual void setValue(int i, const std::string& s){}; // nocov
virtual void setValue(int i, size_t st){}; // nocov

virtual Rcpp::RObject vector() { return column_; };
virtual cpp11::sexp vector() { return column_; };

virtual bool skip() { return false; }

int size() { return n_; }

void resize(int n) {
// Rcpp::Rcerr << "Resizing to: " << n << std::endl;
if (n == n_)
return;

@@ -59,12 +62,12 @@ class Collector {

inline void warn(int row, int col, std::string expected, std::string actual) {
if (pWarnings_ == NULL) {
Rcpp::warning(
cpp11::warning(
"[%i, %i]: expected %s, but got '%s'",
row + 1,
col + 1,
expected,
actual);
expected.c_str(),
actual.c_str());
return;
}

@@ -75,7 +78,7 @@ class Collector {
warn(row, col, expected, std::string(actual.first, actual.second));
}

static CollectorPtr create(Rcpp::List spec, LocaleInfo* pLocale);
static CollectorPtr create(cpp11::list spec, LocaleInfo* pLocale);
};

// Character -------------------------------------------------------------------
@@ -85,7 +88,7 @@ class CollectorCharacter : public Collector {

public:
CollectorCharacter(Iconv* pEncoder)
: Collector(Rcpp::CharacterVector()), pEncoder_(pEncoder) {}
: Collector(cpp11::writable::strings(R_xlen_t(0))), pEncoder_(pEncoder) {}
void setValue(int i, const Token& t);
void setValue(int i, const std::string& s);
};
@@ -98,11 +101,13 @@ class CollectorDate : public Collector {

public:
CollectorDate(LocaleInfo* pLocale, const std::string& format)
: Collector(Rcpp::NumericVector()), format_(format), parser_(pLocale) {}
: Collector(cpp11::writable::doubles(R_xlen_t(0))),
format_(format),
parser_(pLocale) {}

void setValue(int i, const Token& t);

Rcpp::RObject vector() {
cpp11::sexp vector() {
column_.attr("class") = "Date";
return column_;
};
@@ -117,15 +122,15 @@ class CollectorDateTime : public Collector {

public:
CollectorDateTime(LocaleInfo* pLocale, const std::string& format)
: Collector(Rcpp::NumericVector()),
: Collector(cpp11::writable::doubles(R_xlen_t(0))),
format_(format),
parser_(pLocale),
tz_(pLocale->tz_) {}

void setValue(int i, const Token& t);

Rcpp::RObject vector() {
column_.attr("class") = Rcpp::CharacterVector::create("POSIXct", "POSIXt");
cpp11::sexp vector() {
column_.attr("class") = {"POSIXct", "POSIXt"};
column_.attr("tzone") = tz_;
return column_;
};
@@ -136,37 +141,35 @@ class CollectorDouble : public Collector {

public:
CollectorDouble(char decimalMark)
: Collector(Rcpp::NumericVector()), decimalMark_(decimalMark) {}
: Collector(cpp11::writable::doubles(R_xlen_t(0))),
decimalMark_(decimalMark) {}
void setValue(int i, const Token& t);
void setValue(int i, size_t st);
};

class CollectorFactor : public Collector {
Iconv* pEncoder_;
std::vector<Rcpp::String> levels_;
std::map<Rcpp::String, int> levelset_;
std::vector<cpp11::string> levels_;
std::map<cpp11::string, int> levelset_;
bool ordered_, implicitLevels_, includeNa_;
boost::container::string buffer_;

void insert(int i, Rcpp::String str, const Token& t);
void insert(int i, cpp11::string str, const Token& t);

public:
CollectorFactor(
Iconv* pEncoder,
Rcpp::Nullable<Rcpp::CharacterVector> levels,
bool ordered,
bool includeNa)
: Collector(Rcpp::IntegerVector()),
Iconv* pEncoder, cpp11::sexp levels, bool ordered, bool includeNa)
: Collector(cpp11::writable::integers(R_xlen_t(0))),
pEncoder_(pEncoder),
ordered_(ordered),
includeNa_(includeNa) {
implicitLevels_ = levels.isNull();
implicitLevels_ = levels == R_NilValue;
if (!implicitLevels_) {
Rcpp::CharacterVector lvls = Rcpp::CharacterVector(levels);
cpp11::strings lvls(levels);
int n = lvls.size();

for (int i = 0; i < n; ++i) {
Rcpp::String std_level;
cpp11::string std_level;
if (STRING_ELT(lvls, i) != NA_STRING) {
const char* level = Rf_translateCharUTF8(STRING_ELT(lvls, i));
std_level = level;
@@ -180,16 +183,15 @@ class CollectorFactor : public Collector {
}
void setValue(int i, const Token& t);

Rcpp::RObject vector() {
cpp11::sexp vector() {
if (ordered_) {
column_.attr("class") =
Rcpp::CharacterVector::create("ordered", "factor");
column_.attr("class") = {"ordered", "factor"};
} else {
column_.attr("class") = "factor";
}

int n = levels_.size();
Rcpp::CharacterVector levels = Rcpp::CharacterVector(n);
cpp11::writable::strings levels(n);
for (int i = 0; i < n; ++i) {
levels[i] = levels_[i];
}
@@ -201,13 +203,13 @@ class CollectorFactor : public Collector {

// Collector whose backing column starts out as a zero-length integer vector.
class CollectorInteger : public Collector {
public:
  // Only the cpp11 constructor is kept; the Rcpp::IntegerVector() variant
  // defined the same constructor a second time.
  CollectorInteger() : Collector(cpp11::writable::integers(R_xlen_t(0))) {}

  // Stores token `t` at position `i`; defined out of line.
  void setValue(int i, const Token& t);
};

// Collector whose backing column starts out as a zero-length logical vector.
class CollectorLogical : public Collector {
public:
  // Only the cpp11 constructor is kept; the Rcpp::LogicalVector() variant
  // defined the same constructor a second time.
  CollectorLogical() : Collector(cpp11::writable::logicals(R_xlen_t(0))) {}

  // Stores token `t` at position `i`; defined out of line.
  void setValue(int i, const Token& t);
};

@@ -216,7 +218,7 @@ class CollectorNumeric : public Collector {

public:
CollectorNumeric(char decimalMark, char groupingMark)
: Collector(Rcpp::NumericVector()),
: Collector(cpp11::writable::doubles(R_xlen_t(0))),
decimalMark_(decimalMark),
groupingMark_(groupingMark) {}
void setValue(int i, const Token& t);
@@ -231,12 +233,14 @@ class CollectorTime : public Collector {

public:
CollectorTime(LocaleInfo* pLocale, const std::string& format)
: Collector(Rcpp::NumericVector()), format_(format), parser_(pLocale) {}
: Collector(cpp11::writable::doubles(R_xlen_t(0))),
format_(format),
parser_(pLocale) {}

void setValue(int i, const Token& t);

Rcpp::RObject vector() {
column_.attr("class") = Rcpp::CharacterVector::create("hms", "difftime");
cpp11::sexp vector() {
column_.attr("class") = {"hms", "difftime"};
column_.attr("units") = "secs";
return column_;
};
@@ -254,17 +258,17 @@ class CollectorSkip : public Collector {
// Raw -------------------------------------------------------------------------
// Collector whose backing column starts out as a zero-length list.
class CollectorRaw : public Collector {
public:
  // Only the cpp11 constructor is kept; the Rcpp::List() variant defined the
  // same constructor a second time.
  CollectorRaw() : Collector(cpp11::writable::list(static_cast<R_xlen_t>(0))) {}

  // Stores token `t` at position `i`; defined out of line.
  void setValue(int i, const Token& t);
};

// Helpers ---------------------------------------------------------------------

std::vector<CollectorPtr>
collectorsCreate(Rcpp::ListOf<Rcpp::List> specs, LocaleInfo* pLocale);
collectorsCreate(cpp11::list specs, LocaleInfo* pLocale);
void collectorsResize(std::vector<CollectorPtr>& collectors, int n);
void collectorsClear(std::vector<CollectorPtr>& collectors);
std::string collectorGuess(
Rcpp::CharacterVector input, Rcpp::List locale_, bool guessInteger = false);
cpp11::strings input, cpp11::list locale_, bool guessInteger = false);

#endif
@@ -1,5 +1,6 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/R.hpp"
#include "cpp11/list.hpp"
#include "cpp11/strings.hpp"

#include "DateTime.h"
#include "DateTimeParser.h"
@@ -10,7 +11,7 @@ using namespace Rcpp;
// Pointer to a predicate that reports whether a string parses under a locale.
using canParseFun = bool (*)(const std::string&, LocaleInfo* pLocale);

bool canParse(
CharacterVector x, const canParseFun& canParse, LocaleInfo* pLocale) {
cpp11::strings x, const canParseFun& canParse, LocaleInfo* pLocale) {
for (int i = 0; i < x.size(); ++i) {
if (x[i] == NA_STRING)
continue;
@@ -24,7 +25,7 @@ bool canParse(
return true;
}

bool allMissing(CharacterVector x) {
bool allMissing(cpp11::strings x) {
for (int i = 0; i < x.size(); ++i) {
if (x[i] != NA_STRING && x[i].size() > 0)
return false;
@@ -103,10 +104,9 @@ static bool isDateTime(const std::string& x, LocaleInfo* pLocale) {
return parser.year() > 999;
}

// [[Rcpp::export]]
std::string
collectorGuess(CharacterVector input, List locale_, bool guessInteger = false) {
LocaleInfo locale(locale_);
[[cpp11::export]] std::string collectorGuess(
cpp11::strings input, cpp11::list locale_, bool guessInteger = false) {
LocaleInfo locale(static_cast<SEXP>(locale_));

if (input.size() == 0) {
return "character";
@@ -1,9 +1,12 @@
#ifndef READR_DATE_TIME_H_
#define READR_DATE_TIME_H_

#include "cpp11/R.hpp"

#include "localtime.h"
#include <ctime>
#include <stdlib.h>
#include <string>

// Much of this code is adapted from R's src/main/datetime.c.
// Author: The R Core Team.
@@ -4,6 +4,8 @@
#include "DateTime.h"
#include "LocaleInfo.h"
#include "QiParsers.h"
#include "cpp11/protect.hpp"

#include "boost.h"
#include <ctime>

@@ -144,7 +146,7 @@ class DateTimeParser {
}

if (formatItr + 1 == formatEnd)
Rcpp::stop("Invalid format: trailing %");
cpp11::stop("Invalid format: trailing %%");
formatItr++;

switch (*formatItr) {
@@ -203,7 +205,7 @@ class DateTimeParser {
break;
case 'O': // seconds (double)
if (formatItr + 1 == formatEnd || *(formatItr + 1) != 'S')
Rcpp::stop("Invalid format: %%O must be followed by %%S");
cpp11::stop("Invalid format: %%O must be followed by %%S");
formatItr++;
if (!consumeSeconds(&sec_, &psec_))
return false;
@@ -241,7 +243,7 @@ class DateTimeParser {

case 'A': // auto date / time
if (formatItr + 1 == formatEnd)
Rcpp::stop("Invalid format: %%A must be followed by another letter");
cpp11::stop("Invalid format: %%A must be followed by another letter");
formatItr++;
switch (*formatItr) {
case 'D':
@@ -253,7 +255,7 @@ class DateTimeParser {
return false;
break;
default:
Rcpp::stop("Invalid %%A auto parser");
cpp11::stop("Invalid %%A auto parser");
}
break;

@@ -276,7 +278,7 @@ class DateTimeParser {
break;

default:
Rcpp::stop("Unsupported format %%%s", *formatItr);
cpp11::stop("Unsupported format %%%s", *formatItr);
}
}

@@ -1,7 +1,6 @@
#include <Rcpp.h>
using namespace Rcpp;

#include "Iconv.h"
#include "cpp11/protect.hpp"
#include <string.h>

Iconv::Iconv(const std::string& from, const std::string& to) {
if (from == "UTF-8") {
@@ -10,9 +9,9 @@ Iconv::Iconv(const std::string& from, const std::string& to) {
cd_ = Riconv_open(to.c_str(), from.c_str());
if (cd_ == (void*)-1) {
if (errno == EINVAL) {
stop("Can't convert from %s to %s", from, to);
cpp11::stop("Can't convert from %s to %s", from.c_str(), to.c_str());
} else {
stop("Iconv initialisation failed");
cpp11::stop("Iconv initialisation failed");
}
}

@@ -44,13 +43,13 @@ size_t Iconv::convert(const char* start, const char* end) {
if (res == (size_t)-1) {
switch (errno) {
case EILSEQ:
stop("Invalid multibyte sequence");
cpp11::stop("Invalid multibyte sequence");
case EINVAL:
stop("Incomplete multibyte sequence");
cpp11::stop("Incomplete multibyte sequence");
case E2BIG:
stop("Iconv buffer too small");
cpp11::stop("Iconv buffer too small");
default:
stop("Iconv failed to convert for unknown reason");
cpp11::stop("Iconv failed to convert for unknown reason");
}
}

@@ -76,7 +75,7 @@ int my_strnlen(const char* s, int maxlen) {
// Creates a UTF-8 CHARSXP from the bytes starting at `start`.
//
// start   - first byte of the string data.
// n       - number of bytes available at `start`.
// hasNull - when true, the length is taken from readr_strnlen(start, n)
//           instead of `n`.
//
// Raises an R error through cpp11::stop() when the resulting length exceeds
// INT_MAX. Only the cpp11 call is kept: the stale Rf_error() call ahead of it
// made cpp11::stop() unreachable.
SEXP safeMakeChar(const char* start, size_t n, bool hasNull) {
  const size_t m = hasNull ? readr_strnlen(start, n) : n;
  if (m > INT_MAX) {
    cpp11::stop("R character strings are limited to 2^31-1 bytes");
  }
  return Rf_mkCharLenCE(start, m, CE_UTF8);
}
@@ -1,6 +1,9 @@
#ifndef READ_ICONV_H_
#define READ_ICONV_H_

#include "cpp11/R.hpp"
#include <string>

#include "R_ext/Riconv.h"
#include <errno.h>

@@ -1,27 +1,30 @@
#include <Rcpp.h>
#include "cpp11/as.hpp"
#include "cpp11/list.hpp"
#include "cpp11/strings.hpp"
#include <string>
#include <vector>

#include "LocaleInfo.h"

using namespace Rcpp;

// Builds a LocaleInfo from the R list `x`.
//
// Reads the "encoding", "date_names" (mon, mon_ab, day, day_ab, am_pm),
// "decimal_mark", "grouping_mark", "date_format", "time_format" and "tz"
// entries of `x`. Stops with an R error unless the class attribute of `x`
// is "locale".
//
// Note: the member initialisers read x["encoding"] and construct the encoder
// before the class check in the body runs.
LocaleInfo::LocaleInfo(cpp11::list x)
    : encoding_(cpp11::as_cpp<std::string>(x["encoding"])),
      encoder_(Iconv(encoding_)) {
  std::string klass = cpp11::as_cpp<std::string>(x.attr("class"));
  if (klass != "locale")
    cpp11::stop("Invalid input: must be of class locale");

  // Month / day / AM-PM names live in the nested "date_names" list.
  cpp11::list date_names(x["date_names"]);
  mon_ = cpp11::as_cpp<std::vector<std::string>>(date_names["mon"]);
  monAb_ = cpp11::as_cpp<std::vector<std::string>>(date_names["mon_ab"]);
  day_ = cpp11::as_cpp<std::vector<std::string>>(date_names["day"]);
  dayAb_ = cpp11::as_cpp<std::vector<std::string>>(date_names["day_ab"]);
  amPm_ = cpp11::as_cpp<std::vector<std::string>>(date_names["am_pm"]);

  decimalMark_ = cpp11::as_cpp<char>(x["decimal_mark"]);
  groupingMark_ = cpp11::as_cpp<char>(x["grouping_mark"]);

  dateFormat_ = cpp11::as_cpp<std::string>(x["date_format"]);
  timeFormat_ = cpp11::as_cpp<std::string>(x["time_format"]);

  tz_ = cpp11::as_cpp<std::string>(x["tz"]);
}
@@ -3,6 +3,10 @@

#include "Iconv.h"

#include "cpp11/list.hpp"
#include <string>
#include <vector>

class LocaleInfo {

public:
@@ -18,7 +22,7 @@ class LocaleInfo {
std::string encoding_;
Iconv encoder_;

LocaleInfo(Rcpp::List);
LocaleInfo(cpp11::list x);
};

#endif
@@ -1,7 +1,8 @@
#ifndef FASTREAD_PROGRESS_H_
#define FASTREAD_PROGRESS_H_

#include <Rcpp.h>
#include "cpp11/R.hpp"
#include <iomanip>
#include <sstream>
#include <time.h>

@@ -12,12 +13,16 @@ inline std::string clearLine(int width = 50) {
}

// Formats the duration `x` as a short label with a unit suffix.
//
//   x < 60        -> "<x> s"
//   x < 60 * 60   -> "<x / 60> m"
//   otherwise     -> "<x / 3600> h"
//
// Divisions are integer divisions, so partial minutes/hours are truncated.
// `x` is presumably a number of seconds (inferred from the suffixes).
//
// The stale tfm::format() returns are dropped: they made the stringstream
// code unreachable and depended on tinyformat.
inline std::string showTime(int x) {
  std::stringstream ss;
  if (x < 60) {
    ss << x << " s";
  } else if (x < 60 * 60) {
    ss << x / 60 << " m";
  } else {
    ss << x / (60 * 60) << " h";
  }
  return ss.str();
}

@@ -52,9 +57,10 @@ class Progress {
}

std::stringstream labelStream;
tfm::format(labelStream, " %3d%%", (int)(prop * 100));
labelStream << std::setprecision(2) << std::fixed << " "
<< (int)(prop * 100) << "%";
if (size > 0) {
tfm::format(labelStream, " %4.0f MB", size);
labelStream << " " << std::setprecision(0) << size << " MB";
}

std::string label = labelStream.str();
@@ -66,7 +72,7 @@ class Progress {
int nbars = prop * barSize;
int nspaces = (1 - prop) * barSize;
std::string bars(nbars, '='), spaces(nspaces, ' ');
Rcpp::Rcout << '\r' << '|' << bars << spaces << '|' << label;
Rprintf("\r|%s%s|%s", bars.c_str(), spaces.c_str(), label.c_str());
}

~Progress() {
@@ -76,7 +82,7 @@ class Progress {

if (!stopped_)
timeStop_ = now();
Rcpp::Rcout << "\n";
Rprintf("\n");

} catch (...) {
}

This file was deleted.

@@ -1,11 +1,16 @@
#include "Reader.h"

#include "cpp11/function.hpp"
#include "cpp11/list.hpp"

#include <sstream>

Reader::Reader(
SourcePtr source,
TokenizerPtr tokenizer,
std::vector<CollectorPtr> collectors,
bool progress,
CharacterVector colNames)
cpp11::strings colNames)
: source_(source),
tokenizer_(tokenizer),
collectors_(collectors),
@@ -19,7 +24,7 @@ Reader::Reader(
TokenizerPtr tokenizer,
CollectorPtr collector,
bool progress,
CharacterVector colNames)
cpp11::strings colNames)
: source_(source),
tokenizer_(tokenizer),
progress_(progress),
@@ -29,7 +34,7 @@ Reader::Reader(
init(colNames);
}

void Reader::init(CharacterVector colNames) {
void Reader::init(cpp11::strings colNames) {
tokenizer_->tokenize(source_->begin(), source_->end());
tokenizer_->setWarnings(&warnings_);

@@ -44,7 +49,7 @@ void Reader::init(CharacterVector colNames) {
}

if (colNames.size() > 0) {
outNames_ = CharacterVector(keptColumns_.size());
outNames_ = cpp11::writable::strings(keptColumns_.size());
int i = 0;
for (std::vector<int>::const_iterator it = keptColumns_.begin();
it != keptColumns_.end();
@@ -54,31 +59,30 @@ void Reader::init(CharacterVector colNames) {
}
}

RObject Reader::readToDataFrame(int lines) {
cpp11::sexp Reader::readToDataFrame(int lines) {
int rows = read(lines);

// Save individual columns into a data frame
List out(outNames_.size());
cpp11::writable::list out(outNames_.size());
int j = 0;
for (std::vector<int>::const_iterator it = keptColumns_.begin();
it != keptColumns_.end();
++it) {
out[j++] = collectors_[*it]->vector();
}

out.attr("names") = outNames_;
out.attr("class") =
CharacterVector::create("spec_tbl_df", "tbl_df", "tbl", "data.frame");
out.attr("row.names") = IntegerVector::create(NA_INTEGER, -(rows + 1));
cpp11::sexp out2(warnings_.addAsAttribute(static_cast<SEXP>(out)));

out = warnings_.addAsAttribute(out);
out2.attr("names") = outNames_;
out2.attr("class") = {"spec_tbl_df", "tbl_df", "tbl", "data.frame"};
out2.attr("row.names") = {NA_INTEGER, -(rows + 1)};

collectorsClear();
warnings_.clear();

// TODO: call tibble name repair function when tibble 1.5.0 is released.

return out;
return out2;
}

int Reader::read(int lines) {
@@ -156,8 +160,12 @@ void Reader::checkColumns(int i, int j, int n) {
if (j + 1 == n)
return;

warnings_.addWarning(
i, -1, tfm::format("%i columns", n), tfm::format("%i columns", j + 1));
std::stringstream ss1;
ss1 << n << " columns";

std::stringstream ss2;
ss2 << j + 1 << " columns";
warnings_.addWarning(i, -1, ss1.str(), ss2.str());
}

void Reader::collectorsResize(int n) {
@@ -172,30 +180,29 @@ void Reader::collectorsClear() {
}
}

RObject Reader::meltToDataFrame(List locale_, int lines) {
cpp11::sexp Reader::meltToDataFrame(cpp11::list locale_, int lines) {
melt(locale_, lines);

// Save individual columns into a data frame
List out(4);
cpp11::writable::list out(4);
out[0] = collectors_[0]->vector();
out[1] = collectors_[1]->vector();
out[2] = collectors_[2]->vector();
out[3] = collectors_[3]->vector();

out.attr("names") =
CharacterVector::create("row", "col", "data_type", "value");
out = warnings_.addAsAttribute(out);
out.attr("names") = {"row", "col", "data_type", "value"};
cpp11::sexp out2(warnings_.addAsAttribute(static_cast<SEXP>(out)));

collectorsClear();
warnings_.clear();

out.attr("names") =
CharacterVector::create("row", "col", "data_type", "value");
static Function as_tibble("as_tibble", Environment::namespace_env("tibble"));
out.attr("names") = {"row", "col", "data_type", "value"};

static cpp11::function as_tibble = cpp11::package("tibble")["as_tibble"];
return as_tibble(out);
}

int Reader::melt(List locale_, int lines) {
int Reader::melt(cpp11::list locale_, int lines) {

if (t_.type() == TOKEN_EOF) {
return (-1);
@@ -239,8 +246,9 @@ int Reader::melt(List locale_, int lines) {

switch (t_.type()) {
case TOKEN_STRING: {
cpp11::sexp str(cpp11::as_sexp(t_.asString()));
collectors_[2]->setValue(
cells - 1, collectorGuess(t_.asString(), locale_, true));
cells - 1, collectorGuess(SEXP(str), locale_, true));
break;
};
case TOKEN_MISSING:
@@ -250,7 +258,7 @@ int Reader::melt(List locale_, int lines) {
collectors_[2]->setValue(cells - 1, "empty");
break;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
cpp11::stop("Invalid token");
}

last_row = t_.row();
@@ -1,44 +1,38 @@
#include <Rcpp.h>

#include "Collector.h"
#include "Progress.h"
#include "Source.h"

using namespace Rcpp;
#include "cpp11/list.hpp"
#include "cpp11/strings.hpp"

class Reader {
public:
Reader(
SourcePtr source,
TokenizerPtr tokenizer,
std::vector<CollectorPtr> collectors,
bool progress = true,
CharacterVector colNames = CharacterVector());
bool progress,
cpp11::strings colNames = cpp11::strings());

Reader(
SourcePtr source,
TokenizerPtr tokenizer,
CollectorPtr collector,
bool progress = true,
CharacterVector colNames = CharacterVector());
bool progress,
cpp11::strings colNames = cpp11::strings());

RObject readToDataFrame(int lines = -1);
RObject meltToDataFrame(List locale_, int lines = -1);
cpp11::sexp readToDataFrame(int lines = -1);
cpp11::sexp meltToDataFrame(cpp11::list locale_, int lines = -1);

// Reads via read(lines) and returns the first collector's column as a `T`.
//
// `T` must be constructible from a SEXP. `out` is built from the column
// before collectorsClear() runs. Only the cpp11 form is kept: the stale
// `T out = as<T>(...)` line declared `out` a second time.
template <typename T> T readToVector(int lines) {
  read(lines);

  SEXP x = collectors_[0]->vector();
  T out(x);
  collectorsClear();
  return out;
}

// Reads via read(lines), converts the first collector's column to `T`, and
// returns it after passing it through warnings_.addAsAttribute().
//
// NOTE(review): this template uses the unqualified names `RObject` and
// `as<T>`, which come from Rcpp; confirm they are still in scope wherever
// this header is included, or whether this helper is meant to be removed.
template <typename T> RObject readToVectorWithWarnings(int lines) {
read(lines);

return warnings_.addAsAttribute(as<T>(collectors_[0]->vector()));
}

private:
Warnings warnings_;
SourcePtr source_;
@@ -47,15 +41,15 @@ class Reader {
bool progress_;
Progress progressBar_;
std::vector<int> keptColumns_;
CharacterVector outNames_;
cpp11::writable::strings outNames_;
bool begun_;
Token t_;

const static int progressStep_ = 10000;

void init(CharacterVector colNames);
void init(cpp11::strings colNames);
int read(int lines = -1);
int melt(List locale_, int lines = -1);
int melt(cpp11::list locale_, int lines = -1);
void checkColumns(int i, int j, int n);

void collectorsResize(int n);
@@ -1,31 +1,29 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/list.hpp"
#include "cpp11/strings.hpp"

#include "Source.h"
#include "SourceFile.h"
#include "SourceRaw.h"
#include "SourceString.h"

// Factory: builds the Source subclass named by the first element of the
// class attribute of `spec`.
//
// Reads the "skip", "skip_empty_rows" and "comment" entries of `spec`, and
// uses spec[0] as the payload:
//   "source_raw"    -> SourceRaw(spec[0], ...)
//   "source_string" -> SourceString(spec[0], ...)
//   "source_file"   -> SourceFile(path, ...), where path is the first
//                      string of spec[0] passed through Rf_translateChar
// Any other class stops with "Unknown source type".
SourcePtr Source::create(cpp11::list spec) {
  std::string subclass(cpp11::as_cpp<cpp11::strings>(spec.attr("class"))[0]);

  int skip = cpp11::as_cpp<int>(spec["skip"]);
  bool skipEmptyRows = cpp11::as_cpp<bool>(spec["skip_empty_rows"]);
  std::string comment = cpp11::as_cpp<std::string>(spec["comment"]);

  if (subclass == "source_raw") {
    return SourcePtr(new SourceRaw(spec[0], skip, skipEmptyRows, comment));
  } else if (subclass == "source_string") {
    return SourcePtr(new SourceString(spec[0], skip, skipEmptyRows, comment));
  } else if (subclass == "source_file") {
    cpp11::strings path(spec[0]);
    return SourcePtr(new SourceFile(
        Rf_translateChar(path[0]), skip, skipEmptyRows, comment));
  }

  cpp11::stop("Unknown source type");
  // Not reached; kept so every path has a return statement.
  return SourcePtr();
}

@@ -1,9 +1,10 @@
#ifndef FASTREAD_SOURCE_H_
#define FASTREAD_SOURCE_H_

#include "boost.h"
#include "cpp11/list.hpp"
#include "utils.h"
#include <Rcpp.h>

#include "boost.h"

class Source;
typedef boost::shared_ptr<Source> SourcePtr;
@@ -31,7 +32,7 @@ class Source {

static const char* skipBom(const char* begin, const char* end);

static SourcePtr create(Rcpp::List spec);
static SourcePtr create(cpp11::list spec);

private:
static bool
@@ -2,8 +2,9 @@
#define FASTREAD_SOURCEFILE_H_

#include "Source.h"
#include "cpp11/protect.hpp"

#include "boost.h"
#include <Rcpp.h>

class SourceFile : public Source {
boost::interprocess::file_mapping fm_;
@@ -24,7 +25,7 @@ class SourceFile : public Source {
mr_ = boost::interprocess::mapped_region(
fm_, boost::interprocess::read_private);
} catch (boost::interprocess::interprocess_exception& e) {
Rcpp::stop("Cannot read file %s: %s", path, e.what());
cpp11::stop("Cannot read file %s: %s", path.c_str(), e.what());
}

begin_ = static_cast<char*>(mr_.get_address());
@@ -2,16 +2,16 @@
#define FASTREAD_SOURCERAW_H_

#include "Source.h"
#include <Rcpp.h>
#include "cpp11/raws.hpp"

class SourceRaw : public Source {
Rcpp::RawVector x_; // Make sure it doesn't get GC'd
cpp11::raws x_;
const char* begin_;
const char* end_;

public:
SourceRaw(
Rcpp::RawVector x,
cpp11::raws x,
int skip = 0,
bool skipEmptyRows = true,
const std::string& comment = "")
@@ -1,22 +1,23 @@
#ifndef FASTREAD_SOURCESTRING_H_
#define FASTREAD_SOURCESTRING_H_

#include "cpp11/strings.hpp"

#include "Source.h"
#include <Rcpp.h>

class SourceString : public Source {
Rcpp::RObject string_;
cpp11::sexp string_;

const char* begin_;
const char* end_;

public:
SourceString(
Rcpp::CharacterVector x,
cpp11::strings x,
int skip = 0,
bool skipEmptyRows = true,
const std::string& comment = "") {
string_ = x[0];
const std::string& comment = "")
: string_(x[0]) {

begin_ = CHAR(string_);
end_ = begin_ + Rf_xlength(string_);
@@ -1,10 +1,11 @@
#ifndef FASTREAD_TOKEN_H_
#define FASTREAD_TOKEN_H_

#include "cpp11/raws.hpp"

#include "Iconv.h"
#include "Source.h"
#include "Tokenizer.h"
#include <Rcpp.h>
#include <string>

enum TokenType {
@@ -64,7 +65,7 @@ class Token {

SEXP asRaw() const {
size_t n = (type_ == TOKEN_STRING) ? end_ - begin_ : 0;
Rcpp::RawVector out(n);
cpp11::writable::raws out(n);

if (n > 0)
memcpy(RAW(out), begin_, n);
@@ -1,5 +1,6 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/as.hpp"
#include "cpp11/integers.hpp"
#include "cpp11/list.hpp"

#include "Tokenizer.h"
#include "TokenizerDelim.h"
@@ -8,19 +9,20 @@ using namespace Rcpp;
#include "TokenizerLog.h"
#include "TokenizerWs.h"

TokenizerPtr Tokenizer::create(List spec) {
std::string subclass(as<CharacterVector>(spec.attr("class"))[0]);
TokenizerPtr Tokenizer::create(cpp11::list spec) {
std::string subclass(cpp11::strings(spec.attr("class"))[0]);

if (subclass == "tokenizer_delim") {
char delim = as<char>(spec["delim"]);
char quote = as<char>(spec["quote"]);
std::vector<std::string> na = as<std::vector<std::string> >(spec["na"]);
std::string comment = as<std::string>(spec["comment"]);
bool trimWs = as<bool>(spec["trim_ws"]);
bool escapeDouble = as<bool>(spec["escape_double"]);
bool escapeBackslash = as<bool>(spec["escape_backslash"]);
bool quotedNA = as<bool>(spec["quoted_na"]);
bool skipEmptyRows = as<bool>(spec["skip_empty_rows"]);
char delim = cpp11::as_cpp<char>(spec["delim"]);
char quote = cpp11::as_cpp<char>(spec["quote"]);
std::vector<std::string> na =
cpp11::as_cpp<std::vector<std::string>>(spec["na"]);
std::string comment = cpp11::as_cpp<std::string>(spec["comment"]);
bool trimWs = cpp11::as_cpp<bool>(spec["trim_ws"]);
bool escapeDouble = cpp11::as_cpp<bool>(spec["escape_double"]);
bool escapeBackslash = cpp11::as_cpp<bool>(spec["escape_backslash"]);
bool quotedNA = cpp11::as_cpp<bool>(spec["quoted_na"]);
bool skipEmptyRows = cpp11::as_cpp<bool>(spec["skip_empty_rows"]);

return TokenizerPtr(new TokenizerDelim(
delim,
@@ -33,28 +35,31 @@ TokenizerPtr Tokenizer::create(List spec) {
quotedNA,
skipEmptyRows));
} else if (subclass == "tokenizer_fwf") {
std::vector<int> begin = as<std::vector<int> >(spec["begin"]),
end = as<std::vector<int> >(spec["end"]);
std::vector<std::string> na = as<std::vector<std::string> >(spec["na"]);
std::string comment = as<std::string>(spec["comment"]);
bool trimWs = as<bool>(spec["trim_ws"]);
bool skipEmptyRows = as<bool>(spec["skip_empty_rows"]);
std::vector<int> begin = cpp11::as_cpp<std::vector<int>>(spec["begin"]),
end = cpp11::as_cpp<std::vector<int>>(spec["end"]);
std::vector<std::string> na =
cpp11::as_cpp<std::vector<std::string>>(spec["na"]);
std::string comment = cpp11::as_cpp<std::string>(spec["comment"]);
bool trimWs = cpp11::as_cpp<bool>(spec["trim_ws"]);
bool skipEmptyRows = cpp11::as_cpp<bool>(spec["skip_empty_rows"]);

return TokenizerPtr(
new TokenizerFwf(begin, end, na, comment, trimWs, skipEmptyRows));
} else if (subclass == "tokenizer_line") {
std::vector<std::string> na = as<std::vector<std::string> >(spec["na"]);
bool skipEmptyRows = as<bool>(spec["skip_empty_rows"]);
std::vector<std::string> na =
cpp11::as_cpp<std::vector<std::string>>(spec["na"]);
bool skipEmptyRows = cpp11::as_cpp<bool>(spec["skip_empty_rows"]);
return TokenizerPtr(new TokenizerLine(na, skipEmptyRows));
} else if (subclass == "tokenizer_log") {
return TokenizerPtr(new TokenizerLog());
} else if (subclass == "tokenizer_ws") {
std::vector<std::string> na = as<std::vector<std::string> >(spec["na"]);
std::string comment = as<std::string>(spec["comment"]);
bool skipEmptyRows = as<bool>(spec["skip_empty_rows"]);
std::vector<std::string> na =
cpp11::as_cpp<std::vector<std::string>>(spec["na"]);
std::string comment = cpp11::as_cpp<std::string>(spec["comment"]);
bool skipEmptyRows = cpp11::as_cpp<bool>(spec["skip_empty_rows"]);
return TokenizerPtr(new TokenizerWs(na, comment, skipEmptyRows));
}

Rcpp::stop("Unknown tokenizer type");
cpp11::stop("Unknown tokenizer type");
return TokenizerPtr();
}
@@ -1,9 +1,13 @@
#ifndef FASTREAD_TOKENIZER_H_
#define FASTREAD_TOKENIZER_H_

#include "cpp11/R.hpp"
#include "cpp11/list.hpp"
#include "cpp11/protect.hpp"

#include "Warnings.h"
#include "boost.h"
#include <Rcpp.h>

class Token;

typedef const char* SourceIterator;
@@ -43,13 +47,14 @@ class Tokenizer {
const std::string& expected,
const std::string& actual = "") {
if (pWarnings_ == NULL) {
Rcpp::warning("[%i, %i]: expected %s", row + 1, col + 1, expected);
cpp11::warning(
"[%i, %i]: expected %s", row + 1, col + 1, expected.c_str());
return;
}
pWarnings_->addWarning(row, col, expected, actual);
}

static TokenizerPtr create(Rcpp::List spec);
static TokenizerPtr create(cpp11::list spec);
};

// -----------------------------------------------------------------------------
@@ -1,7 +1,5 @@
#include <Rcpp.h>
using namespace Rcpp;

#include "TokenizerDelim.h"
#include "cpp11/protect.hpp"

TokenizerDelim::TokenizerDelim(
char delim,
@@ -69,7 +67,7 @@ Token TokenizerDelim::nextToken() {
hasNull = true;

if ((end_ - cur_) % 131072 == 0)
Rcpp::checkUserInterrupt();
cpp11::check_user_interrupt();

switch (state_) {
case STATE_DELIM: {
@@ -338,7 +336,7 @@ void TokenizerDelim::unescape(
} else if (escapeBackslash_ && !escapeDouble_) {
unescapeBackslash(begin, end, pOut);
} else if (escapeBackslash_ && escapeDouble_) {
Rcpp::stop("Backslash & double escapes not supported at this time");
cpp11::stop("Backslash & double escapes not supported at this time");
}
}

@@ -1,10 +1,10 @@
#ifndef FASTREAD_TOKENIZEDELIM_H_
#define FASTREAD_TOKENIZEDELIM_H_
#include "cpp11/R.hpp"

#include "Token.h"
#include "Tokenizer.h"
#include "utils.h"
#include <Rcpp.h>

enum DelimState {
STATE_DELIM,
@@ -1,11 +1,14 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/list.hpp"
#include "cpp11/protect.hpp"

#include "Source.h"
#include "Tokenizer.h"
#include "TokenizerFwf.h"
#include "utils.h"

#include "Source.h"

#include <sstream>

struct skip_t {
SourceIterator begin;
int lines;
@@ -24,7 +27,6 @@ skip_t skip_comments(
int skip = 0;
boost::iterator_range<const char*> haystack(cur, end);
while (boost::starts_with(haystack, comment)) {
// Rcpp::Rcout << boost::starts_with(haystack, comment);
// Skip rest of line
while (cur != end && *cur != '\n' && *cur != '\r') {
++cur;
@@ -76,8 +78,8 @@ std::vector<bool> emptyCols_(
return is_white;
}

// [[Rcpp::export]]
List whitespaceColumns(List sourceSpec, int n = 100, std::string comment = "") {
[[cpp11::export]] cpp11::list
whitespaceColumns(cpp11::list sourceSpec, int n, std::string comment) {
SourcePtr source = Source::create(sourceSpec);

skip_t s = skip_comments(source->begin(), source->end(), comment);
@@ -100,13 +102,14 @@ List whitespaceColumns(List sourceSpec, int n = 100, std::string comment = "") {
if (in_col)
end.push_back(empty.size());

return List::create(_["begin"] = begin, _["end"] = end, _["skip"] = s.lines);
using namespace cpp11::literals;
return cpp11::writable::list(
{"begin"_nm = begin, "end"_nm = end, "skip"_nm = s.lines});
}

// TokenizerFwf --------------------------------------------------------------

#include "TokenizerFwf.h"
#include <Rcpp.h>

TokenizerFwf::TokenizerFwf(
const std::vector<int>& beginOffset,
@@ -125,13 +128,13 @@ TokenizerFwf::TokenizerFwf(
trimWS_(trimWS),
skipEmptyRows_(skipEmptyRows) {
if (beginOffset_.size() != endOffset_.size())
Rcpp::stop(
cpp11::stop(
"Begin (%i) and end (%i) specifications must have equal length",
beginOffset_.size(),
endOffset_.size());

if (beginOffset_.size() == 0)
Rcpp::stop("Zero-length begin and end specifications not supported");
cpp11::stop("Zero-length begin and end specifications not supported");

// File is assumed to be ragged (last column can have variable width)
// when the last element of endOffset_ is NA
@@ -140,16 +143,16 @@ TokenizerFwf::TokenizerFwf(
max_ = 0;
for (int j = 0; j < (cols_ - isRagged_); ++j) {
if (endOffset_[j] <= beginOffset_[j])
Rcpp::stop(
cpp11::stop(
"Begin offset (%i) must be smaller than end offset (%i)",
beginOffset_[j],
endOffset_[j]);

if (beginOffset_[j] < 0)
Rcpp::stop("Begin offset (%i) must be greater than 0", beginOffset_[j]);
cpp11::stop("Begin offset (%i) must be greater than 0", beginOffset_[j]);

if (endOffset_[j] < 0)
Rcpp::stop("End offset (%i) must be greater than 0", endOffset_[j]);
cpp11::stop("End offset (%i) must be greater than 0", endOffset_[j]);

if (endOffset_[j] > max_) {
max_ = endOffset_[j];
@@ -204,11 +207,11 @@ Token TokenizerFwf::nextToken() {
break;

if (*fieldBegin == '\n' || *fieldBegin == '\r') {
warn(
row_,
col_,
tfm::format("%i chars between fields", skip),
tfm::format("%i chars until end of line", i));
std::stringstream ss1;
ss1 << skip << " chars betwen fields";
std::stringstream ss2;
ss2 << skip << " chars until end of line";
warn(row_, col_, ss1.str(), ss2.str());

row_++;
col_ = 0;
@@ -245,9 +248,13 @@ Token TokenizerFwf::nextToken() {
// Find the end of the field, stopping for newlines
for (int i = 0; i < width; ++i) {
if (fieldEnd == end_ || *fieldEnd == '\n' || *fieldEnd == '\r') {
if (!(col_ == 0 && !skipEmptyRows_))
warn(
row_, col_, tfm::format("%i chars", width), tfm::format("%i", i));
// Warn about a field that ended early, except when col_ == 0 and empty rows
// are not being skipped.
if (!(col_ == 0 && !skipEmptyRows_)) {
// First text: the width the field was supposed to have (`width`).
// Second text: how many characters were seen before the field ended (`i`).
std::stringstream expectedMsg;
expectedMsg << width << " chars";
std::stringstream actualMsg;
actualMsg << i;
warn(row_, col_, expectedMsg.str(), actualMsg.str());
}

tooShort = true;
break;
@@ -4,7 +4,6 @@
#include "Token.h"
#include "Tokenizer.h"
#include "utils.h"
#include <Rcpp.h>

class TokenizerFwf : public Tokenizer {
std::vector<int> beginOffset_, endOffset_;
@@ -4,7 +4,6 @@
#include "Token.h"
#include "Tokenizer.h"
#include "utils.h"
#include <Rcpp.h>

class TokenizerLine : public Tokenizer {
SourceIterator begin_, cur_, end_;
@@ -46,8 +45,9 @@ class TokenizerLine : public Tokenizer {
if (*cur_ == '\0')
hasNull = true;

if ((end_ - cur_) % 131072 == 0)
Rcpp::checkUserInterrupt();
if ((end_ - cur_) % 131072 == 0) {
cpp11::check_user_interrupt();
}

switch (*cur_) {
case '\r':
@@ -1,10 +1,11 @@
#ifndef FASTREAD_TOKENIZER_LOG_H_
#define FASTREAD_TOKENIZER_LOG_H_

#include "cpp11/protect.hpp"

#include "Token.h"
#include "Tokenizer.h"
#include "utils.h"
#include <Rcpp.h>

enum LogState {
LOG_DELIM,
@@ -53,7 +54,7 @@ class TokenizerLog : public Tokenizer {
Advance advance(&cur_);

if ((row_ + 1) % 100000 == 0 || (col_ + 1) % 100000 == 0)
Rcpp::checkUserInterrupt();
cpp11::check_user_interrupt();

switch (state_) {
case LOG_DELIM:
@@ -1,16 +1,15 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/R.hpp"

#include "Source.h"
#include "Tokenizer.h"
#include "TokenizerFwf.h"
#include "TokenizerWs.h"
#include "utils.h"

#include "Source.h"

// TokenizerWs
// --------------------------------------------------------------------

#include "TokenizerWs.h"
#include <Rcpp.h>
#include <cctype>

TokenizerWs::TokenizerWs(
@@ -4,7 +4,6 @@
#include "Token.h"
#include "Tokenizer.h"
#include "utils.h"
#include <Rcpp.h>

class TokenizerWs : public Tokenizer {
std::vector<std::string> NA_;
@@ -1,6 +1,12 @@
#ifndef READ_WARNINGS_H_
#define READ_WARNINGS_H_

#include "cpp11/data_frame.hpp"
#include "cpp11/sexp.hpp"
#include "cpp11/strings.hpp"
#include <string>
#include <vector>

class Warnings {
std::vector<int> row_, col_;
std::vector<std::string> expected_, actual_;
@@ -20,7 +26,7 @@ class Warnings {
actual_.push_back(actual);
}

Rcpp::RObject addAsAttribute(Rcpp::RObject x) {
cpp11::sexp addAsAttribute(cpp11::sexp x) {
if (size() == 0)
return x;

@@ -37,17 +43,16 @@ class Warnings {
actual_.clear();
}

// Packs the collected warnings into a four-column table (row, col, expected,
// actual) and tags it with the classes "tbl_df", "tbl", "data.frame".
// NOTE(review): this span interleaves the Rcpp and the cpp11 spelling of the
// same method (it looks like a diff whose +/- markers were lost), so it has
// two signature lines and two return statements.
Rcpp::List asDataFrame() {
Rcpp::List out = Rcpp::List::create(
Rcpp::_["row"] = Rcpp::wrap(row_),
Rcpp::_["col"] = Rcpp::wrap(col_),
Rcpp::_["expected"] = Rcpp::wrap(expected_),
Rcpp::_["actual"] = Rcpp::wrap(actual_));
out.attr("class") =
Rcpp::CharacterVector::create("tbl_df", "tbl", "data.frame");
// Rcpp spelling sets row.names explicitly to (NA_INTEGER, -size()).
out.attr("row.names") = Rcpp::IntegerVector::create(NA_INTEGER, -size());

return out;
cpp11::data_frame asDataFrame() {
using namespace cpp11::literals;

// cpp11 spelling: named columns built from the member vectors.
// NOTE(review): no row.names attribute is set here; presumably
// cpp11::writable::data_frame takes care of it - confirm.
cpp11::writable::data_frame out({"row"_nm = row_,
"col"_nm = col_,
"expected"_nm = expected_,
"actual"_nm = actual_});
out.attr("class") = {"tbl_df", "tbl", "data.frame"};

return static_cast<SEXP>(out);
}
};

@@ -1,27 +1,26 @@
#include <Rcpp.h>
#include <fstream>
#include "cpp11/R.hpp"
#include "cpp11/function.hpp"
#include "cpp11/raws.hpp"
#include "cpp11/strings.hpp"

using namespace Rcpp;
#include <fstream>

// Wrapper around base R's readBin(): calls readBin(con, "raw", bytes) and
// returns the result as a raw vector.
// NOTE(review): this span interleaves the Rcpp and the cpp11 spelling of the
// same function (it looks like a diff whose +/- markers were lost), so it has
// two signature lines and two return statements.
RawVector read_bin(RObject con, int bytes = 64 * 1024) {
Rcpp::Environment baseEnv = Rcpp::Environment::base_env();
Rcpp::Function readBin = baseEnv["readBin"];
cpp11::raws read_bin(cpp11::sexp con, int bytes) {
// Look readBin up in the base package.
auto readBin = cpp11::package("base")["readBin"];

RawVector out = Rcpp::as<RawVector>(readBin(con, "raw", bytes));
return out;
// The cpp11 spelling has no default for `bytes`; callers pass it explicitly.
return cpp11::raws(readBin(con, "raw", bytes));
}

// Read data from a connection in chunks and then combine into a single
// raw vector.
//
// [[Rcpp::export]]
CharacterVector read_connection_(
RObject con, std::string filename, int chunk_size = 64 * 1024) {
[[cpp11::export]] std::string
read_connection_(cpp11::sexp con, std::string filename, int chunk_size) {

std::ofstream out(filename.c_str(), std::fstream::out | std::fstream::binary);

RawVector chunk;
cpp11::writable::raws chunk;
while ((chunk = read_bin(con, chunk_size)).size() > 0) {
std::copy(chunk.begin(), chunk.end(), std::ostream_iterator<char>(out));
}
@@ -1,23 +1,25 @@
#pragma once

#include "Rcpp.h"
#include "cpp11/function.hpp"
#include "cpp11/raws.hpp"

inline SEXP R_GetConnection(SEXP con) { return con; }

// Reads from the R connection `con` by calling base R's readBin() with an
// empty raw vector as its second argument and `n` as its third, copies the
// returned bytes into `buf`, and returns how many bytes were returned.
// NOTE(review): this span interleaves the Rcpp and the cpp11 spelling of the
// same function (it looks like a diff whose +/- markers were lost).
inline size_t R_ReadConnection(SEXP con, void* buf, size_t n) {
// The function handle is a function-local static, so it is looked up once.
static Rcpp::Function readBin = Rcpp::Environment::base_env()["readBin"];
static auto readBin = cpp11::package("base")["readBin"];

Rcpp::RawVector res = readBin(con, Rcpp::RawVector(0), n);
memcpy(buf, res.begin(), res.size());
// NOTE(review): the explicit R_xlen_t cast on the literal 0 presumably
// selects the length constructor rather than a pointer overload - confirm.
cpp11::raws res(
readBin(con, cpp11::writable::raws(static_cast<R_xlen_t>(0)), n));
// Copies res.size() bytes; nothing here checks that against `n`.
memcpy(buf, RAW(res), res.size());

return res.length();
return res.size();
}

inline size_t R_WriteConnection(SEXP con, void* buf, size_t n) {
static Rcpp::Function writeBin = Rcpp::Environment::base_env()["writeBin"];
static auto writeBin = cpp11::package("base")["writeBin"];

Rcpp::RawVector payload(n);
memcpy(payload.begin(), buf, n);
cpp11::writable::raws payload(n);
memcpy(RAW(payload), buf, n);

writeBin(payload, con);

Large diffs are not rendered by default.

@@ -1,24 +1,24 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/doubles.hpp"
#include "cpp11/integers.hpp"
#include "cpp11/protect.hpp"

#include "DateTime.h"

// [[Rcpp::export]]
NumericVector utctime(
IntegerVector year,
IntegerVector month,
IntegerVector day,
IntegerVector hour,
IntegerVector min,
IntegerVector sec,
NumericVector psec) {
[[cpp11::export]] cpp11::writable::doubles utctime(
cpp11::integers year,
cpp11::integers month,
cpp11::integers day,
cpp11::integers hour,
cpp11::integers min,
cpp11::integers sec,
cpp11::doubles psec) {
int n = year.size();
if (month.size() != n || day.size() != n || hour.size() != n ||
min.size() != n || sec.size() != n || psec.size() != n) {
Rcpp::stop("All inputs must be same length");
cpp11::stop("All inputs must be same length");
}

NumericVector out = NumericVector(n);
cpp11::writable::doubles out(n);

for (int i = 0; i < n; ++i) {
DateTime dt(
@@ -33,7 +33,7 @@ NumericVector utctime(
out[i] = dt.datetime();
}

out.attr("class") = CharacterVector::create("POSIXct", "POSIXt");
out.attr("class") = {"POSIXct", "POSIXt"};
out.attr("tzone") = "UTC";

return out;
@@ -1,3 +1,5 @@
#include <time.h>

#ifdef __cplusplus
extern "C" {
#endif
@@ -13,7 +15,7 @@ struct Rtm {
int tm_yday;
int tm_isdst;
long tm_gmtoff;
const char *tm_zone;
const char* tm_zone;
};
typedef struct Rtm stm;

@@ -1,5 +1,7 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/R.hpp"
#include "cpp11/integers.hpp"
#include "cpp11/list.hpp"
#include "cpp11/sexp.hpp"

#include "Collector.h"
#include "LocaleInfo.h"
@@ -8,8 +10,8 @@ using namespace Rcpp;
#include "TokenizerLine.h"
#include "Warnings.h"

// [[Rcpp::export]]
IntegerVector dim_tokens_(List sourceSpec, List tokenizerSpec) {
[[cpp11::export]] cpp11::integers
dim_tokens_(cpp11::list sourceSpec, cpp11::list tokenizerSpec) {
SourcePtr source = Source::create(sourceSpec);
TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
tokenizer->tokenize(source->begin(), source->end());
@@ -24,11 +26,15 @@ IntegerVector dim_tokens_(List sourceSpec, List tokenizerSpec) {
cols = t.col();
}

return IntegerVector::create(rows + 1, cols + 1);
cpp11::writable::integers out(rows + 1);
for (auto&& x : out) {
x = cols + 1;
}
return out;
}

// [[Rcpp::export]]
std::vector<int> count_fields_(List sourceSpec, List tokenizerSpec, int n_max) {
[[cpp11::export]] std::vector<int>
count_fields_(cpp11::list sourceSpec, cpp11::list tokenizerSpec, int n_max) {
SourcePtr source = Source::create(sourceSpec);
TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
tokenizer->tokenize(source->begin(), source->end());
@@ -50,8 +56,8 @@ std::vector<int> count_fields_(List sourceSpec, List tokenizerSpec, int n_max) {
return fields;
}

// [[Rcpp::export]]
RObject guess_header_(List sourceSpec, List tokenizerSpec, List locale_) {
[[cpp11::export]] cpp11::list guess_header_(
cpp11::list sourceSpec, cpp11::list tokenizerSpec, cpp11::list locale_) {
Warnings warnings;
LocaleInfo locale(locale_);
SourcePtr source = Source::create(sourceSpec);
@@ -75,20 +81,21 @@ RObject guess_header_(List sourceSpec, List tokenizerSpec, List locale_) {
}
}

return List::create(
_["header"] = out.vector(), _["skip"] = source->skippedRows() + 1);
using namespace cpp11::literals;
return cpp11::writable::list(
{"header"_nm = out.vector(), "skip"_nm = source->skippedRows() + 1});
}

// [[Rcpp::export]]
RObject tokenize_(List sourceSpec, List tokenizerSpec, int n_max) {
[[cpp11::export]] SEXP
tokenize_(cpp11::list sourceSpec, cpp11::list tokenizerSpec, int n_max) {
Warnings warnings;

SourcePtr source = Source::create(sourceSpec);
TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
tokenizer->tokenize(source->begin(), source->end());
tokenizer->setWarnings(&warnings);

std::vector<std::vector<std::string> > rows;
std::vector<std::vector<std::string>> rows;

for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF;
t = tokenizer->nextToken()) {
@@ -106,17 +113,22 @@ RObject tokenize_(List sourceSpec, List tokenizerSpec, int n_max) {
row[t.col()] = t.asString();
}

RObject out = wrap(rows);
cpp11::writable::list out;
out.reserve(rows.size());

for (auto&& row : rows) {
out.push_back(cpp11::as_sexp(row));
}

return warnings.addAsAttribute(out);
}

// [[Rcpp::export]]
SEXP parse_vector_(
CharacterVector x,
List collectorSpec,
List locale_,
[[cpp11::export]] SEXP parse_vector_(
cpp11::strings x,
cpp11::list collectorSpec,
cpp11::list locale_,
const std::vector<std::string>& na,
const bool trim_ws = true) {
const bool trim_ws) {
Warnings warnings;
int n = x.size();

@@ -141,5 +153,5 @@ SEXP parse_vector_(
col->setValue(i, t);
}

return warnings.addAsAttribute(col->vector());
return warnings.addAsAttribute(static_cast<SEXP>(col->vector()));
}
@@ -1,5 +1,7 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/environment.hpp"
#include "cpp11/function.hpp"
#include "cpp11/list.hpp"
#include "cpp11/strings.hpp"

#include "Collector.h"
#include "LocaleInfo.h"
@@ -10,32 +12,31 @@ using namespace Rcpp;
#include "TokenizerLine.h"
#include "Warnings.h"

// [[Rcpp::export]]
CharacterVector read_file_(List sourceSpec, List locale_) {
[[cpp11::export]] cpp11::strings
read_file_(cpp11::list sourceSpec, cpp11::list locale_) {
SourcePtr source = Source::create(sourceSpec);
LocaleInfo locale(locale_);

return CharacterVector::create(
return cpp11::writable::strings(
locale.encoder_.makeSEXP(source->begin(), source->end()));
}

// [[Rcpp::export]]
// Creates a Source from `sourceSpec` and returns its bytes, from
// source->begin() to source->end(), as a raw vector.
// NOTE(review): this span interleaves the Rcpp and the cpp11 spelling of the
// same function (it looks like a diff whose +/- markers were lost), so it has
// two signature lines and two return statements.
RawVector read_file_raw_(List sourceSpec) {
[[cpp11::export]] cpp11::raws read_file_raw_(cpp11::list sourceSpec) {
SourcePtr source = Source::create(sourceSpec);

RawVector res(source->end() - source->begin());
std::copy(source->begin(), source->end(), res.begin());
return res;
// Allocate a raw vector sized to the source, then copy into its storage.
cpp11::writable::raws res(
static_cast<R_xlen_t>(source->end() - source->begin()));
std::copy(source->begin(), source->end(), RAW(res));
// Goes through SEXP to turn the writable vector into the declared return type.
return SEXP(res);
}

// [[Rcpp::export]]
CharacterVector read_lines_(
List sourceSpec,
List locale_,
[[cpp11::export]] cpp11::writable::strings read_lines_(
cpp11::list sourceSpec,
cpp11::list locale_,
std::vector<std::string> na,
int n_max = -1,
bool skip_empty_rows = false,
bool progress = true) {
int n_max,
bool skip_empty_rows,
bool progress) {

LocaleInfo locale(locale_);
Reader r(
@@ -44,28 +45,28 @@ CharacterVector read_lines_(
CollectorPtr(new CollectorCharacter(&locale.encoder_)),
progress);

return r.readToVector<CharacterVector>(n_max);
return SEXP(r.readToVector<cpp11::writable::strings>(n_max));
}

// Looks up the binding named `method` in the environment `env` and returns
// it as a callable function object.
// NOTE(review): this span interleaves the Rcpp and the cpp11 spelling of the
// same function (it looks like a diff whose +/- markers were lost).
Function R6method(Environment env, const std::string& method) {
return as<Function>(env[method]);
cpp11::function
R6method(const cpp11::environment& env, const std::string& method) {
// The cpp11 spelling indexes the environment with a C string and converts
// the result through SEXP.
return static_cast<SEXP>(env[method.c_str()]);
}
// Returns true when `x` is a length-1 logical vector whose element is TRUE.
// Any other type or length raises an R error. A length-1 logical that is not
// TRUE (FALSE or NA) yields false.
// NOTE(review): the two consecutive stop calls below are the Rcpp and the
// cpp11 spelling of the same statement (diff markers appear to have been lost).
bool isTrue(SEXP x) {
if (!(TYPEOF(x) == LGLSXP && Rf_length(x) == 1)) {
stop("`continue()` must return a length 1 logical vector");
cpp11::stop("`continue()` must return a length 1 logical vector");
}
return LOGICAL(x)[0] == TRUE;
}

// [[Rcpp::export]]
void read_lines_chunked_(
List sourceSpec,
List locale_,
[[cpp11::export]] void read_lines_chunked_(
cpp11::list sourceSpec,
cpp11::list locale_,
std::vector<std::string> na,
int chunkSize,
Environment callback,
bool skip_empty_rows = false,
bool progress = true) {
cpp11::environment callback,
bool skip_empty_rows,
bool progress) {

LocaleInfo locale(locale_);
Reader r(
@@ -74,11 +75,11 @@ void read_lines_chunked_(
CollectorPtr(new CollectorCharacter(&locale.encoder_)),
progress);

CharacterVector out;
cpp11::strings out;

int pos = 1;
while (isTrue(R6method(callback, "continue")())) {
CharacterVector out = r.readToVector<CharacterVector>(chunkSize);
cpp11::strings out = r.readToVector<cpp11::strings>(chunkSize);
if (out.size() == 0) {
return;
}
@@ -89,36 +90,35 @@ void read_lines_chunked_(
return;
}

// [[Rcpp::export]]
List read_lines_raw_(List sourceSpec, int n_max = -1, bool progress = false) {
[[cpp11::export]] cpp11::list
read_lines_raw_(cpp11::list sourceSpec, int n_max = -1, bool progress = false) {

Reader r(
Source::create(sourceSpec),
TokenizerPtr(new TokenizerLine()),
CollectorPtr(new CollectorRaw()),
progress);

return r.readToVector<List>(n_max);
return r.readToVector<cpp11::list>(n_max);
}

// [[Rcpp::export]]
void read_lines_raw_chunked_(
List sourceSpec,
[[cpp11::export]] void read_lines_raw_chunked_(
cpp11::list sourceSpec,
int chunkSize,
Environment callback,
bool progress = true) {
cpp11::environment callback,
bool progress) {

Reader r(
Source::create(sourceSpec),
TokenizerPtr(new TokenizerLine()),
CollectorPtr(new CollectorRaw()),
progress);

List out;
cpp11::list out;

int pos = 1;
while (isTrue(R6method(callback, "continue")())) {
List out = r.readToVector<List>(chunkSize);
cpp11::list out = r.readToVector<cpp11::list>(chunkSize);
if (out.size() == 0) {
return;
}
@@ -131,15 +131,14 @@ void read_lines_raw_chunked_(

typedef std::vector<CollectorPtr>::iterator CollectorItr;

// [[Rcpp::export]]
RObject read_tokens_(
List sourceSpec,
List tokenizerSpec,
ListOf<List> colSpecs,
CharacterVector colNames,
List locale_,
int n_max = -1,
bool progress = true) {
[[cpp11::export]] cpp11::sexp read_tokens_(
cpp11::list sourceSpec,
cpp11::list tokenizerSpec,
cpp11::list colSpecs,
cpp11::strings colNames,
cpp11::list locale_,
int n_max,
bool progress) {

LocaleInfo l(locale_);
Reader r(
@@ -152,16 +151,15 @@ RObject read_tokens_(
return r.readToDataFrame(n_max);
}

// [[Rcpp::export]]
void read_tokens_chunked_(
List sourceSpec,
Environment callback,
[[cpp11::export]] void read_tokens_chunked_(
cpp11::list sourceSpec,
cpp11::environment callback,
int chunkSize,
List tokenizerSpec,
ListOf<List> colSpecs,
CharacterVector colNames,
List locale_,
bool progress = true) {
cpp11::list tokenizerSpec,
cpp11::list colSpecs,
cpp11::strings colNames,
cpp11::list locale_,
bool progress) {

LocaleInfo l(locale_);
Reader r(
@@ -173,25 +171,24 @@ void read_tokens_chunked_(

int pos = 1;
while (isTrue(R6method(callback, "continue")())) {
DataFrame out = r.readToDataFrame(chunkSize);
if (out.nrows() == 0) {
cpp11::data_frame out(r.readToDataFrame(chunkSize));
if (out.nrow() == 0) {
return;
}
R6method(callback, "receive")(out, pos);
pos += out.nrows();
pos += out.nrow();
}

return;
}

// [[Rcpp::export]]
RObject melt_tokens_(
List sourceSpec,
List tokenizerSpec,
ListOf<List> colSpecs,
List locale_,
int n_max = -1,
bool progress = true) {
[[cpp11::export]] cpp11::sexp melt_tokens_(
cpp11::list sourceSpec,
cpp11::list tokenizerSpec,
cpp11::list colSpecs,
cpp11::list locale_,
int n_max,
bool progress) {

LocaleInfo l(locale_);
Reader r(
@@ -200,18 +197,17 @@ RObject melt_tokens_(
collectorsCreate(colSpecs, &l),
progress);

return r.meltToDataFrame(locale_, n_max);
return r.meltToDataFrame(cpp11::list(locale_), n_max);
}

// [[Rcpp::export]]
void melt_tokens_chunked_(
List sourceSpec,
Environment callback,
[[cpp11::export]] void melt_tokens_chunked_(
cpp11::list sourceSpec,
cpp11::environment callback,
int chunkSize,
List tokenizerSpec,
ListOf<List> colSpecs,
List locale_,
bool progress = true) {
cpp11::list tokenizerSpec,
cpp11::list colSpecs,
cpp11::list locale_,
bool progress) {

LocaleInfo l(locale_);
Reader r(
@@ -222,20 +218,23 @@ void melt_tokens_chunked_(

int pos = 1;
while (isTrue(R6method(callback, "continue")())) {
DataFrame out = r.meltToDataFrame(locale_, chunkSize);
if (out.nrows() == 0) {
cpp11::data_frame out(
r.meltToDataFrame(static_cast<SEXP>(locale_), chunkSize));
if (out.nrow() == 0) {
return;
}
R6method(callback, "receive")(out, pos);
pos += out.nrows();
pos += out.nrow();
}

return;
}

// [[Rcpp::export]]
std::vector<std::string> guess_types_(
List sourceSpec, List tokenizerSpec, Rcpp::List locale_, int n = 100) {
[[cpp11::export]] std::vector<std::string> guess_types_(
cpp11::list sourceSpec,
cpp11::list tokenizerSpec,
cpp11::list locale_,
int n) {
Warnings warnings;
SourcePtr source = Source::create(sourceSpec);
TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
@@ -267,8 +266,8 @@ std::vector<std::string> guess_types_(

std::vector<std::string> out;
for (size_t j = 0; j < collectors.size(); ++j) {
CharacterVector col = as<CharacterVector>(collectors[j]->vector());
out.push_back(collectorGuess(col, locale_));
cpp11::strings col(collectors[j]->vector());
out.push_back(collectorGuess(SEXP(col), cpp11::list(locale_)));
}

return out;
@@ -1,15 +1,15 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/strings.hpp"
#include "cpp11/list.hpp"
#include "cpp11/sexp.hpp"

#include "Collector.h"
#include "LocaleInfo.h"
#include "Token.h"

// [[Rcpp::export]]
RObject type_convert_col(
CharacterVector x,
List spec,
List locale_,
[[cpp11::export]] cpp11::sexp type_convert_col(
cpp11::strings x,
cpp11::list spec,
cpp11::list locale_,
int col,
const std::vector<std::string>& na,
bool trim_ws) {
@@ -35,5 +35,5 @@ RObject type_convert_col(
collector->setValue(i, t);
}

return collector->vector();
return static_cast<SEXP>(collector->vector());
}
@@ -1,21 +1,22 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/list.hpp"
#include "cpp11/sexp.hpp"
#include "cpp11/strings.hpp"

#include "write_connection.h"
#include <boost/iostreams/stream.hpp> // stream
#include <fstream>
#include <ostream>

// [[Rcpp::export]]
void write_lines_(
const CharacterVector& lines,
RObject connection,
[[cpp11::export]] void write_lines_(
cpp11::strings lines,
cpp11::sexp connection,
const std::string& na,
const std::string& sep = "\n") {
const std::string& sep) {
boost::iostreams::stream<connection_sink> output(connection);
for (CharacterVector::const_iterator i = lines.begin(); i != lines.end();
for (cpp11::strings::const_iterator i = lines.begin(); i != lines.end();
++i) {

if (CharacterVector::is_na(*i)) {
if (*i == NA_STRING) {
output << na << sep;
} else {
output << Rf_translateCharUTF8(*i) << sep;
@@ -25,34 +26,33 @@ void write_lines_(
return;
}

// [[Rcpp::export]]
void write_lines_raw_(
// Writes each element of the list `x` to `connection` as raw bytes, with
// `sep` written after every element (including the last one).
// NOTE(review): this span interleaves the Rcpp and the cpp11 spelling of the
// same function (it looks like a diff whose +/- markers were lost), so it has
// two parameter lists and two loop headers.
List x, RObject connection, const std::string& sep = "\n") {
[[cpp11::export]] void write_lines_raw_(
cpp11::list x, cpp11::sexp connection, const std::string& sep) {

// Stream whose sink is a connection_sink constructed from `connection`.
boost::iostreams::stream<connection_sink> output(connection);

for (int i = 0; i < x.length(); ++i) {
RawVector y = x.at(i);
output.write(reinterpret_cast<const char*>(&y[0]), y.size() * sizeof(y[0]));
for (int i = 0; i < x.size(); ++i) {
// Each element is converted to a raw vector and its bytes written as-is.
cpp11::raws y(x.at(i));
output.write(
reinterpret_cast<const char*>(RAW(y)), y.size() * sizeof(RAW(y)[0]));
output << sep;
}

return;
}

// [[Rcpp::export]]
void write_file_(std::string x, RObject connection) {
[[cpp11::export]] void write_file_(std::string x, cpp11::sexp connection) {
boost::iostreams::stream<connection_sink> out(connection);

out << x;
return;
}

// [[Rcpp::export]]
// Writes the bytes of the raw vector `x` to `connection` in a single
// write() call on a stream backed by connection_sink.
// NOTE(review): this span interleaves the Rcpp and the cpp11 spelling of the
// same function (it looks like a diff whose +/- markers were lost), so it has
// two signature lines and two write calls.
void write_file_raw_(RawVector x, RObject connection) {
[[cpp11::export]] void write_file_raw_(cpp11::raws x, cpp11::sexp connection) {

boost::iostreams::stream<connection_sink> output(connection);

output.write(reinterpret_cast<const char*>(&x[0]), x.size() * sizeof(x[0]));
// The cpp11 spelling takes the data pointer with RAW(x) instead of &x[0].
output.write(
reinterpret_cast<const char*>(RAW(x)), x.size() * sizeof(RAW(x)[0]));
return;
}
@@ -1,4 +1,5 @@
#include "write_connection.h"
#include "cpp11/protect.hpp"

// http://www.boost.org/doc/libs/1_63_0/libs/iostreams/doc/tutorial/container_sink.html
//
@@ -10,7 +11,7 @@ std::streamsize connection_sink::write(const char* s, std::streamsize n) {

if ((write_size = R_WriteConnection(con_, (void*)s, n)) !=
static_cast<size_t>(n)) {
Rcpp::stop("write failed, expected %l, got %l", n, write_size);
// "%l" is not a complete printf conversion; print both byte counts as long.
// NOTE(review): assumes cpp11::stop forwards printf-style formats, as the
// other "%i" call sites in this change do - confirm.
cpp11::stop(
"write failed, expected %ld, got %ld",
static_cast<long>(n),
static_cast<long>(write_size));
}
return write_size;
}
@@ -1,7 +1,6 @@
#ifndef READR_WRITE_CONNECTION_H_
#define READR_WRITE_CONNECTION_H_

#include <Rcpp.h>
#include <boost/iostreams/categories.hpp> // sink_tag
#include <ios> // streamsize

@@ -1,17 +1,20 @@
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/list.hpp"
#include "cpp11/sexp.hpp"
#include "cpp11/strings.hpp"

#include "grisu3.h"
#include "write_connection.h"
#include <boost/iostreams/stream.hpp> // stream
#include <fstream>
#include <sstream>

enum quote_escape_t { DOUBLE = 1, BACKSLASH = 2, NONE = 3 };

// Defined later to make copyright clearer
template <class Stream>
void stream_delim(
Stream& output,
const RObject& x,
const cpp11::sexp& x,
int i,
char delim,
const std::string& na,
@@ -20,7 +23,7 @@ void stream_delim(
template <class Stream>
void stream_delim_row(
Stream& output,
const Rcpp::List& x,
const cpp11::list& x,
int i,
char delim,
const std::string& na,
@@ -88,7 +91,7 @@ void stream_delim(
template <class Stream>
void stream_delim(
Stream& output,
const List& df,
const cpp11::list& df,
char delim,
const std::string& na,
bool col_names,
@@ -104,7 +107,7 @@ void stream_delim(
}

if (col_names) {
CharacterVector names = as<CharacterVector>(df.attr("names"));
cpp11::strings names(df.attr("names"));
for (int j = 0; j < p; ++j) {
stream_delim(output, names, j, delim, na, escape);
if (j != p - 1)
@@ -113,18 +116,17 @@ void stream_delim(
output << eol;
}

RObject first_col = df[0];
cpp11::sexp first_col = df[0];
int n = Rf_length(first_col);

for (int i = 0; i < n; ++i) {
stream_delim_row(output, df, i, delim, na, escape, eol);
}
}

// [[Rcpp::export]]
std::string stream_delim_(
const List& df,
RObject connection,
[[cpp11::export]] std::string stream_delim_(
const cpp11::list& df,
cpp11::sexp connection,
char delim,
const std::string& na,
bool col_names,
@@ -167,7 +169,7 @@ std::string stream_delim_(
template <class Stream>
void stream_delim(
Stream& output,
const RObject& x,
const cpp11::sexp& x,
int i,
char delim,
const std::string& na,
@@ -220,7 +222,7 @@ void stream_delim(
break;
}
default:
Rcpp::stop(
cpp11::stop(
"Don't know how to handle vector of type %s.", Rf_type2char(TYPEOF(x)));
}
}
@@ -1,8 +1,4 @@
library(testthat)
library(readr)

if (requireNamespace("xml2")) {
test_check("readr", reporter = MultiReporter$new(reporters = list(JunitReporter$new(file = "test-results.xml"), CheckReporter$new())))
} else {
test_check("readr")
}
test_check("readr")
@@ -114,8 +114,10 @@ test_that("print(col_spec) with truncated output", {
test_that("spec object attached to read data", {

test_data <- read_csv("basic-df.csv", col_types = NULL, col_names = TRUE, progress = FALSE)
sp <- spec(test_data)
sp$skip <- NULL

expect_equal(spec(test_data),
expect_equal(sp,
cols(
a = col_logical(),
b = col_double(),
@@ -2,24 +2,24 @@ context("melt-chunked")

test_that("melt_delim_chunked", {
file <- readr_example("mtcars.csv")
unchunked <- melt_csv(file)
unchunked <- melt_csv(file, progress = FALSE)

get_dims <- function(data, pos) dims[[length(dims) + 1]] <<- dim(data)

# Full file in one chunk
dims <- list()
melt_csv_chunked(file, get_dims)
melt_csv_chunked(file, get_dims, progress = FALSE)
expect_equal(dim(unchunked), dims[[1]])

# Each line separately
dims <- list()
melt_csv_chunked(file, get_dims, chunk_size = 1)
melt_csv_chunked(file, get_dims, chunk_size = 1, progress = FALSE)
expect_true(all(vapply(dims[1:6], identical, logical(1), c(11L, 4L))))
expect_equal(nrow(unchunked) / 11L, length(dims))

# In chunks of 5
dims <- list()
melt_csv_chunked(file, get_dims, chunk_size = 5)
melt_csv_chunked(file, get_dims, chunk_size = 5, progress = FALSE)
expect_true(all(vapply(dims[1:6], identical, logical(1), c(55L, 4L))))
expect_true(identical(dims[[7]], c(33L, 4L)))

@@ -31,37 +31,37 @@ test_that("melt_delim_chunked", {
}
}
dims <- list()
melt_csv_chunked(file, get_dims_stop, chunk_size = 5)
melt_csv_chunked(file, get_dims_stop, chunk_size = 5, progress = FALSE)
expect_true(length(dims) == 2)
expect_true(all(vapply(dims[1:2], identical, logical(1), c(55L, 4L))))
})

test_that("DataFrameCallback works as intended", {
f <- readr_example("mtcars.csv")
out0 <- subset(melt_csv(f), data_type == "integer")
out0 <- subset(melt_csv(f, progress = FALSE), data_type == "integer")
fun3 <- DataFrameCallback$new(function(x, pos)
subset(x, data_type == "integer"))

out1 <- melt_csv_chunked(f, fun3)
out2 <- melt_csv_chunked(f, fun3, chunk_size = 1)
out3 <- melt_csv_chunked(f, fun3, chunk_size = 10)
out1 <- melt_csv_chunked(f, fun3, progress = FALSE)
out2 <- melt_csv_chunked(f, fun3, chunk_size = 1, progress = FALSE)
out3 <- melt_csv_chunked(f, fun3, chunk_size = 10, progress = FALSE)

expect_true(all.equal(out0, out1))
expect_true(all.equal(out0, out2))
expect_true(all.equal(out0, out3))


# No matching rows
out0 <- subset(melt_csv(f), data_type == "integer")
out0 <- subset(melt_csv(f, progress = FALSE), data_type == "integer")

fun5 <- DataFrameCallback$new(function(x, pos) subset(x, data_type == "integer"))

out1 <- melt_csv_chunked(f, fun5)
out1 <- melt_csv_chunked(f, fun5, progress = FALSE)

# Need to set guess_max higher than 1 to guess correct column types
out2 <- melt_csv_chunked(f, fun5, chunk_size = 1)
out2 <- melt_csv_chunked(f, fun5, chunk_size = 1, progress = FALSE)

out3 <- melt_csv_chunked(f, fun5, chunk_size = 10)
out3 <- melt_csv_chunked(f, fun5, chunk_size = 10, progress = FALSE)

expect_true(all.equal(out0, out1))
expect_true(all.equal(out0, out2))
@@ -70,10 +70,10 @@ test_that("DataFrameCallback works as intended", {

# Checks that chunked melting through a ListCallback yields the same values as
# melting the whole example file in one call.
test_that("ListCallback works as intended", {
f <- readr_example("mtcars.csv")
# Reference result from a single unchunked read.
# NOTE(review): out0 (and out1 below) are assigned twice in a row, the second
# time with progress = FALSE; only the second value is used -- confirm the
# first assignment is a leftover.
out0 <- melt_csv(f)
out0 <- melt_csv(f, progress = FALSE)

# The callback extracts the "value" column from every chunk it is handed.
fun <- ListCallback$new(function(x, pos) x[["value"]])
out1 <- melt_csv_chunked(f, fun, chunk_size = 10)
out1 <- melt_csv_chunked(f, fun, chunk_size = 10, progress = FALSE)

# Flattening the per-chunk results must reproduce the unchunked value column.
expect_equal(out0[["value"]], unlist(out1))
})
@@ -1,178 +1,178 @@
context("melt_csv")

test_that("read_csv type imputation and NA detection works", {
  # NOTE(review): the description says read_csv, but the call under test is
  # melt_csv().
  # Cells 7 through 11 of the melted fixture must be classified in this order.
  expected_types <- c("missing", "empty", "character", "integer", "double")
  melted <- melt_csv("non-tabular.csv", na = "NA")
  expect_equal(melted$data_type[7:11], expected_types)
})

test_that("read_tsv works on a simple file", {
  # NOTE(review): the description mentions read_tsv; the function exercised is
  # melt_tsv().
  # Two header cells followed by two numeric cells.
  expected_types <- rep(c("character", "integer"), each = 2)
  melted <- melt_tsv("a\tb\n1\t2")
  expect_equal(melted$data_type, expected_types)
})

test_that("melt_csv's 'NA' option genuinely changes the NA values", {
  # With "z" declared as an NA string, the single cell "z" must be typed as
  # missing.
  melted <- melt_csv("z\n", na = "z")
  expect_equal(melted$data_type, "missing")
})

test_that("melt_csv's 'NA' option works with multiple NA values", {
  # Only the strings listed in `na` count as missing here, so the literal "NA"
  # in the first row is expected to stay a character cell.
  na_strings <- c("13", "miss")
  melted <- melt_csv("NA\nmiss\n13", na = na_strings)
  expect_equal(melted$data_type, c("character", "missing", "missing"))
})

test_that('passing character() to melt_csv\'s "NA" option reads "" correctly', {
  # An empty `na` vector means no string is treated as missing.
  melted <- melt_csv("foo\n", na = character())
  expect_equal(melted$value, "foo")
})

test_that("passing \"\" to melt_csv's 'NA' option reads \"\" correctly", {
  # The trailing empty field on the second row is expected to come back as NA.
  expected_values <- c("foo", "bar", "foo", NA)
  melted <- melt_csv("foo,bar\nfoo,\n", na = "")
  expect_equal(melted$value, expected_values)
})

test_that("changing melt_csv's 'quote' argument works correctly", {
  # The same fixture exists in a default-quoted and a single-quoted variant;
  # reading each with the matching `quote` must give identical results.
  single_quoted <- melt_csv("basic-df-singlequote.csv", quote = "'")
  default_quoted <- melt_csv("basic-df.csv")
  expect_identical(default_quoted, single_quoted)
})

test_that("melt_csv's 'skip' option allows for skipping'", {
  # After skipping the first line, 40 melted rows are expected from the fixture.
  melted <- melt_csv("basic-df.csv", skip = 1)
  n_rows <- nrow(melted)
  expect_equal(n_rows, 40)
})

test_that("melt_csv's 'n_max' allows for a maximum number of records and does not corrupt any", {
  # Capping at seven records is expected to melt into 28 rows, none of which
  # may be flagged as missing.
  capped <- melt_csv("basic-df.csv", n_max = 7)
  n_missing <- sum(capped$data_type == "missing")
  expect_equal(nrow(capped), 28)
  expect_equal(n_missing, 0)
})

test_that("can read more than 100 columns", {
  # The seed argument is plain arithmetic (2015 - 3 - 13), not a date.
  set.seed(2015-3-13)
  # 300 binomial draws arranged in 2 rows give a 150-column data frame.
  wide_df <- as.data.frame(matrix(rbinom(300, 2, .5), nrow = 2))
  csv_text <- format_csv(wide_df)
  melted <- melt_csv(csv_text)
  expect_equal(max(melted$col), 150)
})

test_that("encoding affects text", {
  # Reading with an ISO-8859-1 locale must decode the accented characters of
  # the second value correctly.
  latin1_locale <- locale(encoding = "ISO-8859-1")
  melted <- melt_csv("enc-iso-8859-1.txt", locale = latin1_locale)
  expect_identical(melted$value[2], "\u00e9l\u00e8ve")
})

test_that("nuls are dropped with a warning", {
  # The read happens inside expect_warning() so the warning is captured while
  # the result is still kept for the checks below.
  # NOTE(review): presumably raw.csv contains an embedded NUL byte -- confirm
  # against the fixture.
  expect_warning(melted <- melt_csv("raw.csv"))
  expect_equal(readr:::n_problems(melted), 1)
  expect_equal(melted$value[3], "ab")
})

test_that("can read from the clipboard", {
  # Skipped on CRAN and wherever no clipboard is available.
  skip_on_cran()
  skip_if_no_clipboard()
  # Put a small CSV on the clipboard; reading it back must match reading the
  # same text directly.
  clipr::write_clip("a,b,c\n1,2,3")
  expect_identical(
    melt_csv(clipboard()),
    melt_csv("a,b,c\n1,2,3")
  )
})

test_that("can read from a multi-line character vector", {
  # Each element of the character vector is expected to count as one input
  # line, so the highest row index is 2.
  input_lines <- c("a,b,c", "1,2,3")
  melted <- melt_csv(input_lines)
  expect_identical(max(melted$row), 2)
})

# Column warnings ---------------------------------------------------------

test_that("missing lines are not skipped", {
  # Highest row index reported after melting the given CSV text.
  last_row <- function(csv_text) {
    max(melt_csv(csv_text)$row)
  }

  # Blank lines directly after the header.
  expect_equal(last_row("a,b\n\n\n1,2"), 4)

  # Blank lines between data rows.
  expect_equal(last_row("a,b\n1,2\n\n\n2,3\n"), 5)

  # Blank lines at the end (the trailing \n is ignored).
  expect_equal(last_row("a,b\n1,2\n\n\n"), 4)
})

# read_csv2 ---------------------------------------------------------------

# melt_csv2() should treat "," as the decimal mark, so "1,23" is a double.
test_that("decimal mark automatically set to ,", {
# A message matching the pattern is expected only when the default locale's
# decimal mark is "."; otherwise NA is passed as the pattern, which in
# testthat asserts that no message is produced.
expect_message(
x <- melt_csv2("x\n1,23"),
if (default_locale()$decimal_mark == ".") "decimal .*grouping .*mark" else NA)
# Second cell (the value under the header) must be typed as double.
expect_equal(x$data_type[2], "double")
})

# Zero rows ---------------------------------------------------------------

test_that("n_max 0 gives zero row data frame", {
  # Reading zero records must still return the four melt columns, with no rows.
  melted <- melt_csv("a,b\n1,2", n_max = 0)
  expect_equal(dim(melted), c(0, 4))
})

# Comments ----------------------------------------------------------------

test_that("comments are ignored regardless of where they appear", {
  # Single-column inputs: a trailing comment, a trailing comment followed by a
  # comment-only line, and a comment directly after a quoted field.
  single_col_results <- list(
    melt_csv('x\n1#comment', comment = "#"),
    melt_csv('x\n1#comment\n#comment', comment = "#"),
    melt_csv('x\n"1"#comment', comment = "#")
  )

  expected_single <- tibble::tibble(
    row = c(1, 2),
    col = c(1, 1),
    data_type = c("character", "integer"),
    value = c("x", "1")
  )

  for (melted in single_col_results) {
    expect_true(all.equal(expected_single, melted))
  }

  # Three-column inputs where the comment starts at the end of a field, at the
  # start of a field, and additionally on a line of its own.
  multi_col_results <- list(
    melt_csv("x1,x2,x3\nA2,B2,C2\nA3#,B2,C2\nA4,A5,A6", comment = "#", progress = FALSE),
    melt_csv("x1,x2,x3\nA2,B2,C2\nA3,#B2,C2\nA4,A5,A6", comment = "#", progress = FALSE),
    melt_csv("x1,x2,x3\nA2,B2,C2\nA3,#B2,C2\n#comment\nA4,A5,A6", comment = "#", progress = FALSE)
  )

  # In every variant only "A3" survives on the third row.
  expected_multi <- tibble::tibble(
    row = c(1, 1, 1, 2, 2, 2, 3, 4, 4, 4),
    col = c(1, 2, 3, 1, 2, 3, 1, 1, 2, 3),
    data_type = "character",
    value = c("x1", "x2", "x3", "A2", "B2", "C2", "A3", "A4", "A5", "A6")
  )

  for (melted in multi_col_results) {
    expect_true(all.equal(expected_multi, melted))
  }
})

test_that("escaped/quoted comments are ignored", {
  # A backslash-escaped "#" and a quoted "#" are data, not the start of a
  # comment, so both must come back as the value "#".
  backslash_escaped <- melt_delim(
    'x\n\\#',
    comment = "#",
    delim = ",",
    escape_backslash = TRUE,
    escape_double = FALSE
  )
  quoted <- melt_csv('x\n"#"', comment = "#")

  expect_equal(backslash_escaped$value[2], "#")
  expect_equal(quoted$value[2], "#")
})

test_that("leading comments are ignored", {
  # Two comment lines precede the data; only the "x" and "1" cells remain.
  melted <- melt_csv("#a\n#b\nx\n1", comment = "#")
  second_value <- melted$value[2]

  expect_equal(nrow(melted), 2)
  expect_equal(second_value, "1")
})

test_that("skip respects comments", {
  # Melt a fixed three-line input with the supplied options and return only
  # the value column.
  values_with <- function(...) {
    melted <- melt_csv("#a\nb\nc", ...)
    melted$value
  }

  # No options: the "#a" line is ordinary data.
  expect_equal(values_with(), c("#a", "b", "c"))
  # Skipping one line drops "#a".
  expect_equal(values_with(skip = 1), c("b", "c"))
  # Declaring "#" as the comment character drops "#a" as well.
  expect_equal(values_with(comment = "#"), c("b", "c"))
  # Combining the comment character with skip = 2 leaves only "c".
  expect_equal(values_with(comment = "#", skip = 2), c("c"))
})

test_that("melt_csv returns a four-col zero-row data.frame on an empty file", {
  # An empty input file must still produce the four melt columns, with no rows.
  melted <- melt_csv("empty-file")
  expect_equal(dim(melted), c(0, 4))
})

test_that("melt_delim errors on length 0 delimiter", {
  # Regular expression the error message has to match; parentheses and the
  # final dot are escaped so they match literally.
  expected_error <- "`delim` must be at least one character, use `melt_table\\(\\)` for whitespace delimited input\\."
  expect_error(
    melt_delim("a b\n1 2\n", delim = ""),
    expected_error
  )
})

test_that("melt_csv handles whitespace between delimiters and quoted fields", {
  # A space between the delimiter and the opening quote must not break the
  # quoted field apart at its embedded comma.
  melted <- melt_csv('1, \"hi,there\"\n3,4')
  expected_values <- c("hi,there", "3")
  expect_equal(melted$value[2:3], expected_values)
})
#test_that("read_csv type imputation and NA detection works", {
#melt_data <- melt_csv("non-tabular.csv", na = "NA")
#expect_equal(melt_data$data_type[7:11],
#c("missing", "empty", "character", "integer", "double"))
#})

#test_that("read_tsv works on a simple file", {
#melt_data <- melt_tsv("a\tb\n1\t2")
#expect_equal(melt_data$data_type, rep(c("character", "integer"), each = 2))
#})

#test_that("melt_csv's 'NA' option genuinely changes the NA values", {
#expect_equal(melt_csv("z\n", na = "z")$data_type, "missing")
#})

#test_that("melt_csv's 'NA' option works with multiple NA values", {
#expect_equal(melt_csv("NA\nmiss\n13", na = c("13", "miss"))$data_type,
#c("character", "missing", "missing"))
#})

#test_that('passing character() to melt_csv\'s "NA" option reads "" correctly', {
#expect_equal(melt_csv("foo\n", na = character())$value, "foo")
#})

#test_that("passing \"\" to melt_csv's 'NA' option reads \"\" correctly", {
#expect_equal(melt_csv("foo,bar\nfoo,\n", na = "")$value,
#c("foo", "bar", "foo", NA))
#})

#test_that("changing melt_csv's 'quote' argument works correctly", {
#test_data <- melt_csv("basic-df.csv")
#test_data_singlequote <- melt_csv("basic-df-singlequote.csv", quote="'")
#expect_identical(test_data, test_data_singlequote)
#})

#test_that("melt_csv's 'skip' option allows for skipping'", {
#test_data <- melt_csv("basic-df.csv", skip = 1)
#expect_equal(nrow(test_data), 40)
#})

#test_that("melt_csv's 'n_max' allows for a maximum number of records and does not corrupt any", {
#test_data <- melt_csv("basic-df.csv", n_max = 7)
#expect_equal(nrow(test_data), 28)
#expect_equal(sum(test_data$data_type == "missing"), 0)
#})

#test_that("can read more than 100 columns", {
#set.seed(2015-3-13)
#x <- as.data.frame(matrix(rbinom(300, 2, .5), nrow = 2))
#y <- format_csv(x)
#expect_equal(max(melt_csv(y)$col), 150)
#})

#test_that("encoding affects text", {
#x <- melt_csv("enc-iso-8859-1.txt", locale = locale(encoding = "ISO-8859-1"))
#expect_identical(x$value[2], "\u00e9l\u00e8ve")
#})

#test_that("nuls are dropped with a warning", {
#expect_warning(x <- melt_csv("raw.csv"))
#expect_equal(readr:::n_problems(x), 1)
#expect_equal(x$value[3], "ab")
#})

#test_that("can read from the clipboard", {
#skip_on_cran()
#skip_if_no_clipboard()
#clipr::write_clip("a,b,c\n1,2,3")
#expect_identical(melt_csv(clipboard()), melt_csv("a,b,c\n1,2,3"))
#})

#test_that("can read from a multi-line character vector", {
#expect_identical(max(melt_csv(c("a,b,c", "1,2,3"))$row), 2)
#})

## Column warnings ---------------------------------------------------------

#test_that("missing lines are not skipped", {
## first
#expect_equal(max(melt_csv("a,b\n\n\n1,2")$row), 4)

## middle
#expect_equal(max(melt_csv("a,b\n1,2\n\n\n2,3\n")$row), 5)

## last (trailing \n is ignored)
#expect_equal(max(melt_csv("a,b\n1,2\n\n\n")$row), 4)
#})

## read_csv2 ---------------------------------------------------------------

#test_that("decimal mark automatically set to ,", {
#expect_message(
#x <- melt_csv2("x\n1,23"),
#if (default_locale()$decimal_mark == ".") "decimal .*grouping .*mark" else NA)
#expect_equal(x$data_type[2], "double")
#})

## Zero rows ---------------------------------------------------------------

#test_that("n_max 0 gives zero row data frame", {
#x <- melt_csv("a,b\n1,2", n_max = 0)
#expect_equal(dim(x), c(0, 4))
#})

## Comments ----------------------------------------------------------------

#test_that("comments are ignored regardless of where they appear", {
#out1 <- melt_csv('x\n1#comment',comment = "#")
#out2 <- melt_csv('x\n1#comment\n#comment', comment = "#")
#out3 <- melt_csv('x\n"1"#comment', comment = "#")

#chk1 <- tibble::tibble(
#row = c(1, 2),
#col = c(1, 1),
#data_type = c("character", "integer"),
#value = c("x", "1"))

#expect_true(all.equal(chk1, out1))
#expect_true(all.equal(chk1, out2))
#expect_true(all.equal(chk1, out3))

#out5 <- melt_csv("x1,x2,x3\nA2,B2,C2\nA3#,B2,C2\nA4,A5,A6", comment = "#", progress = FALSE)
#out6 <- melt_csv("x1,x2,x3\nA2,B2,C2\nA3,#B2,C2\nA4,A5,A6", comment = "#", progress = FALSE)
#out7 <- melt_csv("x1,x2,x3\nA2,B2,C2\nA3,#B2,C2\n#comment\nA4,A5,A6", comment = "#", progress = FALSE)

#chk2 <- tibble::tibble(
#row = c(1, 1, 1, 2, 2, 2, 3, 4, 4, 4),
#col = c(1, 2, 3, 1, 2, 3, 1, 1, 2, 3),
#data_type = "character",
#value = c("x1", "x2", "x3", "A2", "B2", "C2", "A3", "A4", "A5", "A6"))

#expect_true(all.equal(chk2, out5))
#expect_true(all.equal(chk2, out6))
#expect_true(all.equal(chk2, out7))
#})

#test_that("escaped/quoted comments are ignored", {
#out1 <- melt_delim('x\n\\#', comment = "#", delim = ",",
#escape_backslash = TRUE, escape_double = FALSE)
#out2 <- melt_csv('x\n"#"', comment = "#")

#expect_equal(out1$value[2], "#")
#expect_equal(out2$value[2], "#")
#})

#test_that("leading comments are ignored", {
#out <- melt_csv("#a\n#b\nx\n1", comment = "#")

#expect_equal(nrow(out), 2)
#expect_equal(out$value[2], "1")
#})

#test_that("skip respects comments", {
#melt_x <- function(...) {
#melt_csv("#a\nb\nc", ...)$value
#}

#expect_equal(melt_x(), c("#a", "b", "c"))
#expect_equal(melt_x(skip = 1), c("b", "c"))
#expect_equal(melt_x(comment = "#"), c("b", "c"))
#expect_equal(melt_x(comment = "#", skip = 2), c("c"))
#})

#test_that("melt_csv returns a four-col zero-row data.frame on an empty file", {
#expect_equal(dim(melt_csv("empty-file")), c(0, 4))
#})

#test_that("melt_delim errors on length 0 delimiter", {
#expect_error(melt_delim("a b\n1 2\n", delim = ""),
#"`delim` must be at least one character, use `melt_table\\(\\)` for whitespace delimited input\\.")
#})

#test_that("melt_csv handles whitespace between delimiters and quoted fields", {
#x <- melt_csv('1, \"hi,there\"\n3,4')
#expect_equal(x$value[2:3], c("hi,there", "3"))
#})
@@ -127,8 +127,8 @@ test_that("check for line breaks in between widths", {
col = c(1, 2, 1, 1, 2),
data_type = "integer",
value = as.character(c(1, 1, 2, 1, 1)))
expect_true(all.equal(out1, exp))
expect_true(all.equal(out2, exp))
expect_true(all.equal(out1, exp, check.attributes = FALSE))
expect_true(all.equal(out2, exp, check.attributes = FALSE))
})

test_that("ignore commented lines anywhere in file", {