New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix #2203 (combine with list of characters with NA) #2209
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
1bcb5c9
Tests for #2203
zeehio b32f8eb
Fix issue #2203. combine deals with NA, has stricter coercion rules
zeehio 09c98da
Add tests for coercion rules
zeehio bd197e8
Fix handling of warnings in tests
zeehio 12adab3
Test that there are no warnings when no warnings are expected
zeehio 2c57eea
Fix missing lines. Clarify coercion warnings
zeehio 93dda75
Move test-combine helper functions to its own source.
zeehio c0685ab
combine() changes in NEWS
zeehio dbc6bbb
Fix std::string to const char *
zeehio 4a4327a
Fix tests per @krlmlr suggestions
zeehio File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,25 @@ | |
|
||
namespace dplyr { | ||
|
||
static inline bool has_classes(SEXP x) { | ||
SEXP classes; | ||
int i, num_classes; | ||
if (OBJECT(x)) { | ||
classes = Rf_getAttrib(x, R_ClassSymbol); | ||
num_classes = Rf_length(classes); | ||
if (num_classes > 0) { | ||
return true; | ||
} else { | ||
return false; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
static inline bool all_logical_na(SEXP x, SEXPTYPE xtype) { | ||
return LGLSXP == xtype && all_na(x); | ||
}; | ||
|
||
class Collecter { | ||
public: | ||
virtual ~Collecter() {}; | ||
|
@@ -33,10 +52,10 @@ namespace dplyr { | |
Collecter_Impl(int n_): data(n_, Rcpp::traits::get_na<RTYPE>()) {} | ||
|
||
void collect(const SlicingIndex& index, SEXP v) { | ||
Vector<RTYPE> source(v); | ||
STORAGE* source_ptr = Rcpp::internal::r_vector_start<RTYPE>(source); | ||
for (int i=0; i<index.size(); i++) { | ||
data[index[i]] = source_ptr[i]; | ||
if (all_logical_na(v, TYPEOF(v))) { | ||
collect_logicalNA(index); | ||
} else { | ||
collect_sexp(index, v); | ||
} | ||
} | ||
|
||
|
@@ -45,7 +64,7 @@ namespace dplyr { | |
} | ||
|
||
inline bool compatible(SEXP x) { | ||
return RTYPE == TYPEOF(x); | ||
return RTYPE == TYPEOF(x) || all_logical_na(x, TYPEOF(x)); | ||
} | ||
|
||
bool can_promote(SEXP x) const { | ||
|
@@ -57,11 +76,27 @@ namespace dplyr { | |
} | ||
|
||
bool is_logical_all_na() const { | ||
return RTYPE == LGLSXP && all(is_na(data)).is_true(); | ||
return all_logical_na(data, RTYPE); | ||
} | ||
|
||
protected: | ||
Vector<RTYPE> data; | ||
|
||
private: | ||
void collect_logicalNA(const SlicingIndex& index) { | ||
for (int i=0; i<index.size(); i++) { | ||
data[index[i]] = Rcpp::traits::get_na<RTYPE>(); | ||
} | ||
} | ||
|
||
void collect_sexp(const SlicingIndex& index, SEXP v) { | ||
Vector<RTYPE> source(v); | ||
STORAGE* source_ptr = Rcpp::internal::r_vector_start<RTYPE>(source); | ||
for (int i=0; i<index.size(); i++) { | ||
data[index[i]] = source_ptr[i]; | ||
} | ||
} | ||
|
||
}; | ||
|
||
template <> | ||
|
@@ -83,7 +118,9 @@ namespace dplyr { | |
|
||
inline bool compatible(SEXP x) { | ||
int RTYPE = TYPEOF(x); | ||
return RTYPE == REALSXP || (RTYPE == INTSXP && !Rf_inherits(x, "factor")) || RTYPE == LGLSXP; | ||
return (RTYPE == REALSXP && !has_classes(x)) || | ||
(RTYPE == INTSXP && !has_classes(x)) || | ||
all_logical_na(x, RTYPE); | ||
} | ||
|
||
bool can_promote(SEXP x) const { | ||
|
@@ -109,6 +146,8 @@ namespace dplyr { | |
collect_strings(index, v); | ||
} else if (Rf_inherits(v, "factor")) { | ||
collect_factor(index, v); | ||
} else if (all_logical_na(v, TYPEOF(v))) { | ||
collect_logicalNA(index, v); | ||
} else { | ||
CharacterVector vec(v); | ||
collect_strings(index, vec); | ||
|
@@ -120,7 +159,7 @@ namespace dplyr { | |
} | ||
|
||
inline bool compatible(SEXP x) { | ||
return (STRSXP == TYPEOF(x)) || Rf_inherits(x, "factor"); | ||
return (STRSXP == TYPEOF(x)) || Rf_inherits(x, "factor") || all_logical_na(x, TYPEOF(x)); | ||
} | ||
|
||
bool can_promote(SEXP x) const { | ||
|
@@ -136,6 +175,14 @@ namespace dplyr { | |
|
||
private: | ||
|
||
void collect_logicalNA(const SlicingIndex& index, LogicalVector source) { | ||
SEXP* p_data = Rcpp::internal::r_vector_start<STRSXP>(data); | ||
int n = index.size(); | ||
for (int i=0; i<n; i++) { | ||
p_data[index[i]] = NA_STRING; | ||
} | ||
} | ||
|
||
void collect_strings(const SlicingIndex& index, CharacterVector source) { | ||
SEXP* p_source = Rcpp::internal::r_vector_start<STRSXP>(source); | ||
SEXP* p_data = Rcpp::internal::r_vector_start<STRSXP>(data); | ||
|
@@ -177,11 +224,11 @@ namespace dplyr { | |
|
||
inline bool compatible(SEXP x) { | ||
int RTYPE = TYPEOF(x); | ||
return (INTSXP == RTYPE || RTYPE == LGLSXP) && !Rf_inherits(x, "factor"); | ||
return (INTSXP == RTYPE && !has_classes(x)) || all_logical_na(x, RTYPE); | ||
} | ||
|
||
bool can_promote(SEXP x) const { | ||
return TYPEOF(x) == REALSXP; | ||
return TYPEOF(x) == REALSXP && !has_classes(x); | ||
} | ||
|
||
std::string describe() const { | ||
|
@@ -206,7 +253,7 @@ namespace dplyr { | |
|
||
inline bool compatible(SEXP x) { | ||
String type = STRING_ELT(types,0); | ||
return Rf_inherits(x, type.get_cstring()); | ||
return Rf_inherits(x, type.get_cstring()) || all_logical_na(x, TYPEOF(x)); | ||
} | ||
|
||
inline bool can_promote(SEXP x) const { | ||
|
@@ -229,8 +276,12 @@ namespace dplyr { | |
Parent(n), tz(tz_) {} | ||
|
||
void collect(const SlicingIndex& index, SEXP v) { | ||
Parent::collect(index, v); | ||
update_tz(v); | ||
if (Rf_inherits(v, "POSIXct")) { | ||
Parent::collect(index, v); | ||
update_tz(v); | ||
} else if (all_logical_na(v, TYPEOF(v))) { | ||
Parent::collect(index, v); | ||
} | ||
} | ||
|
||
inline SEXP get() { | ||
|
@@ -242,7 +293,7 @@ namespace dplyr { | |
} | ||
|
||
inline bool compatible(SEXP x) { | ||
return Rf_inherits(x, "POSIXct"); | ||
return Rf_inherits(x, "POSIXct") || all_logical_na(x, TYPEOF(x)); | ||
} | ||
|
||
inline bool can_promote(SEXP x) const { | ||
|
@@ -295,20 +346,10 @@ namespace dplyr { | |
} | ||
|
||
void collect(const SlicingIndex& index, SEXP v) { | ||
// here we can assume that v is a factor with the right levels | ||
// we however do not assume that they are in the same order | ||
IntegerVector source(v); | ||
CharacterVector levels = source.attr("levels"); | ||
|
||
SEXP* levels_ptr = Rcpp::internal::r_vector_start<STRSXP>(levels); | ||
int* source_ptr = Rcpp::internal::r_vector_start<INTSXP>(source); | ||
for (int i=0; i<index.size(); i++) { | ||
if (source_ptr[i] == NA_INTEGER) { | ||
data[ index[i] ] = NA_INTEGER; | ||
} else { | ||
SEXP x = levels_ptr[ source_ptr[i] - 1 ]; | ||
data[ index[i] ] = levels_map.find(x)->second; | ||
} | ||
if (Rf_inherits(v, "factor") && has_same_levels_as(v)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same. |
||
collect_factor(index, v); | ||
} else if (all_logical_na(v, TYPEOF(v))) { | ||
collect_logicalNA(index); | ||
} | ||
} | ||
|
||
|
@@ -319,7 +360,8 @@ namespace dplyr { | |
} | ||
|
||
inline bool compatible(SEXP x) { | ||
return Rf_inherits(x, "factor") && has_same_levels_as(x); | ||
return ((Rf_inherits(x, "factor") && has_same_levels_as(x)) || | ||
all_logical_na(x, TYPEOF(x))); | ||
} | ||
|
||
inline bool can_promote(SEXP x) const { | ||
|
@@ -347,11 +389,35 @@ namespace dplyr { | |
RObject model; | ||
CharacterVector levels; | ||
LevelsMap levels_map; | ||
|
||
void collect_factor(const SlicingIndex& index, SEXP v) { | ||
// here we can assume that v is a factor with the right levels | ||
// we however do not assume that they are in the same order | ||
IntegerVector source(v); | ||
CharacterVector levels = source.attr("levels"); | ||
|
||
SEXP* levels_ptr = Rcpp::internal::r_vector_start<STRSXP>(levels); | ||
int* source_ptr = Rcpp::internal::r_vector_start<INTSXP>(source); | ||
for (int i=0; i<index.size(); i++) { | ||
if (source_ptr[i] == NA_INTEGER) { | ||
data[ index[i] ] = NA_INTEGER; | ||
} else { | ||
SEXP x = levels_ptr[ source_ptr[i] - 1 ]; | ||
data[ index[i] ] = levels_map.find(x)->second; | ||
} | ||
} | ||
} | ||
|
||
void collect_logicalNA(const SlicingIndex& index) { | ||
for (int i=0; i<index.size();i++) { | ||
data[ index[i] ] = NA_INTEGER; | ||
} | ||
} | ||
}; | ||
|
||
template <> | ||
inline bool Collecter_Impl<LGLSXP>::can_promote(SEXP x) const { | ||
return (TYPEOF(x) == INTSXP && ! Rf_inherits(x, "factor")) || TYPEOF(x) == REALSXP; | ||
return is_logical_all_na(); | ||
} | ||
|
||
inline Collecter* collecter(SEXP model, int n) { | ||
|
@@ -363,12 +429,24 @@ namespace dplyr { | |
return new FactorCollecter(n, model); | ||
if (Rf_inherits(model, "Date")) | ||
return new TypedCollecter<INTSXP>(n, get_date_classes()); | ||
if (has_classes(model)) { | ||
SEXP classes = Rf_getAttrib(model, R_ClassSymbol); | ||
Rf_warning("Vectorizing '%s' elements may not preserve their attributes", | ||
CHAR(STRING_ELT(classes, 0))); | ||
return new TypedCollecter<INTSXP>(n, classes); | ||
} | ||
return new Collecter_Impl<INTSXP>(n); | ||
case REALSXP: | ||
if (Rf_inherits(model, "POSIXct")) | ||
return new POSIXctCollecter(n, Rf_getAttrib(model, Rf_install("tzone"))); | ||
if (Rf_inherits(model, "Date")) | ||
return new TypedCollecter<REALSXP>(n, get_date_classes()); | ||
if (has_classes(model)) { | ||
SEXP classes = Rf_getAttrib(model, R_ClassSymbol); | ||
Rf_warning("Vectorizing '%s' elements may not preserve their attributes", | ||
CHAR(STRING_ELT(classes, 0))); | ||
return new TypedCollecter<REALSXP>(n, classes); | ||
} | ||
return new Collecter_Impl<REALSXP>(n); | ||
case CPLXSXP: | ||
return new Collecter_Impl<CPLXSXP>(n); | ||
|
@@ -399,18 +477,37 @@ namespace dplyr { | |
return new Collecter_Impl<STRSXP>(n); | ||
} | ||
|
||
// logical NA can be promoted to whatever type comes next | ||
if (previous->is_logical_all_na()) { | ||
return collecter(model, n); | ||
} | ||
|
||
switch (TYPEOF(model)) { | ||
case INTSXP: | ||
if (Rf_inherits(model, "Date")) | ||
return new TypedCollecter<INTSXP>(n, get_date_classes()); | ||
if (Rf_inherits(model, "factor")) | ||
return new Collecter_Impl<STRSXP>(n); | ||
if (has_classes(model)) { | ||
SEXP classes = Rf_getAttrib(model, R_ClassSymbol); | ||
Rf_warning("Promoting class %s into %s may lose attributes", | ||
previous->describe().c_str(), | ||
get_single_class(model).c_str()); | ||
return new TypedCollecter<INTSXP>(n, classes); | ||
} | ||
return new Collecter_Impl<INTSXP>(n); | ||
case REALSXP: | ||
if (Rf_inherits(model, "POSIXct")) | ||
return new POSIXctCollecter(n, Rf_getAttrib(model, Rf_install("tzone"))); | ||
if (Rf_inherits(model, "Date")) | ||
return new TypedCollecter<REALSXP>(n, get_date_classes()); | ||
if (has_classes(model)) { | ||
SEXP classes = Rf_getAttrib(model, R_ClassSymbol); | ||
Rf_warning("Promoting class %s into %s may lose attributes", | ||
previous->describe().c_str(), | ||
get_single_class(model).c_str()); | ||
return new TypedCollecter<REALSXP>(n, classes); | ||
} | ||
return new Collecter_Impl<REALSXP>(n); | ||
case LGLSXP: | ||
return new Collecter_Impl<LGLSXP>(n); | ||
|
@@ -425,7 +522,6 @@ namespace dplyr { | |
return 0; | ||
} | ||
|
||
|
||
} | ||
|
||
#endif |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Breaking changes should be documented clearly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in 9fb42da