From e45f23f044e7ead34e20bcf342ac2e526115747a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Romain=20Fran=C3=A7ois?= Date: Wed, 22 Apr 2015 18:02:23 +0200 Subject: [PATCH] better handling of character NA in all.equal. closes #1095 --- NEWS.md | 2 ++ inst/include/dplyr/JoinVisitorImpl.h | 40 +++++++++------------------- src/dplyr.cpp | 2 +- tests/testthat/test-equality.r | 9 +++++++ 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2774fa7e52..3a09f70e19 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,6 +17,8 @@ * `bind_cols` always produces a `tbl_df` (#779). +* `all.equal` correctly treats character missing values (#1095). + # dplyr 0.4.1 * Don't assume that RPostgreSQL is available. diff --git a/inst/include/dplyr/JoinVisitorImpl.h b/inst/include/dplyr/JoinVisitorImpl.h index 07189154d9..3f01f5de95 100644 --- a/inst/include/dplyr/JoinVisitorImpl.h +++ b/inst/include/dplyr/JoinVisitorImpl.h @@ -42,13 +42,6 @@ namespace dplyr{ } } - // inline void debug(){ - // Rprintf( "visitor= %s. left=", DEMANGLE(JoinVisitorImpl) ) ; - // Rf_PrintValue(left) ; - // Rprintf( "right=" ) ; - // Rf_PrintValue(right) ; - // } - LHS_Vec left ; RHS_Vec right ; LHS_hasher LHS_hash_fun ; @@ -98,14 +91,7 @@ namespace dplyr{ inline void print(int i){ Rcpp::Rcout << get(i) << std::endl ; } - - // inline void debug(){ - // Rprintf( "visitor= %s. left=", DEMANGLE(JoinVisitorImpl) ) ; - // Rf_PrintValue(left) ; - // Rprintf( "right=" ) ; - // Rf_PrintValue(right) ; - // } - + protected: Vec left, right ; hasher hash_fun ; @@ -119,30 +105,37 @@ namespace dplyr{ class JoinStringOrderer { public: JoinStringOrderer( const CharacterVector& left_, const CharacterVector& right_ ) : - left(left_), right(right_), nleft(left.size()), nright(right.size()) + left(left_), right(right_), nleft(left.size()), nright(right.size()), n(nleft+nright), n_na(0) { make_orders() ; } inline int get_order(int i) const { if( i == NA_INTEGER ) return NA_INTEGER ; - return (i>=0) ? orders[i] : orders[nleft-i-1] ; + int val = (i>=0) ? orders[i] : orders[nleft-i-1] ; + if( val >= n - n_na ) val= NA_INTEGER ; + return val ; } private: const CharacterVector& left ; const CharacterVector& right ; - int nleft, nright ; + int nleft, nright, n ; IntegerVector orders ; + int n_na ; inline void make_orders(){ - CharacterVector big( nleft + nright ) ; + CharacterVector big(n) ; CharacterVector::iterator it = big.begin() ; std::copy( left.begin(), left.end(), it ) ; std::copy( right.begin(), right.end(), it + nleft ) ; Language call( "rank", big, _["ties.method"] = "min" ) ; orders = call.eval() ; + for(int i=n-1; i>=0; i--, n_na++){ + if( big[ orders[i-1] ] != NA_STRING ) return ; + } + } } ; @@ -190,14 +183,7 @@ namespace dplyr{ inline void print(int i){ Rcpp::Rcout << get(i) << std::endl ; } - - // inline void debug(){ - // Rprintf( "visitor= %s. left=", DEMANGLE(JoinVisitorImpl) ) ; - // Rf_PrintValue(left) ; - // Rprintf( "right=" ) ; - // Rf_PrintValue(right) ; - // } - + protected: diff --git a/src/dplyr.cpp b/src/dplyr.cpp index 66d491017e..2eaab5e26d 100644 --- a/src/dplyr.cpp +++ b/src/dplyr.cpp @@ -1420,7 +1420,7 @@ dplyr::BoolResult equal_data_frame(DataFrame x, DataFrame y, bool ignore_col_ord for( int i=0; i