Skip to content

Commit

Permalink
better handling of character NA in all.equal. closes #1095
Browse files Browse the repository at this point in the history
  • Loading branch information
romainfrancois committed Apr 22, 2015
1 parent 30392f1 commit e45f23f
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 28 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Expand Up @@ -17,6 +17,8 @@

* `bind_cols` always produces a `tbl_df` (#779).

* `all.equal` correctly treats character missing values (#1095).

# dplyr 0.4.1

* Don't assume that RPostgreSQL is available.
Expand Down
40 changes: 13 additions & 27 deletions inst/include/dplyr/JoinVisitorImpl.h
Expand Up @@ -42,13 +42,6 @@ namespace dplyr{
}
}

// inline void debug(){
// Rprintf( "visitor= %s. left=", DEMANGLE(JoinVisitorImpl) ) ;
// Rf_PrintValue(left) ;
// Rprintf( "right=" ) ;
// Rf_PrintValue(right) ;
// }

LHS_Vec left ;
RHS_Vec right ;
LHS_hasher LHS_hash_fun ;
Expand Down Expand Up @@ -98,14 +91,7 @@ namespace dplyr{
inline void print(int i){
Rcpp::Rcout << get(i) << std::endl ;
}

// inline void debug(){
// Rprintf( "visitor= %s. left=", DEMANGLE(JoinVisitorImpl) ) ;
// Rf_PrintValue(left) ;
// Rprintf( "right=" ) ;
// Rf_PrintValue(right) ;
// }


protected:
Vec left, right ;
hasher hash_fun ;
Expand All @@ -119,30 +105,37 @@ namespace dplyr{
class JoinStringOrderer {
public:
JoinStringOrderer( const CharacterVector& left_, const CharacterVector& right_ ) :
left(left_), right(right_), nleft(left.size()), nright(right.size())
left(left_), right(right_), nleft(left.size()), nright(right.size()), n(nleft+nright), n_na(0)
{
make_orders() ;
}

inline int get_order(int i) const {
if( i == NA_INTEGER ) return NA_INTEGER ;
return (i>=0) ? orders[i] : orders[nleft-i-1] ;
int val = (i>=0) ? orders[i] : orders[nleft-i-1] ;
if( val >= n - n_na ) val= NA_INTEGER ;
return val ;
}

private:
const CharacterVector& left ;
const CharacterVector& right ;
int nleft, nright ;
int nleft, nright, n ;
IntegerVector orders ;
int n_na ;

inline void make_orders(){
CharacterVector big( nleft + nright ) ;
CharacterVector big(n) ;
CharacterVector::iterator it = big.begin() ;
std::copy( left.begin(), left.end(), it ) ;
std::copy( right.begin(), right.end(), it + nleft ) ;

Language call( "rank", big, _["ties.method"] = "min" ) ;
orders = call.eval() ;
for(int i=n-1; i>=0; i--, n_na++){
if( big[ orders[i-1] ] != NA_STRING ) return ;
}

}

} ;
Expand Down Expand Up @@ -190,14 +183,7 @@ namespace dplyr{
inline void print(int i){
Rcpp::Rcout << get(i) << std::endl ;
}

// inline void debug(){
// Rprintf( "visitor= %s. left=", DEMANGLE(JoinVisitorImpl) ) ;
// Rf_PrintValue(left) ;
// Rprintf( "right=" ) ;
// Rf_PrintValue(right) ;
// }



protected:

Expand Down
2 changes: 1 addition & 1 deletion src/dplyr.cpp
Expand Up @@ -1420,7 +1420,7 @@ dplyr::BoolResult equal_data_frame(DataFrame x, DataFrame y, bool ignore_col_ord

for( int i=0; i<nrows_x; i++) map[i].push_back(i) ;
for( int i=0; i<nrows_y; i++) map[-i-1].push_back(-i-1) ;

RowTrack track_x( "Rows in x but not y: " ) ;
RowTrack track_y( "Rows in y but not x: " ) ;
RowTrack track_mismatch( "Rows with difference occurences in x and y: " ) ;
Expand Down
9 changes: 9 additions & 0 deletions tests/testthat/test-equality.r
Expand Up @@ -78,3 +78,12 @@ test_that( "data frame equality test with ignore_row_order=TRUE detects differen
expect_false(isTRUE(all.equal(DF1, DF2, ignore_row_order=TRUE)))

})

test_that("all.equal handles NA_character_ correctly. #1095", {
d1 <- data_frame(x = c(NA_character_))
expect_true(all.equal(d1, d1))

d2 <- data_frame( x = c(NA_character_, "foo", "bar" ) )
expect_true(all.equal(d2, d2))

})

0 comments on commit e45f23f

Please sign in to comment.