Skip to content
Permalink
Browse files

Merge pull request #3458 from tidyverse/fix-issue-3456

- The hybrid evaluator finds functions from dplyr even if dplyr is not attached (#3456).
  • Loading branch information
krlmlr committed Apr 10, 2018
2 parents 9861d7e + d9dae91 commit 924b624170ed807a921ac4d3888fb43045817461
@@ -8,18 +8,26 @@ class Result;
// Describes one function that can be handled by the hybrid evaluator: the
// C++ prototype that builds the Result, the R function object the symbol is
// expected to resolve to, and the package the function comes from.
// NOTE(review): this span is a rendered diff, so the pre-change initializer
// and two-argument constructor lines appear next to their replacements.
struct HybridHandler {
// Signature of a handler prototype: receives a SEXP, the lazy subsets and an
// int, and returns a dplyr::Result*.
typedef dplyr::Result* (*HybridHandlerFun)(SEXP, const dplyr::ILazySubsets&, int);

// Package the handled function originates from.
enum Origin { DPLYR, STATS, BASE };

// Prototype used to build the hybrid Result.
HybridHandlerFun handler ;
// Function object that hybrid() compares the looked-up symbol against.
SEXP reference ;
// Origin of the handled function; hybrid() only warns for DPLYR handlers.
Origin origin ;

// Default handler: null prototype, R_NilValue reference, DPLYR origin.
HybridHandler():
handler(0),
reference(R_NilValue)
reference(R_NilValue),
origin(DPLYR)
{}

// Builds a handler from a prototype, its origin and its reference function.
HybridHandler(HybridHandlerFun handler_, SEXP reference_):
handler(handler_), reference(reference_)
HybridHandler(HybridHandlerFun handler_, Origin origin_, SEXP reference_):
handler(handler_),
reference(reference_),
origin(origin_)
{}

// Decides whether `symbol`, looked up starting from environment `rho`,
// should be evaluated with this hybrid handler.
bool hybrid(SEXP symbol, SEXP rho) const;

};

}
@@ -137,6 +137,85 @@ void registerHybridHandler(const char* name, HybridHandler proto) {

namespace dplyr {

// Argument/result bundle handed to `protected_findFun` through its `void*`
// parameter.
struct FindFunData {
  // Inputs: the symbol to resolve and the environment where the search starts.
  SEXP symbol;
  SEXP env;

  // Outputs written by the lookup.
  SEXP res;     // function object that was found; R_NilValue until then
  bool forced;  // true when no function was found along the environment chain

  FindFunData(SEXP symbol_, SEXP env_)
    : symbol(symbol_), env(env_), res(R_NilValue), forced(false) {}
};


void protected_findFun(void* data) {
FindFunData* find_data = reinterpret_cast<FindFunData*>(data);

SEXP rho = find_data->env;
SEXP symbol = find_data->symbol;
SEXP vl;

while (rho != R_EmptyEnv) {
vl = Rf_findVarInFrame3(rho, symbol, TRUE) ;

if (vl != R_UnboundValue) {
// a promise, we need to evaluate it to find out if it
// is a function promise
if (TYPEOF(vl) == PROMSXP) {
PROTECT(vl);
vl = Rf_eval(vl, rho);
UNPROTECT(1);
}

// we found a function
if (TYPEOF(vl) == CLOSXP || TYPEOF(vl) == BUILTINSXP || TYPEOF(vl) == SPECIALSXP) {
find_data->res = vl;
return;
}

// a missing, just let R evaluation work as we have no way to
// assert if the missing argument would have evaluated to a function or data
if (vl == R_MissingArg) {
return;
}
}

// go in the parent environment
rho = ENCLOS(rho);
}

// we did not find a suitable function, so we force hybrid evaluation
// that happens e.g. when dplyr is not loaded and we use n() in the expression
find_data->forced = true;
return;

}


// Decides whether a call to `symbol`, resolved from environment `rho`, should
// go through this hybrid handler.
//
// Returns true when:
//   - the function lookup did not complete (R_ToplevelExec reported failure),
//   - no function was found for the symbol (hybrid evaluation is forced; a
//     warning is issued for DPLYR handlers other than `n`), or
//   - the function found is exactly `reference`.
// Returns false otherwise, e.g. when the symbol resolves to another function.
bool HybridHandler::hybrid(SEXP symbol, SEXP rho) const {
  // the `protected_findFun` above might longjump so
  // we evaluate it in a top level context
  FindFunData find_data(symbol, rho);
  Rboolean success = R_ToplevelExec(protected_findFun, &find_data);

  // the lookup did not run to completion, so force hybrid
  if (!success) return true;

  if (find_data.forced) {
    if (origin == DPLYR && symbol != Rf_install("n")) {
      const char* name = CHAR(PRINTNAME(symbol));
      warning("hybrid evaluation forced for `%s`. Please use dplyr::%s() or library(dplyr) to remove this warning.", name, name);
    }
    return true;
  }

  // hybrid only if the symbol resolves to the function we expect
  return find_data.res == reference;
}

Result* get_handler(SEXP call, const ILazySubsets& subsets, const Environment& env) {
LOG_INFO << "Looking up hybrid handler for call of type " << type2name(call);

@@ -145,18 +224,15 @@ Result* get_handler(SEXP call, const ILazySubsets& subsets, const Environment& e

HybridHandlerMap& handlers = get_handlers();

// if `check_hybrid_reference` is true, we check that the symbol `fun_symbol`
// evaluates to what we expect, i.e. the reference in its HybridHandler
// when we have `dplyr::` prefix we don't need to check
bool check_hybrid_reference = true ;
bool in_dplyr_namespace = false;
SEXP fun_symbol = CAR(call);
// interpret dplyr::fun() as fun(). #3309
if (TYPEOF(fun_symbol) == LANGSXP &&
CAR(fun_symbol) == R_DoubleColonSymbol &&
CADR(fun_symbol) == Rf_install("dplyr")
) {
fun_symbol = CADDR(fun_symbol) ;
check_hybrid_reference = false ;
fun_symbol = CADDR(fun_symbol);
in_dplyr_namespace = true;
}

if (TYPEOF(fun_symbol) != SYMSXP) {
@@ -166,18 +242,21 @@ Result* get_handler(SEXP call, const ILazySubsets& subsets, const Environment& e

LOG_VERBOSE << "Searching hybrid handler for function " << CHAR(PRINTNAME(fun_symbol));

// give up if the symbol is not known
HybridHandlerMap::const_iterator it = handlers.find(fun_symbol);
if (it == handlers.end()) {
LOG_VERBOSE << "Not found";
return 0;
}

// no hybrid evaluation if the symbol evaluates to something else than
// is expected. This would happen if e.g. the mean function has been shadowed
// mutate( x = mean(x) )
// if `mean` evaluates to something other than `base::mean` then no hybrid.
RObject fun = Rf_findFun(fun_symbol, env) ;
if (check_hybrid_reference && fun != it->second.reference) return 0 ;
if (!in_dplyr_namespace) {
// no hybrid evaluation if the symbol evaluates to something else than
// is expected. This would happen if e.g. the mean function has been shadowed
// mutate( x = mean(x) )
// if `mean` evaluates to something other than `base::mean` then no hybrid.

if (!it->second.hybrid(fun_symbol, env)) return 0;
}

LOG_INFO << "Using hybrid handler for " << CHAR(PRINTNAME(fun_symbol));

@@ -51,7 +51,7 @@ Result* count_distinct_prototype(SEXP call, const ILazySubsets& subsets, int) {
}

// Registers the hybrid handlers for `n` and `n_distinct` in `handlers`, keyed
// by symbol, with the reference functions taken from the dplyr namespace.
// NOTE(review): rendered diff; the registrations without an Origin argument
// are the pre-change lines, the HybridHandler::DPLYR ones replace them.
void install_count_handlers(HybridHandlerMap& handlers) {
Environment ns_dplyr = Environment::namespace_env("dplyr") ;
handlers[ Rf_install("n") ] = HybridHandler(count_prototype, ns_dplyr["n"]) ;
handlers[ Rf_install("n_distinct") ] = HybridHandler(count_distinct_prototype, ns_dplyr["n_distinct"]) ;
Environment ns_dplyr = Environment::namespace_env("dplyr");
handlers[Rf_install("n")] = HybridHandler(count_prototype, HybridHandler::DPLYR, ns_dplyr["n"]);
handlers[Rf_install("n_distinct")] = HybridHandler(count_distinct_prototype, HybridHandler::DPLYR, ns_dplyr["n_distinct"]);
}
@@ -80,7 +80,7 @@ Result* verify_not_hybrid_prototype(SEXP call, const ILazySubsets&, int nargs) {
}

// Registers the hybrid handlers for `verify_hybrid` and `verify_not_hybrid`
// in `handlers`, with the reference functions taken from the dplyr namespace.
// NOTE(review): rendered diff; the registrations without an Origin argument
// are the pre-change lines, the HybridHandler::DPLYR ones replace them.
void install_debug_handlers(HybridHandlerMap& handlers) {
Environment ns_dplyr = Environment::namespace_env("dplyr") ;
handlers[ Rf_install("verify_hybrid") ] = HybridHandler(verify_hybrid_prototype, ns_dplyr["verify_hybrid"]) ;
handlers[ Rf_install("verify_not_hybrid") ] = HybridHandler(verify_not_hybrid_prototype, ns_dplyr["verify_not_hybrid"]);
Environment ns_dplyr = Environment::namespace_env("dplyr");
handlers[Rf_install("verify_hybrid")] = HybridHandler(verify_hybrid_prototype, HybridHandler::DPLYR, ns_dplyr["verify_hybrid"]);
handlers[Rf_install("verify_not_hybrid")] = HybridHandler(verify_not_hybrid_prototype, HybridHandler::DPLYR, ns_dplyr["verify_not_hybrid"]);
}
@@ -48,5 +48,5 @@ Result* in_prototype(SEXP call, const ILazySubsets& subsets, int) {
}

// Registers the hybrid handler for `%in%` in `handlers`, with the reference
// function taken from the base namespace.
// NOTE(review): rendered diff; the first registration is the pre-change line,
// the HybridHandler::BASE one replaces it.
void install_in_handlers(HybridHandlerMap& handlers) {
handlers[ Rf_install("%in%") ] = HybridHandler(in_prototype, Environment::base_namespace()["%in%"]);
handlers[Rf_install("%in%")] = HybridHandler(in_prototype, HybridHandler::BASE, Environment::base_namespace()["%in%"]);
}
@@ -67,7 +67,7 @@ Result* minmax_prototype(SEXP call, const ILazySubsets& subsets, int nargs) {
}

// Registers the hybrid handlers for `min` and `max` in `handlers`; both use
// minmax_prototype (instantiated with true for min, false for max) and take
// their reference functions from the base namespace.
// NOTE(review): rendered diff; the registrations without an Origin argument
// are the pre-change lines, the HybridHandler::BASE ones replace them.
void install_minmax_handlers(HybridHandlerMap& handlers) {
Environment ns_base = Environment::base_namespace() ;
handlers[Rf_install("min")] = HybridHandler(minmax_prototype<true>, ns_base["min"]);
handlers[Rf_install("max")] = HybridHandler(minmax_prototype<false>, ns_base["max"]);
Environment ns_base = Environment::base_namespace();
handlers[Rf_install("min")] = HybridHandler(minmax_prototype<true>, HybridHandler::BASE, ns_base["min"]);
handlers[Rf_install("max")] = HybridHandler(minmax_prototype<false>, HybridHandler::BASE, ns_base["max"]);
}
@@ -329,8 +329,8 @@ Result* last_prototype(SEXP call, const ILazySubsets& subsets, int nargs) {
}

// Registers the hybrid handlers for `first`, `last` and `nth` in `handlers`,
// with the reference functions taken from the dplyr namespace.
// NOTE(review): rendered diff; the registrations without an Origin argument
// are the pre-change lines, the HybridHandler::DPLYR ones replace them.
void install_nth_handlers(HybridHandlerMap& handlers) {
Environment ns_dplyr = Environment::namespace_env("dplyr") ;
handlers[ Rf_install("first") ] = HybridHandler(first_prototype, ns_dplyr["first"]) ;
handlers[ Rf_install("last") ] = HybridHandler(last_prototype, ns_dplyr["last"]) ;
handlers[ Rf_install("nth") ] = HybridHandler(nth_prototype, ns_dplyr["nth"]);
Environment ns_dplyr = Environment::namespace_env("dplyr");
handlers[Rf_install("first")] = HybridHandler(first_prototype, HybridHandler::DPLYR, ns_dplyr["first"]);
handlers[Rf_install("last")] = HybridHandler(last_prototype, HybridHandler::DPLYR, ns_dplyr["last"]);
handlers[Rf_install("nth")] = HybridHandler(nth_prototype, HybridHandler::DPLYR, ns_dplyr["nth"]);
}
@@ -95,6 +95,6 @@ Result* leadlag_prototype(SEXP call, const ILazySubsets& subsets, int) {
// Registers the hybrid handlers for `lead` and `lag` in `handlers`; both use
// leadlag_prototype (instantiated with Lead and Lag) and take their reference
// functions from the dplyr namespace.
// NOTE(review): rendered diff; the registrations without an Origin argument
// are the pre-change lines, the HybridHandler::DPLYR ones replace them.
void install_offset_handlers(HybridHandlerMap& handlers) {
Environment ns_dplyr = Environment::namespace_env("dplyr") ;

handlers[ Rf_install("lead") ] = HybridHandler(leadlag_prototype<Lead>, ns_dplyr["lead"]);
handlers[ Rf_install("lag") ] = HybridHandler(leadlag_prototype<Lag>, ns_dplyr["lag"]);
handlers[ Rf_install("lead") ] = HybridHandler(leadlag_prototype<Lead>, HybridHandler::DPLYR, ns_dplyr["lead"]);
handlers[ Rf_install("lag") ] = HybridHandler(leadlag_prototype<Lag>, HybridHandler::DPLYR, ns_dplyr["lag"]);
}
@@ -77,8 +77,9 @@ void install_simple_handlers(HybridHandlerMap& handlers) {
Environment ns_stats = Environment::namespace_env("stats") ;
Environment ns_base = Environment::base_namespace() ;

handlers[ Rf_install("mean") ] = HybridHandler(simple_prototype<dplyr::Mean>, ns_base["mean"]);
handlers[ Rf_install("var") ] = HybridHandler(simple_prototype<dplyr::Var>, ns_stats["var"]);
handlers[ Rf_install("sd") ] = HybridHandler(simple_prototype<dplyr::Sd>, ns_stats["sd"]);
handlers[ Rf_install("sum") ] = HybridHandler(simple_prototype<dplyr::Sum>, ns_base["sum"]);
handlers[ Rf_install("mean") ] = HybridHandler(simple_prototype<dplyr::Mean>, HybridHandler::BASE, ns_base["mean"]);
handlers[ Rf_install("sum") ] = HybridHandler(simple_prototype<dplyr::Sum>, HybridHandler::BASE, ns_base["sum"]);

handlers[ Rf_install("var") ] = HybridHandler(simple_prototype<dplyr::Var>, HybridHandler::STATS, ns_stats["var"]);
handlers[ Rf_install("sd") ] = HybridHandler(simple_prototype<dplyr::Sd>, HybridHandler::STATS, ns_stats["sd"]);
}
@@ -157,12 +157,12 @@ Result* rank_impl_prototype(SEXP call, const ILazySubsets& subsets, int nargs) {
}

// Registers the hybrid handlers for the window functions `row_number`,
// `ntile`, `min_rank`, `percent_rank`, `dense_rank` and `cume_dist` in
// `handlers`, with the reference functions taken from the dplyr namespace.
// The four rank functions share rank_impl_prototype, instantiated with a
// different increment type each.
// NOTE(review): rendered diff; the registrations without an Origin argument
// are the pre-change lines, the HybridHandler::DPLYR ones replace them.
void install_window_handlers(HybridHandlerMap& handlers) {
Environment ns_dplyr = Environment::namespace_env("dplyr") ;

handlers[ Rf_install("row_number") ] = HybridHandler(row_number_prototype, ns_dplyr["row_number"]);
handlers[ Rf_install("ntile") ] = HybridHandler(ntile_prototype, ns_dplyr["ntile"]);
handlers[ Rf_install("min_rank") ] = HybridHandler(rank_impl_prototype<dplyr::internal::min_rank_increment>, ns_dplyr["min_rank"]);
handlers[ Rf_install("percent_rank") ] = HybridHandler(rank_impl_prototype<dplyr::internal::percent_rank_increment>, ns_dplyr["percent_rank"]);
handlers[ Rf_install("dense_rank") ] = HybridHandler(rank_impl_prototype<dplyr::internal::dense_rank_increment>, ns_dplyr["dense_rank"]);
handlers[ Rf_install("cume_dist") ] = HybridHandler(rank_impl_prototype<dplyr::internal::cume_dist_increment>, ns_dplyr["cume_dist"]);
Environment ns_dplyr = Environment::namespace_env("dplyr");

handlers[Rf_install("row_number")] = HybridHandler(row_number_prototype, HybridHandler::DPLYR, ns_dplyr["row_number"]);
handlers[Rf_install("ntile")] = HybridHandler(ntile_prototype, HybridHandler::DPLYR, ns_dplyr["ntile"]);
handlers[Rf_install("min_rank")] = HybridHandler(rank_impl_prototype<dplyr::internal::min_rank_increment>, HybridHandler::DPLYR, ns_dplyr["min_rank"]);
handlers[Rf_install("percent_rank")] = HybridHandler(rank_impl_prototype<dplyr::internal::percent_rank_increment>, HybridHandler::DPLYR, ns_dplyr["percent_rank"]);
handlers[Rf_install("dense_rank")] = HybridHandler(rank_impl_prototype<dplyr::internal::dense_rank_increment>, HybridHandler::DPLYR, ns_dplyr["dense_rank"]);
handlers[Rf_install("cume_dist")] = HybridHandler(rank_impl_prototype<dplyr::internal::cume_dist_increment>, HybridHandler::DPLYR, ns_dplyr["cume_dist"]);
}

0 comments on commit 924b624

Please sign in to comment.
You can’t perform that action at this time.