Skip to content

Commit 924b624

Browse files
authored
Merge pull request #3458 from tidyverse/fix-issue-3456
- The hybrid evaluator finds functions from dplyr even if dplyr is not attached (#3456).
2 parents 9861d7e + d9dae91 commit 924b624

File tree

10 files changed

+131
-43
lines changed

10 files changed

+131
-43
lines changed

inst/include/dplyr/HybridHandler.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,26 @@ class Result;
88
struct HybridHandler {
99
typedef dplyr::Result* (*HybridHandlerFun)(SEXP, const dplyr::ILazySubsets&, int);
1010

11+
enum Origin { DPLYR, STATS, BASE };
12+
1113
HybridHandlerFun handler ;
1214
SEXP reference ;
15+
Origin origin ;
1316

1417
HybridHandler():
1518
handler(0),
16-
reference(R_NilValue)
19+
reference(R_NilValue),
20+
origin(DPLYR)
1721
{}
1822

19-
HybridHandler(HybridHandlerFun handler_, SEXP reference_):
20-
handler(handler_), reference(reference_)
23+
HybridHandler(HybridHandlerFun handler_, Origin origin_, SEXP reference_):
24+
handler(handler_),
25+
reference(reference_),
26+
origin(origin_)
2127
{}
2228

29+
bool hybrid(SEXP symbol, SEXP rho) const;
30+
2331
};
2432

2533
}

src/hybrid.cpp

Lines changed: 91 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,85 @@ void registerHybridHandler(const char* name, HybridHandler proto) {
137137

138138
namespace dplyr {
139139

140+
struct FindFunData {
141+
SEXP symbol;
142+
SEXP env;
143+
SEXP res;
144+
bool forced;
145+
146+
FindFunData(SEXP symbol_, SEXP env_) :
147+
symbol(symbol_),
148+
env(env_),
149+
res(R_NilValue),
150+
forced(false)
151+
{}
152+
153+
};
154+
155+
156+
void protected_findFun(void* data) {
157+
FindFunData* find_data = reinterpret_cast<FindFunData*>(data);
158+
159+
SEXP rho = find_data->env;
160+
SEXP symbol = find_data->symbol;
161+
SEXP vl;
162+
163+
while (rho != R_EmptyEnv) {
164+
vl = Rf_findVarInFrame3(rho, symbol, TRUE) ;
165+
166+
if (vl != R_UnboundValue) {
167+
// a promise, we need to evaluate it to find out if it
168+
// is a function promise
169+
if (TYPEOF(vl) == PROMSXP) {
170+
PROTECT(vl);
171+
vl = Rf_eval(vl, rho);
172+
UNPROTECT(1);
173+
}
174+
175+
// we found a function
176+
if (TYPEOF(vl) == CLOSXP || TYPEOF(vl) == BUILTINSXP || TYPEOF(vl) == SPECIALSXP) {
177+
find_data->res = vl;
178+
return;
179+
}
180+
181+
// a missing, just let R evaluation work as we have no way to
182+
// assert if the missing argument would have evaluated to a function or data
183+
if (vl == R_MissingArg) {
184+
return;
185+
}
186+
}
187+
188+
// go in the parent environment
189+
rho = ENCLOS(rho);
190+
}
191+
192+
// we did not find a suitable function, so we force hybrid evaluation
193+
// that happens e.g. when dplyr is not loaded and we use n() in the expression
194+
find_data->forced = true;
195+
return;
196+
197+
}
198+
199+
200+
bool HybridHandler::hybrid(SEXP symbol, SEXP rho) const {
201+
// the `protected_findFun` above might longjump so
202+
// we evaluate it in a top level context
203+
FindFunData find_data(symbol, rho);
204+
Rboolean success = R_ToplevelExec(protected_findFun, reinterpret_cast<void*>(&find_data));
205+
206+
// success longjumped so force hybrid
207+
if (!success) return true;
208+
209+
if (find_data.forced) {
210+
if (origin == DPLYR && symbol != Rf_install("n")) {
211+
warning("hybrid evaluation forced for `%s`. Please use dplyr::%s() or library(dplyr) to remove this warning.", CHAR(PRINTNAME(symbol)), CHAR(PRINTNAME(symbol)));
212+
}
213+
return true;
214+
}
215+
216+
return find_data.res == reference;
217+
}
218+
140219
Result* get_handler(SEXP call, const ILazySubsets& subsets, const Environment& env) {
141220
LOG_INFO << "Looking up hybrid handler for call of type " << type2name(call);
142221

@@ -145,18 +224,15 @@ Result* get_handler(SEXP call, const ILazySubsets& subsets, const Environment& e
145224

146225
HybridHandlerMap& handlers = get_handlers();
147226

148-
// if `check_hybrid_reference` is true, we check that the symbol `fun_symbol`
149-
// evaluates to what we expect, i.e. the reference in its HybridHandler
150-
// when we have `dplyr::` prefix we don't need to check
151-
bool check_hybrid_reference = true ;
227+
bool in_dplyr_namespace = false;
152228
SEXP fun_symbol = CAR(call);
153229
// interpret dplyr::fun() as fun(). #3309
154230
if (TYPEOF(fun_symbol) == LANGSXP &&
155231
CAR(fun_symbol) == R_DoubleColonSymbol &&
156232
CADR(fun_symbol) == Rf_install("dplyr")
157233
) {
158-
fun_symbol = CADDR(fun_symbol) ;
159-
check_hybrid_reference = false ;
234+
fun_symbol = CADDR(fun_symbol);
235+
in_dplyr_namespace = true;
160236
}
161237

162238
if (TYPEOF(fun_symbol) != SYMSXP) {
@@ -166,18 +242,21 @@ Result* get_handler(SEXP call, const ILazySubsets& subsets, const Environment& e
166242

167243
LOG_VERBOSE << "Searching hybrid handler for function " << CHAR(PRINTNAME(fun_symbol));
168244

245+
// give up if the symbol is not known
169246
HybridHandlerMap::const_iterator it = handlers.find(fun_symbol);
170247
if (it == handlers.end()) {
171248
LOG_VERBOSE << "Not found";
172249
return 0;
173250
}
174251

175-
// no hybrid evaluation if the symbol evaluates to something else than
176-
// is expected. This would happen if e.g. the mean function has been shadowed
177-
// mutate( x = mean(x) )
178-
// if `mean` evaluates to something other than `base::mean` then no hybrid.
179-
RObject fun = Rf_findFun(fun_symbol, env) ;
180-
if (check_hybrid_reference && fun != it->second.reference) return 0 ;
252+
if (!in_dplyr_namespace) {
253+
// no hybrid evaluation if the symbol evaluates to something else than
254+
// is expected. This would happen if e.g. the mean function has been shadowed
255+
// mutate( x = mean(x) )
256+
// if `mean` evaluates to something other than `base::mean` then no hybrid.
257+
258+
if (!it->second.hybrid(fun_symbol, env)) return 0;
259+
}
181260

182261
LOG_INFO << "Using hybrid handler for " << CHAR(PRINTNAME(fun_symbol));
183262

src/hybrid_count.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ Result* count_distinct_prototype(SEXP call, const ILazySubsets& subsets, int) {
5151
}
5252

5353
void install_count_handlers(HybridHandlerMap& handlers) {
54-
Environment ns_dplyr = Environment::namespace_env("dplyr") ;
55-
handlers[ Rf_install("n") ] = HybridHandler(count_prototype, ns_dplyr["n"]) ;
56-
handlers[ Rf_install("n_distinct") ] = HybridHandler(count_distinct_prototype, ns_dplyr["n_distinct"]) ;
54+
Environment ns_dplyr = Environment::namespace_env("dplyr");
55+
handlers[Rf_install("n")] = HybridHandler(count_prototype, HybridHandler::DPLYR, ns_dplyr["n"]);
56+
handlers[Rf_install("n_distinct")] = HybridHandler(count_distinct_prototype, HybridHandler::DPLYR, ns_dplyr["n_distinct"]);
5757
}

src/hybrid_debug.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ Result* verify_not_hybrid_prototype(SEXP call, const ILazySubsets&, int nargs) {
8080
}
8181

8282
void install_debug_handlers(HybridHandlerMap& handlers) {
83-
Environment ns_dplyr = Environment::namespace_env("dplyr") ;
84-
handlers[ Rf_install("verify_hybrid") ] = HybridHandler(verify_hybrid_prototype, ns_dplyr["verify_hybrid"]) ;
85-
handlers[ Rf_install("verify_not_hybrid") ] = HybridHandler(verify_not_hybrid_prototype, ns_dplyr["verify_not_hybrid"]);
83+
Environment ns_dplyr = Environment::namespace_env("dplyr");
84+
handlers[Rf_install("verify_hybrid")] = HybridHandler(verify_hybrid_prototype, HybridHandler::DPLYR, ns_dplyr["verify_hybrid"]);
85+
handlers[Rf_install("verify_not_hybrid")] = HybridHandler(verify_not_hybrid_prototype, HybridHandler::DPLYR, ns_dplyr["verify_not_hybrid"]);
8686
}

src/hybrid_in.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,5 @@ Result* in_prototype(SEXP call, const ILazySubsets& subsets, int) {
4848
}
4949

5050
void install_in_handlers(HybridHandlerMap& handlers) {
51-
handlers[ Rf_install("%in%") ] = HybridHandler(in_prototype, Environment::base_namespace()["%in%"]);
51+
handlers[Rf_install("%in%")] = HybridHandler(in_prototype, HybridHandler::BASE, Environment::base_namespace()["%in%"]);
5252
}

src/hybrid_minmax.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ Result* minmax_prototype(SEXP call, const ILazySubsets& subsets, int nargs) {
6767
}
6868

6969
void install_minmax_handlers(HybridHandlerMap& handlers) {
70-
Environment ns_base = Environment::base_namespace() ;
71-
handlers[Rf_install("min")] = HybridHandler(minmax_prototype<true>, ns_base["min"]);
72-
handlers[Rf_install("max")] = HybridHandler(minmax_prototype<false>, ns_base["max"]);
70+
Environment ns_base = Environment::base_namespace();
71+
handlers[Rf_install("min")] = HybridHandler(minmax_prototype<true>, HybridHandler::BASE, ns_base["min"]);
72+
handlers[Rf_install("max")] = HybridHandler(minmax_prototype<false>, HybridHandler::BASE, ns_base["max"]);
7373
}

src/hybrid_nth.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,8 @@ Result* last_prototype(SEXP call, const ILazySubsets& subsets, int nargs) {
329329
}
330330

331331
void install_nth_handlers(HybridHandlerMap& handlers) {
332-
Environment ns_dplyr = Environment::namespace_env("dplyr") ;
333-
handlers[ Rf_install("first") ] = HybridHandler(first_prototype, ns_dplyr["first"]) ;
334-
handlers[ Rf_install("last") ] = HybridHandler(last_prototype, ns_dplyr["last"]) ;
335-
handlers[ Rf_install("nth") ] = HybridHandler(nth_prototype, ns_dplyr["nth"]);
332+
Environment ns_dplyr = Environment::namespace_env("dplyr");
333+
handlers[Rf_install("first")] = HybridHandler(first_prototype, HybridHandler::DPLYR, ns_dplyr["first"]);
334+
handlers[Rf_install("last")] = HybridHandler(last_prototype, HybridHandler::DPLYR, ns_dplyr["last"]);
335+
handlers[Rf_install("nth")] = HybridHandler(nth_prototype, HybridHandler::DPLYR, ns_dplyr["nth"]);
336336
}

src/hybrid_offset.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,6 @@ Result* leadlag_prototype(SEXP call, const ILazySubsets& subsets, int) {
9595
void install_offset_handlers(HybridHandlerMap& handlers) {
9696
Environment ns_dplyr = Environment::namespace_env("dplyr") ;
9797

98-
handlers[ Rf_install("lead") ] = HybridHandler(leadlag_prototype<Lead>, ns_dplyr["lead"]);
99-
handlers[ Rf_install("lag") ] = HybridHandler(leadlag_prototype<Lag>, ns_dplyr["lag"]);
98+
handlers[ Rf_install("lead") ] = HybridHandler(leadlag_prototype<Lead>, HybridHandler::DPLYR, ns_dplyr["lead"]);
99+
handlers[ Rf_install("lag") ] = HybridHandler(leadlag_prototype<Lag>, HybridHandler::DPLYR, ns_dplyr["lag"]);
100100
}

src/hybrid_simple.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,9 @@ void install_simple_handlers(HybridHandlerMap& handlers) {
7777
Environment ns_stats = Environment::namespace_env("stats") ;
7878
Environment ns_base = Environment::base_namespace() ;
7979

80-
handlers[ Rf_install("mean") ] = HybridHandler(simple_prototype<dplyr::Mean>, ns_base["mean"]);
81-
handlers[ Rf_install("var") ] = HybridHandler(simple_prototype<dplyr::Var>, ns_stats["var"]);
82-
handlers[ Rf_install("sd") ] = HybridHandler(simple_prototype<dplyr::Sd>, ns_stats["sd"]);
83-
handlers[ Rf_install("sum") ] = HybridHandler(simple_prototype<dplyr::Sum>, ns_base["sum"]);
80+
handlers[ Rf_install("mean") ] = HybridHandler(simple_prototype<dplyr::Mean>, HybridHandler::BASE, ns_base["mean"]);
81+
handlers[ Rf_install("sum") ] = HybridHandler(simple_prototype<dplyr::Sum>, HybridHandler::BASE, ns_base["sum"]);
82+
83+
handlers[ Rf_install("var") ] = HybridHandler(simple_prototype<dplyr::Var>, HybridHandler::STATS, ns_stats["var"]);
84+
handlers[ Rf_install("sd") ] = HybridHandler(simple_prototype<dplyr::Sd>, HybridHandler::STATS, ns_stats["sd"]);
8485
}

src/hybrid_window.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -157,12 +157,12 @@ Result* rank_impl_prototype(SEXP call, const ILazySubsets& subsets, int nargs) {
157157
}
158158

159159
void install_window_handlers(HybridHandlerMap& handlers) {
160-
Environment ns_dplyr = Environment::namespace_env("dplyr") ;
161-
162-
handlers[ Rf_install("row_number") ] = HybridHandler(row_number_prototype, ns_dplyr["row_number"]);
163-
handlers[ Rf_install("ntile") ] = HybridHandler(ntile_prototype, ns_dplyr["ntile"]);
164-
handlers[ Rf_install("min_rank") ] = HybridHandler(rank_impl_prototype<dplyr::internal::min_rank_increment>, ns_dplyr["min_rank"]);
165-
handlers[ Rf_install("percent_rank") ] = HybridHandler(rank_impl_prototype<dplyr::internal::percent_rank_increment>, ns_dplyr["percent_rank"]);
166-
handlers[ Rf_install("dense_rank") ] = HybridHandler(rank_impl_prototype<dplyr::internal::dense_rank_increment>, ns_dplyr["dense_rank"]);
167-
handlers[ Rf_install("cume_dist") ] = HybridHandler(rank_impl_prototype<dplyr::internal::cume_dist_increment>, ns_dplyr["cume_dist"]);
160+
Environment ns_dplyr = Environment::namespace_env("dplyr");
161+
162+
handlers[Rf_install("row_number")] = HybridHandler(row_number_prototype, HybridHandler::DPLYR, ns_dplyr["row_number"]);
163+
handlers[Rf_install("ntile")] = HybridHandler(ntile_prototype, HybridHandler::DPLYR, ns_dplyr["ntile"]);
164+
handlers[Rf_install("min_rank")] = HybridHandler(rank_impl_prototype<dplyr::internal::min_rank_increment>, HybridHandler::DPLYR, ns_dplyr["min_rank"]);
165+
handlers[Rf_install("percent_rank")] = HybridHandler(rank_impl_prototype<dplyr::internal::percent_rank_increment>, HybridHandler::DPLYR, ns_dplyr["percent_rank"]);
166+
handlers[Rf_install("dense_rank")] = HybridHandler(rank_impl_prototype<dplyr::internal::dense_rank_increment>, HybridHandler::DPLYR, ns_dplyr["dense_rank"]);
167+
handlers[Rf_install("cume_dist")] = HybridHandler(rank_impl_prototype<dplyr::internal::cume_dist_increment>, HybridHandler::DPLYR, ns_dplyr["cume_dist"]);
168168
}

0 commit comments

Comments
 (0)