Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce r_wrap_or_clone() and r_wrap_or_copy() #1599

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
50 changes: 27 additions & 23 deletions src/internal/attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,21 +61,27 @@ r_obj* node_names(r_obj* x) {
}

r_obj* ffi_set_names(r_obj* x, r_obj* mold, r_obj* nm, r_obj* env) {
int n_kept = 0;

r_obj* dots = KEEP_N(rlang_dots(env), &n_kept);
Comment on lines -64 to -66
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved these after the early exit for nm == NULL because they aren't needed, just for maximal performance and code clarity


if (!r_is_vector(x, -1)) {
r_abort("`x` must be a vector");
}

if (nm == r_null) {
x = set_names_dispatch(x, r_null, env);

FREE(n_kept);
return x;
// Fast case for dropping names
if (r_is_object(x)) {
return set_names_dispatch(x, r_null, env);
} else if (r_names(x) != r_null) {
x = r_wrap_or_clone(x);
r_attrib_poke_names(x, r_null);
return x;
} else {
return x;
}
}

int n_kept = 0;

r_obj* dots = KEEP_N(rlang_dots(env), &n_kept);

if (r_is_function(nm) || r_is_formula(nm, -1, -1)) {
if (r_names(mold) == r_null) {
mold = KEEP_N(eval_as_character(mold, env), &n_kept);
Expand All @@ -85,12 +91,16 @@ r_obj* ffi_set_names(r_obj* x, r_obj* mold, r_obj* nm, r_obj* env) {

nm = KEEP_N(rlang_as_function(nm, env), &n_kept);
nm = KEEP_N(eval_fn_dots(nm, mold, dots, env), &n_kept);
} else {
if (r_length(dots) > 0) {
nm = KEEP_N(eval_fn_dots(c_fn, nm, dots, env), &n_kept);
}
} else if (r_length(dots) > 0) {
nm = KEEP_N(eval_fn_dots(c_fn, nm, dots, env), &n_kept);
}

if (r_typeof(nm) != R_TYPE_character || r_is_object(nm)) {
nm = KEEP_N(eval_as_character(nm, env), &n_kept);

if (r_typeof(nm) != R_TYPE_character) {
r_abort("`nm` must be `NULL` or a character vector.");
}
}
Comment on lines 92 to 104
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tweaked this section a little

  • The nm == function case didn't call as.character() on the result of the function, but I felt like it was reasonable to do so since we call as.character() on the result of set_names(x, 1, 2) when ... are combined with c(), and on set_names(x, 1) when nm just isn't a character vector.

  • We also had some redundant "if not character, then error" checks below that I've removed in favor of just this single character check.


r_ssize n;
Expand All @@ -100,10 +110,6 @@ r_obj* ffi_set_names(r_obj* x, r_obj* mold, r_obj* nm, r_obj* env) {
n = r_length(x);
}

if (r_typeof(nm) != R_TYPE_character) {
r_abort("`nm` must be `NULL` or a character vector.");
}

r_ssize nm_n = r_length(nm);
if (nm_n != n) {
if (nm_n != 1) {
Expand All @@ -118,12 +124,13 @@ r_obj* ffi_set_names(r_obj* x, r_obj* mold, r_obj* nm, r_obj* env) {
r_chr_fill(nm, val, n);
}

if (!is_character(nm, n, OPTION_BOOL_null, OPTION_BOOL_null)) {
r_abort("`nm` must be `NULL` or a character vector the same length as `x`");
if (r_is_object(x)) {
x = set_names_dispatch(x, nm, env);
} else {
x = r_wrap_or_clone(x);
r_attrib_poke_names(x, nm);
}

x = set_names_dispatch(x, nm, env);

FREE(n_kept);
return x;
}
Expand Down Expand Up @@ -158,9 +165,6 @@ r_obj* names_dispatch(r_obj* x, r_obj* env) {
return r_eval(names_call, env);
}

// Use `names<-()` rather than setting names directly with `r_attrib_poke_names()`
// for genericity and for speed. `names<-()` can shallow duplicate `x`'s
// attributes using ALTREP wrappers, which is not in R's public API.
static inline
r_obj* set_names_dispatch(r_obj* x, r_obj* nm, r_obj* env) {
r_env_poke(env, r_syms.dot_x, x);
Expand Down
2 changes: 1 addition & 1 deletion src/internal/encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ r_obj* obj_attrib_encode_utf8(r_obj* x, r_obj* attrib) {
}
KEEP(attrib_new);

x = KEEP(r_clone_shared(x));
x = KEEP(r_wrap_or_clone_shared(x));
r_poke_attrib(x, attrib_new);

FREE(2);
Expand Down
24 changes: 24 additions & 0 deletions src/rlang/obj.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,30 @@ r_obj* r_clone_shared(r_obj* x) {
return r_is_shared(x) ? r_clone(x) : x;
}

// Copy/Clone equivalents that attempt to generate a thin ALTREP wrapper
// instead of copying/cloning if possible. Typically useful before modifying
// attributes, rather than before modifying the underlying data.
static inline
r_obj* r_wrap_or_copy(r_obj* x) {
#if R_VERSION >= R_Version(3, 6, 0)
return R_duplicate_attr(x);
#else
return r_copy(x);
#endif
}
static inline
r_obj* r_wrap_or_clone(r_obj* x) {
#if R_VERSION >= R_Version(3, 6, 0)
return R_shallow_duplicate_attr(x);
#else
return r_clone(x);
#endif
}
static inline
r_obj* r_wrap_or_clone_shared(r_obj* x) {
return r_is_shared(x) ? r_wrap_or_clone(x) : x;
}

// These also clone names
r_obj* r_vec_clone(r_obj* x);
r_obj* r_vec_clone_shared(r_obj* x);
Expand Down
1 change: 1 addition & 0 deletions tests/testthat/test-attr.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ test_that("can supply function/formula to rename", {
expect_named(set_names(x, toupper), c("A", "B"))
expect_named(set_names(x, ~ toupper(.)), c("A", "B"))
expect_named(set_names(x, paste, "foo"), c("a foo", "b foo"))
expect_named(set_names(x, ~ c(3, 4)), c("3", "4"))
})

test_that("set_names() zaps names", {
Expand Down
18 changes: 18 additions & 0 deletions tests/testthat/test-c-api.R
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,24 @@ test_that("attributes are re-encoded recursively", {
expect_utf8_encoded(attrib_nested$latin1)
})

test_that("re-encoding attributes doesn't modify the original attributes", {
latin1 <- test_encodings()$latin1

# Large object so duplication in `r_obj_encode_utf8()` generates an ALTREP
# wrapper
x <- 1:1e6 + 0L
attr(x, "foo") <- latin1
original <- Encoding(latin1)

result <- r_obj_encode_utf8(x)
attrib <- attributes(result)
expect_utf8_encoded(attrib$foo)

# Still the same as before
attrib <- attributes(x)
expect_identical(Encoding(attrib$foo), original)
})

test_that("NAs aren't re-encoded to 'NA' (r-lib/vctrs#1291)", {
utf8 <- c(NA, test_encodings()$utf8)
latin1 <- c(NA, test_encodings()$latin1)
Expand Down