Skip to content

Allow single values_fn in pivot_wider() #739

@moodymudskipper

Description

@moodymudskipper

See #737 for full example

library(tidyverse)
df1 <- tibble::tribble(
  ~id, ~type, ~name, ~var1, ~var2, ~var3,
  10L, "Country",   "Norway", 169L,         14L, 164L,
  10L,   "Sport",     "Skii", 169L,         14L, 164L,
  10L,  "Format",    "Video", 169L,         14L, 164L,
  11L, "Country",    "Spain", 150L,         16L, 178L,
  11L,  "Format",    "Photo", 150L,         16L, 178L,
  11L,   "Sport",     "Bike", 150L,         16L, 178L,
  11L,   "Sport",   "Soccer", 150L,         16L, 178L,
  11L,   "Sport",   "Basket", 150L,         16L, 178L,
  12L, "Country",      "USA",   0L,          0L,   0L,
  12L,  "Format",    "Video",   0L,          0L,   0L,
  12L,   "Sport", "Baseball",   0L,          0L,   0L
)

################################################################
# This default behavior is nice, but silencing the default     #
# message requires being explicit, which can be annoying       #
################################################################

pivot_wider(df1, names_from = "type", values_from = "name")
#> Warning: Values in `name` are not uniquely identified; output will contain list-cols.
#> * Use `values_fn = list(name = list)` to suppress this warning.
#> * Use `values_fn = list(name = length)` to identify where the duplicates arise
#> * Use `values_fn = list(name = summary_fun)` to summarise duplicates
#> # A tibble: 3 x 7
#>      id  var1  var2  var3     Country       Sport      Format
#>   <int> <int> <int> <int> <list<chr>> <list<chr>> <list<chr>>
#> 1    10   169    14   164         [1]         [1]         [1]
#> 2    11   150    16   178         [1]         [3]         [1]
#> 3    12     0     0     0         [1]         [1]         [1]

pivot_wider(df1, names_from = "type", values_from = "name", values_fn = list(name=list))
#> # A tibble: 3 x 7
#>      id  var1  var2  var3     Country       Sport      Format
#>   <int> <int> <int> <int> <list<chr>> <list<chr>> <list<chr>>
#> 1    10   169    14   164         [1]         [1]         [1]
#> 2    11   150    16   178         [1]         [3]         [1]
#> 3    12     0     0     0         [1]         [1]         [1]

################################################################
# It becomes a lot of work if we have several "values_from"    #
################################################################

df2 <- df1 %>% mutate(name2 = substr(name,1,1)) %>%
  filter(type != "Format")

pivot_wider(df2, names_from = "type", values_from = c("name","name2"),
            values_fn = list(name=list, name2 = list))
#> # A tibble: 3 x 8
#>      id  var1  var2  var3 name_Country  name_Sport name2_Country name2_Sport
#>   <int> <int> <int> <int>  <list<chr>> <list<chr>>   <list<chr>> <list<chr>>
#> 1    10   169    14   164          [1]         [1]           [1]         [1]
#> 2    11   150    16   178          [1]         [3]           [1]         [3]
#> 3    12     0     0     0          [1]         [1]           [1]         [1]

Why couldn't we just do:

pivot_wider(df2, names_from = "type", values_from = c("name","name2"), values_fn = list)

In `mutate_at()` / `summarize_at()` we can pass either a single function or a list of functions; the same would feel natural here as well.

Metadata

Metadata

Assignees

No one assigned

    Labels

    feature (a feature request or enhancement) · pivoting ♻️ (pivot rectangular data to different "shapes")

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions