-
Notifications
You must be signed in to change notification settings - Fork 420
Closed
Description
Spreading a data frame that contains multiple nested columns (list-columns) returns an error. (For more context on when one might encounter this issue, see #197.)
library("dplyr")
library("tidyr")
library("purrr")
library("broom")
data <- data_frame(
name = c("Alex", "Alex", "Alex", "Tim", "Tim", "Tim"),
year = c(1990, 1991, 1992, 1990, 1991, 1992),
height = c(160, 165, 170, 120, 134, 150),
weight = c(50, 52, 53, 48, 48, 52)
)
data
## name year height weight
## (chr) (dbl) (dbl) (dbl)
## 1 Alex 1990 160 50
## 2 Alex 1991 165 52
## 3 Alex 1992 170 53
## 4 Tim 1990 120 48
## 5 Tim 1991 134 48
## 6 Tim 1992 150 52
# nest
data <- nest(data, year, height, weight)
data
## name data
## (chr) (chr)
## 1 Alex <tbl_df [3,3]>
## 2 Tim <tbl_df [3,3]>
Then we fit a linear model for each name, estimating only the slope (the `+ 0` in the formula drops the intercept).
data <- data %>%
mutate(
model = map(.$data, ~lm(year ~ height + 0, data = .))
)
data
## name data model
## (chr) (chr) (chr)
## 1 Alex <tbl_df [3,3]> <S3:lm>
## 2 Tim <tbl_df [3,3]> <S3:lm>
Next we extract the model information with tidy() and unnest it:
# extract model statistics
tidy_model <- data %>%
mutate(tidy = map(model, tidy)) %>%
unnest(tidy)
tidy_model
## name data model term estimate std.error statistic p.value
## (chr) (chr) (chr) (chr) (dbl) (dbl) (dbl) (dbl)
## 1 Alex <tbl_df [3,3]> <S3:lm> height 12.05941 0.2074858 58.12160 0.0002958912
## 2 Tim <tbl_df [3,3]> <S3:lm> height 14.66374 0.9394218 15.60932 0.0040791367
# then we gather
tidy_model <- tidy_model %>%
gather(variable, value, estimate, std.error, statistic, p.value) %>%
unite(variable, term, variable)
tidy_model
## name data model variable value
## (chr) (chr) (chr) (chr) (dbl)
## 1 Alex <tbl_df [3,3]> <S3:lm> height_estimate 1.205941e+01
## 2 Tim <tbl_df [3,3]> <S3:lm> height_estimate 1.466374e+01
## 3 Alex <tbl_df [3,3]> <S3:lm> height_std.error 2.074858e-01
## 4 Tim <tbl_df [3,3]> <S3:lm> height_std.error 9.394218e-01
## 5 Alex <tbl_df [3,3]> <S3:lm> height_statistic 5.812160e+01
## 6 Tim <tbl_df [3,3]> <S3:lm> height_statistic 1.560932e+01
## 7 Alex <tbl_df [3,3]> <S3:lm> height_p.value 2.958912e-04
## 8 Tim <tbl_df [3,3]> <S3:lm> height_p.value 4.079137e-03
# finally we want to spread again
tidy_model %>%
spread(variable, value)
## Error in sort.int(x, na.last = na.last, decreasing = decreasing, ...) :
## 'x' must be atomic
Should this not return something like this?
tidy_model %>%
spread(variable, value)
## name data model_A model_B height_estimate height_p.value height_statistic height_std.error
## (chr) (chr) (chr) (chr) (dbl) (dbl) (dbl) (dbl)
## 1 Alex <tbl_df [3,3]> <S3:lm> <S3:lm> 0.20000000 1.843174e-13 3.453933e+12 5.790501e-14
## 2 Tim <tbl_df [3,3]> <S3:lm> <S3:lm> 0.06656805 2.449142e-02 2.598076e+01 2.562205e-03
Metadata
Metadata
Assignees
Labels
No labels