Using factor() inside mutate() on a grouped data frame gives wrong results:
require(Lahman)
# Reproduction: sum G per lgID/yearID group, then derive a logical flag (f0)
# and the same flag wrapped in factor() (f1). There is no ungroup() between
# summarise() and mutate() in this pipeline.
d <- Batting %>%
  group_by(lgID, yearID) %>%
  summarise(s = sum(G)) %>%
  mutate(
    f0 = s > 9000,
    f1 = factor(s > 9000)
  )

# Tabulate both flags; they are built from the same condition.
xtabs(~ f0, data = d)  # reported: 20 250
xtabs(~ f1, data = d)  # reported: 136 134 (wrong; differs from f0)
Calling ungroup() before mutate() solves the problem:
# Workaround: same pipeline, but with an explicit ungroup() placed between
# summarise() and mutate(), so the flags are computed after ungrouping.
d <- Batting %>%
  group_by(lgID, yearID) %>%
  summarise(s = sum(G)) %>%
  ungroup() %>%
  mutate(
    f0 = s > 9000,
    f1 = factor(s > 9000)
  )

# Tabulate both flags; here the logical and factor versions agree.
xtabs(~ f0, data = d)  # reported: 20 250
xtabs(~ f1, data = d)  # reported: 20 250