🎓 Tidy tools for academics
Switch branches/tags
Nothing to show
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Type Name Latest commit message Commit time
Failed to load latest commit information.
R clean up and and rm tidyverse exports Jun 4, 2018
data added data Apr 26, 2018
man
.Rbuildignore better printing May 8, 2018
.gitignore better printing May 8, 2018
DESCRIPTION more fit indices. more robust models. fixed standaridization (for std… May 7, 2018
LICENSE first big commit Apr 26, 2018
LICENSE.md first big commit Apr 26, 2018
NAMESPACE added bang bangs and tidy_sem_model and reconfig pkgfun to fit older … May 8, 2018
NOTES.md
README.Rmd better printing May 8, 2018
README.md adj dash (NA) postion in print May 8, 2018
logo.R
make.R
tidyversity.Rproj added sweep & tidy_model and more fit stats May 2, 2018

README.md

tidyversity

lifecycle

🎓 Tidy tools for academics

***This package is in very early development. Feedback is encouraged!!!***

Installation

Install the development version from GitHub with:

## install devtools if not already installed
if (!requireNamespace("devtools", quietly = TRUE)) {
 install.packages("devtools")
}
## install tidyversity from GitHub
devtools::install_github("mkearney/tidyversity")

Load the package (it, of course, plays nicely with tidyverse).

## load tidyverse
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────────────────── tidyverse 1.2.1 ──
#> ✔ ggplot2 2.2.1   ✔ purrr  0.2.4
#> ✔ tibble 1.4.2   ✔ dplyr  0.7.4
#> ✔ tidyr  0.8.0   ✔ stringr 1.3.0
#> ✔ readr  1.1.1   ✔ forcats 0.3.0
#> ── Conflicts ───────────────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()  masks stats::lag()

## load tidyversity
library(tidyversity)

Regression models

Ordinary Least Squares (OLS)

Conduct an Ordinary Least Squares (OLS) regression analysis.

polcom %>%
 tidy_regression(follow_trump ~ news_1 + ambiv_sexism_1) %>%
 tidy_summary()
#> # A tidy model
#> Model formula : follow_trump ~ news_1 + ambiv_sexism_1
#> Model type   : Ordinary Least Squares (OLS) regression
#> Model pkg::fun : stats::lm()
#> Model data   : 243 (observations) X 3 (variables)
#> $fit
#> fit_stat   n   df  estimate  p.value stars
#> F     243   2   3.831   0.023  *
#> R^2    243   -   0.031    -     
#> Adj R^2  243   -   0.023    -     
#> RMSE    243   -   0.409    -     
#> AIC    243   -  260.148    -     
#> BIC    243   -  274.121    -     
#> 
#> $coef
#> term        est   s.e.  est.se  p.value stars  std.est
#> (Intercept)   0.745  0.097   7.692   <.001  ***   <.001
#> news_1      0.022  0.012   1.811   0.071  +    0.048
#> ambiv_sexism_1 -0.038  0.021  -1.870   0.063  +    -0.050

Logistic (dichotomous)

Conduct a logistic regression analysis for binary (dichotomous) outcomes.

polcom %>%
 tidy_regression(follow_trump ~ news_1 + ambiv_sexism_1, type = "logistic") %>%
 tidy_summary()
#> # A tidy model
#> Model formula : follow_trump ~ news_1 + ambiv_sexism_1
#> Model type   : Logistic regression
#> Model pkg::fun : stats::glm()
#> Model data   : 243 (observations) X 3 (variables)
#> $fit
#> fit_stat      n   df  estimate  p.value stars
#> χ2        243  240  247.442   0.357   
#> Δχ2       243   2   7.466   0.024  *
#> Nagelkerke R^2  243   -   0.030    -     
#> McFadden R^2   243   -   0.029    -     
#> RMSE       243   -   2.540    -     
#> AIC       243   -  253.442    -     
#> BIC       243   -  263.921    -     
#> 
#> $coef
#> term        est   s.e.  est.se  p.value stars  std.est
#> (Intercept)   1.133  0.553   2.049   0.040  *    <.001
#> news_1      0.127  0.070   1.808   0.071  +    0.195
#> ambiv_sexism_1 -0.229  0.122  -1.872   0.061  +    -0.201

Poisson (count)

Conduct a Poisson regression analysis for count data.

polcom %>%
 mutate(polarize = abs(therm_1 - therm_2)) %>%
 tidy_regression(polarize ~ news_1 + ambiv_sexism_1, type = "poisson") %>%
 tidy_summary()
#> # A tidy model
#> Model formula : polarize ~ news_1 + ambiv_sexism_1
#> Model type   : Poisson regression
#> Model pkg::fun : stats::glm()
#> Model data   : 242 (observations) X 3 (variables)
#> $fit
#> fit_stat      n   df  estimate  p.value stars
#> χ2        242  239  6549.419   <.001  ***
#> Δχ2       242   2  399.077   <.001  ***
#> Nagelkerke R^2  242   -   0.808    -     
#> McFadden R^2   242   -   0.057    -     
#> RMSE       242   -   0.760    -     
#> AIC       242   -  7725.222    -     
#> BIC       242   -  7735.689    -     
#> 
#> $coef
#> term        est   s.e.   est.se  p.value stars  std.est
#> (Intercept)   3.798  0.038   99.448   <.001  ***   <.001
#> news_1      0.045  0.005   9.358   <.001  ***   0.881
#> ambiv_sexism_1 -0.126  0.008  -15.852   <.001  ***   -2.230

Negative binomial (overdispersed)

Conduct a negative binomial regression analysis for overdispersed count data.

polcom %>%
 mutate(polarize = abs(therm_1 - therm_2)) %>%
 tidy_regression(polarize ~ news_1 + ambiv_sexism_1, type = "negbinom") %>%
 tidy_summary()
#> # A tidy model
#> Model formula : polarize ~ news_1 + ambiv_sexism_1
#> Model type   : Negative binomial regression
#> Model pkg::fun : MASS::glm.nb()
#> Model data   : 242 (observations) X 3 (variables)
#> $fit
#> fit_stat      n   df  estimate  p.value stars
#> χ2        242  239  293.328   0.009  **
#> Δχ2       242   2   8.440   0.015  *
#> Nagelkerke R^2  242   -   0.034    -     
#> McFadden R^2   242   -   0.028    -     
#> RMSE       242   -   0.761    -     
#> AIC       242   -  2312.391    -     
#> BIC       242   -  2326.347    -     
#> 
#> $coef
#> term        est   s.e.  est.se  p.value stars  std.est
#> (Intercept)   3.741  0.258  14.510   <.001  ***   3.752
#> news_1      0.053  0.032   1.632   0.103      0.113
#> ambiv_sexism_1 -0.123  0.054  -2.273   0.023  *    -0.158

Robust and quasi- models

polcom %>%
 mutate(polarize = abs(therm_1 - therm_2)) %>%
 tidy_regression(polarize ~ news_1 + ambiv_sexism_1, 
  type = "quasipoisson", robust = TRUE) %>%
 tidy_summary()
#> # A tidy model
#> Model formula : polarize ~ news_1 + ambiv_sexism_1
#> Model type   : [Robust] Poisson regression
#> Model pkg::fun : robust::glmRob()
#> Model data   : 242 (observations) X 3 (variables)
#> $fit
#> fit_stat      n   df   estimate  p.value stars
#> χ2        242  239  6989.543   <.001  ***
#> Δχ2       242   2  58782.937   <.001  ***
#> Nagelkerke R^2  242   -    1.000    -     
#> McFadden R^2   242   -    0.894    -     
#> RMSE       242   -   31.865    -     
#> AIC       242   -  2245.147    -     
#> BIC       242   -  2259.103    -     
#> 
#> $coef
#> term        est   s.e.   est.se  p.value stars  std.est
#> (Intercept)   3.705  0.071   51.968   <.001  ***   <.001
#> news_1      0.079  0.010   8.325   <.001  ***   1.279
#> ambiv_sexism_1 -0.241  0.022  -11.179   <.001  ***   -2.086

Mean comparison models

ANOVA

Conduct an analysis of variance (ANOVA).

polcom %>%
 mutate(sex = ifelse(sex == 1, "Male", "Female"),
 vote_choice = case_when(
  vote_2016_choice == 1 ~ "Clinton",
  vote_2016_choice == 2 ~ "Trump",
  TRUE ~ "Other")) %>%
 tidy_anova(pp_party ~ sex * vote_choice) %>%
 tidy_summary()
#> # A tidy model
#> Model formula : pp_party ~ sex * vote_choice
#> Model type   : Analysis of variance (ANOVA)
#> Model pkg::fun : stats::aov()
#> Model data   : 243 (observations) X 3 (variables)
#> $fit
#> fit_stat   n   df  estimate  p.value stars
#> F     243   5   53.327   <.001  ***
#> R^2    243   -   0.529    -     
#> Adj R^2  243   -   0.519    -     
#> RMSE    243   -   1.238    -     
#> AIC    243   -  801.115    -     
#> BIC    243   -  825.567    -     
#> 
#> $coef
#> term         est    s.e.   est.se  statistic  p.value stars  std.est
#> sex        1.000   19.238   19.238    12.561   <.001  ***   2.000
#> vote_choice    2.000  388.606  194.303   126.867   <.001  ***   2.000
#> sex:vote_choice  2.000   0.519   0.259    0.169   0.844      2.000
#> Residuals    237.000  362.978   1.532     -     -       237.000

t-tests

polcom %>%
 tidy_ttest(pp_ideology ~ follow_trump) %>%
 tidy_summary()
#> # A tidy model
#> Model formula : pp_ideology ~ follow_trump
#> Model type   : T-test
#> Model pkg::fun : stats::t.test()
#> Model data   : 244 (observations)
#> $fit
#> group    df   mean   diff   lo.95   hi.05
#> FALSE 76.911  4.185   0.922   0.308   1.536
#> TRUE  76.911  3.263  -0.922  -0.308  -1.536
#> 
#> $coef
#>   est    t  p.value stars
#> 0.922  2.992   0.004  **

Latent variable models

Structural equation modeling (SEM)

Conduct latent variable analysis using structural equation modeling.

## mutate data and then specify and estimate model
sem1 <- polcom %>%
 mutate(therm_2 = therm_2 / 10, 
  therm_1 = 10 - therm_1 / 10) %>%
 tidy_sem_model(news =~ news_1 + news_2 + news_3 + news_4 + news_5 + news_6,
  ambiv_sexism =~ ambiv_sexism_1 + ambiv_sexism_2 + ambiv_sexism_3 + 
   ambiv_sexism_4 + ambiv_sexism_5 + ambiv_sexism_6,
  partisan =~ a*therm_1 + a*therm_2,
  ambiv_sexism ~ age + sex + hhinc + edu + news + partisan) %>%
 tidy_sem()

## print model summary
sem1 %>%
 tidy_summary()
#> # A tidy model
#> Model formula : news =~ news_1 + news_2 + news_3 + news_4 + news_5 + news_6
#>         ambiv_sexism =~ ambiv_sexism_1 + ambiv_sexism_2 + ambiv_sexism_3 + ambiv_sexism_4 + 
#>           ambiv_sexism_5 + ambiv_sexism_6
#>         partisan =~ a * therm_1 + a * therm_2
#>         ambiv_sexism ~ age + sex + hhinc + edu + news + partisan
#> Model type   : Structural Equation Model (SEM)
#> Model pkg::fun : lavaan::sem()
#> Model data   : 235 (observations) X 18 (variables)
#> $fit
#> fit_stat       n   df   estimate  p.value stars
#> chisq       235  127   239.579   <.001  ***
#> aic        235   -    0.907    -     
#> bic        235   -    0.892    -     
#> cfi        235   -  16138.684    -     
#> tli        235   -  16256.310    -     
#> rmsea       235   -    0.061    -     
#> srmr        235   -    0.073    -     
#> R^2:ambiv_sexism  235   -    0.379    -     
#> 
#> $coef
#> term                est    se  est.se  p.value stars  std.est
#> news =~ news_1          1.000  <.001   -     -        0.173
#> news =~ news_2          1.592  0.722   2.204   0.028  *    0.340
#> news =~ news_3          5.069  2.095   2.419   0.016  *    0.781
#> news =~ news_4          5.587  2.312   2.417   0.016  *    0.851
#> news =~ news_5          3.493  1.485   2.353   0.019  *    0.520
#> news =~ news_6          1.255  0.683   1.838   0.066  +    0.196
#> ambiv_sexism =~ ambiv_sexism_1  1.000  <.001   -     -        0.825
#> ambiv_sexism =~ ambiv_sexism_2  0.942  0.067  14.043   <.001  ***   0.801
#> ambiv_sexism =~ ambiv_sexism_3  0.795  0.067  11.844   <.001  ***   0.706
#> ambiv_sexism =~ ambiv_sexism_4  0.743  0.064  11.647   <.001  ***   0.697
#> ambiv_sexism =~ ambiv_sexism_5  0.902  0.062  14.644   <.001  ***   0.825
#> ambiv_sexism =~ ambiv_sexism_6  0.904  0.064  14.185   <.001  ***   0.807
#> partisan =~ therm_1       1.000  <.001   -     -        0.577
#> partisan =~ therm_2       1.000  <.001   -     -        0.592
#> ambiv_sexism ~ age       -0.004  0.005  -0.824   0.410      -0.051
#> ambiv_sexism ~ sex       -0.271  0.130  -2.089   0.037  *    -0.130
#> ambiv_sexism ~ hhinc      -0.021  0.023  -0.878   0.380      -0.057
#> ambiv_sexism ~ edu       -0.088  0.069  -1.279   0.201      -0.083
#> ambiv_sexism ~ news       0.130  0.215   0.607   0.544      0.047
#> ambiv_sexism ~ partisan     0.347  0.069   5.032   <.001  ***   0.592

Multilevel modeling (MLM)

Estimate multilevel (mixed effects) models.

lme4::sleepstudy %>%
 tidy_mlm(Reaction ~ Days + (Days | Subject)) %>%
 summary()
#> Linear mixed model fit by REML ['lmerMod']
#> Formula: Reaction ~ Days + (Days | Subject)
#>  Data: .data
#> 
#> REML criterion at convergence: 1743.6
#> 
#> Scaled residuals: 
#>  Min   1Q Median   3Q  Max 
#> -3.954 -0.463 0.023 0.463 5.179 
#> 
#> Random effects:
#> Groups  Name    Variance Std.Dev. Corr
#> Subject (Intercept) 612.1  24.74    
#>      Days     35.1   5.92  0.07
#> Residual       654.9  25.59    
#> Number of obs: 180, groups: Subject, 18
#> 
#> Fixed effects:
#>       Estimate Std. Error t value
#> (Intercept)  251.41    6.82  36.84
#> Days      10.47    1.55  6.77
#> 
#> Correlation of Fixed Effects:
#>   (Intr)
#> Days -0.138

Data sets

Comes with one data set.

polcom

Consists of survey responses to demographic, background, and Likert-type attitudinal items about political communication.

print(tibble::as_tibble(polcom), n = 5)
#> # A tibble: 244 x 63
#>  follow_trump news_1 news_2 news_3 news_4 news_5 news_6 ambiv_sexism_1 ambiv_sexism_2
#> * <lgl>     <int> <int> <int> <int> <int> <int>     <int>     <int>
#> 1 TRUE       8   1   1   1   1   6       3       3
#> 2 TRUE       1   1   1   1   1   1       5       5
#> 3 TRUE       8   1   1   1   8   1       5       4
#> 4 TRUE       8   1   1   1   1   6       2       2
#> 5 TRUE       6   1   2   1   1   3       4       4
#> # ... with 239 more rows, and 54 more variables: ambiv_sexism_3 <int>, ambiv_sexism_4 <int>,
#> #  ambiv_sexism_5 <int>, ambiv_sexism_6 <int>, img1_hrc_1 <int>, img1_hrc_2 <dbl>,
#> #  img1_hrc_3 <int>, img1_hrc_4 <dbl>, img1_hrc_5 <int>, img1_hrc_6 <int>, img1_hrc_7 <int>,
#> #  img1_hrc_8 <int>, img1_hrc_9 <int>, img2_hrc_10 <int>, img2_hrc_11 <int>, img2_hrc_12 <dbl>,
#> #  img2_hrc_13 <int>, img2_hrc_14 <int>, img2_hrc_15 <dbl>, img1_djt_1 <int>, img1_djt_2 <dbl>,
#> #  img1_djt_3 <int>, img1_djt_4 <dbl>, img1_djt_5 <int>, img1_djt_6 <int>, img1_djt_7 <int>,
#> #  img1_djt_8 <int>, img1_djt_9 <int>, img2_djt_10 <int>, img2_djt_11 <int>, img2_djt_12 <dbl>,
#> #  img2_djt_13 <int>, img2_djt_14 <int>, img2_djt_15 <dbl>, pie_1 <int>, pie_2 <int>, pie_3 <int>,
#> #  pie_4 <int>, vote_2016 <int>, vote_2016_choice <int>, pp_ideology <int>, pp_party <int>,
#> #  pp_party_lean <int>, therm_1 <int>, therm_2 <int>, therm_3 <int>, therm_4 <int>, therm_5 <int>,
#> #  age <int>, sex <int>, gender <int>, race <int>, edu <int>, hhinc <int>

Descriptive statistics

Return summary statistics in the form of a data frame ***(not yet added)***.

## summary stats for social media use (numeric) variables
summarize_numeric(polcom_survey, smuse1:smuse3)

## summary stats for respondent sex and race (categorical) variables
summarize_categorical(polcom_survey, sex, race)

Estimate Cronbach's alpha for a set of variables.

## reliability of ambivalent sexism items
cronbachs_alpha(polcom, ambiv_sexism_1:ambiv_sexism_6)
#>              items  alpha alpha.std
#> 1 ambiv_sexism_1:ambiv_sexism_6 0.904609 0.904600
#> 2        -ambiv_sexism_1 0.882322 0.882225
#> 3        -ambiv_sexism_2 0.884272 0.884121
#> 4        -ambiv_sexism_3 0.896061 0.896218
#> 5        -ambiv_sexism_4 0.897127 0.897411
#> 6        -ambiv_sexism_5 0.883554 0.883420
#> 7        -ambiv_sexism_6 0.881595 0.881855