---
title: 'Dutch modality exclusivity norms'
author: '<a href="#section-info" style="color:#E4E5E8 !important; text-decoration:none !important; font-size:10px"> Bernabeu (2018) </a>'
output:
  flexdashboard::flex_dashboard:
    theme: 'spacelab'
    orientation: columns
    vertical_layout: scroll
    favicon: https://i.ibb.co/bB9fCfr/norms-favicon.png
runtime: shiny
---
```{r packages, include = FALSE}
# Load every package used across the dashboard's chunks (data wrangling, PCA,
# plotting, tables, and the Shiny runtime). Order is alphabetical except where
# masking requires otherwise; do not reorder casually -- later packages mask
# earlier ones (e.g. dplyr vs plyr, MASS::select vs dplyr::select).
library(arules)
library(car)
library(contrast)
library(corpcor)
library(plyr) # Load before `dplyr` (https://github.com/tidyverse/dplyr/issues/347)
library(dplyr)
library(flexdashboard)
library(gdata)
library(ggplot2)
library(ggrepel)
library(GPArotation)
library(grid)
library(gridExtra)
library(gtools)
library(Hmisc)
library(htmltools)
library(irr)
library(kableExtra)
library(knitr)
library(lattice)
library(ltm)
library(MASS)
library(pander)
library(pastecs)
library(plotly)
library(png)
library(psych)
library(QuantPsyc)
library(RColorBrewer)
library(RCurl)
library(reactable)
library(reshape)
library(rmarkdown)
library(Rmisc)
library(rsconnect)
library(scales)
library(shiny)
library(shinyWidgets)
library(stringr)
library(tibble)
library(tidyr)
# Keep trailing zeros in pander tables so rounded values stay aligned
panderOptions('keep.trailing.zeros', TRUE)
# No chunk caching (document runs under Shiny); center all figures
knitr::opts_chunk$set(cache = FALSE, fig.align='center')
```
<!-- Begin definition of layout parameters -->
<head>
<style type="text/css">
/* NOTE: HTML comments (<!-- -->) are invalid inside <style> and can cause the
   CSS parser to drop the following rule; CSS comments are used throughout. */
/* Tabs format */
/* inactive tab */
ul.nav.navbar-nav.navbar-left a {
color: white !important;
background-color: transparent !important;
border-bottom: none !important;
}
/* inactive tab hovered over */
ul.nav.navbar-nav.navbar-left a:hover {
color: white !important;
background-color: #34507D !important;
border-bottom: none !important;
text-shadow: none !important;
}
/* active tab */
ul.nav.navbar-nav.navbar-left li.active a {
color: white !important;
font-weight: bold !important;
background-color: #5472A3 !important;
border-bottom: none !important;
text-shadow: none !important;
}
/* visited links in active tabs */
ul.nav.navbar-nav.navbar-left li.active a:visited {
color: white !important;
font-weight: bold !important;
background-color: #5472A3 !important;
border-bottom: none !important;
text-shadow: none !important;
}
/* visited links in inactive tabs */
ul.nav.navbar-nav.navbar-left a:visited {
color: white !important;
font-weight: bold !important;
background-color: #5472A3 !important;
border-bottom: none !important;
text-shadow: none !important;
}
/* Links format */
/* The section IDs need the leading '#' to match, and the 'a' (plus any
   pseudo-class) must follow each ID so every listed page is covered, not just
   the last one in the list. */
/* unvisited link */
#section-properties a, #section-concepts a, #section-cf-lc-english-norms a, #section-sound-symbolism a, #section-info a {
color: #3C6CA7 !important;
border-bottom: 0.03px solid #5277A5 !important;
font-weight: normal !important;
background-color: transparent !important;
text-shadow: none !important;
}
/* visited link */
#section-properties a:visited, #section-concepts a:visited, #section-cf-lc-english-norms a:visited, #section-sound-symbolism a:visited, #section-info a:visited {
color: #426DA1 !important;
border-bottom: none !important;
font-weight: normal !important;
background-color: transparent !important;
text-shadow: none !important;
}
/* mouse over link */
#section-properties a:hover, #section-concepts a:hover, #section-cf-lc-english-norms a:hover, #section-sound-symbolism a:hover, #section-info a:hover {
color: #2462B0 !important;
border-bottom: none !important;
font-weight: normal !important;
background-color: transparent !important;
text-shadow: 1px 1px darkgrey !important;
}
/* selected link */
#section-properties a:active, #section-concepts a:active, #section-cf-lc-english-norms a:active, #section-sound-symbolism a:active, #section-info a:active {
color: #1964BF !important;
border-bottom: none !important;
font-weight: bold !important;
background-color: transparent !important;
text-shadow: 1px 1px darkgrey !important;
}
/* Customise output to specific screen sizes */
.desktop-only {display: inline;}
/* Smartphone Portrait and Landscape */
@media only screen
and (max-width : 765px){
.desktop-only {display: none;}
}
.mobile-only {display: inline;}
/* Smartphone Portrait and Landscape */
@media only screen
and (min-width : 766px){
.mobile-only {display: none;}
}
/* Fonts */
body{ /* Normal */
font-size: 16px;
}
td { /* Table */
font-size: 14px;
}
h1.title {
font-size: 38px;
font-weight: bold;
color: #28002E
}
h1 { /* Header 1 */
font-size: 28px;
font-weight: bold;
color: #28002E
}
h2 { /* Header 2 */
font-size: 22px;
font-weight: bold;
color: #28002E
}
h3 { /* Header 3 */
font-size: 18px;
font-weight: bold;
}
h4 { /* Header 4 */
font-size: 16px;
}
h5 { /* Header 5 */
font-size: 15px;
}
h6 { /* Header 6 */
font-size: 14px;
}
code.r{ /* Code block */
font-size: 12px;
}
pre { /* Code-formatted output */
font-size: 14px;
padding-top: 2px;
padding-bottom: 2px;
margin-top: -15px;
margin-bottom: 18px;
}
/* Splitting columns */
* {
box-sizing: border-box;
}
/* Create two equal columns that float next to each other */
.column {
float: left;
padding: 10px;
}
/* Clear floats after the columns */
.row:after {
content: "";
display: table;
clear: both;
}
</style>
<!-- Load library of icons -->
<link rel='stylesheet' href='https://use.fontawesome.com/releases/v5.7.0/css/all.css' integrity='sha384-lZN37f5QGtY3VHgisS14W3ExzMWZxybE1SJSEsQp9S+oqd12jhcu+A56Ebc1zFSJ' crossorigin='anonymous'>
<!-- Javascript function to enable a hovering tooltip (jQuery is provided by the flexdashboard runtime) -->
<script>
$(document).ready(function(){
$('[data-toggle="tooltip1"]').tooltip();
});
</script>
</head>
```{r global, include = FALSE}
# Since this script contains the dashboard, the code run is minimised, and all
# the rest--namely, diagnostic commands--is commented out. Fully run code on the
# proper analysis scripts, at https://osf.io/brkjw/.

# Perform analyses for PCA

# RELATION AMONG MODALITIES
# Below is a Principal Components Analysis (PCA) with plots. Firstly it is performed
# on the Dutch norms, and then on Lynott and Connell's (2009, 2013) English norms
# (leaving out gustatory and olfactory scores and words).

# Full norms data set: one row per word, Dutch ratings plus corpus measures
all = read.csv('https://raw.githubusercontent.com/pablobernabeu/Modality-exclusivity-norms-747-Dutch-English-replication/master/all.csv', fileEncoding = 'Latin1')
#nrow(all)

# Set correct numeric class for standard deviation variables.
# NOTE(review): if these columns arrived as factors (pre-R 4.0 read.csv
# default), as.numeric() would yield level codes rather than the values;
# as.numeric(as.character(x)) would be needed -- confirm against the CSV.
sd_cols = c('SD_Auditory', 'SD_Haptic', 'SD_Visual')
all[sd_cols] = lapply(all[sd_cols], as.numeric)

# Corpus-based columns in which NA means the word was absent from the corpus.
# NAs in these are replaced with 0 below so that range-based selection in the
# dashboard still includes those words.
corpus_cols = c('phonemes_DUTCHPOND', 'freq_lg10CD_SUBTLEXNL',
                'freq_lg10WF_SUBTLEXNL', 'freq_CELEX_lem',
                'orth_neighbours_DUTCHPOND', 'phon_neighbours_DUTCHPOND',
                'AoA_Brysbaertetal2014', 'concrete_Brysbaertetal2014')

# Replace NAs with 0 in the given columns of a data frame; returns the frame
replace_na_with_zero = function(dat, cols) {
  dat[cols] = lapply(dat[cols], function(v) replace(v, is.na(v), 0))
  dat
}

# PROPERTIES
# Principal component analysis on the three modalities
# Check conditions for a PCA
# matrix
prop = all[all$cat == 'Property' & !is.na(all$word), c('Auditory', 'Haptic', 'Visual')]
#nrow(prop)
prop_matrix = cor(prop, use = 'complete.obs')
#prop_matrix
#round(prop_matrix, 2)
# POOR: correlations not apt for a PCA, with too many below .3
# now on the raw vars:
#nrow(prop)
#cortest.bartlett(prop)
# GOOD: Bartlett's test significant
# KMO: Kaiser-Meyer-Olkin Measure of Sampling Adequacy
#KMO(prop_matrix)
# Result: .56 = mediocre. PCA not strongly recommended. But we still do it
# because the purpose is graphical only.
# check determinant
#det(prop_matrix)
# GOOD: > 0.00001
# start off with unrotated PCA
pc1_prop = psych::principal(prop, nfactors = 3, rotate = "none")
#pc1_prop
# RESULT: Only PC1, with eigenvalue > 1, should be extracted,
# acc to Kaiser's criterion (Jolliffe's threshold of 0.7 way too lax;
# Field, Miles, & Field, 2012)
# Unrotated: scree plot
#plot(pc1_prop$values, type = "b")
# Result: one or two RCs should be extracted, converging with eigenvalues
# Now with varimax rotation, Kaiser-normalized (by default).
# Always preferable because it captures explained variance best.
# Compare eigenvalues w/ 1 & 2 factors
pc2_prop = psych::principal(prop, nfactors = 2, rotate = "varimax", scores = TRUE)
#pc2_prop
#pc2_prop$loadings
# good to extract 2 factors, as they both explain quite the same variance,
# and both surpass 1 eigenvalue
#pc2_prop$residual
#pc2_prop$fit
#pc2_prop$communality
# Results based on a Kaiser-normalized orthogonal (varimax) rotation
# (by default in psych::stats). Residuals OK: fewer than 50% have absolute
# values > 0.05 (exactly 50% do). Model fit good, > .90.
# Communalities good, all > .7 (av = .83).
# subset and add PCs
props = all[all$cat == 'Property' & !is.na(all$word),]
#nrow(props)
props = cbind(props, pc2_prop$scores)
#nrow(props)
# Set to character format
props$word = as.character(props$word)
# Replace NAs in corpora with 0 to allow selection; properties additionally
# carry an inflected-adjective contextual-diversity column
props = replace_na_with_zero(props, c(corpus_cols, 'inflected_adj_freq_lg10CD_SUBTLEXNL'))
# Turn modality exclusivity into percentage format
props$Exclusivity = props$Exclusivity * 100

# CONCEPTS
# Principal component analysis on the three modalities
# Check conditions for a PCA
# matrix
conc = all[all$cat == 'Concept' & !is.na(all$word), c('Auditory', 'Haptic', 'Visual')]
#nrow(conc)
conc_matrix = cor(conc, use = 'complete.obs')
#conc_matrix
#round(conc_matrix, 2)
# POOR: correlations not apt for a PCA, with too many below .3
# now on the raw data:
#nrow(conc)
#cortest.bartlett(conc)
# GOOD: Bartlett's test significant
# KMO: Kaiser-Meyer-Olkin Measure of Sampling Adequacy
#KMO(conc_matrix)
# Result: .49 = poor. PCA not strongly recommended. But we still do it
# because the purpose is graphical really.
# check determinant
#det(conc_matrix)
# GOOD: > 0.00001
# start off with unrotated PCA
pc1_conc = psych::principal(conc, nfactors = 3, rotate = "none")
#pc1_conc
# RESULT good: PC1 and PC2, with eigenvalue > 1, should be extracted,
# acc to Kaiser's criterion (Jolliffe's threshold of 0.7 way too lax;
# Field, Miles, & Field, 2012)
# Unrotated: scree plot
#plot(pc1_conc$values, type = "b")
# Result: with no point of inflexion along the y axis, two PCs would obtain.
# Now with varimax rotation, Kaiser-normalized (by default):
# Always preferable because it captures explained variance best.
# Compare eigenvalues w/ 1 & 2 Principal Components
pc2_conc = psych::principal(conc, nfactors = 2, rotate = "varimax", scores = TRUE)
#pc2_conc
#pc2_conc$loadings
# good to extract 2 Principal Components, as they both explain quite the same variance,
# and both surpass 1 eigenvalue
#pc2_conc$residual
#pc2_conc$fit
#pc2_conc$communality
# Results based on a Kaiser-normalized orthogonal (varimax) rotation
# (by default in psych::stats). Residuals bad: over 50% have absolute
# values > 0.05. Model fit good, > .90. Communalities good, all > .7 (av = .82).
# subset and add PCs
concs = all[all$cat == 'Concept' & !is.na(all$word),]
#nrow(concs)
concs = cbind(concs, pc2_conc$scores)
#nrow(concs)
# Set to character format
concs$word = as.character(concs$word)
# Replace NAs in corpora with 0 to allow selection
concs = replace_na_with_zero(concs, corpus_cols)
# Turn modality exclusivity into percentage format
concs$Exclusivity = concs$Exclusivity * 100

# Colors for plots
colours = c('Auditory' = 'firebrick1', 'Haptic' = 'dodgerblue', 'Visual' = 'forestgreen')
```
<i class="fas fa-code" aria-hidden="true"></i> Info {style="background-color: #FCFCFC; data-width: 100%; width: 900px; margin: 0 auto;"}
=======================================================================
Column {style="height:1300px; background-color:#FCFCFC;"}
--------------------------------------------------------
<h2> Information </h2>
<!-- Links -->
<div style = "text-align: right; padding-top: 35px; padding-bottom: 30px; padding-right: 60px;">
<a href="https://osf.io/58gzs/" style="color:#5C746D; border-bottom:none;font-size: 20px; font-weight: bold; font-family: 'Courier New', Courier, monospace;"> <i class='fas fa-database' aria-hidden="true" style='font-size:20px; color:#577787'></i> Data</a>
<a href="https://github.com/pablobernabeu/Modality-exclusivity-norms-747-Dutch-English-replication/blob/master/Shiny-app%2FDutch-modality-exclusivity-norms.rmd" style="color:#5C746D; border-bottom:none;font-size: 20px; font-weight: bold; font-family: 'Courier New', Courier, monospace;"> <i class='fab fa-r-project' aria-hidden="true" style='font-size:20px; color:#526772'></i><i class='fas fa-code' aria-hidden="true" style='font-size:12px; color:#526772'></i> Code</a>
<a href="https://mybinder.org/v2/gh/pablobernabeu/Modality-exclusivity-norms-747-Dutch-English-replication/master?urlpath=rstudio" style="color:#5C746D; border-bottom:none; font-size: 15px; font-weight: bold; font-family: 'Courier New', Courier, monospace;"> <img src='https://img.shields.io/badge/Binder-RStudio-579ACA.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFkAAABZCAMAAABi1XidAAAB8lBMVEX///9XmsrmZYH1olJXmsr1olJXmsrmZYH1olJXmsr1olJXmsrmZYH1olL1olJXmsr1olJXmsrmZYH1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olJXmsrmZYH1olL1olL0nFf1olJXmsrmZYH1olJXmsq8dZb1olJXmsrmZYH1olJXmspXmspXmsr1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olLeaIVXmsrmZYH1olL1olL1olJXmsrmZYH1olLna31Xmsr1olJXmsr1olJXmsrmZYH1olLqoVr1olJXmsr1olJXmsrmZYH1olL1olKkfaPobXvviGabgadXmsqThKuofKHmZ4Dobnr1olJXmsr1olJXmspXmsr1olJXmsrfZ4TuhWn1olL1olJXmsqBi7X1olJXmspZmslbmMhbmsdemsVfl8ZgmsNim8Jpk8F0m7R4m7F5nLB6jbh7jbiDirOEibOGnKaMhq+PnaCVg6qWg6qegKaff6WhnpKofKGtnomxeZy3noG6dZi+n3vCcpPDcpPGn3bLb4/Mb47UbIrVa4rYoGjdaIbeaIXhoWHmZYHobXvpcHjqdHXreHLroVrsfG/uhGnuh2bwj2Hxk17yl1vzmljzm1j0nlX1olL3AJXWAAAAbXRSTlMAEBAQHx8gICAuLjAwMDw9PUBAQEpQUFBXV1hgYGBkcHBwcXl8gICAgoiIkJCQlJicnJ2goKCmqK+wsLC4usDAwMjP0NDQ1NbW3Nzg4ODi5+3v8PDw8/T09PX29vb39/f5+fr7+/z8/Pz9/v7+zczCxgAABC5JREFUeAHN1ul3k0UUBvCb1CTVpmpaitAGSLSpSuKCLWpbTKNJFGlcSMAFF63iUmRccNG6gLbuxkXU66JAUef/9LSpmXnyLr3T5AO/rzl5zj137p136BISy44fKJXuGN/d19PUfYeO67Znqtf2KH33Id1psXoFdW30sPZ1sMvs2D060AHqws4FHeJojLZqnw53cmfvg+XR8mC0OEjuxrXEkX5ydeVJLVIlV0e10PXk5k7dYeHu7Cj1j+49uKg7uLU61tGLw1lq27ugQYlclHC4bgv7VQ+TAyj5Zc/UjsPvs1sd5cWryWObtvWT2EPa4rtnWW3JkpjggEpbOsPr7F7EyNewtpBIslA7p43HCsnwooXTEc3UmPmCNn5lrqTJxy6nRmcavGZVt/3Da2pD5NHvsOHJCrdc1G2r3DITpU7yic7w/7Rxnjc0kt5GC4djiv2Sz3Fb2iEZg41/ddsFDoyuYrIkmFehz0HR2thPgQqMyQYb2OtB0WxsZ3BeG3+wpRb1vzl2UYBog8FfGhttFKjtAclnZYrRo9ryG9uG/FZQU4AEg8ZE9LjGMzTmqKXPLnlWVnIlQQTvxJf8ip7VgjZjyVPrjw1te5otM7RmP7xm+sK2Gv9I8Gi++BRbEkR9EBw8zRUcKxwp73xkaLiqQb+kGduJTNHG72zcW9LoJgqQxpP3/Tj//c3yB0tqzaml05/+orHLksVO+95kX7/7qgJvnjlrfr2Ggsyx0eoy9uPzN5SPd86aXggOs
EKW2Prz7du3VID3/tzs/sSRs2w7ovVHKtjrX2pd7ZMlTxAYfBAL9jiDwfLkq55Tm7ifhMlTGPyCAs7RFRhn47JnlcB9RM5T97ASuZXIcVNuUDIndpDbdsfrqsOppeXl5Y+XVKdjFCTh+zGaVuj0d9zy05PPK3QzBamxdwtTCrzyg/2Rvf2EstUjordGwa/kx9mSJLr8mLLtCW8HHGJc2R5hS219IiF6PnTusOqcMl57gm0Z8kanKMAQg0qSyuZfn7zItsbGyO9QlnxY0eCuD1XL2ys/MsrQhltE7Ug0uFOzufJFE2PxBo/YAx8XPPdDwWN0MrDRYIZF0mSMKCNHgaIVFoBbNoLJ7tEQDKxGF0kcLQimojCZopv0OkNOyWCCg9XMVAi7ARJzQdM2QUh0gmBozjc3Skg6dSBRqDGYSUOu66Zg+I2fNZs/M3/f/Grl/XnyF1Gw3VKCez0PN5IUfFLqvgUN4C0qNqYs5YhPL+aVZYDE4IpUk57oSFnJm4FyCqqOE0jhY2SMyLFoo56zyo6becOS5UVDdj7Vih0zp+tcMhwRpBeLyqtIjlJKAIZSbI8SGSF3k0pA3mR5tHuwPFoa7N7reoq2bqCsAk1HqCu5uvI1n6JuRXI+S1Mco54YmYTwcn6Aeic+kssXi8XpXC4V3t7/ADuTNKaQJdScAAAAAElFTkSuQmCC' alt='Launch Binder RStudio'></img> Edit and run code online</a>
<a href="http://creativecommons.org/licenses/by/4.0/" rel="Attribution licence" style="color:#9AA7A3; font-size:15px"> Licence &nbsp;<img style="border-width:0;" src="https://i.creativecommons.org/l/by/4.0/80x15.png" alt="Creative Commons License CC BY"></img></a>
</div>
<div style = "padding-left: 60px; padding-right: 60px; text-align: justify; background-color:#FCFCFC; font-size: 16px; line-height: 1.6;">
This dashboard presents the data and analyses of a set of modality exclusivity norms for Dutch. Various tabs and interactive plots are available. In addition, the data set is available in [CSV format](https://osf.io/ge7pn/) and [Excel format including column definitions](https://osf.io/58gzs/).
The norms, which are ratings of linguistic stimuli, served a twofold purpose: first, the creation of linguistic stimuli ([Bernabeu, 2018](https://psyarxiv.com/s2c5h); see also Speed & Majid, 2017), and second, a conceptual replication of Lynott and Connell's (2009, 2013) analyses. In the collection of the ratings, forty-two respondents completed surveys for the properties or the concepts separately. Each word was rated by eight participants on average, with a minimum of five (e.g., for *bevriezend*) and a maximum of ten ratings per word (e.g., for *donzig*). The [instructions to participants](https://osf.io/ungey/) were similar to those used by Lynott and Connell (2009, 2013), except that we elicited three modalities (auditory, haptic, visual) instead of five.
> <div style = "text-align: justify; background-color: #FCFCFC; font-size: 15px;"> 'This is a stimulus validation for a future experiment. The task is to rate how much you experience everyday' [properties/concepts] 'using three different perceptual senses: feeling by touch, hearing and seeing. Please rate every word on each of the three senses, from 0 (not experienced at all with that sense) to 5 (experienced greatly with that sense). If you do not know the meaning of a word, leave it blank.' </div>
<h3> Definitions (as in Lynott & Connell, 2009, 2013) </h3>
* Dominant modality: highest-rated modality;
* Modality exclusivity: range of the three modality ratings divided by the sum;
* Perceptual strength: highest rating across modalities.
<br>
```{r}
# Figure 1: stacked bar chart of word counts per dominant modality, faceted by
# word category, with modality exclusivity binned into 20% bands.
norms = all[!is.na(all$main), ]

# Bin exclusivity (a 0-1 proportion) into five bands and label them
band_labels = c("0-20%", "20-40%", "40-60%", "60-80%", "80-100%")
norms$Range = floor(norms$Exclusivity * 4)
norms$Range = mapvalues(norms$Range, from = c(0, 1, 2, 3, 4), to = band_labels)

# Pluralise the category labels for the facet strips
norms$cat = dplyr::recode(norms$cat, Concept = "Concepts", Property = "Properties")
# Set order to display properties first, instead of alphabetical 'Concepts-Properties' order
norms$cat = as.factor(norms$cat)
norms$cat = factor(norms$cat, levels = rev(levels(norms$cat)))

exclusivity_plot = ggplot(norms) +
  geom_bar(aes(x = main, fill = Range), position = position_stack(reverse = TRUE)) +
  scale_fill_grey(start = .9, end = 0, labels = band_labels,
                  guide = guide_legend(reverse = TRUE, override.aes = list(size = 11))) +
  scale_x_discrete(expand = c(.24, 0)) +
  scale_y_continuous(expand = expand_scale(mult = c(0, .05))) +
  facet_grid(. ~ cat) +
  labs(fill = "Modality\nExclusivity", x = 'Dominant Modality', y = 'Number of Words') +
  theme_bw() +
  theme(legend.position = c(.17, .6),
        legend.title = element_text(size = 20, face = 'bold'),
        legend.text = element_text(size = 18),
        legend.background = element_rect(fill = alpha('white', 0)),
        axis.title = element_text(size = 21, face = "bold"),
        axis.text = element_text(size = 19),
        axis.title.x = element_text(margin = margin(.09, 0, 0, 0, "cm")),
        axis.text.x = element_text(margin = margin(.09, 0, 0, 0, "cm")),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        strip.text = element_text(size = 21, face = 'bold', vjust = .7,
                                  margin = margin(.3, 0, .3, 0, "cm")),
        axis.ticks.x = element_blank())

renderPlot(exclusivity_plot)
```
<h5 style='text-align:justify; padding-left:5px; padding-right:5px; padding-bottom:10px; line-height: 1.4;'> Figure 1. Number of words in the norms per word category, dominant modality, and modality exclusivity. <br> [<i class='fas fa-external-link-alt' aria-hidden=TRUE style='font-size:7'></i> Github](https://raw.githubusercontent.com/pablobernabeu/Modality-exclusivity-norms-747-Dutch-English-replication/master/stacked_exc.png) </h5>
These norms were validated in an experiment demonstrating that shifts across trials with different dominant modalities incurred semantic processing costs ([Bernabeu, Willems, & Louwerse, 2017](https://mindmodeling.org/cogsci2017/papers/0318/index.html)). All data for that study are [available](https://osf.io/97unm/wiki/home/), including a [dashboard](https://pablobernabeu.shinyapps.io/ERP-waveform-visualization_CMS-experiment/) (in case of downtime of the dashboard site, please see [this alternative](https://mybinder.org/v2/gh/pablobernabeu/Modality-switch-effects-emerge-early-and-increase-throughout-conceptual-processing/master?urlpath=shiny/Shiny-app/)).
The [**properties**](#properties){style='background-color:#FDFFFF'} and the [**concepts**](#concepts){style='background-color:#FDFFFF'} may also be consulted together on a [**table**](#table){style='background-color:#FDFFFF'}. Distinct relationships emerged among the modalities, with the visual and haptic modalities being more related to each other than to the auditory modality. This ties in with findings that, in conceptual processing, modalities can be collated based on language statistics (Louwerse & Connell, 2011). Furthermore, properties were found to be more strongly perceptual than concepts ([**cf. English norms by Lynott & Connell, 2009, 2013**](#section-cf-lc-english-norms){style='background-color:#FDFFFF'}).
<br>
```{r}
# Chunk: PCAs mirroring the 'global' chunk, here for the English norms
# (Lynott & Connell, 2009, 2013) and the Dutch norms, feeding the four
# letters-plus-density comparison plots. Diagnostics are commented out;
# see https://osf.io/brkjw/ for the fully run analysis scripts.
# NOTE(review): this chunk continues beyond the view shown here.
# ENGLISH PROPERTIES
# check conditions for a PCA
# matrix
eng_prop = all[all$cat == 'Property', c('English_Auditory_Lynott_Connell_2009_2013', 'English_Haptic_Lynott_Connell_2009_2013', 'English_Visual_Lynott_Connell_2009_2013')]
# nrow(eng_prop)
eng_prop_matrix = cor(eng_prop, use = 'complete.obs')
# eng_prop_matrix
# round(eng_prop_matrix, 2)
# OK: correlations good for a PCA, with enough < .3
# now on the raw vars:
# nrow(eng_prop)
# cortest.bartlett(eng_prop)
# GOOD: Bartlett's test significant
# KMO: Kaiser-Meyer-Olkin Measure of Sampling Adequacy
# KMO(eng_prop_matrix)
# Result: .56 = mediocre. PCA not strongly recommended. But we still do it
# because the purpose is graphical only.
# check determinant
# det(eng_prop_matrix)
# GOOD: > 0.00001
# start off with unrotated PCA
pc1_eng_prop = psych::principal(eng_prop, nfactors = 3, rotate = "none")
# pc1_eng_prop
# RESULT: Extract either one PC, acc to Kaiser's criterion, or two RCs, acc to
# Joliffe's (Field, Miles, & Field, 2012)
# Unrotated: scree plot
# plot(pc1_eng_prop$values, type = "b")
# Result: again one or two RCs should be extracted
# Now with varimax rotation, Kaiser-normalized (by default)
pc2_eng_prop = psych::principal(eng_prop, nfactors = 2, rotate = "varimax", scores = TRUE)
# pc2_eng_prop
# pc2_eng_prop$loadings
# two components are good, as they both have eigenvalues over 1
# pc2_eng_prop$residual
# pc2_eng_prop$fit
# pc2_eng_prop$communality
# Results based on a Kaiser-normalized orthogonal (varimax) rotation
# (by default in psych::stats). Residuals bad: more than 50% have absolute
# values > 0.05. Model fit good, > .90. Communalities good, all > .7.
# subset and add PCs
eng_props = all[all$cat == 'Property', ]
# nrow(eng_props)
eng_props = cbind(eng_props, pc2_eng_prop$scores)
# nrow(eng_props)
#
# head(eng_props)
# Finally, plot
# Set sample words to show on plot (first word in each modality)
auditory_w = as.character(sort(eng_props[eng_props$English_Main_Lynott_Connell_2009_2013=='Auditory', 'English_Word_Lynott_Connell_2009_2013'])[1])
haptic_w = as.character(sort(eng_props[eng_props$English_Main_Lynott_Connell_2009_2013=='Haptic', 'English_Word_Lynott_Connell_2009_2013'])[1])
visual_w = as.character(sort(eng_props[eng_props$English_Main_Lynott_Connell_2009_2013=='Visual', 'English_Word_Lynott_Connell_2009_2013'])[1])
w_set = c(auditory_w, haptic_w, visual_w)
# Abbreviate modality labels to single plotting letters (a/h/v)
eng_props$English_Main_Lynott_Connell_2009_2013 = dplyr::recode(eng_props$English_Main_Lynott_Connell_2009_2013, Auditory = "a", Haptic = "h", Visual = "v")
# Letters-plus-density plot on the two varimax-rotated PCs; the first word per
# modality (w_set) is enlarged, circled, and labelled
Engprops = ggplot(eng_props,
aes(RC1, RC2, label = as.character(English_Main_Lynott_Connell_2009_2013))) + stat_density2d (color = "gray87") +
geom_text(size = ifelse(eng_props$English_Word_Lynott_Connell_2009_2013 %in% w_set, 8, 5),
fontface = ifelse(eng_props$English_Word_Lynott_Connell_2009_2013 %in% w_set, 'bold', 'plain')) +
geom_point(data=eng_props[eng_props$English_Word_Lynott_Connell_2009_2013 %in% w_set,], pch=21, fill=NA, size=8, stroke=2, alpha=.6) +
ggtitle('English properties (Lynott & Connell, 2009)') +
labs(x = "", y = "Varimax-rotated Principal Component 2") + theme_bw() +
theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'),
axis.title.x = element_text(colour = 'black', size = 14, margin=margin(1,1,1,1)),
axis.title.y = element_text(colour = 'black', size = 14, margin=margin(1,1,1,1)),
axis.text.x = element_text(size=9), axis.text.y = element_text(size=9),
plot.title = element_text(hjust = 0.5, size = 17, margin=margin(1,1,1,1)) ) +
geom_label_repel(data = eng_props[eng_props$English_Word_Lynott_Connell_2009_2013 %in% w_set,], aes(label = English_Word_Lynott_Connell_2009_2013), size = 6,
alpha = 0.77, color = 'black', box.padding = 1.5 )
# ENGLISH CONCEPTS
# Same pipeline as the English properties above, now on concept words.
# check conditions for a PCA
# matrix
eng_conc = all[all$cat == 'Concept', c('English_Auditory_Lynott_Connell_2009_2013', 'English_Haptic_Lynott_Connell_2009_2013', 'English_Visual_Lynott_Connell_2009_2013')]
# nrow(eng_conc)
eng_conc_matrix = cor(eng_conc, use = 'complete.obs')
# eng_conc_matrix
# round(eng_conc_matrix, 2)
# POOR: correlations not apt for a PCA, with too many below .3
# now on the raw data:
# nrow(eng_conc)
# cortest.bartlett(eng_conc)
# GOOD: Bartlett's test significant
# KMO: Kaiser-Meyer-Olkin Measure of Sampling Adequacy
# KMO(eng_conc_matrix)
# Result: .48 = poor. PCA not strongly recommended. But we still do it
# because the purpose is graphical really.
# check determinant
# det(eng_conc_matrix)
# GOOD: > 0.00001
# start off with unrotated PCA
pc1_eng_conc = psych::principal(eng_conc, nfactors = 3, rotate = "none")
# pc1_eng_conc
# RESULT: Extract either one PC, acc to Kaiser's criterion, or two RCs, acc to
# Joliffe's (Field, Miles, & Field, 2012)
# Unrotated: scree plot
# plot(pc1_eng_conc$values, type = "b")
# Result: two PCs obtain.
# Now with varimax rotation, Kaiser-normalized (by default):
# always preferable because it captures explained variance best.
pc2_eng_conc = psych::principal(eng_conc, nfactors = 2, rotate = "varimax", scores = TRUE)
# pc2_eng_conc
# pc2_eng_conc$loadings
#
#
# pc2_eng_conc$residual
# pc2_eng_conc$fit
# pc2_eng_conc$communality
# Results based on a Kaiser-normalized orthogonal (varimax) rotation
# (by default in psych::stats). Residuals bad: over 50% have absolute
# values > 0.05. Model fit good, > .90. Communalities good, all > .7.
# subset and add PCs
eng_concs = all[all$cat == 'Concept', ]
# nrow(eng_concs)
eng_concs = cbind(eng_concs, pc2_eng_conc$scores)
# summary(eng_concs$RC1, eng_concs$RC2)
# Keep only concepts that were normed in English (alone or in both languages)
eng_concs = eng_concs[eng_concs$normed == 'Dut_Eng' | eng_concs$normed == 'English',]
# nrow(eng_concs)
# summary(eng_concs$RC1, eng_concs$RC2)
# Finally, plot
# Set sample words to show on plot (first word in each modality)
auditory_w = as.character(sort(eng_concs[eng_concs$English_Main_Lynott_Connell_2009_2013=='Auditory', 'English_Word_Lynott_Connell_2009_2013'])[1])
haptic_w = as.character(sort(eng_concs[eng_concs$English_Main_Lynott_Connell_2009_2013=='Haptic', 'English_Word_Lynott_Connell_2009_2013'])[1])
visual_w = as.character(sort(eng_concs[eng_concs$English_Main_Lynott_Connell_2009_2013=='Visual', 'English_Word_Lynott_Connell_2009_2013'])[1])
w_set = c(auditory_w, haptic_w, visual_w)
# Abbreviate modality labels to single plotting letters (a/h/v)
eng_concs$English_Main_Lynott_Connell_2009_2013 = dplyr::recode(eng_concs$English_Main_Lynott_Connell_2009_2013, Auditory = "a", Haptic = "h", Visual = "v")
# Letters-plus-density plot on the two varimax-rotated PCs; the first word per
# modality (w_set) is enlarged, circled, and labelled
Engconcs = ggplot(eng_concs,
aes(RC1, RC2, label = as.character(English_Main_Lynott_Connell_2009_2013))) + stat_density2d (color = "gray87") +
geom_text(size = ifelse(eng_concs$English_Word_Lynott_Connell_2009_2013 %in% w_set, 8, 5),
fontface = ifelse(eng_concs$English_Word_Lynott_Connell_2009_2013 %in% w_set, 'bold', 'plain')) +
geom_point(data=eng_concs[eng_concs$English_Word_Lynott_Connell_2009_2013 %in% w_set,], pch=21, fill=NA, size=8, stroke=2, alpha=.6) +
ggtitle('English concepts (Lynott & Connell, 2013)') +
labs(x = "Varimax-rotated Principal Component 1", y = "Varimax-rotated Principal Component 2") +
theme_bw() +
theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'),
axis.title.x = element_text(colour = 'black', size = 14, margin=margin(1,1,1,1)),
axis.title.y = element_text(colour = 'black', size = 14, margin=margin(1,1,1,1)),
axis.text.x = element_text(size=9), axis.text.y = element_text(size=9),
plot.title = element_text(hjust = 0.5, size = 17, margin=margin(1,1,1,1)) ) +
geom_label_repel(data = eng_concs[eng_concs$English_Word_Lynott_Connell_2009_2013 %in% w_set,], aes(label = English_Word_Lynott_Connell_2009_2013), size = 6,
alpha = 0.77, color = 'black', box.padding = 1.5 )
# DUTCH
# Properties
# NOTE(review): this repeats the PCA from the 'global' chunk (same data,
# same settings), rebinding pc1_prop/pc2_prop along the way; kept verbatim
# to preserve the sequential state the later plots rely on.
# check conditions for a PCA
# matrix
Property = all[all$cat == 'Property' & !is.na(all$word), c('Auditory', 'Haptic', 'Visual')]
# nrow(Property)
prop_matrix = cor(Property, use = 'complete.obs')
# prop_matrix
# round(prop_matrix, 2)
# POOR: correlations not apt for a PCA, with too many below .3
# now on the raw vars:
# nrow(Property)
# cortest.bartlett(Property)
# GOOD: Bartlett's test significant
# KMO: Kaiser-Meyer-Olkin Measure of Sampling Adequacy
# KMO(prop_matrix)
# Result: .56 = mediocre. PCA not strongly recommended. But we still do it
# because the purpose is graphical only.
# check determinant
# det(prop_matrix)
# GOOD: > 0.00001
# start off with unrotated PCA
pc1_prop = psych::principal(Property, nfactors = 3, rotate = "none")
# pc1_prop
# RESULT: Only PC1, with eigenvalue > 1, should be extracted,
# acc to Kaiser's criterion (Jolliffe's threshold of 0.7 way too lax;
# Field, Miles, & Field, 2012)
# Unrotated: scree plot
# plot(pc1_prop$values, type = "b")
# Result: one or two RCs should be extracted, converging with eigenvalues
# Now with varimax rotation, Kaiser-normalized (by default).
# Always preferable because it captures explained variance best.
# Compare eigenvalues w/ 1 & 2 factors
pc2_prop = psych::principal(Property, nfactors = 2, rotate = "varimax", scores = TRUE)
# pc2_prop
# pc2_prop$loadings
# good to extract 2 factors, as they both explain quite the same variance,
# and both surpass 1 eigenvalue
# pc2_prop$residual
# pc2_prop$fit
# pc2_prop$communality
# Results based on a Kaiser-normalized orthogonal (varimax) rotation
# (by default in psych::stats). Residuals OK: fewer than 50% have absolute
# values > 0.05 (exactly 50% do). Model fit good, > .90.
# Communalities good, all > .7 (av = .83).
# subset and add PCs
properties = all[all$cat == 'Property' & !is.na(all$word), ]
# nrow(properties)
properties = cbind(properties, pc2_prop$scores)
# nrow(properties)
# Finally, plot: letters+density (cf. Lynott & Connell, 2009, 2013)
# Set sample words to show on plot (first word in each modality)
auditory_w = as.character(sort(properties[properties$main=='Auditory', 'word'])[1])
haptic_w = as.character(sort(properties[properties$main=='Haptic', 'word'])[1])
visual_w = as.character(sort(properties[properties$main=='Visual', 'word'])[1])
w_set = c(auditory_w, haptic_w, visual_w)
# Abbreviate modality labels to single plotting letters (a/h/v)
properties$main = dplyr::recode(properties$main, Auditory = "a", Haptic = "h", Visual = "v")
# Letters-plus-density plot on the two varimax-rotated PCs; the first word per
# modality (w_set) is enlarged, circled, and labelled
NLprops = ggplot(properties,
aes(RC1, RC2, label = as.character(main))) + stat_density2d (color = "gray87") +
geom_text(size = ifelse(properties$word %in% w_set, 8, 5),
fontface = ifelse(properties$word %in% w_set, 'bold', 'plain')) +
geom_point(data=properties[properties$word %in% w_set,], pch=21, fill=NA, size=8, stroke=2, alpha=.6) +
ggtitle('Dutch properties') + labs(x = "", y = "") + theme_bw() +
theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'),
axis.title.x = element_text(colour = 'black', size = 14, margin=margin(1,1,1,1)),
axis.title.y = element_text(colour = 'black', size = 14, margin=margin(1,1,1,1)),
axis.text.x = element_text(size=9), axis.text.y = element_text(size=9),
plot.title = element_text(hjust = 0.5, size = 17, margin=margin(1,1,1,1)) ) +
geom_label_repel(data = properties[properties$word %in% w_set,], aes(label = word), size = 6,
alpha = 0.77, color = 'black', box.padding = 1.5 )
# CONCEPTS
# Check the conditions for a PCA
# Correlation matrix over the three modality ratings
Concept = all[all$cat == 'Concept' & !is.na(all$word), c('Auditory', 'Haptic', 'Visual')]
# nrow(Concept)
conc_matrix = cor(Concept, use = 'complete.obs')
# conc_matrix
# round(conc_matrix, 2)
# POOR: correlations not ideal for a PCA, with too many below .3
# Now on the raw data:
# nrow(Concept)
# cortest.bartlett(Concept)
# GOOD: Bartlett's test significant
# KMO (Kaiser-Meyer-Olkin) measure of sampling adequacy
# KMO(conc_matrix)
# Result: .49 = poor. A PCA is not strongly recommended, but we run one
# anyway because its purpose here is purely graphical.
# Check the determinant of the correlation matrix
# det(conc_matrix)
# GOOD: > 0.00001
# Start off with an unrotated PCA
pc1_conc = psych::principal(Concept, nfactors = 3, rotate = "none")
# pc1_conc
# RESULT good: PC1 and PC2, with eigenvalue > 1, should be extracted,
# according to Kaiser's criterion (Jolliffe's threshold of 0.7 is far too lax;
# Field, Miles, & Field, 2012)
# Unrotated solution: scree plot
# plot(pc1_conc$values, type = "b")
# Result: with no point of inflexion along the y axis, two PCs would obtain.
# Now with a varimax rotation, Kaiser-normalized (the default), which is
# preferable because it captures the explained variance best.
# Compare eigenvalues with 1 & 2 principal components
pc2_conc = psych::principal(Concept, nfactors = 2, rotate = "varimax", scores = TRUE)
# pc2_conc
# pc2_conc$loadings
# Good to extract 2 principal components, as both explain roughly the same
# variance and both surpass an eigenvalue of 1.
# pc2_conc$residual
# pc2_conc$fit
# pc2_conc$communality
# Results based on a Kaiser-normalized orthogonal (varimax) rotation
# (the default in psych::principal). Residuals bad: over 50% have absolute
# values > 0.05. Model fit good, > .90. Communalities good, all > .7 (average = .82).
# Subset the concept words and append their component scores (RC1, RC2)
concepts = all[all$cat == 'Concept' & !is.na(all$word), ]
# nrow(concepts)
concepts = cbind(concepts, pc2_conc$scores)
# nrow(concepts)
# Finally, plot
# Sample words to highlight on the plot (alphabetically first word in each
# dominant modality)
auditory_w = as.character(sort(concepts[concepts$main=='Auditory', 'word'])[1])
haptic_w = as.character(sort(concepts[concepts$main=='Haptic', 'word'])[1])
visual_w = as.character(sort(concepts[concepts$main=='Visual', 'word'])[1])
w_set = c(auditory_w, haptic_w, visual_w)
# Abbreviate dominant-modality labels to single letters for plotting
concepts$main = dplyr::recode(concepts$main, Auditory = "a", Haptic = "h", Visual = "v")
# Scatterplot over the two rotated components; the three sample words are
# enlarged, bolded, circled and labelled with repelled labels.
NLconcs = ggplot(concepts,
aes(RC1, RC2, label = as.character(main))) + stat_density2d (color = "gray87") +
geom_text(size = ifelse(concepts$word %in% w_set, 8, 5),
fontface = ifelse(concepts$word %in% w_set, 'bold', 'plain')) +
geom_point(data=concepts[concepts$word %in% w_set,], pch=21, fill=NA, size=8, stroke=2, alpha=.6) +
ggtitle('Dutch concepts') +
labs(x = "Varimax-rotated Principal Component 1", y = "") + theme_bw() +
theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'),
axis.title.x = element_text(colour = 'black', size = 14, margin=margin(1,1,1,1)),
axis.title.y = element_text(colour = 'black', size = 14, margin=margin(1,1,1,1)),
axis.text.x = element_text(size=9), axis.text.y = element_text(size=9),
plot.title = element_text(hjust = 0.5, size = 17, margin=margin(1,1,1,1)) ) +
geom_label_repel(data = concepts[concepts$word %in% w_set,], aes(label = word), size = 6,
alpha = 0.77, color = 'black', box.padding = 1.5 )
# Four plots on a 2x2 grid. A CSS Flexbox layout has to be used in Shiny
# instead of the usual gridExtra/multiplot helpers
# (https://plotly-r.com/arranging-views.html).
# Helper: one half-width cell of the grid, holding a rendered plot.
half_width_cell = function(p) {
  div(renderPlot(p), style = "width: 50%; border: none;")
}
div(
  style = "display: flex; flex-wrap: wrap; justify-content: center",
  half_width_cell(Engprops),
  half_width_cell(NLprops),
  half_width_cell(Engconcs),
  half_width_cell(NLconcs)
)
```
<h5 style='text-align:justify; padding-left:5px; padding-right:5px; padding-bottom:3px; line-height:1.4;'> Figure 2. Dutch norms compared to English norms (reanalysis of Lynott & Connell, 2009, 2013, narrowed to three modalities) based on a principal component analysis of the auditory, haptic, and visual ratings for each word. Letters indicate the dominant modality of each word (*a* = auditory, *h* = haptic, *v* = visual), and contours further display the degree of consistency of the modalities. [<i class='fas fa-external-link-alt' aria-hidden="true" style='font-size:7px'></i> Github](https://raw.githubusercontent.com/pablobernabeu/Modality-exclusivity-norms-747-Dutch-English-replication/master/allfour_lowres.png)
<!-- Begin two-column format -->
<div class="row">
<div class="column" style="width: 65%;">
<div>
The norms also served to investigate [**sound symbolism**](#sound-symbolism){style='background-color:#FDFFFF'}, which is the relation between the form of words and their meaning. The form of words rests on their sound more than on their visual or tactile properties (at least in spoken language). Therefore, auditory ratings should more reliably predict the lexical properties of words (length, frequency, distinctiveness) than haptic or visual ratings would. Lynott and Connell's (2013) findings were replicated, as auditory ratings were either the best predictor of lexical properties, or yielded an effect that was opposite in polarity to the effects of haptic and visual ratings. The present analyses and further ones will be reported in a forthcoming paper.
All data and code are [available for re-use](https://osf.io/brkjw/wiki/home/) under a [CC BY licence](https://creativecommons.org/licenses/by/4.0/), by citing the source:
</div>
</div>
<div class="column" style="width: 35%; padding-left: 8px;">
<div style = "padding-top: 5px; padding-bottom: 8px; float: right; font-size: 13px; display: inline-block; background-color: #FAFAFA;">
<div style = "text-align: center !important; padding-bottom: 0 !important;"><img src="https://upload.wikimedia.org/wikipedia/commons/e/e7/Booba-Kiki.svg" alt="Bouba-Kiki" width=80px height=55px></img></div>
<div style="text-align: justify !important; padding-top: 0 !important; padding-right: 5px; padding-left: 5px;"> Sound symbolism is a psycholinguistic effect whereby the pronunciation and the meaning of words bear a non-arbitrary relationship. For instance, when people are asked to match the pseudowords *bouba* and *kiki* to the above objects, the vast majority name the angular object *kiki* and the smooth one *bouba* (K&ouml;hler, 1929; Sourav et al., 2019). </div>
<div style="font-size:65%; padding-top:4px; padding-right: 5px; padding-left: 5px; text-align:left;"> Image: <a href="https://commons.wikimedia.org/wiki/File:Booba-Kiki.svg" title="via Wikimedia Commons">Monochrome version 1 June 2007 by Bendž. Vectorized with Inkscape --Qef (talk) 21:21, 23 June 2008 (UTC)</a> [<a href="http://creativecommons.org/licenses/by-sa/3.0/">CC BY-SA</a>].</div>
</div>
</div>
<!-- End two-column format -->
</div>
> <div style = 'text-align: justify; text-indent: -1.5em; margin-left: 1.5em; font-size: 16px; background-color: #FCFCFC; font-size: 15px;'> Bernabeu, P. (2018). Dutch modality exclusivity norms for 336 properties and 411 concepts [Data dashboard]. Retrieved from https://pablobernabeu.shinyapps.io/Dutch-Modality-Exclusivity-Norms/.</div>
<!-- Create heading anchor for the corpora section below -->
<a name="section-external-corpora"></a>
<h3> External corpora </h3>
* Concreteness and age of acquisition: norms by Brysbaert, Warriner, and Kuperman (2014);
* Phonological and orthographic neighbours: DutchPOND (Marian et al., 2012);
* Word frequency and contextual diversity: SUBTLEX-NL (Keuleers, Brysbaert, & New, 2010);
* Lemma frequency: CELEX (Baayen, Piepenbrock, & van Rijn, 1993).
<h3 style='padding-top:5px;'> Acknowledgements </h3>
This research was greatly supported by the supervision from Max Louwerse and Roel Willems; the financial help from Tilburg University; Wendy Leijten's help with the translations; and the forty-two students from Tilburg University and Radboud University who completed the surveys.
<!-- Create heading anchor for the References section below -->
<a name="section-references"></a>
<h3 style='padding-top:5px;'> References </h3>
<div style = "text-align: justify; text-indent:-2em; margin-left:2em; background-color:#FCFCFC; font-size:15px;">
Baayen, R. H., Piepenbrock, R., & van Rijn, H. (1993). *The CELEX Lexical Database* [CD-ROM]. Philadelphia: Linguistic Data Consortium, University of Pennsylvania.
Bernabeu, P. (2018). *Dutch modality exclusivity norms for 336 properties and 411 concepts*. PsyArXiv. https://psyarxiv.com/s2c5h.
Bernabeu, P., Willems, R. M., & Louwerse, M. M. (2017). Modality switch effects emerge early and increase throughout conceptual processing: Evidence from ERPs. In G. Gunzelmann, A. Howes, T. Tenbrink, & E. J. Davelaar (Eds.), *Proceedings of the 39th Annual Conference of the Cognitive Science Society* (pp. 1629-1634). Austin, TX: Cognitive Science Society. https://mindmodeling.org/cogsci2017/papers/0318/index.html.
Brysbaert, M., Warriner, A.B., & Kuperman, V. (2014). Concreteness ratings for 40 thousand generally known English word lemmas. *Behavior Research Methods, 46*, 3, 904-911. <br>
[https://doi.org/10.3758/s13428-013-0403-5](https://doi.org/10.3758/s13428-013-0403-5).
Field, A. P., Miles, J., & Field, Z. (2012). *Discovering Statistics Using R*. London, UK: Sage.
Keuleers, E., Brysbaert, M. & New, B. (2010). SUBTLEX-NL: A new frequency measure for Dutch words based on film subtitles. *Behavior Research Methods, 42*, 3, 643-650. [https://doi.org/10.3758/BRM.42.3.643](https://doi.org/10.3758/BRM.42.3.643).
K&ouml;hler, W. (1929). *Gestalt Psychology*. New York: Liveright.
Louwerse, M., & Connell, L. (2011). A taste of words: Linguistic context and perceptual simulation predict the modality of words. *Cognitive Science, 35*, 2, 381-98. [https://doi.org/10.1111/j.1551-6709.2010.01157.x](https://doi.org/10.1111/j.1551-6709.2010.01157.x).
Lynott, D., & Connell, L. (2009). Modality exclusivity norms for 423 object concepts. *Behavior Research Methods, 41*, 2, 558-564. [https://doi.org/10.3758/BRM.41.2.558](https://doi.org/10.3758/BRM.41.2.558).
Lynott, D., & Connell, L. (2013). Modality exclusivity norms for 400 nouns: The relationship between perceptual experience and surface word form. *Behavior Research Methods, 45*, 2, 516-526. <br>
[https://doi.org/10.3758/s13428-012-0267-0](https://doi.org/10.3758/s13428-012-0267-0).
Marian, V., Bartolotti, J., Chabal, S., & Shook, A. (2012). CLEARPOND: Cross-Linguistic Easy-Access Resource for Phonological and Orthographic Neighborhood Densities. *PLoS ONE, 7*, 8: e43230. <br>
[https://doi.org/10.1371/journal.pone.0043230](https://doi.org/10.1371/journal.pone.0043230).
Sourav, S., Kekunnaya, R., Shareef, I., Banerjee, S., Bottari, D., & R&ouml;der, B. (2019). A protracted sensitive period regulates the development of cross-modal sound-shape associations in humans. *Psychological Science, 30*, 10, 1473-1482. [https://doi.org/10.1177/0956797619866625](https://doi.org/10.1177/0956797619866625).
Speed, L. J., & Majid, A. (2017). Dutch modality exclusivity norms: Simulating perceptual modality in space. *Behavior Research Methods, 49*, 6, 2204-2218. [https://doi.org/10.3758/s13428-017-0852-3](https://doi.org/10.3758/s13428-017-0852-3).
</div>
<h2 style='padding-top:5px;'> Contact </h2>
Pablo Bernabeu. Email: p.bernabeu@lancaster.ac.uk
[Website](https://pablobernabeu.github.io/)
<br/>
</div>
Table {#table style="background-color:#FCFCFC;"}
=======================================================================
Inputs {.sidebar style='position:fixed; padding-top: 65px; padding-bottom:30px;'}
-----------------------------------------------------------------------
```{r}
# Sidebar link that opens a modal dialog with definitions and corpus sources
actionLink('table_DefinitionsCorporaLink', HTML('<span style="font-size: 14px; font-weight:bold; border-bottom:none;"><i class="far fa-clipboard" aria-hidden="true"></i> Definitions and corpora </span>'))
observeEvent(input$table_DefinitionsCorporaLink, {
sendSweetAlert(session, type = NULL, title = NULL, html = TRUE,
closeOnClickOutside = TRUE, btn_labels = 'Close',
# Fix: the wrapper div used 'font-size = 14px' (equals sign instead of a
# colon), which is invalid CSS and was silently ignored by browsers.
text = HTML('<div style = "font-size: 14px; text-align: justify;">
<h4 style="background-color:#FFFEF0; padding-top:2px; padding-bottom:2px;"><b> Definitions </b></h4>
<p style="font-size:13px"><b>Principal component analysis (PCA):</b> Method for reducing dimensionality of data while retaining the main patterns (<a href="https://www.nature.com/articles/nmeth.4346">read more</a>).</p>
<p style="font-size:13px"><b> Dominant modality:</b> Highest-rated modality.<sup>*</sup> </p>
<p style="font-size:13px"><b> Modality exclusivity:</b> Range of the three modality ratings divided by the sum.<sup>*</sup> </p>
<p style="font-size:13px"><b> Perceptual strength:</b> Highest rating across modalities.<sup>*</sup> </p>
<p style="font-size:11px !important; padding-bottom:4px;">* Calculated following <a href="#section-info"> Lynott and Connell (2009, 2013)</a>.</p>
<h4 style="background-color: #FFFEF0; padding-top:6px; padding-bottom:2px;"><b> Corpora </b></h4>
<p style="font-size:13px"><b> Concreteness and age of acquisition:</b> Norms by Brysbaert, Warriner, and Kuperman (2014).</p>
<p style="font-size:13px"><b> Phonological and orthographic neighbours:</b> DutchPOND (Marian et al., 2012).</p>
<p style="font-size:13px"><b> Word frequency and contextual diversity:</b> SUBTLEX-NL (Keuleers, Brysbaert, & New, 2010).</p>
<p style="font-size:13px"><b> Lemma frequency:</b> CELEX (Baayen, Piepenbrock, & van Rijn, 1993).</p>
<p><i class="fas fa-book-open" aria-hidden="true"></i> <a href="#section-info">References</a></p>
</div>'))
})
```
<br/>
##### Data selected below {style='margin-top:-16px;'}
<!-- Output right below removed from mobiles because it's badly displayed -->
#### {.desktop-only}
<div style='padding-top:9px; margin-bottom:-12px;'>
```{r}
# Number of Property and Concept words in the current sidebar selection
reactive({
  sel = selected_words()
  cat(paste0(nrow(sel[sel$cat=='Property',]), ' Property words', '\n',
             nrow(sel[sel$cat=='Concept',]), ' Concept words'))
})
```
</div>
<!-- Show again on all devices -->
####
```{r}
# Download the data selected on the sidebar as a CSV file.
# Placeholder for the download link; filled in by the renderUI below.
uiOutput("table_csv_link")
output$table_csv_link = renderUI({
  downloadLink("table_csv", " Download spreadsheet",
               class = "fa fa-download", style = 'border-bottom:none !important')
})
# Serve the CSV. The file name carries a timestamp; colons are replaced
# because they are not valid in file names on all systems.
output$table_csv = downloadHandler(
  filename = function() {
    stamp = str_replace_all(format(Sys.time(), '%d-%b-%Y_%X-%Z'), ':', '-')
    paste0('Selected-Dutch-modality-norms-Bernabeu-et-al-', stamp, '.csv')
  },
  content = function(file) {
    write.csv(selected_words(), file,
              row.names = FALSE, fileEncoding = 'Latin1')
  }
)
```
#### **Word Categories** {style='margin-top:22px;'}
```{r}
# Pickers for word category (properties vs. concepts) and, for concept words,
# the Dutch definite article ('de' vs. 'het'); both default to everything.
word_categories = c('Property', 'Concept')
pickerInput(inputId = "table_wordcategory", choices = word_categories,
            choicesOpt = list(content = c('Properties', 'Concepts')),
            multiple = TRUE, selected = word_categories,
            options = list(style = "light", size = 15))
noun_articles = c('de', 'het')
pickerInput(inputId = "table_nouncategory", choices = noun_articles,
            choicesOpt = list(content = c("Fem/Masc 'de' noun", "Neuter 'het' noun")),
            multiple = TRUE, selected = noun_articles,
            options = list(style = "light", size = 15))
```
#### **Modality** {style='margin-top:22px;'}
```{r}
# Colour-coded HTML labels for the dominant-modality picker, matching the
# colours of the rating bars in the table. These are plain string literals;
# the original wrapped each in sprintf() with no format arguments, which is
# a no-op (and would misbehave if the CSS ever contained a literal '%'),
# so the wrappers have been removed.
modalities_labels = c(
'<span style="margin: 0px 0px 4px 0px !important; font-size: 14px !important; text-align: center !important; text-decoration: none !important; background-color: #ff3030; border: none !important; border-radius: 4px !important; color: #FDFDFD !important; cursor: pointer !important; padding: 0.9px 3px 4.6px !important;"> Auditory </span>',
'<span style="margin: 0px 0px 4px 0px !important; font-size: 14px !important; text-align: center !important; text-decoration: none !important; background-color: dodgerblue; border-radius: 4px !important; border: none !important; color: #FDFDFD !important; cursor: pointer !important; padding: 0.9px 3px 4.6px !important;"> Haptic </span>',
'<span style="margin: 0px 0px 4px 0px !important; font-size: 14px !important; text-align: center !important; text-decoration: none !important; background-color: forestgreen; border-radius: 4px !important; border: none !important; color: #fdfdfd !important; cursor: pointer !important; padding: 0.9px 3px 4.6px !important;"> Visual </span>'
)
pickerInput(inputId = "table_modalities", label = "Dominant modalities",
choices = c('Auditory', 'Haptic', 'Visual'),
choicesOpt = list(content = modalities_labels),
multiple = TRUE, selected = c('Auditory', 'Haptic', 'Visual'),
options = list(style = "light", size = 15))
# Range filters on exclusivity, overall perceptual strength and the three
# modality ratings; each slider is followed by the mean of that variable
# over the words currently selected.
sliderInput("table_Exclusivity", "Modality exclusivity (%)", min = 0, max = 100, value = c(0, 100))
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$Exclusivity),2)))))
sliderInput("table_Strength", "Perceptual strength", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$Perceptualstrength),2)))))
sliderInput("table_Auditory", "Auditory rating", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$Auditory),2)))))
sliderInput("table_Haptic", "Haptic rating", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$Haptic),2)))))
sliderInput("table_Visual", "Visual rating", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$Visual),2)))))
```
#### **Concreteness** {style='margin-top:22px;'}
```{r}
# Concreteness filter (norms by Brysbaert et al., 2014; 0-5 scale)
sliderInput("table_concreteness", NULL, min = 0, max = 5, value = c(0, 5), step = 0.1)
# Mean concreteness over the words currently selected (missing values were
# recoded as 0 in table_data, so they count towards this mean)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$concrete_Brysbaertetal2014),2)))))
```
#### **Length** {style='margin-top:22px;'}
```{r}
# Word-length filters; each mean is over the words currently selected
sliderInput("table_letters", "Number of letters", min = 3, max = 17, value = c(3, 17), step = 1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$letters),2)))))
# Phoneme counts come from DutchPOND (missing values recoded as 0 in table_data)
sliderInput("table_phonemes_DutchPOND", "Number of phonemes", min = 0, max = 15, value = c(0, 15), step = 1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$phonemes_DUTCHPOND),2)))))
```
#### **Word Frequency** {style='margin-top:22px;'}
```{r}
# Word-frequency filters (SUBTLEX-NL and CELEX); each slider is followed by
# the mean of the corresponding variable over the words currently selected.
sliderInput("table_ContextualDiversity", "Contextual diversity (CD)", min = 0, max = 4, value = c(0, 4), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$freq_lg10CD_SUBTLEXNL),2)))))
sliderInput("table_InflectedContextualDiversity", "CD Inflected adjective", min = 0, max = 4, value = c(0, 4), step = 0.1)
# Inflected adjectives exist only for Property words (Concept rows carry a
# filler value of 0 in table_data), so average over the selected Property
# rows only. The previous computation, sum(...)/336, hard-coded the total
# number of properties as the denominator and therefore understated the mean
# whenever any sidebar filter narrowed the selection.
reactive({
  props = selected_words()[selected_words()$cat == 'Property',
                           'inflected_adj_freq_lg10CD_SUBTLEXNL']
  cat(paste0('Mean = ', sprintf("%.2f", round(mean(props), 2))))
})
sliderInput("table_WF_SUBTLEXNL", "Word frequency", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$freq_lg10WF_SUBTLEXNL),2)))))
sliderInput("table_lemma_CELEX", "Lemma frequency", min = 0, max = 3.2, value = c(0, 3.2), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$freq_CELEX_lem),2)))))
```
#### **Distinctiveness** {style='margin-top:22px;'}
```{r}
# Neighbourhood-density filters (DutchPOND); means over the current selection
sliderInput("table_phon_neighbours_DutchPOND", "Phonological neighbours", min = 0, max = 50, value = c(0, 50), step = 1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$phon_neighbours_DUTCHPOND),2)))))
sliderInput("table_orth_neighbours_DutchPOND", "Orthographic neighbours", min = 0, max = 32, value = c(0, 32), step = 1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$orth_neighbours_DUTCHPOND),2)))))
```
#### **Age of Acquisition** {style='margin-top:22px;'}
```{r}
# Age-of-acquisition filter (norms by Brysbaert et al., 2014); mean over the
# words currently selected
sliderInput("table_AoA", NULL, min = 0, max = 15, value = c(0, 15), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_words()$AoA_Brysbaertetal2014),2)))))
```
<div style = "padding-top: 4px; font-size: 0.6em; text-align: left">
#### **Definitions**
**Principal component analysis (PCA):** Method for reducing dimensionality of data while retaining the main patterns ([read more](https://www.nature.com/articles/nmeth.4346)).
<div><span data-toggle="tooltip1" data-placement="bottom" title=" &nbsp; Computed as in Lynott and Connell (2009, 2013). " style="border-bottom: 0.9px dotted grey"><b> Dominant modality: </b></span> Highest-rated modality. </div>
<div style="padding-top:5px;"><span data-toggle="tooltip1" data-placement="bottom" title=" &nbsp; Computed as in Lynott and Connell (2009, 2013). " style="border-bottom: 0.9px dotted grey"><b> Modality exclusivity: </b></span> Range of the three modality ratings divided by the sum. </div>
<div style="padding-top:5px; padding-bottom:6px;"><span data-toggle="tooltip1" data-placement="bottom" title=" &nbsp; Computed as in Lynott and Connell (2009, 2013). " style="border-bottom: 0.9px dotted grey"><b> Perceptual strength: </b> </span> Highest rating across modalities. </div>
#### **Corpora**
**Concreteness and age of acquisition:** [Norms by Brysbaert, Warriner, and Kuperman (2014)](#references).
**Phonological and orthographic neighbours:** [DutchPOND (Marian et al., 2012)](#references).
**Word frequency and contextual diversity:** [SUBTLEX-NL (Keuleers, Brysbaert, & New, 2010)](#references).
**Lemma frequency:** [CELEX (Baayen, Piepenbrock, & van Rijn, 1993)](#references).
</div>
Column {style="data-width:100%; position:static; height:1000px;"}
-----------------------------------------------------------------------
### <span style = 'font-size: 15px;'> **Property and concept words**. Please scroll horizontally and vertically. Column information available by hovering on header. Columns may be simultaneously filtered and sorted (e.g., to find out range), and words may be searched under the header. [CSV](https://osf.io/ge7pn/) and [Excel spreadsheet](https://osf.io/58gzs/) also available.</span> {style="margin-top:10px; padding-top:30px; text-align:justify;"}
<div style = "text-align: justify; background-color: #FCFCFC;">
```{r}
# Build the data frame behind the table: keep only rows with a word, and
# select the columns in display order.
table_data = all[!is.na(all$word),
                 c('word','cat','inflected_prop','conc_cat','main','Exclusivity','Perceptualstrength','Auditory',
                   'Haptic','Visual','SD_Auditory','SD_Haptic','SD_Visual','concrete_Brysbaertetal2014','letters',
                   'phonemes_DUTCHPOND','freq_lg10CD_SUBTLEXNL','inflected_adj_freq_lg10CD_SUBTLEXNL',
                   'freq_lg10WF_SUBTLEXNL','freq_CELEX_lem','phon_neighbours_DUTCHPOND',
                   'orth_neighbours_DUTCHPOND','AoA_Brysbaertetal2014')]
# Replace missing values in the corpus variables with zeros so that the
# sliders and the means work. Words missing from the corpora indeed have
# zero frequency.
corpus_columns = c('freq_lg10CD_SUBTLEXNL', 'inflected_adj_freq_lg10CD_SUBTLEXNL',
                   'freq_lg10WF_SUBTLEXNL', 'freq_CELEX_lem',
                   'AoA_Brysbaertetal2014', 'concrete_Brysbaertetal2014',
                   'phonemes_DUTCHPOND', 'orth_neighbours_DUTCHPOND',
                   'phon_neighbours_DUTCHPOND')
for (column in corpus_columns) {
  table_data[is.na(table_data[[column]]), column] = 0
}
# Express modality exclusivity as a percentage
table_data$Exclusivity = table_data$Exclusivity * 100
# Column header with a dotted underline that shows `tooltip` on hover
# (adapted from https://glin.github.io/reactable/articles/cookbook/cookbook.html)
with_tooltip = function(value, tooltip) {
  dotted_underline = 'text-decoration: underline; text-decoration-style: dotted;'
  span(style = dotted_underline, title = tooltip, value)
}
# Reactive subset of table_data driven by the sidebar controls; used by both
# the table and the CSV download. A row is kept when it matches all three
# pickers and every numeric value lies within its slider's range (inclusive
# at both ends).
selected_words = reactive({
  # TRUE where the column's value falls inside the selected slider range
  within_range = function(column, slider) {
    table_data[[column]] >= min(slider) & table_data[[column]] <= max(slider)
  }
  keep =
    table_data$cat %in% input$table_wordcategory &
    # NA article is allowed through: property words carry no noun article
    table_data$conc_cat %in% c(input$table_nouncategory, NA) &
    table_data$main %in% input$table_modalities &
    within_range('Exclusivity', input$table_Exclusivity) &
    within_range('Perceptualstrength', input$table_Strength) &
    within_range('Auditory', input$table_Auditory) &
    within_range('Haptic', input$table_Haptic) &
    within_range('Visual', input$table_Visual) &
    within_range('concrete_Brysbaertetal2014', input$table_concreteness) &
    within_range('letters', input$table_letters) &
    within_range('phonemes_DUTCHPOND', input$table_phonemes_DutchPOND) &
    within_range('phon_neighbours_DUTCHPOND', input$table_phon_neighbours_DutchPOND) &
    within_range('orth_neighbours_DUTCHPOND', input$table_orth_neighbours_DutchPOND) &
    within_range('freq_lg10CD_SUBTLEXNL', input$table_ContextualDiversity) &
    within_range('inflected_adj_freq_lg10CD_SUBTLEXNL', input$table_InflectedContextualDiversity) &
    within_range('freq_lg10WF_SUBTLEXNL', input$table_WF_SUBTLEXNL) &
    within_range('freq_CELEX_lem', input$table_lemma_CELEX) &
    within_range('AoA_Brysbaertetal2014', input$table_AoA)
  # which() drops rows where any comparison is NA, as in the original code
  table_data[which(keep), ]
})
# Table. Helper that renders a cell as its label plus a horizontal bar of the
# given width (used for the descriptive bars in the rating columns).
bar_chart <- function(label, width = "100%", height = "8px", fill = "#00bfc4", background = NULL) {
  filled_part = div(style = list(background = fill, width = width, height = height, marginRight = "6px"))
  track = div(style = list(flexGrow = 1, marginLeft = "8px", background = background), filled_part)
  div(style = list(display = "flex", alignItems = "center"), label, track)
}
# Render the interactive table of the currently selected words. Tooltips,
# filtering and formatting are configured per column. The three rating
# columns draw an in-cell bar whose width is scaled to the column maximum
# over the FULL data set (table_data), not the current selection, so bar
# lengths remain comparable across different selections.
renderReactable({
reactable(selected_words(),
defaultSorted = list(cat = 'desc', word = 'asc'),
defaultColDef = colDef(footerStyle = list(fontWeight = "bold")),
height = 840, striped = TRUE, pagination = FALSE, highlight = TRUE,
columns = list(
word = colDef(name='Word', filterable=TRUE, minWidth = 160,
style = list(fontWeight='bold', color='#453246')),
cat = colDef(header = with_tooltip('Word category', 'Property / Concept')),
inflected_prop = colDef(filterable=TRUE, minWidth = 140,
header = with_tooltip('Inflected adjective',
"Adjectives inflected for masculine and feminine nouns.")),
conc_cat = colDef(header = with_tooltip('Definite article', "Definite article 'de' is used for feminine and masculine nouns, whereas 'het' is used for neuter nouns.")),
main = colDef(header = with_tooltip('Dominant Modality',
'Highest-rated modality (as defined in Lynott & Connell, 2009, 2013). Values: Auditory, Haptic, Visual.')),
# Exclusivity is shown as a percentage; its text colour darkens as the
# value increases (grey < 33 <= mid < 67 <= dark).
Exclusivity = colDef(cell = function(value) {
paste0(sprintf("%.2f", round(value,2)),'%') # Round to two digits, keeping trailing zeros
},
header = with_tooltip('Modality Exclusivity',
'Range of the three modality ratings divided by the sum (as defined in Lynott & Connell, 2009, 2013). Values colour-coded from lower, in grey, to higher, in violet.'),
style = function(value) {
if (value < 33) {
color = '#9D9D9D'
} else if (value < 67) {
color = '#7A5151'
} else {
color = '#541E1E'
}
list(color = color)
}),
# Perceptual strength uses the same colour scheme over the 0-5 scale.
Perceptualstrength = colDef(header = with_tooltip('Perceptual Strength',
'Highest rating across modalities (as defined in Lynott & Connell, 2009, 2013). Values colour-coded from lower, in grey, to higher, in violet.'),
# The assignment below also returns the formatted value from the function
cell = function(value) {value = sprintf("%.2f", round(value,2))}, # Round to two digits, keeping trailing zeros
style = function(value) {
if (value < 1.7) {
color = '#9D9D9D'
} else if (value < 3.3) {
color = '#7A5151'
} else {
color = '#541E1E'
}
list(color = color)
}),
# Rating columns: formatted value plus a colour-coded bar (see bar_chart),
# scaled to the maximum of that column in table_data.
Auditory = colDef(header = with_tooltip('Auditory Rating',
'Mean rating of each word on the auditory modality across participants.'),
cell = function(value) {
width <- paste0(value / max(table_data$Auditory) * 100, "%")
value = sprintf("%.2f", round(value,2)) # Round to two digits, keeping trailing zeros
bar_chart(value, width = width, fill = '#ff3030')
},
align = 'left'),
Haptic = colDef(header = with_tooltip('Haptic Rating',
'Mean rating of each word on the haptic (tactile) modality across participants.'),
cell = function(value) {
width <- paste0(value / max(table_data$Haptic) * 100, "%")
value = sprintf("%.2f", round(value,2)) # Round to two digits, keeping trailing zeros
bar_chart(value, width = width, fill = 'dodgerblue')
},
align = 'left'),
Visual = colDef(header = with_tooltip('Visual Rating', 'Mean rating of each word on the visual modality across participants.'),
cell = function(value) {
width <- paste0(value / max(table_data$Visual) * 100, "%")
value = sprintf("%.2f", round(value,2)) # Round to two digits, keeping trailing zeros
bar_chart(value, width = width, fill = 'forestgreen')
},
align = 'left'),
SD_Auditory = colDef(header = with_tooltip('SD Auditory Rating',
'Standard Deviation of Auditory ratings across participants.'),
format = colFormat(digits = 2)),
SD_Haptic = colDef(header = with_tooltip('SD Haptic Rating',
'Standard Deviation of Haptic ratings across participants.'),
format = colFormat(digits = 2)),
SD_Visual = colDef(header = with_tooltip('SD Visual Rating',
'Standard Deviation of Visual ratings across participants.'),
format = colFormat(digits = 2)),
concrete_Brysbaertetal2014 = colDef(header = with_tooltip('Concreteness', 'Norms by Brysbaert, Warriner, and Kuperman (2014)'),
format = colFormat(digits = 2), minWidth = 110),
letters = colDef(name = 'Letters', format = colFormat(digits = 0)),
phonemes_DUTCHPOND = colDef(header = with_tooltip('Phonemes', 'DutchPOND corpus (Marian et al., 2012).'),
format = colFormat(digits = 0)),
freq_lg10CD_SUBTLEXNL = colDef(header = with_tooltip('Contextual Diversity (CD)',
"An advanced measure of word frequency for 'Word' column, based on SUBTLEX-NL corpus (Keuleers, Brysbaert, & New, 2010)."),
format = colFormat(digits = 2), minWidth = 120),
inflected_adj_freq_lg10CD_SUBTLEXNL = colDef(header = with_tooltip('CD Inflected Adjective',
"An advanced measure of word frequency for 'Inflected Adjective' column, based on SUBTLEX-NL corpus (Keuleers, Brysbaert, & New, 2010). Zeros are shown for concepts although no value applies."),
format = colFormat(digits = 2), minWidth = 120),
freq_lg10WF_SUBTLEXNL = colDef(header = with_tooltip('Word Frequency',
"A measure of word frequency for 'Word' column, based on SUBTLEX-NL corpus (Keuleers, Brysbaert, & New, 2010)."),
format = colFormat(digits = 2)),
freq_CELEX_lem = colDef(header = with_tooltip('Lemma Frequency',
"A measure of word frequency for 'Word' column, based on the lemma, i.e., word root (Baayen, Piepenbrock, & van Rijn, 1993)."),
format = colFormat(digits = 2)),
phon_neighbours_DUTCHPOND = colDef(header = with_tooltip('Phonological Neighbours', 'DutchPOND corpus (Marian et al., 2012).'),
format = colFormat(digits = 0)),
orth_neighbours_DUTCHPOND = colDef(header = with_tooltip('Orthographic Neighbours', 'DutchPOND corpus (Marian et al., 2012).'),
format = colFormat(digits = 0), minWidth = 110),
AoA_Brysbaertetal2014 = colDef(header = with_tooltip('Age of Acquisition',
"Norms by Brysbaert, Warriner, and Kuperman (2014)."),
format = colFormat(digits = 2))
)
)
})
```
</div>
Properties {data-orientation=rows data-height=3200}
=======================================================================
Inputs {.sidebar style='position:fixed; padding-top: 65px; padding-bottom:30px;'}
-----------------------------------------------------------------------
```{r}
# Sidebar link that opens a modal dialog with the definitions of the derived
# measures and the corpora from which the lexical variables were drawn.
actionLink('properties_DefinitionsCorporaLink', HTML('<span style="font-size: 14px; font-weight:bold; border-bottom:none; margin-bottom: 6px;"><i class="far fa-clipboard" aria-hidden="true"></i> Definitions and corpora </span>'))
# Show the modal on click. Bug fix: the wrapper div declared 'font-size = 14px',
# which is invalid CSS and was silently ignored by browsers; corrected to
# 'font-size: 14px'.
observeEvent(input$properties_DefinitionsCorporaLink, {
sendSweetAlert(session, type = NULL, title = NULL, html = TRUE,
closeOnClickOutside = TRUE, btn_labels = 'Close',
text = HTML('<div style = "font-size: 14px; text-align: justify;">
<h4 style="background-color: #FFFEF0; padding-top:2px; padding-bottom:2px;"><b> Definitions </b></h4>
<p style="font-size:13px"><b>Principal component analysis (PCA):</b> Method for reducing dimensionality of data while retaining the main patterns (<a href="https://www.nature.com/articles/nmeth.4346">read more</a>).</p>
<p style="font-size:13px"><b> Dominant modality:</b> Highest-rated modality.<sup>*</sup> </p>
<p style="font-size:13px"><b> Modality exclusivity:</b> Range of the three modality ratings divided by the sum.<sup>*</sup> </p>
<p style="font-size:13px"><b> Perceptual strength:</b> Highest rating across modalities.<sup>*</sup> </p>
<p style="font-size:11px !important; padding-bottom:4px;">* Calculated following <a href="#section-info"> Lynott and Connell (2009, 2013)</a>.</p>
<h4 style="background-color: #FFFEF0; padding-top:6px; padding-bottom:2px;"><b> Corpora </b></h4>
<p style="font-size:13px"><b> Concreteness and age of acquisition:</b> Norms by Brysbaert, Warriner, and Kuperman (2014).</p>
<p style="font-size:13px"><b> Phonological and orthographic neighbours:</b> DutchPOND (Marian et al., 2012).</p>
<p style="font-size:13px"><b> Word frequency and contextual diversity:</b> SUBTLEX-NL (Keuleers, Brysbaert, & New, 2010).</p>
<p style="font-size:13px"><b> Lemma frequency:</b> CELEX (Baayen, Piepenbrock, & van Rijn, 1993).</p>
<p><i class="fas fa-book-open" aria-hidden="true"></i> <a href="#section-info">References</a></p>
</div>'))
})
```
```{r}
# Tidy the principal-component loadings for display: bind the variable names
# as a column and drop the dummy rownames left by the principal() output.
properties_modality_loadings = data.frame(
  cbind(names(prop), data.frame(pc2_prop$loadings[1:3, 1:2]))
)
rownames(properties_modality_loadings) = NULL
properties_modality_loadings[, 1] = as.factor(properties_modality_loadings[, 1])
# Rename columns: RC (Rotated Component) becomes PC for clarity.
properties_renames = c('names.prop.' = 'Variable', 'RC1' = 'PC1', 'RC2' = 'PC2')
for (old_name in names(properties_renames)) {
  colnames(properties_modality_loadings)[
    colnames(properties_modality_loadings) == old_name
  ] = properties_renames[[old_name]]
}
# Round to two decimals, then strip the zero before the decimal point so the
# loadings read as correlation coefficients (e.g., ".97" rather than "0.97").
for (pc_col in c('PC1', 'PC2')) {
  properties_modality_loadings[, pc_col] =
    str_replace_all(round(properties_modality_loadings[, pc_col], 2), "0\\.", "\\.")
}
# Modal dialog showing the loadings of the principal components for
# properties, rendered as an HTML table built with kable.
actionLink("properties_loadings", " Principal component loadings", class = 'fa fa-table', style = 'border-bottom:none !important')
observeEvent(input$properties_loadings, {
showModal(modalDialog(
title = HTML('<div style="padding-bottom:0px; font-size:14px; text-align:justify;"> <b>Loadings of principal components (PC) in the form of correlations.</b> Coefficients above <i>r</i> = &plusmn;.70 (i.e., 50% shared variance) shown in bold. When the three modalities were reduced to two principal components, the visual and haptic modalities loaded onto the components similarly, both with positive polarities, whereas the auditory modality was relatively independent, with a negative polarity.</div>'),
div( HTML( # Below, table constructed
properties_modality_loadings %>%
# Highlight correlations above .7. PC1/PC2 hold strings such as ".97"
# (leading zeros stripped where the loadings table is built), so
# as.numeric() is needed to recover the value for the threshold test.
mutate(PC1 = ifelse(abs(as.numeric(PC1)) > .7, cell_spec(PC1, "html", bold = TRUE, color = 'black'),
cell_spec(PC1, "html")),
PC2 = ifelse(abs(as.numeric(PC2)) > .7, cell_spec(PC2, "html", bold = TRUE, color = 'black'),
cell_spec(PC2, "html")) ) %>%
kable(format = "html", escape = FALSE) %>%
kable_styling('striped', full_width = FALSE, position = "left")
),
align = 'center'),
size = 'm', easyClose = TRUE, footer = modalButton('Close')
))
})
```
<div style='padding-top:15px;' />
```{r}
# Word-highlighting selector: offers every word in the current sidebar
# selection, sorted alphabetically; re-renders whenever selected_props()
# changes.
renderUI({
  word_choices = sort(selected_props()$word)
  selectizeInput(
    "highlighted_properties",
    label = NULL,
    choices = word_choices,
    multiple = TRUE,
    options = list(placeholder = 'Words to highlight on plot ')
  )
})
```
</div>
##### Data selected below
<!-- Output right below removed from mobiles because it's badly displayed -->
#### {.desktop-only}
<div style='padding-top:9px; margin-bottom:-12px;'>
```{r}
# Report how many property words pass the current sidebar filters.
reactive({
  n_selected = nrow(selected_props())
  cat(paste0(n_selected, ' Property words'))
})
```
</div>
<!-- Show again on all devices -->
####
```{r}
# Download data selected on sidebar as a CSV file.
# Create placeholder for download link
uiOutput("properties_csv_link")
# Create the actual downloadLink
output$properties_csv_link = renderUI({
  downloadLink("properties_csv", " Download spreadsheet", class = "fa fa-download", style = 'border-bottom:none;')
})
# Download handling. The timestamp makes repeated downloads distinguishable;
# ':' is replaced because it is illegal in file names on some systems.
output$properties_csv = downloadHandler(
  filename = function() {
    paste0('Selected-Dutch-modality-norms-Bernabeu-et-al-', str_replace_all(format(Sys.time(), '%d-%b-%Y_%X-%Z'), ':', '-'), '.csv') },
  content = function(file) {
    # Leave Dutch data inside and remove English data, which aren't used in the tabs that have CSV download
    drops = c('id','file','normed','English_Word_Lynott_Connell_2009_2013','English_Main_Lynott_Connell_2009_2013','English_Perceptualstrength_Lynott_Connell_2009_2013','English_Exclusivity_Lynott_Connell_2009_2013','English_Auditory_Lynott_Connell_2009_2013','English_Haptic_Lynott_Connell_2009_2013','English_Visual_Lynott_Connell_2009_2013','English_Letters_Lynott_Connell_2009_2013','RC1','RC2')
    write.csv(selected_props()[,!(names(selected_props()) %in% drops)],
              file, row.names = FALSE, fileEncoding = 'Latin1')
  }
)
# Download data selected on sidebar as a PNG plot
# Create placeholder for download link
uiOutput("properties_png_link")
# Create the actual downloadLink
output$properties_png_link = renderUI({
  downloadLink("properties_png", " Download plot", class = "fa fa-download", style = 'border-bottom:none !important')
})
# Add download handling
output$properties_png = downloadHandler(
  filename = function() {
    paste0('Selected-Dutch-modality-norms-Bernabeu-et-al-', str_replace_all(format(Sys.time(), '%d-%b-%Y_%X-%Z'), ':', '-'), '.png') },
  content = function(file) {
    png(file, units = 'in', width = 13, height = 11, res = 900)
    # Bug fix: close the graphics device even if rendering the plot errors;
    # previously a failed print() left the PNG device dangling open.
    on.exit(dev.off(), add = TRUE)
    print(properties_png())
  },
  contentType = 'image/png'
)
```
#### **Modality** {style='margin-top:22px;'}
```{r}
# HTML labels for the modality picker: coloured badges matching the bar
# colours used in the table and plot (red = auditory, blue = haptic,
# green = visual).
modalities_labels = c(
sprintf('<span style="margin: 0px 0px 4px 0px !important; font-size: 14px !important; text-align: center !important; text-decoration: none !important; background-color: #ff3030; border: none !important; border-radius: 4px !important; color: #FDFDFD !important; cursor: pointer !important; padding: 0.9px 3px 4.6px !important;"> Auditory </span>'),
sprintf('<span style="margin: 0px 0px 4px 0px !important; font-size: 14px !important; text-align: center !important; text-decoration: none !important; background-color: dodgerblue; border-radius: 4px !important; border: none !important; color: #FDFDFD !important; cursor: pointer !important; padding: 0.9px 3px 4.6px !important;"> Haptic </span>'),
sprintf('<span style="margin: 0px 0px 4px 0px !important; font-size: 14px !important; text-align: center !important; text-decoration: none !important; background-color: forestgreen; border-radius: 4px !important; border: none !important; color: #fdfdfd !important; cursor: pointer !important; padding: 0.9px 3px 4.6px !important;"> Visual </span>')
)
# Dominant-modality filter; all modalities selected by default.
pickerInput(inputId = "properties_modalities", label = "Dominant modalities",
choices = sort(unique(props$main)),
choicesOpt = list(content = modalities_labels),
multiple = TRUE, selected = unique(props$main),
options = list(style = "light", size = 15))
# Range sliders feeding selected_props(); below each slider, the mean of the
# currently selected words is printed.
sliderInput("properties_Exclusivity", "Modality exclusivity (%)", min = 0, max = 100, value = c(0, 100))
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_props()$Exclusivity),2)))))
sliderInput("properties_Strength", "Perceptual strength", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_props()$Perceptualstrength),2)))))
sliderInput("properties_Auditory", "Auditory rating", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_props()$Auditory),2)))))
sliderInput("properties_Haptic", "Haptic rating", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_props()$Haptic),2)))))
sliderInput("properties_Visual", "Visual rating", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_props()$Visual),2)))))
```
#### **Concreteness** {style='margin-top:22px;'}
```{r}
# Concreteness range filter; the mean of the current selection prints below.
sliderInput("properties_concreteness", NULL, min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive({
  mean_concreteness = mean(selected_props()$concrete_Brysbaertetal2014)
  cat(paste0('Mean = ', sprintf("%.2f", round(mean_concreteness, 2))))
})
```
#### **Length** {style='margin-top:22px;'}
```{r}
# Word-length filters (letters and phonemes); means of the current selection
# print below each slider.
sliderInput("properties_letters", "Number of letters", min = 3, max = 17, value = c(3, 17), step = 1)
reactive({
  mean_letters = mean(selected_props()$letters)
  cat(paste0('Mean = ', sprintf("%.2f", round(mean_letters, 2))))
})
sliderInput("properties_phonemes_DutchPOND", "Number of phonemes", min = 0, max = 15, value = c(0, 15), step = 1)
reactive({
  mean_phonemes = mean(selected_props()$phonemes_DUTCHPOND)
  cat(paste0('Mean = ', sprintf("%.2f", round(mean_phonemes, 2))))
})
```
#### **Word Frequency** {style='margin-top:22px;'}
```{r}
# Word-frequency filters; the mean of the current selection prints below each.
sliderInput("properties_ContextualDiversity", "Contextual diversity (CD)", min = 0, max = 4, value = c(0, 4), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_props()$freq_lg10CD_SUBTLEXNL),2)))))
sliderInput("properties_InflectedContextualDiversity", "CD Inflected adjective", min = 0, max = 4, value = c(0, 4), step = 0.1)
# Bug fix: this mean was computed as sum(...)/336, a hard-coded denominator
# that is only correct when every word is selected; with sidebar filters
# applied it underestimated the mean. mean() keeps the summary consistent
# with the current selection and with the sibling sliders.
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_props()$inflected_adj_freq_lg10CD_SUBTLEXNL),2)))))
sliderInput("properties_WF_SUBTLEXNL", "Word frequency", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_props()$freq_lg10WF_SUBTLEXNL),2)))))
sliderInput("properties_lemma_CELEX", "Lemma frequency", min = 0, max = 3.2, value = c(0, 3.2), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_props()$freq_CELEX_lem),2)))))
```
#### **Distinctiveness** {style='margin-top:22px;'}
```{r}
# Neighbourhood-size filters; means of the current selection print below.
sliderInput("properties_phon_neighbours_DutchPOND", "Phonological neighbours", min = 0, max = 50, value = c(0, 50), step = 1)
reactive({
  mean_phon_neighbours = mean(selected_props()$phon_neighbours_DUTCHPOND)
  cat(paste0('Mean = ', sprintf("%.2f", round(mean_phon_neighbours, 2))))
})
sliderInput("properties_orth_neighbours_DutchPOND", "Orthographic neighbours", min = 0, max = 32, value = c(0, 32), step = 1)
reactive({
  mean_orth_neighbours = mean(selected_props()$orth_neighbours_DUTCHPOND)
  cat(paste0('Mean = ', sprintf("%.2f", round(mean_orth_neighbours, 2))))
})
```
#### **Age of Acquisition** {style='margin-top:22px;'}
```{r}
# Age-of-acquisition filter; the mean of the current selection prints below.
sliderInput("properties_AoA", NULL, min = 0, max = 15, value = c(0, 15), step = 0.1)
reactive({
  mean_aoa = mean(selected_props()$AoA_Brysbaertetal2014)
  cat(paste0('Mean = ', sprintf("%.2f", round(mean_aoa, 2))))
})
```
<div style = "padding-top: 4px; font-size: 0.6em; text-align: left">
#### **Definitions**
**Principal component analysis (PCA):** Method for reducing dimensionality of data while retaining the main patterns ([read more](https://www.nature.com/articles/nmeth.4346)).
<div><span data-toggle="tooltip1" data-placement="bottom" title=" &nbsp; Computed as in Lynott and Connell (2009, 2013). " style="border-bottom: 0.9px dotted grey"><b> Dominant modality: </b></span> Highest-rated modality. </div>
<div style="padding-top:5px;"><span data-toggle="tooltip1" data-placement="bottom" title=" &nbsp; Computed as in Lynott and Connell (2009, 2013). " style="border-bottom: 0.9px dotted grey"><b> Modality exclusivity: </b></span> Range of the three modality ratings divided by the sum. </div>
<div style="padding-top:5px; padding-bottom:6px;"><span data-toggle="tooltip1" data-placement="bottom" title=" &nbsp; Computed as in Lynott and Connell (2009, 2013). " style="border-bottom: 0.9px dotted grey"><b> Perceptual strength: </b> </span> Highest rating across modalities. </div>
#### **Corpora**
**Concreteness and age of acquisition:** [Norms by Brysbaert, Warriner, and Kuperman (2014)](#references).
**Phonological and orthographic neighbours:** [DutchPOND (Marian et al., 2012)](#references).
**Word frequency and contextual diversity:** [SUBTLEX-NL (Keuleers, Brysbaert, & New, 2010)](#references).
**Lemma frequency:** [CELEX (Baayen, Piepenbrock, & van Rijn, 1993)](#references).
</div>
Row {data-height=510px style='data-width:100%; position:static;'}
-----------------------------------------------------------------------
### <span style = 'font-size: 15px;'> **Property words**. Please scroll horizontally and vertically. Column information available by hovering on header. Columns may be simultaneously filtered and sorted (e.g., to find out range), and words may be searched under the header. [CSV](https://osf.io/ge7pn/) and [Excel spreadsheet](https://osf.io/58gzs/) also available.</span> {style='margin-top:10px; padding-top:30px; text-align:justify;'}
<div style = "padding-top: 10px; text-align: justify; background-color:#FCFCFC;">
```{r}
# Reactive for the sliders, applicable to both table and plot.
# Returns the rows of `props` that satisfy every sidebar control at once:
# the dominant-modality picker plus one [min, max] range per slider.
# Because row indices come from which(), any row with NA in a filtered
# column fails its comparison and is dropped from the selection.
selected_props = reactive({
# Values above or equal to minimum selected, and below or equal to maximum selected
props[which(
!is.na(props$word) &
props$main %in% input$properties_modalities &
props$Exclusivity >= as.numeric(min(input$properties_Exclusivity)) &
props$Exclusivity <= as.numeric(max(input$properties_Exclusivity)) &
props$Perceptualstrength >= as.numeric(min(input$properties_Strength)) &
props$Perceptualstrength <= as.numeric(max(input$properties_Strength)) &
props$Auditory >= as.numeric(min(input$properties_Auditory)) &
props$Auditory <= as.numeric(max(input$properties_Auditory)) &
props$Haptic >= as.numeric(min(input$properties_Haptic)) &
props$Haptic <= as.numeric(max(input$properties_Haptic)) &
props$Visual >= as.numeric(min(input$properties_Visual)) &
props$Visual <= as.numeric(max(input$properties_Visual)) &
props$concrete_Brysbaertetal2014 >= as.numeric(min(input$properties_concreteness)) &
props$concrete_Brysbaertetal2014 <= as.numeric(max(input$properties_concreteness)) &
props$letters >= as.numeric(min(input$properties_letters)) &
props$letters <= as.numeric(max(input$properties_letters)) &
props$phonemes_DUTCHPOND >= as.numeric(min(input$properties_phonemes_DutchPOND)) &
props$phonemes_DUTCHPOND <= as.numeric(max(input$properties_phonemes_DutchPOND)) &
props$freq_lg10CD_SUBTLEXNL >= as.numeric(min(input$properties_ContextualDiversity)) &
props$freq_lg10CD_SUBTLEXNL <= as.numeric(max(input$properties_ContextualDiversity)) &
props$inflected_adj_freq_lg10CD_SUBTLEXNL >= as.numeric(min(input$properties_InflectedContextualDiversity)) &
props$inflected_adj_freq_lg10CD_SUBTLEXNL <= as.numeric(max(input$properties_InflectedContextualDiversity)) &
props$freq_lg10WF_SUBTLEXNL >= as.numeric(min(input$properties_WF_SUBTLEXNL)) &
props$freq_lg10WF_SUBTLEXNL <= as.numeric(max(input$properties_WF_SUBTLEXNL)) &
props$freq_CELEX_lem >= as.numeric(min(input$properties_lemma_CELEX)) &
props$freq_CELEX_lem <= as.numeric(max(input$properties_lemma_CELEX)) &
props$phon_neighbours_DUTCHPOND >= as.numeric(min(input$properties_phon_neighbours_DutchPOND)) &
props$phon_neighbours_DUTCHPOND <= as.numeric(max(input$properties_phon_neighbours_DutchPOND)) &
props$orth_neighbours_DUTCHPOND >= as.numeric(min(input$properties_orth_neighbours_DutchPOND)) &
props$orth_neighbours_DUTCHPOND <= as.numeric(max(input$properties_orth_neighbours_DutchPOND)) &
props$AoA_Brysbaertetal2014 >= as.numeric(min(input$properties_AoA)) &
props$AoA_Brysbaertetal2014 <= as.numeric(max(input$properties_AoA))
),]
})
# Variables to show on the table. The order of this vector determines the
# column order in the reactable below.
properties_columns = c('word','inflected_prop','main','Exclusivity','Perceptualstrength','Auditory',
'Haptic', 'Visual', 'SD_Auditory', 'SD_Haptic', 'SD_Visual',
'concrete_Brysbaertetal2014','letters','phonemes_DUTCHPOND',
'freq_lg10CD_SUBTLEXNL','inflected_adj_freq_lg10CD_SUBTLEXNL',
'freq_lg10WF_SUBTLEXNL','freq_CELEX_lem','phon_neighbours_DUTCHPOND',
'orth_neighbours_DUTCHPOND','AoA_Brysbaertetal2014')
# Table of the currently selected property words. Column headers carry
# explanatory tooltips; with_tooltip() and bar_chart() are helpers defined
# elsewhere in this document.
renderReactable({
reactable(selected_props()[properties_columns],
defaultSorted = list(word = 'asc'),
defaultColDef = colDef(footerStyle = list(fontWeight = "bold")),
height = 390, striped = TRUE, pagination = FALSE, highlight = TRUE,
columns = list(
word = colDef(name='Word', filterable=TRUE, minWidth = 160,
style = list(fontWeight='bold', color='#453246')),
inflected_prop = colDef(filterable=TRUE, minWidth = 140,
header = with_tooltip('Inflected adjective',
"Adjectives inflected for masculine and feminine nouns.")),
main = colDef(header = with_tooltip('Dominant Modality',
'Highest-rated modality (as defined in Lynott & Connell, 2009, 2013). Values: Auditory, Haptic, Visual.')),
# NOTE(review): the custom `cell` below appends '%' to the raw value, which
# overrides colFormat's rounding for the displayed text — confirm that
# Exclusivity is already rounded upstream.
Exclusivity = colDef(cell = function(value) {
paste0(value,'%')
},
format = colFormat(digits = 2),
header = with_tooltip('Modality Exclusivity',
'Range of the three modality ratings divided by the sum (as defined in Lynott & Connell, 2009, 2013). Values colour-coded from lower, in grey, to higher, in violet.'),
# Three-step colour coding: grey, dark red, darker red.
style = function(value) {
if (value < 33) {
color = '#9D9D9D'
} else if (value < 67) {
color = '#7A5151'
} else {
color = '#541E1E'
}
list(color = color)
}),
Perceptualstrength = colDef(format = colFormat(digits = 2),
header = with_tooltip('Perceptual Strength',
'Highest rating across modalities (as defined in Lynott & Connell, 2009, 2013). Values colour-coded from lower, in grey, to higher, in violet.'),
style = function(value) {
if (value < 1.7) {
color = '#9D9D9D'
} else if (value < 3.3) {
color = '#7A5151'
} else {
color = '#541E1E'
}
list(color = color)
}),
# For the three rating columns, the bar width is normalised by the maximum
# over the FULL dataset (props), not the filtered selection, so bar lengths
# do not rescale as filters change.
Auditory = colDef(header = with_tooltip('Auditory Rating',
'Mean rating of each word on the auditory modality across participants.'),
cell = function(value) {
width <- paste0(value / max(props$Auditory) * 100, "%")
value = sprintf("%.2f", round(value,2)) # Round to two digits by keeping trailing zeros
bar_chart(value, width = width, fill = '#ff3030')
},
align = 'left'),
Haptic = colDef(header = with_tooltip('Haptic Rating',
'Mean rating of each word on the haptic (tactile) modality across participants.'),
cell = function(value) {
width <- paste0(value / max(props$Haptic) * 100, "%")
value = sprintf("%.2f", round(value,2)) # Round to two digits by keeping trailing zeros
bar_chart(value, width = width, fill = 'dodgerblue')
},
align = 'left'),
Visual = colDef(header = with_tooltip('Visual Rating', 'Mean rating of each word on the visual modality across participants.'),
cell = function(value) {
width <- paste0(value / max(props$Visual) * 100, "%")
value = sprintf("%.2f", round(value,2)) # Round to two digits by keeping trailing zeros
bar_chart(value, width = width, fill = 'forestgreen')
},
align = 'left'),
SD_Auditory = colDef(header = with_tooltip('SD Auditory Rating',
'Standard Deviation of Auditory ratings across participants.'),
format = colFormat(digits = 2)),
SD_Haptic = colDef(header = with_tooltip('SD Haptic Rating',
'Standard Deviation of Haptic ratings across participants.'),
format = colFormat(digits = 2)),
SD_Visual = colDef(header = with_tooltip('SD Visual Rating',
'Standard Deviation of Visual ratings across participants.'),
format = colFormat(digits = 2)),
concrete_Brysbaertetal2014 = colDef(header = with_tooltip('Concreteness', 'Norms by Brysbaert, Warriner, and Kuperman (2014)'),
format = colFormat(digits = 2), minWidth = 110),
letters = colDef(name = 'Letters', format = colFormat(digits = 0)),
phonemes_DUTCHPOND = colDef(header = with_tooltip('Phonemes', 'DutchPOND corpus (Marian et al., 2012).'),
format = colFormat(digits = 0)),
freq_lg10CD_SUBTLEXNL = colDef(header = with_tooltip('Contextual Diversity (CD)',
"An advanced measure of word frequency for 'Word' column, based on SUBTLEX-NL corpus (Keuleers, Brysbaert, & New, 2010)."),
format = colFormat(digits = 2), minWidth = 120),
inflected_adj_freq_lg10CD_SUBTLEXNL = colDef(header = with_tooltip('CD Inflected Adjective',
"An advanced measure of word frequency for 'Inflected Adjective' column, based on SUBTLEX-NL corpus (Keuleers, Brysbaert, & New, 2010)."),
format = colFormat(digits = 2), minWidth = 120),
freq_lg10WF_SUBTLEXNL = colDef(header = with_tooltip('Word Frequency',
"A measure of word frequency for 'Word' column, based on SUBTLEX-NL corpus (Keuleers, Brysbaert, & New, 2010)."),
format = colFormat(digits = 2)),
freq_CELEX_lem = colDef(header = with_tooltip('Lemma Frequency',
"A measure of word frequency for 'Word' column, based on the lemma, i.e., word root (Baayen, Piepenbrock, & van Rijn, 1993)."),
format = colFormat(digits = 2)),
phon_neighbours_DUTCHPOND = colDef(header = with_tooltip('Phonological Neighbours', 'DutchPOND corpus (Marian et al., 2012).'),
format = colFormat(digits = 0)),
orth_neighbours_DUTCHPOND = colDef(header = with_tooltip('Orthographic Neighbours', 'DutchPOND corpus (Marian et al., 2012).'),
format = colFormat(digits = 0), minWidth = 110),
AoA_Brysbaertetal2014 = colDef(header = with_tooltip('Age of Acquisition',
"Norms by Brysbaert, Warriner, and Kuperman (2014)."),
format = colFormat(digits = 2))
)
)
})
```
</div>
Row {data-height=800 style='position:static;'}
-----------------------------------------------------------------------
### <span style = 'font-size: 15px;'> Principal component analysis (PCA) reflecting different relationships among the modalities. The visual and haptic modalities were related, whereas the auditory modality was more independent (see PCA loadings on side bar; cf. <a href="#section-cf-lc-english-norms">Lynott & Connell's 2009 data for English</a>). </span> {style="margin-top: 17px !important; text-align:justify; padding-bottom: 5px !important; padding-left: 4px !important; padding-right: 4px !important; text-align:justify;"}
<!-- create margin -->
<div style="font-size:8px; padding-top:8px;"> </div>
```{r}
# Reactive mirroring the sidebar's word-highlight selection.
highlighted_properties = reactive(input$highlighted_properties)
# Interactive PCA scatterplot: each selected word is plotted at its
# varimax-rotated component scores (RC1, RC2), coloured by dominant modality.
# Hovering shows a rich HTML tooltip with all the word's variables.
renderPlotly({
ggplotly(
ggplot( selected_props(), aes(RC1, RC2, label = as.character(word), color = main,
# Html tags below used for format. Decimals rounded to two.
text = paste0(' ', '<span style="padding-top:3px; padding-bottom:3px; font-size:2.2em; color:#EEEEEE">', capitalize(word), '</span> ', '<br>',
'</b><br><span style="color:#EEEEEE"> Dominant modality: </span><b style="color:#EEEEEE">', main, ' ',
' ', '</b><br><span style="color:#EEEEEE"> Modality exclusivity: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Exclusivity, 2)), '% ',
'</b><br><span style="color:#EEEEEE"> Perceptual strength: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Perceptualstrength, 2)),
'</b><br><span style="color:#EEEEEE"> Auditory rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Auditory, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Haptic rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Haptic, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Visual rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Visual, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Concreteness (Brysbaert et al., 2014): </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(concrete_Brysbaertetal2014, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Number of letters: </span><b style="color:#EEEEEE">', letters, ' ',
'</b><br><span style="color:#EEEEEE"> Number of phonemes (DutchPOND): </span><b style="color:#EEEEEE">',
round(phonemes_DUTCHPOND, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Contextual diversity (lg10CD SUBTLEX-NL): </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(freq_lg10CD_SUBTLEXNL, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Word frequency (lg10WF SUBTLEX-NL): </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(freq_lg10WF_SUBTLEXNL, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Lemma frequency (CELEX): </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(freq_CELEX_lem, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Phonological neighbourhood size (DutchPOND): </span><b style="color:#EEEEEE">',
round(phon_neighbours_DUTCHPOND, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Orthographic neighbourhood size (DutchPOND): </span><b style="color:#EEEEEE">',
round(orth_neighbours_DUTCHPOND, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Age of acquisition (Brysbaert et al., 2014): </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(AoA_Brysbaertetal2014, 2)), ' ', '<br> '
) ) ) +
# Highlighted words: size 7, bold. Otherwise size 3, or a slightly smaller
# 2.8 when a highlight set exists, so highlighted words stand out.
geom_text(size = ifelse(selected_props()$word %in% highlighted_properties(), 7,
ifelse(is.null(highlighted_properties()), 3, 2.8)),
fontface = ifelse(selected_props()$word %in% highlighted_properties(), 'bold', 'plain')) +
geom_point(alpha = 0) + # This geom_point helps to colour the tooltip according to the dominant modality
scale_colour_manual(values = colours, drop = FALSE) + theme_bw() + ggtitle('Property words') +
labs(x = 'Varimax-rotated Principal Component 1', y = 'Varimax-rotated Principal Component 2') +
guides(color = guide_legend(title = 'Main<br>modality')) +
theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'), plot.title = element_text(size = 14, hjust = .5),
axis.title.x = element_text(colour = 'black', size = 12, margin = margin(15,15,0,15)),
axis.title.y = element_text(colour = 'black', size = 12, margin = margin(0,15,15,5)),
axis.text.x = element_text(size = 8), axis.text.y = element_text(size = 8),
legend.background = element_rect(size = 2), legend.position = 'none',
legend.title = element_blank(),
legend.text = element_text(colour = colours, size = 13) ),
tooltip = 'text'
)
})
# For download, save plot without the interactive 'plotly' part.
# Static ggplot version of the PCA scatterplot above, with a visible legend
# and a title reporting the selection size; written to PNG by the
# 'properties_png' downloadHandler.
properties_png = reactive({ ggplot(selected_props(), aes(RC1, RC2, color = main, label = as.character(word))) +
# Same highlight sizing as the interactive plot: 7/bold for highlighted
# words, 3 when nothing is highlighted, 2.8 otherwise.
geom_text(show.legend = FALSE, size = ifelse(selected_props()$word %in% highlighted_properties(), 7,
ifelse(is.null(highlighted_properties()), 3, 2.8)),
fontface = ifelse(selected_props()$word %in% highlighted_properties(), 'bold', 'plain')) +
geom_point(alpha = 0) + scale_colour_manual(values = colours, drop = FALSE) + theme_bw() +
guides(color = guide_legend(title = 'Main<br>modality', override.aes = list(size = 7, alpha = 1))) +
ggtitle( paste0('Properties', ' (showing ', nrow(selected_props()), ' out of ', nrow(props), ')') ) +
labs(x = 'Varimax-rotated Principal Component 1', y = 'Varimax-rotated Principal Component 2') +
theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'), plot.title = element_text(size = 17, hjust = .5, margin = margin(3,3,7,3)),
axis.title.x = element_text(colour = 'black', size = 12, margin = margin(10,10,2,10)),
axis.title.y = element_text(colour = 'black', size = 12, margin = margin(10,10,10,5)),
axis.text.x = element_text(size = 8), axis.text.y = element_text(size = 8),
legend.background = element_rect(size = 2), legend.position = 'right',
legend.title = element_blank(), legend.text = element_text(size = 15))
})
```
Concepts {data-orientation=rows data-height=3200}
=======================================================================
Inputs {.sidebar style='position:fixed; padding-top: 65px; padding-bottom:30px;'}
-----------------------------------------------------------------------
```{r}
# Sidebar link that opens a modal dialog with the definitions of the derived
# measures and the corpora, for the Concepts tab.
actionLink('concepts_DefinitionsCorporaLink', HTML('<span style="font-size: 14px; font-weight:bold; border-bottom:none; margin-bottom: 6px;"><i class="far fa-clipboard" aria-hidden="true"></i> Definitions and corpora </span>'))
# Show the modal on click. Bug fix: the wrapper div declared 'font-size = 14px',
# which is invalid CSS and was silently ignored by browsers; corrected to
# 'font-size: 14px'.
observeEvent(input$concepts_DefinitionsCorporaLink, {
sendSweetAlert(session, type = NULL, title = NULL, html = TRUE,
closeOnClickOutside = TRUE, btn_labels = 'Close',
text = HTML('<div style = "font-size: 14px; text-align: justify;">
<h4 style="background-color: #FFFEF0; padding-top:3px; padding-bottom:3px;"><b> Definitions </b></h4>
<p style="font-size:13px"><b>Principal component analysis (PCA):</b> Method for reducing dimensionality of data while retaining the main patterns (<a href="https://www.nature.com/articles/nmeth.4346">read more</a>).</p>
<p style="font-size:13px"><b> Dominant modality:</b> Highest-rated modality.<sup>*</sup> </p>
<p style="font-size:13px"><b> Modality exclusivity:</b> Range of the three modality ratings divided by the sum.<sup>*</sup> </p>
<p style="font-size:13px"><b> Perceptual strength:</b> Highest rating across modalities.<sup>*</sup> </p>
<p style="font-size:11px !important; padding-bottom:4px;">* Calculated following <a href="#section-info"> Lynott and Connell (2009, 2013)</a>.</p>
<h4 style="background-color: #FFFEF0; padding-top:3px; padding-bottom:3px;"><b> Corpora </b></h4>
<p style="font-size:13px"><b> Concreteness and age of acquisition:</b> Norms by Brysbaert, Warriner, and Kuperman (2014).</p>
<p style="font-size:13px"><b> Phonological and orthographic neighbours:</b> DutchPOND (Marian et al., 2012).</p>
<p style="font-size:13px"><b> Word frequency and contextual diversity:</b> SUBTLEX-NL (Keuleers, Brysbaert, & New, 2010).</p>
<p style="font-size:13px"><b> Lemma frequency:</b> CELEX (Baayen, Piepenbrock, & van Rijn, 1993).</p>
<p><i class="fas fa-book-open" aria-hidden="true"></i> <a href="#section-info">References</a></p>
</div>'))
})
```
```{r}
# Principal component loadings. Tidy-format names by binding them as a column and
# removing dummy rownames left from the principal() output.
# NOTE(review): `conc` and `pc2_conc` are created earlier in the document;
# `pc2_conc` is presumably the varimax-rotated PCA of the three modality
# ratings — confirm against the chunk that builds it.
# cbind() of a character vector with a data frame yields a data frame whose
# first column is auto-named 'names.conc.'; that name is relied on below.
concepts_modality_loadings =
data.frame( cbind( names(conc),
data.frame(pc2_conc$loadings[1:3,1:2])
) )
rownames(concepts_modality_loadings) = NULL
concepts_modality_loadings[,1] = as.factor(concepts_modality_loadings[,1])
# RC (Rotated Component) renamed PC for clarity
colnames(concepts_modality_loadings)[colnames(concepts_modality_loadings)=="names.conc."] = "Variable"
colnames(concepts_modality_loadings)[colnames(concepts_modality_loadings)=="RC1"] = "PC1"
colnames(concepts_modality_loadings)[colnames(concepts_modality_loadings)=="RC2"] = "PC2"
concepts_modality_loadings[,c('PC1','PC2')] = round(concepts_modality_loadings[,c('PC1','PC2')], 2)
# Present values as correlations by removing any zeros before a decimal point.
# Side effect: str_replace_all() converts the two columns to character, which
# is why abs(as.numeric(...)) is needed when highlighting below.
concepts_modality_loadings[,'PC1'] = str_replace_all(concepts_modality_loadings[,'PC1'], "0\\.", "\\.")
concepts_modality_loadings[,'PC2'] = str_replace_all(concepts_modality_loadings[,'PC2'], "0\\.", "\\.")
# Modal dialog showing loadings of principal components for concepts
actionLink("concepts_loadings", " Principal component loadings", class = 'fa fa-table', style = 'border-bottom:none !important')
observeEvent(input$concepts_loadings, {
showModal(modalDialog(
title = HTML('<div style="padding-bottom:0px; font-size:14px; text-align:justify;"> <b>Loadings of principal components (PC) in the form of correlations.</b> Coefficients above <i>r</i> = &plusmn;.70 (i.e., 50% shared variance) shown in bold. When the three modalities were reduced to two principal components, visual and haptic ratings shared one component, whereas the auditory modality was relatively independent, having its own component.</div>'),
div( HTML( # Below, table constructed with kableExtra
concepts_modality_loadings %>%
# Highlight correlations above .7 in bold
mutate(PC1 = ifelse(abs(as.numeric(PC1)) > .7, cell_spec(PC1, "html", bold = TRUE, color = 'black'),
cell_spec(PC1, "html")),
PC2 = ifelse(abs(as.numeric(PC2)) > .7, cell_spec(PC2, "html", bold = TRUE, color = 'black'),
cell_spec(PC2, "html")) ) %>%
kable(format = "html", escape = FALSE) %>%
kable_styling('striped', full_width = FALSE, position = "left")
),
align = 'center'),
size = 'm', easyClose = TRUE, footer = modalButton('Close')
))
})
```
<div style='padding-top:15px;' />
```{r}
# Highlight words.
# Rendered server-side so the choices always track the currently filtered
# data: only words present in selected_concs() can be highlighted on the plot.
renderUI({
selectizeInput("highlighted_concepts", label = NULL, choices = sort(selected_concs()$word), multiple = TRUE,
options = list(placeholder = 'Words to highlight on plot '))
})
})
```
</div>
##### Data selected below
<!-- Output right below removed from mobiles because it's badly displayed -->
#### {.desktop-only}
<div style='padding-top:9px; margin-bottom:-12px;'>
```{r}
# Number of words selected on sidebar below.
# cat() prints the count as plain text in the rendered chunk output.
reactive(cat(paste0(nrow(selected_concs()), ' Concept words')))
```
</div>
<!-- Show again on all devices -->
####
```{r}
# Downloads for the data currently selected on the sidebar: a CSV of the
# filtered rows and a PNG of the static plot.

# Timestamped file name shared by the CSV and PNG downloads. Colons are
# stripped from the time because they are illegal in file names on some
# systems (e.g. Windows).
concepts_download_filename = function(extension) {
paste0('Selected-Dutch-modality-norms-Bernabeu-et-al-',
str_replace_all(format(Sys.time(), '%d-%b-%Y_%X-%Z'), ':', '-'),
extension)
}

# Download data selected on sidebar as a CSV file
# Create placeholder for download link
uiOutput("concepts_csv_link")
# Create the actual downloadLink
output$concepts_csv_link = renderUI({
downloadLink("concepts_csv", " Download spreadsheet", class = "fa fa-download", style = 'border-bottom:none !important')
})
# Add download handling
output$concepts_csv = downloadHandler(
filename = function() concepts_download_filename('.csv'),
content = function(file) {
# Leave Dutch data inside and remove English data, which aren't used in the tabs that have CSV download
drops = c('id','file','normed','English_Word_Lynott_Connell_2009_2013','English_Main_Lynott_Connell_2009_2013','English_Perceptualstrength_Lynott_Connell_2009_2013','English_Exclusivity_Lynott_Connell_2009_2013','English_Auditory_Lynott_Connell_2009_2013','English_Haptic_Lynott_Connell_2009_2013','English_Visual_Lynott_Connell_2009_2013','English_Letters_Lynott_Connell_2009_2013','RC1','RC2')
write.csv(selected_concs()[,!(names(selected_concs()) %in% drops)],
file, row.names = FALSE, fileEncoding = 'Latin1')
}
)
# Download data selected on sidebar as a plot
# Create placeholder for download link
uiOutput("concepts_png_link")
# Create the actual downloadLink
output$concepts_png_link = renderUI({
downloadLink("concepts_png", " Download plot", class = "fa fa-download", style = 'border-bottom:none !important')
})
# Add download handling
output$concepts_png = downloadHandler(
filename = function() concepts_download_filename('.png'),
content = function(file){
png(file, units='in', width=13, height=11, res=900)
# Close the device even if print() errors, so a failed plot render does
# not leave an open graphics device behind.
on.exit(dev.off(), add = TRUE)
print(concepts_png())
},
contentType = 'image/png'
)
```
#### **Definite Article** {style='margin-top:22px;'}
```{r}
# Filter noun category (Dutch definite article: 'de' = feminine/masculine,
# 'het' = neuter). Both are selected by default, so no rows are excluded
# until the user deselects one.
pickerInput(inputId = "concepts_nouncategory", choices = c('de', 'het'),
choicesOpt = list(content = c("Fem/Masc 'de' noun", "Neuter 'het' noun")),
multiple = TRUE, selected = c('de', 'het'),
options = list(style = "light", size = 15))
```
#### **Modality** {style='margin-top:22px;'}
```{r}
# HTML badge labels for the modality picker (red = auditory, blue = haptic,
# green = visual). These are fixed literals with no format directives, so the
# sprintf() wrappers the original used were no-ops and have been removed.
modalities_labels = c(
'<span style="margin: 0px 0px 4px 0px !important; font-size: 14px !important; text-align: center !important; text-decoration: none !important; background-color: #ff3030; border: none !important; border-radius: 4px !important; color: #FDFDFD !important; cursor: pointer !important; padding: 0.9px 3px 4.6px !important;"> Auditory </span>',
'<span style="margin: 0px 0px 4px 0px !important; font-size: 14px !important; text-align: center !important; text-decoration: none !important; background-color: dodgerblue; border-radius: 4px !important; border: none !important; color: #FDFDFD !important; cursor: pointer !important; padding: 0.9px 3px 4.6px !important;"> Haptic </span>',
'<span style="margin: 0px 0px 4px 0px !important; font-size: 14px !important; text-align: center !important; text-decoration: none !important; background-color: forestgreen; border-radius: 4px !important; border: none !important; color: #fdfdfd !important; cursor: pointer !important; padding: 0.9px 3px 4.6px !important;"> Visual </span>'
)
# Dominant-modality filter; all modalities selected by default.
pickerInput(inputId = "concepts_modalities", label = "Dominant modalities",
choices = sort(unique(concs$main)),
choicesOpt = list(content = modalities_labels),
multiple = TRUE, selected = unique(concs$main),
options = list(style = "light", size = 15))
# Range sliders for the norm measures. Under each slider, a reactive prints
# the mean of the currently selected rows, formatted to two decimals.
sliderInput("concepts_Exclusivity", "Modality exclusivity (%)", min = 0, max = 100, value = c(0, 100))
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$Exclusivity),2)))))
sliderInput("concepts_Strength", "Perceptual strength", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$Perceptualstrength),2)))))
sliderInput("concepts_Auditory", "Auditory rating", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$Auditory),2)))))
sliderInput("concepts_Haptic", "Haptic rating", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$Haptic),2)))))
sliderInput("concepts_Visual", "Visual rating", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$Visual),2)))))
```
#### **Concreteness** {style='margin-top:22px;'}
```{r}
# Concreteness range filter (Brysbaert et al., 2014 norms), with the mean of
# the current selection printed below it.
sliderInput("concepts_concreteness", NULL, min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$concrete_Brysbaertetal2014),2)))))
```
#### **Length** {style='margin-top:22px;'}
```{r}
# Word-length filters: letters and phonemes (DutchPOND), each followed by the
# mean of the current selection.
sliderInput("concepts_letters", "Number of letters", min = 3, max = 17, value = c(3, 17), step = 1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$letters),2)))))
sliderInput("concepts_phonemes_DutchPOND", "Number of phonemes", min = 0, max = 15, value = c(0, 15), step = 1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$phonemes_DUTCHPOND),2)))))
```
#### **Word Frequency** {style='margin-top:22px;'}
```{r}
# Frequency filters: contextual diversity and word frequency (both lg10,
# SUBTLEX-NL) and lemma frequency (CELEX), each with the current mean below.
sliderInput("concepts_ContextualDiversity", "Contextual diversity (CD)", min = 0, max = 4, value = c(0, 4), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$freq_lg10CD_SUBTLEXNL),2)))))
sliderInput("concepts_WF_SUBTLEXNL", "Word frequency", min = 0, max = 5, value = c(0, 5), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$freq_lg10WF_SUBTLEXNL),2)))))
sliderInput("concepts_lemma_CELEX", "Lemma frequency", min = 0, max = 3.2, value = c(0, 3.2), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$freq_CELEX_lem),2)))))
```
#### **Distinctiveness** {style='margin-top:22px;'}
```{r}
# Distinctiveness filters: phonological and orthographic neighbourhood sizes
# (DutchPOND), each with the current mean below.
sliderInput("concepts_phon_neighbours_DutchPOND", "Phonological neighbours", min = 0, max = 50, value = c(0, 50), step = 1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$phon_neighbours_DUTCHPOND),2)))))
sliderInput("concepts_orth_neighbours_DutchPOND", "Orthographic neighbours", min = 0, max = 32, value = c(0, 32), step = 1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$orth_neighbours_DUTCHPOND),2)))))
```
#### **Age of Acquisition** {style='margin-top:22px;'}
```{r}
# Age-of-acquisition filter (Brysbaert et al., 2014 norms), with the mean of
# the current selection printed below it.
sliderInput("concepts_AoA", NULL, min = 0, max = 15, value = c(0, 15), step = 0.1)
reactive(cat(paste0('Mean = ', sprintf("%.2f", round(mean(selected_concs()$AoA_Brysbaertetal2014),2)))))
```
<div style = "padding-top: 4px; font-size = 0.6em; text-align: left">
#### **Definitions**
**Principal component analysis (PCA):** Method for reducing dimensionality of data while retaining the main patterns ([read more](https://www.nature.com/articles/nmeth.4346)).
<div><span data-toggle="tooltip1" data-placement="bottom" title=" &nbsp; Computed as in Lynott and Connell (2009, 2013). " style="border-bottom: 0.9px dotted grey"><b> Dominant modality: </b></span> Highest-rated modality. </div>
<div style="padding-top:5px;"><span data-toggle="tooltip1" data-placement="bottom" title=" &nbsp; Computed as in Lynott and Connell (2009, 2013). " style="border-bottom: 0.9px dotted grey"><b> Modality exclusivity: </b></span> Range of the three modality ratings divided by the sum. </div>
<div style="padding-top:5px; padding-bottom:6px;"><span data-toggle="tooltip1" data-placement="bottom" title=" &nbsp; Computed as in Lynott and Connell (2009, 2013). " style="border-bottom: 0.9px dotted grey"><b> Perceptual strength: </b> </span> Highest rating across modalities. </div>
#### **Corpora**
**Concreteness and age of acquisition:** [Norms by Brysbaert, Warriner, and Kuperman (2014)](#references).
**Phonological and orthographic neighbours**: [DutchPOND (Marian et al., 2012)](#references).
**Word frequency and contextual diversity**: [SUBTLEX-NL (Keuleers, Brysbaert, & New, 2010)](#references).
**Lemma frequency**: [CELEX (Baayen, Piepenbrock, & van Rijn, 1993)](#references).
</div>
Row {data-height=510 style='width:100%; position:static;'}
-----------------------------------------------------------------------
### <span style = 'font-size: 15px;'> **Concept words**. Please scroll horizontally and vertically. Column information available by hovering on header. Columns may be simultaneously filtered and sorted (e.g., to find out range), and words may be searched under the header. [CSV](https://osf.io/ge7pn/) and [Excel spreadsheet](https://osf.io/58gzs/) also available.</span> {style='margin-top:10px; padding-top:30px; text-align: justify'}
<div style = "padding-top: 10px; text-align: justify; background-color:#FCFCFC;">
```{r}
# Reactive for the sidebar controls, applicable to both the table and the
# plot: returns the rows of `concs` that satisfy every filter at once.
selected_concs = reactive({
# TRUE where x lies inside the two-ended slider range `bounds`,
# inclusive at both ends.
within_slider = function(x, bounds) {
x >= as.numeric(min(bounds)) & x <= as.numeric(max(bounds))
}
keep =
!is.na(concs$word) &
concs$main %in% input$concepts_modalities &
concs$conc_cat %in% input$concepts_nouncategory &
within_slider(concs$Exclusivity, input$concepts_Exclusivity) &
within_slider(concs$Perceptualstrength, input$concepts_Strength) &
within_slider(concs$Auditory, input$concepts_Auditory) &
within_slider(concs$Haptic, input$concepts_Haptic) &
within_slider(concs$Visual, input$concepts_Visual) &
within_slider(concs$concrete_Brysbaertetal2014, input$concepts_concreteness) &
within_slider(concs$letters, input$concepts_letters) &
within_slider(concs$phonemes_DUTCHPOND, input$concepts_phonemes_DutchPOND) &
within_slider(concs$phon_neighbours_DUTCHPOND, input$concepts_phon_neighbours_DutchPOND) &
within_slider(concs$orth_neighbours_DUTCHPOND, input$concepts_orth_neighbours_DutchPOND) &
within_slider(concs$freq_lg10CD_SUBTLEXNL, input$concepts_ContextualDiversity) &
within_slider(concs$freq_lg10WF_SUBTLEXNL, input$concepts_WF_SUBTLEXNL) &
within_slider(concs$freq_CELEX_lem, input$concepts_lemma_CELEX) &
within_slider(concs$AoA_Brysbaertetal2014, input$concepts_AoA)
# which() drops NA comparisons, exactly as the original chained form did.
concs[which(keep),]
})
# Variables to have on table. The order of this vector is the left-to-right
# column order of the reactable below.
concepts_columns = c('word','conc_cat','main','Exclusivity','Perceptualstrength','Auditory',
'Haptic', 'Visual', 'SD_Auditory', 'SD_Haptic', 'SD_Visual',
'concrete_Brysbaertetal2014','letters','phonemes_DUTCHPOND',
'freq_lg10CD_SUBTLEXNL','freq_lg10WF_SUBTLEXNL','freq_CELEX_lem',
'phon_neighbours_DUTCHPOND','orth_neighbours_DUTCHPOND',
'AoA_Brysbaertetal2014')
# Table of the currently selected concept words. Each column gets a tooltip
# header (via the `with_tooltip` helper defined elsewhere in the document)
# and, where informative, colour-coding or in-cell bar charts (`bar_chart`,
# also defined elsewhere).
renderReactable({
reactable(selected_concs()[concepts_columns],
defaultSorted = list(word = 'asc'),
defaultColDef = colDef(footerStyle = list(fontWeight = "bold")),
height = 390, striped = TRUE, pagination = FALSE, highlight = TRUE,
columns = list(
word = colDef(name='Word', filterable=TRUE, minWidth = 160,
style = list(fontWeight='bold', color='#453246')),
conc_cat = colDef(header = with_tooltip('Definite article',
"Definite article 'de' is used for feminine and masculine nouns, whereas 'het' is used for neuter nouns.")),
main = colDef(header = with_tooltip('Dominant Modality',
'Highest-rated modality (as defined in Lynott & Connell, 2009, 2013). Values: Auditory, Haptic, Visual.')),
# Exclusivity rendered with a '%' suffix and colour-coded in three
# bands (roughly thirds of the 0-100 range): grey, brown, dark violet.
Exclusivity = colDef(cell = function(value) {
paste0(value,'%')
},
format = colFormat(digits = 2),
header = with_tooltip('Modality Exclusivity',
'Range of the three modality ratings divided by the sum (as defined in Lynott & Connell, 2009, 2013). Values colour-coded from lower, in grey, to higher, in violet.'),
style = function(value) {
if (value < 33) {
color = '#9D9D9D'
} else if (value < 67) {
color = '#7A5151'
} else {
color = '#541E1E'
}
list(color = color)
}),
# Perceptual strength colour-coded in thirds of the 0-5 rating scale.
Perceptualstrength = colDef(format = colFormat(digits = 2),
header = with_tooltip('Perceptual Strength',
'Highest rating across modalities (as defined in Lynott & Connell, 2009, 2013). Values colour-coded from lower, in grey, to higher, in violet.'),
style = function(value) {
if (value < 1.7) {
color = '#9D9D9D'
} else if (value < 3.3) {
color = '#7A5151'
} else {
color = '#541E1E'
}
list(color = color)
}),
# The three modality-rating columns show an in-cell bar scaled to the
# column maximum in the full data set (not just the selection).
Auditory = colDef(header = with_tooltip('Auditory Rating',
'Mean rating of each word on the auditory modality across participants.'),
cell = function(value) {
width <- paste0(value / max(concs$Auditory) * 100, "%")
value = sprintf("%.2f", round(value,2)) # Round to two digits by keeping trailing zeros
bar_chart(value, width = width, fill = '#ff3030')
},
align = 'left'),
Haptic = colDef(header = with_tooltip('Haptic Rating',
'Mean rating of each word on the haptic (tactile) modality across participants.'),
cell = function(value) {
width <- paste0(value / max(concs$Haptic) * 100, "%")
value = sprintf("%.2f", round(value,2)) # Round to two digits by keeping trailing zeros
bar_chart(value, width = width, fill = 'dodgerblue')
},
align = 'left'),
Visual = colDef(header = with_tooltip('Visual Rating', 'Mean rating of each word on the visual modality across participants.'),
cell = function(value) {
width <- paste0(value / max(concs$Visual) * 100, "%")
value = sprintf("%.2f", round(value,2)) # Round to two digits by keeping trailing zeros
bar_chart(value, width = width, fill = 'forestgreen')
},
align = 'left'),
SD_Auditory = colDef(header = with_tooltip('SD Auditory Rating',
'Standard Deviation of Auditory ratings across participants.'),
format = colFormat(digits = 2)),
SD_Haptic = colDef(header = with_tooltip('SD Haptic Rating',
'Standard Deviation of Haptic ratings across participants.'),
format = colFormat(digits = 2)),
SD_Visual = colDef(header = with_tooltip('SD Visual Rating',
'Standard Deviation of Visual ratings across participants.'),
format = colFormat(digits = 2)),
concrete_Brysbaertetal2014 = colDef(header = with_tooltip('Concreteness', 'Norms by Brysbaert, Warriner, and Kuperman (2014)'),
format = colFormat(digits = 2), minWidth = 110),
letters = colDef(name = 'Letters', format = colFormat(digits = 0)),
phonemes_DUTCHPOND = colDef(header = with_tooltip('Phonemes', 'DutchPOND corpus (Marian et al., 2012).'),
format = colFormat(digits = 0)),
freq_lg10CD_SUBTLEXNL = colDef(header = with_tooltip('Contextual Diversity (CD)',
"An advanced measure of word frequency for 'Word' column, based on SUBTLEX-NL corpus (Keuleers, Brysbaert, & New, 2010)."),
format = colFormat(digits = 2), minWidth = 120),
freq_lg10WF_SUBTLEXNL = colDef(header = with_tooltip('Word Frequency',
"A measure of word frequency for 'Word' column, based on SUBTLEX-NL corpus (Keuleers, Brysbaert, & New, 2010)."),
format = colFormat(digits = 2)),
freq_CELEX_lem = colDef(header = with_tooltip('Lemma Frequency',
"A measure of word frequency for 'Word' column, based on the lemma, i.e., word root (Baayen, Piepenbrock, & van Rijn, 1993)."),
format = colFormat(digits = 2)),
phon_neighbours_DUTCHPOND = colDef(header = with_tooltip('Phonological Neighbours', 'DutchPOND corpus (Marian et al., 2012).'),
format = colFormat(digits = 0)),
orth_neighbours_DUTCHPOND = colDef(header = with_tooltip('Orthographic Neighbours', 'DutchPOND corpus (Marian et al., 2012).'),
format = colFormat(digits = 0), minWidth = 110),
AoA_Brysbaertetal2014 = colDef(header = with_tooltip('Age of Acquisition',
"Norms by Brysbaert, Warriner, and Kuperman (2014)."),
format = colFormat(digits = 2))
)
)
})
```
</div>
Row {data-height=800 style='position:static;'}
-----------------------------------------------------------------------
### <span style = 'font-size: 15px; text-align: justify !important;'> Principal component analysis (PCA) reflecting different relationships among the modalities. The visual and haptic modalities were related, whereas the auditory modality was more independent (see PCA loadings on side bar; cf. <a href="#section-cf-lc-english-norms">Lynott & Connell's 2009 data for English</a>). </span> {style="margin-top: 17px !important; text-align:justify; padding-bottom: 5px !important; padding-left: 4px !important; padding-right: 4px !important;"}
<!-- create margin -->
<div style="font-size:8px; padding-top:8px;"> </div>
```{r}
# Reactive for the word bar: the words the user chose to highlight on the
# plot (may be NULL when nothing is selected).
highlighted_concepts = reactive(input$highlighted_concepts)
# Interactive PCA scatter plot of the selected concept words on the two
# varimax-rotated components, coloured by dominant modality. The `text`
# aesthetic builds the hover tooltip as HTML; highlighted words are drawn
# larger and bold. `colours` is a palette defined elsewhere in the document.
renderPlotly({
ggplotly(
ggplot( selected_concs(), aes(RC1, RC2, color = main, label = as.character(word),
# Html tags below used for format. Decimals rounded to two.
text = paste0(' ', '<span style="padding-top:3px; padding-bottom:3px; font-size:2.2em; color:#EEEEEE">', capitalize(word), '</span> ', '<br>',
'</b><br><span style="color:#EEEEEE"> Definite article: </span><b style="color:#EEEEEE">', conc_cat, ' ',
'</b><br><span style="color:#EEEEEE"> Dominant modality: </span><b style="color:#EEEEEE">', main, ' ',
' ', '</b><br><span style="color:#EEEEEE"> Modality exclusivity: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Exclusivity, 2)), '% ',
'</b><br><span style="color:#EEEEEE"> Perceptual strength: </span><b style="color:#EEEEEE">', round(Perceptualstrength, 2),
'</b><br><span style="color:#EEEEEE"> Auditory rating: </span><b style="color:#EEEEEE">', round(Auditory, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Haptic rating: </span><b style="color:#EEEEEE">', round(Haptic, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Visual rating: </span><b style="color:#EEEEEE">', round(Visual, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Concreteness (Brysbaert et al., 2014): </span><b style="color:#EEEEEE">',
round(concrete_Brysbaertetal2014, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Number of letters: </span><b style="color:#EEEEEE">', letters, ' ',
'</b><br><span style="color:#EEEEEE"> Number of phonemes (DutchPOND): </span><b style="color:#EEEEEE">',
round(phonemes_DUTCHPOND, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Contextual diversity (lg10CD SUBTLEX-NL): </span><b style="color:#EEEEEE">',
round(freq_lg10CD_SUBTLEXNL, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Word frequency (lg10WF SUBTLEX-NL): </span><b style="color:#EEEEEE">',
round(freq_lg10WF_SUBTLEXNL, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Lemma frequency (CELEX): </span><b style="color:#EEEEEE">', round(freq_CELEX_lem, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Phonological neighbourhood size (DutchPOND): </span><b style="color:#EEEEEE">',
round(phon_neighbours_DUTCHPOND, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Orthographic neighbourhood size (DutchPOND): </span><b style="color:#EEEEEE">',
round(orth_neighbours_DUTCHPOND, 2), ' ',
'</b><br><span style="color:#EEEEEE"> Age of acquisition (Brysbaert et al., 2014): </span><b style="color:#EEEEEE">',
round(AoA_Brysbaertetal2014, 2), ' ', '<br> ' ) ) ) +
# Highlighted words at size 7 and bold; otherwise 3 (or 2.8 when a
# highlight selection exists, to make the highlighted words stand out).
geom_text(size = ifelse(selected_concs()$word %in% highlighted_concepts(), 7,
ifelse(is.null(highlighted_concepts()), 3, 2.8)),
fontface = ifelse(selected_concs()$word %in% highlighted_concepts(), 'bold', 'plain')) +
geom_point(alpha = 0) + # This geom_point helps to colour the tooltip according to the dominant modality
scale_colour_manual(values = colours, drop = FALSE) + theme_bw() + ggtitle('Concept words') +
labs(x = 'Varimax-rotated Principal Component 1', y = 'Varimax-rotated Principal Component 2') +
guides(color = guide_legend(title = 'Main<br>modality')) +
theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'), plot.title = element_text(size = 14, hjust = .5),
axis.title.x = element_text(colour = 'black', size = 12, margin = margin(15,15,0,15)),
axis.title.y = element_text(colour = 'black', size = 12, margin = margin(0,15,15,5)),
axis.text.x = element_text(size = 8), axis.text.y = element_text(size = 8),
legend.background = element_rect(size = 2), legend.position = 'none',
legend.title = element_blank(),
legend.text = element_text(colour = colours, size = 15) ),
tooltip = 'text'
)
})
# For download, save plot without the interactive 'plotly' part.
# This is the static version of the plot above, with a visible legend and a
# title reporting how many words are shown; consumed by the PNG downloadHandler.
concepts_png = reactive({ ggplot(selected_concs(), aes(RC1, RC2, color = main, label = as.character(word))) +
geom_text(size = ifelse(selected_concs()$word %in% highlighted_concepts(), 7,
ifelse(is.null(highlighted_concepts()), 3, 2.8)),
fontface = ifelse(selected_concs()$word %in% highlighted_concepts(), 'bold', 'plain')) +
geom_point(alpha = 0) + scale_colour_manual(values = colours, drop = FALSE) + theme_bw() +
guides(color = guide_legend(title = 'Main<br>modality', override.aes = list(size = 7, alpha = 1))) +
ggtitle( paste0('Concepts', ' (showing ', nrow(selected_concs()), ' out of ', nrow(concs), ')') ) +
labs(x = 'Varimax-rotated Principal Component 1', y = 'Varimax-rotated Principal Component 2') +
theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'), plot.title = element_text(size = 17, hjust = .5, margin = margin(3,3,7,3)),
axis.title.x = element_text(colour = 'black', size = 12, margin = margin(10,10,2,10)),
axis.title.y = element_text(colour = 'black', size = 12, margin = margin(10,10,10,5)),
axis.text.x = element_text(size = 8), axis.text.y = element_text(size = 8),
legend.background = element_rect(size = 2), legend.position = 'right',
legend.title = element_blank(), legend.text = element_text(size = 15) )
})
```
Cf. L&C English Norms
=======================================================================
<div style = "background-color: #FCFCFC; text-align: justify; font-size: 15px; margin-top: 10px; padding-top: 30px; padding-bottom: 5px; padding-left: 20px; padding-right: 20px;">
<b> Comparison with Lynott and Connell's (2009, 2013) English norms ([see complete data](http://www.lancaster.ac.uk/staff/connelll/lab/norms.html)).</b> In both languages, the visual and the haptic modalities were more related to each other than to the auditory modality. Properties were more strongly perceptual than concepts.
</div>
Column {style="height:1500px;" data-padding=1}
-----------------------------------------------------------------------
### <span style='font-size:15px; text-align: justify !important;'> Reanalysis of [Lynott and Connell's (2009) English properties](https://doi.org/10.3758/BRM.41.2.558) narrowed to three modalities</span>
```{r include = FALSE}
# Check conditions for a PCA on Lynott & Connell's (2009) English property
# ratings, narrowed to the three modalities shared with the Dutch norms.
# The commented-out lines record the exploratory checks that were run.
# Matrix
eng_prop = all[all$cat == 'Property', c('English_Auditory_Lynott_Connell_2009_2013', 'English_Haptic_Lynott_Connell_2009_2013', 'English_Visual_Lynott_Connell_2009_2013')]
#nrow(eng_prop)
eng_prop_matrix = cor(eng_prop, use = 'complete.obs')
#eng_prop_matrix
#round(eng_prop_matrix, 2)
# OK: correlations good for a PCA, with enough < .3
# now on the raw vars:
#nrow(eng_prop)
#cortest.bartlett(eng_prop)
# GOOD: Bartlett's test significant
# KMO: Kaiser-Meyer-Olkin Measure of Sampling Adequacy
#KMO(eng_prop_matrix)
# Result: .56 = mediocre. PCA not strongly recommended. But we still do it
# because the purpose is graphical only.
# check determinant
#det(eng_prop_matrix)
# GOOD: > 0.00001
# start off with unrotated PCA
pc1_eng_prop = psych::principal(eng_prop, nfactors = 3, rotate = "none")
#pc1_eng_prop
# RESULT: Extract either one PC, acc. to Kaiser's criterion, or two RCs, acc.
# to Jolliffe's (Field, Miles, & Field, 2012)
# Unrotated: scree plot
#plot(pc1_eng_prop$values, type = "b")
# Result: again one or two RCs should be extracted
# Now with varimax rotation, Kaiser-normalized (by default)
pc2_eng_prop = psych::principal(eng_prop, nfactors = 2, rotate = "varimax",
scores = TRUE)
#pc2_eng_prop
#pc2_eng_prop$loadings
# two components are good, as they both have eigenvalues over 1
#pc2_eng_prop$residual
#pc2_eng_prop$fit
#pc2_eng_prop$communality
# Results based on a Kaiser-normalized orthogonal (varimax) rotation
# (by default in psych::principal). Residuals bad: more than 50% have absolute
# values > 0.05. Model fit good, > .90. Communalities good, all > .7.
# subset and add the component scores (RC1, RC2) as columns
eng_props = all[all$cat == 'Property', ]
#nrow(eng_props)
eng_props = cbind(eng_props, pc2_eng_prop$scores)
#nrow(eng_props)
#head(eng_props)
# Set word factor to character format (needed for plot labels below)
eng_props$English_Word_Lynott_Connell_2009_2013 = as.character(eng_props$English_Word_Lynott_Connell_2009_2013)
```
```{r}
# Interactive PCA scatter plot of the reanalysed English properties, coloured
# by dominant modality, with an HTML hover tooltip (same layout as the Dutch
# plots above). `colours` is a palette defined elsewhere in the document.
ggplotly( ggplot(eng_props,
aes(RC1, RC2, label = as.character(English_Word_Lynott_Connell_2009_2013), color = English_Main_Lynott_Connell_2009_2013,
text = paste0(' ', '<span style="padding-top:3px; padding-bottom:3px; font-size:2.3em; color:#EEEEEE">', capitalize(English_Word_Lynott_Connell_2009_2013), '</span> ', '<br>',
'</b><br><span style="color:#EEEEEE"> Dominant modality: </span><b style="color:#EEEEEE">', English_Main_Lynott_Connell_2009_2013, ' ',
'</b><br><span style="color:#EEEEEE"> Modality exclusivity: </span><b style="color:#EEEEEE">',
paste0(sprintf("%.2f", round(Exclusivity, 2)), '% '),
'</b><br><span style="color:#EEEEEE"> Perceptual strength: </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(Perceptualstrength, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Auditory rating: </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(Auditory, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Haptic rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Haptic, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Visual rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Visual, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Concreteness (Brysbaert et al., 2014): </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(concrete_Brysbaertetal2014, 2)), ' ', '<br> '
) ) ) +
labs(x = "Varimax-rotated Principal Component 1", y = "Varimax-rotated Principal Component 2") +
geom_text(size = 2.8, show.legend=FALSE) + scale_colour_manual(values = colours, drop = FALSE) + theme_bw() +
geom_point(alpha = 0) + # This geom_point helps to colour the tooltip according to the dominant modality
guides(color = guide_legend(title = 'Main<br>modality', override.aes = list(size = 7, alpha = 1))) +
theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'),
axis.title.x = element_text(colour = 'black', size = 9),
axis.title.y = element_text(colour = 'black', size = 9),
axis.text.x = element_text(size = 6), axis.text.y = element_text(size = 6),
legend.title = element_blank(), plot.title = element_blank() ),
tooltip = 'text' )
```
### <span style='font-size:15px; text-align: justify !important;'> Reanalysis of [Lynott and Connell's (2013) English concepts](https://doi.org/10.3758/s13428-012-0267-0) narrowed to three modalities</span>
```{r include = FALSE}
# Check conditions for a PCA on Lynott & Connell's (2013) English concept
# ratings, narrowed to the three modalities shared with the Dutch norms.
# The commented-out lines record the exploratory checks that were run.
# matrix
eng_conc = all[all$cat == 'Concept', c('English_Auditory_Lynott_Connell_2009_2013', 'English_Haptic_Lynott_Connell_2009_2013', 'English_Visual_Lynott_Connell_2009_2013')]
#nrow(eng_conc)
eng_conc_matrix = cor(eng_conc, use = 'complete.obs')
#eng_conc_matrix
#round(eng_conc_matrix, 2)
# POOR: correlations not apt for a PCA, with too many below .3
# now on the raw data:
#nrow(eng_conc)
#cortest.bartlett(eng_conc)
# GOOD: Bartlett's test significant
# KMO: Kaiser-Meyer-Olkin Measure of Sampling Adequacy
#KMO(eng_conc_matrix)
# Result: .48 = poor. PCA not strongly recommended. But we still do it
# because the purpose is graphical really.
# check determinant
#det(eng_conc_matrix)
# GOOD: > 0.00001
# start off with unrotated PCA
pc1_eng_conc = psych::principal(eng_conc, nfactors = 3, rotate = "none")
#pc1_eng_conc
# RESULT: Extract either one PC, acc. to Kaiser's criterion, or two RCs, acc.
# to Jolliffe's (Field, Miles, & Field, 2012)
# Unrotated: scree plot
#plot(pc1_eng_conc$values, type = "b")
# Result: two PCs obtain.
# Now with varimax rotation, Kaiser-normalized (by default):
# always preferable because it captures explained variance best.
pc2_eng_conc = psych::principal(eng_conc, nfactors = 2, rotate = "varimax",
scores = TRUE)
#pc2_eng_conc
#pc2_eng_conc$loadings
#pc2_eng_conc$residual
#pc2_eng_conc$fit
#pc2_eng_conc$communality
# Results based on a Kaiser-normalized orthogonal (varimax) rotation
# (by default in psych::principal). Residuals bad: over 50% have absolute
# values > 0.05. Model fit good, > .90. Communalities good, all > .7.
# subset and add the component scores (RC1, RC2) as columns
eng_concs = all[all$cat == 'Concept', ]
#nrow(eng_concs)
eng_concs = cbind(eng_concs, pc2_eng_conc$scores)
#summary(eng_concs$RC1, eng_concs$RC2)
# Keep only concepts normed in Dutch and English, or in English only
eng_concs = eng_concs[eng_concs$normed == 'Dut_Eng' | eng_concs$normed ==
'English',]
#nrow(eng_concs)
#summary(eng_concs$RC1, eng_concs$RC2)
# Set word factor to character format (needed for plot labels below)
eng_concs$English_Word_Lynott_Connell_2009_2013 = as.character(eng_concs$English_Word_Lynott_Connell_2009_2013)
```
```{r}
# Interactive scatterplot of the English concepts on the two varimax-rotated
# principal components, coloured by each word's dominant modality. The 'text'
# aesthetic builds the HTML tooltip shown by plotly (tooltip = 'text' below).
# NOTE(review): 'colours' is a palette vector defined elsewhere in this file.
ggplotly( ggplot(eng_concs,
aes(RC1, RC2, label = as.character(English_Word_Lynott_Connell_2009_2013), color = English_Main_Lynott_Connell_2009_2013,
text = paste0(' ', '<span style="padding-top:3px; padding-bottom:3px; font-size:2.3em; color:#EEEEEE">', capitalize(English_Word_Lynott_Connell_2009_2013), '</span> ', '<br>',
'</b><br><span style="color:#EEEEEE"> Dominant modality: </span><b style="color:#EEEEEE">', English_Main_Lynott_Connell_2009_2013, ' ',
'</b><br><span style="color:#EEEEEE"> Modality exclusivity: </span><b style="color:#EEEEEE">',
paste0(sprintf("%.2f", round(Exclusivity, 2)), '% '),
'</b><br><span style="color:#EEEEEE"> Perceptual strength: </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(Perceptualstrength, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Auditory rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Auditory, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Haptic rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Haptic, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Visual rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Visual, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Concreteness (Brysbaert et al., 2014): </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(concrete_Brysbaertetal2014, 2)), ' ', '<br> '
) ) ) +
labs(x = "Varimax-rotated Principal Component 1", y = "Varimax-rotated Principal Component 2") +
geom_text(size = 2.8, show.legend=FALSE) + scale_colour_manual(values = colours, drop = FALSE) + theme_bw() +
geom_point(alpha = 0) + # This geom_point helps to colour the tooltip according to the dominant modality
guides(color = guide_legend(title = 'Main<br>modality', override.aes = list(size = 7, alpha = 1))) +
# Minimal theme: no grid/border/titles, small axis text
theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'),
axis.title.x = element_text(colour = 'black', size = 9),
axis.title.y = element_text(colour = 'black', size = 9),
axis.text.x = element_text(size = 6), axis.text.y = element_text(size = 6),
legend.title = element_blank(), plot.title = element_blank() ),
tooltip = 'text' )
```
Column {style="height:50%;" data-padding=1}
-----------------------------------------------------------------------
### <span style='font-size:15px; text-align: justify !important;'> &nbsp;&nbsp;&nbsp; [Dutch properties](#properties)</span>
```{r}
# Interactive scatterplot of the Dutch properties on the two varimax-rotated
# principal components, coloured by dominant modality, with an HTML tooltip.
# 'props' is the Dutch properties data frame prepared elsewhere in this file.
properties_Dutch = props
ggplotly( ggplot(properties_Dutch,
aes(RC1, RC2, label = as.character(word), color = main,
# Html tags below used for format. Decimals rounded to two.
text = paste0(' ', '<span style="padding-top:3px; padding-bottom:3px; font-size:2.2em; color:#EEEEEE">', capitalize(word), '</span> ', '<br>',
'</b><br><span style="color:#EEEEEE"> Dominant modality: </span><b style="color:#EEEEEE">', main, ' ',
'</b><br><span style="color:#EEEEEE"> Modality exclusivity: </span><b style="color:#EEEEEE">',
paste0(sprintf("%.2f", round(Exclusivity, 2)), '% '),
'</b><br><span style="color:#EEEEEE"> Perceptual strength: </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(Perceptualstrength, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Auditory rating: </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(Auditory, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Haptic rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Haptic, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Visual rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Visual, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Concreteness (Brysbaert et al., 2014): </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(concrete_Brysbaertetal2014, 2)), ' ', '<br> '
) ) ) +
labs(x = "Varimax-rotated Principal Component 1", y = "Varimax-rotated Principal Component 2") +
geom_text(size = 2.8) + scale_colour_manual(values = colours, drop = FALSE) +
geom_point(alpha = 0) + # This geom_point helps to colour the tooltip according to the dominant modality
# Minimal theme: no grid/border/titles, small axis text
theme_bw() + theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'),
axis.title.x = element_text(colour = 'black', size = 9),
axis.title.y = element_text(colour = 'black', size = 9),
axis.text.x = element_text(size = 6), axis.text.y = element_text(size = 6),
legend.title = element_blank(), plot.title = element_blank() ),
tooltip = 'text' )
```
### <span style='font-size:15px; text-align: justify !important;'> &nbsp;&nbsp;&nbsp; [Dutch concepts](#concepts)</span>
```{r}
# Interactive scatterplot of the Dutch concepts on the two varimax-rotated
# principal components, coloured by dominant modality, with an HTML tooltip.
# 'concs' is the Dutch concepts data frame prepared elsewhere in this file.
concepts_Dutch = concs
ggplotly( ggplot(concepts_Dutch,
aes(RC1, RC2, label = as.character(word), color = main,
# Html tags below used for format. Decimals rounded to two.
text = paste0(' ', '<span style="padding-top:3px; padding-bottom:3px; font-size:2.2em; color:#EEEEEE">', capitalize(word), '</span> ', '<br>',
'</b><br><span style="color:#EEEEEE"> Dominant modality: </span><b style="color:#EEEEEE">', main, ' ',
'</b><br><span style="color:#EEEEEE"> Modality exclusivity: </span><b style="color:#EEEEEE">', paste0(sprintf("%.2f", round(Exclusivity, 2)), '% '),
'</b><br><span style="color:#EEEEEE"> Perceptual strength: </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(Perceptualstrength, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Auditory rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Auditory, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Haptic rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Haptic, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Visual rating: </span><b style="color:#EEEEEE">', sprintf("%.2f", round(Visual, 2)), ' ',
'</b><br><span style="color:#EEEEEE"> Concreteness (Brysbaert et al., 2014): </span><b style="color:#EEEEEE">',
sprintf("%.2f", round(concrete_Brysbaertetal2014, 2)), ' ', '<br> '
) ) ) +
labs(x = "Varimax-rotated Principal Component 1", y = "Varimax-rotated Principal Component 2") +
geom_text(size = 2.8) + scale_colour_manual(values = colours, drop = FALSE) +
geom_point(alpha = 0) + # This geom_point helps to colour the tooltip according to the dominant modality
# Minimal theme: no grid/border/titles, small axis text
theme_bw() + theme( plot.background = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.line = element_line(color = 'black'),
axis.title.x = element_text(colour = 'black', size = 9),
axis.title.y = element_text(colour = 'black', size = 9),
axis.text.x = element_text(size = 6), axis.text.y = element_text(size = 6),
legend.title = element_blank(), plot.title = element_blank() ),
tooltip = 'text' )
```
Sound Symbolism {data-orientation=rows data-padding=0.5 background-color=#FBFBFB style='width: 950px; margin: 0 auto;'}
=======================================================================
Row {data-height=420px data-padding=0.5}
-----------------------------------------------------------------------
```{r include = FALSE}
# SOUND SYMBOLISM (aka iconicity). Analyses run in this code chunk. Result tables in the next two chunks.
# Last tests: iconicity/sound symbolism on concepts and properties separately.
# Regressions include same lexical vars (DVs) as Lynott and Connell, plus
# concreteness and age of acquisition.
# Note that the selection is based on p-value thresholds, as in L&C, but also on
# AIC, which is a Bayesian, relative method more appropriate with such a large
# sample. Importantly, AIC and F/p-value criteria resulted in the same inclusions
# and exclusions for every regression.
# For both properties_Dutch2 and concepts_Dutch2, we start with PCA with all lexical variables in order
# to isolate them, because they are intercorrelated (see Table 5 in Lynott & Connell,
# 2013)
#
# all = read.csv('https://raw.githubusercontent.com/pablobernabeu/Modality-exclusivity-norms-747-Dutch-English-replication/master/all.csv', fileEncoding = 'Latin1')
# nrow(all)
# Length is 759 but only 747 are from these norms. Rest are from Lynott and Connell
# (2009, 2013) for comparative analyses. These extra items do not have an id number
# in the file.
# ----------------------------------------------------------------------------------
# Iconicity within properties alone, as in Lynott and Connell (2013). As a novelty,
# the iconicity analysis is hereby performed also on the Dutch properties, in
# addition to the concepts.
properties_Dutch2 = subset(all, subset = cat == 'Property')
#nrow(properties_Dutch2)
# There aren't lexical data for every single word.
# Number of properties per lexical variable (from the Dutch items only of course)
#describe(complete.cases(properties_Dutch2[complete.cases(properties_Dutch2$Exclusivity),]$phonemes_DUTCHPOND))
#describe(complete.cases(properties_Dutch2[complete.cases(properties_Dutch2$Exclusivity),]$phon_neighbours_DUTCHPOND))
#describe(complete.cases(properties_Dutch2[complete.cases(properties_Dutch2$Exclusivity),]$orth_neighbours_DUTCHPOND))
#describe(complete.cases(properties_Dutch2[complete.cases(properties_Dutch2$Exclusivity),]$freq_lg10CD_SUBTLEXNL))
#describe(complete.cases(properties_Dutch2[complete.cases(properties_Dutch2$Exclusivity),]$freq_lg10WF_SUBTLEXNL))
#describe(complete.cases(properties_Dutch2[complete.cases(properties_Dutch2$Exclusivity),]$freq_CELEX_lem))
#describe(complete.cases(properties_Dutch2[complete.cases(properties_Dutch2$Exclusivity),]$AoA_Brysbaertetal2014))
#describe(complete.cases(properties_Dutch2[complete.cases(properties_Dutch2$Exclusivity),]$concrete_Brysbaertetal2014))
# Descriptives (M, SD) per lexical variable
#stat.desc(properties_Dutch2$letters)
#stat.desc(properties_Dutch2$phonemes_DUTCHPOND)
#stat.desc(properties_Dutch2$phon_neighbours_DUTCHPOND)
#stat.desc(properties_Dutch2$orth_neighbours_DUTCHPOND)
#stat.desc(properties_Dutch2$freq_lg10CD_SUBTLEXNL)
#stat.desc(properties_Dutch2$freq_lg10WF_SUBTLEXNL)
#stat.desc(properties_Dutch2$freq_CELEX_lem)
#stat.desc(properties_Dutch2$AoA_Brysbaertetal2014)
#stat.desc(properties_Dutch2$concrete_Brysbaertetal2014)
# Correlation matrix of all lexical variables (nine columns, incl. AoA and
# concreteness), used for Table 5-style reporting:
mat_lexicals_properties_Dutch2 = as.matrix(properties_Dutch2[c('letters', 'phonemes_DUTCHPOND',
'orth_neighbours_DUTCHPOND', 'phon_neighbours_DUTCHPOND', 'freq_lg10CD_SUBTLEXNL',
'freq_lg10WF_SUBTLEXNL', 'freq_CELEX_lem', 'AoA_Brysbaertetal2014',
'concrete_Brysbaertetal2014')])
#rcor.test(mat_lexicals_properties_Dutch2, use='complete.obs')
corrs_properties_Dutch2 = rcor.test(mat_lexicals_properties_Dutch2, use='complete.obs')
#write.csv(corrs_properties_Dutch2$cor.mat, file = "corrs_properties_Dutch2.csv",na="") # find table in folder
# (saved just for the manuscript)
# Go on to PCA. This PCA does not include age of acquisition or concreteness, to allow a
# better comparison with the English data, and because no correlations > .7 (i.e. half
# of variance explained)
lexicals_properties_Dutch2 = properties_Dutch2[c('letters', 'phonemes_DUTCHPOND', 'orth_neighbours_DUTCHPOND',
'phon_neighbours_DUTCHPOND', 'freq_lg10CD_SUBTLEXNL', 'freq_lg10WF_SUBTLEXNL',
'freq_CELEX_lem')]
# str(lexicals_properties_Dutch2)
# PCA for the seven lexical variables, done as in Lynott and Connell (2013).
# Check conditions for a PCA
# Correlations
#cor(lexicals_properties_Dutch2, use = 'complete.obs')
# Result: all variables fit for PCA, as they have few scores below .3
# The correlations broadly replicate Lynott and Connell.
# now on the raw vars:
#cortest.bartlett(lexicals_properties_Dutch2)
# GOOD: Bartlett's test significant
# KMO: Kaiser-Meyer-Olkin Measure of Sampling Adequacy
lexicals_properties_Dutch2_matrix = cor(lexicals_properties_Dutch2, use = 'complete.obs')
#KMO(lexicals_properties_Dutch2_matrix)
# Result: .78 = good.
# determinant (screens for extreme multicollinearity)
#det(lexicals_properties_Dutch2_matrix)
# GOOD: above 0.00001
# Start off with an unrotated PCA extracting all seven components.
# rotate = "none" is stated explicitly because psych::principal() defaults to
# a varimax rotation, which would contradict the intent of this step (only
# $values -- the eigenvalues -- are inspected for the scree analysis, but the
# printed loadings would otherwise be rotated). This matches the explicit
# rotate = "none" used in the English-concepts PCA above.
PCA_lexicals_properties_Dutch2 = psych::principal(lexicals_properties_Dutch2, nfactors = 7, rotate = "none", scores = TRUE)
#PCA_lexicals_properties_Dutch2
# By all standards, extract 3 components
# scree analysis
#plot(PCA_lexicals_properties_Dutch2$values, type = "b")
# result: again, extract 3 components
PCA_lexicals_properties_Dutch2 = psych::principal(lexicals_properties_Dutch2, nfactors = 3, rotate =
"varimax", scores = TRUE)
#PCA_lexicals_properties_Dutch2 # eigenvalues and exp variances good
#PCA_lexicals_properties_Dutch2$loadings
# The PCA replicates Lynott and Connell. Standardized correlation coeffs
# between each PC and its corresponding set of variables are all above .89,
# while the rest of coefficients are all below .33.
PCA_lexicals_properties_Dutch2
# RC1 = length // RC2 = frequency // RC3 = distinctiveness
#PCA_lexicals_properties_Dutch2$residual
#PCA_lexicals_properties_Dutch2$fit
# Results based on a Kaiser-normalized orthogonal (varimax) rotation
# (the default in psych::principal). Residuals good: less than half w/ absolute
# values > 0.05. Model fit good, > .90. Communalities (h2) good, all well > .7
# Append the three rotated component scores (columns RC1-RC3) to the data.
properties_Dutch2 = cbind(properties_Dutch2, PCA_lexicals_properties_Dutch2$scores)
# REGRESSION
# Standardise (mean-centre and scale) the modality ratings, the lexical
# variables, and the three lexical principal components ahead of the
# regressions below. Each standardised copy is stored under an 's_'-prefixed
# column; the mapping below pairs each new column with its source column.
standardised_columns = c(
's_Auditory' = 'Auditory',
's_Haptic' = 'Haptic',
's_Visual' = 'Visual',
's_freq_lg10CD_SUBTLEXNL' = 'freq_lg10CD_SUBTLEXNL',
's_freq_lg10WF_SUBTLEXNL' = 'freq_lg10WF_SUBTLEXNL',
's_freq_CELEX_lem' = 'freq_CELEX_lem',
's_AoA_Brysbaertetal2014' = 'AoA_Brysbaertetal2014',
's_concrete_Brysbaertetal2014' = 'concrete_Brysbaertetal2014',
's_letters' = 'letters',
's_phonemes_DUTCHPOND' = 'phonemes_DUTCHPOND',
's_orth_neighbours_DUTCHPOND' = 'orth_neighbours_DUTCHPOND',
's_phon_neighbours_DUTCHPOND' = 'phon_neighbours_DUTCHPOND',
's_RC1_lexicals' = 'RC1',
's_RC2_lexicals' = 'RC2',
's_RC3_lexicals' = 'RC3'
)
for (scaled_name in names(standardised_columns)) {
# scale() z-scores the source column, exactly as the previous one-line-per-
# variable assignments did.
properties_Dutch2[[scaled_name]] = scale(properties_Dutch2[[standardised_columns[[scaled_name]]]])
}
# length: letters
# Regress standardised word length (letters) on the three standardised
# modality ratings; the same fit / residual-check / stepwise / save pattern
# is repeated for every lexical DV below.
fit_letters_properties_Dutch2 = lm(properties_Dutch2$s_letters ~ properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic +
properties_Dutch2$s_Visual, data = properties_Dutch2)
#stat.desc(fit_letters_properties_Dutch2$residuals, norm = TRUE)
# residuals distribution: kurtose. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
#psych::describe(properties_Dutch2$s_letters)
# The constant 3 shifts the standardised scores into positive territory
# before the log — presumably chosen from the min shown by describe() above;
# verify if the data change.
properties_Dutch2$log_s_letters = log(3 + properties_Dutch2$s_letters)
fit_letters_properties_Dutch2 = lm(properties_Dutch2$log_s_letters ~ properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic +
properties_Dutch2$s_Visual, data = properties_Dutch2)
# check residuals again
#stat.desc(fit_letters_properties_Dutch2$residuals, norm = TRUE)
# same; go back to the untransformed DV
fit_letters_properties_Dutch2 = lm(properties_Dutch2$s_letters ~ properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic +
properties_Dutch2$s_Visual, data = properties_Dutch2)
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1), and
# tolerance (pref. > 0.2)
#vif(fit_letters_properties_Dutch2)
#mean(vif(fit_letters_properties_Dutch2))
#1/vif(fit_letters_properties_Dutch2)
# RESULTS: all good
# Stepwise selection by AIC and by F test (MASS::stepAIC); both criteria
# agreed for every regression (see chunk header comment).
step_letters_properties_Dutch2_AIC = stepAIC(fit_letters_properties_Dutch2, direction="both")
step_letters_properties_Dutch2_F = stepAIC(fit_letters_properties_Dutch2, direction="both", test="F")
summary(fit_letters_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Number of letters'
# Save F-test part of the regression
Variable = rownames(anova(fit_letters_properties_Dutch2))
DF = anova(fit_letters_properties_Dutch2)[,'Df']
Sum_Sq = anova(fit_letters_properties_Dutch2)[,'Sum Sq']
Mean_Sq = anova(fit_letters_properties_Dutch2)[,'Mean Sq']
# NOTE(review): 'F' here masks base R's FALSE shortcut for the rest of the
# session; kept because it becomes the F-value column name in F_results.
F = anova(fit_letters_properties_Dutch2)[,'F value']
F_p = anova(fit_letters_properties_Dutch2)[,'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# Save t-test part of the regression
Variable = rownames(coefficients(summary(fit_letters_properties_Dutch2)))
Estimate = as.vector(coefficients(summary(fit_letters_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_letters_properties_Dutch2)))), 'Estimate'])
SE = as.vector(coefficients(summary(fit_letters_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_letters_properties_Dutch2)))), 'Std. Error'])
t = as.vector(coefficients(summary(fit_letters_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_letters_properties_Dutch2)))), 't value'])
t_p = as.vector(coefficients(summary(fit_letters_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_letters_properties_Dutch2)))), 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
# First DV: initialise the accumulator table (later sections rbind onto it)
results_total = merge(F_results, t_results, all = TRUE)
# length: phonemes_DUTCHPOND
# Regress standardised phoneme count on the three standardised modality
# ratings (same pattern as the letters regression above).
fit_phonemes_DUTCHPOND_properties_Dutch2 = lm(properties_Dutch2$s_phonemes_DUTCHPOND ~ properties_Dutch2$s_Auditory +
properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
#stat.desc(fit_phonemes_DUTCHPOND_properties_Dutch2$residuals, norm = TRUE)
# residuals distribution: skew. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
#psych::describe(properties_Dutch2$s_phonemes_DUTCHPOND)
# Shift by 3 so the standardised scores are positive before the log
properties_Dutch2$log_s_phonemes_DUTCHPOND = log(3 + properties_Dutch2$s_phonemes_DUTCHPOND)
fit_phonemes_DUTCHPOND_properties_Dutch2 = lm(properties_Dutch2$log_s_phonemes_DUTCHPOND ~ properties_Dutch2$s_Auditory
+ properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# check residuals again
#stat.desc(fit_phonemes_DUTCHPOND_properties_Dutch2$residuals, norm = TRUE)
# worse; back to the untransformed DV
fit_phonemes_DUTCHPOND_properties_Dutch2 = lm(properties_Dutch2$s_phonemes_DUTCHPOND ~ properties_Dutch2$s_Auditory +
properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1), and
# tolerance (pref. > 0.2)
#vif(fit_phonemes_DUTCHPOND_properties_Dutch2)
#mean(vif(fit_phonemes_DUTCHPOND_properties_Dutch2))
#1/vif(fit_phonemes_DUTCHPOND_properties_Dutch2)
# RESULTS: all good
# Stepwise selection by AIC and by F test
step_phonemes_DUTCHPOND_properties_Dutch2_AIC = stepAIC(fit_phonemes_DUTCHPOND_properties_Dutch2,
direction="both")
step_phonemes_DUTCHPOND_properties_Dutch2_F = stepAIC(fit_phonemes_DUTCHPOND_properties_Dutch2,
direction="both", test="F")
#summary(fit_phonemes_DUTCHPOND_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Number of phonemes'
# Save F-test part of the regression
Variable = rownames(anova(fit_phonemes_DUTCHPOND_properties_Dutch2))
DF = anova(fit_phonemes_DUTCHPOND_properties_Dutch2)[,'Df']
Sum_Sq = anova(fit_phonemes_DUTCHPOND_properties_Dutch2)[,'Sum Sq']
Mean_Sq = anova(fit_phonemes_DUTCHPOND_properties_Dutch2)[,'Mean Sq']
F = anova(fit_phonemes_DUTCHPOND_properties_Dutch2)[,'F value']
F_p = anova(fit_phonemes_DUTCHPOND_properties_Dutch2)[,'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# Save t-test part of the regression
Variable = rownames(coefficients(summary(fit_phonemes_DUTCHPOND_properties_Dutch2)))
Estimate = as.vector(coefficients(summary(fit_phonemes_DUTCHPOND_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_phonemes_DUTCHPOND_properties_Dutch2)))), 'Estimate'])
SE = as.vector(coefficients(summary(fit_phonemes_DUTCHPOND_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_phonemes_DUTCHPOND_properties_Dutch2)))), 'Std. Error'])
t = as.vector(coefficients(summary(fit_phonemes_DUTCHPOND_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_phonemes_DUTCHPOND_properties_Dutch2)))), 't value'])
t_p = as.vector(coefficients(summary(fit_phonemes_DUTCHPOND_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_phonemes_DUTCHPOND_properties_Dutch2)))), 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
# Append this DV's rows to the accumulator table
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# freq: SUBTLEX-NL log-10 WF
# Regress standardised word frequency on the three standardised modality
# ratings (same pattern as the sections above). Here the log-transformed DV
# is kept, as the residuals improved.
fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2 = lm(properties_Dutch2$s_freq_lg10WF_SUBTLEXNL ~
properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
#stat.desc(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2$residuals, norm = TRUE)
# residuals distribution: skew. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
#psych::describe(properties_Dutch2$s_freq_lg10WF_SUBTLEXNL)
# Shift by 3 so the standardised scores are positive before the log
properties_Dutch2$log_s_freq_lg10WF_SUBTLEXNL = log(3 + properties_Dutch2$s_freq_lg10WF_SUBTLEXNL)
fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2 = lm(properties_Dutch2$log_s_freq_lg10WF_SUBTLEXNL ~
properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# check residuals again
#stat.desc(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2$residuals, norm = TRUE)
# quite better; keep the log-transformed DV
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1), and
# tolerance (pref. > 0.2)
#vif(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)
#mean(vif(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2))
#1/vif(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)
# RESULTS: all good
# Stepwise selection by AIC and by F test
step_freq_lg10WF_SUBTLEXNL_properties_Dutch2_AIC = stepAIC(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2,
direction="both")
step_freq_lg10WF_SUBTLEXNL_properties_Dutch2_F = stepAIC(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2,
direction="both", test="F")
summary(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Word frequency'
# Save F-test part of the regression
Variable = rownames(anova(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2))
DF = anova(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)[,'Df']
Sum_Sq = anova(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)[,'Sum Sq']
Mean_Sq = anova(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)[,'Mean Sq']
F = anova(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)[,'F value']
F_p = anova(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)[,'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# Save t-test part of the regression
Variable = rownames(coefficients(summary(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)))
Estimate = as.vector(coefficients(summary(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)))), 'Estimate'])
SE = as.vector(coefficients(summary(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)))), 'Std. Error'])
t = as.vector(coefficients(summary(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)))), 't value'])
t_p = as.vector(coefficients(summary(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_lg10WF_SUBTLEXNL_properties_Dutch2)))), 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
# Append this DV's rows to the accumulator table
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# freq: SUBTLEX-NL log-10 CD (contextual diversity)
# Regress standardised contextual diversity on the three standardised
# modality ratings (same pattern as the sections above; the log-transformed
# DV is kept, as the residuals improved).
fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2 = lm(properties_Dutch2$s_freq_lg10CD_SUBTLEXNL ~
properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Diagnostics commented out once checked, consistent with every sibling DV
# section (they were accidentally left active only here).
#stat.desc(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2$residuals, norm = TRUE)
# residuals distribution: skew and kurtosed. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
#psych::describe(properties_Dutch2$s_freq_lg10CD_SUBTLEXNL)
# Shift by 3 so the standardised scores are positive before the log
properties_Dutch2$log_s_freq_lg10CD_SUBTLEXNL = log(3 + properties_Dutch2$s_freq_lg10CD_SUBTLEXNL)
fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2 = lm(properties_Dutch2$log_s_freq_lg10CD_SUBTLEXNL ~
properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# check residuals again
#stat.desc(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2$residuals, norm = TRUE)
# quite better; keep the log-transformed DV
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1), and
# tolerance (pref. > 0.2)
#vif(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)
#mean(vif(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2))
#1/vif(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)
# RESULTS: all good
# Stepwise selection by AIC and by F test. The F-test object is renamed from
# the misspelled 'step_freq_lg10CD_SUBTLEXNL__properties_Dutch2F' (double
# underscore, misplaced F) to follow the '_F' suffix used by every other
# section.
step_freq_lg10CD_SUBTLEXNL_properties_Dutch2_AIC = stepAIC(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2,
direction="both")
step_freq_lg10CD_SUBTLEXNL_properties_Dutch2_F = stepAIC(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2,
direction="both", test="F")
#summary(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Contextual diversity'
# Save F-test part of the regression
Variable = rownames(anova(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2))
DF = anova(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)[,'Df']
Sum_Sq = anova(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)[,'Sum Sq']
Mean_Sq = anova(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)[,'Mean Sq']
F = anova(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)[,'F value']
F_p = anova(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)[,'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# Save t-test part of the regression
Variable = rownames(coefficients(summary(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)))
Estimate = as.vector(coefficients(summary(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)))), 'Estimate'])
SE = as.vector(coefficients(summary(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)))), 'Std. Error'])
t = as.vector(coefficients(summary(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)))), 't value'])
t_p = as.vector(coefficients(summary(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_lg10CD_SUBTLEXNL_properties_Dutch2)))), 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
# Append this DV's rows to the accumulator table
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# freq: CELEX log-10 lemma WF
# Regress standardised CELEX lemma frequency on the three standardised
# modality ratings (same pattern as the sections above).
fit_freq_CELEX_lem_properties_Dutch2 = lm(properties_Dutch2$s_freq_CELEX_lem ~ properties_Dutch2$s_Auditory +
properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
#stat.desc(fit_freq_CELEX_lem_properties_Dutch2$residuals, norm = TRUE)
# residuals distribution: skew and kurtosed. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
#psych::describe(properties_Dutch2$s_freq_CELEX_lem)
# Shift by 3 so the standardised scores are positive before the log
properties_Dutch2$log_s_freq_CELEX_lem = log(3 + properties_Dutch2$s_freq_CELEX_lem)
fit_freq_CELEX_lem_properties_Dutch2 = lm(properties_Dutch2$log_s_freq_CELEX_lem ~ properties_Dutch2$s_Auditory +
properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# check residuals again
#stat.desc(fit_freq_CELEX_lem_properties_Dutch2$residuals, norm = TRUE)
# same; go back to the untransformed DV
fit_freq_CELEX_lem_properties_Dutch2 = lm(properties_Dutch2$s_freq_CELEX_lem ~ properties_Dutch2$s_Auditory +
properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1), and
# tolerance (pref. > 0.2)
#vif(fit_freq_CELEX_lem_properties_Dutch2)
#mean(vif(fit_freq_CELEX_lem_properties_Dutch2))
#1/vif(fit_freq_CELEX_lem_properties_Dutch2)
# RESULTS: all good
# Stepwise selection by AIC and by F test
step_freq_CELEX_lem_properties_Dutch2_AIC = stepAIC(fit_freq_CELEX_lem_properties_Dutch2, direction="both")
step_freq_CELEX_lem_properties_Dutch2_F = stepAIC(fit_freq_CELEX_lem_properties_Dutch2, direction="both",
test="F")
#summary(fit_freq_CELEX_lem_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Lemma frequency'
# Save F-test part of the regression
Variable = rownames(anova(fit_freq_CELEX_lem_properties_Dutch2))
DF = anova(fit_freq_CELEX_lem_properties_Dutch2)[,'Df']
Sum_Sq = anova(fit_freq_CELEX_lem_properties_Dutch2)[,'Sum Sq']
Mean_Sq = anova(fit_freq_CELEX_lem_properties_Dutch2)[,'Mean Sq']
F = anova(fit_freq_CELEX_lem_properties_Dutch2)[,'F value']
F_p = anova(fit_freq_CELEX_lem_properties_Dutch2)[,'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# Save t-test part of the regression
Variable = rownames(coefficients(summary(fit_freq_CELEX_lem_properties_Dutch2)))
Estimate = as.vector(coefficients(summary(fit_freq_CELEX_lem_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_CELEX_lem_properties_Dutch2)))), 'Estimate'])
SE = as.vector(coefficients(summary(fit_freq_CELEX_lem_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_CELEX_lem_properties_Dutch2)))), 'Std. Error'])
t = as.vector(coefficients(summary(fit_freq_CELEX_lem_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_CELEX_lem_properties_Dutch2)))), 't value'])
t_p = as.vector(coefficients(summary(fit_freq_CELEX_lem_properties_Dutch2))[1:length(rownames(coefficients(summary(fit_freq_CELEX_lem_properties_Dutch2)))), 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
# Append this DV's rows to the accumulator table
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# Distinctiveness DV: phonological neighbourhood size.
fit_phon_neighbours_DUTCHPOND_properties_Dutch2 = lm(properties_Dutch2$s_phon_neighbours_DUTCHPOND ~
  properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residual diagnostics (interactive check): skewed and kurtosed (raw score / 2 SE > 1),
# so the DV is log-transformed (the +2 offset keeps the standardized scores positive)
# and the model is refitted.
#stat.desc(fit_phon_neighbours_DUTCHPOND_properties_Dutch2$residuals, norm = TRUE)
#psych::describe(properties_Dutch2$s_phon_neighbours_DUTCHPOND)
properties_Dutch2$log_s_phon_neighbours_DUTCHPOND = log(2 + properties_Dutch2$s_phon_neighbours_DUTCHPOND)
fit_phon_neighbours_DUTCHPOND_properties_Dutch2 = lm(properties_Dutch2$log_s_phon_neighbours_DUTCHPOND ~
  properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residuals after the transformation: quite better.
#stat.desc(fit_phon_neighbours_DUTCHPOND_properties_Dutch2$residuals, norm = TRUE)
# Multicollinearity checks (interactive): largest VIF (pref. < 10), mean VIF
# (pref. around 1) and tolerance (pref. > 0.2) — all good.
#vif(fit_phon_neighbours_DUTCHPOND_properties_Dutch2)
#mean(vif(fit_phon_neighbours_DUTCHPOND_properties_Dutch2))
#1/vif(fit_phon_neighbours_DUTCHPOND_properties_Dutch2)
# Stepwise model selection (AIC- and F-based); prints its trace in the output.
step_phon_neighbours_DUTCHPOND_properties_Dutch2_AIC =
  stepAIC(fit_phon_neighbours_DUTCHPOND_properties_Dutch2, direction = "both")
step_phon_neighbours_DUTCHPOND_properties_Dutch2_F =
  stepAIC(fit_phon_neighbours_DUTCHPOND_properties_Dutch2, direction = "both", test = "F")
#summary(fit_phon_neighbours_DUTCHPOND_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Phonological neighbours'
# F-test (ANOVA) part of the regression: compute the table once and index it
# (originally anova() was recomputed for every column).
anova_tab = anova(fit_phon_neighbours_DUTCHPOND_properties_Dutch2)
Variable = rownames(anova_tab)
DF = anova_tab[, 'Df']
Sum_Sq = anova_tab[, 'Sum Sq']
Mean_Sq = anova_tab[, 'Mean Sq']
F = anova_tab[, 'F value']  # name kept: it becomes the output column name
F_p = anova_tab[, 'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part: compute the summary table once; whole columns are
# taken directly (the original 1:length(rownames(...)) indexing selected every row).
coef_tab = coefficients(summary(fit_phon_neighbours_DUTCHPOND_properties_Dutch2))
Variable = rownames(coef_tab)
Estimate = as.vector(coef_tab[, 'Estimate'])
SE = as.vector(coef_tab[, 'Std. Error'])
t = as.vector(coef_tab[, 't value'])
t_p = as.vector(coef_tab[, 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# Distinctiveness DV: orthographic neighbourhood size.
fit_orth_neighbours_DUTCHPOND_properties_Dutch2 = lm(properties_Dutch2$s_orth_neighbours_DUTCHPOND ~
  properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residual diagnostics (interactive check): skewed and kurtosed (raw score / 2 SE > 1),
# so the DV is log-transformed (the +2 offset keeps the standardized scores positive)
# and the model is refitted.
#stat.desc(fit_orth_neighbours_DUTCHPOND_properties_Dutch2$residuals, norm = TRUE)
#psych::describe(properties_Dutch2$s_orth_neighbours_DUTCHPOND)
properties_Dutch2$log_s_orth_neighbours_DUTCHPOND = log(2 + properties_Dutch2$s_orth_neighbours_DUTCHPOND)
fit_orth_neighbours_DUTCHPOND_properties_Dutch2 = lm(properties_Dutch2$log_s_orth_neighbours_DUTCHPOND ~
  properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residuals after the transformation: quite better.
#stat.desc(fit_orth_neighbours_DUTCHPOND_properties_Dutch2$residuals, norm = TRUE)
# Multicollinearity checks (interactive): largest VIF (pref. < 10), mean VIF
# (pref. around 1) and tolerance (pref. > 0.2) — all good.
#vif(fit_orth_neighbours_DUTCHPOND_properties_Dutch2)
#mean(vif(fit_orth_neighbours_DUTCHPOND_properties_Dutch2))
#1/vif(fit_orth_neighbours_DUTCHPOND_properties_Dutch2)
# Stepwise model selection (AIC- and F-based); prints its trace in the output.
step_orth_neighbours_DUTCHPOND_properties_Dutch2_AIC =
  stepAIC(fit_orth_neighbours_DUTCHPOND_properties_Dutch2, direction = "both")
step_orth_neighbours_DUTCHPOND_properties_Dutch2_F =
  stepAIC(fit_orth_neighbours_DUTCHPOND_properties_Dutch2, direction = "both", test = "F")
#summary(fit_orth_neighbours_DUTCHPOND_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Orthographic neighbours'
# F-test (ANOVA) part of the regression: compute the table once and index it
# (originally anova() was recomputed for every column).
anova_tab = anova(fit_orth_neighbours_DUTCHPOND_properties_Dutch2)
Variable = rownames(anova_tab)
DF = anova_tab[, 'Df']
Sum_Sq = anova_tab[, 'Sum Sq']
Mean_Sq = anova_tab[, 'Mean Sq']
F = anova_tab[, 'F value']  # name kept: it becomes the output column name
F_p = anova_tab[, 'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part: compute the summary table once; whole columns are
# taken directly (the original 1:length(rownames(...)) indexing selected every row).
coef_tab = coefficients(summary(fit_orth_neighbours_DUTCHPOND_properties_Dutch2))
Variable = rownames(coef_tab)
Estimate = as.vector(coef_tab[, 'Estimate'])
SE = as.vector(coef_tab[, 'Std. Error'])
t = as.vector(coef_tab[, 't value'])
t_p = as.vector(coef_tab[, 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# Length DV: Rotated Principal Component 1 of the lexical variables.
fit_RC1_lexicals_properties_Dutch2 = lm(properties_Dutch2$s_RC1_lexicals ~ properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic
  + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residual diagnostics (interactive check): skewed (raw score / 2 SE > 1), so the
# DV is log-transformed (the +4 offset keeps the standardized scores positive)
# and the model is refitted.
#stat.desc(fit_RC1_lexicals_properties_Dutch2$residuals, norm = TRUE)
#psych::describe(properties_Dutch2$s_RC1_lexicals)
# BUG FIX: the transformed column used to be stored as
# 'log_s_RC1_lexicals_properties_Dutch2' while the model below reads
# 'log_s_RC1_lexicals'; that only worked through $'s silent partial name
# matching. The column is now stored under the name the formula actually uses.
properties_Dutch2$log_s_RC1_lexicals = log(4 + properties_Dutch2$s_RC1_lexicals)
fit_RC1_lexicals_properties_Dutch2 = lm(properties_Dutch2$log_s_RC1_lexicals ~ properties_Dutch2$s_Auditory +
  properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residuals after the transformation: good.
#stat.desc(fit_RC1_lexicals_properties_Dutch2$residuals, norm = TRUE)
# Multicollinearity checks (interactive): largest VIF (pref. < 10), mean VIF
# (pref. around 1) and tolerance (pref. > 0.2) — all good.
#vif(fit_RC1_lexicals_properties_Dutch2)
#mean(vif(fit_RC1_lexicals_properties_Dutch2))
#1/vif(fit_RC1_lexicals_properties_Dutch2)
# Stepwise model selection (AIC- and F-based); prints its trace in the output.
step_RC1_lexicals_properties_Dutch2_AIC = stepAIC(fit_RC1_lexicals_properties_Dutch2, direction = "both")
step_RC1_lexicals_properties_Dutch2_F = stepAIC(fit_RC1_lexicals_properties_Dutch2, direction = "both",
  test = "F")
#summary(fit_RC1_lexicals_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Length PC'
loadings(PCA_lexicals_properties_Dutch2)[1:7, c('RC1','RC2','RC3')] # Correspondence between variables and components
# F-test (ANOVA) part of the regression: compute the table once and index it.
anova_tab = anova(fit_RC1_lexicals_properties_Dutch2)
Variable = rownames(anova_tab)
DF = anova_tab[, 'Df']
Sum_Sq = anova_tab[, 'Sum Sq']
Mean_Sq = anova_tab[, 'Mean Sq']
F = anova_tab[, 'F value']  # name kept: it becomes the output column name
F_p = anova_tab[, 'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part: compute the summary table once; whole columns are
# taken directly (the original 1:length(rownames(...)) indexing selected every row).
coef_tab = coefficients(summary(fit_RC1_lexicals_properties_Dutch2))
Variable = rownames(coef_tab)
Estimate = as.vector(coef_tab[, 'Estimate'])
SE = as.vector(coef_tab[, 'Std. Error'])
t = as.vector(coef_tab[, 't value'])
t_p = as.vector(coef_tab[, 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# Frequency DV: Rotated Principal Component 2 of the lexical variables.
fit_RC2_lexicals_properties_Dutch2 = lm(properties_Dutch2$s_RC2_lexicals ~ properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic
  + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residual diagnostics (interactive check): kurtosed, raw score / 2 SE < 1.
# A log transformation (+3 offset) was tried and refitted, ...
#stat.desc(fit_RC2_lexicals_properties_Dutch2$residuals, norm = TRUE)
#psych::describe(properties_Dutch2$s_RC2_lexicals)
properties_Dutch2$log_s_RC2_lexicals = log(3 + properties_Dutch2$s_RC2_lexicals)
fit_RC2_lexicals_properties_Dutch2 = lm(properties_Dutch2$log_s_RC2_lexicals ~ properties_Dutch2$s_Auditory +
  properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# ... but the residuals got worse, so the model reverts to the untransformed DV
# (the log_s_RC2_lexicals column stays in the data frame but is unused).
#stat.desc(fit_RC2_lexicals_properties_Dutch2$residuals, norm = TRUE)
fit_RC2_lexicals_properties_Dutch2 = lm(properties_Dutch2$s_RC2_lexicals ~ properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic
  + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Multicollinearity checks (interactive): largest VIF (pref. < 10), mean VIF
# (pref. around 1) and tolerance (pref. > 0.2) — all good.
#vif(fit_RC2_lexicals_properties_Dutch2)
#mean(vif(fit_RC2_lexicals_properties_Dutch2))
#1/vif(fit_RC2_lexicals_properties_Dutch2)
# Stepwise model selection (AIC- and F-based); prints its trace in the output.
step_RC2_lexicals_properties_Dutch2_AIC = stepAIC(fit_RC2_lexicals_properties_Dutch2, direction = "both")
step_RC2_lexicals_properties_Dutch2_F = stepAIC(fit_RC2_lexicals_properties_Dutch2, direction = "both",
  test = "F")
#summary(fit_RC2_lexicals_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Frequency PC'
loadings(PCA_lexicals_properties_Dutch2)[1:7, c('RC1','RC2','RC3')] # Correspondence between variables and components
# F-test (ANOVA) part of the regression: compute the table once and index it.
anova_tab = anova(fit_RC2_lexicals_properties_Dutch2)
Variable = rownames(anova_tab)
DF = anova_tab[, 'Df']
Sum_Sq = anova_tab[, 'Sum Sq']
Mean_Sq = anova_tab[, 'Mean Sq']
F = anova_tab[, 'F value']  # name kept: it becomes the output column name
F_p = anova_tab[, 'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part: compute the summary table once; whole columns are
# taken directly (the original 1:length(rownames(...)) indexing selected every row).
coef_tab = coefficients(summary(fit_RC2_lexicals_properties_Dutch2))
Variable = rownames(coef_tab)
Estimate = as.vector(coef_tab[, 'Estimate'])
SE = as.vector(coef_tab[, 'Std. Error'])
t = as.vector(coef_tab[, 't value'])
t_p = as.vector(coef_tab[, 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# Distinctiveness DV: Rotated Principal Component 3 of the lexical variables.
fit_RC3_lexicals_properties_Dutch2 = lm(properties_Dutch2$s_RC3_lexicals ~ properties_Dutch2$s_Auditory +
  properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residual diagnostics (interactive check): skewed and kurtosed (raw score / 2 SE > 1),
# so the DV is log-transformed (the +3 offset keeps the standardized scores positive)
# and the model is refitted.
#stat.desc(fit_RC3_lexicals_properties_Dutch2$residuals, norm = TRUE)
#psych::describe(properties_Dutch2$s_RC3_lexicals)
properties_Dutch2$log_s_RC3_lexicals = log(3 + properties_Dutch2$s_RC3_lexicals)
fit_RC3_lexicals_properties_Dutch2 = lm(properties_Dutch2$log_s_RC3_lexicals ~ properties_Dutch2$s_Auditory +
  properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residuals after the transformation: quite better.
#stat.desc(fit_RC3_lexicals_properties_Dutch2$residuals, norm = TRUE)
# Multicollinearity checks (interactive): largest VIF (pref. < 10), mean VIF
# (pref. around 1) and tolerance (pref. > 0.2) — all good.
#vif(fit_RC3_lexicals_properties_Dutch2)
#mean(vif(fit_RC3_lexicals_properties_Dutch2))
#1/vif(fit_RC3_lexicals_properties_Dutch2)
# Stepwise model selection (AIC- and F-based); prints its trace in the output.
step_RC3_lexicals_properties_Dutch2_AIC = stepAIC(fit_RC3_lexicals_properties_Dutch2, direction = "both")
step_RC3_lexicals_properties_Dutch2_F = stepAIC(fit_RC3_lexicals_properties_Dutch2, direction = "both",
  test = "F")
#summary(fit_RC3_lexicals_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Distinctiveness PC'
loadings(PCA_lexicals_properties_Dutch2)[1:7, c('RC1','RC2','RC3')] # Correspondence between variables and components
# F-test (ANOVA) part of the regression: compute the table once and index it.
anova_tab = anova(fit_RC3_lexicals_properties_Dutch2)
Variable = rownames(anova_tab)
DF = anova_tab[, 'Df']
Sum_Sq = anova_tab[, 'Sum Sq']
Mean_Sq = anova_tab[, 'Mean Sq']
F = anova_tab[, 'F value']  # name kept: it becomes the output column name
F_p = anova_tab[, 'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part: compute the summary table once; whole columns are
# taken directly (the original 1:length(rownames(...)) indexing selected every row).
coef_tab = coefficients(summary(fit_RC3_lexicals_properties_Dutch2))
Variable = rownames(coef_tab)
Estimate = as.vector(coef_tab[, 'Estimate'])
SE = as.vector(coef_tab[, 'Std. Error'])
t = as.vector(coef_tab[, 't value'])
t_p = as.vector(coef_tab[, 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# Additional DV: age of acquisition.
fit_AoA_Brysbaertetal2014_properties_Dutch2 = lm(properties_Dutch2$s_AoA_Brysbaertetal2014 ~
  properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residual diagnostics: distribution good, no transformation needed.
# CONSISTENCY FIX: this check was left uncommented, unlike every sibling
# analysis, so it printed a stray descriptives table in the rendered document.
#stat.desc(fit_AoA_Brysbaertetal2014_properties_Dutch2$residuals, norm = TRUE)
# Multicollinearity checks (interactive): largest VIF (pref. < 10), mean VIF
# (pref. around 1) and tolerance (pref. > 0.2) — all good.
#vif(fit_AoA_Brysbaertetal2014_properties_Dutch2)
#mean(vif(fit_AoA_Brysbaertetal2014_properties_Dutch2))
#1/vif(fit_AoA_Brysbaertetal2014_properties_Dutch2)
# Stepwise model selection (AIC- and F-based); prints its trace in the output.
step_AoA_Brysbaertetal2014_properties_Dutch2_AIC = stepAIC(fit_AoA_Brysbaertetal2014_properties_Dutch2,
  direction = "both")
step_AoA_Brysbaertetal2014_properties_Dutch2_F = stepAIC(fit_AoA_Brysbaertetal2014_properties_Dutch2,
  direction = "both", test = "F")
#summary(fit_AoA_Brysbaertetal2014_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Age of acquisition'
# F-test (ANOVA) part of the regression: compute the table once and index it.
anova_tab = anova(fit_AoA_Brysbaertetal2014_properties_Dutch2)
Variable = rownames(anova_tab)
DF = anova_tab[, 'Df']
Sum_Sq = anova_tab[, 'Sum Sq']
Mean_Sq = anova_tab[, 'Mean Sq']
F = anova_tab[, 'F value']  # name kept: it becomes the output column name
F_p = anova_tab[, 'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part: compute the summary table once; whole columns are
# taken directly (the original 1:length(rownames(...)) indexing selected every row).
coef_tab = coefficients(summary(fit_AoA_Brysbaertetal2014_properties_Dutch2))
Variable = rownames(coef_tab)
Estimate = as.vector(coef_tab[, 'Estimate'])
SE = as.vector(coef_tab[, 'Std. Error'])
t = as.vector(coef_tab[, 't value'])
t_p = as.vector(coef_tab[, 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# Additional DV: concreteness.
fit_concrete_Brysbaertetal2014_properties_Dutch2 = lm(properties_Dutch2$s_concrete_Brysbaertetal2014 ~
  properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Residual diagnostics (interactive check): skewed, raw score / 2 SE > 1.
# A log transformation (+4 offset) was tried and refitted, ...
#stat.desc(fit_concrete_Brysbaertetal2014_properties_Dutch2$residuals, norm = TRUE)
#psych::describe(properties_Dutch2$s_concrete_Brysbaertetal2014)
properties_Dutch2$log_s_concrete_Brysbaertetal2014 = log(4 + properties_Dutch2$s_concrete_Brysbaertetal2014)
fit_concrete_Brysbaertetal2014_properties_Dutch2 = lm(properties_Dutch2$log_s_concrete_Brysbaertetal2014 ~
  properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# ... but the residuals got worse, so the model reverts to the untransformed DV
# (the log column stays in the data frame but is unused).
#stat.desc(fit_concrete_Brysbaertetal2014_properties_Dutch2$residuals, norm = TRUE)
fit_concrete_Brysbaertetal2014_properties_Dutch2 = lm(properties_Dutch2$s_concrete_Brysbaertetal2014 ~
  properties_Dutch2$s_Auditory + properties_Dutch2$s_Haptic + properties_Dutch2$s_Visual, data = properties_Dutch2)
# Multicollinearity checks (interactive): largest VIF (pref. < 10), mean VIF
# (pref. around 1) and tolerance (pref. > 0.2) — all good.
#vif(fit_concrete_Brysbaertetal2014_properties_Dutch2)
#mean(vif(fit_concrete_Brysbaertetal2014_properties_Dutch2))
#1/vif(fit_concrete_Brysbaertetal2014_properties_Dutch2)
# Stepwise model selection (AIC- and F-based); prints its trace in the output.
step_concrete_Brysbaertetal2014_properties_Dutch2_AIC =
  stepAIC(fit_concrete_Brysbaertetal2014_properties_Dutch2, direction = "both")
step_concrete_Brysbaertetal2014_properties_Dutch2_F =
  stepAIC(fit_concrete_Brysbaertetal2014_properties_Dutch2, direction = "both", test = "F")
#summary(fit_concrete_Brysbaertetal2014_properties_Dutch2)
# Save results
Category = 'Properties'
Dependent = 'Concreteness'
# F-test (ANOVA) part of the regression: compute the table once and index it.
anova_tab = anova(fit_concrete_Brysbaertetal2014_properties_Dutch2)
Variable = rownames(anova_tab)
DF = anova_tab[, 'Df']
Sum_Sq = anova_tab[, 'Sum Sq']
Mean_Sq = anova_tab[, 'Mean Sq']
F = anova_tab[, 'F value']  # name kept: it becomes the output column name
F_p = anova_tab[, 'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part: compute the summary table once; whole columns are
# taken directly (the original 1:length(rownames(...)) indexing selected every row).
coef_tab = coefficients(summary(fit_concrete_Brysbaertetal2014_properties_Dutch2))
Variable = rownames(coef_tab)
Estimate = as.vector(coef_tab[, 'Estimate'])
SE = as.vector(coef_tab[, 'Std. Error'])
t = as.vector(coef_tab[, 't value'])
t_p = as.vector(coef_tab[, 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# Summary of the properties analyses above:
# Auditory strength either was the strongest predictor or presented an opposite
# polarity from the main predictor. This held for all lexical DVs except age of
# acquisition.
# __________________________________________________________________________
# Iconicity within concepts alone, as in Lynott and Connell (2013).
# Subset: concept items normed in Dutch or in both Dutch and English.
# NOTE(review): the c(...) wrapper around the | expression is redundant but
# harmless — the mask is the same without it; confirm before simplifying.
concepts_Dutch2 = all[all$cat == 'Concept' & c(all$normed == 'Dutch' | all$normed == 'Dut_Eng'),]
#nrow(concepts_Dutch2)
# There aren't lexical data for every single word.
# Percentage of concepts per lexical variable (from items w/ Dutch norms);
# run interactively when needed:
#describe(complete.cases(concepts_Dutch2[complete.cases(concepts_Dutch2$Exclusivity),]$phonemes_DUTCHPOND))
#describe(complete.cases(concepts_Dutch2[complete.cases(concepts_Dutch2$Exclusivity),]$phon_neighbours_DUTCHPOND))
#describe(complete.cases(concepts_Dutch2[complete.cases(concepts_Dutch2$Exclusivity),]$orth_neighbours_DUTCHPOND))
#describe(complete.cases(concepts_Dutch2[complete.cases(concepts_Dutch2$Exclusivity),]$freq_lg10CD_SUBTLEXNL))
#describe(complete.cases(concepts_Dutch2[complete.cases(concepts_Dutch2$Exclusivity),]$freq_lg10WF_SUBTLEXNL))
#describe(complete.cases(concepts_Dutch2[complete.cases(concepts_Dutch2$Exclusivity),]$freq_CELEX_lem))
#describe(complete.cases(concepts_Dutch2[complete.cases(concepts_Dutch2$Exclusivity),]$AoA_Brysbaertetal2014))
#describe(complete.cases(concepts_Dutch2[complete.cases(concepts_Dutch2$Exclusivity),]$concrete_Brysbaertetal2014))
# Means and SDs of the lexical variables (interactive):
#stat.desc(concepts_Dutch2$letters)
#stat.desc(concepts_Dutch2$phonemes_DUTCHPOND)
#stat.desc(concepts_Dutch2$phon_neighbours_DUTCHPOND)
#stat.desc(concepts_Dutch2$orth_neighbours_DUTCHPOND)
#stat.desc(concepts_Dutch2$freq_lg10CD_SUBTLEXNL)
#stat.desc(concepts_Dutch2$freq_lg10WF_SUBTLEXNL)
#stat.desc(concepts_Dutch2$freq_CELEX_lem)
#stat.desc(concepts_Dutch2$AoA_Brysbaertetal2014)
#stat.desc(concepts_Dutch2$concrete_Brysbaertetal2014)
# Correlations among all lexical variables (matrix form for rcor.test):
mat_lexicals_concepts_Dutch2 = as.matrix(concepts_Dutch2[c('letters', 'phonemes_DUTCHPOND',
'orth_neighbours_DUTCHPOND', 'phon_neighbours_DUTCHPOND', 'freq_lg10CD_SUBTLEXNL',
'freq_lg10WF_SUBTLEXNL', 'freq_CELEX_lem', 'AoA_Brysbaertetal2014',
'concrete_Brysbaertetal2014')])
#rcor.test(mat_lexicals_concepts_Dutch2, use='complete.obs')
corrs_concepts_Dutch2 = rcor.test(mat_lexicals_concepts_Dutch2, use='complete.obs')
#write.csv(corrs_concepts_Dutch2$cor.mat, file = "corrs_concepts_Dutch2.csv",na="") # find table in folder
# Go on to PCA. This PCA does not include age of acquisition or concreteness, to allow a
# better comparison with the English data, and because no correlations > .7 (i.e. half
# of variance explained).
lexicals_concepts_Dutch2 = concepts_Dutch2[c('letters', 'phonemes_DUTCHPOND', 'orth_neighbours_DUTCHPOND',
'phon_neighbours_DUTCHPOND', 'freq_lg10CD_SUBTLEXNL', 'freq_lg10WF_SUBTLEXNL',
'freq_CELEX_lem')]
#nrow(lexicals_concepts_Dutch2)
# Start with PCA for lexical variables, done as in Lynott and Connell (2013).
# Check conditions for a PCA.
# Correlations (interactive):
#cor(lexicals_concepts_Dutch2, use = 'complete.obs')
# Result: all variables fit for PCA, as they have few scores below .3.
# The correlations broadly replicate Lynott and Connell.
# Bartlett's sphericity test on the raw variables (interactive):
#cortest.bartlett(lexicals_concepts_Dutch2)
# GOOD: Bartlett's test significant.
# KMO: Kaiser-Meyer-Olkin Measure of Sampling Adequacy.
lexicals_concepts_Dutch2_matrix = cor(lexicals_concepts_Dutch2, use = 'complete.obs')
#KMO(lexicals_concepts_Dutch2_matrix)
# Result: .71 = good.
# Determinant of the correlation matrix (interactive):
#det(lexicals_concepts_Dutch2_matrix)
# GOOD: above 0.00001.
# Start off with an unrotated PCA (as many factors as variables).
PCA_lexicals_concepts_Dutch2 = psych::principal(lexicals_concepts_Dutch2, nfactors = 7, scores = TRUE)
#PCA_lexicals_concepts_Dutch2
# By Kaiser's and Joliffe's standard, extract 3 RCs.
# Scree analysis (interactive):
#plot(PCA_lexicals_concepts_Dutch2$values, type = "b")
# Result: again, extract 3 components.
PCA_lexicals_concepts_Dutch2 = psych::principal(lexicals_concepts_Dutch2, nfactors = 3, rotate =
"varimax", scores = TRUE)
PCA_lexicals_concepts_Dutch2 # printed deliberately: check explained variance along components
#PCA_lexicals_concepts_Dutch2$loadings
# The PCA replicates Lynott and Connell. Standardized correlation coefficients
# between each PC and its corresponding set of variables are all above .89,
# while the rest of the coefficients are all below .33.
#PCA_lexicals_concepts_Dutch2
# RC1 = length // RC2 = frequency // RC3 = distinctiveness
#PCA_lexicals_concepts_Dutch2$residual
#PCA_lexicals_concepts_Dutch2$fit
#PCA_lexicals_concepts_Dutch2$communality
# Results based on a Kaiser-normalized orthogonal (varimax) rotation
# (the default in psych::principal). Residuals good: less than half w/ absolute
# values > 0.05. Model fit good, > .90. Communalities (h2) good, all well > .7.
# Attach the three rotated component scores (RC1, RC2, RC3) to the data frame.
concepts_Dutch2 = cbind(concepts_Dutch2, PCA_lexicals_concepts_Dutch2$scores)
# REGRESSION
# Standardize (mean-center and scale) the predictors and lexical DVs.
# Each entry maps the new standardized column name to the source column;
# the PCA scores RC1/RC2/RC3 become s_RC1_lexicals/s_RC2_lexicals/s_RC3_lexicals.
scaled_col_map = c(
  s_Auditory = 'Auditory',
  s_Haptic = 'Haptic',
  s_Visual = 'Visual',
  s_freq_lg10CD_SUBTLEXNL = 'freq_lg10CD_SUBTLEXNL',
  s_freq_lg10WF_SUBTLEXNL = 'freq_lg10WF_SUBTLEXNL',
  s_freq_CELEX_lem = 'freq_CELEX_lem',
  s_AoA_Brysbaertetal2014 = 'AoA_Brysbaertetal2014',
  s_concrete_Brysbaertetal2014 = 'concrete_Brysbaertetal2014',
  s_letters = 'letters',
  s_phonemes_DUTCHPOND = 'phonemes_DUTCHPOND',
  s_orth_neighbours_DUTCHPOND = 'orth_neighbours_DUTCHPOND',
  s_phon_neighbours_DUTCHPOND = 'phon_neighbours_DUTCHPOND',
  s_RC1_lexicals = 'RC1',
  s_RC2_lexicals = 'RC2',
  s_RC3_lexicals = 'RC3'
)
for (new_col in names(scaled_col_map)) {
  concepts_Dutch2[[new_col]] = scale(concepts_Dutch2[[scaled_col_map[[new_col]]]])
}
# Length DV: number of letters (concepts subset).
fit_letters_concepts_Dutch2 = lm(concepts_Dutch2$s_letters ~ concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic +
  concepts_Dutch2$s_Visual, data = concepts_Dutch2)
# Residual diagnostics (interactive check): skewed (raw score / 2 SE > 1), so the
# DV is log-transformed (the +3 offset keeps the standardized scores positive)
# and the model is refitted.
#stat.desc(fit_letters_concepts_Dutch2$residuals, norm = TRUE)
#psych::describe(concepts_Dutch2$s_letters)
concepts_Dutch2$log_s_letters = log(3 + concepts_Dutch2$s_letters)
fit_letters_concepts_Dutch2 = lm(concepts_Dutch2$log_s_letters ~ concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic +
  concepts_Dutch2$s_Visual, data = concepts_Dutch2)
# Residuals after the transformation: better, though still somewhat skewed/kurtosed.
#stat.desc(fit_letters_concepts_Dutch2$residuals, norm = TRUE)
# Multicollinearity checks (interactive): largest VIF (pref. < 10), mean VIF
# (pref. around 1) and tolerance (pref. > 0.2) — all good.
#vif(fit_letters_concepts_Dutch2)
#mean(vif(fit_letters_concepts_Dutch2))
#1/vif(fit_letters_concepts_Dutch2)
# Stepwise model selection (AIC- and F-based); prints its trace in the output.
step_letters_concepts_Dutch2_AIC = stepAIC(fit_letters_concepts_Dutch2, direction = "both")
step_letters_concepts_Dutch2_F = stepAIC(fit_letters_concepts_Dutch2, direction = "both", test = "F")
#summary(fit_letters_concepts_Dutch2)
# Save results
Category = 'Concepts'
Dependent = 'Number of letters'
# F-test (ANOVA) part of the regression: compute the table once and index it.
anova_tab = anova(fit_letters_concepts_Dutch2)
Variable = rownames(anova_tab)
DF = anova_tab[, 'Df']
Sum_Sq = anova_tab[, 'Sum Sq']
Mean_Sq = anova_tab[, 'Mean Sq']
F = anova_tab[, 'F value']  # name kept: it becomes the output column name
F_p = anova_tab[, 'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part: compute the summary table once; whole columns are
# taken directly (the original 1:length(rownames(...)) indexing selected every row).
coef_tab = coefficients(summary(fit_letters_concepts_Dutch2))
Variable = rownames(coef_tab)
Estimate = as.vector(coef_tab[, 'Estimate'])
SE = as.vector(coef_tab[, 'Std. Error'])
t = as.vector(coef_tab[, 't value'])
t_p = as.vector(coef_tab[, 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# Length DV: number of phonemes (concepts subset).
fit_phonemes_DUTCHPOND_concepts_Dutch2 = lm(concepts_Dutch2$s_phonemes_DUTCHPOND ~ concepts_Dutch2$s_Auditory +
  concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual, data = concepts_Dutch2)
# Residual diagnostics (interactive check): skewed and kurtosed (raw score / 2 SE > 1),
# so the DV is log-transformed (the +3 offset keeps the standardized scores positive)
# and the model is refitted.
#stat.desc(fit_phonemes_DUTCHPOND_concepts_Dutch2$residuals, norm = TRUE)
#psych::describe(concepts_Dutch2$s_phonemes_DUTCHPOND)
concepts_Dutch2$log_s_phonemes_DUTCHPOND = log(3 + concepts_Dutch2$s_phonemes_DUTCHPOND)
fit_phonemes_DUTCHPOND_concepts_Dutch2 = lm(concepts_Dutch2$log_s_phonemes_DUTCHPOND ~ concepts_Dutch2$s_Auditory
  + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual, data = concepts_Dutch2)
# Residuals after the transformation: good.
#stat.desc(fit_phonemes_DUTCHPOND_concepts_Dutch2$residuals, norm = TRUE)
# Multicollinearity checks (interactive): largest VIF (pref. < 10), mean VIF
# (pref. around 1) and tolerance (pref. > 0.2) — all good.
#vif(fit_phonemes_DUTCHPOND_concepts_Dutch2)
#mean(vif(fit_phonemes_DUTCHPOND_concepts_Dutch2))
#1/vif(fit_phonemes_DUTCHPOND_concepts_Dutch2)
# Stepwise model selection (AIC- and F-based); prints its trace in the output.
step_phonemes_DUTCHPOND_concepts_Dutch2_AIC = stepAIC(fit_phonemes_DUTCHPOND_concepts_Dutch2,
  direction = "both")
step_phonemes_DUTCHPOND_concepts_Dutch2_F = stepAIC(fit_phonemes_DUTCHPOND_concepts_Dutch2,
  direction = "both", test = "F")
#summary(fit_phonemes_DUTCHPOND_concepts_Dutch2)
# Save results
Category = 'Concepts'
Dependent = 'Number of phonemes'
# F-test (ANOVA) part of the regression: compute the table once and index it.
anova_tab = anova(fit_phonemes_DUTCHPOND_concepts_Dutch2)
Variable = rownames(anova_tab)
DF = anova_tab[, 'Df']
Sum_Sq = anova_tab[, 'Sum Sq']
Mean_Sq = anova_tab[, 'Mean Sq']
F = anova_tab[, 'F value']  # name kept: it becomes the output column name
F_p = anova_tab[, 'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part: compute the summary table once; whole columns are
# taken directly (the original 1:length(rownames(...)) indexing selected every row).
coef_tab = coefficients(summary(fit_phonemes_DUTCHPOND_concepts_Dutch2))
Variable = rownames(coef_tab)
Estimate = as.vector(coef_tab[, 'Estimate'])
SE = as.vector(coef_tab[, 'Std. Error'])
t = as.vector(coef_tab[, 't value'])
t_p = as.vector(coef_tab[, 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# freq: SUBTLEX-NL log-10 CD (contextual diversity)
fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2 <-
  lm(concepts_Dutch2$s_freq_lg10CD_SUBTLEXNL ~
       concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
#stat.desc(fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2$residuals, norm = TRUE)
# residuals distribution: skew. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
#psych::describe(concepts_Dutch2$s_freq_lg10CD_SUBTLEXNL)
concepts_Dutch2$log_s_freq_lg10CD_SUBTLEXNL <- log(5 + concepts_Dutch2$s_freq_lg10CD_SUBTLEXNL)
fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2 <-
  lm(concepts_Dutch2$log_s_freq_lg10CD_SUBTLEXNL ~
       concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
# check residuals again
#stat.desc(fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2$residuals, norm = TRUE)
# worse! revert to the untransformed DV
fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2 <-
  lm(concepts_Dutch2$s_freq_lg10CD_SUBTLEXNL ~
       concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1),
# and tolerance (pref. > 0.2)
#vif(fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2)
#mean(vif(fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2))
#1/vif(fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2)
# RESULTS: all good
step_freq_lg10CD_SUBTLEXNL_concepts_Dutch2_AIC <-
  stepAIC(fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2, direction = "both")
# FIX: was `step_freq_lg10CD_SUBTLEXNL__concepts_Dutch2F` (double underscore,
# misplaced F); renamed to match the `*_concepts_Dutch2_F` naming used by every
# sibling block.
step_freq_lg10CD_SUBTLEXNL_concepts_Dutch2_F <-
  stepAIC(fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2, direction = "both", test = "F")
#summary(fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2)
# Append this regression's results to `results_total`
Category <- 'Concepts'
Dependent <- 'Contextual diversity'
# F-test (ANOVA) part of the regression
anova_tbl <- anova(fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2)
Variable <- rownames(anova_tbl)
DF <- anova_tbl[, 'Df']
Sum_Sq <- anova_tbl[, 'Sum Sq']
Mean_Sq <- anova_tbl[, 'Mean Sq']
F <- anova_tbl[, 'F value']
F_p <- anova_tbl[, 'Pr(>F)']
F_results <- data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part of the regression
coef_tbl <- coefficients(summary(fit_freq_lg10CD_SUBTLEXNL_concepts_Dutch2))
Variable <- rownames(coef_tbl)
Estimate <- as.vector(coef_tbl[, 'Estimate'])
SE <- as.vector(coef_tbl[, 'Std. Error'])
t <- as.vector(coef_tbl[, 't value'])
t_p <- as.vector(coef_tbl[, 'Pr(>|t|)'])
t_results <- data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results <- merge(F_results, t_results, all = TRUE)
results_total <- rbind(results_total, results)
# freq: SUBTLEX-NL log-10 WF (word frequency)
fit_freq_lg10WF_SUBTLEXNL_concepts_Dutch2 <-
  lm(concepts_Dutch2$s_freq_lg10WF_SUBTLEXNL ~
       concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
#stat.desc(fit_freq_lg10WF_SUBTLEXNL_concepts_Dutch2$residuals, norm = TRUE)
# residuals distribution: good. Raw scores/2.SE < 1
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1),
# and tolerance (pref. > 0.2)
#vif(fit_freq_lg10WF_SUBTLEXNL_concepts_Dutch2)
#mean(vif(fit_freq_lg10WF_SUBTLEXNL_concepts_Dutch2))
#1/vif(fit_freq_lg10WF_SUBTLEXNL_concepts_Dutch2)
# RESULTS: all good
step_freq_lg10WF_SUBTLEXNL_concepts_Dutch2_AIC <-
  stepAIC(fit_freq_lg10WF_SUBTLEXNL_concepts_Dutch2, direction = "both")
step_freq_lg10WF_SUBTLEXNL_concepts_Dutch2_F <-
  stepAIC(fit_freq_lg10WF_SUBTLEXNL_concepts_Dutch2, direction = "both", test = "F")
#summary(fit_freq_lg10WF_SUBTLEXNL_concepts_Dutch2)
# Append this regression's results to `results_total`
Category <- 'Concepts'
Dependent <- 'Word frequency'
# F-test (ANOVA) part of the regression
anova_tbl <- anova(fit_freq_lg10WF_SUBTLEXNL_concepts_Dutch2)
Variable <- rownames(anova_tbl)
DF <- anova_tbl[, 'Df']
Sum_Sq <- anova_tbl[, 'Sum Sq']
Mean_Sq <- anova_tbl[, 'Mean Sq']
F <- anova_tbl[, 'F value']
F_p <- anova_tbl[, 'Pr(>F)']
F_results <- data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part; the original's 1:nrow row subset selects all
# rows, so plain column extraction is equivalent
coef_tbl <- coefficients(summary(fit_freq_lg10WF_SUBTLEXNL_concepts_Dutch2))
Variable <- rownames(coef_tbl)
Estimate <- as.vector(coef_tbl[, 'Estimate'])
SE <- as.vector(coef_tbl[, 'Std. Error'])
t <- as.vector(coef_tbl[, 't value'])
t_p <- as.vector(coef_tbl[, 'Pr(>|t|)'])
t_results <- data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results <- merge(F_results, t_results, all = TRUE)
results_total <- rbind(results_total, results)
# freq: CELEX log-10 lemma WF
fit_freq_CELEX_lem_concepts_Dutch2 <-
  lm(concepts_Dutch2$s_freq_CELEX_lem ~ concepts_Dutch2$s_Auditory +
       concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
#stat.desc(fit_freq_CELEX_lem_concepts_Dutch2$residuals, norm = TRUE)
# residuals distribution: good. Raw scores/2.SE < 1
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1),
# and tolerance (pref. > 0.2)
#vif(fit_freq_CELEX_lem_concepts_Dutch2)
#mean(vif(fit_freq_CELEX_lem_concepts_Dutch2))
#1/vif(fit_freq_CELEX_lem_concepts_Dutch2)
# RESULTS: all good
step_freq_CELEX_lem_concepts_Dutch2_AIC <-
  stepAIC(fit_freq_CELEX_lem_concepts_Dutch2, direction = "both")
step_freq_CELEX_lem_concepts_Dutch2_F <-
  stepAIC(fit_freq_CELEX_lem_concepts_Dutch2, direction = "both", test = "F")
#summary(fit_freq_CELEX_lem_concepts_Dutch2)
# Append this regression's results to `results_total`
Category <- 'Concepts'
Dependent <- 'Lemma frequency'
# F-test (ANOVA) part of the regression
anova_tbl <- anova(fit_freq_CELEX_lem_concepts_Dutch2)
Variable <- rownames(anova_tbl)
DF <- anova_tbl[, 'Df']
Sum_Sq <- anova_tbl[, 'Sum Sq']
Mean_Sq <- anova_tbl[, 'Mean Sq']
F <- anova_tbl[, 'F value']
F_p <- anova_tbl[, 'Pr(>F)']
F_results <- data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part; the original's 1:nrow row subset selects all
# rows, so plain column extraction is equivalent
coef_tbl <- coefficients(summary(fit_freq_CELEX_lem_concepts_Dutch2))
Variable <- rownames(coef_tbl)
Estimate <- as.vector(coef_tbl[, 'Estimate'])
SE <- as.vector(coef_tbl[, 'Std. Error'])
t <- as.vector(coef_tbl[, 't value'])
t_p <- as.vector(coef_tbl[, 'Pr(>|t|)'])
t_results <- data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results <- merge(F_results, t_results, all = TRUE)
results_total <- rbind(results_total, results)
# distinctiveness: phonological neighbourhood size
fit_phon_neighbours_DUTCHPOND_concepts_Dutch2 <-
  lm(concepts_Dutch2$s_phon_neighbours_DUTCHPOND ~
       concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
#stat.desc(fit_phon_neighbours_DUTCHPOND_concepts_Dutch2$residuals, norm = TRUE)
# residuals distribution: skewed and kurtosed. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
#psych::describe(concepts_Dutch2$s_phon_neighbours_DUTCHPOND)
concepts_Dutch2$log_s_phon_neighbours_DUTCHPOND <-
  log(2 + concepts_Dutch2$s_phon_neighbours_DUTCHPOND)
fit_phon_neighbours_DUTCHPOND_concepts_Dutch2 <-
  lm(concepts_Dutch2$log_s_phon_neighbours_DUTCHPOND ~
       concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
# check residuals again
#stat.desc(fit_phon_neighbours_DUTCHPOND_concepts_Dutch2$residuals, norm = TRUE)
# better but not perfect
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1),
# and tolerance (pref. > 0.2)
#vif(fit_phon_neighbours_DUTCHPOND_concepts_Dutch2)
#mean(vif(fit_phon_neighbours_DUTCHPOND_concepts_Dutch2))
#1/vif(fit_phon_neighbours_DUTCHPOND_concepts_Dutch2)
# RESULTS: all good
step_phon_neighbours_DUTCHPOND_concepts_Dutch2_AIC <-
  stepAIC(fit_phon_neighbours_DUTCHPOND_concepts_Dutch2, direction = "both")
step_phon_neighbours_DUTCHPOND_concepts_Dutch2_F <-
  stepAIC(fit_phon_neighbours_DUTCHPOND_concepts_Dutch2, direction = "both", test = "F")
#summary(fit_phon_neighbours_DUTCHPOND_concepts_Dutch2)
# Append this regression's results to `results_total`
Category <- 'Concepts'
Dependent <- 'Phonological neighbours'
# F-test (ANOVA) part of the regression
anova_tbl <- anova(fit_phon_neighbours_DUTCHPOND_concepts_Dutch2)
Variable <- rownames(anova_tbl)
DF <- anova_tbl[, 'Df']
Sum_Sq <- anova_tbl[, 'Sum Sq']
Mean_Sq <- anova_tbl[, 'Mean Sq']
F <- anova_tbl[, 'F value']
F_p <- anova_tbl[, 'Pr(>F)']
F_results <- data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part; the original's 1:nrow row subset selects all
# rows, so plain column extraction is equivalent
coef_tbl <- coefficients(summary(fit_phon_neighbours_DUTCHPOND_concepts_Dutch2))
Variable <- rownames(coef_tbl)
Estimate <- as.vector(coef_tbl[, 'Estimate'])
SE <- as.vector(coef_tbl[, 'Std. Error'])
t <- as.vector(coef_tbl[, 't value'])
t_p <- as.vector(coef_tbl[, 'Pr(>|t|)'])
t_results <- data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results <- merge(F_results, t_results, all = TRUE)
results_total <- rbind(results_total, results)
# distinctiveness: orthographic neighbourhood size
fit_orth_neighbours_DUTCHPOND_concepts_Dutch2 <-
  lm(concepts_Dutch2$s_orth_neighbours_DUTCHPOND ~
       concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
#stat.desc(fit_orth_neighbours_DUTCHPOND_concepts_Dutch2$residuals, norm = TRUE)
# residuals distribution: skewed and kurtosed. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
# FIX: the describe() diagnostic below was left uncommented (it printed into
# the rendered dashboard); commented out to match every sibling block.
#psych::describe(concepts_Dutch2$s_orth_neighbours_DUTCHPOND)
concepts_Dutch2$log_s_orth_neighbours_DUTCHPOND <-
  log(2 + concepts_Dutch2$s_orth_neighbours_DUTCHPOND)
fit_orth_neighbours_DUTCHPOND_concepts_Dutch2 <-
  lm(concepts_Dutch2$log_s_orth_neighbours_DUTCHPOND ~
       concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
# check residuals again
#stat.desc(fit_orth_neighbours_DUTCHPOND_concepts_Dutch2$residuals, norm = TRUE)
# better though still skew/kurtose
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1),
# and tolerance (pref. > 0.2)
#vif(fit_orth_neighbours_DUTCHPOND_concepts_Dutch2)
#mean(vif(fit_orth_neighbours_DUTCHPOND_concepts_Dutch2))
#1/vif(fit_orth_neighbours_DUTCHPOND_concepts_Dutch2)
# RESULTS: all good
step_orth_neighbours_DUTCHPOND_concepts_Dutch2_AIC <-
  stepAIC(fit_orth_neighbours_DUTCHPOND_concepts_Dutch2, direction = "both")
step_orth_neighbours_DUTCHPOND_concepts_Dutch2_F <-
  stepAIC(fit_orth_neighbours_DUTCHPOND_concepts_Dutch2, direction = "both", test = "F")
#summary(fit_orth_neighbours_DUTCHPOND_concepts_Dutch2)
# Append this regression's results to `results_total`
Category <- 'Concepts'
Dependent <- 'Orthographic neighbours'
# F-test (ANOVA) part of the regression
anova_tbl <- anova(fit_orth_neighbours_DUTCHPOND_concepts_Dutch2)
Variable <- rownames(anova_tbl)
DF <- anova_tbl[, 'Df']
Sum_Sq <- anova_tbl[, 'Sum Sq']
Mean_Sq <- anova_tbl[, 'Mean Sq']
F <- anova_tbl[, 'F value']
F_p <- anova_tbl[, 'Pr(>F)']
F_results <- data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part of the regression
coef_tbl <- coefficients(summary(fit_orth_neighbours_DUTCHPOND_concepts_Dutch2))
Variable <- rownames(coef_tbl)
Estimate <- as.vector(coef_tbl[, 'Estimate'])
SE <- as.vector(coef_tbl[, 'Std. Error'])
t <- as.vector(coef_tbl[, 't value'])
t_p <- as.vector(coef_tbl[, 'Pr(>|t|)'])
t_results <- data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results <- merge(F_results, t_results, all = TRUE)
results_total <- rbind(results_total, results)
# length: Rotated Principal Component 1 for the lexical variables
fit_RC1_lexicals_concepts_Dutch2 <-
  lm(concepts_Dutch2$s_RC1_lexicals ~ concepts_Dutch2$s_Auditory +
       concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
#stat.desc(fit_RC1_lexicals_concepts_Dutch2$residuals, norm = TRUE)
# residuals distribution: skewed and kurtosed. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
#psych::describe(concepts_Dutch2$s_RC1_lexicals)
# FIX: the transformed column was created as `log_s_RC1_lexicals_concepts_Dutch2`
# while the model below referenced `log_s_RC1_lexicals` (a nonexistent column,
# i.e. a NULL response that makes lm() fail). The column name now matches the
# formula, following the naming pattern of the sibling blocks.
concepts_Dutch2$log_s_RC1_lexicals <- log(3 + concepts_Dutch2$s_RC1_lexicals)
fit_RC1_lexicals_concepts_Dutch2 <-
  lm(concepts_Dutch2$log_s_RC1_lexicals ~ concepts_Dutch2$s_Auditory +
       concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
# check residuals again
#stat.desc(fit_RC1_lexicals_concepts_Dutch2$residuals, norm = TRUE)
# good
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1),
# and tolerance (pref. > 0.2)
#vif(fit_RC1_lexicals_concepts_Dutch2)
#mean(vif(fit_RC1_lexicals_concepts_Dutch2))
#1/vif(fit_RC1_lexicals_concepts_Dutch2)
# RESULTS: all good
step_RC1_lexicals_concepts_Dutch2_AIC <-
  stepAIC(fit_RC1_lexicals_concepts_Dutch2, direction = "both")
step_RC1_lexicals_concepts_Dutch2_F <-
  stepAIC(fit_RC1_lexicals_concepts_Dutch2, direction = "both", test = "F")
#summary(fit_RC1_lexicals_concepts_Dutch2)
# Append this regression's results to `results_total`
Category <- 'Concepts'
Dependent <- 'Length PC'
# F-test (ANOVA) part of the regression
anova_tbl <- anova(fit_RC1_lexicals_concepts_Dutch2)
Variable <- rownames(anova_tbl)
DF <- anova_tbl[, 'Df']
Sum_Sq <- anova_tbl[, 'Sum Sq']
Mean_Sq <- anova_tbl[, 'Mean Sq']
F <- anova_tbl[, 'F value']
F_p <- anova_tbl[, 'Pr(>F)']
F_results <- data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part of the regression
coef_tbl <- coefficients(summary(fit_RC1_lexicals_concepts_Dutch2))
Variable <- rownames(coef_tbl)
Estimate <- as.vector(coef_tbl[, 'Estimate'])
SE <- as.vector(coef_tbl[, 'Std. Error'])
t <- as.vector(coef_tbl[, 't value'])
t_p <- as.vector(coef_tbl[, 'Pr(>|t|)'])
t_results <- data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results <- merge(F_results, t_results, all = TRUE)
results_total <- rbind(results_total, results)
# freq: Rotated Principal Component 2 for the lexical variables
fit_RC2_lexicals_concepts_Dutch2 <-
  lm(concepts_Dutch2$s_RC2_lexicals ~ concepts_Dutch2$s_Auditory +
       concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
#stat.desc(fit_RC2_lexicals_concepts_Dutch2$residuals, norm = TRUE)
# residuals distribution: good. Raw scores/2.SE < 1
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1),
# and tolerance (pref. > 0.2)
#vif(fit_RC2_lexicals_concepts_Dutch2)
#mean(vif(fit_RC2_lexicals_concepts_Dutch2))
#1/vif(fit_RC2_lexicals_concepts_Dutch2)
# RESULTS: all good
step_RC2_lexicals_concepts_Dutch2_AIC <-
  stepAIC(fit_RC2_lexicals_concepts_Dutch2, direction = "both")
step_RC2_lexicals_concepts_Dutch2_F <-
  stepAIC(fit_RC2_lexicals_concepts_Dutch2, direction = "both", test = "F")
# FIX: summary() below was left uncommented (it printed into the rendered
# dashboard); commented out to match every sibling block.
#summary(fit_RC2_lexicals_concepts_Dutch2)
# Append this regression's results to `results_total`
Category <- 'Concepts'
Dependent <- 'Frequency PC'
# F-test (ANOVA) part of the regression
anova_tbl <- anova(fit_RC2_lexicals_concepts_Dutch2)
Variable <- rownames(anova_tbl)
DF <- anova_tbl[, 'Df']
Sum_Sq <- anova_tbl[, 'Sum Sq']
Mean_Sq <- anova_tbl[, 'Mean Sq']
F <- anova_tbl[, 'F value']
F_p <- anova_tbl[, 'Pr(>F)']
F_results <- data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part of the regression
coef_tbl <- coefficients(summary(fit_RC2_lexicals_concepts_Dutch2))
Variable <- rownames(coef_tbl)
Estimate <- as.vector(coef_tbl[, 'Estimate'])
SE <- as.vector(coef_tbl[, 'Std. Error'])
t <- as.vector(coef_tbl[, 't value'])
t_p <- as.vector(coef_tbl[, 'Pr(>|t|)'])
t_results <- data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results <- merge(F_results, t_results, all = TRUE)
results_total <- rbind(results_total, results)
# distinctiveness: Rotated Principal Component 3 for the lexical variables
fit_RC3_lexicals_concepts_Dutch2 <-
  lm(concepts_Dutch2$s_RC3_lexicals ~ concepts_Dutch2$s_Auditory +
       concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
#stat.desc(fit_RC3_lexicals_concepts_Dutch2$residuals, norm = TRUE)
# residuals distribution: skewed and kurtosed. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
#psych::describe(concepts_Dutch2$s_RC3_lexicals)
concepts_Dutch2$log_s_RC3_lexicals <- log(3 + concepts_Dutch2$s_RC3_lexicals)
fit_RC3_lexicals_concepts_Dutch2 <-
  lm(concepts_Dutch2$log_s_RC3_lexicals ~ concepts_Dutch2$s_Auditory +
       concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
# check residuals again
#stat.desc(fit_RC3_lexicals_concepts_Dutch2$residuals, norm = TRUE)
# better though still non-normal
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1),
# and tolerance (pref. > 0.2)
#vif(fit_RC3_lexicals_concepts_Dutch2)
#mean(vif(fit_RC3_lexicals_concepts_Dutch2))
#1/vif(fit_RC3_lexicals_concepts_Dutch2)
# RESULTS: all good
step_RC3_lexicals_concepts_Dutch2_AIC <-
  stepAIC(fit_RC3_lexicals_concepts_Dutch2, direction = "both")
step_RC3_lexicals_concepts_Dutch2_F <-
  stepAIC(fit_RC3_lexicals_concepts_Dutch2, direction = "both", test = "F")
#summary(fit_RC3_lexicals_concepts_Dutch2)
# Append this regression's results to `results_total`
Category <- 'Concepts'
Dependent <- 'Distinctiveness PC'
# F-test (ANOVA) part of the regression
anova_tbl <- anova(fit_RC3_lexicals_concepts_Dutch2)
Variable <- rownames(anova_tbl)
DF <- anova_tbl[, 'Df']
Sum_Sq <- anova_tbl[, 'Sum Sq']
Mean_Sq <- anova_tbl[, 'Mean Sq']
F <- anova_tbl[, 'F value']
F_p <- anova_tbl[, 'Pr(>F)']
F_results <- data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part; the original's 1:nrow row subset selects all
# rows, so plain column extraction is equivalent
coef_tbl <- coefficients(summary(fit_RC3_lexicals_concepts_Dutch2))
Variable <- rownames(coef_tbl)
Estimate <- as.vector(coef_tbl[, 'Estimate'])
SE <- as.vector(coef_tbl[, 'Std. Error'])
t <- as.vector(coef_tbl[, 't value'])
t_p <- as.vector(coef_tbl[, 'Pr(>|t|)'])
t_results <- data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results <- merge(F_results, t_results, all = TRUE)
results_total <- rbind(results_total, results)
# additional var: age of acquisition
fit_AoA_Brysbaertetal2014_concepts_Dutch2 <-
  lm(concepts_Dutch2$s_AoA_Brysbaertetal2014 ~
       concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual,
     data = concepts_Dutch2)
#stat.desc(fit_AoA_Brysbaertetal2014_concepts_Dutch2$residuals, norm = TRUE)
# residuals distribution: good. Raw scores/2.SE < 1
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1),
# and tolerance (pref. > 0.2)
#vif(fit_AoA_Brysbaertetal2014_concepts_Dutch2)
#mean(vif(fit_AoA_Brysbaertetal2014_concepts_Dutch2))
#1/vif(fit_AoA_Brysbaertetal2014_concepts_Dutch2)
# RESULTS: all good
step_AoA_Brysbaertetal2014_concepts_Dutch2_AIC <-
  stepAIC(fit_AoA_Brysbaertetal2014_concepts_Dutch2, direction = "both")
step_AoA_Brysbaertetal2014_concepts_Dutch2_F <-
  stepAIC(fit_AoA_Brysbaertetal2014_concepts_Dutch2, direction = "both", test = "F")
#summary(fit_AoA_Brysbaertetal2014_concepts_Dutch2)
# Append this regression's results to `results_total`
Category <- 'Concepts'
Dependent <- 'Age of acquisition'
# F-test (ANOVA) part of the regression
anova_tbl <- anova(fit_AoA_Brysbaertetal2014_concepts_Dutch2)
Variable <- rownames(anova_tbl)
DF <- anova_tbl[, 'Df']
Sum_Sq <- anova_tbl[, 'Sum Sq']
Mean_Sq <- anova_tbl[, 'Mean Sq']
F <- anova_tbl[, 'F value']
F_p <- anova_tbl[, 'Pr(>F)']
F_results <- data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# t-test (coefficient) part; the original's 1:nrow row subset selects all
# rows, so plain column extraction is equivalent
coef_tbl <- coefficients(summary(fit_AoA_Brysbaertetal2014_concepts_Dutch2))
Variable <- rownames(coef_tbl)
Estimate <- as.vector(coef_tbl[, 'Estimate'])
SE <- as.vector(coef_tbl[, 'Std. Error'])
t <- as.vector(coef_tbl[, 't value'])
t_p <- as.vector(coef_tbl[, 'Pr(>|t|)'])
t_results <- data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
results <- merge(F_results, t_results, all = TRUE)
results_total <- rbind(results_total, results)
# Additional dependent variable: concreteness (Brysbaert et al., 2014),
# regressed on the three standardised perceptual-strength predictors.
# The predictors are written with the 'concepts_Dutch2$' prefix on purpose:
# those prefixed names become the coefficient/ANOVA row names, which the
# dplyr::recode() call at the end of this chunk maps back to 'Auditory',
# 'Haptic', 'Visual'.
fit_concrete_Brysbaertetal2014_concepts_Dutch2 = lm(concepts_Dutch2$s_concrete_Brysbaertetal2014 ~
concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual, data = concepts_Dutch2)
# Normality check of the residuals (run interactively):
#stat.desc(fit_concrete_Brysbaertetal2014_concepts_Dutch2$residuals, norm = TRUE)
# residuals distribution: skew. Raw scores/2.SE > 1
# have to log-transform DV and re-run regression
#psych::describe(concepts_Dutch2$s_concrete_Brysbaertetal2014)
# Shift by 3 before log() so the (standardised, hence partly negative) scores
# are positive; assumes all s_concrete values are > -3 — TODO confirm.
concepts_Dutch2$log_s_concrete_Brysbaertetal2014 = log(3 + concepts_Dutch2$s_concrete_Brysbaertetal2014)
fit_concrete_Brysbaertetal2014_concepts_Dutch2 = lm(concepts_Dutch2$log_s_concrete_Brysbaertetal2014 ~
concepts_Dutch2$s_Auditory + concepts_Dutch2$s_Haptic + concepts_Dutch2$s_Visual, data = concepts_Dutch2)
# check residuals again
#stat.desc(fit_concrete_Brysbaertetal2014_concepts_Dutch2$residuals, norm = TRUE)
# good
# Check multicollinearity: largest VIF (pref. < 10), mean VIF (pref. around 1), and
# tolerance (pref. > 0.2)
#vif(fit_concrete_Brysbaertetal2014_concepts_Dutch2)
#mean(vif(fit_concrete_Brysbaertetal2014_concepts_Dutch2))
#1/vif(fit_concrete_Brysbaertetal2014_concepts_Dutch2)
# RESULTS: all good
# Stepwise model selection in both directions: once by AIC, once with F tests
# (MASS::stepAIC). Results inspected interactively, not used programmatically below.
step_concrete_Brysbaertetal2014_concepts_Dutch2_AIC =
stepAIC(fit_concrete_Brysbaertetal2014_concepts_Dutch2, direction="both")
step_concrete_Brysbaertetal2014_concepts_Dutch2_F =
stepAIC(fit_concrete_Brysbaertetal2014_concepts_Dutch2, direction="both", test="F")
#summary(fit_concrete_Brysbaertetal2014_concepts_Dutch2)
# Save results of the concreteness regression in long format.
Category = 'Concepts'
Dependent = 'Concreteness'
# Save F-test part of the regression. The ANOVA table is computed once and
# reused, instead of calling anova() once per extracted column.
anova_concrete = anova(fit_concrete_Brysbaertetal2014_concepts_Dutch2)
Variable = rownames(anova_concrete)
DF = anova_concrete[,'Df']
Sum_Sq = anova_concrete[,'Sum Sq']
Mean_Sq = anova_concrete[,'Mean Sq']
F = anova_concrete[,'F value']
F_p = anova_concrete[,'Pr(>F)']
F_results = data.frame(Category, Dependent, Variable, DF, Sum_Sq, Mean_Sq, F, F_p)
# Save t-test part of the regression. The coefficient matrix is likewise
# computed once; taking whole columns replaces the previous no-op
# 1:length(rownames(...)) row subset.
coefs_concrete = coefficients(summary(fit_concrete_Brysbaertetal2014_concepts_Dutch2))
Variable = rownames(coefs_concrete)
Estimate = as.vector(coefs_concrete[, 'Estimate'])
SE = as.vector(coefs_concrete[, 'Std. Error'])
t = as.vector(coefs_concrete[, 't value'])
t_p = as.vector(coefs_concrete[, 'Pr(>|t|)'])
t_results = data.frame(Category, Dependent, Variable, Estimate, SE, t, t_p)
# Outer-join the F-test and t-test parts and append to the running results table
results = merge(F_results, t_results, all = TRUE)
results_total = rbind(results_total, results)
# Format
# Compute p-value asterisks (APA bands: *** p < .001, ** p < .01, * p < .05,
# 'N.S.' otherwise). Missing p-values (e.g. Residuals rows from the merge)
# stay NA throughout.
# For F.
# NOTE(fix): the '**' band previously tested `F_p > .001`, so a p-value of
# exactly .001 fell through every band and stayed NA; it now uses `>=`,
# matching the t bands below. The former NA/as.character initialisations were
# dead (immediately overwritten) and have been dropped.
results_total$F_p.asterisks = ifelse(results_total$F_p < .001, '***', NA)
results_total$F_p.asterisks =
ifelse(results_total$F_p >= .001 & results_total$F_p < .01, '**',
results_total$F_p.asterisks)
results_total$F_p.asterisks =
ifelse(results_total$F_p >= .01 & results_total$F_p < .05, '*',
results_total$F_p.asterisks)
results_total$F_p.asterisks =
ifelse(results_total$F_p >= .05, 'N.S.', results_total$F_p.asterisks)
# For t
results_total$t_p.asterisks = ifelse(results_total$t_p < .001, '***', NA)
results_total$t_p.asterisks =
ifelse(results_total$t_p >= .001 & results_total$t_p < .01, '**',
results_total$t_p.asterisks)
results_total$t_p.asterisks =
ifelse(results_total$t_p >= .01 & results_total$t_p < .05, '*',
results_total$t_p.asterisks)
results_total$t_p.asterisks =
ifelse(results_total$t_p >= .05, 'N.S.', results_total$t_p.asterisks)
# APA format for numbers
# Round test statistics, sums of squares and SEs to two decimals, keeping
# trailing zeros.
results_total$Sum_Sq = sprintf("%.2f", round(as.numeric(results_total$Sum_Sq), 2))
results_total$Mean_Sq = sprintf("%.2f", round(as.numeric(results_total$Mean_Sq), 2))
results_total$F = sprintf("%.2f", round(as.numeric(results_total$F), 2))
results_total$Estimate = sprintf("%.2f", round(as.numeric(results_total$Estimate), 2))
results_total$SE = sprintf("%.2f", round(as.numeric(results_total$SE), 2))
results_total$t = sprintf("%.2f", round(as.numeric(results_total$t), 2))
# p-values: three decimals, with values below .001 reported as '< .001'.
# NOTE(fix): the old code rounded p to 3 decimals but printed it with "%.2f",
# so e.g. p = .004 became "0.00" and was then misreported as '< .001' by a
# string-vs-number comparison. The threshold test now runs on the numeric
# values, before formatting.
F_p_num = as.numeric(results_total$F_p)
t_p_num = as.numeric(results_total$t_p)
results_total$F_p = ifelse(F_p_num < .001, '< .001', sprintf("%.3f", round(F_p_num, 3)))
results_total$t_p = ifelse(t_p_num < .001, '< .001', sprintf("%.3f", round(t_p_num, 3)))
# Remove the zero before the point in p-values (APA style)
results_total$F_p = sub('^(-)?0[.]', '\\1.', results_total$F_p)
results_total$t_p = sub('^(-)?0[.]', '\\1.', results_total$t_p)
# Order columns and dataset.
# NOTE(review): the next call just echoes the column names when run
# interactively; kept as a reference for the index-based reorder below.
names(results_total)
# Reorder by position so that F_p.asterisks (col 13) follows the F-test
# columns and t_p.asterisks (col 14) comes last. Fragile: assumes the exact
# column order produced above — revisit if columns are added or removed.
results_total = results_total[, c(1:8, 13, 9:12, 14)]
# Clarify variable names: coefficients are labelled with the
# 'data.frame$column' form because the regressions used $-prefixed predictors.
results_total$Variable = dplyr::recode(results_total$Variable,
`properties_Dutch2$s_Auditory` = 'Auditory',
`properties_Dutch2$s_Haptic` = 'Haptic',
`properties_Dutch2$s_Visual` = 'Visual',
`concepts_Dutch2$s_Auditory` = 'Auditory',
`concepts_Dutch2$s_Haptic` = 'Haptic',
`concepts_Dutch2$s_Visual` = 'Visual')
# Iconicity of concepts and comparison with properties.
# The properties sample was characterized by smaller advantages for the
# Auditory predictor, compared to the concepts sample. The tendency towards
# either larger or opposite scores for auditory strength was less evident,
# even though it was still marginally present.
```
<div style = "background-color: #FCFCFC; padding-top: 60px; padding-left: 15px; padding-right: 1px; margin-bottom:6px;">
```{r}
# Loadings of principal components for properties. Tidy-format names by binding
# them as a column and removing dummy rownames left from the principal() output.
properties_lexicals_loadings =
data.frame( cbind( names(lexicals_properties_Dutch2),
data.frame(PCA_lexicals_properties_Dutch2$loadings[, c('RC1','RC2','RC3')])
) )
# NOTE(fix): a duplicate computation of concepts_lexicals_loadings was removed
# here; it was never used in this chunk and is rebuilt, identically, at the
# top of the concepts chunk further below before its first use.
# First cbind column holds the dependent-variable names; expose it as a factor
# column named 'Dependent'.
names(properties_lexicals_loadings)[1] = "Dependent"
properties_lexicals_loadings[,'Dependent'] = as.factor(properties_lexicals_loadings[,'Dependent'])
# Human-readable labels for the dependent variables: one mapping instead of
# seven near-identical levels() assignments. Unmapped levels (if any) are
# kept unchanged.
dependent_labels = c(
letters = "Letters",
phonemes_DUTCHPOND = "Phonemes",
freq_lg10CD_SUBTLEXNL = "Contextual diversity",
freq_lg10WF_SUBTLEXNL = "Word frequency",
freq_CELEX_lem = "Lemma frequency",
phon_neighbours_DUTCHPOND = "Phonological neighbours",
orth_neighbours_DUTCHPOND = "Orthographic neighbours")
old_levels = levels(properties_lexicals_loadings[,'Dependent'])
levels(properties_lexicals_loadings[,'Dependent']) =
ifelse(old_levels %in% names(dependent_labels), dependent_labels[old_levels], old_levels)
# RC (Rotated Component) renamed PC for clarity.
# NOTE(fix): a rename of the 'names.lexicals_properties_Dutch2.' column was
# dropped: that column was already renamed 'Dependent' above, so the
# statement was a no-op.
colnames(properties_lexicals_loadings)[colnames(properties_lexicals_loadings)=="RC1"] = "PC1"
colnames(properties_lexicals_loadings)[colnames(properties_lexicals_loadings)=="RC2"] = "PC2"
colnames(properties_lexicals_loadings)[colnames(properties_lexicals_loadings)=="RC3"] = "PC3"
# Round to two decimals, keeping trailing zeros
properties_lexicals_loadings$PC1 = sprintf("%.2f", round(properties_lexicals_loadings$PC1, 2))
properties_lexicals_loadings$PC2 = sprintf("%.2f", round(properties_lexicals_loadings$PC2, 2))
properties_lexicals_loadings$PC3 = sprintf("%.2f", round(properties_lexicals_loadings$PC3, 2))
# Present values as correlations by removing the zero before the decimal
# point. NOTE(fix): anchored at the start of the string (keeping a leading
# minus sign) instead of replacing every '0.' substring anywhere, consistent
# with the p-value formatting earlier in the document.
properties_lexicals_loadings[,'PC1'] = sub('^(-)?0[.]', '\\1.', properties_lexicals_loadings[,'PC1'])
properties_lexicals_loadings[,'PC2'] = sub('^(-)?0[.]', '\\1.', properties_lexicals_loadings[,'PC2'])
properties_lexicals_loadings[,'PC3'] = sub('^(-)?0[.]', '\\1.', properties_lexicals_loadings[,'PC3'])
# Order rows by the display order of the dependent variables
properties_lexicals_loadings[,'Dependent'] = factor(properties_lexicals_loadings[,'Dependent'],
levels = c("Letters", "Phonemes", "Contextual diversity", "Word frequency",
"Lemma frequency", "Phonological neighbours", "Orthographic neighbours"))
properties_lexicals_loadings = with(properties_lexicals_loadings, properties_lexicals_loadings[order(Dependent),])
rownames(properties_lexicals_loadings) = NULL
# Modal dialog showing loadings of principal components for properties.
# The link and the shiny input share the id 'properties_lexicals_loadings';
# clicking the link fires the observeEvent below, which opens the modal.
actionLink("properties_lexicals_loadings",
HTML('<span style = "background-color:#F9F9F9; padding-top:3px; padding-bottom:0px; padding-left:6px; padding-right:6px;"><i class="glyphicon glyphicon-th" aria-hidden="true"></i> See principal component loadings for properties</span>'))
observeEvent(input$properties_lexicals_loadings, {
showModal(modalDialog(
title = HTML('<div style="padding-bottom:0px; padding-left:2px; font-size:14px; text-align:justify;"> <b>Loadings of principal components for properties (see \'PC\' dependent variables), in the form of correlations.</b> Coefficients above <i>r</i> = &plusmn;.70 (i.e., 50% shared variance) shown in bold. </div>'),
div( HTML( # Below, table constructed
properties_lexicals_loadings %>%
# Highlight correlations above .7. The PC columns hold formatted strings
# at this point, hence the as.numeric() before comparing.
mutate(PC1 = ifelse(abs(as.numeric(PC1)) > .7, cell_spec(PC1, "html", bold = TRUE, color = 'black'),
cell_spec(PC1, "html")),
PC2 = ifelse(abs(as.numeric(PC2)) > .7, cell_spec(PC2, "html", bold = TRUE, color = 'black'),
cell_spec(PC2, "html")),
PC3 = ifelse(abs(as.numeric(PC3)) > .7, cell_spec(PC3, "html", bold = TRUE, color = 'black'),
cell_spec(PC3, "html")) ) %>%
# Render the highlighted data frame as a striped HTML table inside the modal
kable(format = "html", escape = FALSE) %>%
kable_styling('striped', full_width = FALSE, position = "left")
),
align = 'center'),
size = 'm', easyClose = TRUE, footer = modalButton("Close")
))
})
```
</div>
<div style = "background-color: #FCFCFC; text-align: justify; padding-top: 37px; padding-right: 45px; padding-left: 45px; padding-bottom: 38px; margin-top: 10; margin-bottom: 7px; line-height: 1.6; font-size: 16px;">
**Sound symbolism is the relation between the form of words and their meaning.** The form of words rests on their sound more than on their visual or tactile properties (at least in spoken language). Therefore, auditory ratings should more reliably predict the lexical properties of words (length, frequency, distinctiveness) than haptic or visual ratings would ([see external corpora](#external-corpora)). By means of regression analyses following Lynott and Connell (2013; see [Table 6](https://link.springer.com/article/10.3758%2Fs13428-012-0267-0#Sec4)), we found that auditory ratings were either the best predictor of lexical properties, or yielded an effect that was opposite in polarity to the effects of haptic and visual ratings, thus supporting sound symbolism.
Standardised coefficients ($\beta$) are presented in the tables below, each followed by asterisks indicating significance (<sup>\*</sup>*p* < .05; <sup>\*\*</sup>*p* < .01; <sup>\*\*\*</sup>*p* < .001), with the standard error in parentheses underneath. Regression assumptions were checked, namely: normal distribution of residuals (transformations applied where needed), largest variance inflation factor < 10 and its mean around 1, and tolerance > 0.2 ([Field, Miles, & Field, 2012](#references)). 'PC' = Kaiser-normalised, varimax-rotated principal component.
</div>
<div style = "background-color: #FCFCFC; padding-top: 60px; padding-right: 22px; padding-left: 1px; margin-bottom:6px;">
```{r}
# Loadings of principal components for concepts. Tidy-format names by binding
# them as a column and removing dummy rownames left from the principal() output.
concepts_lexicals_loadings =
data.frame( cbind( names(lexicals_concepts_Dutch2),
data.frame(PCA_lexicals_concepts_Dutch2$loadings[, c('RC1','RC2','RC3')])
) )
names(concepts_lexicals_loadings)[1] = "Dependent"
concepts_lexicals_loadings[,'Dependent'] = as.factor(concepts_lexicals_loadings[,'Dependent'])
# Human-readable labels for the dependent variables: one mapping instead of
# seven near-identical levels() assignments. Unmapped levels (if any) are
# kept unchanged.
dependent_labels = c(
letters = "Letters",
phonemes_DUTCHPOND = "Phonemes",
freq_lg10CD_SUBTLEXNL = "Contextual diversity",
freq_lg10WF_SUBTLEXNL = "Word frequency",
freq_CELEX_lem = "Lemma frequency",
phon_neighbours_DUTCHPOND = "Phonological neighbours",
orth_neighbours_DUTCHPOND = "Orthographic neighbours")
old_levels = levels(concepts_lexicals_loadings[,'Dependent'])
levels(concepts_lexicals_loadings[,'Dependent']) =
ifelse(old_levels %in% names(dependent_labels), dependent_labels[old_levels], old_levels)
# RC (Rotated Component) renamed PC for clarity.
# NOTE(fix): a rename of the 'names.lexicals_concepts_Dutch2.' column was
# dropped: that column was already renamed 'Dependent' above, so the
# statement was a no-op.
colnames(concepts_lexicals_loadings)[colnames(concepts_lexicals_loadings)=="RC1"] = "PC1"
colnames(concepts_lexicals_loadings)[colnames(concepts_lexicals_loadings)=="RC2"] = "PC2"
colnames(concepts_lexicals_loadings)[colnames(concepts_lexicals_loadings)=="RC3"] = "PC3"
# Round to two decimals, keeping trailing zeros
concepts_lexicals_loadings$PC1 = sprintf("%.2f", round(concepts_lexicals_loadings$PC1, 2))
concepts_lexicals_loadings$PC2 = sprintf("%.2f", round(concepts_lexicals_loadings$PC2, 2))
concepts_lexicals_loadings$PC3 = sprintf("%.2f", round(concepts_lexicals_loadings$PC3, 2))
# Present values as correlations by removing the zero before the decimal
# point. NOTE(fix): anchored at the start of the string (keeping a leading
# minus sign) instead of replacing every '0.' substring anywhere, consistent
# with the p-value formatting earlier in the document.
concepts_lexicals_loadings[,'PC1'] = sub('^(-)?0[.]', '\\1.', concepts_lexicals_loadings[,'PC1'])
concepts_lexicals_loadings[,'PC2'] = sub('^(-)?0[.]', '\\1.', concepts_lexicals_loadings[,'PC2'])
concepts_lexicals_loadings[,'PC3'] = sub('^(-)?0[.]', '\\1.', concepts_lexicals_loadings[,'PC3'])
# Order rows by the display order of the dependent variables
concepts_lexicals_loadings[,'Dependent'] = factor(concepts_lexicals_loadings[,'Dependent'],
levels = c("Letters", "Phonemes", "Contextual diversity", "Word frequency",
"Lemma frequency", "Phonological neighbours", "Orthographic neighbours"))
concepts_lexicals_loadings = with(concepts_lexicals_loadings, concepts_lexicals_loadings[order(Dependent),])
rownames(concepts_lexicals_loadings) = NULL
# Modal dialog showing loadings of principal components for concepts.
# The link and the shiny input share the id 'concepts_lexicals_loadings';
# clicking the link fires the observeEvent below, which opens the modal.
actionLink("concepts_lexicals_loadings",
HTML('<span style = "background-color:#F9F9F9; padding-top:3px; padding-bottom:0px; padding-left:6px; padding-right:6px;"><i class="glyphicon glyphicon-th" aria-hidden="true"></i> See principal component loadings for concepts</span>'))
observeEvent(input$concepts_lexicals_loadings, {
showModal(modalDialog(
title = HTML('<div style="padding-bottom:0px; padding-left:2px; font-size:14px; text-align:justify;"> <b>Loadings of principal components for concepts (see \'PC\' dependent variables), in the form of correlations.</b> Coefficients above <i>r</i> = &plusmn;.70 (i.e., 50% shared variance) shown in bold. </div>'),
div( HTML( # Below, table constructed
concepts_lexicals_loadings %>%
# Highlight correlations above .7. The PC columns hold formatted strings
# at this point, hence the as.numeric() before comparing.
mutate(PC1 = ifelse(abs(as.numeric(PC1)) > .7, cell_spec(PC1, "html", bold = TRUE, color = 'black'),
cell_spec(PC1, "html")),
PC2 = ifelse(abs(as.numeric(PC2)) > .7, cell_spec(PC2, "html", bold = TRUE, color = 'black'),
cell_spec(PC2, "html")),
PC3 = ifelse(abs(as.numeric(PC3)) > .7, cell_spec(PC3, "html", bold = TRUE, color = 'black'),
cell_spec(PC3, "html")) ) %>%
# Render the highlighted data frame as a striped HTML table inside the modal
kable(format = "html", escape = FALSE) %>%
kable_styling('striped', full_width = FALSE, position = "left")
),
align = 'center'),
size = 'm', easyClose = TRUE, footer = modalButton("Close")
))
})
```
</div>
Row {style="data-width:100%; margin-bottom:25px;"}
-----------------------------------------------------------------------
### <span style="font-size:20px; color:black; text-align: justify !important;"> &nbsp; **Property words**</span> {style="background-color:#FFFAF4; margin-bottom:2px;"}
<div style = "background-color: white; padding-top:4px; padding-bottom:1px;">
```{r}
# Properties results table: regression coefficients for property words only,
# without the intercept and residual rows.
properties_Dutch2_results =
results_total[results_total$Category=='Properties' &
!results_total$Variable=='(Intercept)' &
!results_total$Variable=='Residuals',
c('Dependent', 'Variable', 'Estimate', 't_p.asterisks', 'SE')]
# Don't display any symbol with non-significant effects (i.e., remove 'N.S.').
# NOTE(fix): the pattern is matched literally via fixed(); as a plain regex
# the dots in 'N.S.' would match any character.
properties_Dutch2_results$t_p.asterisks_clean =
str_remove_all(properties_Dutch2_results$t_p.asterisks, fixed('N.S.'))
# Display beta coefficients, followed by asterisks indicating significance,
# with the standard error in parentheses on a second line.
properties_Dutch2_results$coefficients_and_asterisks =
ifelse(!is.na(properties_Dutch2_results$Estimate) & !is.na(properties_Dutch2_results$t_p.asterisks_clean),
paste0(properties_Dutch2_results$Estimate, properties_Dutch2_results$t_p.asterisks_clean, '<br>', '(', properties_Dutch2_results$SE, ')'),
NA)
# Kable table: one column per predictor (spread keeps alphabetical column order)
HTML(
kable(
spread(properties_Dutch2_results[,c('Dependent', 'Variable', 'coefficients_and_asterisks')],
Variable, coefficients_and_asterisks), escape = FALSE
) %>%
kable_styling('striped', full_width = FALSE)
)
```
</div>
### <span style="font-size:20px; color:black; text-align: justify !important;"> &nbsp; **Concept words**</span> {style="background-color:#FFFEF3; margin-bottom:2px;"}
<div style = "background-color:white; padding-top:4px; padding-bottom:1px;">
```{r}
# Concepts results table: regression coefficients for concept words only,
# without the intercept and residual rows.
concepts_Dutch2_results =
results_total[results_total$Category=='Concepts' &
!results_total$Variable=='(Intercept)' &
!results_total$Variable=='Residuals',
c('Dependent', 'Variable', 'Estimate', 't_p.asterisks', 'SE')]
# Don't display any symbol with non-significant effects (i.e., remove 'N.S.').
# NOTE(fix): the pattern is matched literally via fixed(); as a plain regex
# the dots in 'N.S.' would match any character.
concepts_Dutch2_results$t_p.asterisks_clean =
str_remove_all(concepts_Dutch2_results$t_p.asterisks, fixed('N.S.'))
# Display beta coefficients, followed by asterisks indicating significance,
# with the standard error in parentheses on a second line.
concepts_Dutch2_results$coefficients_and_asterisks =
ifelse(!is.na(concepts_Dutch2_results$Estimate) & !is.na(concepts_Dutch2_results$t_p.asterisks_clean),
paste0(concepts_Dutch2_results$Estimate, concepts_Dutch2_results$t_p.asterisks_clean, '<br>', '(', concepts_Dutch2_results$SE, ')'),
NA)
# Kable table: one column per predictor (spread keeps alphabetical column order)
HTML(
kable(
spread(concepts_Dutch2_results[,c('Dependent', 'Variable', 'coefficients_and_asterisks')],
Variable, coefficients_and_asterisks), escape = FALSE
) %>%
kable_styling('striped', full_width = FALSE)
)
```
</div>