From cedcb8ce80d70f9313b2a8be619cbb76ffe6bd92 Mon Sep 17 00:00:00 2001 From: Tyler Date: Tue, 13 Oct 2015 20:01:10 -0400 Subject: [PATCH] added to readme --- DESCRIPTION | 9 ++--- README.Rmd | 43 +++++++++++++++++++- README.md | 114 ++++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 147 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c6052c5..e250f1d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,15 +1,14 @@ Package: formality Title: Calculate Text Formality Version: 0.0.1 -Authors@R: c(person("Tyler", "Rinker", email = - "tyler.rinker@gmail.com", role = c("aut", "cre"))) +Authors@R: c(person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role = c("aut", + "cre"))) Maintainer: Tyler Rinker -Description: Calculate the formality of text based on part of speech - tags. +Description: Calculate the formality of text based on part of speech tags. Depends: R (>= 3.2.2) Imports: data.table, tagger Suggests: testthat -Date: 2015-10-12 +Date: 2015-10-13 License: GPL-2 LazyData: TRUE Roxygen: list(wrap = FALSE) diff --git a/README.Rmd b/README.Rmd index df4082a..164a270 100644 --- a/README.Rmd +++ b/README.Rmd @@ -24,6 +24,8 @@ verbadge <- sprintf(' * send a pull request on: * compose a friendly e-mail to: + + +# Examples + +The following examples demonstrate some of the functionality of **formality**. + +## Load the Tools/Data + +```{r} +library(formality) +data(presidential_debates_2012) +``` + + +## Assessing Formality + +`formality` takes the text as `text.var` and any number of grouping variables as `grouping.var`. Here we use the `presidential_debates_2012` data set and look at the formality of the people involved. Note that for smaller text Heylighen & Dewaele (2002) state: + +> "At present, a sample would probably need to contain a few hundred words for the measure to be minimally reliable. For single sentences, the F-value should only be computed for purposes of illustration" (p. 24). 
+ +```{r} +form1 <- with(presidential_debates_2012, formality(dialogue, person)) +form1 +``` + +## Recycling the First Run + +This will take ~20 seconds because of the part of speech tagging that must be undertaken. The output can be reused as `text.var` cutting the time to a fraction of the first run. + + +```{r} +with(presidential_debates_2012, formality(form1, list(time, person))) +``` + + diff --git a/README.md b/README.md index 93c1cf6..a579f82 100644 --- a/README.md +++ b/README.md @@ -17,10 +17,26 @@ Status](https://coveralls.io/repos/trinker/formality/badge.svg?branch=master)](h [**tagger**](https://github.com/trinker/tagger) package to conduct formality analysis. Heylighen (1999) and Heylighen & Dewaele (2002, 1999) have given the *F-measure* as a measure of how *contextual* or -*formal* language is. Language is considered more formal when it contains -much of the information directly in the text, whereas, contextual -language relies on shared experiences to more efficiently dialogue with -others. +*formal* language is. Language is considered more formal when it +contains much of the information directly in the text, whereas, +contextual language relies on shared experiences to more efficiently +dialogue with others. + + +Table of Contents +============ + +- [Formality Equation](#formality-equation) +- [Installation](#installation) +- [Contact](#contact) +- [Examples](#examples) + - [Load the Tools/Data](#load-the-toolsdata) + - [Assessing Formality](#assessing-formality) + - [Recycling the First Run](#recycling-the-first-run) + +Formality Equation +============ + The **formality** package's main function is also titled `formality` and uses Heylighen & Dewaele's (1999) *F-measure*. The *F-measure* is @@ -55,17 +71,9 @@ the *F-measure*: Science, 7(3), 293-340. 
doi:10.1023/A:1019661126744 - -Table of Contents -============ - -- [Installation](#installation) -- [Contact](#contact) - Installation ============ - To download the development version of **formality**: Download the [zip @@ -75,7 +83,11 @@ and run `R CMD INSTALL` on it, or use the **pacman** package to install the development version: if (!require("pacman")) install.packages("pacman") - pacman::p_load_gh("trinker/formality") + pacman::p_load_gh(c( + "trinker/termco", + "trinker/tagger", + "trinker/formality" + )) Contact ======= @@ -84,3 +96,79 @@ You are welcome to: * submit suggestions and bug-reports at: * send a pull request on: * compose a friendly e-mail to: + + +Examples +======== + +The following examples demonstrate some of the functionality of +**formality**. + +Load the Tools/Data +------------------- + + library(formality) + data(presidential_debates_2012) + +Assessing Formality +------------------- + +`formality` takes the text as `text.var` and any number of grouping +variables as `grouping.var`. Here we use the `presidential_debates_2012` +data set and look at the formality of the people involved. Note that for +smaller text Heylighen & Dewaele (2002) state: + +> "At present, a sample would probably need to contain a few hundred +> words for the measure to be minimally reliable. For single sentences, +> the F-value should only be computed for purposes of illustration" (p. +> 24). 
+ + form1 <- with(presidential_debates_2012, formality(dialogue, person)) + form1 + + ## person noun adjective preposition article pronoun verb adverb + ## 1: QUESTION 155 70 91 38 77 112 26 + ## 2: LEHRER 182 93 104 62 101 164 48 + ## 3: SCHIEFFER 347 176 209 102 211 342 69 + ## 4: ROMNEY 4406 2346 3178 1396 2490 4676 1315 + ## 5: OBAMA 3993 1935 2909 1070 2418 4593 1398 + ## 6: CROWLEY 387 135 269 104 249 405 134 + ## interjection formal contextual n F + ## 1: 4 354 219 573 61.78010 + ## 2: 8 441 321 762 57.87402 + ## 3: 0 834 622 1456 57.28022 + ## 4: 25 11326 8506 19832 57.10972 + ## 5: 13 9907 8422 18329 54.05096 + ## 6: 0 895 788 1683 53.17885 + +Recycling the First Run +----------------------- + +This will take ~20 seconds because of the part of speech tagging that +must be undertaken. The output can be reused as `text.var` cutting the +time to a fraction of the first run. + + with(presidential_debates_2012, formality(form1, list(time, person))) + + ## time person noun adjective preposition article pronoun verb + ## 1: time 2 QUESTION 155 70 91 38 77 112 + ## 2: time 1 LEHRER 182 93 104 62 101 164 + ## 3: time 1 ROMNEY 950 483 642 286 504 978 + ## 4: time 3 ROMNEY 1766 958 1388 617 1029 1920 + ## 5: time 3 SCHIEFFER 347 176 209 102 211 342 + ## 6: time 2 ROMNEY 1690 905 1148 493 957 1778 + ## 7: time 3 OBAMA 1546 741 1185 432 973 1799 + ## 8: time 1 OBAMA 792 357 579 219 452 925 + ## 9: time 2 OBAMA 1655 837 1145 419 993 1869 + ## 10: time 2 CROWLEY 387 135 269 104 249 405 + ## adverb interjection formal contextual n F + ## 1: 26 4 354 219 573 61.78010 + ## 2: 48 8 441 321 762 57.87402 + ## 3: 240 4 2361 1726 4087 57.76853 + ## 4: 536 10 4729 3495 8224 57.50243 + ## 5: 69 0 834 622 1456 57.28022 + ## 6: 539 11 4236 3285 7521 56.32230 + ## 7: 522 4 3904 3298 7202 54.20716 + ## 8: 281 2 1947 1660 3607 53.97838 + ## 9: 595 7 4056 3464 7520 53.93617 + ## 10: 134 0 895 788 1683 53.17885 \ No newline at end of file