Skip to content

Commit

Permalink
added plot method
Browse files Browse the repository at this point in the history
  • Loading branch information
trinker committed Oct 14, 2015
1 parent cedcb8c commit 8a07434
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 29 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Authors@R: c(person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role =
Maintainer: Tyler Rinker <tyler.rinker@gmail.com>
Description: Calculate the formality of text based on part of speech tags.
Depends: R (>= 3.2.2)
Imports: data.table, tagger
Imports: data.table, ggplot2, gridExtra, grid, tagger
Suggests: testthat
Date: 2015-10-13
License: GPL-2
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@

S3method(formality,Formality)
S3method(formality,default)
S3method(plot,Formality)
export(formality)
importFrom(data.table,":=")
138 changes: 134 additions & 4 deletions R/formality.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
#' data(presidential_debates_2012)
#' (form1 <- with(presidential_debates_2012, formality(dialogue, person)))
#' with(presidential_debates_2012, formality(form1, list(person, time))) #recycle form 1 for speed
#'
#' plot(form1)
#' plot(with(presidential_debates_2012, formality(form1, list(person, time))))
formality <- function(text.var, grouping.var = NULL, order.by.formality = TRUE, ...){

UseMethod("formality")
Expand Down Expand Up @@ -83,8 +86,8 @@ formality.default <- function(text.var, grouping.var = NULL, order.by.formality
}
}

formal <- c('noun', 'adjective', 'preposition', 'article')
contextual <- c('pronoun', 'verb', 'adverb', 'interjection')
formal <- c('noun', 'preposition', 'adjective', 'article')
contextual <- c('verb', 'pronoun', 'adverb', 'interjection')

## in other version this will be extracted
#=============================================
Expand Down Expand Up @@ -166,8 +169,8 @@ formality.Formality <- function(text.var, grouping.var = NULL, order.by.formalit
}
}

formal <- c('noun', 'adjective', 'preposition', 'article')
contextual <- c('pronoun', 'verb', 'adverb', 'interjection')
formal <- c('noun', 'preposition', 'adjective', 'article')
contextual <- c('verb', 'pronoun', 'adverb', 'interjection')

counts <- attributes(text.var)[["counts"]][["counts"]]

Expand All @@ -191,3 +194,130 @@ formality.Formality <- function(text.var, grouping.var = NULL, order.by.formalit
out

}



#' Plots a Formality Object
#'
#' Plots a Formality object.
#'
#' @param x The Formality object
#' @param plot logical. If \code{TRUE} the output is plotted.
#' @param \ldots ignored.
#' @return Returns a list of the three \pkg{ggplot2} objects that make the
#' combined plot.
#' @importFrom data.table :=
#' @method plot Formality
#' @export
plot.Formality <- function(x, plot = TRUE, ...){

group.vars <- n <- warn <- contextual <- formal <- type <- NULL

grps <- attr(x, "group.var")
pos <- attr(x, "pos.vars")

## Prepare the pos data
express1 <- paste0("lapply(list(", paste(pos, collapse=","), "), function(y) as.numeric(y/n))")
express2 <- paste0("paste(", paste(grps, collapse=", "), ", sep = \"_\")")
pos_dat <- x[, c(grps, pos, "n"), with=FALSE][,
(pos) := eval(parse(text=express1))][,
'group.vars' := eval(parse(text=express2))][,
'group.vars' := factor(group.vars, levels=rev(group.vars))][,
c(pos, "n", "group.vars"), with = FALSE]

pos_dat_long <- data.table::melt(pos_dat, id = c("group.vars", "n"),
variable.name = "pos", value.name = "proportion")[,
pos := factor(pos, levels = attr(x, "pos.vars"))]

## prepare the formality data
form_dat <- x[, c(grps, "n", "F"), with=FALSE][,
'group.vars' := eval(parse(text=express2))][,
'group.vars' := factor(group.vars, levels=rev(group.vars))][,
c("group.vars", "n", "F"), with = FALSE][,
warn := ifelse(n > 300, FALSE, TRUE)]

## prepare the contectual/formal data
con_form_dat <- x[, c(grps, "contextual", "formal", "n"), with=FALSE][,
(c("contextual", "formal")) := list(contextual/n, formal/n)][,
'group.vars' := eval(parse(text=express2))][,
'group.vars' := factor(group.vars, levels=rev(group.vars))][,
c("contextual", "formal", "n", "group.vars"), with = FALSE]

con_form_long <- data.table::melt(con_form_dat, id = c("group.vars", "n"),
variable.name = "type", value.name = "proportion")[,
type := factor(type, levels = c("formal", "contextual"))]

con_form_plot <- ggplot2::ggplot(con_form_long,
ggplot2::aes_string(x = "group.vars", weight = "proportion", fill ="type")) +
ggplot2::geom_bar() +
ggplot2::coord_flip() +
ggplot2::xlab(NULL) +
ggplot2::ylab("") +
ggplot2::theme_bw() +
ggplot2::theme(
panel.grid = ggplot2::element_blank(),
#legend.position="bottom",
legend.title = ggplot2::element_blank(),
panel.border = ggplot2::element_blank(),
axis.line = ggplot2::element_line(color="grey70")
) +
ggplot2::scale_y_continuous(labels=function(x) paste0(round(x*100, 0), "%"),
expand = c(0,0)) +
ggplot2::scale_fill_manual(values=pals[c(2, 6), 2])

form_plot <- ggplot2::ggplot(form_dat,
ggplot2::aes_string(y = "group.vars", x = "F")) +
ggplot2::geom_point(ggplot2::aes_string(size="n"), alpha=.22) +
ggplot2::scale_size(range=c(1, 7), name = "Text\nLength") +
ggplot2::geom_point(ggplot2::aes_string(color="warn"), size=1.5) +
ggplot2::scale_color_manual(values=c("black", "red"), guide=FALSE) +
ggplot2::ylab(NULL) +
ggplot2::xlab("F Measure") +
ggplot2::theme_bw() +
ggplot2::theme(
#legend.position="bottom",
axis.title.x = ggplot2::element_text(size=11),
#legend.title = ggplot2::element_blank(),
panel.border = ggplot2::element_blank(),
axis.line = ggplot2::element_line(color="grey70")
)

pos_heat_plot <- ggplot2::ggplot(pos_dat_long,
ggplot2::aes_string(y = "group.vars", x = "pos", fill="proportion")) +
ggplot2::geom_tile() +
ggplot2::scale_fill_gradient(
labels=function(x) paste0(round(x*100, 0), "%"),
high="#BF812D",
low="white",
name = ggplot2::element_blank()
)+
ggplot2::ylab(NULL) +
ggplot2::xlab("Part of Speech") +
ggplot2::theme_bw() +
ggplot2::theme(
panel.grid = ggplot2::element_blank(),
#legend.position="bottom",
axis.title.x = ggplot2::element_text(size=11),
legend.title = ggplot2::element_blank(),
panel.border = ggplot2::element_rect(color="grey88")
) +
ggplot2::guides(fill = ggplot2::guide_colorbar(barwidth = .5, barheight = 10)) #+
#ggplot2::guides(fill = ggplot2::guide_colorbar(barwidth = 14, barheight = .5))

plotout1 <- gridExtra::arrangeGrob(con_form_plot, form_plot,
widths = grid::unit(c(.5, .5), "native"), ncol=2)

plotout2 <- gridExtra::arrangeGrob(plotout1, pos_heat_plot, ncol=1)
if (isTRUE(plot)) gridExtra::grid.arrange(plotout2)
return(invisible(list(formality = form_plot, contextual_formal = con_form_plot, pos = pos_heat_plot)))
}


pals <- structure(list(pos = c("noun", "adjective", "preposition", "article",
"pronoun", "verb", "adverb", "interjection"), cols = c("#8C510A",
"#BF812D", "#DFC27D", "#F6E8C3", "#C7EAE5", "#80CDC1", "#35978F",
"#01665E")), .Names = c("pos", "cols"), row.names = c(NA, -8L
), class = "data.frame")



27 changes: 25 additions & 2 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,26 @@ output:
toc: true
---

```{r, echo=FALSE}
```{r, echo=FALSE, message=FALSE, warning=FALSE}
library(knitr)
desc <- suppressWarnings(readLines("DESCRIPTION"))
regex <- "(^Version:\\s+)(\\d+\\.\\d+\\.\\d+)"
loc <- grep(regex, desc)
ver <- gsub(regex, "\\2", desc[loc])
verbadge <- sprintf('<a href="https://img.shields.io/badge/Version-%s-orange.svg"><img src="https://img.shields.io/badge/Version-%s-orange.svg" alt="Version"/></a></p>', ver, ver)
````

[![Project Status: Wip - Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](http://www.repostatus.org/badges/0.1.0/wip.svg)](http://www.repostatus.org/#wip)
```{r, echo=FALSE}
knit_hooks$set(htmlcap = function(before, options, envir) {
if(!before) {
paste('<p class="caption"><b><em>',options$htmlcap,"</em></b></p>",sep="")
}
})
knitr::opts_knit$set(self.contained = TRUE, cache = FALSE)
knitr::opts_chunk$set(fig.path = "inst/figure/")
```

[![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/0.1.0/active.svg)](http://www.repostatus.org/#active)
[![Build Status](https://travis-ci.org/trinker/formality.svg?branch=master)](https://travis-ci.org/trinker/formality)
[![Coverage Status](https://coveralls.io/repos/trinker/formality/badge.svg?branch=master)](https://coveralls.io/r/trinker/formality?branch=master)
`r verbadge`
Expand Down Expand Up @@ -103,4 +114,16 @@ This will take ~20 seconds because of the part of speech tagging that must be un
with(presidential_debates_2012, formality(form1, list(time, person)))
```

## Plotting

The generic `plot` function provides three views of the data:

1. A filled bar plot of formal vs. contextual usage
2. A dotplot of formality\*\*
3. A heatmap of the usage of the parts of speech used to calculate the formality score

\*\****Note*** *red dot in center is a warning of less than 300 words*

```{r}
plot(form1)
````
61 changes: 39 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ formality
============


[![Project Status: Wip - Initial development is in progress, but there
has not yet been a stable, usable release suitable for the
public.](http://www.repostatus.org/badges/0.1.0/wip.svg)](http://www.repostatus.org/#wip)
[![Project Status: Active - The project has reached a stable, usable
state and is being actively
developed.](http://www.repostatus.org/badges/0.1.0/active.svg)](http://www.repostatus.org/#active)
[![Build
Status](https://travis-ci.org/trinker/formality.svg?branch=master)](https://travis-ci.org/trinker/formality)
[![Coverage
Expand Down Expand Up @@ -33,6 +33,7 @@ Table of Contents
- [Load the Tools/Data](#load-the-toolsdata)
- [Assessing Formality](#assessing-formality)
- [Recycling the First Run](#recycling-the-first-run)
- [Plotting](#plotting)

Formality Equation
============
Expand Down Expand Up @@ -126,13 +127,13 @@ smaller text Heylighen & Dewaele (2002) state:
form1 <- with(presidential_debates_2012, formality(dialogue, person))
form1

## person noun adjective preposition article pronoun verb adverb
## 1: QUESTION 155 70 91 38 77 112 26
## 2: LEHRER 182 93 104 62 101 164 48
## 3: SCHIEFFER 347 176 209 102 211 342 69
## 4: ROMNEY 4406 2346 3178 1396 2490 4676 1315
## 5: OBAMA 3993 1935 2909 1070 2418 4593 1398
## 6: CROWLEY 387 135 269 104 249 405 134
## person noun preposition adjective article verb pronoun adverb
## 1: QUESTION 155 91 70 38 112 77 26
## 2: LEHRER 182 104 93 62 164 101 48
## 3: SCHIEFFER 347 209 176 102 342 211 69
## 4: ROMNEY 4406 3178 2346 1396 4676 2490 1315
## 5: OBAMA 3993 2909 1935 1070 4593 2418 1398
## 6: CROWLEY 387 269 135 104 405 249 134
## interjection formal contextual n F
## 1: 4 354 219 573 61.78010
## 2: 8 441 321 762 57.87402
Expand All @@ -150,17 +151,17 @@ time to a fraction of the first run.

with(presidential_debates_2012, formality(form1, list(time, person)))

## time person noun adjective preposition article pronoun verb
## 1: time 2 QUESTION 155 70 91 38 77 112
## 2: time 1 LEHRER 182 93 104 62 101 164
## 3: time 1 ROMNEY 950 483 642 286 504 978
## 4: time 3 ROMNEY 1766 958 1388 617 1029 1920
## 5: time 3 SCHIEFFER 347 176 209 102 211 342
## 6: time 2 ROMNEY 1690 905 1148 493 957 1778
## 7: time 3 OBAMA 1546 741 1185 432 973 1799
## 8: time 1 OBAMA 792 357 579 219 452 925
## 9: time 2 OBAMA 1655 837 1145 419 993 1869
## 10: time 2 CROWLEY 387 135 269 104 249 405
## time person noun preposition adjective article verb pronoun
## 1: time 2 QUESTION 155 91 70 38 112 77
## 2: time 1 LEHRER 182 104 93 62 164 101
## 3: time 1 ROMNEY 950 642 483 286 978 504
## 4: time 3 ROMNEY 1766 1388 958 617 1920 1029
## 5: time 3 SCHIEFFER 347 209 176 102 342 211
## 6: time 2 ROMNEY 1690 1148 905 493 1778 957
## 7: time 3 OBAMA 1546 1185 741 432 1799 973
## 8: time 1 OBAMA 792 579 357 219 925 452
## 9: time 2 OBAMA 1655 1145 837 419 1869 993
## 10: time 2 CROWLEY 387 269 135 104 405 249
## adverb interjection formal contextual n F
## 1: 26 4 354 219 573 61.78010
## 2: 48 8 441 321 762 57.87402
Expand All @@ -171,4 +172,20 @@ time to a fraction of the first run.
## 7: 522 4 3904 3298 7202 54.20716
## 8: 281 2 1947 1660 3607 53.97838
## 9: 595 7 4056 3464 7520 53.93617
## 10: 134 0 895 788 1683 53.17885
## 10: 134 0 895 788 1683 53.17885

Plotting
--------

The generic `plot` function provides three views of the data:

1. A filled bar plot of formal vs. contextual usage
2. A dotplot of formality\*\*
3. A heatmap of the usage of the parts of speech used to calculate the
formality score

\*\****Note*** *red dot in center is a warning of less than 300 words*

plot(form1)

![](inst/figure/unnamed-chunk-6-1.png)
Binary file added inst/figure/unnamed-chunk-6-1.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions man/formality.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ purposes of illustration" (p. 24).
data(presidential_debates_2012)
(form1 <- with(presidential_debates_2012, formality(dialogue, person)))
with(presidential_debates_2012, formality(form1, list(person, time))) #recycle form 1 for speed
plot(form1)
plot(with(presidential_debates_2012, formality(form1, list(person, time))))
}
\references{
Heylighen, F. (1999). Advantages and limitations of formal expression. doi:10.1023/A:1009686703349 \cr \cr
Expand Down
23 changes: 23 additions & 0 deletions man/plot.Formality.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/formality.R
\name{plot.Formality}
\alias{plot.Formality}
\title{Plots a Formality Object}
\usage{
\method{plot}{Formality}(x, plot = TRUE, ...)
}
\arguments{
\item{x}{The Formality object}

\item{plot}{logical. If \code{TRUE} the output is plotted.}

\item{\ldots}{ignored.}
}
\value{
Returns a list of the three \pkg{ggplot2} objects that make the
combined plot.
}
\description{
Plots a Formality object.
}

0 comments on commit 8a07434

Please sign in to comment.