Skip to content
Browse files

tweaks to example (all examples run correctly) and added some static …

…docs stuff.
  • Loading branch information...
1 parent 53d2a8a commit a9c4e9a11a413c33b85eb9326bb0538127b6fd5e @trinker committed Jan 24, 2013
Showing with 23,824 additions and 213 deletions.
  1. +3 −0 .Rbuildignore
  2. +7 −3 DESCRIPTION
  3. +43 −39 NAMESPACE
  4. +170 −45 R/bracketX.R
  5. +1 −1 R/cm_bidist.R
  6. +3 −2 R/cm_df.temp.R
  7. +13 −2 R/cm_df.transcript.R
  8. +2 −0 R/gantt_plot.R
  9. +2 −0 R/gantt_wrap.R
  10. +12 −7 R/kullback.leibler.R
  11. +23 −1 R/polarity.R
  12. +3 −3 R/qda.handler.R
  13. +19 −1 R/qdap-package.R
  14. +27 −12 R/question_type.R
  15. +96 −0 R/replace_contraction.R
  16. +2 −1 R/scrubber.R
  17. +41 −0 R/space_fill.R
  18. +4 −2 R/strip.R
  19. +34 −29 R/term.find.R
  20. +1 −1 R/trans.cloud.R
  21. +36 −17 R/wfm.R
  22. +19 −7 R/word.network.plot.R
  23. +73 −39 R/{word.associate.R → word_associate.R}
  24. +1 −1 README.md
  25. BIN data/contractions.rda
  26. +105 −0 inst/staticdocs/BuckleySaltonSWL.html
  27. +92 −0 inst/staticdocs/DATA.html
  28. +97 −0 inst/staticdocs/DATA2.html
  29. +110 −0 inst/staticdocs/DICTIONARY.html
  30. +92 −0 inst/staticdocs/NAer.html
  31. +102 −0 inst/staticdocs/OnixTxtRetToolkitSWL1.html
  32. +169 −0 inst/staticdocs/Readability.html
  33. +109 −0 inst/staticdocs/SYNONYM.html
  34. +100 −0 inst/staticdocs/Search.html
  35. +95 −0 inst/staticdocs/Top100Words.html
  36. +95 −0 inst/staticdocs/Top200Words.html
  37. +95 −0 inst/staticdocs/Top25Words.html
  38. +87 −0 inst/staticdocs/Trim.html
  39. +164 −0 inst/staticdocs/Word_Frequency_Matrix.html
  40. +89 −0 inst/staticdocs/abbreviations.html
  41. +99 −0 inst/staticdocs/action.verbs.html
  42. +95 −0 inst/staticdocs/adjacency_matrix.html
  43. +99 −0 inst/staticdocs/adverb.html
  44. +114 −0 inst/staticdocs/all_words.html
  45. +108 −0 inst/staticdocs/bag.o.words.html
  46. +93 −0 inst/staticdocs/blank2NA.html
  47. +165 −0 inst/staticdocs/bracketX.html
  48. +106 −0 inst/staticdocs/capitalizer.html
  49. +87 −0 inst/staticdocs/clean.html
  50. +121 −0 inst/staticdocs/cm_code.blank.html
  51. +111 −0 inst/staticdocs/cm_code.combine.html
  52. +116 −0 inst/staticdocs/cm_code.exclude.html
  53. +110 −0 inst/staticdocs/cm_code.overlap.html
  54. +123 −0 inst/staticdocs/cm_code.transform.html
  55. +104 −0 inst/staticdocs/cm_combine.dummy.html
  56. +113 −0 inst/staticdocs/cm_df.fill.html
  57. +127 −0 inst/staticdocs/cm_df.temp.html
  58. +129 −0 inst/staticdocs/cm_df.transcript.html
  59. +129 −0 inst/staticdocs/cm_df2long.html
  60. +128 −0 inst/staticdocs/cm_distance.html
  61. +95 −0 inst/staticdocs/cm_dummy2long.html
  62. +107 −0 inst/staticdocs/cm_long2dummy.html
  63. +95 −0 inst/staticdocs/cm_range.temp.html
  64. +114 −0 inst/staticdocs/cm_range2long.html
  65. +102 −0 inst/staticdocs/cm_time.temp.html
  66. +117 −0 inst/staticdocs/cm_time2long.html
  67. +96 −0 inst/staticdocs/colSplit.html
  68. +133 −0 inst/staticdocs/colsplit2df.html
  69. +96 −0 inst/staticdocs/common.html
  70. +84 −0 inst/staticdocs/common.list.html
  71. +89 −0 inst/staticdocs/contractions.html
  72. +91 −0 inst/staticdocs/convert.html
  73. +815 −0 inst/staticdocs/css/bootstrap-responsive.css
  74. +9 −0 inst/staticdocs/css/bootstrap-responsive.min.css
  75. +4,983 −0 inst/staticdocs/css/bootstrap.css
  76. +9 −0 inst/staticdocs/css/bootstrap.min.css
  77. +28 −0 inst/staticdocs/css/highlight.css
  78. +18 −0 inst/staticdocs/css/staticdocs.css
  79. +119 −0 inst/staticdocs/data_viewing.html
  80. +129 −0 inst/staticdocs/dir_map.html
  81. +111 −0 inst/staticdocs/dissimilarity.html
  82. +104 −0 inst/staticdocs/distTab.html
  83. +131 −0 inst/staticdocs/diversity.html
  84. +90 −0 inst/staticdocs/duplicates.html
  85. +98 −0 inst/staticdocs/emoticon.html
  86. +96 −0 inst/staticdocs/end_mark.html
  87. +97 −0 inst/staticdocs/endf.html
  88. +91 −0 inst/staticdocs/env.syl.html
  89. +89 −0 inst/staticdocs/env.syn.html
  90. +89 −0 inst/staticdocs/exclude.html
  91. +104 −0 inst/staticdocs/file_handling.html
  92. +154 −0 inst/staticdocs/formality.html
  93. +153 −0 inst/staticdocs/gantt.html
  94. +132 −0 inst/staticdocs/gantt_plot.html
  95. +128 −0 inst/staticdocs/gantt_rep.html
  96. +209 −0 inst/staticdocs/gantt_wrap.html
  97. +110 −0 inst/staticdocs/hash.html
  98. BIN inst/staticdocs/img/glyphicons-halflings-white.png
  99. BIN inst/staticdocs/img/glyphicons-halflings.png
  100. +120 −0 inst/staticdocs/imperative.html
  101. +95 −0 inst/staticdocs/incomplete.replace.html
  102. +93 −0 inst/staticdocs/increase.amplification.words.html
  103. +601 −0 inst/staticdocs/index.html
  104. +83 −0 inst/staticdocs/interjections.html
  105. +1,825 −0 inst/staticdocs/js/bootstrap.js
  106. +6 −0 inst/staticdocs/js/bootstrap.min.js
  107. +112 −0 inst/staticdocs/justification.html
  108. +102 −0 inst/staticdocs/key_merge.html
  109. +117 −0 inst/staticdocs/kullback.leibler.html
  110. +115 −0 inst/staticdocs/labMT.html
  111. +101 −0 inst/staticdocs/lookup.html
  112. +102 −0 inst/staticdocs/merge_all.html
  113. +102 −0 inst/staticdocs/mraja1.html
  114. +105 −0 inst/staticdocs/mraja1spl.html
  115. +138 −0 inst/staticdocs/multicsv.html
  116. +124 −0 inst/staticdocs/multigsub.html
  117. +105 −0 inst/staticdocs/multiscale.html
  118. +93 −0 inst/staticdocs/negation.words.html
  119. +95 −0 inst/staticdocs/negative.words.html
  120. +101 −0 inst/staticdocs/outlier.detect.html
  121. +98 −0 inst/staticdocs/outlier.labeler.html
  122. +110 −0 inst/staticdocs/paste2.html
  123. +94 −0 inst/staticdocs/plot.character.table.html
  124. +78 −0 inst/staticdocs/plot.diversity.html
  125. +106 −0 inst/staticdocs/plot.formality.html
  126. +122 −0 inst/staticdocs/plot.polarity.html
  127. +94 −0 inst/staticdocs/plot.pos.by.html
  128. +94 −0 inst/staticdocs/plot.question_type.html
  129. +93 −0 inst/staticdocs/plot.termco.html
  130. +84 −0 inst/staticdocs/plot.word_stats.html
  131. +207 −0 inst/staticdocs/polarity.html
  132. +178 −0 inst/staticdocs/pos.html
  133. +95 −0 inst/staticdocs/positive.words.html
  134. +93 −0 inst/staticdocs/potential_NA.html
  135. +75 −0 inst/staticdocs/preposition.html
  136. +78 −0 inst/staticdocs/print.adjacency_matrix.html
  137. +90 −0 inst/staticdocs/print.character.table.html
  138. +78 −0 inst/staticdocs/print.cm_distance.html
  139. +78 −0 inst/staticdocs/print.colsplit2df.html
  140. +80 −0 inst/staticdocs/print.dissimilarity.html
  141. +80 −0 inst/staticdocs/print.diversity.html
  142. +78 −0 inst/staticdocs/print.formality.html
  143. +80 −0 inst/staticdocs/print.kullback.leibler.html
  144. +80 −0 inst/staticdocs/print.polarity.html
  145. +90 −0 inst/staticdocs/print.pos.by.html
  146. +90 −0 inst/staticdocs/print.pos.html
  147. +78 −0 inst/staticdocs/print.question_type.html
  148. +90 −0 inst/staticdocs/print.termco.html
  149. +80 −0 inst/staticdocs/print.v.outer.html
  150. +78 −0 inst/staticdocs/print.word_associate.html
  151. +78 −0 inst/staticdocs/print.word_list.html
  152. +81 −0 inst/staticdocs/print.word_stats.html
  153. +100 −0 inst/staticdocs/prop.html
  154. +101 −0 inst/staticdocs/qcombine.html
  155. +114 −0 inst/staticdocs/qcv.html
  156. +78 −0 inst/staticdocs/qdap.html
  157. +144 −0 inst/staticdocs/qheat.html
  158. +134 −0 inst/staticdocs/qprep.html
  159. +142 −0 inst/staticdocs/question_type.html
  160. +95 −0 inst/staticdocs/raj.act.1.html
  161. +95 −0 inst/staticdocs/raj.act.2.html
  162. +95 −0 inst/staticdocs/raj.act.3.html
  163. +95 −0 inst/staticdocs/raj.act.4.html
  164. +95 −0 inst/staticdocs/raj.act.5.html
  165. +100 −0 inst/staticdocs/raj.demographics.html
  166. +99 −0 inst/staticdocs/raj.html
  167. +96 −0 inst/staticdocs/rajPOS.html
  168. +105 −0 inst/staticdocs/rajSPLIT.html
  169. +160 −0 inst/staticdocs/rank_freq_plot.html
  170. +175 −0 inst/staticdocs/read.transcript.html
  171. +104 −0 inst/staticdocs/replace_abbreviation.html
  172. +109 −0 inst/staticdocs/replace_contraction.html
  173. +108 −0 inst/staticdocs/replace_number.html
  174. +113 −0 inst/staticdocs/replace_symbol.html
  175. +92 −0 inst/staticdocs/replacer.html
  176. +104 −0 inst/staticdocs/rm_row.html
Sorry, we could not display the entire diff because it was too big.
View
3 .Rbuildignore
@@ -2,3 +2,6 @@
^.*\.Rproj$
.Rbuildignore
qdap.Rproj
+.gitignore
+.gitattributes
+tools.R
View
10 DESCRIPTION
@@ -7,11 +7,13 @@ Author: Tyler Rinker
Maintainer: Tyler Rinker <tyler.rinker@gmail.com>
Depends:
R (>= 2.15),
- ggplot2 (>= 0.9.2)
+ gdata,
+ ggplot2 (>= 0.9.2),
+ grid,
+ scales
Imports:
gridExtra,
chron,
- scales,
RColorBrewer,
igraph,
tm,
@@ -145,6 +147,7 @@ Collate:
'scrubber.R'
'Search.R'
'sentSplit.R'
+ 'space_fill.R'
'spaste.R'
'speakerSplit.R'
'stemmer.R'
@@ -170,11 +173,12 @@ Collate:
'url_dl.R'
'v.outer.R'
'wfm.R'
- 'word.associate.R'
'word.count.R'
'word.network.plot.R'
'word_diff_list.R'
'word_list.R'
'word_stats.R'
'words.R'
'xnoy.R'
+ 'word_associate.R'
+ 'replace_contraction.R'
View
82 NAMESPACE
@@ -1,3 +1,35 @@
+S3method(common,default)
+S3method(common,list)
+S3method(plot,character.table)
+S3method(plot,diversity)
+S3method(plot,formality)
+S3method(plot,polarity)
+S3method(plot,pos.by)
+S3method(plot,question_type)
+S3method(plot,termco)
+S3method(plot,word_stats)
+S3method(print,adjacency_matrix)
+S3method(print,character.table)
+S3method(print,cm_distance)
+S3method(print,colsplit2df)
+S3method(print,dissimilarity)
+S3method(print,diversity)
+S3method(print,formality)
+S3method(print,kullback.leibler)
+S3method(print,polarity)
+S3method(print,pos)
+S3method(print,pos.by)
+S3method(print,question_type)
+S3method(print,termco)
+S3method(print,v.outer)
+S3method(print,word_associate)
+S3method(print,word_list)
+S3method(print,word_stats)
+export(NAer)
+export(SMOG)
+export(Search)
+export(TOT)
+export(Trim)
export(adjacency_matrix)
export(adjmat)
export(all_words)
@@ -29,8 +61,8 @@ export(cm_range.temp)
export(cm_range2long)
export(cm_time.temp)
export(cm_time2long)
-export(coleman_liau)
export(colSplit)
+export(coleman_liau)
export(colsplit2df)
export(combo_syllable.sum)
export(common)
@@ -52,6 +84,8 @@ export(gantt)
export(gantt_plot)
export(gantt_rep)
export(gantt_wrap)
+export(genX)
+export(genXtract)
export(hash)
export(htruncdf)
export(imperative)
@@ -69,7 +103,6 @@ export(merge_all)
export(mgsub)
export(multigsub)
export(multiscale)
-export(NAer)
export(outlier.detect)
export(outlier.labeler)
export(paste2)
@@ -90,25 +123,25 @@ export(rank_freq_mplot)
export(rank_freq_plot)
export(read.transcript)
export(replace_abbreviation)
+export(replace_contraction)
export(replace_number)
export(replace_symbol)
export(replacer)
export(right.just)
export(rm_empty_row)
export(rm_row)
export(scrubber)
-export(Search)
export(sentCombine)
export(sentSplit)
-export(SMOG)
+export(space_fill)
export(spaste)
export(speakerSplit)
export(stem.words)
export(stem2df)
export(stemmer)
export(stopwords)
-export(strip)
export(strWrap)
+export(strip)
export(syllable.count)
export(syllable.sum)
export(syn)
@@ -119,10 +152,8 @@ export(termco.c)
export(termco.d)
export(termco2mat)
export(text2color)
-export(TOT)
export(trans.cloud)
export(trans.venn)
-export(Trim)
export(truncdf)
export(url_dl)
export(v.outer)
@@ -131,13 +162,17 @@ export(wf.combine)
export(wfdf)
export(wfm)
export(wfm.expanded)
-export(word.associate)
export(word.count)
export(word.network.plot)
export(word.split)
+export(word_associate)
export(word_diff_list)
export(word_list)
export(word_stats)
+import(RColorBrewer)
+import(RCurl)
+import(Snowball)
+import(XML)
import(chron)
import(gdata)
import(ggplot2)
@@ -146,39 +181,8 @@ import(igraph)
import(openNLP)
import(openNLPmodels.en)
import(parallel)
-import(RColorBrewer)
-import(RCurl)
import(reshape2)
import(scales)
-import(Snowball)
import(tm)
import(venneuler)
import(wordcloud)
-import(XML)
-S3method(common,default)
-S3method(common,list)
-S3method(plot,character.table)
-S3method(plot,diversity)
-S3method(plot,formality)
-S3method(plot,polarity)
-S3method(plot,pos.by)
-S3method(plot,question_type)
-S3method(plot,termco)
-S3method(plot,word_stats)
-S3method(print,adjacency_matrix)
-S3method(print,character.table)
-S3method(print,cm_distance)
-S3method(print,colsplit2df)
-S3method(print,dissimilarity)
-S3method(print,diversity)
-S3method(print,formality)
-S3method(print,kullback.leibler)
-S3method(print,polarity)
-S3method(print,pos)
-S3method(print,pos.by)
-S3method(print,question_type)
-S3method(print,termco)
-S3method(print,v.outer)
-S3method(print,word_associate)
-S3method(print,word_list)
-S3method(print,word_stats)
View
215 R/bracketX.R
@@ -3,16 +3,24 @@
#' \code{bracketX} - Apply bracket removal to character vectors.
#'
#' @param text.var The text variable
-#' @param bracket The type of bracket (and encased text) to remove. This is one of
-#' the strings \code{"curly"}, \code{"square"}, \code{"round"}, \code{"angle"}
-#' and \code{"all"}. These strings correspond to: \{, [, (, < or all four types.
+#' @param bracket The type of bracket (and encased text) to remove. This is one
+#' or more of the strings \code{"curly"}, \code{"square"}, \code{"round"},
+#' \code{"angle"} and \code{"all"}. These strings correspond
+#' to: \{, [, (, < or all four types.
#' @param missing Value to assign to empty cells.
-#' @param names logical. If TRUE the sentences are given as the names of the counts.
+#' @param names logical. If TRUE the sentences are given as the names of the
+#' counts.
#' @return \code{bracketX} - returns a vector of text with brackets removed.
+#' @section Warning: the \code{gen} functions are more flexible than the
+#' \code{bracket} functions but are not compatible with special
+#' \code{\link[base]{regex}} characters. It is recommended that the researcher
+#' use the \code{bracket} functions.
#' @rdname bracketX
#' @references \url{http://stackoverflow.com/questions/8621066/remove-text-inside-brackets-parens-and-or-braces}
#' @keywords bracket-remove, parenthesis, bracket, curly-braces
#' @export
+#' @seealso
+#' \code{\link[base]{regex}}
#' @examples
#' \dontrun{
#' examp <- structure(list(person = structure(c(1L, 2L, 1L, 3L),
@@ -24,73 +32,190 @@
#'
#' examp
#' bracketX(examp$text, "square")
-#' bracketX(examp$text, "curly")
+#' bracketX(examp$text, "curly")
+#' bracketX(examp$text, c("square", "round"))
#' bracketX(examp$text)
#'
#' examp
#' bracketXtract(examp$text, "square")
#' bracketXtract(examp$text, "curly")
+#' bracketXtract(examp$text, c("square", "round"))
+#' bracketXtract(examp$text, c("square", "round"), merge = FALSE)
#' bracketXtract(examp$text)
#' bracketXtract(examp$text, with = TRUE)
#'
#' paste2(bracketXtract(examp$text, "curly"), " ")
+#'
+#' x <- c("Where is the /big dog#?", "I think he's @@arunning@@b with /little cat#.")
+#' genXtract(x, c("/", "@@a"), c("#", "@@b"))
+#' x <- c("Where is the L1big dogL2?", "I think he's 98running99 with L1little catL2.")
+#' genXtract(x, c("L1", 98), c("L2", 99))
+#'
+#' DATA$state
+#' genX(DATA$state, c("is", "we"), c("too", "on"))
#' }
-bracketX <-
+bracketX <-
function (text.var, bracket = "all", missing = NULL, names = FALSE) {
- X <- switch(bracket,
- html = sapply(text.var, function(x) gsub("<.+?>", "", x)),
- angle = sapply(text.var, function(x) gsub("<.+?>", "", x)),
- square = sapply(text.var, function(x) gsub("\\[.+?\\]", "", x)),
- round = sapply(text.var, function(x) gsub("\\(.+?\\)", "", x)),
- curly = sapply(text.var, function(x) gsub("\\{.+?\\}", "", x)),
- all = {
- P1 <- sapply(text.var, function(x) gsub("\\[.+?\\]", "", x))
- P1 <- sapply(P1, function(x) gsub("\\(.+?\\)", "", x))
- P1 <- sapply(P1, function(x) gsub("<.+?>", "", x))
- sapply(P1, function(x) gsub("\\{.+?\\}", "", x))
+ FUN <- function(bracket, text.var, missing, names) {
+ X <- switch(bracket,
+ html = sapply(text.var, function(x) gsub("<.+?>", "", x)),
+ angle = sapply(text.var, function(x) gsub("<.+?>", "", x)),
+ square = sapply(text.var, function(x) gsub("\\[.+?\\]", "", x)),
+ round = sapply(text.var, function(x) gsub("\\(.+?\\)", "", x)),
+ curly = sapply(text.var, function(x) gsub("\\{.+?\\}", "", x)),
+ all = {
+ P1 <- sapply(text.var, function(x) gsub("\\[.+?\\]", "", x))
+ P1 <- sapply(P1, function(x) gsub("\\(.+?\\)", "", x))
+ P1 <- sapply(P1, function(x) gsub("<.+?>", "", x))
+ sapply(P1, function(x) gsub("\\{.+?\\}", "", x))
+ }
+ )
+ X <- scrubber(gsub(" +", " ", X))
+ if (!is.null(missing)) {
+ X[X == ""] <- missing
}
- )
- X <- scrubber(gsub(" +", " ", X))
- if (!is.null(missing)) {
- X[X == ""] <- missing
+ if (!names) names(X) <- NULL
+ X
}
- if (!names) names(X) <- NULL
- X
+ invisible(lapply(bracket, function(x) {
+ text.var <<- FUN(x, text.var = text.var,
+ missing = missing, names = names)
+ }))
+ text.var
}
+
#' bracketXtract
#'
#' \code{bracketXtract} - Apply bracket extraction to character vectors.
#'
#' @rdname bracketX
#' @param with logical. If TRUE returns the brackets and the bracketed text.
+#' @param merge logical. If TRUE the results of each bracket type will be
+#' merged by sentence. FALSE returns a named list of lists of vectors of
+#' bracketed text per bracket type.
#' @return \code{bracketXtract} - returns a list of vectors of bracketed text.
#' @author Martin Morgan and Tyler Rinker <tyler.rinker@@gmail.com>.
#' @export
bracketXtract <-
-function(text.var, bracket = "all", with = FALSE){
- br <- bracket
- br <- ifelse(br=="round", "(",
- ifelse(br=="square", "[",
- ifelse(br=="curly", "{",
- ifelse(br=="html", "<",
- ifelse(br=="angle", "<", br)))))
- left <- if ("all" == br) {
- "\\(|\\{|<|\\["
- } else {
- sprintf("\\%s", br)
+function(text.var, bracket = "all", with = FALSE, merge = TRUE){
+ FUN <- function(text.var, bracket, with){
+ br <- bracket
+ br <- ifelse(br=="round", "(",
+ ifelse(br=="square", "[",
+ ifelse(br=="curly", "{",
+ ifelse(br=="html", "<",
+ ifelse(br=="angle", "<", br)))))
+ left <- if ("all" == br) {
+ "\\(|\\{|<|\\["
+ } else {
+ sprintf("\\%s", br)
+ }
+ map <- c(`\\(`="\\)", `\\[`="\\]", `\\{`="\\}",
+ `\\<`="\\>", `\\(|\\{|<|\\[`="\\)|\\}|\\>|\\]")
+ fmt <- if (with==TRUE) {
+ "(%s).*?(%s)"
+ } else {
+ "(?<=%s).*?(?=%s)"
+ }
+ re <- sprintf(fmt, left, map[left])
+ if(length(text.var)==1){
+ unlist(regmatches(text.var, gregexpr(re, text.var, perl=TRUE)))
+ }else{
+ regmatches(text.var, gregexpr(re, text.var, perl=TRUE))
+ }
}
- map <- c(`\\(`="\\)", `\\[`="\\]", `\\{`="\\}",
- `\\<`="\\>", `\\(|\\{|<|\\[`="\\)|\\}|\\>|\\]")
- fmt <- if (with==TRUE) {
- "(%s).*?(%s)"
+ out <- invisible(lapply(bracket, function(x) {
+ FUN(x, text.var = text.var, with = with)
+ }))
+ names(out) <- bracket
+ if (length(bracket) == 1) {
+ return(unlist(out, recursive = FALSE))
} else {
- "(?<=%s).*?(?=%s)"
+ if (merge) {
+ out <- invisible(lapply(seq_along(text.var), function(i) {
+ unlist(invisible(lapply(seq_along(out), function(j) {
+ out[[j]][[i]]
+ })))
+ }))
+ }
+ }
+ out
+}
+
+
+#' genX
+#'
+#' \code{genX} - Apply general chunk removal to character vectors.
+#'
+#' @param left A vector of character or numeric symbols as the left edge to
+#' extract.
+#' @param right A vector of character or numeric symbols as the right edge to
+#' extract.
+#' @rdname bracketX
+#' @return \code{genX} - returns a vector of text with chunks removed.
+#' @export
+genX <-
+function (text.var, left, right, missing = NULL, names = FALSE) {
+ if (length(left) != length(right)) {
+ stop("left and right must be equal length")
}
- re <- sprintf(fmt, left, map[left])
- if(length(text.var)==1){
- unlist(regmatches(text.var, gregexpr(re, text.var, perl=TRUE)))
- }else{
- regmatches(text.var, gregexpr(re, text.var, perl=TRUE))
+ FUN <- function(left, right, text.var, missing, names) {
+ X <- sapply(text.var, function(x) gsub(paste0(left, ".+?", right), "", x))
+ X <- scrubber(gsub(" +", " ", X))
+ if (!is.null(missing)) {
+ X[X == ""] <- missing
+ }
+ if (!names) names(X) <- NULL
+ X
+ }
+ invisible(lapply(seq_along(left), function(i) {
+ text.var <<- FUN(left[i], right[i], text.var = text.var,
+ missing = missing, names = names)
+ }))
+ text.var
+}
+
+
+#' genXtract
+#'
+#' \code{genXtract} - Apply general chunk extraction to character vectors.
+#'
+#' @rdname bracketX
+#' @return \code{genXtract} - returns a list of vectors of extracted chunks.
+#' @export
+genXtract <-
+function(text.var, left, right, with = FALSE, merge = TRUE){
+ if (length(left) != length(right)) {
+ stop("left and right must be equal length")
+ }
+ FUN <- function(left, right, text.var, with){
+ fmt <- if (with==TRUE) {
+ "(%s).*?(%s)"
+ } else {
+ "(?<=%s).*?(?=%s)"
+ }
+ re <- sprintf(fmt, as.character(left), as.character(right))
+ if(length(text.var)==1){
+ unlist(regmatches(text.var, gregexpr(re, text.var, perl=TRUE)))
+ }else{
+ regmatches(text.var, gregexpr(re, text.var, perl=TRUE))
+ }
}
-}
+ out <- invisible(lapply(seq_along(left), function(i) {
+ FUN(left[i], right[i], text.var = text.var, with = with)
+ }))
+ names(out) <- paste(left, " : ", "right")
+ if (length(left) == 1) {
+ return(unlist(out, recursive = FALSE))
+ } else {
+ if (merge) {
+ out <- invisible(lapply(seq_along(text.var), function(i) {
+ unlist(invisible(lapply(seq_along(out), function(j) {
+ out[[j]][[i]]
+ })))
+ }))
+ }
+ }
+ out
+}
View
2 R/cm_bidist.R
@@ -36,4 +36,4 @@ function(code_x, code_y, grouping.var = NULL) {
return(x)
}))
return(v2)
-}
+}
View
5 R/cm_df.temp.R
@@ -13,6 +13,7 @@
#' @param transpose logical. If TRUE transposes the dataframe so that the text
#' is across the top.
#' @param strip logical. If TRUE all punctuation is removed.
+#' @param \ldots Other arguments passed to strip.
#' @return Generates a dataframe, and optional csv file, of individual words
#' while maintaining demographic information. If a vector of codes is provided the
#' outcome is a matrix of words used by codes filled with zeros. This dataframe
@@ -35,10 +36,10 @@
#' cm_df.temp(raj.act.1, "dialogue", codes, transpose = TRUE)[, 1:9]
#' }
cm_df.temp <- function(dataframe, text.var, codes = NULL, csv = TRUE,
- file.name = NULL, transpose = FALSE, strip =FALSE){
+ file.name = NULL, transpose = FALSE, strip =FALSE, ...){
tv <- as.character(dataframe[, text.var])
if (strip) {
- tv <- strip(tv)
+ tv <- strip(tv, ...)
}
wrds <- lapply(tv, function(x) Trim(unlist(strsplit(x, " "))))
lens <- sapply(wrds, length)
View
15 R/cm_df.transcript.R
@@ -12,6 +12,7 @@
#' @param indent Number of spaces to indent.
#' @param width Width to output the file (defaults to 70; this is generally a
#' good width and indent for a .docx file).
+#' @param \ldots Other arguments passed to strip.
#' @return Returns a transcript by grouping variable with word number above each
#' word. This makes use with cm_df2long transfer/usage easier because the
#' researcher has coded on a transcript with the numeric word index already.
@@ -25,19 +26,29 @@
#' @export
#' @examples
#' \dontrun{
+#' with(DATA, cm_df.transcript(state, person))
+#' with(DATA, cm_df.transcript(state, list(sex, adult)))
+#' #use it with nested variables just to keep track of demographic info
+#' with(DATA, cm_df.transcript(state, list(person, sex, adult)))
+#'
+#' #use double tilde "~~" to keep word group as one word
+#' DATA$state <- mgsub("be certain", "be~~certain", DATA$state, fixed = TRUE)
+#' with(DATA, cm_df.transcript(state, person))
+#' DATA <- qdap::DATA
+#'
#' with(mraja1spl, cm_df.transcript(dialogue, list(person)))
#' with(mraja1spl, cm_df.transcript(dialogue, list(sex, fam.aff, died)))
#' with(mraja1spl, cm_df.transcript(dialogue, list(person), file="foo.doc"))
#' # delete("foo.doc") #delete the file just created
#' }
cm_df.transcript <-
-function(text.var, grouping.var, file = NULL, indent = 4, width = 70){
+function(text.var, grouping.var, file = NULL, indent = 4, width = 70, ...){
if (is.list(grouping.var)) {
grouping.var <- paste2(grouping.var)
}
L2 <- sentCombine(text.var, grouping.var)
DF <- data.frame(group = names(L2), text=unlist(L2))
- DF2 <- cm_df.temp(DF, "text")
+ DF2 <- cm_df.temp(DF, "text", ...)
y <- rle(as.character(DF2$group))
lens <- y$lengths
group <- y$values
View
2 R/gantt_plot.R
@@ -38,6 +38,8 @@
#' replace=TRUE))
#' z <- with(rajSPLIT2, gantt_plot(dialogue, list(fam.aff, sex),
#' list(act, newb), size = 4))
+#'
+#' library(ggplot2); library(scales); library(RColorBrewer); library(grid)
#' z + theme(panel.margin = unit(1, "lines")) + scale_colour_grey()
#' z + scale_colour_brewer(palette="Dark2")
#' }
View
2 R/gantt_wrap.R
@@ -74,6 +74,8 @@
#' units = "words", col.sep = "_")))
#' x <- gantt_wrap(dat3, fam.aff_sex, facet.vars = "act",
#' title = "Repeated Measures Gantt Plot")
+#'
+#' library(ggplot2); library(scales); library(RColorBrewer)
#' x + scale_color_manual(values=rep("black", length(levels(dat3$fam.aff_sex))))
#' }
gantt_wrap <-
View
19 R/kullback.leibler.R
@@ -69,12 +69,17 @@ function(x, y = NULL){
#' @S3method print kullback.leibler
print.kullback.leibler <-
function(x, digits = 3, ...) {
- WD <- options()[["width"]]
- options(width=3000)
- class(x) <- "matrix"
- if (!is.null(digits)) {
- x <- round(x, digits = digits)
+ if (length(x) == 1) {
+ y <- unclass(x)
+ print(y)
+ } else {
+ WD <- options()[["width"]]
+ options(width=3000)
+ class(x) <- "matrix"
+ if (!is.null(digits)) {
+ x <- round(x, digits = digits)
+ }
+ print(x)
+ options(width=WD)
}
- print(x)
- options(width=WD)
}
View
24 R/polarity.R
@@ -122,6 +122,28 @@ function (text.var, grouping.var = NULL, positive.list = positive.words,
grouping <- unlist(grouping.var)
}
}
+ allwrds <- c(positive.list, negative.list, negation.list, amplification.list)
+ ps <- grepl("\\s", positive.list)
+ ns <- grepl("\\s", negative.list)
+ ngs <- grepl("\\s", negation.list)
+ as <- grepl("\\s", amplification.list)
+ spcs <- c(ps, ns, ngs, as)
+ if (any(spcs)) {
+ dbls <- allwrds[spcs]
+ text.var <- mgsub(dbls, gsub("\\s", "~~", dbls), reducer(text.var))
+ if (any(ps)) {
+ positive.list <- gsub("\\s", "~~", positive.list)
+ }
+ if (any(ns)) {
+ negative.list <- gsub("\\s", "~~", negative.list)
+ }
+ if (any(ngs)) {
+ negation.list <- gsub("\\s", "~~", negation.list)
+ }
+ if (any(as)) {
+ amplification.list <- gsub("\\s", "~~", amplification.list)
+ }
+ }
unblank <- function(x) {
return(x[x != ""])
}
@@ -142,7 +164,7 @@ function (text.var, grouping.var = NULL, positive.list = positive.words,
amplification <- strip(amplification.list)
x <- as.data.frame(text.var)
x$words <- unblank(word.split(strip(text.var)))
- x$wc <- word.count(text.var)
+ x$wc <- word.count(gsub("~~", " ", text.var))
pos.matchesPOS <- lapply(x$words, function(x) match(x, positive.list))
neg.matchesPOS <- lapply(x$words, function(x) match(x, negative.list))
pos <- lapply(pos.matchesPOS, no.na)
View
6 R/qda.handler.R
@@ -6,17 +6,17 @@ function(x) {
return(freqTab2words(x))
} else {
if (!comment(x) %in% "bagOwords") {
- stop("Must be a qdap object or a vector of raw words")
+ stop("Must be a word_list object or a vector of raw words")
} else {
return(x)
}
}
} else {
if (is.vector(x)) {
- warning ("Not a qdap object.")
+            warning ("Not a word_list object.")
return(x)
} else {
- stop("Must be a qdap object or a vector of raw words")
+ stop("Must be a word_list object or a vector of raw words")
}
}
}
View
20 R/qdap-package.R
@@ -243,7 +243,25 @@ NULL
#' @references
#' \url{http://icon.shef.ac.uk/Moby/mpos.html}
NULL
-
+
+#' Contraction Conversions
+#'
+#' A dataset containing common contractions and their expanded form.
+#'
+#' @details
+#' \itemize{
+#' \item contraction. The contraction word.
+#' \item expanded. The expanded form of the contraction.
+#' }
+#'
+#' @docType data
+#' @keywords datasets
+#' @name contractions
+#' @usage data(contractions)
+#' @format A data frame with 65 rows and 2 variables
+NULL
+
+
#' Emoticons Data Set
#'
#' A dataset containing common emoticons (adapted from
View
39 R/question_type.R
@@ -28,13 +28,15 @@
#' 1) whose 2) whom 3) who 4) where 5) what 6) which 7) why 8) when 9) were
#' 10) was 11) does 12) did 13) do 14) is 15) are 16) will 17) how 18) should
#' 19) could 20) would 21) shall 22) may 23) might 24) must 25) can 26) has
-#' 27) have 28) had 29) ok 30) right 31) correct
+#' 27) have 28) had 29) ok 30) right 31) correct 32) implied do/does
#'
#' The interrogative word that is found first (with the exception of "ok", "right"
#' and "correct") in the question determines the sentence type. "ok", "right" and
#' "correct" sentence types are determined if the sentence is a question with no
#' other interrogative words found and "ok", "right" or "correct" is the last
-#' word of the sentence. Those with undetermined sentence type are labeled
+#' word of the sentence. Those interrogative sentences beginning with the word
+#' "you" are categorized as implying do or does question type, though the use of
+#' do/does is not explicit. Those with undetermined sentence type are labeled
#' unknown.
#' @keywords question, question-count
#' @export
@@ -54,8 +56,9 @@
#'
#' with(mraja1spl, question_type(dialogue, person))
#' with(mraja1spl, question_type(dialogue, list(sex, fam.aff)))
-#' with(mraja1spl, question_type(dialogue, list(sex, fam.aff),
-#' percent = FALSE))
+#' (x <- with(mraja1spl, question_type(dialogue, list(sex, fam.aff),
+#' percent = FALSE)))
+#' plot(x)
#' }
question_type <- function(text.var, grouping.var = NULL,
neg.cont = FALSE, percent = TRUE, zero.replace = 0, digits = 2) {
@@ -83,7 +86,8 @@ question_type <- function(text.var, grouping.var = NULL,
grouping <- unlist(grouping.var)
}
}
- text.var <- as.character(text.var)
+ text.var <- replace_contraction(as.character(text.var),
+ qdap::contractions[grepl("you", qdap::contractions[, 1]), ])
DF <- data.frame(grouping, text.var, check.names = FALSE,
stringsAsFactors = FALSE, orig.row.num = seq_len(length(text.var)))
DF$grouping <- factor(DF$grouping)
@@ -118,6 +122,7 @@ question_type <- function(text.var, grouping.var = NULL,
x[, " right"] <- (y-6) == sapply(gregexpr("right", z), "[", 1)
x[, "correct"] <- (y-7) == sapply(gregexpr("correct", z), "[", 1)
x[, "huh"] <- (y-3) == sapply(gregexpr("huh", z), "[", 1)
+ x[, "implied_do/does"] <- sapply(gregexpr("you", z), "[", 1) == 2
x
})
L2 <- invisible(lapply(L1, function(x) {
@@ -139,16 +144,19 @@ question_type <- function(text.var, grouping.var = NULL,
L2[[i]]
})
DF3a <- data.frame(ords = unlist(lapply(L1, "[", "orig.row.num")),
- q.type = unlist(L2))
+ q.type = unlist(L2), stringsAsFactors = FALSE)
+ DF3a[unlist(lapply(L1, "[", "implied_do/does")), 2] <- "implied_do/does"
DF3 <- data.frame(DF, q.type = DF3a[order(DF3a[, "ords"]), 2])
names(DF3) <- c(G, "raw.text", "n.row", "endmark", "strip.text", "q.type")
- WFM <- t(wfm(unlist(L2), rep(names(L1), sapply(L2, length))))
- cols <- c(key[, "x"], "ok", "alright", "right", "correct", "huh", "unknown")
+ unL2 <- unlist(L2)
+ unL2[unlist(lapply(L1, "[", "implied_do/does"))] <- "idd"
+ WFM <- t(wfm(unL2, rep(names(L1), sapply(L2, length))))
+ cols <- c(key[, "x"], "ok", "alright", "right", "correct", "huh", "idd", "unknown")
cols2 <- cols[cols %in% colnames(WFM)]
- WFM <- WFM[, cols2]
+ WFM <- WFM[, cols2, drop = FALSE]
if (all(grouping %in% "all")) {
DF <- as.data.frame(matrix(WFM, nrow = 1))
- colnames(DF) <- names(WFM)
+ colnames(DF) <- colnames(WFM)
rownames(DF) <- "all"
} else {
grvar <- levels(DF[, "grouping"])
@@ -175,19 +183,26 @@ question_type <- function(text.var, grouping.var = NULL,
have = c("haven't", "have"),
had = c("hadn't", "had")
)
- if(!neg.cont) {
+ if(!neg.cont & ncol(DF) > 1) {
ord <- c("whose", "whom", "who", "where", "what", "which",
"why", "when", "were", "was", "does", "did", "do",
"is", "are", "will", "how", "should", "could", "would",
"shall", "may", "might", "must", "can", "has", "have", "had")
comdcol <- lapply(comdcol, function(x) gsub("'", "", x))
DF <- qcombine(DF, comdcol)
ord <- c(ord[ord %in% colnames(DF)], "ok", "alright", "right",
- "correct", "huh", "unknown")
+ "correct", "huh", "idd", "unknown")
DF <- DF[, ord[ord %in% colnames(DF)]]
}
+ colnames(DF)[colnames(DF) == "idd"] <- "implied_do/does"
DF <- data.frame(group=rownames(DF), tot.quest = tq, DF, row.names = NULL,
check.names = FALSE)
+ if(ncol(DF) == 3) {
+ warning(paste0("Text does not contain enough questions to give",
+ "an output of the class \"question_type\":\n",
+ " ...only counts are returned"))
+ return(DF)
+ }
DF <- DF[sort(DF[, "group"]), ]
colnames(DF)[1] <- G
yesap <- sapply(comdcol, "[", 1)
View
96 R/replace_contraction.R
@@ -0,0 +1,96 @@
+#' Replace Contractions
+#'
+#' This function replaces contractions with long form.
+#'
+#' @param text.var The text variable.
+#' @param contraction A two column key of contractions (column 1) and expanded
+#' form replacements (column 2) or a vector of contractions. Default is to use
+#' qdap's contractions data set.
+#' @param replace A vector of expanded form replacements if a data frame is not
+#' supplied to the contraction argument.
+#' @param ignore.case logical. If TRUE replaces without regard to capitalization.
+#' @param sent.cap logical. If TRUE capitalizes the beginning of every sentence.
+#' @return Returns a vector with contractions replaced.
+#' @keywords contraction
+#' @seealso
+#' \code{\link[qdap]{bracketX}},
+#' \code{\link[qdap]{qprep}},
+#' \code{\link[qdap]{replace_abbreviation}},
+#' \code{\link[qdap]{replace_number}},
+#' \code{\link[qdap]{replace_symbol}}
+#' @export
+#' @examples
+#' \dontrun{
+#' x <- c("Mr. Jones isn't going.",
+#' "Check it out what's going on.",
+#' "He's here but didn't go.",
+#' "the robot at t.s. wasn't nice",
+#' "he'd like it if i'd go away")
+#'
+#' replace_contraction(x)
+#'
+#' #create abbreviation and replacement vectors
+#' abv <- c("isn't", "I'd")
+#' repl <- c("is not", "I would")
+#'
+#' replace_abbreviation(x, abv, repl)
+#'
+#' KEY <- rbind(abbreviations, data.frame(abv = abv, rep = repl))
+#' replace_abbreviation(x, KEY)
+#' }
+replace_contraction <-
+function(text.var, contraction = qdap::contractions, replace = NULL,
+ ignore.case=TRUE, sent.cap = TRUE) {
+ if (!is.null(replace)) {
+ ab <- data.frame(abv=contraction, repl=replace)
+ } else {
+ if (is.list(contraction)) {
+ ab <- data.frame(abv=contraction[[1]], repl=contraction[[2]])
+ } else {
+ stop("must supply vector of contractions and vector of replacements")
+ }
+ }
+ capit <- function(x) {
+ z <- paste0(toupper(substring(x, 1, 1)), substring(x, 2))
+ z[is.na(x)] <- NA
+ z
+ }
+ if (ignore.case) {
+ ab[, 1] <- tolower(ab[, 1])
+ caps <- function(string, all = FALSE) {
+ capit <- function(x) paste0(toupper(substring(x, 1, 1)), substring(x, 2))
+ if (all) {
+ x <- paste(unlist(lapply(strsplit(string, " "), capit)), collapse=" ")
+ y <- paste(unlist(lapply(strsplit(x, NULL), capit)), collapse="")
+ x <- c(x, y)
+ } else {
+ x <- capit(string)
+ }
+ return(x)
+ }
+ ab2 <- do.call(rbind, list(ab, ab))
+ temp <- unlist(lapply(ab2[, 1], caps, TRUE))
+ ab2[, 1] <- temp[1:(length(temp)/2)]
+ v <- as.character(ab[, 2])
+ ab <- data.frame(rbind(ab, ab2))
+ ab[, 2] <- c(v, rep(v, each=2))
+ ab[, 2] <- spaste(ab[, 2])
+ }
+ text.var <- Trim(text.var)
+ pn <- which(substring(text.var, nchar(text.var)) == ".")
+ text.var <- mgsub(ab[, 1], ab[, 2], text.var)
+ x <- Trim(gsub("\\s+", " ", text.var))
+ x[pn] <- sapply(x[pn], function(z) {
+ if (substring(z, nchar(z)) != ".") {
+ paste(z, ".", sep="")
+ } else {
+ z
+ }
+ }, USE.NAMES = FALSE)
+ x <- scrubber(x)
+ if (sent.cap) {
+ return(capit(x))
+ }
+ x
+}
+
View
3 R/scrubber.R
@@ -29,9 +29,10 @@ function(text.var, num2word = FALSE, rm.quote = TRUE, fix.comma = TRUE, ...){
}
ncx <- nchar(x)
x <- paste0(Trim(substring(x, 1, ncx - 1)), substring(x, ncx))
- x[x=="NANA"] <- NA
+ x[is.na(text.var)] <- NA
if (num2word) {
x <- replace_number(x, ...)
}
x
}
+
View
41 R/space_fill.R
@@ -0,0 +1,41 @@
+#' Replace Spaces
+#'
+#' Replace spaces in word groups that should be kept together.
+#'
+#' @param text.var The text variable.
+#' @param terms A character vector of grouped word terms to insert a new
+#' separating/space character.
+#' @param sep A character string to separate the terms.
+#' @param rm.extra logical. Should trailing, leading and > 1 continuous white
+#' spaces be removed?
+#' @param ignore.case logical. If FALSE, the pattern matching is case sensitive
+#' and if TRUE, case is ignored during matching.
+#' @param fixed logical. If TRUE, pattern is a string to be matched as is.
+#' Overrides all conflicting arguments.
+#' @param \ldots Other arguments passed to \code{\link[base]{gsub}}.
+#' @return Returns a character vector with the spaces in the supplied terms
+#' replaced by the separator character.
+#' @note \code{\link[qdap]{strip}} by default does not remove the double tilde
+#' \code{"~~"} character.
+#' @details \code{\link[qdap]{space_fill}} is useful for keeping grouped words
+#' together. Many functions in qdap take a \code{char.keep} or
+#' \code{char2space} argument. This can be used to prepare multi word phrases
+#' (e.g. proper nouns) as a single unit.
+#' @export
+#' @examples
+#' \dontrun{
+#' x <- c("I want to hear the Dr. Martin Luther King Jr. speech.",
+#' "I also want to go to the white House to see President Obama speak.")
+#'
+#' keeps <- c("Dr. Martin Luther King Jr.", "The White House", "President Obama")
+#' space_fill(x, keeps)
+#' strip(space_fill(x, keeps))
+#' }
+space_fill <- function(text.var, terms, sep = "~~", rm.extra = TRUE,
+ ignore.case = TRUE, fixed = FALSE, ...) {
+ if (rm.extra) {
+ terms <- Trim(reducer(terms))
+ }
+ reps <- gsub("\\s", sep, terms, ignore.case = ignore.case, ...)
+ mgsub(terms, reps, text.var, ignore.case = ignore.case, fixed = fixed, ...)
+}
+
View
6 R/strip.R
@@ -5,7 +5,9 @@
#' @param x The text variable.
#' @param char.keep A character vector of symbols (i.e. punctuation) that
#' \code{strip} should keep. The default is to strip every symbol except
-#' apostrophes.
+#' apostrophes and a double tilde \code{"~~"}. The double tilde \code{"~~"} is
+#' included for a convenient means of keeping word groups together in functions
+#' that split text apart based on spaces.
#' @param digit.remove logical. If TRUE strips digits from the text.
#' @param apostrophe.remove logical. If TRUE removes apostrophes from the
#' output.
@@ -20,7 +22,7 @@
#' strip(DATA$state, char.keep = c("?", "."))
#' }
strip <-
-function (x, char.keep = NULL, digit.remove = TRUE, apostrophe.remove = TRUE,
+function (x, char.keep = "~~", digit.remove = TRUE, apostrophe.remove = TRUE,
lower.case = TRUE) {
strp <- function(x, digit.remove, apostrophe.remove, char.keep, lower.case) {
if (!is.null(char.keep)) {
View
63 R/term.find.R
@@ -1,32 +1,37 @@
-#helper function for termco_d and termco(not exported)
+#helper function for word.associate, termco_d and termco(not exported)
term.find <-
-function(str, mat, logic = FALSE){
- tester <- function(x, y, logic){
- p <- suppressWarnings(unlist(grepl(x, y, fixed = TRUE)))
- if (!logic) {
- p <- which(p)
+function(str, mat, logic = FALSE, unlist = FALSE){
+ tester <- function(x, y, logic){
+ p <- suppressWarnings(unlist(grepl(x, y, fixed = TRUE)))
+ if (!logic) {
+ p <- which(p)
+ }
+ return(p)
}
- return(p)
- }
- spacer <- function(string){
- sapply(string, function(x) paste0(" ", x, " "), USE.NAMES = FALSE)
- }
- str <- spacer(strip(str, lower.case = FALSE))
- findit1 <- function(x, logic = TRUE) {
- sapply(x, function(z) tester(z, str, logic))
- }
- findit2 <- function(x, logic = FALSE) {
- sort(unique(c(unlist(sapply(x, function(z) tester(z, str, logic))))))
- }
- if (logic) {
- findit <- findit1
- } else {
- findit <- findit1
- }
- if (is.list(mat)) {
- a <- lapply(mat, findit, logic = logic)
- } else {
- a <- findit(mat, logic = logic)
- }
- return(a)
+ spacer <- function(string){
+ sapply(string, function(x) paste0(" ", x, " "),
+ USE.NAMES = FALSE)
+ }
+ str <- spacer(strip(str, lower.case = FALSE))
+ findit1 <- function(x, logic = TRUE) {
+ sapply(x, function(z) tester(z, str, logic))
+ }
+ findit2 <- function(x, logic = FALSE) {
+ sort(unique(c(unlist(sapply(x, function(z) {
+ tester(z, str, logic)})))))
+ }
+ if (logic) {
+ findit <- findit1
+ } else {
+ findit <- findit1
+ }
+ if (is.list(mat)) {
+ a <- lapply(mat, findit, logic = logic)
+ } else {
+ a <- findit(mat, logic = logic)
+ }
+ if (unlist) {
+ a <- unlist(a)
+ }
+ a
}
View
2 R/trans.cloud.R
@@ -88,7 +88,7 @@ function(text.var = NULL, grouping.var = NULL, word.list = NULL, stem = FALSE,
title.padj = -4.5, title.location = 3, title.cex = NULL, title.names = NULL,
proportional = FALSE, max.word.size = NULL, min.word.size = 0.5,
legend = NULL, legend.cex = .8, legend.location = c(-.03, 1.03),
- char.keep = NULL, char2space = NULL) {
+ char.keep = "~~", char2space = NULL) {
if(!is.null(char2space) & is.null(char.keep)) {
char.keep <- char2space
}
View
53 R/wfm.R
@@ -14,26 +14,41 @@
#' @param digits An integer indicating the number of decimal places (round) or
#' significant digits (signif) to be used. Negative values are allowed
#' @param margins logical. If TRUE provides grouping.var and word variable totals.
-#' @param \ldots Other arguments supplied to \code{wfm}.
+#' @param \ldots Other arguments supplied to \code{\link[qdap]{strip}}.
#' @param wf.obj A \code{wfm} or \code{wfdf} object.
#' @param word.lists A list of character vectors of words to pass to
#' \code{wf.combine}
#' @param matrix logical. If TRUE returns the output as a \code{wfm} rather
#' than a \code{wfdf} object
+#' @param char2space A vector of characters to be turned into spaces. If
+#' \code{char.keep} is NULL, \code{char2space} will activate this argument.
#' @return \code{wfm} - returns a word frequency of the class matrix.
#' @rdname Word_Frequency_Matrix
+#' @note Words can be kept as one by inserting a double tilde (\code{"~~"}), or
+#' other character strings passed to char2space, as a single word/entry. This is
+#' useful for keeping proper names as a single unit.
#' @keywords word-frequency-matrix
#' @export
#' @examples
#' \dontrun{
#' #word frequency matrix (wfm) example:
#' with(DATA, wfm(state, list(sex, adult)))
#' dat <- with(DATA, wfm(state, person))
+#'
+#' #insert double tilde ("~~") to keep dual words (i.e. first and last name)
+#' alts <- c(" fun", "I ")
+#' state2 <- mgsub(alts, gsub("\\s", "~~", alts), DATA$state)
+#' with(DATA, wfm(state2, list(sex, adult)))
#'
#' #word frequency dataframe (wfdf) example:
#' with(DATA, wfdf(state, list(sex, adult)))
#' with(DATA, wfdf(state, person))
#'
+#' #insert double tilde ("~~") to keep dual words (i.e. first and last name)
+#' alts <- c(" fun", "I ")
+#' state2 <- mgsub(alts, gsub("\\s", "~~", alts), DATA$state)
+#' with(DATA, wfdf(state2, list(sex, adult)))
+#'
#' #wfm.expanded example:
#' z <- wfm(DATA$state, DATA$person)
#' wfm.expanded(z)
@@ -78,7 +93,7 @@
#' }
wfm <-
function(text.var = NULL, grouping.var = NULL, wfdf = NULL,
- output = "raw", stopwords = NULL, digits = 2){
+ output = "raw", stopwords = NULL, digits = 2, char2space = "~~", ...){
if (!is.null(wfdf)) {
if (comment(wfdf) == "t.df") {
wfdf <- wfdf
@@ -95,12 +110,13 @@ function(text.var = NULL, grouping.var = NULL, wfdf = NULL,
} else {
if (!is.null(text.var)) {
wfdf <- wfdf(text.var = text.var, grouping.var = grouping.var,
- stopwords = stopwords, output = output, digits = digits)
+ stopwords = stopwords, output = output, digits = digits,
+ char2space = char2space, ...)
x2 <- wfdf[, -1, drop = FALSE]
rownames(x2) <- wfdf[, 1]
x2 <- as.matrix(x2)
} else {
- stop ("must specify both text.var & grouping var or wfdf")
+ stop ("must specify both text.var or wfdf")
}
}
comment(x2) <- "true.matrix"
@@ -118,17 +134,18 @@ function(text.var = NULL, grouping.var = NULL, wfdf = NULL,
#' column and optional margin sums.
wfdf <-
function(text.var, grouping.var = NULL, stopwords = NULL,
- margins = FALSE, output = "raw", digits = 2){
- grouping.var <- if (is.list(grouping.var) & length(grouping.var)>1) {
- apply(data.frame(grouping.var), 1, function(x){
- if (any(is.na(x)))NA else paste(x, collapse = ".")
- }
- )
+ margins = FALSE, output = "raw", digits = 2, char2space = "~~", ...){
+ if(is.null(grouping.var)){
+ grouping <- rep("all", length(text.var))
} else {
- grouping.var
- }
- bl <- split(text.var, grouping.var)
- x <- lapply(bl, bag.o.words)
+ if (is.list(grouping.var) & length(grouping.var)>1) {
+ grouping <- paste2(grouping.var)
+ } else {
+ grouping <- unlist(grouping.var)
+ }
+ }
+ bl <- split(text.var, grouping)
+ x <- lapply(bl, bag.o.words, char.keep = char2space, ...)
tabs <- lapply(x, function(x) as.data.frame(table(x)))
tabs <- tabs[sapply(tabs, nrow)!=0]
lapply(seq_along(tabs), function(x) {
@@ -178,10 +195,12 @@ function(text.var, grouping.var = NULL, stopwords = NULL,
comment(DF) <- "f.df"
}
}
- return(DF)
+ if (!is.null(char2space)) {
+ DF[, "Words"] <- mgsub(char2space, " ", DF[, "Words"])
+ }
+ DF
}
-
#' Expanded Word Frequency Matrix
#'
#' \code{wfm.expanded} - Expand a word frequency matrix to have multiple rows
@@ -281,4 +300,4 @@ function(wf.obj, word.lists, matrix = FALSE){
}
comment(DFF) <- ifelse(!matrix, "t.df", "true.matrix")
DFF
-}
+}
View
26 R/word.network.plot.R
@@ -49,8 +49,12 @@
#' \code{"topright"}, \code{"right"} and \code{"center"}. This places the legend on
#' the inside of the plot frame at the given location.
#' @param plot logical. If TRUE plots a network plot of the words.
-#' @return Silently returns a list of igraph parameters. Optionally, plots the
-#' output.
+#' @param char2space A vector of characters to be turned into spaces. If
+#' \code{char.keep} is NULL, \code{char2space} will activate this argument.
+#' @param \ldots Other arguments passed to \code{\link[qdap]{strip}}.
+#' @note Words can be kept as one by inserting a double tilde (\code{"~~"}), or
+#' other character strings passed to char2space, as a single word/entry. This is
+#' useful for keeping proper names as a single unit.
#' @seealso \code{\link[qdap]{word.network.plot}},
#' \code{\link[igraph]{graph.adjacency}}
#' @keywords network
@@ -60,24 +64,30 @@
#' \dontrun{
#' word.network.plot(text.var=DATA$state, grouping.var=DATA$person)
#' word.network.plot(text.var=DATA$state, grouping.var=list(DATA$sex,
-#' DATA$adult))
+#' DATA$adult))
#' word.network.plot(text.var=DATA$state, grouping.var=DATA$person,
#' title.name = "TITLE", log.labels=TRUE)
#' word.network.plot(text.var=raj.act.1, grouping.var=raj.act.1$person,
#' stopwords = Top200Words)
+#'
+#' #insert double tilde ("~~") to keep dual words (i.e. first and last name)
+#' alts <- c(" fun", "I ")
+#' state2 <- mgsub(alts, gsub("\\s", "~~", alts), DATA$state)
+#' word.network.plot(text.var=state2, grouping.var=DATA$person)
#' }
word.network.plot <-
function(text.var, grouping.var = NULL, target.words = NULL, stopwords = Top100Words,
label.cex = .8, label.size = .5, edge.curved = TRUE, vertex.shape = "circle",
edge.color = "gray70", label.colors = "black", layout = NULL,
title.name = NULL, title.padj = -4.5, title.location = 3, title.font = NULL,
title.cex = .8, log.labels = FALSE, title.color = "black",
- legend = NULL, legend.cex = .8, legend.location = c(-1.54, 1.41), plot = TRUE) {
- if (class(text.var) == "adjacency.matrix") { #actually takes an adjaceny matrix
+ legend = NULL, legend.cex = .8, legend.location = c(-1.54, 1.41), plot = TRUE,
+ char2space = "~~", ...) {
+ if (class(text.var) == "adjacency_matrix") { #actually takes an adjaceny matrix
adj.mat.object <- text.var[["adjacency"]]
- } else {
+ } else {
z <- wfm(text.var = text.var, grouping.var = grouping.var,
- stopwords = stopwords)
+ stopwords = stopwords, char2space = char2space, ...)
adj.mat.object <- adjmat(t(z))[["adjacency"]]
}
g <- graph.adjacency(adj.mat.object, weighted=TRUE, mode ='undirected')
@@ -128,3 +138,5 @@ function(text.var, grouping.var = NULL, target.words = NULL, stopwords = Top100W
invisible(g)
}
+
+
View
112 R/word.associate.R → R/word_associate.R
@@ -98,7 +98,7 @@
#' \dontrun{
#' ms <- c(" I", "you")
#' et <- c(" it", " no")
-#' word.associate(DATA2$state, DATA2$person, match.string = ms,
+#' word_associate(DATA2$state, DATA2$person, match.string = ms,
#' wordcloud = TRUE, proportional = TRUE,
#' network.plot = TRUE, nw.label.proportional = TRUE, extra.terms = et,
#' cloud.legend =c("A", "B", "C", "D"),
@@ -116,13 +116,13 @@
#' C = c(" it", " no")
#' )
#'
-#' word.associate(DATA2$state, DATA2$person, match.string = ms,
+#' word_associate(DATA2$state, DATA2$person, match.string = ms,
#' wordcloud = TRUE, proportional = TRUE,
#' network.plot = TRUE, nw.label.proportional = TRUE, extra.terms = et,
#' cloud.legend =c("A", "B", "C", "D"),
#' title.color = "blue", cloud.colors = c("red", "blue", "purple", "gray70"))
#'
-#' word.associate(DATA2$state, list(DATA2$day, DATA2$person), match.string = ms)
+#' word_associate(DATA2$state, list(DATA2$day, DATA2$person), match.string = ms)
#'
#' #======================================
#' m <- list(
@@ -135,14 +135,35 @@
#' C = c(" it", " no")
#' )
#'
-#' word.associate(DATA2$state, list(DATA2$day, DATA2$person), match.string = m)
-#' word.associate(raj.act.1$dialogue, list(raj.act.1$person), match.string = m)
-#' (out <- with(mraja1spl, word.associate(dialogue, list(fam.aff, sex), match.string = m)))
+#' word_associate(DATA2$state, list(DATA2$day, DATA2$person), match.string = m)
+#' word_associate(raj.act.1$dialogue, list(raj.act.1$person), match.string = m)
+#' (out <- with(mraja1spl, word_associate(dialogue, list(fam.aff, sex), match.string = m)))
#' names(out)
#' lapply(out$dialogue, htruncdf, n = 20, w = 20)
#' out$cap.f
+#'
+#' #======================================
+#' DATA2$state2 <- space_fill(DATA2$state, c("is fun", "too fun"))
+#'
+#' ms <- list(
+#' list1 = c(" I ", " you", "is fun", "too fun"),
+#' list2 = c(" wh")
+#' )
+#'
+#' et <- list(
+#' B = c(" the", " on"),
+#' C = c(" it", " no")
+#' )
+#'
+#' word_associate(DATA2$state2, DATA2$person, match.string = ms,
+#' wordcloud = TRUE, proportional = TRUE,
+#' network.plot = TRUE, nw.label.proportional = TRUE, extra.terms = et,
+#' cloud.legend =c("A", "B", "C", "D"),
+#' title.color = "blue", cloud.colors = c("red", "blue", "purple", "gray70"))
+#'
+#' DATA2 <- qdap::DATA2
#' }
-word.associate <-
+word_associate <-
function(text.var, grouping.var = NULL, match.string, text.unit = "sentence",
extra.terms = NULL, target.exclude = NULL, stopwords = NULL,
network.plot = FALSE, wordcloud = FALSE, cloud.colors = c("black", "gray55"),
@@ -152,9 +173,7 @@ function(text.var, grouping.var = NULL, match.string, text.unit = "sentence",
title.font = NULL, title.cex = NULL, nw.edge.curved = TRUE,
cloud.legend = NULL, cloud.legend.cex = .8, cloud.legend.location = c(-.03, 1.03),
nw.legend = NULL, nw.legend.cex = .8, nw.legend.location = c(-1.54, 1.41),
- legend.override = FALSE, char2space = NULL, ...){
-#currently char2space is a road to nowhere. Connect the road and add char.keep
-#argument as well
+ legend.override = FALSE, char2space = "~~", ...){
network.graph <- NULL
if (network.plot | wordcloud) {
if(is.null(nw.label.colors)) {
@@ -207,47 +226,49 @@ function(text.var, grouping.var = NULL, match.string, text.unit = "sentence",
strip = TRUE, unique = TRUE, names = FALSE, char.keep = char2space)
if (!is.null(char2space)) {
Terms2 <- mgsub(char2space, " ", Terms2)
- }
- TM2 <- lapply(match.string, function(x) term.find(Terms2,
- mat = tolower(x)))
+ }
+ TM2 <- lapply(match.string, function(x) {
+ term.find(Terms2, mat = tolower(x), unlist = TRUE)
+ })
match.string <- lapply(TM2, function(i) Terms2[i])
if (!is.null(target.exclude)) {
match.string <- lapply(match.string, function(x)
x[!x %in% unlist(tolower(target.exclude))])
}
- match.string <- lapply(match.string, function(x) paste0(" ", x, " "))
+ match.string <- spaste(match.string)
if (!is.null(extra.terms)) {
- TM3 <- lapply(extra.terms, function(x) term.find(Terms2,
- mat = tolower(x)))
+ TM3 <- lapply(extra.terms, function(x) {
+ term.find(Terms2, mat = tolower(x), unlist = TRUE)
+ })
TM3 <- lapply(TM3, function(i) Terms2[i])
if (!is.null(target.exclude)) {
TM3 <- lapply(TM3, function(x)
x[!x %in% unlist(tolower(target.exclude))])
}
}
- TU <- suppressWarnings(if(is.null(text.unit)) {
- "row"
+ suppressWarnings(if(is.null(text.unit)) {
+ TU <- "row"
} else {
if (is.list(text.unit)) {
m <- unlist(as.character(substitute(text.unit))[-1])
m <- sapply(strsplit(m, "$", fixed=TRUE),
function(x) x[length(x)])
- paste(m, collapse="&")
+ TU <- paste(m, collapse="&")
} else {
if (is.vector(text.unit) & length(text.unit) == 1 &
text.unit == "sentence") {
- "sentence"
+ TU <- "sentence"
} else {
TU <- as.character(substitute(text.unit))
- TU[length(TU)]
+ TU <- TU[length(TU)]
}
}
})
- texting <- if(is.null(text.unit)){
- as.factor(1:length(text.var))
+ if(is.null(text.unit)){
+ texting <- as.factor(1:length(text.var))
} else {
if(is.list(text.unit) & length(text.unit)>1) {
- apply(data.frame(text.unit), 1, function(x){
+ texting <- apply(data.frame(text.unit), 1, function(x){
if(any(is.na(x))){
NA
}else{
@@ -256,17 +277,21 @@ function(text.var, grouping.var = NULL, match.string, text.unit = "sentence",
})
} else {
if (TU == "tot") {
- sapply(strsplit(as.character(text.unit), ".",
+ texting <- sapply(strsplit(as.character(text.unit), ".",
fixed=TRUE), function(x) x[[1]])
} else {
if (TU %in% c("sentence", "sent")) {
- as.factor(1:length(text.var))
+ texting <- as.factor(1:length(text.var))
} else {
- unlist(text.unit)
+ texting <- unlist(text.unit)
}
}
}
}
+ text.var2 <- text.var
+ if (!is.null(char2space)) {
+ text.var <- mgsub(char2space, " ", text.var)
+ }
DF <- data.frame(row = seq_along(text.var), group = grouping, unit = texting,
text = as.character(text.var), stringsAsFactors = FALSE)
LOG <- lapply(match.string, function(x) {
@@ -288,10 +313,13 @@ function(text.var, grouping.var = NULL, match.string, text.unit = "sentence",
}
DFsl <- lapply(ALN, function(i) na.omit(DF3[shortDF(DF3, i), 1:4]))
names(DFsl) <- colnames(DF3)[-c(1:4)]
- Terms <- qdap::stopwords(text.var, stopwords = NULL, unlist = TRUE,
- strip = TRUE, unique = TRUE, names = FALSE)
+ Terms <- qdap::stopwords(text.var2, stopwords = NULL, unlist = TRUE,
+ strip = TRUE, unique = TRUE, names = FALSE, char.keep = char2space)
+ if (!is.null(char2space)) {
+ Terms <- mgsub(char2space, " ", Terms)
+ }
TM <- lapply(match.string, function(x) term.find(Terms,
- mat = x))
+ mat = x, unlist = TRUE))
COLTERMS <- lapply(TM, function(i) Terms[i])
if (!is.null(target.exclude)) {
COLTERMS <- lapply(COLTERMS, function(x) x[!x %in% target.exclude])
@@ -311,12 +339,14 @@ function(text.var, grouping.var = NULL, match.string, text.unit = "sentence",
}
if (!is.null(extra.terms)) {
UET <- unlist(extra.terms, recursive = FALSE)
- ECOLTERMS <- lapply(UET, function(x) term.find(Terms, mat = x))
+ ECOLTERMS <- lapply(UET, function(x) term.find(Terms, mat = x, ))
ECOLTERMS <- lapply(ECOLTERMS, function(i) Terms[i])
if (!is.null(target.exclude)) {
ECOLTERMS <- lapply(ECOLTERMS, function(x) x[!x %in% target.exclude])
}
}
+ DF4 <- DF3
+ DF4$text <- text.var2
if (wordcloud | network.plot) {
if (!is.null(extra.terms)) {
nm <- length(match.string)
@@ -355,7 +385,7 @@ function(text.var, grouping.var = NULL, match.string, text.unit = "sentence",
network.graph, wordcloud, cloud.colors, title.color, nw.label.cex,
nw.label.colors, nw.layout, nw.edge.color, LN, nw.label.proportional,
ECOLTERMS, cloud.legend, cloud.legend.cex, cloud.legend.location,
- nw.legend, nw.legend.cex, nw.legend.location, ...){
+ nw.legend, nw.legend.cex, nw.legend.location, char.keep, char2space, ...){
LIST <- lapply(LN, function(x) dat[dat[, x], 2:4])
FUN <- function(x) {
(nrow(x) > 1) & !is.null(x)
@@ -404,15 +434,15 @@ function(text.var, grouping.var = NULL, match.string, text.unit = "sentence",
LIST2 <- LIST2[!sapply(LIST2, is.null)]
mats <- lapply(LIST2, function(x) {
wfm(grouping.var = Trim(x[, "unit"]), text.var = x[, "text"],
- stopwords = stopwords)
+ stopwords = stopwords, char2space = char2space)
}
)
mats2 <- lapply(mats, function(x) {
adjacency_matrix(t(x))
}
)
freqlist <- lapply(LIST, function(x) {
- word_list(x$text, stopwords = stopwords)
+ word_list(x$text, stopwords = stopwords, char.keep = char2space)
})
o <- list(list = LISTb, search.terms = COLTERMS, freqlist = freqlist,
freqmat = mats, adjmat = mats2)
@@ -421,7 +451,7 @@ function(text.var, grouping.var = NULL, match.string, text.unit = "sentence",
}
o <- unlist(o, recursive = FALSE)
if (network.plot) {
- an <- which(substring(names(o), 1, 6) == "adjmat")
+ an <- grep("adjmat", names(o))
ads <- lapply(an, function(i) o[[i]])
lapply(seq_along(ads), function(i) {
word.network.plot(ads[[i]], label.cex = nw.label.cex,
@@ -434,28 +464,32 @@ function(text.var, grouping.var = NULL, match.string, text.unit = "sentence",
title.font = title.font, title.cex = title.cex,
legend = nw.legend, legend.cex = nw.legend.cex,
legend.location = nw.legend.location,
+ char2space = char2space,
target.words = WSEARCH[choosennames][[i]])
})
}
if (wordcloud) {
lapply(seq_along(freqlist), function(i) {
- suppressWarnings(trans.cloud(word.list = freqlist[[i]]$swl,
- target.words = WSEARCH[choosennames2][[i]], stopwords = stopwords,
+ suppressWarnings(trans.cloud(
+ word.list = freqlist[[i]]$swl,
+ target.words = WSEARCH[choosennames2][[i]],
+ stopwords = stopwords,
cloud.colors = cloud.colors, expand.target = FALSE,
title.color = title.color, title.names = namesL1[[i]],
legend = cloud.legend, legend.cex = cloud.legend.cex,
+ char2space = char2space, char.keep = char2space,
legend.location = cloud.legend.location, ...))
})
}
return(o)
}
- Zdat <- split(DF3, DF3$group)
+ Zdat <- split(DF4, DF3$group)
invisible(lapply(seq_along(Zdat), function(i) {rownames(Zdat[[1]]) <<- NULL}))
o2 <- lapply(seq_along(Zdat), function(i) word.as(dat = Zdat[[i]],
stopwords = stopwords, network.graph = network.graph,
wordcloud = wordcloud, ECOLTERMS = ECOLTERMS,
cloud.colors = cloud.colors, title.color =title.color,
- nw.label.proportional = nw.label.proportional,
+ nw.label.proportional = nw.label.proportional, char2space = char2space,
nw.label.cex = nw.label.cex, nw.label.colors = nw.label.colors,
nw.layout = nw.layout, nw.edge.color = nw.edge.color,
LN = LN, cloud.legend = cloud.legend, cloud.legend.cex = cloud.legend.cex,
View
2 README.md
@@ -34,4 +34,4 @@ install.packages("openNLP", type = "source")
## Help
For a variety of qdap help files and videos [click here](https://github.com/trinker/qdap/wiki).
-For the package help manual [click here](https://dl.dropbox.com/u/61803503/qdap.pdf).
+For the package pdf help manual [click here](https://dl.dropbox.com/u/61803503/qdap.pdf).
View
BIN data/contractions.rda
Binary file not shown.
View
105 inst/staticdocs/BuckleySaltonSWL.html
@@ -0,0 +1,105 @@
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+<title>BuckleySaltonSWL. qdap 0.1.0</title>
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<meta name="author" content="">
+
+<link href="css/bootstrap.css" rel="stylesheet">
+<link href="css/bootstrap-responsive.css" rel="stylesheet">
+<link href="css/highlight.css" rel="stylesheet">
+<link href="css/staticdocs.css" rel="stylesheet">
+
+<!--[if lt IE 9]>
+ <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+<![endif]-->
+ </head>
+
+ <body>
+ <div class="navbar">
+ <div class="navbar-inner">
+ <div class="container">
+ <a class="brand" href="#">qdap 0.1.0</a>
+ <div class="nav">
+ <ul class="nav">
+ <li><a href="index.html"><i class="icon-home icon-white"></i> Index</a></li>
+ </ul>
+ </div>
+ </div>
+ </div>
+</div>
+
+ <div class="container">
+ <header>
+
+ </header>
+
+ <h1>Buckley & Salton Stopword List</h1>
+
+<div class="row">
+ <div class="span8">
+
+ <div class="Format">
+ <h2>Format</h2>
+
+ <p>A character vector with 546 elements</p>
+
+ </div>
+
+ <div class="Description">
+ <h2>Description</h2>
+
+ <p>A stopword list containing a character vector of
+ stopwords.</p>
+
+ </div>
+
+ <div class="Details">
+ <h2>Details</h2>
+
+ <p><a href = 'http://www.lextek.com/manuals/onix/stopwords2.html'>From
+ Onix Text Retrieval Toolkit API Reference</a>: "This
+ stopword list was built by Gerard Salton and Chris
+ Buckley for the experimental SMART information retrieval
+ system at Cornell University. This stopword list is
+ generally considered to be on the larger side and so when
+ it is used, some implementations edit it so that it is
+ better suited for a given domain and audience while
+ others use this stopword list as it stands."</p>
+
+ </div>
+
+ <div class="Note">
+ <h2>Note</h2>
+
+ <p>Reduced from the original 571 words to 546.</p>
+
+ </div>
+
+ <div class="References">
+ <h2>References</h2>
+
+ <p><a href = 'http://www.lextek.com/manuals/onix/stopwords2.html'>http://www.lextek.com/manuals/onix/stopwords2.html</a></p>
+
+ </div>
+ </div>
+ <div class="span4">
+ <!-- <ul>
+ <li>BuckleySaltonSWL</li>
+ </ul>
+ <ul>
+ <li>datasets</li>
+ </ul> -->
+
+
+ </div>
+</div>
+
+ <footer>
+ <p class="pull-right"><a href="#">Back to top</a></p>
+<p>Built by <a href="https://github.com/hadley/staticdocs">staticdocs</a>. Styled with <a href="http://twitter.github.com/bootstrap">bootstrap</a>.</p>
+ </footer>
+ </div>
+ </body>
+</html>
View
92 inst/staticdocs/DATA.html
@@ -0,0 +1,92 @@
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+<title>DATA. qdap 0.1.0</title>
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<meta name="author" content="">
+
+<link href="css/bootstrap.css" rel="stylesheet">
+<link href="css/bootstrap-responsive.css" rel="stylesheet">
+<link href="css/highlight.css" rel="stylesheet">
+<link href="css/staticdocs.css" rel="stylesheet">
+
+<!--[if lt IE 9]>
+ <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+<![endif]-->
+ </head>
+