* replace_grade and replace_rating have been moved from the *sentimentr* package to *textclean*, a more fitting home.
trinker · Jan 15, 2017 · 2b99567 · 2b99567
1 parent 804fe47
commit 2b99567
Show file tree

Hide file tree

Showing 19 changed files with 319 additions and 12 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,7 @@ Description: Tools to clean and process text.
 Depends: R (>= 3.2.3)
 Imports: english, qdapRegex, stringi, textshape, utils
 Suggests: testthat, lexicon
-Date: 2017-01-13
+Date: 2017-01-15
 License: GPL-2
 LazyData: TRUE
 Roxygen: list(wrap = FALSE)

diff --git a/NAMESPACE b/NAMESPACE
@@ -15,10 +15,13 @@ export(filter_row)
 export(has_endmark)
 export(mgsub)
 export(replace_contraction)
+export(replace_emoticon)
+export(replace_grade)
 export(replace_incomplete)
 export(replace_non_ascii)
 export(replace_number)
 export(replace_ordinal)
+export(replace_rating)
 export(replace_symbol)
 export(replace_white)
 export(strip)

diff --git a/NEWS b/NEWS
@@ -24,6 +24,9 @@ BUG FIXES
 
 NEW FEATURES
 
+* `replace_grade` and `replace_rating` have been moved from the *sentimentr*
+  package to *textclean*, a more fitting home.
+
 MINOR FEATURES
 
 IMPROVEMENTS

diff --git a/NEWS.md b/NEWS.md
@@ -24,6 +24,9 @@ textclean 0.2.1 -
 
 **NEW FEATURES**
 
+* `replace_grade` and `replace_rating` have been moved from the *sentimentr*
+  package to *textclean*, a more fitting home.
+
 **MINOR FEATURES**
 
 **IMPROVEMENTS**

diff --git a/R/replace_emoticon.R b/R/replace_emoticon.R
@@ -0,0 +1,31 @@
+#' Replace Emoticons With Words
+#'
+#' Replaces emoticons with word equivalents.
+#'
+#' @param text.var The text variable.
+#' @param emoticon_dt A \pkg{data.table} of emoticons (graphical representations)
+#' and corresponding word meanings.
+#' @param \ldots Other arguments passed to \code{.mgsub} (see
+#' \code{textclean:::.mgsub} for details).
+#' @return Returns a vector of strings with emoticons replaced with word
+#' equivalents.
+#' @keywords emoticon
+#' @export
+#' @examples
+#' x <- c(
+#'     "text from: http://www.webopedia.com/quick_ref/textmessageabbreviations_02.asp",
+#'     "... understanding what different characters used in smiley faces mean:",
+#'     "The close bracket represents a sideways smile  )",
+#'     "Add in the colon and you have sideways eyes   :",
+#'     "Put them together to make a smiley face  :)",
+#'     "Use the dash -  to add a nose   :-)",
+#'     "Change the colon to a semi-colon ; and you have a winking face ;)  with a nose  ;-)",
+#'     "Put a zero 0 (halo) on top and now you have a winking, smiling angel 0;) with a nose 0;-)",
+#'     "Use the letter 8 in place of the colon for sunglasses 8-)",
+#'     "Use the open bracket ( to turn the smile into a frown  :-("
+#' )
+#'
+#' replace_emoticon(x)
+replace_emoticon <- function(text.var, emoticon_dt = lexicon::hash_emoticons, ...){
+    # Surround every word meaning with spaces before it is substituted in.
+    padded_words <- paste0(" ", emoticon_dt[["y"]], " ")
+    swapped <- .mgsub(emoticon_dt[["x"]], padded_words, text.var, ...)
+    # Squeeze each run of whitespace down to a single space.
+    gsub("\\s+", " ", swapped)
+}
diff --git a/R/replace_grade.R b/R/replace_grade.R
@@ -0,0 +1,23 @@
+#' Replace Grades With Words
+#'
+#' Replaces grades with word equivalents.
+#'
+#' @param x The text variable.
+#' @param grade_dt A \pkg{data.table} of grades and corresponding word meanings.
+#' @param \ldots ignored.
+#' @return Returns a vector of strings with grades replaced with word
+#' equivalents.
+#' @keywords grade
+#' @export
+#' @examples
+#' (text <- replace_grade(c(
+#'     "I give an A+",
+#'     "He deserves an F",
+#'     "It's C+ work",
+#'     "A poor example deserves a C!"
+#' )))
+#' replace_grade(text)
+replace_grade <- function (x, grade_dt = lexicon::key_grades, ...) {
+    # Columns "x" and "y" of grade_dt are handed to mgsub as its second and
+    # third arguments, with fixed = FALSE.
+    grade_patterns <- grade_dt[["x"]]
+    grade_words <- grade_dt[["y"]]
+    mgsub(x, grade_patterns, grade_words, fixed = FALSE)
+}
+
diff --git a/R/replace_ordinal.R b/R/replace_ordinal.R
@@ -8,6 +8,7 @@
 #' separated with spaces.  If \code{FALSE} the elements will be joined without 
 #' spaces.
 #' @param remove logical.  If \code{TRUE} ordinal numbers are removed from the text.
+#' @param \ldots ignored.
 #' @keywords ordinal-to-word
 #' @note Currently only implemented for ordinal values 1 through 100
 #' @export
@@ -20,7 +21,7 @@
 #' replace_ordinal(x, TRUE)
 #' replace_ordinal(x, remove = TRUE)
 #' replace_number(replace_ordinal("I like the 1st 1 not the 22nd 1."))
-replace_ordinal <- function(x, num.paste = FALSE, remove = FALSE) {
+replace_ordinal <- function(x, num.paste = FALSE, remove = FALSE, ...) {
 
     symb <- c("1st", "2nd", "3rd", paste0(4:19, "th"),
         paste0(20:100, c("th", "st", "nd", "rd", rep("th", 6))))

diff --git a/R/replace_rating.R b/R/replace_rating.R
@@ -0,0 +1,31 @@
+#' Replace Ratings With Words
+#'
+#' Replaces ratings with word equivalents.
+#'
+#' @param x The text variable.
+#' @param rating_dt A \pkg{data.table} of ratings and corresponding word meanings.
+#' @param \ldots ignored.
+#' @return Returns a vector of strings with ratings replaced with word
+#' equivalents.
+#' @keywords rating
+#' @export
+#' @examples
+#' x <- c("This place receives 5 stars for their APPETIZERS!!!",
+#'      "Four stars for the food & the guy in the blue shirt for his great vibe!",
+#'      "10 out of 10 for both the movie and trilogy.",
+#'      "* Both the Hot & Sour & the Egg Flower Soups were absolutely 5 Stars!",
+#'      "For service, I give them no stars.", "This place deserves no stars.",
+#'      "10 out of 10 stars.",
+#'      "My rating: just 3 out of 10.",
+#'      "If there were zero stars I would give it zero stars.",
+#'      "Rating: 1 out of 10.",
+#'      "I gave it 5 stars because of the sound quality.",
+#'      "If it were possible to give them 0/10, they'd have it."
+#' )
+#'
+#' replace_rating(x)
+replace_rating <- function (x, rating_dt = lexicon::key_ratings, ...) {
+    # Surround every word meaning with spaces before it is substituted in.
+    padded_words <- paste0(" ", rating_dt[["y"]], " ")
+    swapped <- .mgsub(rating_dt[["x"]], padded_words, x, ...)
+    # Squeeze each run of whitespace down to a single space.
+    gsub("\\s+", " ", swapped)
+}
+
diff --git a/R/replace_symbol.R b/R/replace_symbol.R
@@ -14,6 +14,7 @@
 #' @param and logical.  If \code{TRUE} replaces and sign (&) with \code{"and"}.
 #' @param with logical.  If \code{TRUE} replaces with sign (w/) with 
 #' \code{"with"}.
+#' @param \ldots ignored.
 #' @return Returns a character vector with symbols replaced.
 #' @keywords symbol-replace
 #' @export
@@ -24,7 +25,7 @@
 #' )
 #' replace_symbol(x)
 replace_symbol <- function(x, dollar = TRUE, percent = TRUE, 
-    pound = TRUE, at = TRUE, and = TRUE, with = TRUE) {
+    pound = TRUE, at = TRUE, and = TRUE, with = TRUE, ...) {
 
     y <- c(dollar, percent, pound, at, and, with, with)
 

diff --git a/R/utils.R b/R/utils.R
@@ -69,3 +69,18 @@ check_install <- function(x, fun = 'function'){
     }  
 
 }
+
+.mgsub <- function (pattern, replacement, text.var, ...) {
+
+    # Order patterns by decreasing character count (ties reversed as well);
+    # presumably so longer patterns are substituted before shorter ones.
+    longest_first <- rev(order(nchar(pattern)))
+    pattern <- pattern[longest_first]
+
+    # A single replacement is repeated for every pattern; otherwise the
+    # replacements are reordered in step with their patterns.
+    replacement <- if (length(replacement) == 1) {
+        rep(replacement, length(pattern))
+    } else {
+        replacement[longest_first]
+    }
+
+    # Fixed (non-regex), case-insensitive replacement of all patterns.
+    stringi::stri_replace_all_fixed(
+        text.var, pattern, replacement,
+        vectorize_all = FALSE,
+        opts_fixed = list(case_insensitive = TRUE)
+    )
+}
diff --git a/README.Rmd b/README.Rmd
@@ -41,10 +41,13 @@ The main functions, task category, & descriptions are summarized in the table be
 | `filter_NA`               | filter rows | Remove `NA` text rows                 |
 | `filter_element`          | filter elements | Remove matching elements from a vector   |
 | `replace_contraction`     | replacement | Replace contractions with both words  |
+| `replace_emoticon`        | replacement | Replace emoticons with word equivalent |
+| `replace_grade`           | replacement | Replace grades (e.g., "A+") with word equivalent |
 | `replace_incomplete`      | replacement | Replace incomplete sentence end-marks  |
 | `replace_non_ascii`       | replacement | Replace non-ascii with equivalent or remove   |
 | `replace_number`          | replacement | Replace common numbers                |
 | `replace_ordinal`         | replacement | Replace common ordinal number form    |
+| `replace_rating`          | replacement | Replace ratings (e.g., "10 out of 10", "3 stars") with word equivalent |
 | `replace_symbol`          | replacement | Replace common symbols                |
 | `replace_white`           | replacement | Replace regex white space characters  |
 | `add_comma_space`         | replacement | Replace non-space after comma         |
@@ -198,6 +201,9 @@ x <- c("Mr. Jones isn't going.",
 replace_contraction(x)
 ```
 
+### Grades
+
+
 ### Incomplete Sentences
 
 Sometimes an incomplete sentence is denoted with multiple end marks or no punctuation at all.  `replace_incomplete` standardizes these sentences with a pipe (`|`) endmark (or one of the user's choice).
@@ -239,6 +245,9 @@ replace_number(x, num.paste = TRUE)
 replace_number(x, remove=TRUE)
 ```
 
+### Ratings
+
+
 ### Ordinal Numbers
 
 Again, some analysis requires numbers, including ordinal numbers, to be converted to text form.  `replace_ordinal` attempts to perform this task for ordinal number 1-100 (i.e., 1st - 100th).  

diff --git a/README.md b/README.md
@@ -48,7 +48,12 @@ Functions
 The main functions, task category, & descriptions are summarized in the
 table below:
 
-<table>
+<table style="width:114%;">
+<colgroup>
+<col width="38%" />
+<col width="19%" />
+<col width="55%" />
+</colgroup>
 <thead>
 <tr class="header">
 <th align="left">Function</th>
@@ -103,6 +108,14 @@ table below:
 <td align="left">Replace contractions with both words</td>
 </tr>
 <tr class="even">
+<td align="left"><code>replace_emoticon</code></td>
+<td align="left">replacement</td>
+<td align="left">Replace emoticons with word equivalent</td>
+</tr>
+<tr class="odd">
+<td align="left"><code>replace_grade</code></td>
+<td align="left">replacement</td>
+<td align="left">Replace grades (e.g., &quot;A+&quot;) with word equivalent</td>
+</tr>
+<tr class="even">
 <td align="left"><code>replace_incomplete</code></td>
 <td align="left">replacement</td>
 <td align="left">Replace incomplete sentence end-marks</td>
@@ -123,26 +136,30 @@ table below:
 <td align="left">Replace common ordinal number form</td>
 </tr>
 <tr class="even">
+<td align="left"><code>replace_rating</code></td>
+<td align="left">replacement</td>
+<td align="left">Replace ratings (e.g., &quot;10 out of 10&quot;, &quot;3 stars&quot;) with word equivalent</td>
+</tr>
+<tr class="odd">
 <td align="left"><code>replace_symbol</code></td>
 <td align="left">replacement</td>
 <td align="left">Replace common symbols</td>
 </tr>
-<tr class="odd">
+<tr class="even">
 <td align="left"><code>replace_white</code></td>
 <td align="left">replacement</td>
 <td align="left">Replace regex white space characters</td>
 </tr>
-<tr class="even">
+<tr class="odd">
 <td align="left"><code>add_comma_space</code></td>
 <td align="left">replacement</td>
 <td align="left">Replace non-space after comma</td>
 </tr>
-<tr class="odd">
+<tr class="even">
 <td align="left"><code>check_text</code></td>
 <td align="left">check</td>
 <td align="left">Text report of potential issues</td>
 </tr>
-<tr class="even">
+<tr class="odd">
 <td align="left"><code>has_endmark</code></td>
 <td align="left">check</td>
 <td align="left">Check if an element has an end-mark</td>
@@ -404,7 +421,7 @@ And if all is well the user should be greeted by a cow:
     ## 
     ##  ------- 
     ## No problems found!
-    ## You are phenomenal! 
+    ## You are virtuosic! 
     ##  -------- 
     ##     \   ^__^ 
     ##      \  (oo)\ ________ 
@@ -809,7 +826,7 @@ comma separated numbers as well.
 
     replace_number(x, remove=TRUE)
 
-    ## [1] "I like , ice cream cones." "They are  percent good"
+    ## [1] "I like  ice cream cones." "They are  percent good"
 
 ### Ordinal Numbers
 

diff --git a/man/replace_emoticon.Rd b/man/replace_emoticon.Rd
diff --git a/man/replace_grade.Rd b/man/replace_grade.Rd
diff --git a/man/replace_ordinal.Rd b/man/replace_ordinal.Rd