Skip to content

Commit

Permalink
Add , perl = TRUE to grepl() calls per #51
Browse files Browse the repository at this point in the history
  • Loading branch information
TylerRinker committed Oct 11, 2021
1 parent c08bdfc commit 73985f9
Show file tree
Hide file tree
Showing 10 changed files with 21 additions and 21 deletions.
20 changes: 10 additions & 10 deletions R/check_text_logicals.R
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ contraction <- function(x){
"([a-z]'(nt|t|ve|d|ll|m|re))|('(cause|tis|twas))|(\\b(he|how|it|",
"let|she|that|there|what|when|where|who|why)'s)"
),
x, ignore.case = TRUE
x, ignore.case = TRUE, perl = TRUE
)
}
#contraction(c('jon\'s a good man', "'cause I want to", '4was\'nt', 'the dog',
Expand All @@ -337,7 +337,7 @@ date <- qr2fun('rm_date')

## digits
## Flags the elements of `x` that contain at least one digit.
## Returns a logical vector the same length as `x` (FALSE for NA).
digit <- function(x) {
  # Single PCRE-based call; the earlier non-perl call was evaluated and
  # thrown away, so it is dropped.
  grepl('\\d', x, perl = TRUE)
}

## email addresses
Expand All @@ -350,12 +350,12 @@ emoticon <- qr2fun('rm_emoticon')
## just white space
## Flags the elements of `x` that are empty or contain only whitespace.
## Returns a logical vector the same length as `x` (FALSE for NA).
empty <- function(x) {
  # Element-wise on purpose; the former aggregate form is kept for reference:
  #any(grepl("^\\s*$", stats::na.omit(x)))
  grepl("^\\s*$", x, perl = TRUE)
}

## are there escaped backslashes
## Flags the elements of `x` that contain a backslash but none of the
## quote characters matched by the second pattern (double quote, single
## quote, backtick). Returns a logical vector the same length as `x`.
escaped <- function(x) {
  # Both tests use PCRE so the two patterns are interpreted by the same engine.
  has_backslash <- grepl("[\\\\]", x, perl = TRUE)
  has_quote <- grepl("\\\"|\\\'|\\\`", x, perl = TRUE)
  has_backslash & !has_quote
}


Expand All @@ -366,7 +366,7 @@ hash <- qr2fun('rm_hash')
## contains html
## Flags the elements of `x` that contain an angle-bracket tag (`<...>`) or
## any of the patterns stored in `html_symbols[['html']]`.
## Returns a logical vector the same length as `x`.
html <- function(x) {
  # Alternation of the generic tag pattern and every known html symbol.
  pat <- paste0("<[^>]+>|", paste(html_symbols[['html']], collapse = "|"))
  grepl(pat, x, perl = TRUE)
}

## incomplete sentences usually indicated by 2-4 endmarks that are
Expand All @@ -376,12 +376,12 @@ incomplete <- function(x) {
"\\?*\\?[.]+|[.?!]*\\? [.][.?!]+|[.?!]*\\. [.?!]+|[.?!]+\\. [.?!]*|",
"[.?!]+\\.[.?!]*|[.?!]*\\.[.?!]+"
)
grepl(pat, x)
grepl(pat, x, perl = TRUE)
}

## contains kerning
## Flags the elements of `x` that contain at least three capital letters
## separated from one another by whitespace (e.g. "B O M B").
## Returns a logical vector the same length as `x`.
kern <- function(x) {
  # Two or more "capital + whitespace" groups followed by a final capital.
  grepl('(([A-Z]\\s+){2,}[A-Z])', x, perl = TRUE)
}

## check if something is a list of vectors
Expand Down Expand Up @@ -419,7 +419,7 @@ misspelled <- function(x){

## Does it have no letters
## Flags the non-missing elements of `x` that contain no ASCII letter.
## Returns a logical vector the same length as `x` (FALSE for NA).
no_alpha <- function(x) {
  !is.na(x) & !grepl("[a-zA-Z]", x, perl = TRUE)
}


Expand All @@ -430,14 +430,14 @@ no_endmark <- function(x) {

## is comma with no space
## Flags the elements of `x` in which a comma is immediately followed by a
## character other than a space. A comma at the very end does not count.
## Returns a logical vector the same length as `x`.
no_space_after_comma <- function(x) {
  grepl("(,)([^ ])", x, perl = TRUE)
}



## are there any non ascii characters
## Flags the non-missing, non-blank elements of `x` that contain a character
## outside the printable ASCII range (space through tilde).
## Returns a logical vector the same length as `x`.
non_ascii <- function(x) {
  outside_printable <- grepl("[^ -~]", x, perl = TRUE)
  # Tabs and newlines also fall outside " " to "~", so whitespace-only
  # strings are excluded explicitly.
  blank <- grepl("^\\s*$", x, perl = TRUE)
  outside_printable & !is.na(x) & !blank
}

## not character
Expand Down
2 changes: 1 addition & 1 deletion R/drop_row.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ drop_empty_row <- function(dataframe) {
x <- apply(dataframe, 1, function(x) {
paste(stats::na.omit(x), collapse = "")
})
return(dataframe[!grepl("^\\s*$", x), ,drop = FALSE] )
return(dataframe[!grepl("^\\s*$", x, perl = TRUE), ,drop = FALSE] )
}


Expand Down
2 changes: 1 addition & 1 deletion R/has_endmark.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
has_endmark <- function(x, endmarks = c('?', '.', '!'), ...){
  # Character class built from `endmarks`, allowing trailing whitespace
  # before the end of the string.
  pattern <- sprintf('[%s]\\s*$', paste(endmarks, collapse = ""))

  # `x` is passed exactly once; a second positional `x` would be taken as
  # grepl()'s `ignore.case` argument. Extra arguments in `...` are forwarded
  # to grepl(). Missing values yield FALSE.
  !is.na(x) & grepl(pattern, x, perl = TRUE, ...)
}
Expand Down
2 changes: 1 addition & 1 deletion R/make_plural.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ make_plural <- function (x, keep.original = FALSE,
hits <- match(tolower(x), tolower(irregular[[1]]))

ends <- "(sh?|x|z|ch)$"
pluralify <- ifelse(grepl(ends, x), "es", "s")
pluralify <- ifelse(grepl(ends, x, perl = TRUE), "es", "s")
out <- gsub("ys$", "ies", paste0(x, pluralify))
out[which(!is.na(hits))] <- irregular[[2]][hits[which(!is.na(hits))]]

Expand Down
2 changes: 1 addition & 1 deletion R/match_tokens.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ match_tokens <- function(x, pattern, ignore.case = TRUE, ...){
y <- rm_na(unique(unlist(textshape::split_token(x, lower = ignore.case))))
if (isTRUE(ignore.case)) pattern <- tolower(pattern)

y[grepl(paste(paste0('(', pattern, ')'), collapse = '|'), y)]
y[grepl(paste(paste0('(', pattern, ')'), collapse = '|'), y, perl = TRUE)]

}

Expand Down
4 changes: 2 additions & 2 deletions R/replace_money.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ replace_money <- function(x, pattern = '(-?)([$])([0-9,]+)(\\.\\d{2})?',

replace_money_fun <- function(x, decimal = ' and '){

sign <- ifelse(grepl('^-', x), 'negative ', '')
if (grepl('\\.', x)) {
sign <- ifelse(grepl('^-', x, perl = TRUE), 'negative ', '')
if (grepl('\\.', x, perl = TRUE)) {
number <- replace_number(
gsub(
'\\.',
Expand Down
4 changes: 2 additions & 2 deletions R/replace_number.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ replace_number <- function(x, num.paste = FALSE, remove = FALSE, ...) {
num_df[['den']][is.na(num_df[['den']])] <- ""
num_df[['int']] <- eng(num_df[['integer']], ...)

is_decimal <- grepl("\\.", num_df[[1]])
not_integer_decimal <- !grepl('\\d\\.', num_df[[1]])
is_decimal <- grepl("\\.", num_df[[1]], perl = TRUE)
not_integer_decimal <- !grepl('\\d\\.', num_df[[1]], perl = TRUE)

num_df[['int']][is_decimal & not_integer_decimal] <- ""

Expand Down
2 changes: 1 addition & 1 deletion R/replace_word_elongation.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#' 2011 Conference on Empirical Methods in Natural Language Processing (pp.
#' 562-570). Edinburgh, Scotland. Retrieved from
#' http://www.aclweb.org/anthology/D11-1052 \cr \cr
#' \url{http://storage.googleapis.com/books/ngrams/books/datasetsv2.html} \cr \cr
#' \url{https://storage.googleapis.com/books/ngrams/books/datasetsv2.html} \cr \cr
#' \url{https://www.theatlantic.com/magazine/archive/2013/03/dragging-it-out/309220} \cr \cr
#' \url{https://english.stackexchange.com/questions/189517/is-there-a-name-term-for-multiplied-vowels}
#' @export
Expand Down
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ drop_sci_note <- function(x, ...){

x <- as.character(as.numeric(x))

locs <- grepl('e\\+', x)
locs <- grepl('e\\+', x, perl = TRUE)

x[locs] <- unlist(Map(function(b, e) {

Expand Down
2 changes: 1 addition & 1 deletion man/replace_word_elongation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 73985f9

Please sign in to comment.