Skip to content
Permalink
Browse files

add tests/testit/test-clean.R to test functions in R/clean.R (#405)

  • Loading branch information
novica authored and yihui committed Oct 1, 2019
1 parent 8a460ab commit 9da91363e8dd0e48de8ba4b468b4a32596c478e4
Showing with 71 additions and 10 deletions.
  1. +8 −9 R/clean.R
  2. +62 −0 tests/testit/test-clean.R
  3. +1 −1 tests/testit/test-utils.R
@@ -2,24 +2,23 @@
# with the XML file exported from WordPress

# a wrapper function to read a file as UTF-8, process the text, and write back
# NOTE(review): this listing is a diff whose +/- markers were lost, so removed
# and added lines are interleaved. The next two lines appear to be the removed
# version (text always read from f) -- confirm against the repository.
process_file = function(f, FUN) {
x = read_utf8(f)
# Apparent replacement signature: x defaults to read_utf8(f), so a caller can
# pass the text directly via x instead of supplying a file path f.
process_file = function(f, FUN, x = read_utf8(f)) {
# apply the text-processing function FUN to the text
x = FUN(x)
# NOTE(review): apparently the removed line (unconditional write-back to f)
write_utf8(x, f)
# if no file was supplied, return the processed text; otherwise write it to f
if (missing(f)) x else write_utf8(x, f)
}

# replace three or more \n with two, i.e. two or more empty lines with one
# NOTE(review): diff markers were lost; this single-argument header appears to
# be the removed line -- confirm against the repository.
remove_extra_empty_lines = function(f) process_file(f, function(x) {
# Apparent replacement header: every argument is forwarded to process_file(),
# so the caller may pass either f or x.
remove_extra_empty_lines = function(...) process_file(..., FUN = function(x) {
# strip trailing whitespace from each element, then join all elements with \n
x = paste(gsub('\\s+$', '', x), collapse = '\n')
# collapse runs of three or more \n into exactly two, then apply trim_ws()
trim_ws(gsub('\n{3,}', '\n\n', x))
})

# replace [url](url) with <url>
# NOTE(review): diff markers were lost; this single-argument header appears to
# be the removed line -- confirm against the repository.
process_bare_urls = function(f) process_file(f, function(x) {
# Apparent replacement header: every argument is forwarded to process_file(),
# so the caller may pass either f or x.
process_bare_urls = function(...) process_file(..., FUN = function(x) {
# match [text](text) where the link target repeats the link text (an optional
# trailing slash on the target is tolerated) and rewrite it as <text>
gsub('\\[([^]]+)]\\(\\1/?\\)', '<\\1>', x)
})

normalize_chars = function(f) process_file(f, function(x) {
normalize_chars = function(...) process_file(..., FUN = function(x) {
# curly single and double quotes to straight quotes
x = gsub(paste0('[', intToUtf8(8216:8217), ']'), "'", x)
x = gsub(paste0('[', intToUtf8(8220:8221), ']'), '"', x)
@@ -29,7 +28,7 @@ normalize_chars = function(f) process_file(f, function(x) {
})

# clean up code blocks that have been syntax highlighted by Pandoc
remove_highlight_tags = function(f) process_file(f, function(x) {
remove_highlight_tags = function(...) process_file(..., FUN = function(x) {
clean = function(x) {
# remove the <code></code> tags
x = gsub('^(\\s+)<code( class="[^"]*")?>(.*)', '\\1\\3', x)
@@ -44,7 +43,7 @@ remove_highlight_tags = function(f) process_file(f, function(x) {
x
})

# <img></img> to <img/>
# NOTE(review): diff markers were lost; this single-argument header appears to
# be the removed line -- confirm against the repository.
fix_img_tags = function(f) process_file(f, function(x) {
# <img></img> to <img />
# Apparent replacement header: every argument is forwarded to process_file(),
# so the caller may pass either f or x.
fix_img_tags = function(...) process_file(..., FUN = function(x) {
# replace every literal '></img>' with ' />' (self-closing form)
gsub('></img>', ' />', x)
})
@@ -0,0 +1,62 @@
library(testit)

assert('remove_extra_empty_lines() replaces two or more empty lines with one', {

  # the two text lines that surround the run of empty lines
  head_line = 'a line with some new lines.'
  tail_line = ' and some text.'

  # expected output: a single string with exactly one empty line in between
  expected = 'a line with some new lines.\n\n and some text.'

  # three, two, and one empty line(s) must all yield the same result
  (remove_extra_empty_lines(x = c(head_line, '', '', '', tail_line)) %==% expected)
  (remove_extra_empty_lines(x = c(head_line, '', '', tail_line)) %==% expected)
  (remove_extra_empty_lines(x = c(head_line, '', tail_line)) %==% expected)

})


assert('process_bare_urls() replaces [url](url) with <url>', {

  # a link whose text equals its target, on its own
  (process_bare_urls(x = '[url](url)') %==% '<url>')

  # the same link embedded in surrounding text, which must be left untouched
  (process_bare_urls(x = 'some text before [url](url) and after') %==%
    'some text before <url> and after')

})


assert('normalize_chars() converts curly quotes to straight quotes', {

  # code points 8216 and 8217: curly single quotes
  (normalize_chars(x = intToUtf8(8216:8217)) %==% "''")

  # code points 8220 and 8221: curly double quotes
  (normalize_chars(x = intToUtf8(8220:8221)) %==% '""')

  # code point 8230 (horizontal ellipsis) is expected to become three dots
  (normalize_chars(x = intToUtf8(8230)) %==% '...')

  # code point 160 (no-break space) is expected to become a plain space
  (normalize_chars(x = intToUtf8(160)) %==% ' ')

})


assert('remove_highlight_tags() cleans up code blocks syntax highlighted by Pandoc', {

  # an indented line wrapped in <code></code> keeps only its content
  (remove_highlight_tags(x = ' <code>some code</code>') %==% ' some code')

  # an indented line wrapped in <span></span> keeps only its content
  (remove_highlight_tags(x = ' <span>some span</span>') %==% ' some span')

})


assert('fix_img_tags() converts <img></img> to <img />', {

  # a bare tag pair
  (fix_img_tags(x = '<img></img>') %==% '<img />')

  # a tag pair embedded in surrounding text, which must be left untouched
  (fix_img_tags(x = 'text before <img></img> and after') %==%
    'text before <img /> and after')

})
@@ -60,6 +60,6 @@ assert('modify_yaml perserves original values properly', {
write(test_rmd, test_rmd_file)
old_content = readLines(test_rmd_file)

modify_yaml(test_rmd_file)
modify_yaml(test_rmd_file, .keep_empty = TRUE)
readLines(test_rmd_file) %==% old_content
})

0 comments on commit 9da9136

Please sign in to comment.
You can’t perform that action at this time.