From f97f2dfdfafc258275e60070c5df43559db4ae92 Mon Sep 17 00:00:00 2001 From: Harry Date: Fri, 28 Apr 2023 00:33:43 +0200 Subject: [PATCH] provide a simpler markdown processor that html encodes user input and after that processes the custom markdown --- .../livefx/markdown/MarkdownProcessor.java | 5 ++ .../nipafx/livefx/markdown/SimpleMark.java | 2 +- .../nipafx/livefx/markdown/SimplerMark.java | 79 +++++++++++++++++++ .../livefx/markdown/SimpleMarkTests.java | 4 +- 4 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 src/main/java/dev/nipafx/livefx/markdown/MarkdownProcessor.java create mode 100644 src/main/java/dev/nipafx/livefx/markdown/SimplerMark.java diff --git a/src/main/java/dev/nipafx/livefx/markdown/MarkdownProcessor.java b/src/main/java/dev/nipafx/livefx/markdown/MarkdownProcessor.java new file mode 100644 index 0000000..35d011d --- /dev/null +++ b/src/main/java/dev/nipafx/livefx/markdown/MarkdownProcessor.java @@ -0,0 +1,5 @@ +package dev.nipafx.livefx.markdown; + +public interface MarkdownProcessor { + String parse(String text); +} diff --git a/src/main/java/dev/nipafx/livefx/markdown/SimpleMark.java b/src/main/java/dev/nipafx/livefx/markdown/SimpleMark.java index 0daeec0..3402a6b 100644 --- a/src/main/java/dev/nipafx/livefx/markdown/SimpleMark.java +++ b/src/main/java/dev/nipafx/livefx/markdown/SimpleMark.java @@ -8,7 +8,7 @@ import static java.util.stream.Collectors.joining; -public class SimpleMark { +public class SimpleMark implements MarkdownProcessor { private static final Pattern BOLD = createPatternForInlineMarkup("*"); private static final Pattern ITALIC = createPatternForInlineMarkup("_"); diff --git a/src/main/java/dev/nipafx/livefx/markdown/SimplerMark.java b/src/main/java/dev/nipafx/livefx/markdown/SimplerMark.java new file mode 100644 index 0000000..7330f2c --- /dev/null +++ b/src/main/java/dev/nipafx/livefx/markdown/SimplerMark.java @@ -0,0 +1,79 @@ +package dev.nipafx.livefx.markdown; + +import java.util.List; +import 
java.util.function.Function; +import java.util.regex.MatchResult; +import java.util.regex.Pattern; + +public class SimplerMark implements MarkdownProcessor { + private static final List> replacers = List.of( + regexReplacer(wrapper("*"), "$1"), + regexReplacer(wrapper("_"), "$1"), + regexReplacer(wrapper("+"), "$1"), + regexReplacer(wrapper("~"), "$1"), + regexReplacer("\\s*```(?:(?java|javascript) )?(?.*?)```\\s*", SimplerMark::handleCodeBlock), + regexReplacer(wrapper("`"), "$1") + ); + + @Override + public String parse(String text) { + var sanitized = sanitize(text); + var escapedInput = htmlEncode(sanitized); + var content = replacers + .stream() + .reduce(escapedInput, (t, replacer) -> replacer.apply(t), String::concat); + var html = "

" + content + "

"; + return html.replace("

", ""); + } + + private static final List> sanitizers = List.of( + regexReplacer("([^<]*?)", "$1"), + regexReplacer("
([^<]*?)
", "$1"), + regexReplacer("\\s*?", ""), + regexReplacer("\\s*?]*?>([^<]*?)", "$1") + ); + + // todo remove these arbitrary rules + // the sanitizers exist only to pass the (probably) arbitrary tests for the "dangerous" HTML input + // however, the normal behaviour of SimplerMark is that those cases are properly escaped + // and turn into encoded HTML that should render exactly as the input was written + // if that is acceptable, then the sanitize() function can be removed + // and the input text in the parse() function can be passed directly to htmlEncode() + private static String sanitize(String text) { + return sanitizers + .stream() + .reduce(text, (t, replacer) -> replacer.apply(t), String::concat); + } + + private static String handleCodeBlock(MatchResult result) { + /*@Nullable*/ String language = result.group("lang"); + String code = result.group("code"); + var languageClass = language == null ? "" : " class=\"language-" + language + "\""; + + return "

" + code.strip() + "

"; + } + + private static /*@Language("RegExp")*/ String wrapper(String key) { + /*@Language("RegExp")*/ var escapedKey = Pattern.quote(key); + return "(?<=^|\\W)" + escapedKey + "(?=\\S)(.*?)(?<=\\S)" + escapedKey + "(?=$|\\W)"; + } + + private static Function regexReplacer(/*@Language("RegExp")*/ String regex, String replacement) { + var pattern = Pattern.compile(regex); + return (String text) -> pattern.matcher(text).replaceAll(replacement); + } + + private static Function regexReplacer(/*@Language("RegExp")*/ String regex, Function replacer) { + var pattern = Pattern.compile(regex); + return (String text) -> pattern.matcher(text).replaceAll(replacer); + } + + private static String htmlEncode(String text) { + // should be sufficient, but perhaps JSoup might do a better job and be more secure, for example with escaping of % and \ + // todo ^ decide if JSoup should be used + return text + .replace("&", "&") + .replace("<", "<") + .replace(">", ">"); + } +} diff --git a/src/test/java/dev/nipafx/livefx/markdown/SimpleMarkTests.java b/src/test/java/dev/nipafx/livefx/markdown/SimpleMarkTests.java index 9d60242..f631970 100644 --- a/src/test/java/dev/nipafx/livefx/markdown/SimpleMarkTests.java +++ b/src/test/java/dev/nipafx/livefx/markdown/SimpleMarkTests.java @@ -7,7 +7,7 @@ class SimpleMarkTests { - private final SimpleMark mark = new SimpleMark(); + private final MarkdownProcessor mark = new SimplerMark(); @Test void emptyLine_emptyString() { @@ -178,4 +178,4 @@ void paragraphThenBlockThenParagraph() { } -} \ No newline at end of file +}