diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index e0a57387e0..a6fcfe36bb 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -1011,14 +1011,20 @@ static inline auto html_append_content(struct html_content *hc, std::string_view data, bool transparent) -> auto { auto cur_offset = hc->parsed.size(); - hc->parsed.append(data); - if (cur_offset > 0 && data.size() > 0) { - auto last = hc->parsed.back(); - auto first_appended = data.front(); - if (first_appended == ' ' && !g_ascii_isspace(last)) { - cur_offset++; + if (data.size() > 0) { + /* Handle multiple spaces at the begin */ + + if (cur_offset > 0) { + auto last = hc->parsed.back(); + if (!g_ascii_isspace(last) && g_ascii_isspace(data.front())) { + hc->parsed.append(" "); + data = {data.data() + 1, data.size() - 1}; + cur_offset ++; + } } + + hc->parsed.append(data); } auto nlen = decode_html_entitles_inplace(hc->parsed.data() + cur_offset, @@ -2075,7 +2081,7 @@ TEST_CASE("html text extraction") "

\n" " stuff

?\n" " \n" - "", "Hello, world! test\ndata<>\nstuff?"}, + "", "Hello, world! test \ndata<>\nstuff?"}, {"

test

", "test\n"}, /* Tables */ {"\n" @@ -2118,9 +2124,15 @@ TEST_CASE("html text extraction") " Sincerely,\n Skype Web\n"}, /* bgcolor propagation */ {"\n" - "FRevie\n" + "FRevie" "wFΜΉ", " Review"}, + /* Colors */ + {"goodbye cruel" + "world", "goodbye cruelworld"}, + /* Newline before tag -> must be space */ + {"goodbye cruel\n" + "world", "goodbye cruel world"}, }; rspamd_url_init(NULL);