Skip to content

Commit

Permalink
[Test] Add tests for urls extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
vstakhov committed Jul 13, 2021
1 parent e930958 commit ccf4d5d
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/libserver/html/html.cxx
Expand Up @@ -1331,6 +1331,9 @@ html_process_input(rspamd_mempool_t *pool,
url->count++;
}
}
if (part_urls) {
g_ptr_array_add(part_urls, url);
}

href_offset = hc->parsed.size();
}
Expand Down
32 changes: 32 additions & 0 deletions src/libserver/html/html_tests.cxx
Expand Up @@ -217,6 +217,38 @@ TEST_CASE("html text extraction")
rspamd_mempool_delete(pool);
}

/*
 * Verifies that html_process_input() fills the caller-supplied URL array.
 *
 * Each entry of `cases` pairs an HTML fragment with the URL strings that are
 * expected to be collected from it, in order. Every entry runs in its own
 * doctest SUBCASE, named after its 1-based position in `cases`.
 */
TEST_CASE("html urls extraction")
{
	using namespace std::string_literals;
	const std::vector<std::pair<std::string, std::vector<std::string>>> cases{
			{"<a href=\"https://example.com\">test</a>", {"https://example.com"}}
	};

	rspamd_url_init(nullptr);
	auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
			"html", 0);
	auto i = 1;
	for (const auto &c : cases) {
		SUBCASE((fmt::format("html url extraction case {}", i)).c_str()) {
			GPtrArray *purls = g_ptr_array_new();
			GByteArray *tmp = g_byte_array_sized_new(c.first.size());
			g_byte_array_append(tmp,
					reinterpret_cast<const guint8 *>(c.first.data()),
					c.first.size());
			auto *hc = html_process_input(pool, tmp, nullptr, nullptr, purls, true);
			CHECK(hc != nullptr);
			/* Bind by reference: the expectations are only read */
			const auto &expected = c.second;
			CHECK(expected.size() == purls->len);
			/*
			 * CHECK does not abort the test on failure, so the loop is also
			 * bounded by the number of urls actually collected to avoid
			 * reading past the end of purls when fewer urls were found.
			 */
			for (std::size_t j = 0; j < expected.size() && j < purls->len; ++j) {
				auto *url = static_cast<rspamd_url *>(g_ptr_array_index(purls, j));
				CHECK(expected[j] == std::string{url->string, url->urllen});
			}
			g_byte_array_free(tmp, TRUE);
			/* The array was created without an element free function, so only
			 * the pointer storage is released here */
			g_ptr_array_free(purls, TRUE);
		}
		/* Advance the counter so each case gets a distinct subcase name */
		++i;
	}

	rspamd_mempool_delete(pool);
}

}

} /* namespace rspamd::html */
Expand Down

0 comments on commit ccf4d5d

Please sign in to comment.