Skip to content

Commit

Permalink
Adding tests for Custom Binary Search; #529
Browse files Browse the repository at this point in the history
  • Loading branch information
the-moisrex committed Apr 12, 2024
1 parent 8cae92c commit 0b82615
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 9 deletions.
89 changes: 89 additions & 0 deletions tests/idna_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,95 @@ TEST(BasicIDNATests, MappingFindAlgorithmTest) {
EXPECT_EQ(*(++mapped_pos), 0x0646);
EXPECT_EQ(*(++mapped_pos), 0x062C);
EXPECT_EQ(*(++mapped_pos), 0x0645);


// Last character should not blow us up!
auto const last_pos = uri::idna::find_mapping_byte(0x10'FFFF);
EXPECT_EQ(*last_pos,
uri::idna::details::idna_mapping_table[uri::idna::details::idna_mapping_table.size() - 3])
<< "Position of the iterator: "
<< stl::distance(uri::idna::details::idna_mapping_table.begin(), last_pos)
<< "\nRange Start Character: " << std::hex << (*last_pos & ~uri::idna::details::disallowed_mask)
<< std::dec;


// First character should not blow us up! (The first character is a valid character, so its actual value
// should not be in the mapping table)
auto const first_pos = uri::idna::find_mapping_byte(0x0);
EXPECT_EQ(stl::distance(uri::idna::details::idna_mapping_table.begin(), first_pos), 0)
<< "Position of the iterator: "
<< stl::distance(uri::idna::details::idna_mapping_table.begin(), first_pos)
<< "\nRange Start Character: " << (*first_pos & ~uri::idna::details::disallowed_mask);
}

// Walks every "first byte" entry of the IDNA mapping table (entries with `mapped_mask` set), samples
// code points from the range that entry describes, and checks that `find_mapping_byte` resolves each
// sampled code point back to that very same entry.
//
// Besides the total number of mismatches, the test separately counts mismatches where the lookup landed
// on the previous first-byte entry or on the following one, which helps to diagnose off-by-one errors in
// the custom binary search.
TEST(BasicIDNATests, TestingAllTheTable) {
    namespace details = uri::idna::details;

    stl::size_t errors           = 0; // sampled code points that resolved to a wrong entry
    stl::size_t picking_last_one = 0; // ... where the wrong entry was the previous first-byte entry
    stl::size_t picking_next_one = 0; // ... where the wrong entry was the following first-byte entry

    auto const table_size = details::idna_mapping_table.size();

    stl::uint32_t last_one = 0; // previous first-byte entry that was visited
    for (stl::uint32_t index = 0; index != table_size; ++index) {
        auto const cur = details::idna_mapping_table[index];
        if ((cur & details::mapped_mask) == 0) {
            // not a first-byte entry; it belongs to the entry before it
            continue;
        }

        // `>>` binds tighter than `&`, so the masking has to be parenthesized; without the parentheses
        // the expression is `cur & (~mapped_mask >> 24U)`, which only keeps low bits of the code point.
        // NOTE(review): this assumes the length is stored in the bits above bit 24 (next to the mapped
        // flag) -- confirm against the table generator.
        auto const length = (cur & ~details::mapped_mask) >> 24U;

        auto const range_start = cur & ~details::disallowed_mask;

        // For a disallowed entry the element that follows is used as the end of the range. Guard the
        // read so a first-byte entry in the very last slot does not index past the table; in that case
        // fall back to a single-code-point range.
        bool const has_successor = index + 1 < table_size;
        auto       range_end     = has_successor ? details::idna_mapping_table[index + 1] : range_start;
        std::string_view action  = "disallowed";

        bool const is_mapped = (cur & details::disallowed_mask) != details::disallowed_mask;
        if (is_mapped) {
            range_end = range_start + length;
            action    = "mapped/ignored";
        }

        // Sample the range in steps of half its size (always advancing by at least one).
        for (stl::uint32_t sub_index = range_start; sub_index <= range_end;) {
            auto const       sub_pos = uri::idna::find_mapping_byte(sub_index);
            std::string_view state   = "";
            if (*sub_pos != cur) {
                ++errors;
                if (*sub_pos == last_one) {
                    state = " (last one) ";
                    ++picking_last_one;
                } else {
                    // Look for the next first-byte entry, never reading past the end of the table
                    // (the current entry may be the last first-byte entry).
                    stl::size_t next_index = index + 1;
                    while (next_index < table_size &&
                           (details::idna_mapping_table[next_index] & details::mapped_mask) == 0)
                    {
                        ++next_index;
                    }
                    if (next_index < table_size && *sub_pos == details::idna_mapping_table[next_index]) {
                        ++picking_next_one;
                        state = " (next one) ";
                    }
                }
                EXPECT_EQ(*sub_pos, cur)
                  << "Index: " << index << "\n"
                  << "Sub Index: " << sub_index << " HexChar: " << std::hex << sub_index << std::dec
                  << " diff: " << (sub_index - range_start) << "\n"
                  << "Range start: " << range_start << "\n"
                  << "Range end: " << range_end << "\n"
                  << "Position of the iterator: "
                  << stl::distance(details::idna_mapping_table.begin(), sub_pos)
                  << "\nCurrent: " << std::hex << (*sub_pos & ~details::disallowed_mask)
                  << std::dec << state << "\nExpected: " << std::hex
                  << (cur & ~details::disallowed_mask) << std::dec << "\naction: " << action;
            }

            auto const half = (range_end - range_start) / 2;
            sub_index += half;
            if (half == 0) {
                // ranges of one or two code points: step by one so the loop terminates
                ++sub_index;
            }
        }

        last_one = cur;
    }
    EXPECT_EQ(errors, 0);
    EXPECT_EQ(picking_last_one, 0);
    EXPECT_EQ(picking_next_one, 0);
}

TEST(BasicIDNATests, PerformMappingTest) {
Expand Down
36 changes: 27 additions & 9 deletions webpp/uri/idna/idna_mappings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,27 +64,41 @@ namespace webpp::uri::idna {
using details::mapped_mask;
using details::not_mapped_mask;

map_table_byte_type const byte = static_cast<map_table_byte_type>(inp_ch) | disallowed_mask;
map_table_byte_type const element = static_cast<map_table_byte_type>(inp_ch) | disallowed_mask;

// this is almost the same thing as std::partition_point and std::lower_bound, but with modifications.
auto length = idna_mapping_table.size();
auto first = idna_mapping_table.begin(); // NOLINT(*-qualified-auto)

while (length > 0) {
auto const half = length >> 1U;
auto middle = first; // NOLINT(*-qualified-auto)
std::advance(middle, half);
// ([element] [mapped] [mapped] [mapped], ...)
// ^ --------------------------
// | ^
// | |
// | Should be ignored during binary search
// |
// first-byte: this is the byte we should find and compare against
for (;;) {
length >>= 1U; // devided by 2
auto middle = first; // NOLINT(*-qualified-auto)
std::advance(middle, length);

// non-first-characters are ignored here
decltype(length) remaining = 0;
while ((*middle & mapped_mask) == 0U) {
--middle;
++remaining;
}

if ((*middle | disallowed_mask) < byte) {
length = length - half - 1;
first = middle;
if (middle == first) {
break;
}
if (element < (*middle | disallowed_mask)) {
length -= remaining;
} else {
length = half;
// let's look into the higher half now
first = middle;
length += remaining;
++length;
}
}
return first;
Expand Down Expand Up @@ -143,6 +157,10 @@ namespace webpp::uri::idna {
return true;
}

/**
* Mapping Step of the IDNA Processing
* UTS #46: https://www.unicode.org/reports/tr46/#ProcessingStepMap
*/
template <bool UseSTD3ASCIIRules = false, istl::String OutStrT, typename Iter>
[[nodiscard]] static constexpr bool map(Iter beg, Iter end, OutStrT& out) {
using details::idna_reference_table;
Expand Down

0 comments on commit 0b82615

Please sign in to comment.