Skip to content

Commit

Permalink
[utf8-validator] eliminate unnecessary comparison from must_be_2_3_co…
Browse files Browse the repository at this point in the history
…ntinuation (#2113)

* [utf8-validator] eliminate unnecessary comparison from must_be_2_3_continuation

* Fix comment in
  • Loading branch information
Validark committed Jan 28, 2024
1 parent ddecadd commit 9b0435d
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 26 deletions.
8 changes: 4 additions & 4 deletions src/arm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,10 @@ simdjson_unused simdjson_inline simd8<bool> must_be_continuation(const simd8<uin
return is_second_byte ^ is_third_byte ^ is_fourth_byte;
}

simdjson_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<bool> is_third_byte = prev2 >= uint8_t(0xe0u);
simd8<bool> is_fourth_byte = prev3 >= uint8_t(0xf0u);
return is_third_byte ^ is_fourth_byte;
simdjson_inline simd8<uint8_t> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80
return is_third_byte | is_fourth_byte;
}

} // unnamed namespace
Expand Down
2 changes: 1 addition & 1 deletion src/generic/dom_parser_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
namespace {

simdjson_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3);
simdjson_inline simd8<uint8_t> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3);
simdjson_inline bool is_ascii(const simd8x64<uint8_t>& input);

} // unnamed namespace
Expand Down
2 changes: 1 addition & 1 deletion src/generic/stage1/utf8_lookup4_algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ using namespace simd;
const simd8<uint8_t> prev_input, const simd8<uint8_t> sc) {
simd8<uint8_t> prev2 = input.prev<2>(prev_input);
simd8<uint8_t> prev3 = input.prev<3>(prev_input);
simd8<uint8_t> must23 = simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
simd8<uint8_t> must23 = must_be_2_3_continuation(prev2, prev3);
simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
return must23_80 ^ sc;
}
Expand Down
9 changes: 4 additions & 5 deletions src/haswell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,10 @@ simdjson_unused simdjson_inline simd8<bool> must_be_continuation(const simd8<uin
return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
}

simdjson_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0
// Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
simdjson_inline simd8<uint8_t> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80
return is_third_byte | is_fourth_byte;
}

} // unnamed namespace
Expand Down
9 changes: 4 additions & 5 deletions src/icelake.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,10 @@ simdjson_unused simdjson_inline simd8<bool> must_be_continuation(const simd8<uin
return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
}

simdjson_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0
// Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
simdjson_inline simd8<uint8_t> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80
return is_third_byte | is_fourth_byte;
}

} // unnamed namespace
Expand Down
9 changes: 4 additions & 5 deletions src/ppc64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,10 @@ simdjson_unused simdjson_inline simd8<bool> must_be_continuation(const simd8<uin
return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
}

simdjson_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0
// Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
simdjson_inline simd8<uint8_t> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80
return is_third_byte | is_fourth_byte;
}

} // unnamed namespace
Expand Down
9 changes: 4 additions & 5 deletions src/westmere.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,10 @@ simdjson_unused simdjson_inline simd8<bool> must_be_continuation(const simd8<uin
return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
}

simdjson_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0
// Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
simdjson_inline simd8<uint8_t> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80
return is_third_byte | is_fourth_byte;
}

} // unnamed namespace
Expand Down

0 comments on commit 9b0435d

Please sign in to comment.