diff --git a/src/core/regex/include/sourcemeta/core/regex.h b/src/core/regex/include/sourcemeta/core/regex.h index 90b4edd90..5defa5931 100644 --- a/src/core/regex/include/sourcemeta/core/regex.h +++ b/src/core/regex/include/sourcemeta/core/regex.h @@ -65,7 +65,7 @@ enum class RegexIndex : std::uint8_t { /// @ingroup regex /// /// Compile a regular expression from a string. If the regular expression is -/// invalid, no value is returned. In this function +/// invalid, no value is returned. In this function: /// /// - Regexes are NOT automatically anchored /// - Regexes assume `DOTALL` diff --git a/src/core/regex/preprocess.h b/src/core/regex/preprocess.h index 5d503731f..a6cb57d26 100644 --- a/src/core/regex/preprocess.h +++ b/src/core/regex/preprocess.h @@ -1,284 +1,682 @@ #ifndef SOURCEMETA_CORE_REGEX_PREPROCESS_H_ #define SOURCEMETA_CORE_REGEX_PREPROCESS_H_ +#include // std::array +#include // std::bitset #include // std::size_t #include // std::optional #include // std::string #include // std::string_view +#include // std::pair namespace sourcemeta::core { namespace { -inline auto is_escaped(const std::string &pattern, const std::size_t index) - -> bool { - std::size_t count{0}; - std::size_t position{index}; - while (position > 0 && pattern[position - 1] == '\\') { - ++count; - --position; +constexpr std::array, 42> + unicode_property_map{{{"digit", "Nd"}, + {"Decimal_Number", "Nd"}, + {"space", "White_Space"}, + {"White_Space", "White_Space"}, + {"ASCII", "ASCII"}, + {"Hex_Digit", "Hex_Digit"}, + {"Alphabetic", "Alphabetic"}, + {"Letter", "L"}, + {"Uppercase_Letter", "Lu"}, + {"Lowercase_Letter", "Ll"}, + {"Titlecase_Letter", "Lt"}, + {"Modifier_Letter", "Lm"}, + {"Other_Letter", "Lo"}, + {"Mark", "M"}, + {"Nonspacing_Mark", "Mn"}, + {"Spacing_Mark", "Mc"}, + {"Enclosing_Mark", "Me"}, + {"Number", "N"}, + {"Letter_Number", "Nl"}, + {"Other_Number", "No"}, + {"Punctuation", "P"}, + {"Connector_Punctuation", "Pc"}, + {"Dash_Punctuation", "Pd"}, + 
{"Open_Punctuation", "Ps"}, + {"Close_Punctuation", "Pe"}, + {"Initial_Punctuation", "Pi"}, + {"Final_Punctuation", "Pf"}, + {"Other_Punctuation", "Po"}, + {"Symbol", "S"}, + {"Math_Symbol", "Sm"}, + {"Currency_Symbol", "Sc"}, + {"Modifier_Symbol", "Sk"}, + {"Other_Symbol", "So"}, + {"Separator", "Z"}, + {"Space_Separator", "Zs"}, + {"Line_Separator", "Zl"}, + {"Paragraph_Separator", "Zp"}, + {"Other", "C"}, + {"Control", "Cc"}, + {"Format", "Cf"}, + {"Unassigned", "Cn"}, + {"Private_Use", "Co"}}}; + +constexpr std::string_view shorthand_chars{"dDwWsS"}; +constexpr std::string_view simple_escapes{"btnrfv0"}; +constexpr std::string_view simple_escape_values{"\b\t\n\r\f\v"}; +constexpr std::string_view v_flag_syntax{"-][(){}/'|!#%&*+,.:;<=>?@`~^$"}; + +inline auto hex_value(char character) -> int { + if (character >= '0' && character <= '9') { + return character - '0'; } - return (count % 2) == 1; + if (character >= 'a' && character <= 'f') { + return character - 'a' + 10; + } + + if (character >= 'A' && character <= 'F') { + return character - 'A' + 10; + } + + return -1; } -inline auto translate_unicode_property(const std::string_view property_name, - const bool negated) - -> std::optional { - const char prefix{negated ? 
'P' : 'p'}; - - // NOLINTNEXTLINE(bugprone-branch-clone) - if (property_name == "digit") { - return std::string("\\") + prefix + "{Nd}"; - } - // NOLINTNEXTLINE(bugprone-branch-clone) - else if (property_name == "space") { - return std::string("\\") + prefix + "{White_Space}"; - } else if (property_name == "ASCII") { - return std::string("\\") + prefix + "{ASCII}"; - } else if (property_name == "Hex_Digit") { - return std::string("\\") + prefix + "{Hex_Digit}"; - } else if (property_name == "Alphabetic") { - return std::string("\\") + prefix + "{Alphabetic}"; - } else if (property_name == "White_Space") { - return std::string("\\") + prefix + "{White_Space}"; - } else if (property_name == "Letter") { - return std::string("\\") + prefix + "{L}"; - } else if (property_name == "Uppercase_Letter") { - return std::string("\\") + prefix + "{Lu}"; - } else if (property_name == "Lowercase_Letter") { - return std::string("\\") + prefix + "{Ll}"; - } else if (property_name == "Titlecase_Letter") { - return std::string("\\") + prefix + "{Lt}"; - } else if (property_name == "Modifier_Letter") { - return std::string("\\") + prefix + "{Lm}"; - } else if (property_name == "Other_Letter") { - return std::string("\\") + prefix + "{Lo}"; - } else if (property_name == "Mark") { - return std::string("\\") + prefix + "{M}"; - } else if (property_name == "Nonspacing_Mark") { - return std::string("\\") + prefix + "{Mn}"; - } else if (property_name == "Spacing_Mark") { - return std::string("\\") + prefix + "{Mc}"; - } else if (property_name == "Enclosing_Mark") { - return std::string("\\") + prefix + "{Me}"; - } else if (property_name == "Number") { - return std::string("\\") + prefix + "{N}"; - } else if (property_name == "Decimal_Number") { - return std::string("\\") + prefix + "{Nd}"; - } else if (property_name == "Letter_Number") { - return std::string("\\") + prefix + "{Nl}"; - } else if (property_name == "Other_Number") { - return std::string("\\") + prefix + "{No}"; - } else if 
(property_name == "Punctuation") { - return std::string("\\") + prefix + "{P}"; - } else if (property_name == "Connector_Punctuation") { - return std::string("\\") + prefix + "{Pc}"; - } else if (property_name == "Dash_Punctuation") { - return std::string("\\") + prefix + "{Pd}"; - } else if (property_name == "Open_Punctuation") { - return std::string("\\") + prefix + "{Ps}"; - } else if (property_name == "Close_Punctuation") { - return std::string("\\") + prefix + "{Pe}"; - } else if (property_name == "Initial_Punctuation") { - return std::string("\\") + prefix + "{Pi}"; - } else if (property_name == "Final_Punctuation") { - return std::string("\\") + prefix + "{Pf}"; - } else if (property_name == "Other_Punctuation") { - return std::string("\\") + prefix + "{Po}"; - } else if (property_name == "Symbol") { - return std::string("\\") + prefix + "{S}"; - } else if (property_name == "Math_Symbol") { - return std::string("\\") + prefix + "{Sm}"; - } else if (property_name == "Currency_Symbol") { - return std::string("\\") + prefix + "{Sc}"; - } else if (property_name == "Modifier_Symbol") { - return std::string("\\") + prefix + "{Sk}"; - } else if (property_name == "Other_Symbol") { - return std::string("\\") + prefix + "{So}"; - } else if (property_name == "Separator") { - return std::string("\\") + prefix + "{Z}"; - } else if (property_name == "Space_Separator") { - return std::string("\\") + prefix + "{Zs}"; - } else if (property_name == "Line_Separator") { - return std::string("\\") + prefix + "{Zl}"; - } else if (property_name == "Paragraph_Separator") { - return std::string("\\") + prefix + "{Zp}"; - } else if (property_name == "Other") { - return std::string("\\") + prefix + "{C}"; - } else if (property_name == "Control") { - return std::string("\\") + prefix + "{Cc}"; - } else if (property_name == "Format") { - return std::string("\\") + prefix + "{Cf}"; - } else if (property_name == "Unassigned") { - return std::string("\\") + prefix + "{Cn}"; - } else if 
(property_name == "Private_Use") { - return std::string("\\") + prefix + "{Co}"; +inline auto all_hex(const std::string &content, std::size_t start, + std::size_t count) -> bool { + for (std::size_t offset = 0; offset < count; ++offset) { + if (hex_value(content[start + offset]) < 0) { + return false; + } } - return std::nullopt; + return true; } -} // namespace +inline auto parse_hex_digits(const std::string &content, std::size_t start, + std::size_t count) -> int { + int value = 0; + for (std::size_t offset = 0; offset < count; ++offset) { + value = (value << 4) | hex_value(content[start + offset]); + } -inline auto preprocess_regex(const std::string &pattern) -> std::string { - std::string result; - result.reserve(pattern.size() * 2); - bool in_char_class{false}; + return value; +} + +inline auto set_range(std::bitset<128> &bits, int from, int to) -> void { + for (int code = from; code <= to; ++code) { + bits.set(static_cast(code)); + } +} + +inline auto set_shorthand_class(std::bitset<128> &characters, + const char shorthand) -> void { + std::bitset<128> base; + const char lower = static_cast(shorthand | 32); + if (lower == 'd') { + set_range(base, '0', '9'); + } else if (lower == 'w') { + set_range(base, 'a', 'z'); + set_range(base, 'A', 'Z'); + set_range(base, '0', '9'); + base.set('_'); + } else if (lower == 's') { + for (const char code : {' ', '\t', '\n', '\r', '\f', '\v'}) { + base.set(static_cast(code)); + } + } else { + return; + } + + if (shorthand >= 'A' && shorthand <= 'Z') { + base.flip(); + base.reset(0); + } + + characters |= base; +} + +inline auto find_bracket_end(const std::string &content, std::size_t start) + -> std::size_t { + for (std::size_t depth = 1, position = start; position < content.size(); + ++position) { + if (content[position] == '\\' && position + 1 < content.size()) { + ++position; + } else if (content[position] == '[') { + ++depth; + } else if (content[position] == ']' && --depth == 0) { + return position + 1; + } + } + + 
return content.size(); +} + +inline auto parse_escape(const std::string &content, std::size_t position, + std::size_t &end, int &code_point) -> void { + if (position >= content.size()) { + end = position; + code_point = -1; + return; + } + + if (content[position] != '\\' || position + 1 >= content.size()) { + end = position + 1; + code_point = static_cast(content[position]); + return; + } + + const char next = content[position + 1]; + if (next == 'x' && position + 3 < content.size() && + all_hex(content, position + 2, 2)) { + end = position + 4; + code_point = parse_hex_digits(content, position + 2, 2); + return; + } + + if (next == 'u' && position + 2 < content.size()) { + if (content[position + 2] == '{') { + std::size_t brace_end = position + 3; + while (brace_end < content.size() && content[brace_end] != '}' && + hex_value(content[brace_end]) >= 0) { + ++brace_end; + } + + if (brace_end < content.size() && content[brace_end] == '}') { + const int value = + parse_hex_digits(content, position + 3, brace_end - position - 3); + if (value < 128) { + end = brace_end + 1; + code_point = value; + return; + } + } + } else if (position + 5 < content.size() && + all_hex(content, position + 2, 4)) { + const int value = parse_hex_digits(content, position + 2, 4); + if (value < 128) { + end = position + 6; + code_point = value; + return; + } + } + } + + if (next == 'c' && position + 2 < content.size()) { + const char control = content[position + 2]; + if ((control >= 'A' && control <= 'Z') || + (control >= 'a' && control <= 'z')) { + end = position + 3; + code_point = control % 32; + return; + } + } + + end = position + 2; + const auto escape_index = simple_escapes.find(next); + if (escape_index < 6) { + code_point = static_cast(simple_escape_values[escape_index]); + } else if (next == '0') { + code_point = 0; + } else if (shorthand_chars.find(next) != std::string_view::npos) { + code_point = -1; + } else { + code_point = static_cast(next); + } +} + +inline auto 
first_operator(const std::string &content, std::size_t start) + -> std::pair { + for (std::size_t depth = 0, position = start; position + 1 < content.size(); + ++position) { + const char current = content[position]; + if (current == '\\') { + ++position; + } else if (current == '[') { + ++depth; + } else if (current == ']' && depth > 0) { + --depth; + } else if (depth == 0 && (current == '-' || current == '&') && + content[position + 1] == current) { + return {position, current}; + } + } - for (std::size_t index{0}; index < pattern.size(); ++index) { - if (pattern[index] == '[' && !is_escaped(pattern, index)) { - if (in_char_class) { - result += "\\["; + return {std::string::npos, '\0'}; +} + +inline auto has_nested_brackets(const std::string &content) -> bool { + for (std::size_t position = 0; position < content.size(); ++position) { + if (content[position] == '\\') { + ++position; + } else if (content[position] == '[') { + return true; + } + } + + return false; +} + +// Forward declaration +inline auto expand_set_ops(const std::string &content, std::bitset<128> &result) + -> bool; + +inline auto parse_class_to_bitset(const std::string &content, std::size_t start, + std::bitset<128> &characters) -> std::size_t { + std::size_t position = start; + bool negated = position < content.size() && content[position] == '^'; + if (negated) { + ++position; + } + + while (position < content.size() && content[position] != ']') { + if (content[position] == '\\' && position + 1 < content.size() && + shorthand_chars.find(content[position + 1]) != std::string_view::npos) { + set_shorthand_class(characters, content[position + 1]); + position += 2; + continue; + } + + if (content[position] == '[') { + const auto nested_end = find_bracket_end(content, position + 1); + const auto nested = + content.substr(position + 1, nested_end - position - 2); + std::bitset<128> nested_chars; + if (first_operator(nested, 0).first != std::string::npos) { + expand_set_ops(nested, nested_chars); } else 
{ - in_char_class = true; - result += pattern[index]; + parse_class_to_bitset(nested, 0, nested_chars); } + + characters |= nested_chars; + position = nested_end; continue; } - if (pattern[index] == ']' && !is_escaped(pattern, index)) { - in_char_class = false; - result += pattern[index]; + std::size_t end{0}; + int first{0}; + parse_escape(content, position, end, first); + if (first < 0) { + position = end; continue; } - if (pattern[index] == '\\' && index + 1 < pattern.size()) { - const char next_char{pattern[index + 1]}; + if (end < content.size() && content[end] == '-' && + end + 1 < content.size() && content[end + 1] != ']') { + std::size_t range_end{0}; + int second{0}; + parse_escape(content, end + 1, range_end, second); + if (second >= 0) { + for (int code = first; code <= second && code < 128; ++code) { + characters.set(static_cast(code)); + } - if (next_char == '\\') { - result += "\\\\"; - ++index; + position = range_end; continue; } + } - if (next_char == '[' || next_char == ']' || next_char == '^' || - next_char == '$') { - result += '\\'; - result += next_char; - ++index; - continue; + if (first < 128) { + characters.set(static_cast(first)); + } + + position = end; + } + + if (negated) { + for (std::size_t code = 32; code < 128; ++code) { + characters.flip(code); + } + } + + return position; +} + +inline auto append_char(std::string &result, std::size_t value) -> void { + constexpr std::string_view hex{"0123456789abcdef"}; + if (value < 32 || value == 127) { + result += "\\x"; + result += hex[(value >> 4) & 0xF]; + result += hex[value & 0xF]; + } else { + if (std::string_view{"-]\\^"}.find(static_cast(value)) != + std::string_view::npos) { + result += '\\'; + } + + result += static_cast(value); + } +} + +inline auto bitset_to_class(const std::bitset<128> &characters) -> std::string { + std::string result{"["}; + for (std::size_t code = 0; code < 128; ++code) { + if (!characters.test(code)) { + continue; + } + + const std::size_t range_start = code; + 
while (code + 1 < 128 && characters.test(code + 1)) { + ++code; + } + + if (code - range_start >= 2) { + append_char(result, range_start); + result += '-'; + append_char(result, code); + } else { + for (std::size_t character = range_start; character <= code; + ++character) { + append_char(result, character); } + } + } - if (next_char == 'u' && index + 2 < pattern.size() && - pattern[index + 2] == '{') { - result += "\\x{"; - index += 2; - for (++index; index < pattern.size() && pattern[index] != '}'; - ++index) { - result += pattern[index]; - } - if (index < pattern.size()) { - result += '}'; + result += ']'; + return result; +} + +inline auto is_valid_escape(const std::string &content, std::size_t position) + -> bool { + if (position + 1 >= content.size()) { + return false; + } + + const char next = content[position + 1]; + if (std::string_view{"dDwWsSnrtfvb0\\"}.find(next) != + std::string_view::npos) { + return true; + } + + if (next == 'x') { + return position + 3 < content.size() && all_hex(content, position + 2, 2); + } + + if (next == 'u' && position + 2 < content.size()) { + if (content[position + 2] == '{') { + for (auto end = position + 3; end < content.size(); ++end) { + if (content[end] == '}') { + return true; + } else if (hex_value(content[end]) < 0) { + return false; } - continue; } - if (next_char == 'u' && index + 5 < pattern.size()) { - bool is_hex{true}; - for (std::size_t offset{0}; offset < 4; ++offset) { - const char hex_char{pattern[index + 2 + offset]}; - if (!((hex_char >= '0' && hex_char <= '9') || - (hex_char >= 'a' && hex_char <= 'f') || - (hex_char >= 'A' && hex_char <= 'F'))) { - is_hex = false; - break; + return false; + } + + return position + 5 < content.size() && all_hex(content, position + 2, 4); + } + + if (next == 'c' && position + 2 < content.size()) { + const char ctrl = content[position + 2]; + return (ctrl >= 'A' && ctrl <= 'Z') || (ctrl >= 'a' && ctrl <= 'z'); + } + + return v_flag_syntax.find(next) != 
std::string_view::npos; +} + +inline auto is_valid_operand(const std::string &operand) -> bool { + if (operand.empty()) { + return false; + } + + if (operand.front() == '[') { + if (operand.size() >= 2 && operand.back() == ']') { + const auto inner = operand.substr(1, operand.size() - 2); + for (std::size_t position = 0; position < inner.size(); ++position) { + if (inner[position] == '\\' && position + 1 < inner.size()) { + if (!is_valid_escape(inner, position)) { + return false; } - } - if (is_hex) { - result += "\\x{"; - result += pattern[index + 2]; - result += pattern[index + 3]; - result += pattern[index + 4]; - result += pattern[index + 5]; - result += '}'; - index += 5; - continue; + ++position; + } else if (inner[position] == '|' || + (inner[position] == '-' && (position == inner.size() - 1 || + inner[position + 1] == ']'))) { + return false; } } + } - if ((next_char == 'p' || next_char == 'P') && - index + 2 < pattern.size() && pattern[index + 2] == '{') { - const bool negated{next_char == 'P'}; - const std::size_t start_index{index}; - index += 3; - std::string property_name; - while (index < pattern.size() && pattern[index] != '}') { - property_name += pattern[index]; - ++index; - } + return true; + } - if (index < pattern.size()) { - const auto translated{ - translate_unicode_property(property_name, negated)}; - if (translated.has_value()) { - result += translated.value(); - } else { - result += pattern.substr(start_index, index - start_index + 1); - } - } else { - index = start_index; - result += pattern[index]; + if (operand.size() == 2 && operand[0] == '\\' && + shorthand_chars.find(operand[1]) != std::string_view::npos) { + return true; + } + + for (std::size_t position = 0; position < operand.size(); ++position) { + if (operand[position] == '\\' && position + 1 < operand.size()) { + ++position; + } else if (operand[position] == '-' && position > 0 && + position + 1 < operand.size()) { + return false; + } + } + + return true; +} + +inline auto 
parse_operand(const std::string &operand, + std::bitset<128> &characters) -> bool { + if (!is_valid_operand(operand)) { + return false; + } + + if (operand.size() >= 2 && operand.front() == '[' && operand.back() == ']') { + const auto inner = operand.substr(1, operand.size() - 2); + if (first_operator(inner, 0).first != std::string::npos) { + return expand_set_ops(inner, characters); + } + } + + parse_class_to_bitset(operand, 0, characters); + return true; +} + +inline auto expand_set_ops(const std::string &content, std::bitset<128> &result) + -> bool { + auto [op_pos, op_char] = first_operator(content, 0); + if (op_pos == std::string::npos) { + parse_class_to_bitset(content, 0, result); + return true; + } + + if (auto [next_pos, next_op] = first_operator(content, op_pos + 2); + next_pos != std::string::npos && next_op != op_char) { + return false; + } + + if (!parse_operand(content.substr(0, op_pos), result)) { + return false; + } + + for (std::size_t position = op_pos; position + 1 < content.size() && + content[position] == op_char && + content[position + 1] == op_char;) { + auto [next, unused] = first_operator(content, position += 2); + std::bitset<128> operand_chars; + if (!parse_operand(next != std::string::npos + ? content.substr(position, next - position) + : content.substr(position), + operand_chars)) { + return false; + } + + result = + op_char == '-' ? (result & ~operand_chars) : (result & operand_chars); + position = next; + } + + return true; +} + +inline auto expand_char_class(const std::string &content) + -> std::optional { + if (first_operator(content, 0).first == std::string::npos && + !has_nested_brackets(content)) { + return "[" + content + "]"; + } + + std::bitset<128> result; + if (!expand_set_ops(content, result)) { + return std::nullopt; + } + + return result.none() ? 
"(?!)" : bitset_to_class(result); +} + +inline auto translate_property(std::string_view name, bool negated) + -> std::optional { + for (const auto &[prop_name, pcre_name] : unicode_property_map) { + if (name == prop_name) { + return std::string("\\") + (negated ? 'P' : 'p') + '{' + + std::string(pcre_name) + '}'; + } + } + + return std::nullopt; +} + +inline auto is_escaped(const std::string &pattern, std::size_t index) -> bool { + std::size_t count = 0; + for (auto position = index; position > 0 && pattern[position - 1] == '\\'; + --position) { + ++count; + } + + return (count % 2) == 1; +} + +struct ShorthandExpansion { + char escape; + std::string_view inside_class; + std::string_view outside_class; +}; + +// clang-format off +constexpr std::array shorthand_expansions{{ + {.escape = 'd', .inside_class = "0-9", .outside_class = "[0-9]"}, + {.escape = 'D', .inside_class = "", .outside_class = "[^0-9]"}, + {.escape = 'w', .inside_class = "a-zA-Z0-9_", .outside_class = "[a-zA-Z0-9_]"}, + {.escape = 'W', .inside_class = "", .outside_class = "[^a-zA-Z0-9_]"}, + {.escape = 's', .inside_class = R"(\t\v\f \x{00A0}\x{FEFF}\p{Zs}\n\r\x{2028}\x{2029})", + .outside_class = R"([\t\v\f \x{00A0}\x{FEFF}\p{Zs}\n\r\x{2028}\x{2029}])"}, + {.escape = 'S', .inside_class = "", .outside_class = R"([^\t\v\f \x{00A0}\x{FEFF}\p{Zs}\n\r\x{2028}\x{2029}])"}, + {.escape = 'b', .inside_class = "", .outside_class = R"((?:(? 
const ShorthandExpansion * { + for (const auto &expansion : shorthand_expansions) { + if (expansion.escape == escape) { + return &expansion; + } + } + + return nullptr; +} + +} // namespace + +inline auto preprocess_regex(const std::string &pattern) + -> std::optional { + std::string result; + result.reserve(pattern.size() * 2); + bool in_class = false; + + for (std::size_t position = 0; position < pattern.size(); ++position) { + const char current = pattern[position]; + if (current == '[' && !is_escaped(pattern, position) && !in_class) { + const auto end = find_bracket_end(pattern, position + 1); + const auto content = pattern.substr(position + 1, end - position - 2); + if (content.find("--") != std::string::npos || + content.find("&&") != std::string::npos || + has_nested_brackets(content)) { + const auto expanded = expand_char_class(content); + if (!expanded) { + return std::nullopt; } - continue; - } - if (next_char == 'd') { - result += in_char_class ? "0-9" : "[0-9]"; - ++index; + result += *expanded; + position = end - 1; continue; } - if (next_char == 'D' && !in_char_class) { - result += "[^0-9]"; - ++index; - continue; - } + in_class = true; + } else if (current == ']' && !is_escaped(pattern, position)) { + in_class = false; + } - if (next_char == 'w') { - result += in_char_class ? 
"a-zA-Z0-9_" : "[a-zA-Z0-9_]"; - ++index; - continue; - } + if (current != '\\' || position + 1 >= pattern.size()) { + result += current; + continue; + } - if (next_char == 'W' && !in_char_class) { - result += "[^a-zA-Z0-9_]"; - ++index; - continue; - } + const char next = pattern[position + 1]; + if (std::string_view{"\\[]^$"}.find(next) != std::string_view::npos) { + result += current; + result += next; + ++position; + continue; + } + + if (next == 'u' && position + 2 < pattern.size()) { + if (pattern[position + 2] == '{') { + result += "\\x{"; + for (position += 3; + position < pattern.size() && pattern[position] != '}'; + ++position) { + result += pattern[position]; + } + + if (position < pattern.size()) { + result += '}'; + } - if (next_char == 's') { - result += - in_char_class - ? R"(\t\v\f \x{00A0}\x{FEFF}\p{Zs}\n\r\x{2028}\x{2029})" - : R"([\t\v\f \x{00A0}\x{FEFF}\p{Zs}\n\r\x{2028}\x{2029}])"; - ++index; continue; } - if (next_char == 'S' && !in_char_class) { - result += R"([^\t\v\f \x{00A0}\x{FEFF}\p{Zs}\n\r\x{2028}\x{2029}])"; - ++index; + if (position + 5 < pattern.size() && all_hex(pattern, position + 2, 4)) { + result += "\\x{" + pattern.substr(position + 2, 4) + '}'; + position += 5; continue; } + } - if (next_char == 'b' && !in_char_class) { - result += - R"((?:(?inside_class.empty()) { + result += std::string{current} + next; + } else { + result += in_class ? 
expansion->inside_class : expansion->outside_class; + } + + ++position; + } else { + result += current; + } } return result; diff --git a/src/core/regex/regex.cc b/src/core/regex/regex.cc index e14e93c9f..84c24ffe2 100644 --- a/src/core/regex/regex.cc +++ b/src/core/regex/regex.cc @@ -39,11 +39,16 @@ auto to_regex(const std::string &pattern) -> std::optional { return RegexTypeRange{minimum, maximum}; } - const std::string pcre2_pattern{preprocess_regex(pattern)}; + const auto pcre2_pattern{preprocess_regex(pattern)}; + if (!pcre2_pattern.has_value()) { + return std::nullopt; + } + int pcre2_error_code{0}; PCRE2_SIZE pcre2_error_offset{0}; pcre2_code *pcre2_regex_raw{pcre2_compile( - reinterpret_cast(pcre2_pattern.c_str()), pcre2_pattern.size(), + reinterpret_cast(pcre2_pattern.value().c_str()), + pcre2_pattern.value().size(), PCRE2_UTF | PCRE2_UCP | PCRE2_NO_AUTO_CAPTURE | PCRE2_DOTALL | PCRE2_DOLLAR_ENDONLY | PCRE2_NEVER_BACKSLASH_C | PCRE2_NO_UTF_CHECK, &pcre2_error_code, &pcre2_error_offset, nullptr)}; diff --git a/test/regex/regex_matches_ecma262_test.cc b/test/regex/regex_matches_ecma262_test.cc index b7a6d8c33..230866df3 100644 --- a/test/regex/regex_matches_ecma262_test.cc +++ b/test/regex/regex_matches_ecma262_test.cc @@ -970,13 +970,12 @@ TEST(Regex_matches, ecma262_backslash_at_end) { TEST(Regex_matches, ecma262_nested_char_class_attempt) { const auto regex{sourcemeta::core::to_regex("[[abc]]")}; EXPECT_TRUE(regex.has_value()); - EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a]")); - EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b]")); - EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c]")); - EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "[]")); - EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); - EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "]")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + 
EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c")); EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "d")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "[")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "]")); } TEST(Regex_matches, ecma262_dollar_before_alternation) { @@ -1014,9 +1013,6 @@ TEST(Regex_matches, ecma262_right_bracket_negated_class) { TEST(Regex_matches, ecma262_dollar_before_opening_paren) { const auto regex{sourcemeta::core::to_regex("$(abc)")}; EXPECT_TRUE(regex.has_value()); - // In ECMA-262, $ is always an end assertion - // Pattern "$(abc)" means: assert end, then match "abc" - // This can NEVER match anything EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "$abc")); EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "abc")); } @@ -1033,9 +1029,6 @@ TEST(Regex_matches, ecma262_caret_not_at_start_of_class) { TEST(Regex_matches, ecma262_caret_in_middle_of_pattern) { const auto regex{sourcemeta::core::to_regex("a^b")}; EXPECT_TRUE(regex.has_value()); - // In ECMA-262, ^ is always a start assertion, never a literal - // Pattern "a^b" means: match "a", assert start, match "b" - // This can NEVER match anything EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a^b")); EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "ab")); EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); @@ -1068,9 +1061,6 @@ TEST(Regex_matches, ecma262_dash_at_start_of_class) { TEST(Regex_matches, ecma262_double_caret) { const auto regex{sourcemeta::core::to_regex("^^abc")}; EXPECT_TRUE(regex.has_value()); - // In ECMA-262, ^ is always a start assertion - // Pattern "^^abc" means: assert start, assert start (redundant), match "abc" - // This matches strings starting with "abc" EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "abc")); EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "abcdef")); EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "^abc")); @@ -1095,9 +1085,6 @@ TEST(Regex_matches, 
ecma262_caret_after_opening_paren) { TEST(Regex_matches, ecma262_dollar_then_caret) { const auto regex{sourcemeta::core::to_regex("a$^b")}; EXPECT_TRUE(regex.has_value()); - // In ECMA-262, $ and ^ are always assertions - // Pattern "a$^b" means: match "a", assert end, assert start, match "b" - // This can NEVER match anything EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a$^b")); EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "ab")); } @@ -1246,9 +1233,6 @@ TEST(Regex_matches, ecma262_hex_escape) { TEST(Regex_matches, ecma262_dollar_before_bracket) { const auto regex{sourcemeta::core::to_regex("$[abc]")}; EXPECT_TRUE(regex.has_value()); - // In ECMA-262, $ is always an end assertion - // Pattern "$[abc]" means: assert end, then match one of a/b/c - // This can NEVER match anything EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "$a")); EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "$b")); EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); @@ -1294,9 +1278,6 @@ TEST(Regex_matches, ecma262_dollar_at_start) { TEST(Regex_matches, ecma262_caret_at_end) { const auto regex{sourcemeta::core::to_regex("abc^")}; EXPECT_TRUE(regex.has_value()); - // In ECMA-262, ^ is always a start assertion - // Pattern "abc^" means: match "abc", then assert start - // This can NEVER match anything EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "abc^")); EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "abc")); } @@ -1360,9 +1341,6 @@ TEST(Regex_matches, ecma262_caret_dollar_together) { TEST(Regex_matches, ecma262_multiple_carets_at_start) { const auto regex{sourcemeta::core::to_regex("^^^abc")}; EXPECT_TRUE(regex.has_value()); - // In ECMA-262, ^ is always a start assertion - // Pattern "^^^abc" means: assert start three times (redundant), match "abc" - // This matches strings starting with "abc" EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "abc")); EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "abcdef")); 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "^^abc")); @@ -1375,3 +1353,1573 @@ TEST(Regex_matches, ecma262_multiple_dollars_at_end) { EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "abc")); EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "abc$")); } + +TEST(Regex_matches, ecma262_v_flag_set_subtraction) { + const auto regex{sourcemeta::core::to_regex("[[a-z]--a]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_set_intersection) { + const auto regex{sourcemeta::core::to_regex("[[\\d]&&[0-5]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "9")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_vowels) { + const auto regex{sourcemeta::core::to_regex("[[a-z]--[aeiou]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "i")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "o")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "u")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_range) { + const auto regex{sourcemeta::core::to_regex("[[0-9]--[3-7]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "1")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "2")); + 
EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "8")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "3")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "7")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_from_digit) { + const auto regex{sourcemeta::core::to_regex("[[\\d]--[0]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "1")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_from_word) { + const auto regex{sourcemeta::core::to_regex("[[\\w]--[0-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "_")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "!")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_word_lowercase) { + const auto regex{sourcemeta::core::to_regex("[[\\w]&&[a-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "_")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_even_digits) { + const auto regex{sourcemeta::core::to_regex("[[\\d]&&[02468]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + 
EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "2")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "8")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "1")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "9")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_uppercase) { + const auto regex{sourcemeta::core::to_regex("[[A-Za-z]--[A-Z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "Z")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_same_char) { + const auto regex{sourcemeta::core::to_regex("[[a]--[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_same_range) { + const auto regex{sourcemeta::core::to_regex("[[a-z]&&[a-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "m")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_disjoint) { + const auto regex{sourcemeta::core::to_regex("[[a-c]&&[x-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "x")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "m")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_hex_letters) { + const auto regex{sourcemeta::core::to_regex("[[0-9a-f]--[a-f]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), 
"9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "f")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_partial_overlap) { + const auto regex{sourcemeta::core::to_regex("[[a-m]&&[h-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "h")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "j")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "m")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "g")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "n")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_start_of_range) { + const auto regex{sourcemeta::core::to_regex("[[a-e]--[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "e")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_end_of_range) { + const auto regex{sourcemeta::core::to_regex("[[a-e]--[e]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "d")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_middle_of_range) { + const auto regex{sourcemeta::core::to_regex("[[a-e]--[c]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "d")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "e")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "c")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_single_char) { + const 
auto regex{sourcemeta::core::to_regex("[[a-z]&&[m]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "m")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, ecma262_v_flag_subtraction_in_pattern) { + const auto regex{sourcemeta::core::to_regex("^[[a-z]--[aeiou]]+$")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "bcdfg")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "xyz")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "hello")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "aaa")); +} + +TEST(Regex_matches, ecma262_v_flag_intersection_in_pattern) { + const auto regex{sourcemeta::core::to_regex("^[\\d&&[0-5]]+$")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "012345")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "6789")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "123456")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_digit_from_word) { + const auto regex{sourcemeta::core::to_regex("[[\\w]--[\\d]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "_")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "5")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_digit_word) { + const auto regex{sourcemeta::core::to_regex("[[\\d]&&[\\w]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "_")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_specific_punct) { + const auto regex{sourcemeta::core::to_regex("[[\\!-\\/]--[#$%]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\"")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "&")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "/")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "#")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "$")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "%")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_empty) { + const auto regex{sourcemeta::core::to_regex("[[a-c]--[x-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "x")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_printable_alpha) { + const auto regex{sourcemeta::core::to_regex("[[ -~]&&[A-Z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "M")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), " ")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_multiple_ranges) { + const auto regex{sourcemeta::core::to_regex("[[a-z]--[a-cx-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "d")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "m")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "w")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "c")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "x")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "y")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_space) { + const auto regex{sourcemeta::core::to_regex("[[ -\\/]--[ ]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "/")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), " ")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_to_single) { + const auto regex{sourcemeta::core::to_regex("[[a-c]&&[c-e]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "d")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); +} + +TEST(Regex_matches, ecma262_v_flag_subtraction_with_quantifier) { + const auto regex{sourcemeta::core::to_regex("^[[0-9]--[0]]{3}$")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "123")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "999")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "012")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "100")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "12")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_caret) { + const auto regex{sourcemeta::core::to_regex("[[!-@]--[\\^]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "@")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "^")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_alphanum_upper) { + const auto regex{sourcemeta::core::to_regex("[[0-9A-Za-z]&&[A-Z0-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_entire_range) { + const auto regex{sourcemeta::core::to_regex("[[a-c]--[a-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "c")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "d")); +} + +TEST(Regex_matches, ecma262_v_flag_multiple_set_ops_pattern) { + const auto regex{ + sourcemeta::core::to_regex("^[[a-z]--[aeiou]][[0-9]--[0]]$")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b1")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a1")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e5")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_digit_ranges) { + const auto regex{sourcemeta::core::to_regex("[[0-5]&&[3-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "3")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "4")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "2")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "6")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "9")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_preserving_hyphen) { + const auto regex{sourcemeta::core::to_regex("[[0-9\\-]--[5-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "4")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "-")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "9")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_word_underscore) { + const auto regex{sourcemeta::core::to_regex("[[\\w]&&[_]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "_")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_from_union) { + const auto regex{sourcemeta::core::to_regex("[[a-zA-Z0-9]--[A-Z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "Z")); +} + +TEST(Regex_matches, ecma262_v_flag_large_subtraction) { + const auto regex{sourcemeta::core::to_regex("[[\\x20-\\x7e]--[a-zA-Z0-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), " ")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "@")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "~")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "Z")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "5")); +} + +TEST(Regex_matches, ecma262_v_flag_hex_range_subtract_literal) { + const auto regex{sourcemeta::core::to_regex("[[\\x61-\\x7a]--[aeiou]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_single_from_single) { + const auto regex{sourcemeta::core::to_regex("[[a]--[b]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_single_chars_same) { + const auto regex{sourcemeta::core::to_regex("[[a]&&[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_single_chars_different) { + const auto regex{sourcemeta::core::to_regex("[[a]&&[b]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); +} + +TEST(Regex_matches, ecma262_v_flag_hex_intersect_hex) { + const auto regex{ + sourcemeta::core::to_regex("[[\\x30-\\x39]&&[\\x33-\\x37]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "3")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "7")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "2")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "8")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "9")); +} 
+
+TEST(Regex_matches, ecma262_v_flag_subtract_all_digits_from_word) {
+  // Chained nested subtraction: strip digits, then the underscore, from \w
+  const auto regex{sourcemeta::core::to_regex("[[[\\w]--[\\d]]--[_]]")};
+  EXPECT_TRUE(regex.has_value());
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a"));
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "_"));
+}
+
+TEST(Regex_matches, ecma262_v_flag_intersect_with_escaped_backslash) {
+  // The escaped backslash is the only member shared by both operands
+  const auto regex{sourcemeta::core::to_regex("[[\\\\a-z]&&[\\\\]]")};
+  EXPECT_TRUE(regex.has_value());
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\\"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z"));
+}
+
+TEST(Regex_matches, ecma262_v_flag_subtract_control_chars) {
+  // Remove \x00-\x08 from the control range \x00-\x1f
+  const auto regex{
+      sourcemeta::core::to_regex("[[\\x00-\\x1f]--[\\x00-\\x08]]")};
+  EXPECT_TRUE(regex.has_value());
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\t"));
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\n"));
+  // A "\x00" literal is an empty C string once converted, so the NUL subject
+  // must be built with an explicit length to really exercise the subtraction
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), std::string(1, '\0')));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\x08"));
+}
+
+TEST(Regex_matches, ecma262_v_flag_intersect_with_tab_newline) {
+  // Only tab and newline appear in both operands
+  const auto regex{sourcemeta::core::to_regex("[[\\t\\n\\r ]&&[\\t\\n]]")};
+  EXPECT_TRUE(regex.has_value());
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\t"));
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\n"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\r"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), " "));
+}
+
+TEST(Regex_matches, ecma262_v_flag_subtract_leaves_single) {
+  // Subtracting a and b from a-c leaves only c
+  const auto regex{sourcemeta::core::to_regex("[[a-c]--[ab]]")};
+  EXPECT_TRUE(regex.has_value());
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b"));
+}
+
+TEST(Regex_matches, ecma262_v_flag_intersect_overlapping_three_char) {
+  // b and c are the members common to both literal sets
+  const auto regex{sourcemeta::core::to_regex("[[abc]&&[bcd]]")};
+  EXPECT_TRUE(regex.has_value());
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b"));
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "d"));
+}
+
+TEST(Regex_matches, ecma262_v_flag_mixed_hex_and_literal_in_subtract) {
+  // Hex-escaped range \x41-\x5a (A-Z) minus the literal uppercase vowels
+  const auto regex{sourcemeta::core::to_regex("[[\\x41-\\x5a]--[AEIOU]]")};
+  EXPECT_TRUE(regex.has_value());
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "B"));
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "E"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a"));
+}
+
+TEST(Regex_matches, ecma262_v_flag_subtract_boundary_chars) {
+  // Drop both endpoints of the range !-~ while keeping its interior
+  const auto regex{sourcemeta::core::to_regex("[[!-~]--[!~]]")};
+  EXPECT_TRUE(regex.has_value());
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\""));
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "}"));
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "!"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "~"));
+}
+
+TEST(Regex_matches, ecma262_v_flag_intersect_complement_like) {
+  // The first operand skips \x61-\x7a (a-z); intersecting it with the full
+  // \x00-\x7f range must keep that gap
+  const auto regex{
+      sourcemeta::core::to_regex("[[\\x00-\\x60\\x7b-\\x7f]&&[\\x00-\\x7f]]")};
+  EXPECT_TRUE(regex.has_value());
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "A"));
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0"));
+  EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "!"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a"));
+  EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z"));
+}
+
+TEST(Regex_matches, ecma262_v_flag_subtract_from_shorthand_with_hex) { + const auto regex{sourcemeta::core::to_regex("[[\\d]--[\\x30-\\x34]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "4")); +} + +TEST(Regex_matches, ecma262_v_flag_complex_alphanumeric_filter) { + const auto regex{sourcemeta::core::to_regex("[[0-9A-Fa-f]--[g-zG-Z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "f")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "F")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "g")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "G")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_ranges_at_boundary) { + const auto regex{sourcemeta::core::to_regex("[[a-f]&&[f-k]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "f")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "g")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "k")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_all_but_one_from_range) { + const auto regex{sourcemeta::core::to_regex("[[a-e]--[abde]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "d")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); +} + +TEST(Regex_matches, ecma262_v_flag_multiple_hex_escapes_in_subtraction) { + const auto regex{sourcemeta::core::to_regex( + "[[\\x20-\\x7e]--[\\x30-\\x39]--[\\x41-\\x5a]--[\\x61-\\x7a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), " ")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "@")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_mixed_subtract_then_intersect) { + const auto regex{sourcemeta::core::to_regex("[[[a-z]--[aeiou]]&&[a-m]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "m")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "n")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, ecma262_v_flag_mixed_intersect_then_subtract) { + const auto regex{sourcemeta::core::to_regex("[[[a-z]&&[a-m]]--[aeiou]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "m")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "n")); +} + +TEST(Regex_matches, ecma262_v_flag_nested_class_in_operand) { + const auto regex{sourcemeta::core::to_regex("[[[a-z][0-9]]--[aeiou]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + 
EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); +} + +TEST(Regex_matches, ecma262_v_flag_deeply_nested_brackets) { + const auto regex{sourcemeta::core::to_regex("[[[[a-z]]]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_nested_from_nested) { + const auto regex{sourcemeta::core::to_regex("[[[a-z]]--[[x-z]]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "w")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "x")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, ecma262_v_flag_shorthand_nested_subtract) { + const auto regex{sourcemeta::core::to_regex("[[[\\d]]--[0]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "1")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); +} + +TEST(Regex_matches, ecma262_v_flag_triple_chained_intersection) { + const auto regex{sourcemeta::core::to_regex("[[a-z]&&[a-m]&&[f-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "f")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "m")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "n")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, 
ecma262_v_flag_subtract_empty_result_in_chain) { + const auto regex{sourcemeta::core::to_regex("[[a]--[a]--[b]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_empty_result_in_chain) { + const auto regex{sourcemeta::core::to_regex("[[a-c]&&[x-z]&&[a-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "x")); +} + +TEST(Regex_matches, ecma262_v_flag_operator_inside_nested_not_toplevel) { + const auto regex{sourcemeta::core::to_regex("[[a\\-\\-b]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "-")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); +} + +TEST(Regex_matches, ecma262_v_flag_alternation_after_set_op) { + const auto regex{sourcemeta::core::to_regex("^([[a-z]--[aeiou]]|[0-9])$")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "ab")); +} + +TEST(Regex_matches, ecma262_v_flag_set_op_with_dot_star) { + const auto regex{sourcemeta::core::to_regex("^.*[[a-z]--[aeiou]]$")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "xxxb")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "123z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "xxxa")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "123e")); +} + +TEST(Regex_matches, ecma262_v_flag_shorthand_only_intersection) { + const auto regex{sourcemeta::core::to_regex("[[\\d]&&[\\w]]")}; + EXPECT_TRUE(regex.has_value()); + 
EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "_")); +} + +TEST(Regex_matches, ecma262_v_flag_double_nested_with_subtract) { + const auto regex{sourcemeta::core::to_regex("[[[a-z]]--[[aeiou]]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); +} + +TEST(Regex_matches, ecma262_v_flag_union_in_nested) { + const auto regex{sourcemeta::core::to_regex("[[[a-c][x-z]]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "x")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "d")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "w")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_from_union_nested) { + const auto regex{sourcemeta::core::to_regex("[[[a-z][0-9]]--[aeiou0]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "1")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); +} + +TEST(Regex_matches, ecma262_v_flag_four_way_intersection) { + const auto regex{sourcemeta::core::to_regex("[[a-z]&&[a-m]&&[f-z]&&[g-l]]")}; + EXPECT_TRUE(regex.has_value()); + 
EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "g")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "l")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "f")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "m")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_alternating_ops) { + const auto regex{ + sourcemeta::core::to_regex("[[[[a-z]--[aeiou]]&&[a-m]]--[klm]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "j")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "k")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "n")); +} + +TEST(Regex_matches, ecma262_v_flag_repeated_char_class) { + const auto regex{sourcemeta::core::to_regex("^[[a-c]][[d-f]]$")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "ad")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "cf")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "aa")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "dd")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_whitespace_subtract_space) { + const auto regex{sourcemeta::core::to_regex("[[\\s]--[ ]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\t")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\n")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), " ")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_negated_digit_intersect) { + const auto regex{sourcemeta::core::to_regex("[[\\D]&&[a-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), 
"z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "9")); +} + +TEST(Regex_matches, ecma262_v_flag_negated_word_intersect) { + const auto regex{sourcemeta::core::to_regex("[[\\W]&&[\\!-\\/]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "/")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); +} + +TEST(Regex_matches, ecma262_v_flag_negated_space_intersect) { + const auto regex{sourcemeta::core::to_regex("[[\\S]&&[a-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), " ")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\t")); +} + +TEST(Regex_matches, ecma262_v_flag_caret_negation_with_subtract) { + const auto regex{sourcemeta::core::to_regex("[[^a-z]--[0-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); +} + +TEST(Regex_matches, ecma262_v_flag_single_char_range) { + const auto regex{sourcemeta::core::to_regex("[[a-a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); +} + +TEST(Regex_matches, ecma262_v_flag_literal_hyphen_at_end) { + const auto regex{sourcemeta::core::to_regex("[[a-z\\-]--[aeiou]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "-")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); +} + +TEST(Regex_matches, ecma262_v_flag_literal_hyphen_at_start) { + const auto regex{sourcemeta::core::to_regex("[[\\-a-z]--[aeiou]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "-")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_with_caret_in_operand) { + const auto regex{sourcemeta::core::to_regex("[[a-z]--[^aeiou]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "e")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_with_caret_in_operand) { + const auto regex{sourcemeta::core::to_regex("[[a-z]&&[^x-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "w")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "x")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, ecma262_v_flag_unicode_escape_4digit_subtract) { + const auto regex{sourcemeta::core::to_regex("[[\\u0041-\\u005A]--[ABC]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "D")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "B")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "C")); +} + +TEST(Regex_matches, ecma262_v_flag_unicode_escape_brace_subtract) { + const auto regex{ + sourcemeta::core::to_regex("[[\\u{0041}-\\u{005A}]--[ABC]]")}; + 
EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "D")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "B")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "C")); +} + +TEST(Regex_matches, ecma262_v_flag_backspace_in_class) { + const auto regex{sourcemeta::core::to_regex("[[\\b]--[\\x08]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_control_escape_intersect) { + const auto regex{sourcemeta::core::to_regex("[[\\cA-\\cZ]&&[\\x01-\\x03]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\x01")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\x03")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\x04")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); +} + +TEST(Regex_matches, ecma262_v_flag_empty_first_operand) { + const auto regex{sourcemeta::core::to_regex("[--[a-z]]")}; + EXPECT_FALSE(regex.has_value()); +} + +TEST(Regex_matches, ecma262_v_flag_empty_second_operand) { + const auto regex{sourcemeta::core::to_regex("[[a-z]--]")}; + EXPECT_FALSE(regex.has_value()); +} + +TEST(Regex_matches, ecma262_v_flag_double_operator_subtraction) { + const auto regex{sourcemeta::core::to_regex("[a-z----abc]")}; + EXPECT_FALSE(regex.has_value()); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_abc_from_az) { + const auto regex{sourcemeta::core::to_regex("[[a-z]--[abc]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "d")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "c")); +} + +TEST(Regex_matches, ecma262_v_flag_rightbracket_first_in_nested) { + const auto regex{sourcemeta::core::to_regex("[[\\]-a]--[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "]")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_from_single_shorthand) { + const auto regex{sourcemeta::core::to_regex("[[\\d]--[5]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_two_shorthands) { + const auto regex{sourcemeta::core::to_regex("[[\\d]&&[\\w]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "_")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_shorthand_from_shorthand) { + const auto regex{sourcemeta::core::to_regex("[[\\w]--[\\d]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "_")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "9")); +} + +/* Both operands are wrapped three class levels deep, one level more than the double-nested subtraction case. */ +TEST(Regex_matches, ecma262_v_flag_triple_nested_subtract) { + const auto regex{sourcemeta::core::to_regex("[[[[a-z]]]--[[[aeiou]]]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c")); +
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_empty_nested) { + const auto regex{sourcemeta::core::to_regex("[[a-z]--[]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); +} + +TEST(Regex_matches, ecma262_v_flag_word_minus_alphanumeric) { + const auto regex{sourcemeta::core::to_regex("[[\\w]--[a-zA-Z0-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "_")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); +} + +TEST(Regex_matches, ecma262_v_flag_multiple_escape_sequences_subtract) { + const auto regex{sourcemeta::core::to_regex("[[[\\t\\n\\r]]--[\\t]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\n")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\r")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\t")); +} + +TEST(Regex_matches, ecma262_v_flag_mixed_unicode_escape_types) { + const auto regex{sourcemeta::core::to_regex("[[\\u0041-\\u{005A}]--[ABC]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "D")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "B")); +} + +TEST(Regex_matches, ecma262_v_flag_control_char_range_intersect) { + const auto regex{ + sourcemeta::core::to_regex("[[\\x00-\\x1F]&&[\\x00-\\x0F]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), std::string("\x00", 1))); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\x0F")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\x10")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\x1F")); +} + +TEST(Regex_matches, ecma262_v_flag_del_char_subtract) { + const auto regex{sourcemeta::core::to_regex("[[\\x7E-\\x7F]--[\\x7E]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\x7F")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "~")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_form_feed_and_vertical_tab) { + const auto regex{sourcemeta::core::to_regex("[[\\f\\v]--[\\f]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\v")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\f")); +} + +TEST(Regex_matches, ecma262_v_flag_null_char_in_operation) { + const auto regex{sourcemeta::core::to_regex("[[\\x00-\\x02]--[\\x00]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\x01")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\x02")); + EXPECT_FALSE( + sourcemeta::core::matches(regex.value(), std::string("\x00", 1))); +} + +TEST(Regex_matches, ecma262_v_flag_printable_minus_alphanumeric) { + const auto regex{sourcemeta::core::to_regex("[[\\x20-\\x7E]--[a-zA-Z0-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), " ")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "@")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "5")); +} + +TEST(Regex_matches, ecma262_v_flag_five_way_chained_intersection) { + const auto regex{ + sourcemeta::core::to_regex("[[a-z]&&[a-w]&&[a-t]&&[a-q]&&[a-n]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), 
"n")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "o")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, ecma262_v_flag_lowercase_control_escape) { + const auto regex{sourcemeta::core::to_regex("[[\\ca-\\cz]&&[\\x01-\\x03]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\x01")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\x03")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\x04")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_all_word_chars) { + const auto regex{sourcemeta::core::to_regex("[[\\w]--[\\w]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "_")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_with_itself) { + const auto regex{sourcemeta::core::to_regex("[[\\d]&&[\\d]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_double_backslash_in_operation) { + const auto regex{sourcemeta::core::to_regex("[[\\\\]--[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\\")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_from_full_ascii_printable) { + const auto regex{ + sourcemeta::core::to_regex("[[[[[[\\x20-\\x7E]--[\\x20-\\x2F]]--[\\x3A-" + "\\x40]]--[\\x5B-\\x60]]--[\\x7" + "B-\\x7E]]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), " ")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "@")); +} + +TEST(Regex_matches, ecma262_v_flag_all_hex_digits_only) { + const auto regex{sourcemeta::core::to_regex("[[\\d]&&[a-fA-F0-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "F")); +} + +TEST(Regex_matches, ecma262_v_flag_hex_letter_only) { + const auto regex{sourcemeta::core::to_regex("[[a-fA-F0-9]--[\\d]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "f")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "F")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "g")); +} + +TEST(Regex_matches, ecma262_v_flag_escaped_special_regex_chars) { + const auto regex{sourcemeta::core::to_regex("[[.+*?]--[.]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "+")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "*")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "?")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), ".")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_escaped_open_bracket) { + const auto regex{sourcemeta::core::to_regex("[[\\[]--[x]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "[")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "x")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "]")); +} + +TEST(Regex_matches, ecma262_v_flag_only_special_punctuation) { + const auto regex{sourcemeta::core::to_regex("[[\\!-\\/]&&[\\(-+]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "(")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), ")")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "*")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "+")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "/")); +} + +TEST(Regex_matches, ecma262_v_flag_range_with_same_start_end) { + const auto regex{sourcemeta::core::to_regex("[[a-a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); +} + +TEST(Regex_matches, ecma262_v_flag_at_sign_and_backtick) { + const auto regex{sourcemeta::core::to_regex("[[@`]--[@]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "`")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "@")); +} + +TEST(Regex_matches, ecma262_v_flag_escaped_hyphen_subtract) { + const auto regex{sourcemeta::core::to_regex("[[a\\-z]--[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "-")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); +} + +TEST(Regex_matches, ecma262_v_flag_hex_to_literal_range) { + const auto regex{sourcemeta::core::to_regex("[[\\x41-Z]--[ABC]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "D")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "C")); +} + +TEST(Regex_matches, ecma262_v_flag_literal_to_hex_range) { + const auto regex{sourcemeta::core::to_regex("[[A-\\x5A]--[ABC]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "D")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "C")); +} + +TEST(Regex_matches, ecma262_v_flag_multiple_char_classes_in_pattern) { + const auto regex{ + sourcemeta::core::to_regex("[[a-z]--[aeiou]][[A-Z]--[AEIOU]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "bB")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "cD")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "aA")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "bE")); +} + +TEST(Regex_matches, ecma262_v_flag_in_group) { + const auto regex{sourcemeta::core::to_regex("(x[[a-z]--[aeiou]]y)")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "xby")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "xcy")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "xay")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "xey")); +} + +TEST(Regex_matches, ecma262_v_flag_with_quantifier_range) { + const auto regex{sourcemeta::core::to_regex("^[[a-z]--[aeiou]]{2,4}$")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "bc")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "bcdf")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "bcdfg")); +} + +TEST(Regex_matches, ecma262_v_flag_anchored_both_ends) { + const auto regex{sourcemeta::core::to_regex("^[[a-z]--[aeiou]]$")}; + EXPECT_TRUE(regex.has_value()); + 
EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "bb")); +} + +TEST(Regex_matches, ecma262_v_flag_alternation_both_branches) { + const auto regex{ + sourcemeta::core::to_regex("[[a-m]--[aeiou]]|[[n-z]--[aeiou]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "p")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "o")); +} + +TEST(Regex_matches, ecma262_v_flag_ten_way_chained_subtraction) { + const auto regex{sourcemeta::core::to_regex( + "[[a-z]--[a]--[b]--[c]--[d]--[e]--[f]--[g]--[h]--[i]--[j]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "k")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "j")); +} + +TEST(Regex_matches, ecma262_v_flag_single_char_minus_single_char_same) { + const auto regex{sourcemeta::core::to_regex("[[x]--[x]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "x")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "y")); +} + +TEST(Regex_matches, ecma262_v_flag_single_char_minus_single_char_different) { + const auto regex{sourcemeta::core::to_regex("[[x]--[y]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "x")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "y")); +} + +TEST(Regex_matches, ecma262_v_flag_caret_not_at_start_in_nested) { + const auto regex{sourcemeta::core::to_regex("[[a^b]--[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "^")); + 
EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_dollar_in_class) { + const auto regex{sourcemeta::core::to_regex("[[$a]--[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "$")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_pipe_in_class) { + const auto regex{sourcemeta::core::to_regex("[[\\|a]--[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "|")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_lookahead_with_operation) { + const auto regex{sourcemeta::core::to_regex("(?=[[a-z]--[aeiou]]).")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "e")); +} + +TEST(Regex_matches, ecma262_v_flag_lookbehind_with_operation) { + const auto regex{sourcemeta::core::to_regex("(?<=[[a-z]--[aeiou]])x")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "bx")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "ax")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "ex")); +} + +TEST(Regex_matches, ecma262_v_flag_full_ascii_intersect_to_alpha) { + const auto regex{sourcemeta::core::to_regex("[[\\x00-\\x7F]&&[a-zA-Z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "Z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "!")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_to_result_one_char) { + const 
auto regex{sourcemeta::core::to_regex("[[a-c]--[ab]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "d")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_to_result_one_char) { + const auto regex{sourcemeta::core::to_regex("[[abc]&&[cde]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "c")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "d")); +} + +TEST(Regex_matches, ecma262_v_flag_space_in_operation) { + const auto regex{sourcemeta::core::to_regex("[[ a]--[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), " ")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_bell_char_subtract) { + const auto regex{sourcemeta::core::to_regex("[[\\x07-\\x09]--[\\x08]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\x07")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\t")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\b")); +} + +TEST(Regex_matches, ecma262_v_flag_escape_sequence_vs_hex) { + const auto regex{sourcemeta::core::to_regex("[\\t--[\\x09]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\t")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "t")); +} + +TEST(Regex_matches, ecma262_v_flag_carriage_return_line_feed) { + const auto regex{sourcemeta::core::to_regex("[[\\r\\n]--[\\n]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\r")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "\n")); +} + +TEST(Regex_matches, 
ecma262_v_flag_repeated_same_subtraction) { + const auto regex{sourcemeta::core::to_regex("[[a-z]--[aeiou]--[aeiou]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_quadruple_nested) { + const auto regex{sourcemeta::core::to_regex("[[[[a-z]]]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "z")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "A")); +} + +TEST(Regex_matches, ecma262_v_flag_subtract_superset) { + const auto regex{sourcemeta::core::to_regex("[[abc]--[a-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "c")); +} + +TEST(Regex_matches, ecma262_v_flag_intersect_disjoint_sets) { + const auto regex{sourcemeta::core::to_regex("[[abc]&&[xyz]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "x")); +} + +TEST(Regex_matches, ecma262_v_flag_negative_lookahead_with_operation) { + const auto regex{sourcemeta::core::to_regex("a(?![[a-z]--[aeiou]])")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "ae")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a1")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "ab")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "az")); +} + +/* The "x" must not be immediately preceded by a lowercase consonant. */ +TEST(Regex_matches, ecma262_v_flag_negative_lookbehind_with_operation) { + const auto regex{sourcemeta::core::to_regex("(?<![[a-z]--[aeiou]])x")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "ax")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "1x")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "bx")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "zx")); +} + +/* Angle brackets and the equals sign are ordinary class characters when they appear singly; only "=" is subtracted. */ +TEST(Regex_matches, ecma262_v_flag_angle_brackets_in_class) { + const auto regex{sourcemeta::core::to_regex("[[<=>]--[=]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "<")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(),
">")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "=")); +} + +TEST(Regex_matches, ecma262_v_flag_curly_braces_in_class) { + const auto regex{sourcemeta::core::to_regex("[[\\{\\}]--[\\{]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "}")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "{")); +} + +TEST(Regex_matches, ecma262_v_flag_parentheses_in_class) { + const auto regex{sourcemeta::core::to_regex("[[\\(\\)]--[\\(]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), ")")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "(")); +} + +TEST(Regex_matches, ecma262_v_flag_complex_subtract_intersect_subtract) { + const auto regex{ + sourcemeta::core::to_regex("[[[[a-z]--[aeiou]]&&[b-y]]--[xyz]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "w")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "x")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "z")); +} + +TEST(Regex_matches, ecma262_v_flag_ampersand_in_class) { + const auto regex{sourcemeta::core::to_regex("[[&a]--[a]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "&")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_percent_and_hash) { + const auto regex{sourcemeta::core::to_regex("[[%#]--[%]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "#")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "%")); +} + +TEST(Regex_matches, ecma262_v_flag_colon_semicolon_comma) { + const auto regex{sourcemeta::core::to_regex("[[:;,]--[;]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), ":")); + 
EXPECT_TRUE(sourcemeta::core::matches(regex.value(), ",")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), ";")); +} + +TEST(Regex_matches, ecma262_v_flag_question_exclamation) { + const auto regex{sourcemeta::core::to_regex("[[?!]--[?]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "!")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "?")); +} + +TEST(Regex_matches, ecma262_v_flag_underscore_intersect) { + const auto regex{sourcemeta::core::to_regex("[[\\w]&&[^a-zA-Z0-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "_")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); +} + +TEST(Regex_matches, ecma262_v_flag_digits_odd_only) { + const auto regex{sourcemeta::core::to_regex("[[\\d]--[02468]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "1")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "3")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "9")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "8")); +} + +TEST(Regex_matches, ecma262_v_flag_digits_even_only) { + const auto regex{sourcemeta::core::to_regex("[[\\d]&&[02468]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "0")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "4")); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "8")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "1")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "9")); +} + +TEST(Regex_matches, ecma262_v_flag_single_quote_double_quote) { + const auto regex{sourcemeta::core::to_regex("[['\"']--[']]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\"")); + 
EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "'")); +} + +TEST(Regex_matches, ecma262_v_flag_forward_slash_backslash) { + const auto regex{sourcemeta::core::to_regex("[[\\\\]--[\\/]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "\\")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "/")); +} + +TEST(Regex_matches, ecma262_v_flag_invalid_unbalanced_parens) { + const auto regex{sourcemeta::core::to_regex("([[a-z]--[aeiou]]")}; + EXPECT_FALSE(regex.has_value()); +} + +TEST(Regex_matches, ecma262_v_flag_invalid_bad_quantifier) { + const auto regex{sourcemeta::core::to_regex("[[a-z]--[aeiou]]{5,2}")}; + EXPECT_FALSE(regex.has_value()); +} + +TEST(Regex_matches, ecma262_v_flag_shorthand_intersection_valid) { + const auto regex{sourcemeta::core::to_regex("[\\d&&[0-5]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "3")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "7")); +} + +TEST(Regex_matches, ecma262_v_flag_shorthand_subtraction_valid) { + const auto regex{sourcemeta::core::to_regex("[\\w--[0-9]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "5")); +} + +TEST(Regex_matches, ecma262_v_flag_shorthand_intersection_with_range_valid) { + const auto regex{sourcemeta::core::to_regex("[\\w&&[a-z]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "b")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "B")); +} + +TEST(Regex_matches, ecma262_v_flag_shorthand_subtract_single_valid) { + const auto regex{sourcemeta::core::to_regex("[\\d--[0]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "0")); +} + +TEST(Regex_matches, ecma262_v_flag_shorthand_to_shorthand_subtract_valid) { + const auto 
regex{sourcemeta::core::to_regex("[\\w--\\d]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "5")); +} + +TEST(Regex_matches, ecma262_v_flag_shorthand_to_shorthand_intersect_valid) { + const auto regex{sourcemeta::core::to_regex("[\\d&&\\w]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "a")); +} + +TEST(Regex_matches, ecma262_v_flag_chained_shorthand_ops_valid) { + const auto regex{sourcemeta::core::to_regex("[\\w--\\d--[_]]")}; + EXPECT_TRUE(regex.has_value()); + EXPECT_TRUE(sourcemeta::core::matches(regex.value(), "a")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "5")); + EXPECT_FALSE(sourcemeta::core::matches(regex.value(), "_")); +} + +TEST(Regex_matches, ecma262_v_flag_invalid_range_without_nesting) { + const auto regex{sourcemeta::core::to_regex("[\\x61-\\x7a--[aeiou]]")}; + EXPECT_FALSE(regex.has_value()); +} + +TEST(Regex_matches, ecma262_v_flag_invalid_mixed_ops_without_nesting) { + const auto regex{sourcemeta::core::to_regex("[[a-z]--[aeiou]&&[a-m]]")}; + EXPECT_FALSE(regex.has_value()); +} + +TEST(Regex_matches, ecma262_v_flag_invalid_unescaped_hyphen_in_class) { + const auto regex{sourcemeta::core::to_regex("[[a-z-]--[x]]")}; + EXPECT_FALSE(regex.has_value()); +} + +TEST(Regex_matches, ecma262_v_flag_invalid_unescaped_pipe) { + const auto regex{sourcemeta::core::to_regex("[[|a]--[a]]")}; + EXPECT_FALSE(regex.has_value()); +}