diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs
--- a/regex-syntax/src/unicode.rs
+++ b/regex-syntax/src/unicode.rs
@@ -243,7 +243,12 @@ impl<'a> ClassQuery<'a> {
         // a general category. (Currently, we don't even support the
         // 'Case_Folding' property. But if we do in the future, users will be
         // required to spell it out.)
-        if norm != "cf" {
+        //
+        // Also 'sc' refers to the 'Currency_Symbol' general category, but is
+        // also the abbreviation for the 'Script' property. So we avoid calling
+        // 'canonical_prop' for it too, which would erroneously normalize it
+        // to 'Script'.
+        if norm != "cf" && norm != "sc" {
             if let Some(canon) = canonical_prop(&norm)? {
                 return Ok(CanonicalClassQuery::Binary(canon));
             }
diff --git a/tests/unicode.rs b/tests/unicode.rs
index 9b3228624..748bbb79c 100644
--- a/tests/unicode.rs
+++ b/tests/unicode.rs
@@ -77,6 +77,7 @@ mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4)));
 // See: https://github.com/rust-lang/regex/issues/719
 mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4)));
 mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4)));
+mat!(uni_class_gencat_format_abbrev3, r"\p{Sc}", "$", Some((0, 1)));
 mat!(
     uni_class_gencat_initial_punctuation,
     r"\p{Initial_Punctuation}",