diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 51019db7c00e4..84596de12ae96 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -316,7 +316,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // Include the leading `'` in the real identifier, for macro // expansion purposes. See #12512 for the gory details of why // this is necessary. - let lifetime_name = self.str_from(start); + let lifetime_name = nfc_normalize(self.str_from(start)); self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); if starts_with_number { let span = self.mk_sp(start, self.pos); @@ -325,8 +325,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { .with_span(span) .stash(span, StashKey::LifetimeIsChar); } - let ident = Symbol::intern(lifetime_name); - token::Lifetime(ident, IdentIsRaw::No) + token::Lifetime(lifetime_name, IdentIsRaw::No) } rustc_lexer::TokenKind::RawLifetime => { self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); @@ -373,7 +372,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { String::with_capacity(lifetime_name_without_tick.as_str().len() + 1); lifetime_name.push('\''); lifetime_name += lifetime_name_without_tick.as_str(); - let sym = Symbol::intern(&lifetime_name); + let sym = nfc_normalize(&lifetime_name); // Make sure we mark this as a raw identifier. self.psess.raw_identifier_spans.push(span); @@ -393,9 +392,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> { self.pos = lt_start; self.cursor = Cursor::new(&str_before[2 as usize..], FrontmatterAllowed::No); - let lifetime_name = self.str_from(start); - let ident = Symbol::intern(lifetime_name); - token::Lifetime(ident, IdentIsRaw::No) + let lifetime_name = nfc_normalize(self.str_from(start)); + token::Lifetime(lifetime_name, IdentIsRaw::No) } } rustc_lexer::TokenKind::Semi => token::Semi, diff --git a/tests/ui/lexer/ident_normalization.rs b/tests/ui/lexer/ident_normalization.rs new file mode 100644 index 0000000000000..a16c52171106b --- /dev/null +++ b/tests/ui/lexer/ident_normalization.rs @@ -0,0 +1,20 @@ +//@check-pass +//@edition:2021 + +#![allow(non_snake_case)] + +// Tests that identifiers are NFC-normalized as per +// https://rust-lang.github.io/rfcs/2457-non-ascii-idents.html + +// Note that in the first argument of each function `K` is LATIN CAPITAL LETTER K +// and in the second it is K (KELVIN SIGN). + +fn ident_nfc(_p1: K, _p2: K) {} + +fn raw_ident_nfc(_p1: r#K, _p2: r#K) {} + +fn lifetime_nfc<'K>(_p1: &'K str, _p2: &'K str) {} + +fn raw_lifetime_nfc<'K>(_p1: &'r#K str, _p2: &'r#K str) {} + +fn main() {}