Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split Diagnostics for Uncommon Codepoints: Add Individual Identifier Types #120840

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 21 additions & 2 deletions compiler/rustc_lint/messages.ftl
HTGAzureX1212 marked this conversation as resolved.
Show resolved Hide resolved
Expand Up @@ -241,9 +241,28 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of
lint_identifier_non_ascii_char = identifier contains non-ASCII characters

lint_identifier_uncommon_codepoints = identifier contains {$codepoints_len ->
[one] an uncommon Unicode codepoint
*[other] uncommon Unicode codepoints
[one] { $identifier_type ->
[Exclusion] a character from an archaic script
[Technical] a character that is for non-linguistic, specialized usage
[Limited_Use] a character from a script in limited use
[Not_NFKC] a non normalized (NFKC) character
*[other] an uncommon character
}
*[other] { $identifier_type ->
[Exclusion] {$codepoints_len} characters from archaic scripts
[Technical] {$codepoints_len} characters that are for non-linguistic, specialized usage
[Limited_Use] {$codepoints_len} characters from scripts in limited use
[Not_NFKC] {$codepoints_len} non normalized (NFKC) characters
*[other] uncommon characters
}
}: {$codepoints}
.note = {$codepoints_len ->
[one] this character is
*[other] these characters are
} included in the{$identifier_type ->
[Restricted] {""}
*[other] {" "}{$identifier_type}
} Unicode general security profile

lint_ignored_unless_crate_specified = {$level}({$name}) is ignored unless specified at crate level

Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_lint/src/lib.rs
Expand Up @@ -31,6 +31,7 @@
#![feature(array_windows)]
#![feature(box_patterns)]
#![feature(control_flow_enum)]
#![feature(extract_if)]
#![feature(generic_nonzero)]
#![feature(if_let_guard)]
#![feature(iter_order_by)]
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_lint/src/lints.rs
Expand Up @@ -1098,9 +1098,11 @@ pub struct IdentifierNonAsciiChar;

// Diagnostic payload for the `lint_identifier_uncommon_codepoints` message.
// The `#[note]` attribute attaches that message's `.note` sub-message, which
// names the Unicode general security profile category of the characters.
#[derive(LintDiagnostic)]
#[diag(lint_identifier_uncommon_codepoints)]
#[note]
pub struct IdentifierUncommonCodepoints {
// The offending characters found in the identifier; rendered as `{$codepoints}`.
pub codepoints: Vec<char>,
// Number of entries in `codepoints`; the message uses it to choose between
// singular and plural wording.
pub codepoints_len: usize,
// Category label used as a selector in the message text. The lint pass
// supplies "Exclusion", "Technical", "Limited_Use", "Not_NFKC", or
// "Restricted" (the fallback for characters matching none of the four).
pub identifier_type: &'static str,
}

#[derive(LintDiagnostic)]
Expand Down
47 changes: 39 additions & 8 deletions compiler/rustc_lint/src/non_ascii_idents.rs
Expand Up @@ -7,6 +7,7 @@ use rustc_ast as ast;
use rustc_data_structures::fx::FxIndexMap;
use rustc_data_structures::unord::UnordMap;
use rustc_span::symbol::Symbol;
use unicode_security::general_security_profile::IdentifierType;

declare_lint! {
/// The `non_ascii_idents` lint detects non-ASCII identifiers.
Expand Down Expand Up @@ -189,17 +190,47 @@ impl EarlyLintPass for NonAsciiIdents {
if check_uncommon_codepoints
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
{
let codepoints: Vec<_> = symbol_str
let mut chars: Vec<_> = symbol_str
.chars()
.filter(|c| !GeneralSecurityProfile::identifier_allowed(*c))
.map(|c| (c, GeneralSecurityProfile::identifier_type(c)))
.collect();
let codepoints_len = codepoints.len();

cx.emit_span_lint(
UNCOMMON_CODEPOINTS,
sp,
IdentifierUncommonCodepoints { codepoints, codepoints_len },
);
for (id_ty, id_ty_descr) in [
(IdentifierType::Exclusion, "Exclusion"),
(IdentifierType::Technical, "Technical"),
(IdentifierType::Limited_Use, "Limited_Use"),
(IdentifierType::Not_NFKC, "Not_NFKC"),
] {
let codepoints: Vec<_> =
chars.extract_if(|(_, ty)| *ty == Some(id_ty)).collect();
if codepoints.is_empty() {
continue;
}
cx.emit_span_lint(
UNCOMMON_CODEPOINTS,
sp,
IdentifierUncommonCodepoints {
codepoints_len: codepoints.len(),
codepoints: codepoints.into_iter().map(|(c, _)| c).collect(),
identifier_type: id_ty_descr,
},
);
}

let remaining = chars
.extract_if(|(c, _)| !GeneralSecurityProfile::identifier_allowed(*c))
.collect::<Vec<_>>();
if !remaining.is_empty() {
cx.emit_span_lint(
UNCOMMON_CODEPOINTS,
sp,
IdentifierUncommonCodepoints {
codepoints_len: remaining.len(),
codepoints: remaining.into_iter().map(|(c, _)| c).collect(),
identifier_type: "Restricted",
},
);
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion tests/ui/lexer/lex-emoji-identifiers.rs
Expand Up @@ -4,7 +4,7 @@ fn invalid_emoji_usages() {
let wireless🛜 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
// FIXME
let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
//~^ WARN: identifier contains an uncommon Unicode codepoint
//~^ WARN: identifier contains an uncommon character: '\u{fe0f}'
let flag🇺🇳 = "flag sequence"; //~ ERROR: identifiers cannot contain emoji
let wales🏴 = "tag sequence"; //~ ERROR: identifiers cannot contain emoji
let folded🙏🏿 = "modifier sequence"; //~ ERROR: identifiers cannot contain emoji
Expand Down
3 changes: 2 additions & 1 deletion tests/ui/lexer/lex-emoji-identifiers.stderr
Expand Up @@ -40,12 +40,13 @@ error: identifiers cannot contain emoji: `folded🙏🏿`
LL | let folded🙏🏿 = "modifier sequence";
| ^^^^^^^^^^

warning: identifier contains an uncommon Unicode codepoint: '\u{fe0f}'
warning: identifier contains an uncommon character: '\u{fe0f}'
--> $DIR/lex-emoji-identifiers.rs:6:9
|
LL | let key1️⃣ = "keycap sequence";
| ^^^^
|
= note: this character is included in the Unicode general security profile
= note: `#[warn(uncommon_codepoints)]` on by default

error: aborting due to 7 previous errors; 1 warning emitted
Expand Down
@@ -1,12 +1,13 @@
#![deny(uncommon_codepoints)]

const µ: f64 = 0.000001; //~ ERROR identifier contains an uncommon Unicode codepoint
const µ: f64 = 0.000001; //~ identifier contains a non normalized (NFKC) character: 'µ'
//~| WARNING should have an upper case name

fn dijkstra() {} //~ ERROR identifier contains an uncommon Unicode codepoint
fn dijkstra() {}
//~^ ERROR identifier contains a non normalized (NFKC) character: 'ij'

fn main() {
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon characters: 'ㇻ', 'ㇲ', and 'ㇳ'

// using the same identifier the second time won't trigger the lint.
println!("{}", ㇻㇲㇳ);
Expand Down
@@ -1,26 +1,31 @@
error: identifier contains an uncommon Unicode codepoint: 'µ'
error: identifier contains a non normalized (NFKC) character: 'µ'
--> $DIR/lint-uncommon-codepoints.rs:3:7
|
LL | const µ: f64 = 0.000001;
| ^
|
= note: this character is included in the Not_NFKC Unicode general security profile
note: the lint level is defined here
--> $DIR/lint-uncommon-codepoints.rs:1:9
|
LL | #![deny(uncommon_codepoints)]
| ^^^^^^^^^^^^^^^^^^^

error: identifier contains an uncommon Unicode codepoint: 'ij'
error: identifier contains a non normalized (NFKC) character: 'ij'
--> $DIR/lint-uncommon-codepoints.rs:6:4
|
LL | fn dijkstra() {}
| ^^^^^^^
|
= note: this character is included in the Not_NFKC Unicode general security profile

error: identifier contains uncommon Unicode codepoints: 'ㇻ', 'ㇲ', and 'ㇳ'
--> $DIR/lint-uncommon-codepoints.rs:9:9
error: identifier contains uncommon characters: 'ㇻ', 'ㇲ', and 'ㇳ'
--> $DIR/lint-uncommon-codepoints.rs:10:9
|
LL | let ㇻㇲㇳ = "rust";
| ^^^^^^
|
= note: these characters are included in the Unicode general security profile

warning: constant `µ` should have an upper case name
--> $DIR/lint-uncommon-codepoints.rs:3:7
Expand Down