Skip to content

Commit a1d821c

Browse files
committed
parser/lexer: bump to Unicode 17, use faster unicode-ident
Replace unicode-xid with unicode-ident, which is 6 times faster
1 parent 23c7bad commit a1d821c

File tree

7 files changed

+20
-18
lines changed

7 files changed

+20
-18
lines changed

Cargo.lock

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4072,8 +4072,8 @@ version = "0.0.0"
40724072
dependencies = [
40734073
"expect-test",
40744074
"memchr",
4075+
"unicode-ident",
40754076
"unicode-properties",
4076-
"unicode-xid",
40774077
]
40784078

40794079
[[package]]
@@ -5890,24 +5890,24 @@ checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
58905890

58915891
[[package]]
58925892
name = "unicode-ident"
5893-
version = "1.0.18"
5893+
version = "1.0.22"
58945894
source = "registry+https://github.com/rust-lang/crates.io-index"
5895-
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
5895+
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
58965896

58975897
[[package]]
58985898
name = "unicode-normalization"
5899-
version = "0.1.24"
5899+
version = "0.1.25"
59005900
source = "registry+https://github.com/rust-lang/crates.io-index"
5901-
checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
5901+
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
59025902
dependencies = [
59035903
"tinyvec",
59045904
]
59055905

59065906
[[package]]
59075907
name = "unicode-properties"
5908-
version = "0.1.3"
5908+
version = "0.1.4"
59095909
source = "registry+https://github.com/rust-lang/crates.io-index"
5910-
checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0"
5910+
checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
59115911

59125912
[[package]]
59135913
name = "unicode-script"

compiler/rustc_lexer/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ Rust lexer used by rustc. No stability guarantees are provided.
1515
# Note that this crate purposefully does not depend on other rustc crates
1616
[dependencies]
1717
memchr = "2.7.4"
18-
unicode-properties = { version = "0.1.0", default-features = false, features = ["emoji"] }
19-
unicode-xid = "0.2.0"
18+
unicode-properties = { version = "0.1.4", default-features = false, features = ["emoji"] }
19+
unicode-ident = "1.0.22"
2020

2121
[dev-dependencies]
2222
expect-test = "1.4.0"

compiler/rustc_lexer/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ use LiteralKind::*;
3434
use TokenKind::*;
3535
use cursor::EOF_CHAR;
3636
pub use cursor::{Cursor, FrontmatterAllowed};
37+
pub use unicode_ident::UNICODE_VERSION as UNICODE_IDENT_VERSION;
3738
use unicode_properties::UnicodeEmoji;
38-
pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;
3939

4040
/// Parsed token.
4141
/// It doesn't contain information about data that has been parsed,
@@ -370,14 +370,14 @@ pub fn is_horizontal_whitespace(c: char) -> bool {
370370
/// a formal definition of valid identifier name.
371371
pub fn is_id_start(c: char) -> bool {
372372
// This is XID_Start OR '_' (which formally is not a XID_Start).
373-
c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
373+
c == '_' || unicode_ident::is_xid_start(c)
374374
}
375375

376376
/// True if `c` is valid as a non-first character of an identifier.
377377
/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
378378
/// a formal definition of valid identifier name.
379379
pub fn is_id_continue(c: char) -> bool {
380-
unicode_xid::UnicodeXID::is_xid_continue(c)
380+
unicode_ident::is_xid_continue(c)
381381
}
382382

383383
/// The passed string is lexically an identifier.

compiler/rustc_parse/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ rustc_session = { path = "../rustc_session" }
2020
rustc_span = { path = "../rustc_span" }
2121
thin-vec = "0.2.12"
2222
tracing = "0.1"
23-
unicode-normalization = "0.1.11"
24-
unicode-width = "0.2.0"
23+
unicode-normalization = "0.1.25"
24+
unicode-width = "0.2.2"
2525
# tidy-alphabetical-end
2626

2727
[dev-dependencies]

compiler/rustc_span/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ scoped-tls = "1.0"
2020
sha1 = "0.10.0"
2121
sha2 = "0.10.1"
2222
tracing = "0.1"
23-
unicode-width = "0.2.0"
23+
unicode-width = "0.2.2"
2424
# tidy-alphabetical-end

src/tools/tidy/src/deps.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,6 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
454454
"unicode-script",
455455
"unicode-security",
456456
"unicode-width",
457-
"unicode-xid",
458457
"utf8parse",
459458
"valuable",
460459
"version_check",

tests/ui-fulldeps/lexer/unicode-version.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ fn main() {
2222
it should also be updated in the reference at \
2323
https://github.com/rust-lang/reference/blob/HEAD/src/identifiers.md."
2424
);
25-
println!("Unicode XID version is: {:?}", rustc_lexer::UNICODE_XID_VERSION);
26-
println!("Unicode normalization version is: {:?}", rustc_parse::UNICODE_NORMALIZATION_VERSION);
25+
println!("Unicode version of unicode-ident is: {:?}", rustc_lexer::UNICODE_IDENT_VERSION);
26+
println!(
27+
"Unicode version of unicode-normalization is: {:?}",
28+
rustc_parse::UNICODE_NORMALIZATION_VERSION
29+
);
2730
}

0 commit comments

Comments
 (0)