diff --git a/library/core/src/unicode/mod.rs b/library/core/src/unicode/mod.rs index c71fa754e68fb..9bc4136517fae 100644 --- a/library/core/src/unicode/mod.rs +++ b/library/core/src/unicode/mod.rs @@ -18,6 +18,7 @@ pub(crate) use unicode_data::white_space::lookup as White_Space; pub(crate) mod printable; +mod rt; #[allow(unreachable_pub)] mod unicode_data; diff --git a/src/tools/unicode-table-generator/src/range_search.rs b/library/core/src/unicode/rt.rs similarity index 91% rename from src/tools/unicode-table-generator/src/range_search.rs rename to library/core/src/unicode/rt.rs index 4d1dd9b423b59..566c3203dd1c0 100644 --- a/src/tools/unicode-table-generator/src/range_search.rs +++ b/library/core/src/unicode/rt.rs @@ -1,5 +1,7 @@ +//! Runtime support for `unicode_data`. + #[inline(always)] -const fn bitset_search< +pub(super) const fn bitset_search< const N: usize, const CHUNK_SIZE: usize, const N1: usize, @@ -46,10 +48,10 @@ const fn bitset_search< } #[repr(transparent)] -struct ShortOffsetRunHeader(u32); +pub(super) struct ShortOffsetRunHeader(pub(super) u32); impl ShortOffsetRunHeader { - const fn new(start_index: usize, prefix_sum: u32) -> Self { + pub(super) const fn new(start_index: usize, prefix_sum: u32) -> Self { assert!(start_index < (1 << 11)); assert!(prefix_sum < (1 << 21)); @@ -57,12 +59,12 @@ impl ShortOffsetRunHeader { } #[inline] - const fn start_index(&self) -> usize { + pub(super) const fn start_index(&self) -> usize { (self.0 >> 21) as usize } #[inline] - const fn prefix_sum(&self) -> u32 { + pub(super) const fn prefix_sum(&self) -> u32 { self.0 & ((1 << 21) - 1) } } @@ -72,7 +74,7 @@ impl ShortOffsetRunHeader { /// - The last element of `short_offset_runs` must be greater than `std::char::MAX`. /// - The start indices of all elements in `short_offset_runs` must be less than `OFFSETS`. #[inline(always)] -unsafe fn skip_search( +pub(super) unsafe fn skip_search( needle: char, short_offset_runs: &[ShortOffsetRunHeader; SOR], offsets: &[u8; OFFSETS], diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs index 3c38b44224f87..bda8d9d30afce 100644 --- a/library/core/src/unicode/unicode_data.rs +++ b/library/core/src/unicode/unicode_data.rs @@ -11,167 +11,64 @@ // to_upper : 13656 bytes // Total : 31911 bytes -#[inline(always)] -const fn bitset_search< - const N: usize, - const CHUNK_SIZE: usize, - const N1: usize, - const CANONICAL: usize, - const CANONICALIZED: usize, ->( - needle: u32, - chunk_idx_map: &[u8; N], - bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], - bitset_canonical: &[u64; CANONICAL], - bitset_canonicalized: &[(u8, u8); CANONICALIZED], -) -> bool { - let bucket_idx = (needle / 64) as usize; - let chunk_map_idx = bucket_idx / CHUNK_SIZE; - let chunk_piece = bucket_idx % CHUNK_SIZE; - // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const. - let chunk_idx = if chunk_map_idx < chunk_idx_map.len() { - chunk_idx_map[chunk_map_idx] - } else { - return false; - }; - let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize; - // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const. - let word = if idx < bitset_canonical.len() { - bitset_canonical[idx] - } else { - let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()]; - let mut word = bitset_canonical[real_idx as usize]; - let should_invert = mapping & (1 << 6) != 0; - if should_invert { - word = !word; - } - // Lower 6 bits - let quantity = mapping & ((1 << 6) - 1); - if mapping & (1 << 7) != 0 { - // shift - word >>= quantity as u64; - } else { - word = word.rotate_left(quantity as u32); - } - word - }; - (word & (1 << (needle % 64) as u64)) != 0 -} - -#[repr(transparent)] -struct ShortOffsetRunHeader(u32); - -impl ShortOffsetRunHeader { - const fn new(start_index: usize, prefix_sum: u32) -> Self { - assert!(start_index < (1 << 11)); - assert!(prefix_sum < (1 << 21)); - - Self((start_index as u32) << 21 | prefix_sum) - } - - #[inline] - const fn start_index(&self) -> usize { - (self.0 >> 21) as usize - } - - #[inline] - const fn prefix_sum(&self) -> u32 { - self.0 & ((1 << 21) - 1) - } -} - -/// # Safety -/// -/// - The last element of `short_offset_runs` must be greater than `std::char::MAX`. -/// - The start indices of all elements in `short_offset_runs` must be less than `OFFSETS`. -#[inline(always)] -unsafe fn skip_search( - needle: char, - short_offset_runs: &[ShortOffsetRunHeader; SOR], - offsets: &[u8; OFFSETS], -) -> bool { - let needle = needle as u32; - - let last_idx = - match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header.0 << 11) { - Ok(idx) => idx + 1, - Err(idx) => idx, - }; - // SAFETY: `last_idx` *cannot* be past the end of the array, as the last - // element is greater than `std::char::MAX` (the largest possible needle) - // as guaranteed by the caller. - // - // So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the - // correct location cannot be past it, so `Err(idx) => idx != length` either. - // - // This means that we can avoid bounds checking for the accesses below, too. - // - // We need to use `intrinsics::assume` since the `panic_nounwind` contained - // in `hint::assert_unchecked` may not be optimized out. - unsafe { crate::intrinsics::assume(last_idx < SOR) }; - - let mut offset_idx = short_offset_runs[last_idx].start_index(); - let length = if let Some(next) = short_offset_runs.get(last_idx + 1) { - (*next).start_index() - offset_idx - } else { - offsets.len() - offset_idx - }; - - let prev = - last_idx.checked_sub(1).map(|prev| short_offset_runs[prev].prefix_sum()).unwrap_or(0); - - let total = needle - prev; - let mut prefix_sum = 0; - for _ in 0..(length - 1) { - // SAFETY: It is guaranteed that `length <= OFFSETS - offset_idx`, - // so it follows that `length - 1 + offset_idx < OFFSETS`, therefore - // `offset_idx < OFFSETS` is always true in this loop. - // - // We need to use `intrinsics::assume` since the `panic_nounwind` contained - // in `hint::assert_unchecked` may not be optimized out. - unsafe { crate::intrinsics::assume(offset_idx < OFFSETS) }; - let offset = offsets[offset_idx]; - prefix_sum += offset as u32; - if prefix_sum > total { - break; - } - offset_idx += 1; - } - offset_idx % 2 == 1 -} - pub const UNICODE_VERSION: (u8, u8, u8) = (17, 0, 0); -#[rustfmt::skip] +use super::rt::*; + pub mod alphabetic { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 51] = [ - ShortOffsetRunHeader::new(0, 706), ShortOffsetRunHeader::new(12, 4681), - ShortOffsetRunHeader::new(414, 5741), ShortOffsetRunHeader::new(452, 7958), - ShortOffsetRunHeader::new(552, 9398), ShortOffsetRunHeader::new(623, 11264), - ShortOffsetRunHeader::new(625, 12293), ShortOffsetRunHeader::new(663, 13312), - ShortOffsetRunHeader::new(687, 19904), ShortOffsetRunHeader::new(688, 42125), - ShortOffsetRunHeader::new(690, 42509), ShortOffsetRunHeader::new(694, 55204), - ShortOffsetRunHeader::new(778, 63744), ShortOffsetRunHeader::new(783, 64110), - ShortOffsetRunHeader::new(784, 64830), ShortOffsetRunHeader::new(806, 66176), - ShortOffsetRunHeader::new(847, 67383), ShortOffsetRunHeader::new(894, 73440), - ShortOffsetRunHeader::new(1217, 74650), ShortOffsetRunHeader::new(1228, 77712), - ShortOffsetRunHeader::new(1233, 78896), ShortOffsetRunHeader::new(1236, 82939), - ShortOffsetRunHeader::new(1240, 83527), ShortOffsetRunHeader::new(1242, 90368), - ShortOffsetRunHeader::new(1243, 92160), ShortOffsetRunHeader::new(1245, 92729), - ShortOffsetRunHeader::new(1246, 93504), ShortOffsetRunHeader::new(1261, 101590), - ShortOffsetRunHeader::new(1282, 110576), ShortOffsetRunHeader::new(1287, 110883), - ShortOffsetRunHeader::new(1294, 111356), ShortOffsetRunHeader::new(1304, 113664), - ShortOffsetRunHeader::new(1305, 119808), ShortOffsetRunHeader::new(1315, 120486), - ShortOffsetRunHeader::new(1352, 122624), ShortOffsetRunHeader::new(1375, 123536), - ShortOffsetRunHeader::new(1399, 124112), ShortOffsetRunHeader::new(1403, 126464), - ShortOffsetRunHeader::new(1431, 127280), ShortOffsetRunHeader::new(1497, 131072), - ShortOffsetRunHeader::new(1503, 173792), ShortOffsetRunHeader::new(1504, 178206), - ShortOffsetRunHeader::new(1506, 183982), ShortOffsetRunHeader::new(1508, 191457), - ShortOffsetRunHeader::new(1510, 192094), ShortOffsetRunHeader::new(1512, 194560), - ShortOffsetRunHeader::new(1513, 195102), ShortOffsetRunHeader::new(1514, 196608), - ShortOffsetRunHeader::new(1515, 201547), ShortOffsetRunHeader::new(1516, 210042), + ShortOffsetRunHeader::new(0, 706), + ShortOffsetRunHeader::new(12, 4681), + ShortOffsetRunHeader::new(414, 5741), + ShortOffsetRunHeader::new(452, 7958), + ShortOffsetRunHeader::new(552, 9398), + ShortOffsetRunHeader::new(623, 11264), + ShortOffsetRunHeader::new(625, 12293), + ShortOffsetRunHeader::new(663, 13312), + ShortOffsetRunHeader::new(687, 19904), + ShortOffsetRunHeader::new(688, 42125), + ShortOffsetRunHeader::new(690, 42509), + ShortOffsetRunHeader::new(694, 55204), + ShortOffsetRunHeader::new(778, 63744), + ShortOffsetRunHeader::new(783, 64110), + ShortOffsetRunHeader::new(784, 64830), + ShortOffsetRunHeader::new(806, 66176), + ShortOffsetRunHeader::new(847, 67383), + ShortOffsetRunHeader::new(894, 73440), + ShortOffsetRunHeader::new(1217, 74650), + ShortOffsetRunHeader::new(1228, 77712), + ShortOffsetRunHeader::new(1233, 78896), + ShortOffsetRunHeader::new(1236, 82939), + ShortOffsetRunHeader::new(1240, 83527), + ShortOffsetRunHeader::new(1242, 90368), + ShortOffsetRunHeader::new(1243, 92160), + ShortOffsetRunHeader::new(1245, 92729), + ShortOffsetRunHeader::new(1246, 93504), + ShortOffsetRunHeader::new(1261, 101590), + ShortOffsetRunHeader::new(1282, 110576), + ShortOffsetRunHeader::new(1287, 110883), + ShortOffsetRunHeader::new(1294, 111356), + ShortOffsetRunHeader::new(1304, 113664), + ShortOffsetRunHeader::new(1305, 119808), + ShortOffsetRunHeader::new(1315, 120486), + ShortOffsetRunHeader::new(1352, 122624), + ShortOffsetRunHeader::new(1375, 123536), + ShortOffsetRunHeader::new(1399, 124112), + ShortOffsetRunHeader::new(1403, 126464), + ShortOffsetRunHeader::new(1431, 127280), + ShortOffsetRunHeader::new(1497, 131072), + ShortOffsetRunHeader::new(1503, 173792), + ShortOffsetRunHeader::new(1504, 178206), + ShortOffsetRunHeader::new(1506, 183982), + ShortOffsetRunHeader::new(1508, 191457), + ShortOffsetRunHeader::new(1510, 192094), + ShortOffsetRunHeader::new(1512, 194560), + ShortOffsetRunHeader::new(1513, 195102), + ShortOffsetRunHeader::new(1514, 196608), + ShortOffsetRunHeader::new(1515, 201547), + ShortOffsetRunHeader::new(1516, 210042), ShortOffsetRunHeader::new(1518, 1324154), ]; static OFFSETS: [u8; 1519] = [ @@ -180,58 +77,60 @@ pub mod alphabetic { 1, 2, 1, 2, 1, 1, 8, 27, 4, 4, 29, 11, 5, 56, 1, 7, 14, 102, 1, 8, 4, 8, 4, 3, 10, 3, 2, 1, 16, 48, 13, 101, 24, 33, 9, 2, 4, 1, 5, 24, 2, 19, 19, 25, 7, 11, 5, 24, 1, 7, 7, 1, 8, 42, 10, 12, 3, 7, 6, 76, 1, 16, 1, 3, 4, 15, 13, 19, 1, 8, 2, 2, 2, 22, 1, 7, 1, 1, 3, 4, 3, 8, - 2, 2, 2, 2, 1, 1, 8, 1, 4, 2, 1, 5, 12, 2, 10, 1, 4, 3, 1, 6, 4, 2, 2, 22, 1, 7, 1, 2, 1, 2, - 1, 2, 4, 5, 4, 2, 2, 2, 4, 1, 7, 4, 1, 1, 17, 6, 11, 3, 1, 9, 1, 3, 1, 22, 1, 7, 1, 2, 1, 5, - 3, 9, 1, 3, 1, 2, 3, 1, 15, 4, 21, 4, 4, 3, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8, 2, 2, - 2, 2, 9, 2, 4, 2, 1, 5, 13, 1, 16, 2, 1, 6, 3, 3, 1, 4, 3, 2, 1, 1, 1, 2, 3, 2, 3, 3, 3, 12, - 4, 5, 3, 3, 1, 3, 3, 1, 6, 1, 40, 13, 1, 3, 1, 23, 1, 16, 3, 8, 1, 3, 1, 3, 8, 2, 1, 3, 1, - 2, 2, 4, 28, 4, 1, 8, 1, 3, 1, 23, 1, 10, 1, 5, 3, 8, 1, 3, 1, 3, 8, 2, 5, 3, 1, 4, 13, 3, - 12, 13, 1, 3, 1, 41, 2, 8, 1, 3, 1, 3, 1, 1, 5, 4, 7, 5, 22, 6, 1, 3, 1, 18, 3, 24, 1, 9, 1, - 1, 2, 7, 8, 6, 1, 1, 1, 8, 18, 2, 13, 58, 5, 7, 6, 1, 51, 2, 1, 1, 1, 5, 1, 24, 1, 1, 1, 19, - 1, 3, 2, 5, 1, 1, 6, 1, 14, 4, 32, 1, 63, 8, 1, 36, 4, 19, 4, 16, 1, 36, 67, 55, 1, 1, 2, 5, - 16, 64, 10, 4, 2, 38, 1, 1, 5, 1, 2, 43, 1, 0, 1, 4, 2, 7, 1, 1, 1, 4, 2, 41, 1, 4, 2, 33, - 1, 4, 2, 7, 1, 1, 1, 4, 2, 15, 1, 57, 1, 4, 2, 67, 37, 16, 16, 86, 2, 6, 3, 0, 2, 17, 1, 26, - 5, 75, 3, 11, 7, 20, 11, 21, 12, 20, 12, 13, 1, 3, 1, 2, 12, 52, 2, 19, 14, 1, 4, 1, 67, 89, - 7, 43, 5, 70, 10, 31, 1, 12, 4, 9, 23, 30, 2, 5, 11, 44, 4, 26, 54, 28, 4, 63, 2, 20, 50, 1, - 23, 2, 11, 3, 49, 52, 1, 15, 1, 8, 51, 42, 2, 4, 10, 44, 1, 11, 14, 55, 22, 3, 10, 36, 2, - 11, 5, 43, 2, 3, 41, 4, 1, 6, 1, 2, 3, 1, 5, 192, 19, 34, 11, 0, 2, 6, 2, 38, 2, 6, 2, 8, 1, - 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116, 1, - 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 11, 2, 4, 5, 5, - 4, 1, 17, 41, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 2, 56, 7, 1, 16, 23, 9, 7, 1, - 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 32, 47, 1, 0, 3, 25, 9, 7, 5, 2, 5, 4, 86, 6, 3, - 1, 90, 1, 4, 5, 43, 1, 94, 17, 32, 48, 16, 0, 0, 64, 0, 67, 46, 2, 0, 3, 16, 10, 2, 20, 47, - 5, 8, 3, 113, 39, 9, 2, 103, 2, 82, 20, 21, 1, 33, 24, 52, 12, 68, 1, 1, 44, 6, 3, 1, 1, 3, - 10, 33, 5, 35, 13, 29, 3, 51, 1, 12, 15, 1, 16, 16, 10, 5, 1, 55, 9, 14, 18, 23, 3, 69, 1, - 1, 1, 1, 24, 3, 2, 16, 2, 4, 11, 6, 2, 6, 2, 6, 9, 7, 1, 7, 1, 43, 1, 14, 6, 123, 21, 0, 12, - 23, 4, 49, 0, 0, 2, 106, 38, 7, 12, 5, 5, 12, 1, 13, 1, 5, 1, 1, 1, 2, 1, 2, 1, 108, 33, 0, - 18, 64, 2, 54, 40, 12, 116, 5, 1, 135, 36, 26, 6, 26, 11, 89, 3, 6, 2, 6, 2, 6, 2, 3, 35, - 12, 1, 26, 1, 19, 1, 2, 1, 15, 2, 14, 34, 123, 69, 53, 0, 29, 3, 49, 47, 32, 13, 30, 5, 43, - 5, 30, 2, 36, 4, 8, 1, 5, 42, 158, 18, 36, 4, 36, 4, 40, 8, 52, 12, 11, 1, 15, 1, 7, 1, 2, - 1, 11, 1, 15, 1, 7, 1, 2, 3, 52, 12, 0, 9, 22, 10, 8, 24, 6, 1, 42, 1, 9, 69, 6, 2, 1, 1, - 44, 1, 2, 3, 1, 2, 23, 10, 23, 9, 31, 65, 19, 1, 2, 10, 22, 10, 26, 6, 26, 38, 56, 6, 2, 64, - 4, 1, 2, 5, 8, 1, 3, 1, 29, 42, 29, 3, 29, 35, 8, 1, 28, 27, 54, 10, 22, 10, 19, 13, 18, - 110, 73, 55, 51, 13, 51, 13, 40, 34, 28, 3, 1, 5, 23, 250, 42, 1, 2, 3, 2, 16, 6, 50, 3, 3, - 29, 10, 1, 8, 22, 42, 18, 46, 21, 27, 23, 9, 70, 43, 5, 10, 57, 9, 1, 13, 25, 23, 51, 17, 4, - 8, 35, 3, 1, 9, 64, 1, 4, 9, 2, 10, 1, 1, 1, 35, 18, 1, 34, 2, 1, 6, 4, 62, 7, 1, 1, 1, 4, - 1, 15, 1, 10, 7, 57, 23, 4, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8, 2, 2, 2, 2, 3, 1, 6, - 1, 5, 7, 28, 10, 1, 1, 2, 1, 1, 38, 1, 10, 1, 1, 2, 1, 1, 4, 1, 2, 3, 1, 1, 1, 44, 66, 1, 3, - 1, 4, 20, 3, 30, 66, 2, 2, 1, 1, 184, 54, 2, 7, 25, 6, 34, 63, 1, 1, 3, 1, 59, 54, 2, 1, 71, - 27, 2, 14, 21, 7, 185, 57, 103, 64, 31, 8, 2, 1, 2, 8, 1, 2, 1, 30, 1, 2, 2, 2, 2, 4, 93, 8, - 2, 46, 2, 6, 1, 1, 1, 2, 27, 51, 2, 10, 17, 72, 5, 1, 18, 73, 103, 8, 88, 33, 31, 9, 1, 45, - 1, 7, 1, 1, 49, 30, 2, 22, 1, 14, 73, 7, 1, 2, 1, 44, 3, 1, 1, 2, 1, 3, 1, 1, 2, 2, 24, 6, - 1, 2, 1, 37, 1, 2, 1, 4, 1, 1, 23, 44, 0, 23, 9, 17, 1, 41, 3, 3, 111, 1, 79, 0, 102, 111, - 17, 196, 0, 97, 15, 0, 17, 6, 25, 0, 5, 0, 0, 47, 0, 0, 7, 31, 17, 79, 17, 30, 18, 48, 16, - 4, 31, 21, 5, 19, 0, 45, 211, 64, 32, 25, 2, 25, 44, 75, 4, 57, 7, 17, 64, 2, 1, 1, 12, 7, - 9, 0, 41, 32, 97, 115, 0, 4, 1, 7, 1, 2, 1, 0, 15, 1, 29, 3, 2, 1, 14, 4, 8, 0, 0, 107, 5, - 13, 3, 9, 7, 10, 4, 1, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, - 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, - 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 31, 6, 6, 213, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, - 1, 112, 45, 10, 7, 16, 1, 0, 30, 18, 44, 0, 28, 228, 30, 2, 1, 207, 31, 1, 22, 8, 2, 224, 7, - 1, 4, 1, 2, 1, 15, 1, 197, 59, 68, 3, 1, 3, 1, 0, 4, 1, 27, 1, 2, 1, 1, 2, 1, 1, 10, 1, 4, - 1, 1, 1, 1, 6, 1, 4, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, - 1, 2, 4, 1, 7, 1, 4, 1, 4, 1, 1, 1, 10, 1, 17, 5, 3, 1, 5, 1, 17, 0, 26, 6, 26, 6, 26, 0, 0, - 32, 0, 2, 0, 2, 0, 15, 0, 0, 0, 0, 0, 5, 0, 0, + 2, 2, 2, 2, 1, 1, 8, 1, 4, 2, 1, 5, 12, 2, 10, 1, 4, 3, 1, 6, 4, 2, 2, 22, 1, 7, 1, 2, 1, + 2, 1, 2, 4, 5, 4, 2, 2, 2, 4, 1, 7, 4, 1, 1, 17, 6, 11, 3, 1, 9, 1, 3, 1, 22, 1, 7, 1, 2, + 1, 5, 3, 9, 1, 3, 1, 2, 3, 1, 15, 4, 21, 4, 4, 3, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, + 8, 2, 2, 2, 2, 9, 2, 4, 2, 1, 5, 13, 1, 16, 2, 1, 6, 3, 3, 1, 4, 3, 2, 1, 1, 1, 2, 3, 2, 3, + 3, 3, 12, 4, 5, 3, 3, 1, 3, 3, 1, 6, 1, 40, 13, 1, 3, 1, 23, 1, 16, 3, 8, 1, 3, 1, 3, 8, 2, + 1, 3, 1, 2, 2, 4, 28, 4, 1, 8, 1, 3, 1, 23, 1, 10, 1, 5, 3, 8, 1, 3, 1, 3, 8, 2, 5, 3, 1, + 4, 13, 3, 12, 13, 1, 3, 1, 41, 2, 8, 1, 3, 1, 3, 1, 1, 5, 4, 7, 5, 22, 6, 1, 3, 1, 18, 3, + 24, 1, 9, 1, 1, 2, 7, 8, 6, 1, 1, 1, 8, 18, 2, 13, 58, 5, 7, 6, 1, 51, 2, 1, 1, 1, 5, 1, + 24, 1, 1, 1, 19, 1, 3, 2, 5, 1, 1, 6, 1, 14, 4, 32, 1, 63, 8, 1, 36, 4, 19, 4, 16, 1, 36, + 67, 55, 1, 1, 2, 5, 16, 64, 10, 4, 2, 38, 1, 1, 5, 1, 2, 43, 1, 0, 1, 4, 2, 7, 1, 1, 1, 4, + 2, 41, 1, 4, 2, 33, 1, 4, 2, 7, 1, 1, 1, 4, 2, 15, 1, 57, 1, 4, 2, 67, 37, 16, 16, 86, 2, + 6, 3, 0, 2, 17, 1, 26, 5, 75, 3, 11, 7, 20, 11, 21, 12, 20, 12, 13, 1, 3, 1, 2, 12, 52, 2, + 19, 14, 1, 4, 1, 67, 89, 7, 43, 5, 70, 10, 31, 1, 12, 4, 9, 23, 30, 2, 5, 11, 44, 4, 26, + 54, 28, 4, 63, 2, 20, 50, 1, 23, 2, 11, 3, 49, 52, 1, 15, 1, 8, 51, 42, 2, 4, 10, 44, 1, + 11, 14, 55, 22, 3, 10, 36, 2, 11, 5, 43, 2, 3, 41, 4, 1, 6, 1, 2, 3, 1, 5, 192, 19, 34, 11, + 0, 2, 6, 2, 38, 2, 6, 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, + 2, 6, 4, 13, 5, 3, 1, 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, + 1, 1, 1, 1, 4, 1, 11, 2, 4, 5, 5, 4, 1, 17, 41, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, + 1, 2, 56, 7, 1, 16, 23, 9, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 32, 47, 1, 0, 3, + 25, 9, 7, 5, 2, 5, 4, 86, 6, 3, 1, 90, 1, 4, 5, 43, 1, 94, 17, 32, 48, 16, 0, 0, 64, 0, 67, + 46, 2, 0, 3, 16, 10, 2, 20, 47, 5, 8, 3, 113, 39, 9, 2, 103, 2, 82, 20, 21, 1, 33, 24, 52, + 12, 68, 1, 1, 44, 6, 3, 1, 1, 3, 10, 33, 5, 35, 13, 29, 3, 51, 1, 12, 15, 1, 16, 16, 10, 5, + 1, 55, 9, 14, 18, 23, 3, 69, 1, 1, 1, 1, 24, 3, 2, 16, 2, 4, 11, 6, 2, 6, 2, 6, 9, 7, 1, 7, + 1, 43, 1, 14, 6, 123, 21, 0, 12, 23, 4, 49, 0, 0, 2, 106, 38, 7, 12, 5, 5, 12, 1, 13, 1, 5, + 1, 1, 1, 2, 1, 2, 1, 108, 33, 0, 18, 64, 2, 54, 40, 12, 116, 5, 1, 135, 36, 26, 6, 26, 11, + 89, 3, 6, 2, 6, 2, 6, 2, 3, 35, 12, 1, 26, 1, 19, 1, 2, 1, 15, 2, 14, 34, 123, 69, 53, 0, + 29, 3, 49, 47, 32, 13, 30, 5, 43, 5, 30, 2, 36, 4, 8, 1, 5, 42, 158, 18, 36, 4, 36, 4, 40, + 8, 52, 12, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 3, 52, 12, 0, 9, 22, 10, 8, 24, + 6, 1, 42, 1, 9, 69, 6, 2, 1, 1, 44, 1, 2, 3, 1, 2, 23, 10, 23, 9, 31, 65, 19, 1, 2, 10, 22, + 10, 26, 6, 26, 38, 56, 6, 2, 64, 4, 1, 2, 5, 8, 1, 3, 1, 29, 42, 29, 3, 29, 35, 8, 1, 28, + 27, 54, 10, 22, 10, 19, 13, 18, 110, 73, 55, 51, 13, 51, 13, 40, 34, 28, 3, 1, 5, 23, 250, + 42, 1, 2, 3, 2, 16, 6, 50, 3, 3, 29, 10, 1, 8, 22, 42, 18, 46, 21, 27, 23, 9, 70, 43, 5, + 10, 57, 9, 1, 13, 25, 23, 51, 17, 4, 8, 35, 3, 1, 9, 64, 1, 4, 9, 2, 10, 1, 1, 1, 35, 18, + 1, 34, 2, 1, 6, 4, 62, 7, 1, 1, 1, 4, 1, 15, 1, 10, 7, 57, 23, 4, 1, 8, 2, 2, 2, 22, 1, 7, + 1, 2, 1, 5, 3, 8, 2, 2, 2, 2, 3, 1, 6, 1, 5, 7, 28, 10, 1, 1, 2, 1, 1, 38, 1, 10, 1, 1, 2, + 1, 1, 4, 1, 2, 3, 1, 1, 1, 44, 66, 1, 3, 1, 4, 20, 3, 30, 66, 2, 2, 1, 1, 184, 54, 2, 7, + 25, 6, 34, 63, 1, 1, 3, 1, 59, 54, 2, 1, 71, 27, 2, 14, 21, 7, 185, 57, 103, 64, 31, 8, 2, + 1, 2, 8, 1, 2, 1, 30, 1, 2, 2, 2, 2, 4, 93, 8, 2, 46, 2, 6, 1, 1, 1, 2, 27, 51, 2, 10, 17, + 72, 5, 1, 18, 73, 103, 8, 88, 33, 31, 9, 1, 45, 1, 7, 1, 1, 49, 30, 2, 22, 1, 14, 73, 7, 1, + 2, 1, 44, 3, 1, 1, 2, 1, 3, 1, 1, 2, 2, 24, 6, 1, 2, 1, 37, 1, 2, 1, 4, 1, 1, 23, 44, 0, + 23, 9, 17, 1, 41, 3, 3, 111, 1, 79, 0, 102, 111, 17, 196, 0, 97, 15, 0, 17, 6, 25, 0, 5, 0, + 0, 47, 0, 0, 7, 31, 17, 79, 17, 30, 18, 48, 16, 4, 31, 21, 5, 19, 0, 45, 211, 64, 32, 25, + 2, 25, 44, 75, 4, 57, 7, 17, 64, 2, 1, 1, 12, 7, 9, 0, 41, 32, 97, 115, 0, 4, 1, 7, 1, 2, + 1, 0, 15, 1, 29, 3, 2, 1, 14, 4, 8, 0, 0, 107, 5, 13, 3, 9, 7, 10, 4, 1, 0, 85, 1, 71, 1, + 2, 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, + 3, 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, + 31, 6, 6, 213, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 112, 45, 10, 7, 16, 1, 0, 30, 18, + 44, 0, 28, 228, 30, 2, 1, 207, 31, 1, 22, 8, 2, 224, 7, 1, 4, 1, 2, 1, 15, 1, 197, 59, 68, + 3, 1, 3, 1, 0, 4, 1, 27, 1, 2, 1, 1, 2, 1, 1, 10, 1, 4, 1, 1, 1, 1, 6, 1, 4, 1, 1, 1, 1, 1, + 1, 3, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 4, 1, 7, 1, 4, 1, 4, 1, 1, + 1, 10, 1, 17, 5, 3, 1, 5, 1, 17, 0, 26, 6, 26, 6, 26, 0, 0, 32, 0, 2, 0, 2, 0, 15, 0, 0, 0, + 0, 0, 5, 0, 0, ]; + #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -254,66 +153,84 @@ pub mod alphabetic { } } -#[rustfmt::skip] pub mod case_ignorable { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 36] = [ - ShortOffsetRunHeader::new(0, 688), ShortOffsetRunHeader::new(11, 4957), - ShortOffsetRunHeader::new(263, 5906), ShortOffsetRunHeader::new(265, 8125), - ShortOffsetRunHeader::new(377, 11388), ShortOffsetRunHeader::new(411, 12293), - ShortOffsetRunHeader::new(423, 40981), ShortOffsetRunHeader::new(435, 42232), - ShortOffsetRunHeader::new(437, 42508), ShortOffsetRunHeader::new(439, 64286), - ShortOffsetRunHeader::new(535, 65024), ShortOffsetRunHeader::new(539, 66045), - ShortOffsetRunHeader::new(569, 67456), ShortOffsetRunHeader::new(575, 68097), - ShortOffsetRunHeader::new(581, 68900), ShortOffsetRunHeader::new(593, 69291), - ShortOffsetRunHeader::new(601, 71727), ShortOffsetRunHeader::new(727, 71995), - ShortOffsetRunHeader::new(731, 73459), ShortOffsetRunHeader::new(797, 78896), - ShortOffsetRunHeader::new(809, 90398), ShortOffsetRunHeader::new(813, 92912), - ShortOffsetRunHeader::new(817, 93504), ShortOffsetRunHeader::new(823, 94031), - ShortOffsetRunHeader::new(827, 110576), ShortOffsetRunHeader::new(837, 113821), - ShortOffsetRunHeader::new(843, 118528), ShortOffsetRunHeader::new(847, 119143), - ShortOffsetRunHeader::new(851, 121344), ShortOffsetRunHeader::new(861, 122880), - ShortOffsetRunHeader::new(873, 123566), ShortOffsetRunHeader::new(889, 124139), - ShortOffsetRunHeader::new(893, 125136), ShortOffsetRunHeader::new(907, 127995), - ShortOffsetRunHeader::new(911, 917505), ShortOffsetRunHeader::new(913, 2032112), + ShortOffsetRunHeader::new(0, 688), + ShortOffsetRunHeader::new(11, 4957), + ShortOffsetRunHeader::new(263, 5906), + ShortOffsetRunHeader::new(265, 8125), + ShortOffsetRunHeader::new(377, 11388), + ShortOffsetRunHeader::new(411, 12293), + ShortOffsetRunHeader::new(423, 40981), + ShortOffsetRunHeader::new(435, 42232), + ShortOffsetRunHeader::new(437, 42508), + ShortOffsetRunHeader::new(439, 64286), + ShortOffsetRunHeader::new(535, 65024), + ShortOffsetRunHeader::new(539, 66045), + ShortOffsetRunHeader::new(569, 67456), + ShortOffsetRunHeader::new(575, 68097), + ShortOffsetRunHeader::new(581, 68900), + ShortOffsetRunHeader::new(593, 69291), + ShortOffsetRunHeader::new(601, 71727), + ShortOffsetRunHeader::new(727, 71995), + ShortOffsetRunHeader::new(731, 73459), + ShortOffsetRunHeader::new(797, 78896), + ShortOffsetRunHeader::new(809, 90398), + ShortOffsetRunHeader::new(813, 92912), + ShortOffsetRunHeader::new(817, 93504), + ShortOffsetRunHeader::new(823, 94031), + ShortOffsetRunHeader::new(827, 110576), + ShortOffsetRunHeader::new(837, 113821), + ShortOffsetRunHeader::new(843, 118528), + ShortOffsetRunHeader::new(847, 119143), + ShortOffsetRunHeader::new(851, 121344), + ShortOffsetRunHeader::new(861, 122880), + ShortOffsetRunHeader::new(873, 123566), + ShortOffsetRunHeader::new(889, 124139), + ShortOffsetRunHeader::new(893, 125136), + ShortOffsetRunHeader::new(907, 127995), + ShortOffsetRunHeader::new(911, 917505), + ShortOffsetRunHeader::new(913, 2032112), ]; static OFFSETS: [u8; 919] = [ 168, 1, 4, 1, 1, 1, 4, 1, 2, 2, 0, 192, 4, 2, 4, 1, 9, 2, 1, 1, 251, 7, 207, 1, 5, 1, 49, - 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35, 1, 10, 21, 16, 1, 101, 8, 1, 10, - 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24, 24, 43, 3, 44, 1, 7, 2, 5, 9, 41, - 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1, 58, 1, 4, 4, 8, 1, 20, 2, 26, 1, 2, - 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, - 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1, 61, 1, 12, 1, 50, 1, 3, 1, 55, 1, 1, 3, - 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1, 5, 2, 20, 2, 28, 2, 57, 2, 4, 4, 8, 1, - 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9, 98, 1, 2, 9, 9, 1, 1, 7, 73, 2, 27, 1, - 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, - 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3, 29, 2, 30, 2, 30, 2, 64, 2, 1, 7, 8, 1, - 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118, 3, 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58, - 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 46, 2, 12, 20, 4, 48, 1, 1, 5, 1, 1, 5, 1, - 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3, 58, 8, 2, 2, 64, 6, 82, 3, 1, 13, - 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1, 3, 11, 3, 13, 3, 13, 3, 13, 2, 12, - 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1, 16, 13, 51, 33, 0, 2, 113, 3, 125, 1, - 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6, 93, 3, 0, 1, 0, 6, 0, 1, 98, 4, 1, 10, - 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 102, 4, 3, 2, 8, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, - 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4, 2, 2, 17, 1, 21, 2, 66, 6, 2, 2, 2, 2, - 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 27, 1, 14, 2, 5, 2, 1, 1, 100, 5, - 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3, 1, 12, 16, 34, 1, 2, 1, 169, 1, 7, 1, 6, - 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3, 0, 1, 226, 1, 149, 5, 0, 6, 1, 42, 1, 9, - 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 38, 1, 26, 5, 1, 1, 0, 2, 24, 1, 52, 6, 70, 11, - 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, 1, 4, 1, 10, 1, 50, 3, 36, 5, 1, - 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, - 2, 3, 1, 37, 7, 3, 5, 70, 6, 13, 1, 1, 1, 1, 1, 14, 2, 85, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, - 1, 4, 2, 1, 2, 238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, - 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 0, 2, 1, 1, 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, - 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, 3, 1, 1, 201, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, - 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, 1, 1, 1, 65, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, - 1, 23, 1, 0, 17, 6, 15, 0, 12, 3, 3, 0, 5, 59, 7, 9, 4, 0, 3, 40, 2, 0, 1, 63, 17, 64, 2, 1, - 2, 13, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3, 0, - 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160, 14, - 0, 1, 61, 4, 0, 5, 254, 2, 243, 1, 2, 1, 7, 2, 5, 1, 9, 1, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, - 128, 240, 0, + 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35, 1, 10, 21, 16, 1, 101, 8, 1, + 10, 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24, 24, 43, 3, 44, 1, 7, 2, 5, + 9, 41, 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1, 58, 1, 4, 4, 8, 1, 20, 2, + 26, 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, + 20, 2, 22, 6, 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1, 61, 1, 12, 1, 50, 1, 3, 1, + 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1, 5, 2, 20, 2, 28, 2, 57, 2, + 4, 4, 8, 1, 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9, 98, 1, 2, 9, 9, 1, 1, 7, + 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, + 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3, 29, 2, 30, 2, 30, 2, 64, + 2, 1, 7, 8, 1, 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118, 3, 4, 2, 9, 1, 6, 3, + 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 46, 2, 12, 20, 4, 48, + 1, 1, 5, 1, 1, 5, 1, 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3, 58, 8, 2, 2, + 64, 6, 82, 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1, 3, 11, 3, 13, + 3, 13, 3, 13, 2, 12, 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1, 16, 13, 51, 33, + 0, 2, 113, 3, 125, 1, 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6, 93, 3, 0, 1, 0, + 6, 0, 1, 98, 4, 1, 10, 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 102, 4, 3, 2, 8, 1, 3, 1, + 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4, 2, 2, 17, 1, + 21, 2, 66, 6, 2, 2, 2, 2, 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 27, 1, + 14, 2, 5, 2, 1, 1, 100, 5, 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3, 1, 12, 16, + 34, 1, 2, 1, 169, 1, 7, 1, 6, 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3, 0, 1, 226, + 1, 149, 5, 0, 6, 1, 42, 1, 9, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 38, 1, 26, 5, 1, + 1, 0, 2, 24, 1, 52, 6, 70, 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, + 1, 4, 1, 10, 1, 50, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, + 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 3, 1, 37, 7, 3, 5, 70, 6, 13, 1, 1, 1, 1, 1, 14, 2, + 85, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, 1, 4, 2, 1, 2, 238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, + 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 0, 2, 1, 1, 4, 1, + 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, 3, 1, + 1, 201, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, + 1, 1, 1, 65, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, 1, 23, 1, 0, 17, 6, 15, 0, 12, 3, 3, 0, 5, 59, + 7, 9, 4, 0, 3, 40, 2, 0, 1, 63, 17, 64, 2, 1, 2, 13, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, + 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3, 0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, + 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160, 14, 0, 1, 61, 4, 0, 5, 254, 2, 243, 1, 2, 1, 7, + 2, 5, 1, 9, 1, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0, ]; + #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -336,37 +253,48 @@ pub mod case_ignorable { } } -#[rustfmt::skip] pub mod cased { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 22] = [ - ShortOffsetRunHeader::new(0, 4256), ShortOffsetRunHeader::new(51, 5024), - ShortOffsetRunHeader::new(61, 7296), ShortOffsetRunHeader::new(65, 7958), - ShortOffsetRunHeader::new(74, 9398), ShortOffsetRunHeader::new(149, 11264), - ShortOffsetRunHeader::new(151, 42560), ShortOffsetRunHeader::new(163, 43824), - ShortOffsetRunHeader::new(177, 64256), ShortOffsetRunHeader::new(183, 65313), - ShortOffsetRunHeader::new(187, 66560), ShortOffsetRunHeader::new(191, 67456), - ShortOffsetRunHeader::new(213, 68736), ShortOffsetRunHeader::new(221, 71840), - ShortOffsetRunHeader::new(229, 93760), ShortOffsetRunHeader::new(231, 119808), - ShortOffsetRunHeader::new(237, 120486), ShortOffsetRunHeader::new(274, 122624), - ShortOffsetRunHeader::new(297, 122928), ShortOffsetRunHeader::new(303, 125184), - ShortOffsetRunHeader::new(305, 127280), ShortOffsetRunHeader::new(307, 1241482), + ShortOffsetRunHeader::new(0, 4256), + ShortOffsetRunHeader::new(51, 5024), + ShortOffsetRunHeader::new(61, 7296), + ShortOffsetRunHeader::new(65, 7958), + ShortOffsetRunHeader::new(74, 9398), + ShortOffsetRunHeader::new(149, 11264), + ShortOffsetRunHeader::new(151, 42560), + ShortOffsetRunHeader::new(163, 43824), + ShortOffsetRunHeader::new(177, 64256), + ShortOffsetRunHeader::new(183, 65313), + ShortOffsetRunHeader::new(187, 66560), + ShortOffsetRunHeader::new(191, 67456), + ShortOffsetRunHeader::new(213, 68736), + ShortOffsetRunHeader::new(221, 71840), + ShortOffsetRunHeader::new(229, 93760), + ShortOffsetRunHeader::new(231, 119808), + ShortOffsetRunHeader::new(237, 120486), + ShortOffsetRunHeader::new(274, 122624), + ShortOffsetRunHeader::new(297, 122928), + ShortOffsetRunHeader::new(303, 125184), + ShortOffsetRunHeader::new(305, 127280), + ShortOffsetRunHeader::new(307, 1241482), ]; static OFFSETS: [u8; 313] = [ 170, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 2, 35, 7, 2, 30, 5, 96, 1, 42, 4, - 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9, 41, 0, 38, 1, 1, - 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2, 38, 2, 6, 2, 8, - 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116, - 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 6, 4, 1, 2, 4, - 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 0, 46, 18, 30, 132, - 102, 3, 4, 1, 77, 20, 6, 1, 3, 0, 43, 1, 14, 6, 80, 0, 7, 12, 5, 0, 26, 6, 26, 0, 80, 96, - 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 0, 1, 2, 3, 1, 42, 1, 9, 0, - 51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 32, 25, 2, 25, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2, - 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25, - 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6, 0, - 62, 0, 68, 0, 26, 6, 26, 6, 26, 0, + 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9, 41, 0, 38, 1, + 1, 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2, 38, 2, 6, + 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, + 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 6, 4, + 1, 2, 4, 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 0, 46, + 18, 30, 132, 102, 3, 4, 1, 77, 20, 6, 1, 3, 0, 43, 1, 14, 6, 80, 0, 7, 12, 5, 0, 26, 6, 26, + 0, 80, 96, 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 0, 1, 2, 3, 1, + 42, 1, 9, 0, 51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 32, 25, 2, 25, 0, 85, 1, 71, 1, 2, + 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, + 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, + 1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0, ]; + #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -389,59 +317,75 @@ pub mod cased { } } -#[rustfmt::skip] pub mod grapheme_extend { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 33] = [ - ShortOffsetRunHeader::new(0, 768), ShortOffsetRunHeader::new(1, 1155), - ShortOffsetRunHeader::new(3, 1425), ShortOffsetRunHeader::new(5, 4957), - ShortOffsetRunHeader::new(249, 5906), ShortOffsetRunHeader::new(251, 8204), - ShortOffsetRunHeader::new(347, 11503), ShortOffsetRunHeader::new(351, 12330), - ShortOffsetRunHeader::new(357, 42607), ShortOffsetRunHeader::new(361, 43010), - ShortOffsetRunHeader::new(369, 64286), ShortOffsetRunHeader::new(435, 65024), - ShortOffsetRunHeader::new(437, 65438), ShortOffsetRunHeader::new(441, 66045), - ShortOffsetRunHeader::new(443, 68097), ShortOffsetRunHeader::new(449, 68900), - ShortOffsetRunHeader::new(461, 69291), ShortOffsetRunHeader::new(465, 71727), - ShortOffsetRunHeader::new(601, 73459), ShortOffsetRunHeader::new(669, 78912), - ShortOffsetRunHeader::new(679, 90398), ShortOffsetRunHeader::new(683, 92912), - ShortOffsetRunHeader::new(687, 94031), ShortOffsetRunHeader::new(691, 113821), - ShortOffsetRunHeader::new(699, 118528), ShortOffsetRunHeader::new(701, 119141), - ShortOffsetRunHeader::new(705, 121344), ShortOffsetRunHeader::new(717, 122880), - ShortOffsetRunHeader::new(729, 123566), ShortOffsetRunHeader::new(743, 124140), - ShortOffsetRunHeader::new(747, 125136), ShortOffsetRunHeader::new(759, 917536), + ShortOffsetRunHeader::new(0, 768), + ShortOffsetRunHeader::new(1, 1155), + ShortOffsetRunHeader::new(3, 1425), + ShortOffsetRunHeader::new(5, 4957), + ShortOffsetRunHeader::new(249, 5906), + ShortOffsetRunHeader::new(251, 8204), + ShortOffsetRunHeader::new(347, 11503), + ShortOffsetRunHeader::new(351, 12330), + ShortOffsetRunHeader::new(357, 42607), + ShortOffsetRunHeader::new(361, 43010), + ShortOffsetRunHeader::new(369, 64286), + ShortOffsetRunHeader::new(435, 65024), + ShortOffsetRunHeader::new(437, 65438), + ShortOffsetRunHeader::new(441, 66045), + ShortOffsetRunHeader::new(443, 68097), + ShortOffsetRunHeader::new(449, 68900), + ShortOffsetRunHeader::new(461, 69291), + ShortOffsetRunHeader::new(465, 71727), + ShortOffsetRunHeader::new(601, 73459), + ShortOffsetRunHeader::new(669, 78912), + ShortOffsetRunHeader::new(679, 90398), + ShortOffsetRunHeader::new(683, 92912), + ShortOffsetRunHeader::new(687, 94031), + ShortOffsetRunHeader::new(691, 113821), + ShortOffsetRunHeader::new(699, 118528), + ShortOffsetRunHeader::new(701, 119141), + ShortOffsetRunHeader::new(705, 121344), + ShortOffsetRunHeader::new(717, 122880), + ShortOffsetRunHeader::new(729, 123566), + ShortOffsetRunHeader::new(743, 124140), + ShortOffsetRunHeader::new(747, 125136), + ShortOffsetRunHeader::new(759, 917536), ShortOffsetRunHeader::new(763, 2032112), ]; static OFFSETS: [u8; 767] = [ 0, 112, 0, 7, 0, 45, 1, 1, 1, 2, 1, 2, 1, 1, 72, 11, 48, 21, 16, 1, 101, 7, 2, 6, 2, 2, 1, - 4, 35, 1, 30, 27, 91, 11, 58, 9, 9, 1, 24, 4, 1, 9, 1, 3, 1, 5, 43, 3, 59, 9, 42, 24, 1, 32, - 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 29, 1, 58, 1, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 26, 1, 2, - 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, - 1, 1, 58, 1, 1, 2, 1, 4, 8, 1, 7, 3, 10, 2, 30, 1, 59, 1, 1, 1, 12, 1, 9, 1, 40, 1, 3, 1, - 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 2, 1, 1, 3, 3, 1, 4, 7, 2, 11, 2, 28, - 2, 57, 2, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 29, 1, 72, 1, 4, 1, 2, 3, 1, 1, 8, 1, 81, 1, 2, 7, - 12, 8, 98, 1, 2, 9, 11, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, - 102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 0, 3, 0, 4, 28, 3, - 29, 2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 9, 1, 45, 3, 1, 1, 117, 2, 34, 1, 118, 3, 4, 2, 9, - 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 48, 46, 2, 12, 20, 4, 48, - 10, 4, 3, 38, 9, 12, 2, 32, 4, 2, 6, 56, 1, 1, 2, 3, 1, 1, 5, 56, 8, 2, 2, 152, 3, 1, 13, 1, - 7, 4, 1, 6, 1, 3, 2, 198, 64, 0, 1, 195, 33, 0, 3, 141, 1, 96, 32, 0, 6, 105, 2, 0, 4, 1, - 10, 32, 2, 80, 2, 0, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 1, 1, - 44, 3, 48, 1, 2, 4, 2, 2, 2, 1, 36, 1, 67, 6, 2, 2, 2, 2, 12, 1, 8, 1, 47, 1, 51, 1, 1, 3, - 2, 2, 5, 2, 1, 1, 42, 2, 8, 1, 238, 1, 2, 1, 4, 1, 0, 1, 0, 16, 16, 16, 0, 2, 0, 1, 226, 1, - 149, 5, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 65, 5, 0, 2, 77, 6, 70, 11, 49, 4, 123, - 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 7, 1, 61, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, - 1, 1, 8, 4, 2, 1, 95, 3, 2, 4, 6, 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 1, 1, 1, 1, 12, 1, 9, - 1, 14, 7, 3, 5, 67, 1, 2, 6, 1, 1, 2, 1, 1, 3, 4, 3, 1, 1, 14, 2, 85, 8, 2, 3, 1, 1, 23, 1, - 81, 1, 2, 6, 1, 1, 2, 1, 1, 2, 1, 2, 235, 1, 2, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, - 106, 1, 1, 1, 2, 8, 101, 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 245, 1, 10, 4, 4, 1, 144, 4, 2, 2, - 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, 3, 1, 1, 201, 7, 1, 6, - 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, 1, 1, 1, 0, 2, 11, - 2, 52, 5, 5, 3, 23, 1, 0, 1, 6, 15, 0, 12, 3, 3, 0, 5, 59, 7, 0, 1, 63, 4, 81, 1, 11, 2, 0, - 2, 0, 46, 2, 23, 0, 5, 3, 6, 8, 8, 2, 7, 30, 4, 148, 3, 0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, - 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 100, 1, 160, 7, 0, 1, 61, 4, 0, 4, 254, 2, 243, 1, 2, 1, - 7, 2, 5, 1, 0, 7, 109, 7, 0, 96, 128, 240, 0, + 4, 35, 1, 30, 27, 91, 11, 58, 9, 9, 1, 24, 4, 1, 9, 1, 3, 1, 5, 43, 3, 59, 9, 42, 24, 1, + 32, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 29, 1, 58, 1, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 26, + 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, + 22, 6, 1, 1, 58, 1, 1, 2, 1, 4, 8, 1, 7, 3, 10, 2, 30, 1, 59, 1, 1, 1, 12, 1, 9, 1, 40, 1, + 3, 1, 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 2, 1, 1, 3, 3, 1, 4, 7, 2, 11, + 2, 28, 2, 57, 2, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 29, 1, 72, 1, 4, 1, 2, 3, 1, 1, 8, 1, 81, + 1, 2, 7, 12, 8, 98, 1, 2, 9, 11, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, + 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 0, 3, 0, + 4, 28, 3, 29, 2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 9, 1, 45, 3, 1, 1, 117, 2, 34, 1, 118, 3, + 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 48, 46, 2, 12, + 20, 4, 48, 10, 4, 3, 38, 9, 12, 2, 32, 4, 2, 6, 56, 1, 1, 2, 3, 1, 1, 5, 56, 8, 2, 2, 152, + 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 198, 64, 0, 1, 195, 33, 0, 3, 141, 1, 96, 32, 0, 6, 105, + 2, 0, 4, 1, 10, 32, 2, 80, 2, 0, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, + 25, 11, 1, 1, 44, 3, 48, 1, 2, 4, 2, 2, 2, 1, 36, 1, 67, 6, 2, 2, 2, 2, 12, 1, 8, 1, 47, 1, + 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 42, 2, 8, 1, 238, 1, 2, 1, 4, 1, 0, 1, 0, 16, 16, 16, 0, 2, + 0, 1, 226, 1, 149, 5, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 65, 5, 0, 2, 77, 6, 70, + 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 7, 1, 61, 3, 36, 5, 1, 8, 62, + 1, 12, 2, 52, 9, 1, 1, 8, 4, 2, 1, 95, 3, 2, 4, 6, 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 1, + 1, 1, 1, 12, 1, 9, 1, 14, 7, 3, 5, 67, 1, 2, 6, 1, 1, 2, 1, 1, 3, 4, 3, 1, 1, 14, 2, 85, 8, + 2, 3, 1, 1, 23, 1, 81, 1, 2, 6, 1, 1, 2, 1, 1, 2, 1, 2, 235, 1, 2, 4, 6, 2, 1, 2, 27, 2, + 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 8, 101, 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 245, 1, 10, 4, + 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, + 3, 1, 1, 201, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, + 2, 3, 1, 1, 1, 0, 2, 11, 2, 52, 5, 5, 3, 23, 1, 0, 1, 6, 15, 0, 12, 3, 3, 0, 5, 59, 7, 0, + 1, 63, 4, 81, 1, 11, 2, 0, 2, 0, 46, 2, 23, 0, 5, 3, 6, 8, 8, 2, 7, 30, 4, 148, 3, 0, 55, + 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 100, 1, 160, 7, 0, 1, 61, + 4, 0, 4, 254, 2, 243, 1, 2, 1, 7, 2, 5, 1, 0, 7, 109, 7, 0, 96, 128, 240, 0, ]; + #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -464,14 +408,13 @@ pub mod grapheme_extend { } } -#[rustfmt::skip] pub mod lowercase { static BITSET_CHUNKS_MAP: [u8; 123] = [ 12, 17, 0, 0, 9, 0, 0, 13, 14, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, - 3, 18, 0, 7, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, + 0, 3, 18, 0, 7, ]; static BITSET_INDEX_CHUNKS: [[u8; 16]; 20] = [ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], @@ -555,66 +498,107 @@ pub mod lowercase { 0b1110101111000000000000000000000000001111111111111111111111111100, ]; static BITSET_MAPPING: [(u8, u8); 22] = [ - (0, 64), (1, 184), (1, 182), (1, 179), (1, 172), (1, 168), (1, 161), (1, 146), (1, 144), - (1, 140), (1, 136), (1, 132), (2, 146), (2, 144), (2, 83), (3, 93), (3, 147), (3, 133), - (4, 12), (4, 6), (5, 187), (6, 78), + (0, 64), + (1, 184), + (1, 182), + (1, 179), + (1, 172), + (1, 168), + (1, 161), + (1, 146), + (1, 144), + (1, 140), + (1, 136), + (1, 132), + (2, 146), + (2, 144), + (2, 83), + (3, 93), + (3, 147), + (3, 133), + (4, 12), + (4, 6), + (5, 187), + (6, 78), ]; pub const fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); - (c as u32) >= 0xaa && - super::bitset_search( - c as u32, - &BITSET_CHUNKS_MAP, - &BITSET_INDEX_CHUNKS, - &BITSET_CANONICAL, - &BITSET_MAPPING, - ) + (c as u32) >= 0xaa + && super::bitset_search( + c as u32, + &BITSET_CHUNKS_MAP, + &BITSET_INDEX_CHUNKS, + &BITSET_CANONICAL, + &BITSET_MAPPING, + ) } } -#[rustfmt::skip] pub mod n { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 43] = [ - ShortOffsetRunHeader::new(0, 1632), ShortOffsetRunHeader::new(7, 2406), - ShortOffsetRunHeader::new(13, 4160), ShortOffsetRunHeader::new(47, 4969), - ShortOffsetRunHeader::new(51, 5870), ShortOffsetRunHeader::new(53, 6470), - ShortOffsetRunHeader::new(61, 8304), ShortOffsetRunHeader::new(77, 9312), - ShortOffsetRunHeader::new(87, 10102), ShortOffsetRunHeader::new(91, 11517), - ShortOffsetRunHeader::new(93, 12295), ShortOffsetRunHeader::new(95, 12690), - ShortOffsetRunHeader::new(101, 42528), ShortOffsetRunHeader::new(113, 43056), - ShortOffsetRunHeader::new(117, 44016), ShortOffsetRunHeader::new(129, 65296), - ShortOffsetRunHeader::new(131, 65799), ShortOffsetRunHeader::new(133, 66273), - ShortOffsetRunHeader::new(139, 67672), ShortOffsetRunHeader::new(151, 68858), - ShortOffsetRunHeader::new(181, 69216), ShortOffsetRunHeader::new(187, 70736), - ShortOffsetRunHeader::new(207, 71248), ShortOffsetRunHeader::new(211, 71904), - ShortOffsetRunHeader::new(219, 72688), ShortOffsetRunHeader::new(223, 73552), - ShortOffsetRunHeader::new(233, 74752), ShortOffsetRunHeader::new(237, 90416), - ShortOffsetRunHeader::new(239, 92768), ShortOffsetRunHeader::new(241, 93552), - ShortOffsetRunHeader::new(249, 93824), ShortOffsetRunHeader::new(251, 94196), - ShortOffsetRunHeader::new(253, 118000), ShortOffsetRunHeader::new(255, 119488), - ShortOffsetRunHeader::new(257, 120782), ShortOffsetRunHeader::new(263, 123200), - ShortOffsetRunHeader::new(265, 123632), ShortOffsetRunHeader::new(267, 124144), - ShortOffsetRunHeader::new(269, 125127), ShortOffsetRunHeader::new(273, 126065), - ShortOffsetRunHeader::new(277, 127232), ShortOffsetRunHeader::new(287, 130032), + ShortOffsetRunHeader::new(0, 1632), + ShortOffsetRunHeader::new(7, 2406), + ShortOffsetRunHeader::new(13, 4160), + ShortOffsetRunHeader::new(47, 4969), + ShortOffsetRunHeader::new(51, 5870), + ShortOffsetRunHeader::new(53, 6470), + ShortOffsetRunHeader::new(61, 8304), + ShortOffsetRunHeader::new(77, 9312), + ShortOffsetRunHeader::new(87, 10102), + ShortOffsetRunHeader::new(91, 11517), + ShortOffsetRunHeader::new(93, 12295), + ShortOffsetRunHeader::new(95, 12690), + ShortOffsetRunHeader::new(101, 42528), + ShortOffsetRunHeader::new(113, 43056), + ShortOffsetRunHeader::new(117, 44016), + ShortOffsetRunHeader::new(129, 65296), + ShortOffsetRunHeader::new(131, 65799), + ShortOffsetRunHeader::new(133, 66273), + ShortOffsetRunHeader::new(139, 67672), + ShortOffsetRunHeader::new(151, 68858), + ShortOffsetRunHeader::new(181, 69216), + ShortOffsetRunHeader::new(187, 70736), + ShortOffsetRunHeader::new(207, 71248), + ShortOffsetRunHeader::new(211, 71904), + ShortOffsetRunHeader::new(219, 72688), + ShortOffsetRunHeader::new(223, 73552), + ShortOffsetRunHeader::new(233, 74752), + ShortOffsetRunHeader::new(237, 90416), + ShortOffsetRunHeader::new(239, 92768), + ShortOffsetRunHeader::new(241, 93552), + ShortOffsetRunHeader::new(249, 93824), + ShortOffsetRunHeader::new(251, 94196), + ShortOffsetRunHeader::new(253, 118000), + ShortOffsetRunHeader::new(255, 119488), + ShortOffsetRunHeader::new(257, 120782), + ShortOffsetRunHeader::new(263, 123200), + ShortOffsetRunHeader::new(265, 123632), + ShortOffsetRunHeader::new(267, 124144), + ShortOffsetRunHeader::new(269, 125127), + ShortOffsetRunHeader::new(273, 126065), + ShortOffsetRunHeader::new(277, 127232), + ShortOffsetRunHeader::new(287, 130032), ShortOffsetRunHeader::new(289, 1244154), ]; static OFFSETS: [u8; 291] = [ 178, 2, 5, 1, 2, 3, 0, 10, 134, 10, 198, 10, 0, 10, 118, 10, 4, 6, 108, 10, 118, 10, 118, 10, 2, 6, 110, 13, 115, 10, 8, 7, 103, 10, 104, 7, 7, 19, 109, 10, 96, 10, 118, 10, 70, 20, - 0, 10, 70, 10, 0, 20, 0, 3, 239, 10, 6, 10, 22, 10, 0, 10, 128, 11, 165, 10, 6, 10, 182, 10, - 86, 10, 134, 10, 6, 10, 0, 1, 3, 6, 6, 10, 198, 51, 2, 5, 0, 60, 78, 22, 0, 30, 0, 1, 0, 1, - 25, 9, 14, 3, 0, 4, 138, 10, 30, 8, 1, 15, 32, 10, 39, 15, 0, 10, 188, 10, 0, 6, 154, 10, - 38, 10, 198, 10, 22, 10, 86, 10, 0, 10, 0, 10, 0, 45, 12, 57, 17, 2, 0, 27, 36, 4, 29, 1, 8, - 1, 134, 5, 202, 10, 0, 8, 25, 7, 39, 9, 75, 5, 22, 6, 160, 2, 2, 16, 2, 46, 64, 9, 52, 2, - 30, 3, 75, 5, 104, 8, 24, 8, 41, 7, 0, 6, 48, 10, 6, 10, 0, 31, 158, 10, 42, 4, 112, 7, 134, - 30, 128, 10, 60, 10, 144, 10, 7, 20, 251, 10, 0, 10, 118, 10, 0, 10, 102, 10, 6, 20, 76, 12, - 0, 19, 93, 10, 0, 10, 86, 29, 227, 10, 70, 10, 54, 10, 0, 10, 102, 21, 0, 111, 0, 10, 0, 10, - 86, 10, 134, 10, 1, 7, 0, 10, 0, 23, 0, 3, 0, 10, 0, 20, 12, 20, 108, 25, 0, 50, 0, 10, 0, - 10, 0, 10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0, + 0, 10, 70, 10, 0, 20, 0, 3, 239, 10, 6, 10, 22, 10, 0, 10, 128, 11, 165, 10, 6, 10, 182, + 10, 86, 10, 134, 10, 6, 10, 0, 1, 3, 6, 6, 10, 198, 51, 2, 5, 0, 60, 78, 22, 0, 30, 0, 1, + 0, 1, 25, 9, 14, 3, 0, 4, 138, 10, 30, 8, 1, 15, 32, 10, 39, 15, 0, 10, 188, 10, 0, 6, 154, + 10, 38, 10, 198, 10, 22, 10, 86, 10, 0, 10, 0, 10, 0, 45, 12, 57, 17, 2, 0, 27, 36, 4, 29, + 1, 8, 1, 134, 5, 202, 10, 0, 8, 25, 7, 39, 9, 75, 5, 22, 6, 160, 2, 2, 16, 2, 46, 64, 9, + 52, 2, 30, 3, 75, 5, 104, 8, 24, 8, 41, 7, 0, 6, 48, 10, 6, 10, 0, 31, 158, 10, 42, 4, 112, + 7, 134, 30, 128, 10, 60, 10, 144, 10, 7, 20, 251, 10, 0, 10, 118, 10, 0, 10, 102, 10, 6, + 20, 76, 12, 0, 19, 93, 10, 0, 10, 86, 29, 227, 10, 70, 10, 54, 10, 0, 10, 102, 21, 0, 111, + 0, 10, 0, 10, 86, 10, 134, 10, 1, 7, 0, 10, 0, 23, 0, 3, 0, 10, 0, 20, 12, 20, 108, 25, 0, + 50, 0, 10, 0, 10, 0, 10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, + 0, 10, 0, ]; + #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -637,14 +621,13 @@ pub mod n { } } -#[rustfmt::skip] pub mod uppercase { static BITSET_CHUNKS_MAP: [u8; 125] = [ 3, 14, 6, 6, 0, 6, 6, 2, 5, 12, 6, 15, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 7, 6, 13, 6, 11, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 16, 6, 6, - 6, 6, 10, 6, 4, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 6, 13, 6, 11, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 16, 6, + 6, 6, 6, 10, 6, 4, ]; static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [ [44, 44, 5, 35, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 5, 0], @@ -712,37 +695,59 @@ pub mod uppercase { 0b1111111100000000111111110000000000111111000000001111111100000000, ]; static BITSET_MAPPING: [(u8, u8); 25] = [ - (0, 182), (0, 74), (0, 166), (0, 162), (0, 159), (0, 150), (0, 148), (0, 142), (0, 134), - (0, 131), (0, 64), (1, 66), (1, 70), (1, 83), (1, 12), (1, 8), (2, 146), (2, 140), (2, 134), - (2, 130), (3, 164), (3, 146), (3, 20), (4, 178), (4, 171), + (0, 182), + (0, 74), + (0, 166), + (0, 162), + (0, 159), + (0, 150), + (0, 148), + (0, 142), + (0, 134), + (0, 131), + (0, 64), + (1, 66), + (1, 70), + (1, 83), + (1, 12), + (1, 8), + (2, 146), + (2, 140), + (2, 134), + (2, 130), + (3, 164), + (3, 146), + (3, 20), + (4, 178), + (4, 171), ]; pub const fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); - (c as u32) >= 0xc0 && - super::bitset_search( - c as u32, - &BITSET_CHUNKS_MAP, - &BITSET_INDEX_CHUNKS, - &BITSET_CANONICAL, - &BITSET_MAPPING, - ) + (c as u32) >= 0xc0 + && super::bitset_search( + c as u32, + &BITSET_CHUNKS_MAP, + &BITSET_INDEX_CHUNKS, + &BITSET_CANONICAL, + &BITSET_MAPPING, + ) } } -#[rustfmt::skip] pub mod white_space { static WHITESPACE_MAP: [u8; 256] = [ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; + #[inline] pub const fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -756,9 +761,8 @@ pub mod white_space { } } -#[rustfmt::skip] pub mod conversions { - const INDEX_MASK: u32 = 0x400000; + const INDEX_MASK: u32 = 1 << 22; pub fn to_lower(c: char) -> [char; 3] { if c.is_ascii() { @@ -770,7 +774,9 @@ pub mod conversions { let u = LOWERCASE_TABLE[i].1; char::from_u32(u).map(|c| [c, '\0', '\0']).unwrap_or_else(|| { // SAFETY: Index comes from statically generated table - unsafe { *LOWERCASE_TABLE_MULTI.get_unchecked((u & (INDEX_MASK - 1)) as usize) } + unsafe { + *LOWERCASE_TABLE_MULTI.get_unchecked((u & (INDEX_MASK - 1)) as usize) + } }) }) .unwrap_or([c, '\0', '\0']) @@ -787,13 +793,16 @@ pub mod conversions { let u = UPPERCASE_TABLE[i].1; char::from_u32(u).map(|c| [c, '\0', '\0']).unwrap_or_else(|| { // SAFETY: Index comes from statically generated table - unsafe { *UPPERCASE_TABLE_MULTI.get_unchecked((u & (INDEX_MASK - 1)) as usize) } + unsafe { + *UPPERCASE_TABLE_MULTI.get_unchecked((u & (INDEX_MASK - 1)) as usize) + } }) }) .unwrap_or([c, '\0', '\0']) } } + #[rustfmt::skip] static LOWERCASE_TABLE: &[(char, u32); 1462] = &[ ('\u{c0}', 224), ('\u{c1}', 225), ('\u{c2}', 226), ('\u{c3}', 227), ('\u{c4}', 228), ('\u{c5}', 229), ('\u{c6}', 230), ('\u{c7}', 231), ('\u{c8}', 232), ('\u{c9}', 233), @@ -1150,11 +1159,12 @@ pub mod conversions { ('\u{1e91d}', 125247), ('\u{1e91e}', 125248), ('\u{1e91f}', 125249), ('\u{1e920}', 125250), ('\u{1e921}', 125251), ]; - + #[rustfmt::skip] static LOWERCASE_TABLE_MULTI: &[[char; 3]; 1] = &[ ['i', '\u{307}', '\u{0}'], ]; + #[rustfmt::skip] static UPPERCASE_TABLE: &[(char, u32); 1554] = &[ ('\u{b5}', 924), ('\u{df}', 4194304), ('\u{e0}', 192), ('\u{e1}', 193), ('\u{e2}', 194), ('\u{e3}', 195), ('\u{e4}', 196), ('\u{e5}', 197), ('\u{e6}', 198), ('\u{e7}', 199), @@ -1534,7 +1544,7 @@ pub mod conversions { ('\u{1e93d}', 125211), ('\u{1e93e}', 125212), ('\u{1e93f}', 125213), ('\u{1e940}', 125214), ('\u{1e941}', 125215), ('\u{1e942}', 125216), ('\u{1e943}', 125217), ]; - + #[rustfmt::skip] static UPPERCASE_TABLE_MULTI: &[[char; 3]; 102] = &[ ['S', 'S', '\u{0}'], ['\u{2bc}', 'N', '\u{0}'], ['J', '\u{30c}', '\u{0}'], ['\u{399}', '\u{308}', '\u{301}'], ['\u{3a5}', '\u{308}', '\u{301}'], diff --git a/src/tools/unicode-table-generator/src/cascading_map.rs b/src/tools/unicode-table-generator/src/cascading_map.rs index 56e6401908dcf..6ad8b12bc7437 100644 --- a/src/tools/unicode-table-generator/src/cascading_map.rs +++ b/src/tools/unicode-table-generator/src/cascading_map.rs @@ -1,9 +1,8 @@ use std::collections::HashMap; -use std::fmt::Write as _; use std::ops::Range; -use crate::fmt_list; use crate::raw_emitter::RawEmitter; +use crate::writeln; impl RawEmitter { pub fn emit_cascading_map(&mut self, ranges: &[Range]) -> bool { @@ -24,8 +23,6 @@ impl RawEmitter { .flat_map(|r| (r.start..r.end).collect::>()) .collect::>(); - println!("there are {} points", points.len()); - // how many distinct ranges need to be counted? let mut codepoints_by_high_bytes = HashMap::>::new(); for point in points { @@ -37,7 +34,7 @@ impl RawEmitter { } let mut bit_for_high_byte = 1u8; - let mut arms = Vec::::new(); + let mut arms = String::new(); let mut high_bytes: Vec = codepoints_by_high_bytes.keys().copied().collect(); high_bytes.sort(); @@ -45,33 +42,33 @@ impl RawEmitter { let codepoints = codepoints_by_high_bytes.get_mut(&high_byte).unwrap(); if codepoints.len() == 1 { let ch = codepoints.pop().unwrap(); - arms.push(format!("{high_byte} => c as u32 == {ch:#04x}")); + writeln!(arms, "{high_byte} => c as u32 == {ch:#04x},"); continue; } // more than 1 codepoint in this arm for codepoint in codepoints { map[(*codepoint & 0xff) as usize] |= bit_for_high_byte; } - arms.push(format!( - "{high_byte} => WHITESPACE_MAP[c as usize & 0xff] & {bit_for_high_byte} != 0" - )); + writeln!( + arms, + "{high_byte} => WHITESPACE_MAP[c as usize & 0xff] & {bit_for_high_byte} != 0," + ); bit_for_high_byte <<= 1; } - writeln!(&mut self.file, "static WHITESPACE_MAP: [u8; 256] = [{}];", fmt_list(map.iter())) - .unwrap(); self.bytes_used += 256; + self.file = format!( + "static WHITESPACE_MAP: [u8; 256] = {map:?}; - writeln!(&mut self.file, "#[inline]").unwrap(); - writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap(); - writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap(); - writeln!(&mut self.file, " match c as u32 >> 8 {{").unwrap(); - for arm in arms { - writeln!(&mut self.file, " {arm},").unwrap(); - } - writeln!(&mut self.file, " _ => false,").unwrap(); - writeln!(&mut self.file, " }}").unwrap(); - writeln!(&mut self.file, "}}").unwrap(); + #[inline] + pub const fn lookup(c: char) -> bool {{ + debug_assert!(!c.is_ascii()); + match c as u32 >> 8 {{ + {arms}\ + _ => false, + }} + }}" + ); true } diff --git a/src/tools/unicode-table-generator/src/case_mapping.rs b/src/tools/unicode-table-generator/src/case_mapping.rs index 49aef3ec33ec7..d634b58b6d4d7 100644 --- a/src/tools/unicode-table-generator/src/case_mapping.rs +++ b/src/tools/unicode-table-generator/src/case_mapping.rs @@ -1,23 +1,18 @@ use std::char; use std::collections::BTreeMap; -use std::fmt::{self, Write}; -use crate::{UnicodeData, fmt_list}; +use crate::{CharEscape, UnicodeData, fmt_list}; const INDEX_MASK: u32 = 1 << 22; pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [usize; 2]) { - let mut file = String::new(); - - write!(file, "const INDEX_MASK: u32 = 0x{INDEX_MASK:x};").unwrap(); - file.push_str("\n\n"); - file.push_str(HEADER.trim_start()); - file.push('\n'); let (lower_tables, lower_size) = generate_tables("LOWER", &data.to_lower); - file.push_str(&lower_tables); - file.push_str("\n\n"); let (upper_tables, upper_size) = generate_tables("UPPER", &data.to_upper); - file.push_str(&upper_tables); + let file = format!( + "{HEADER} + {lower_tables} + {upper_tables}" + ); (file, [lower_size, upper_size]) } @@ -47,43 +42,23 @@ fn generate_tables(case: &str, data: &BTreeMap) -> (String, usize mappings.push((CharEscape(key), value)); } - let mut tables = String::new(); - let mut size = 0; - - size += size_of_val(mappings.as_slice()); - write!( - tables, - "static {}CASE_TABLE: &[(char, u32); {}] = &[{}];", - case, - mappings.len(), - fmt_list(mappings), - ) - .unwrap(); - - tables.push_str("\n\n"); - - size += size_of_val(multis.as_slice()); - write!( - tables, - "static {}CASE_TABLE_MULTI: &[[char; 3]; {}] = &[{}];", - case, - multis.len(), - fmt_list(multis), - ) - .unwrap(); - - (tables, size) -} - -struct CharEscape(char); - -impl fmt::Debug for CharEscape { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "'{}'", self.0.escape_default()) - } + let size = size_of_val(mappings.as_slice()) + size_of_val(multis.as_slice()); + let file = format!( + " + #[rustfmt::skip]\nstatic {case}CASE_TABLE: &[(char, u32); {mappings_len}] = &[{mappings}]; + #[rustfmt::skip]\nstatic {case}CASE_TABLE_MULTI: &[[char; 3]; {multis_len}] = &[{multis}];", + mappings = fmt_list(&mappings), + mappings_len = mappings.len(), + multis = fmt_list(&multis), + multis_len = multis.len(), + ); + + (file, size) } static HEADER: &str = r" +const INDEX_MASK: u32 = 1 << 22; + pub fn to_lower(c: char) -> [char; 3] { if c.is_ascii() { [(c as u8).to_ascii_lowercase() as char, '\0', '\0'] diff --git a/src/tools/unicode-table-generator/src/fmt_helpers.rs b/src/tools/unicode-table-generator/src/fmt_helpers.rs new file mode 100644 index 0000000000000..68fcbb5c53909 --- /dev/null +++ b/src/tools/unicode-table-generator/src/fmt_helpers.rs @@ -0,0 +1,66 @@ +use std::fmt; + +// Convenience macros for writing and unwrapping. +#[macro_export] +macro_rules! writeln { + ($($args:tt)*) => {{ + use std::fmt::Write as _; + std::writeln!($($args)*).unwrap(); + }}; +} +#[macro_export] +macro_rules! write { + ($($args:tt)*) => {{ + use std::fmt::Write as _; + std::write!($($args)*).unwrap(); + }}; +} + +pub fn fmt_list(values: impl IntoIterator) -> String { + let pieces = values.into_iter().map(|b| format!("{b:?}, ")); + let mut out = String::new(); + let mut line = String::from("\n "); + for piece in pieces { + if line.len() + piece.len() < 98 { + line.push_str(&piece); + } else { + writeln!(out, "{}", line.trim_end()); + line = format!(" {piece}"); + } + } + writeln!(out, "{}", line.trim_end()); + out +} + +/// Wrapper type for formatting a `T` using its `Binary` implementation. +#[derive(Copy, Clone)] +pub struct Bin(pub T); + +impl fmt::Debug for Bin { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let bits = size_of::() * 8; + std::write!(f, "0b{:0bits$b}", self.0) + } +} + +impl fmt::Display for Bin { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} + +/// Wrapper type for formatting a `char` using `escape_default`. +#[derive(Copy, Clone)] +pub struct CharEscape(pub char); + +impl fmt::Debug for CharEscape { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + std::write!(f, "'{}'", self.0.escape_default()) + } +} + +impl fmt::Display for CharEscape { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs index ded9205ffc4b9..d30a461dbe8b6 100644 --- a/src/tools/unicode-table-generator/src/main.rs +++ b/src/tools/unicode-table-generator/src/main.rs @@ -72,18 +72,18 @@ //! or not. use std::collections::{BTreeMap, HashMap}; -use std::fmt; -use std::fmt::Write; use std::ops::Range; use ucd_parse::Codepoints; mod cascading_map; mod case_mapping; +mod fmt_helpers; mod raw_emitter; mod skiplist; mod unicode_download; +pub use fmt_helpers::*; use raw_emitter::{RawEmitter, emit_codepoints, emit_whitespace}; static PROPERTIES: &[&str] = &[ @@ -224,12 +224,13 @@ fn main() { let ranges_by_property = &unicode_data.ranges; if let Some(path) = test_path { - std::fs::write(&path, generate_tests(&unicode_data).unwrap()).unwrap(); + std::fs::write(&path, generate_tests(&unicode_data)).unwrap(); } let mut table_file = String::new(); - table_file.push_str( - "//! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!\n", + writeln!( + table_file, + "//! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!", ); let mut total_bytes = 0; @@ -245,8 +246,9 @@ fn main() { } modules.push((property.to_lowercase().to_string(), emitter.file)); - table_file.push_str(&format!( - "// {:16}: {:5} bytes, {:6} codepoints in {:3} ranges (U+{:06X} - U+{:06X}) using {}\n", + writeln!( + table_file, + "// {:16}: {:5} bytes, {:6} codepoints in {:3} ranges (U+{:06X} - U+{:06X}) using {}", property, emitter.bytes_used, datapoints, @@ -254,47 +256,38 @@ fn main() { ranges.first().unwrap().start, ranges.last().unwrap().end, emitter.desc, - )); + ); total_bytes += emitter.bytes_used; } let (conversions, sizes) = case_mapping::generate_case_mapping(&unicode_data); for (name, size) in ["to_lower", "to_upper"].iter().zip(sizes) { - table_file.push_str(&format!("// {:16}: {:5} bytes\n", name, size)); + writeln!(table_file, "// {:16}: {:5} bytes", name, size); total_bytes += size; } - table_file.push_str(&format!("// {:16}: {:5} bytes\n", "Total", total_bytes)); - - // Include the range search function - table_file.push('\n'); - table_file.push_str(include_str!("range_search.rs")); - table_file.push('\n'); - - table_file.push_str(&version()); + writeln!(table_file, "// {:16}: {:5} bytes\n", "Total", total_bytes); - table_file.push('\n'); + writeln!(table_file, "{}\n", version()); + writeln!(table_file, "use super::rt::*;\n"); modules.push((String::from("conversions"), conversions)); for (name, contents) in modules { - table_file.push_str("#[rustfmt::skip]\n"); - table_file.push_str(&format!("pub mod {name} {{\n")); - for line in contents.lines() { - if !line.trim().is_empty() { - table_file.push_str(" "); - table_file.push_str(line); - } - table_file.push('\n'); + writeln!(table_file, "pub mod {name} {{"); + for line in contents.trim().lines() { + writeln!(table_file, " {line}"); } - table_file.push_str("}\n\n"); + writeln!(table_file, "}}\n"); } - std::fs::write(&write_location, format!("{}\n", table_file.trim_end())).unwrap(); + std::fs::write(&write_location, table_file).unwrap(); + rustfmt(&write_location); } -fn version() -> String { - let mut out = String::new(); - out.push_str("pub const UNICODE_VERSION: (u8, u8, u8) = "); +fn rustfmt(path: &str) { + std::process::Command::new("rustfmt").arg(path).status().expect("rustfmt failed"); +} +fn version() -> String { let readme = std::fs::read_to_string(std::path::Path::new(UNICODE_DIRECTORY).join("ReadMe.txt")) .unwrap(); @@ -306,65 +299,44 @@ fn version() -> String { readme[start..end].split('.').map(|v| v.parse::().expect(v)).collect::>(); let [major, minor, micro] = [version[0], version[1], version[2]]; - out.push_str(&format!("({major}, {minor}, {micro});\n")); - out -} - -fn fmt_list(values: impl IntoIterator) -> String { - let pieces = values.into_iter().map(|b| format!("{b:?}, ")).collect::>(); - let mut out = String::new(); - let mut line = String::from("\n "); - for piece in pieces { - if line.len() + piece.len() < 98 { - line.push_str(&piece); - } else { - out.push_str(line.trim_end()); - out.push('\n'); - line = format!(" {piece}"); - } - } - out.push_str(line.trim_end()); - out.push('\n'); - out + format!("pub const UNICODE_VERSION: (u8, u8, u8) = ({major}, {minor}, {micro});") } -fn generate_tests(data: &UnicodeData) -> Result { - let mut s = String::new(); - writeln!(s, "#![feature(core_intrinsics)]")?; - writeln!(s, "#![allow(internal_features, dead_code)]")?; - writeln!(s, "// ignore-tidy-filelength")?; - writeln!(s, "use std::intrinsics;")?; - writeln!(s, "mod unicode_data;")?; - writeln!(s, "fn main() {{")?; +fn generate_tests(data: &UnicodeData) -> String { + let mut s = format!( + "#![feature(core_intrinsics)] + #![allow(internal_features, dead_code)] + // ignore-tidy-filelength + use std::intrinsics; + mod unicode_data + fn main() {{" + ); for (property, ranges) in &data.ranges { let prop = property.to_lowercase(); - writeln!(s, r#" println!("Testing {prop}");"#)?; - writeln!(s, " {prop}_true();")?; - writeln!(s, " {prop}_false();")?; let (is_true, is_false): (Vec<_>, Vec<_>) = (char::MIN..=char::MAX) .filter(|c| !c.is_ascii()) .map(u32::from) .partition(|c| ranges.iter().any(|r| r.contains(c))); - writeln!(s, " fn {prop}_true() {{")?; - generate_asserts(&mut s, &prop, &is_true, true)?; - writeln!(s, " }}")?; - - writeln!(s, " fn {prop}_false() {{")?; - generate_asserts(&mut s, &prop, &is_false, false)?; - writeln!(s, " }}")?; + writeln!( + s, + "println!(\"Testing {prop}\"); + {prop}_true(); + {prop}_false(); + fn {prop}_true() {{\n{}\n}} + fn {prop}_false() {{\n{}\n}}", + generate_asserts(&prop, &is_true, true), + generate_asserts(&prop, &is_false, false) + ); } for (name, conversion) in ["to_lower", "to_upper"].iter().zip([&data.to_lower, &data.to_upper]) { - writeln!(s, r#" println!("Testing {name}");"#)?; + writeln!(s, r#"println!("Testing {name}");"#); for (c, mapping) in conversion { let c = char::from_u32(*c).unwrap(); let mapping = mapping.map(|c| char::from_u32(c).unwrap()); - writeln!( - s, - r#" assert_eq!(unicode_data::conversions::{name}({c:?}), {mapping:?});"# - )?; + writeln!(s, "assert_eq!(unicode_data::conversions::{name}({c:?}), {mapping:?});"); } let unmapped: Vec<_> = (char::MIN..=char::MAX) .filter(|c| !c.is_ascii()) @@ -375,40 +347,36 @@ fn generate_tests(data: &UnicodeData) -> Result { for range in unmapped_ranges { let start = char::from_u32(range.start).unwrap(); let end = char::from_u32(range.end - 1).unwrap(); - writeln!(s, " for c in {start:?}..={end:?} {{")?; writeln!( s, - r#" assert_eq!(unicode_data::conversions::{name}(c), [c, '\0', '\0']);"# - )?; - - writeln!(s, " }}")?; + r#"for c in {start:?}..={end:?} {{ + assert_eq!(unicode_data::conversions::{name}(c), [c, '\0', '\0']); + }}"# + ); } } - writeln!(s, "}}")?; - Ok(s) + writeln!(s, "}}"); + s } -fn generate_asserts( - s: &mut String, - prop: &str, - points: &[u32], - truthy: bool, -) -> Result<(), fmt::Error> { +fn generate_asserts(prop: &str, points: &[u32], truthy: bool) -> String { + let mut s = String::new(); let truthy = if truthy { "" } else { "!" }; for range in ranges_from_set(points) { let start = char::from_u32(range.start).unwrap(); let end = char::from_u32(range.end - 1).unwrap(); match range.len() { - 1 => writeln!(s, " assert!({truthy}unicode_data::{prop}::lookup({start:?}));")?, - _ => { - writeln!(s, " for c in {start:?}..={end:?} {{")?; - writeln!(s, " assert!({truthy}unicode_data::{prop}::lookup(c));")?; - writeln!(s, " }}")?; - } + 1 => writeln!(s, "assert!({truthy}unicode_data::{prop}::lookup({start:?}));"), + _ => writeln!( + s, + "for c in {start:?}..={end:?} {{ + assert!({truthy}unicode_data::{prop}::lookup(c)); + }}" + ), } } - Ok(()) + s } /// Group the elements of `set` into contigous ranges diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs index 297965615c1a5..048507a06d44f 100644 --- a/src/tools/unicode-table-generator/src/raw_emitter.rs +++ b/src/tools/unicode-table-generator/src/raw_emitter.rs @@ -1,8 +1,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; -use std::fmt::{self, Write}; use std::ops::Range; -use crate::fmt_list; +use crate::{Bin, fmt_list, writeln}; #[derive(Clone)] pub struct RawEmitter { @@ -16,13 +15,6 @@ impl RawEmitter { RawEmitter { file: String::new(), bytes_used: 0, desc: String::new() } } - fn blank_line(&mut self) { - if self.file.is_empty() || self.file.ends_with("\n\n") { - return; - } - writeln!(&mut self.file).unwrap(); - } - fn emit_bitset(&mut self, ranges: &[Range]) -> Result<(), String> { let first_code_point = ranges.first().unwrap().start; let last_code_point = ranges.last().unwrap().end; @@ -68,48 +60,33 @@ impl RawEmitter { } self.emit_chunk_map(word_indices[&0], &compressed_words, best.unwrap().0); - struct Bits(u64); - impl fmt::Debug for Bits { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "0b{:064b}", self.0) - } - } - - writeln!( - &mut self.file, - "static BITSET_CANONICAL: [u64; {}] = [{}];", - canonicalized.canonical_words.len(), - fmt_list(canonicalized.canonical_words.iter().map(|v| Bits(*v))), - ) - .unwrap(); self.bytes_used += 8 * canonicalized.canonical_words.len(); - writeln!( - &mut self.file, - "static BITSET_MAPPING: [(u8, u8); {}] = [{}];", - canonicalized.canonicalized_words.len(), - fmt_list(&canonicalized.canonicalized_words), - ) - .unwrap(); // 8 bit index into shifted words, 7 bits for shift + optional flip // We only need it for the words that we removed by applying a shift and // flip to them. self.bytes_used += 2 * canonicalized.canonicalized_words.len(); - self.blank_line(); - - writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap(); - writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap(); - if first_code_point > 0x7f { - writeln!(&mut self.file, " (c as u32) >= {first_code_point:#04x} &&").unwrap(); - } - writeln!(&mut self.file, " super::bitset_search(").unwrap(); - writeln!(&mut self.file, " c as u32,").unwrap(); - writeln!(&mut self.file, " &BITSET_CHUNKS_MAP,").unwrap(); - writeln!(&mut self.file, " &BITSET_INDEX_CHUNKS,").unwrap(); - writeln!(&mut self.file, " &BITSET_CANONICAL,").unwrap(); - writeln!(&mut self.file, " &BITSET_MAPPING,").unwrap(); - writeln!(&mut self.file, " )").unwrap(); - writeln!(&mut self.file, "}}").unwrap(); + writeln!( + self.file, + "static BITSET_CANONICAL: [u64; {canonical_words_len}] = {canonical_words:?}; + static BITSET_MAPPING: [(u8, u8); {canonicalized_words_len}] = {canonicalized_words:?}; + + pub const fn lookup(c: char) -> bool {{ + debug_assert!(!c.is_ascii()); + (c as u32) >= {first_code_point:#04x} && + super::bitset_search( + c as u32, + &BITSET_CHUNKS_MAP, + &BITSET_INDEX_CHUNKS, + &BITSET_CANONICAL, + &BITSET_MAPPING, + ) + }}", + canonical_words = canonicalized.canonical_words, + canonical_words_len = canonicalized.canonical_words.len(), + canonicalized_words = canonicalized.canonicalized_words, + canonicalized_words_len = canonicalized.canonicalized_words.len(), + ); Ok(()) } @@ -133,29 +110,21 @@ impl RawEmitter { chunk_indices.push(chunk_map[chunk]); } - writeln!( - &mut self.file, - "static BITSET_CHUNKS_MAP: [u8; {}] = [{}];", - chunk_indices.len(), - fmt_list(&chunk_indices), - ) - .unwrap(); self.bytes_used += chunk_indices.len(); writeln!( - &mut self.file, - "static BITSET_INDEX_CHUNKS: [[u8; {}]; {}] = [{}];", - chunk_length, - chunks.len(), - fmt_list(chunks.iter()), - ) - .unwrap(); + self.file, + "static BITSET_CHUNKS_MAP: [u8; {chunk_indices_len}] = {chunk_indices:?}; + static BITSET_INDEX_CHUNKS: [[u8; {chunk_len}]; {chunks_len}] = [{chunks}];", + chunk_indices_len = chunk_indices.len(), + chunk_len = chunk_length, + chunks_len = chunks.len(), + chunks = fmt_list(chunks.iter()), + ); self.bytes_used += chunk_length * chunks.len(); } } pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range]) { - emitter.blank_line(); - let mut bitset = emitter.clone(); let bitset_ok = bitset.emit_bitset(ranges).is_ok(); @@ -172,8 +141,6 @@ pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range]) { } pub fn emit_whitespace(emitter: &mut RawEmitter, ranges: &[Range]) { - emitter.blank_line(); - let mut cascading = emitter.clone(); cascading.emit_cascading_map(ranges); *emitter = cascading; @@ -181,7 +148,7 @@ pub fn emit_whitespace(emitter: &mut RawEmitter, ranges: &[Range]) { } struct Canonicalized { - canonical_words: Vec, + canonical_words: Vec>, canonicalized_words: Vec<(u8, u8)>, /// Maps an input unique word to the associated index (u8) which is into @@ -394,6 +361,7 @@ impl Canonicalized { ) }) .collect::>(); + let canonical_words = canonical_words.into_iter().map(Bin).collect::>(); Canonicalized { unique_mapping, canonical_words, canonicalized_words } } } diff --git a/src/tools/unicode-table-generator/src/skiplist.rs b/src/tools/unicode-table-generator/src/skiplist.rs index 660a8f342f7a7..742d61153db3d 100644 --- a/src/tools/unicode-table-generator/src/skiplist.rs +++ b/src/tools/unicode-table-generator/src/skiplist.rs @@ -1,8 +1,8 @@ -use std::fmt::{self, Write as _}; +use std::fmt::{self}; use std::ops::Range; -use crate::fmt_list; use crate::raw_emitter::RawEmitter; +use crate::writeln; /// This will get packed into a single u32 before inserting into the data set. #[derive(PartialEq)] @@ -68,79 +68,45 @@ impl RawEmitter { assert!(inserted); } - writeln!(&mut self.file, "use super::ShortOffsetRunHeader;\n").unwrap(); - writeln!( - &mut self.file, - "static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; {}] = [{}];", - short_offset_runs.len(), - fmt_list(short_offset_runs.iter()) - ) - .unwrap(); self.bytes_used += 4 * short_offset_runs.len(); - writeln!( - &mut self.file, - "static OFFSETS: [u8; {}] = [{}];", - coded_offsets.len(), - fmt_list(&coded_offsets) - ) - .unwrap(); self.bytes_used += coded_offsets.len(); // The inlining in this code works like the following: // - // The `skip_search` function is always inlined into the parent `lookup` fn, + // The `skip_search` function is always inlined into the parent `lookup_slow` fn, // thus the compiler can generate optimal code based on the referenced `static`s. // - // In the case of ASCII optimization, the lower-bounds check is inlined into - // the caller, and slower-path `skip_search` is outlined into a separate `lookup_slow` fn. - // - // Thus, in both cases, the `skip_search` function is specialized for the `static`s, - // and outlined into the prebuilt `std`. - if first_code_point > 0x7f { - writeln!(&mut self.file, "#[inline]").unwrap(); - writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap(); - writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap(); - writeln!(&mut self.file, " (c as u32) >= {first_code_point:#04x} && lookup_slow(c)") - .unwrap(); - writeln!(&mut self.file, "}}").unwrap(); - writeln!(&mut self.file).unwrap(); - writeln!(&mut self.file, "#[inline(never)]").unwrap(); - writeln!(&mut self.file, "fn lookup_slow(c: char) -> bool {{").unwrap(); - } else { - writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap(); - writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap(); - } - writeln!(&mut self.file, " const {{").unwrap(); - writeln!( - &mut self.file, - " assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);", - ) - .unwrap(); - writeln!(&mut self.file, " let mut i = 0;").unwrap(); - writeln!(&mut self.file, " while i < SHORT_OFFSET_RUNS.len() {{").unwrap(); - writeln!( - &mut self.file, - " assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());", - ) - .unwrap(); - writeln!(&mut self.file, " i += 1;").unwrap(); - writeln!(&mut self.file, " }}").unwrap(); - writeln!(&mut self.file, " }}").unwrap(); - writeln!( - &mut self.file, - " // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`", - ) - .unwrap(); - writeln!( - &mut self.file, - " // and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.", - ) - .unwrap(); - writeln!( - &mut self.file, - " unsafe {{ super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }}" - ) - .unwrap(); - writeln!(&mut self.file, "}}").unwrap(); + // The lower-bounds check is inlined into the caller, and slower-path + // `skip_search` is outlined into a separate `lookup_slow` fn. + assert!(first_code_point > 0x7f); + writeln!(self.file, + "use super::ShortOffsetRunHeader; + + static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; {short_offset_runs_len}] = {short_offset_runs:?}; + static OFFSETS: [u8; {coded_offset_len}] = {coded_offsets:?}; + + #[inline] + pub fn lookup(c: char) -> bool {{ + debug_assert!(!c.is_ascii()); + (c as u32) >= {first_code_point:#04x} && lookup_slow(c) + }} + + #[inline(never)] + fn lookup_slow(c: char) -> bool {{ + const {{ + assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32); + let mut i = 0; + while i < SHORT_OFFSET_RUNS.len() {{ + assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len()); + i += 1; + }} + }} + // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX` + // and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`. + unsafe {{ super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }} + }}", + short_offset_runs_len = short_offset_runs.len(), + coded_offset_len = coded_offsets.len(), + ); } }