From 4ba7bb636953c8bc78d8af24587f75b007218db8 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Jun 2018 14:43:05 +0300 Subject: [PATCH 001/322] initial --- .gitignore | 3 + Cargo.toml | 11 +++ LICENSE-APACHE | 201 +++++++++++++++++++++++++++++++++++++++++++++++++ LICENSE-MIT | 23 ++++++ README.md | 10 +++ src/lib.rs | 186 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 434 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 README.md create mode 100644 src/lib.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000000..693699042b1a --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +**/*.rs.bk +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000000..5aa9632e9e43 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "text_unit" +version = "0.1.0" +authors = ["Aleksey Kladov "] +description = "Newtypes for text offsets" +license = "MIT OR Apache-2.0" +repository = "https://github.com/matklad/text_unit" +documentation = "https://docs.rs/text_unit" + +[dependencies] +serde = { version = "1", optional = true } diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 000000000000..16fe87b06e80 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 000000000000..31aa79387f27 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 000000000000..6887ccf032c3 --- /dev/null +++ b/README.md @@ -0,0 +1,10 @@ +# text_unit + +[![Build Status](https://travis-ci.org/matklad/text_unit.svg?branch=master)](https://travis-ci.org/matklad/text_unit) +[![Crates.io](https://img.shields.io/crates/v/text_unit.svg)](https://crates.io/crates/text_unit) +[![API reference](https://docs.rs/text_unit/badge.svg)](https://docs.rs/text_unit/) + + +A library that provides newtype wrappers for `u32` and `(u32, u32)` for use as text offsets. + +See the [docs](https://docs.rs/text_unit/) for more. diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 000000000000..6c6444685ae7 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,186 @@ +#[cfg(feature = "serde")] +extern crate serde; + +use std::{fmt, ops}; + + +/// An offset into text. +/// Offset is represented as `u32` storing number of utf8-bytes, +/// but most of the clients should treat it like opaque measure. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub struct TextUnit(u32); + +impl TextUnit { + /// `TextUnit` equal to the length of this char. + pub fn of_char(c: char) -> TextUnit { + TextUnit(c.len_utf8() as u32) + } + + /// `TextUnit` equal to the length of this string. 
+ /// + /// # Panics + /// Panics if the length of the string is greater than `u32::max_value()` + pub fn of_str(s: &str) -> TextUnit { + if s.len() > u32::max_value() as usize { + panic!("string is to long") + } + TextUnit(s.len() as u32) + } +} + +impl fmt::Debug for TextUnit { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + ::fmt(self, f) + } +} + +impl fmt::Display for TextUnit { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +impl From for u32 { + fn from(tu: TextUnit) -> u32 { + tu.0 + } +} + +impl From for TextUnit { + fn from(tu: u32) -> TextUnit { + TextUnit(tu) + } +} + +impl ops::Add for TextUnit { + type Output = TextUnit; + fn add(self, rhs: TextUnit) -> TextUnit { + TextUnit(self.0 + rhs.0) + } +} + +impl ops::AddAssign for TextUnit { + fn add_assign(&mut self, rhs: TextUnit) { + self.0 += rhs.0 + } +} + +impl ops::Sub for TextUnit { + type Output = TextUnit; + fn sub(self, rhs: TextUnit) -> TextUnit { + TextUnit(self.0 - rhs.0) + } +} + +impl ops::SubAssign for TextUnit { + fn sub_assign(&mut self, rhs: TextUnit) { + self.0 -= rhs.0 + } +} + +/// A range in the text, represented as a pair of `TextUnit`s. +/// +/// # Panics +/// Slicing a `&str` with `TextRange` panics if the result is +/// not a valid utf8 string. 
+#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct TextRange { + start: TextUnit, + end: TextUnit, +} + +impl fmt::Debug for TextRange { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + ::fmt(self, f) + } +} + +impl fmt::Display for TextRange { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "[{}; {})", self.start(), self.end()) + } +} + +impl TextRange { + /// The left-inclusive range (`[from..to)`) between to points in the text + pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange { + assert!(from <= to, "Invalid text range [{}; {})", from, to); + TextRange { + start: from, + end: to, + } + } + + /// The left-inclusive range (`[offset..offset + len)`) between to points in the text + pub fn offset_len(offset: TextUnit, len: TextUnit) -> TextRange { + TextRange::from_to(offset, offset + len) + } + + /// The inclusive start of this range + pub fn start(&self) -> TextUnit { + self.start + } + + /// The exclusive end of this range + pub fn end(&self) -> TextUnit { + self.end + } + + /// The length of this range + pub fn len(&self) -> TextUnit { + self.end - self.start + } + + /// Is this range empty of any content? 
+ pub fn is_empty(&self) -> bool { + self.start() == self.end() + } +} + +impl ops::Index for str { + type Output = str; + + fn index(&self, index: TextRange) -> &str { + &self[index.start().0 as usize..index.end().0 as usize] + } +} + +impl ops::Index for String { + type Output = str; + + fn index(&self, index: TextRange) -> &str { + &self.as_str()[index] + } +} + +#[cfg(feature = "serde")] +mod serde_impls { + use serde::{Serialize, Serializer, Deserialize, Deserializer}; + use {TextUnit, TextRange}; + + impl Serialize for TextUnit { + fn serialize(&self, serializer: S) -> Result { + self.0.serialize(serializer) + } + } + + impl<'de> Deserialize<'de> for TextUnit { + fn deserialize>(deserializer: D) -> Result { + let value = Deserialize::deserialize(deserializer)?; + Ok(TextUnit(value)) + } + } + + impl Serialize for TextRange { + fn serialize(&self, serializer: S) -> Result { + (self.start, self.end).serialize(serializer) + } + } + + impl<'de> Deserialize<'de> for TextRange { + fn deserialize>(deserializer: D) -> Result { + let (start, end) = Deserialize::deserialize(deserializer)?; + Ok(TextRange { start, end }) + } + } +} From 6f4c8ed5f260fa684bd8cd74f69b47482d8200ca Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Jun 2018 14:44:50 +0300 Subject: [PATCH 002/322] CI --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000000..a7d0a0bc81ab --- /dev/null +++ b/.travis.yml @@ -0,0 +1,2 @@ +language: rust + From 20486e99ee8f43f80981bb6c7f060f8591e5cb36 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 28 Jul 2018 11:54:27 +0300 Subject: [PATCH 003/322] Add more impls --- Cargo.toml | 2 +- src/lib.rs | 72 ++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 60 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5aa9632e9e43..6785d8d6dd43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] 
name = "text_unit" -version = "0.1.0" +version = "0.1.1" authors = ["Aleksey Kladov "] description = "Newtypes for text offsets" license = "MIT OR Apache-2.0" diff --git a/src/lib.rs b/src/lib.rs index 6c6444685ae7..5bb15e1d2ad8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ #[cfg(feature = "serde")] extern crate serde; -use std::{fmt, ops}; +use std::{fmt, ops, iter}; /// An offset into text. @@ -52,29 +52,63 @@ impl From for TextUnit { } } -impl ops::Add for TextUnit { +macro_rules! ops_impls { + ($T:ident, $f:ident, $op:tt, $AT:ident, $af:ident) => { + +impl ops::$T for TextUnit { + type Output = TextUnit; + fn $f(self, rhs: TextUnit) -> TextUnit { + TextUnit(self.0 $op rhs.0) + } +} + +impl<'a> ops::$T<&'a TextUnit> for TextUnit { type Output = TextUnit; - fn add(self, rhs: TextUnit) -> TextUnit { - TextUnit(self.0 + rhs.0) + fn $f(self, rhs: &'a TextUnit) -> TextUnit { + ops::$T::$f(self, *rhs) } } -impl ops::AddAssign for TextUnit { - fn add_assign(&mut self, rhs: TextUnit) { - self.0 += rhs.0 +impl<'a> ops::$T for &'a TextUnit { + type Output = TextUnit; + fn $f(self, rhs: TextUnit) -> TextUnit { + ops::$T::$f(*self, rhs) } } -impl ops::Sub for TextUnit { +impl<'a, 'b> ops::$T<&'a TextUnit> for &'b TextUnit { type Output = TextUnit; - fn sub(self, rhs: TextUnit) -> TextUnit { - TextUnit(self.0 - rhs.0) + fn $f(self, rhs: &'a TextUnit) -> TextUnit { + ops::$T::$f(*self, *rhs) } } -impl ops::SubAssign for TextUnit { - fn sub_assign(&mut self, rhs: TextUnit) { - self.0 -= rhs.0 +impl ops::$AT for TextUnit { + fn $af(&mut self, rhs: TextUnit) { + self.0 = self.0 $op rhs.0 + } +} + +impl<'a> ops::$AT<&'a TextUnit> for TextUnit { + fn $af(&mut self, rhs: &'a TextUnit) { + ops::$AT::$af(self, *rhs) + } +} + }; +} + +ops_impls!(Add, add, +, AddAssign, add_assign); +ops_impls!(Sub, sub, -, SubAssign, sub_assign); + +impl<'a> iter::Sum<&'a TextUnit> for TextUnit { + fn sum>(iter: I) -> TextUnit { + iter.fold(TextUnit::from(0), ops::Add::add) + } +} + +impl 
iter::Sum for TextUnit { + fn sum>(iter: I) -> TextUnit { + iter.fold(TextUnit::from(0), ops::Add::add) } } @@ -184,3 +218,15 @@ mod serde_impls { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sum() { + let xs: Vec = vec![0.into(), 1.into(), 2.into()]; + assert_eq!(xs.iter().sum::(), 3.into()); + assert_eq!(xs.into_iter().sum::(), 3.into()); + } +} From b26dc781c2907ed59cdcc5f85a7bb7998c2528e4 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 31 Jul 2018 21:40:07 +0300 Subject: [PATCH 004/322] Add inline(always) It's useful for cross-crate inlining --- Cargo.toml | 2 +- src/lib.rs | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6785d8d6dd43..5b7e1c4b5400 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text_unit" -version = "0.1.1" +version = "0.1.2" authors = ["Aleksey Kladov "] description = "Newtypes for text offsets" license = "MIT OR Apache-2.0" diff --git a/src/lib.rs b/src/lib.rs index 5bb15e1d2ad8..5e34da29f027 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,7 @@ pub struct TextUnit(u32); impl TextUnit { /// `TextUnit` equal to the length of this char. + #[inline(always)] pub fn of_char(c: char) -> TextUnit { TextUnit(c.len_utf8() as u32) } @@ -20,6 +21,7 @@ impl TextUnit { /// /// # Panics /// Panics if the length of the string is greater than `u32::max_value()` + #[inline(always)] pub fn of_str(s: &str) -> TextUnit { if s.len() > u32::max_value() as usize { panic!("string is to long") @@ -35,18 +37,21 @@ impl fmt::Debug for TextUnit { } impl fmt::Display for TextUnit { + #[inline(always)] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.0.fmt(f) } } impl From for u32 { + #[inline(always)] fn from(tu: TextUnit) -> u32 { tu.0 } } impl From for TextUnit { + #[inline(always)] fn from(tu: u32) -> TextUnit { TextUnit(tu) } @@ -57,6 +62,7 @@ macro_rules! 
ops_impls { impl ops::$T for TextUnit { type Output = TextUnit; + #[inline(always)] fn $f(self, rhs: TextUnit) -> TextUnit { TextUnit(self.0 $op rhs.0) } @@ -64,6 +70,7 @@ impl ops::$T for TextUnit { impl<'a> ops::$T<&'a TextUnit> for TextUnit { type Output = TextUnit; + #[inline(always)] fn $f(self, rhs: &'a TextUnit) -> TextUnit { ops::$T::$f(self, *rhs) } @@ -71,6 +78,7 @@ impl<'a> ops::$T<&'a TextUnit> for TextUnit { impl<'a> ops::$T for &'a TextUnit { type Output = TextUnit; + #[inline(always)] fn $f(self, rhs: TextUnit) -> TextUnit { ops::$T::$f(*self, rhs) } @@ -78,18 +86,21 @@ impl<'a> ops::$T for &'a TextUnit { impl<'a, 'b> ops::$T<&'a TextUnit> for &'b TextUnit { type Output = TextUnit; + #[inline(always)] fn $f(self, rhs: &'a TextUnit) -> TextUnit { ops::$T::$f(*self, *rhs) } } impl ops::$AT for TextUnit { + #[inline(always)] fn $af(&mut self, rhs: TextUnit) { self.0 = self.0 $op rhs.0 } } impl<'a> ops::$AT<&'a TextUnit> for TextUnit { + #[inline(always)] fn $af(&mut self, rhs: &'a TextUnit) { ops::$AT::$af(self, *rhs) } @@ -137,6 +148,7 @@ impl fmt::Display for TextRange { impl TextRange { /// The left-inclusive range (`[from..to)`) between to points in the text + #[inline(always)] pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange { assert!(from <= to, "Invalid text range [{}; {})", from, to); TextRange { @@ -146,26 +158,31 @@ impl TextRange { } /// The left-inclusive range (`[offset..offset + len)`) between to points in the text + #[inline(always)] pub fn offset_len(offset: TextUnit, len: TextUnit) -> TextRange { TextRange::from_to(offset, offset + len) } /// The inclusive start of this range + #[inline(always)] pub fn start(&self) -> TextUnit { self.start } /// The exclusive end of this range + #[inline(always)] pub fn end(&self) -> TextUnit { self.end } /// The length of this range + #[inline(always)] pub fn len(&self) -> TextUnit { self.end - self.start } /// Is this range empty of any content? 
+ #[inline(always)] pub fn is_empty(&self) -> bool { self.start() == self.end() } From 9df039fc8c2180f9504ed327e2930e165b4293f9 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 16 Aug 2018 23:32:49 +0300 Subject: [PATCH 005/322] initial --- .gitignore | 2 + .travis.yml | 5 ++ Cargo.toml | 10 +++ LICENSE-APACHE | 201 +++++++++++++++++++++++++++++++++++++++++++++++++ LICENSE-MIT | 23 ++++++ README.md | 20 +++++ src/lib.rs | 155 ++++++++++++++++++++++++++++++++++++++ tests/test.rs | 46 +++++++++++ 8 files changed, 462 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 Cargo.toml create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 README.md create mode 100644 src/lib.rs create mode 100644 tests/test.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000000..4470988469a6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +target/ +Cargo.lock \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000000..5070d43aa570 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,5 @@ +language: rust + +script: + - cargo test + - cargo run --example serde diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000000..a82c8aec85c0 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "smol_str" +version = "0.1.0" +authors = ["Aleksey Kladov "] +repository = "https://github.com/matklad/smol_str" +description = "small-string optimized stirng type with O(1) clone" +license = "MIT OR Apache-2.0" + +[dev-dependencies] +proptest = "0.8.3" diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 000000000000..16fe87b06e80 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 000000000000..31aa79387f27 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 000000000000..14215cf54fd4 --- /dev/null +++ b/README.md @@ -0,0 +1,20 @@ +# typed_key + +[![Build Status](https://travis-ci.org/matklad/smol_str.svg?branch=master)](https://travis-ci.org/matklad/smol_str) +[![Crates.io](https://img.shields.io/crates/v/smol_str.svg)](https://crates.io/crates/smol_str) +[![API reference](https://docs.rs/smol_str/badge.svg)](https://docs.rs/smol_str/) + + +A `SmolStr` is a string type that has the following properties + + * `size_of::() == size_of::()` + * Strings up to 22 bytes long do not use heap allocations + * Runs of `\n` and space symbols (typical whitespace pattern of indentation + in programming laguages) do not use heap allocations + * `Clone` is `O(1)` + +Unlike `String`, however, `SmolStr` is immutable. The primary use-case for +`SmolStr` is a good enough default storage for tokens of typical programming +languages. A specialized interner might be a better solution for some use-cases. + +Intenrally, `SmolStr` is roughly an `enum { Heap>, Inline([u8; 22]) }`. 
diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 000000000000..a5a99d227110 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,155 @@ +use std::{fmt, ops::Deref, sync::Arc}; + +/// A `SmolStr` is a string type that has the following properties +/// +/// * `size_of::() == size_of::()` +/// * Strings up to 22 bytes long do not use heap allocations +/// * Runs of `\n` and space symbols (typical whitespace pattern of indentation +/// in programming laguages) do not use heap allocations +/// * `Clone` is `O(1)` +/// +/// Unlike `String`, however, `SmolStr` is immutable. The primary use-case for +/// `SmolStr` is a good enough default storage for tokens of typical programming +/// languages. A specialized interner might be a better solution for some use-cases. +/// +/// Intenrally, `SmolStr` is roughly an `enum { Heap>, Inline([u8; 22]) }`. +#[derive(Clone)] +pub struct SmolStr(Repr); + +impl SmolStr { + pub fn new(text: &str) -> SmolStr { + SmolStr(Repr::new(text)) + } + + pub fn as_str(&self) -> &str { + self.0.as_str() + } + + pub fn to_string(&self) -> String { + self.as_str().to_string() + } +} + +impl Deref for SmolStr { + type Target = str; + + fn deref(&self) -> &str { + self.as_str() + } +} + +impl PartialEq for SmolStr { + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq for str { + fn eq(&self, other: &SmolStr) -> bool { + other == self + } +} + +impl<'a> PartialEq<&'a str> for SmolStr { + fn eq(&self, other: &&'a str) -> bool { + self == *other + } +} + +impl<'a> PartialEq for &'a str { + fn eq(&self, other: &SmolStr) -> bool { + *self == other + } +} + +impl PartialEq for SmolStr { + fn eq(&self, other: &String) -> bool { + self.as_str() == other + } +} + +impl PartialEq for String { + fn eq(&self, other: &SmolStr) -> bool { + other == self + } +} + +impl<'a> PartialEq<&'a String> for SmolStr { + fn eq(&self, other: &&'a String) -> bool { + self == *other + } +} + +impl<'a> PartialEq for &'a String { + fn eq(&self, 
other: &SmolStr) -> bool { + *self == other + } +} + +impl fmt::Debug for SmolStr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self.as_str(), f) + } +} + +impl fmt::Display for SmolStr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self.as_str(), f) + } +} + +const INLINE_CAP: usize = 22; +const WS_TAG: u8 = (INLINE_CAP + 1) as u8; +const N_NEWLINES: usize = 32; +const N_SPACES: usize = 128; +const WS: &str = + "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; + +#[derive(Clone, Debug)] +enum Repr { + Heap(Arc), + Inline { len: u8, buf: [u8; INLINE_CAP] }, +} + +impl Repr { + fn new(text: &str) -> Repr { + let len = text.len(); + if len <= INLINE_CAP { + let mut buf = [0; INLINE_CAP]; + buf[..len].copy_from_slice(text.as_bytes()); + return Repr::Inline { + len: len as u8, + buf, + }; + } + + let newlines = text.bytes().take_while(|&b| b == b'\n').count(); + let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); + if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { + let mut buf = [0; INLINE_CAP]; + buf[0] = newlines as u8; + buf[1] = spaces as u8; + return Repr::Inline { len: WS_TAG, buf }; + } + + Repr::Heap(text.to_string().into_boxed_str().into()) + } + + fn as_str(&self) -> &str { + match self { + Repr::Heap(data) => &*data, + Repr::Inline { len, buf } => { + if *len == WS_TAG { + let newlines = buf[0] as usize; + let spaces = buf[1] as usize; + assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); + return &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]; + } + + let len = *len as usize; + let buf = &buf[..len]; + unsafe { ::std::str::from_utf8_unchecked(buf) } + } + } + } +} diff --git a/tests/test.rs b/tests/test.rs new file mode 100644 index 000000000000..44a0f57e4250 --- /dev/null +++ b/tests/test.rs @@ -0,0 +1,46 @@ +extern crate smol_str; +#[macro_use] +extern crate proptest; + +use smol_str::SmolStr; + +#[test] 
+#[cfg(target_pointer_width = "64")] +fn smol_str_is_smol() { + assert_eq!( + ::std::mem::size_of::(), + ::std::mem::size_of::(), + ); +} + +#[test] +fn assert_traits() { + fn f() {} + f::(); +} + +proptest! { + #[test] + fn roundtrip(s: String) { + let smol = SmolStr::new(s.as_str()); + prop_assert_eq!(smol.as_str(), s.as_str()); + } + + #[test] + fn roundtrip_spaces(s in r"( )*") { + let smol = SmolStr::new(s.as_str()); + prop_assert_eq!(smol.as_str(), s.as_str()); + } + + #[test] + fn roundtrip_newlines(s in r"\n*") { + let smol = SmolStr::new(s.as_str()); + prop_assert_eq!(smol.as_str(), s.as_str()); + } + + #[test] + fn roundtrip_ws(s in r"( |\n)*") { + let smol = SmolStr::new(s.as_str()); + prop_assert_eq!(smol.as_str(), s.as_str()); + } +} From 949116de31b53d1b1d333051c81bb18af271ff33 Mon Sep 17 00:00:00 2001 From: jD91mZM2 Date: Fri, 17 Aug 2018 19:58:10 +0200 Subject: [PATCH 006/322] Implement From<&str> --- src/lib.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index a5a99d227110..c4d03dae374f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -98,6 +98,12 @@ impl fmt::Display for SmolStr { } } +impl<'a> From<&'a str> for SmolStr { + fn from(text: &'a str) -> Self { + Self::new(text) + } +} + const INLINE_CAP: usize = 22; const WS_TAG: u8 = (INLINE_CAP + 1) as u8; const N_NEWLINES: usize = 32; From 8ac4cd9c1d5b6ae0af065e53c41429ace677cee6 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 17 Aug 2018 21:01:40 +0300 Subject: [PATCH 007/322] 0.1.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a82c8aec85c0..173a149fe3f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.0" +version = "0.1.1" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" From 85e6ea38129de12af4945ddd5916ff70eef823e1 Mon Sep 17 00:00:00 2001 
From: Aleksey Kladov Date: Fri, 17 Aug 2018 22:10:08 +0300 Subject: [PATCH 008/322] fix CI --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5070d43aa570..d848914cf9ce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,4 +2,3 @@ language: rust script: - cargo test - - cargo run --example serde From b1f64c1b8bed687004f3a1a1b3d91fdaa8ed2f73 Mon Sep 17 00:00:00 2001 From: jD91mZM2 Date: Fri, 17 Aug 2018 21:15:24 +0200 Subject: [PATCH 009/322] Implement PartialEq and From --- src/lib.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c4d03dae374f..eeee48b49ce0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,6 +38,12 @@ impl Deref for SmolStr { } } +impl PartialEq for SmolStr { + fn eq(&self, other: &SmolStr) -> bool { + self.as_str() == other.as_str() + } +} + impl PartialEq for SmolStr { fn eq(&self, other: &str) -> bool { self.as_str() == other @@ -98,6 +104,12 @@ impl fmt::Display for SmolStr { } } +impl From for SmolStr { + fn from(text: String) -> Self { + SmolStr(Repr::new_heap(text)) + } +} + impl<'a> From<&'a str> for SmolStr { fn from(text: &'a str) -> Self { Self::new(text) @@ -118,7 +130,7 @@ enum Repr { } impl Repr { - fn new(text: &str) -> Repr { + fn new(text: &str) -> Self { let len = text.len(); if len <= INLINE_CAP { let mut buf = [0; INLINE_CAP]; @@ -138,7 +150,10 @@ impl Repr { return Repr::Inline { len: WS_TAG, buf }; } - Repr::Heap(text.to_string().into_boxed_str().into()) + Self::new_heap(text.to_string()) + } + fn new_heap(text: String) -> Self { + Repr::Heap(text.into_boxed_str().into()) } fn as_str(&self) -> &str { From aa474ef2fe610ff20e4717b4dc98bada4722fbee Mon Sep 17 00:00:00 2001 From: jD91mZM2 Date: Sat, 18 Aug 2018 10:38:23 +0200 Subject: [PATCH 010/322] More generics! 
--- src/lib.rs | 61 +++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index eeee48b49ce0..c9606f904955 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,7 +17,9 @@ use std::{fmt, ops::Deref, sync::Arc}; pub struct SmolStr(Repr); impl SmolStr { - pub fn new(text: &str) -> SmolStr { + pub fn new(text: T) -> SmolStr + where T: Into + AsRef + { SmolStr(Repr::new(text)) } @@ -104,14 +106,10 @@ impl fmt::Display for SmolStr { } } -impl From for SmolStr { - fn from(text: String) -> Self { - SmolStr(Repr::new_heap(text)) - } -} - -impl<'a> From<&'a str> for SmolStr { - fn from(text: &'a str) -> Self { +impl From for SmolStr + where T: Into + AsRef +{ + fn from(text: T) -> Self { Self::new(text) } } @@ -130,30 +128,33 @@ enum Repr { } impl Repr { - fn new(text: &str) -> Self { - let len = text.len(); - if len <= INLINE_CAP { - let mut buf = [0; INLINE_CAP]; - buf[..len].copy_from_slice(text.as_bytes()); - return Repr::Inline { - len: len as u8, - buf, - }; - } + fn new(text: T) -> Self + where T: Into + AsRef + { + { + let text = text.as_ref(); + + let len = text.len(); + if len <= INLINE_CAP { + let mut buf = [0; INLINE_CAP]; + buf[..len].copy_from_slice(text.as_bytes()); + return Repr::Inline { + len: len as u8, + buf, + }; + } - let newlines = text.bytes().take_while(|&b| b == b'\n').count(); - let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); - if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { - let mut buf = [0; INLINE_CAP]; - buf[0] = newlines as u8; - buf[1] = spaces as u8; - return Repr::Inline { len: WS_TAG, buf }; + let newlines = text.bytes().take_while(|&b| b == b'\n').count(); + let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); + if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { + let mut buf = [0; INLINE_CAP]; + buf[0] = newlines as u8; + buf[1] = spaces as u8; + return 
Repr::Inline { len: WS_TAG, buf }; + } } - Self::new_heap(text.to_string()) - } - fn new_heap(text: String) -> Self { - Repr::Heap(text.into_boxed_str().into()) + Repr::Heap(text.into().into_boxed_str().into()) } fn as_str(&self) -> &str { From 71190dfe47b7f429e8b1bbda5324f1db430a6005 Mon Sep 17 00:00:00 2001 From: jD91mZM2 Date: Sat, 18 Aug 2018 10:41:16 +0200 Subject: [PATCH 011/322] Bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 173a149fe3f5..cdda6051635c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.1" +version = "0.1.2" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" From 3b379f8d9945669eb9c0fd0b1beeec2782a2d973 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 22 Aug 2018 11:36:06 +0300 Subject: [PATCH 012/322] ranges arithmetics --- Cargo.toml | 2 +- src/lib.rs | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5b7e1c4b5400..6122a304dda3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text_unit" -version = "0.1.2" +version = "0.1.3" authors = ["Aleksey Kladov "] description = "Newtypes for text offsets" license = "MIT OR Apache-2.0" diff --git a/src/lib.rs b/src/lib.rs index 5e34da29f027..00027ce69e62 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,7 +57,7 @@ impl From for TextUnit { } } -macro_rules! ops_impls { +macro_rules! unit_ops_impls { ($T:ident, $f:ident, $op:tt, $AT:ident, $af:ident) => { impl ops::$T for TextUnit { @@ -108,8 +108,73 @@ impl<'a> ops::$AT<&'a TextUnit> for TextUnit { }; } -ops_impls!(Add, add, +, AddAssign, add_assign); -ops_impls!(Sub, sub, -, SubAssign, sub_assign); +macro_rules! 
range_ops_impls { + ($T:ident, $f:ident, $op:tt, $AT:ident, $af:ident) => { + +impl ops::$T for TextRange { + type Output = TextRange; + #[inline(always)] + fn $f(self, rhs: TextUnit) -> TextRange { + TextRange::from_to( + self.start() $op rhs, + self.end() $op rhs, + ) + } +} + +impl<'a> ops::$T<&'a TextUnit> for TextRange { + type Output = TextRange; + #[inline(always)] + fn $f(self, rhs: &'a TextUnit) -> TextRange { + TextRange::from_to( + self.start() $op rhs, + self.end() $op rhs, + ) + } +} + +impl<'a> ops::$T for &'a TextRange { + type Output = TextRange; + #[inline(always)] + fn $f(self, rhs: TextUnit) -> TextRange { + TextRange::from_to( + self.start() $op rhs, + self.end() $op rhs, + ) + } +} + +impl<'a, 'b> ops::$T<&'a TextUnit> for &'b TextRange { + type Output = TextRange; + #[inline(always)] + fn $f(self, rhs: &'a TextUnit) -> TextRange { + TextRange::from_to( + self.start() $op rhs, + self.end() $op rhs, + ) + } +} + +impl ops::$AT for TextRange { + #[inline(always)] + fn $af(&mut self, rhs: TextUnit) { + *self = *self $op rhs + } +} + +impl<'a> ops::$AT<&'a TextUnit> for TextRange { + #[inline(always)] + fn $af(&mut self, rhs: &'a TextUnit) { + *self = *self $op rhs + } +} + }; +} + +unit_ops_impls!(Add, add, +, AddAssign, add_assign); +unit_ops_impls!(Sub, sub, -, SubAssign, sub_assign); +range_ops_impls!(Add, add, +, AddAssign, add_assign); +range_ops_impls!(Sub, sub, -, SubAssign, sub_assign); impl<'a> iter::Sum<&'a TextUnit> for TextUnit { fn sum>(iter: I) -> TextUnit { @@ -246,4 +311,18 @@ mod tests { assert_eq!(xs.iter().sum::(), 3.into()); assert_eq!(xs.into_iter().sum::(), 3.into()); } + + #[test] + fn test_ops() { + let r = TextRange::from_to(10.into(), 20.into()); + let u: TextUnit = 5.into(); + assert_eq!( + r + u, + TextRange::from_to(15.into(), 25.into()), + ); + assert_eq!( + r - u, + TextRange::from_to(5.into(), 15.into()), + ); + } } From 16d52857cc52c143da7e2ca398d9a6e5929956eb Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: 
Tue, 28 Aug 2018 20:56:10 +0300 Subject: [PATCH 013/322] checked ops --- Cargo.toml | 2 +- src/lib.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6122a304dda3..99fed3d7289b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text_unit" -version = "0.1.3" +version = "0.1.4" authors = ["Aleksey Kladov "] description = "Newtypes for text offsets" license = "MIT OR Apache-2.0" diff --git a/src/lib.rs b/src/lib.rs index 00027ce69e62..28b2e4d10e1a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,11 @@ impl TextUnit { } TextUnit(s.len() as u32) } + + #[inline(always)] + pub fn checked_sub(self, other: TextUnit) -> Option { + self.0.checked_sub(other.0).map(TextUnit) + } } impl fmt::Debug for TextUnit { @@ -199,6 +204,17 @@ pub struct TextRange { end: TextUnit, } +impl TextRange { + #[inline(always)] + pub fn checked_sub(self, other: TextUnit) -> Option { + let res = TextRange::offset_len( + self.start().checked_sub(other)?, + self.len() + ); + Some(res) + } +} + impl fmt::Debug for TextRange { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { ::fmt(self, f) @@ -325,4 +341,15 @@ mod tests { TextRange::from_to(5.into(), 15.into()), ); } + + #[test] + fn test_checked_ops() { + let x: TextUnit = 1.into(); + assert_eq!(x.checked_sub(1.into()), Some(0.into())); + assert_eq!(x.checked_sub(2.into()), None); + + let r = TextRange::from_to(1.into(), 2.into()); + assert_eq!(r.checked_sub(1.into()), Some(TextRange::from_to(0.into(), 1.into()))); + assert_eq!(x.checked_sub(2.into()), None); + } } From 37696f294c6db643c6bad9d55e08963929b97b09 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Aug 2018 15:47:17 +0300 Subject: [PATCH 014/322] smol_str is hash --- Cargo.toml | 2 +- src/lib.rs | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cdda6051635c..7cd8cb4ede67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ 
-1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.2" +version = "0.1.3" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" diff --git a/src/lib.rs b/src/lib.rs index c9606f904955..dc5933319021 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -use std::{fmt, ops::Deref, sync::Arc}; +use std::{fmt, hash, ops::Deref, sync::Arc}; /// A `SmolStr` is a string type that has the following properties /// @@ -94,6 +94,12 @@ impl<'a> PartialEq for &'a String { } } +impl hash::Hash for SmolStr { + fn hash(&self, hasher: &mut H) { + self.as_str().hash(hasher) + } +} + impl fmt::Debug for SmolStr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Debug::fmt(self.as_str(), f) From c490c2d91f4fd10f79bafae4106cc490aea46639 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Aug 2018 15:50:29 +0300 Subject: [PATCH 015/322] smol_str is Eq --- Cargo.toml | 2 +- src/lib.rs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7cd8cb4ede67..2cec274c8f13 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.3" +version = "0.1.4" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" diff --git a/src/lib.rs b/src/lib.rs index dc5933319021..1862396a03e8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,6 +46,8 @@ impl PartialEq for SmolStr { } } +impl Eq for SmolStr {} + impl PartialEq for SmolStr { fn eq(&self, other: &str) -> bool { self.as_str() == other From 54b16777a5bcccb360d4da028a37678159bc5d66 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 2 Sep 2018 14:01:46 +0300 Subject: [PATCH 016/322] serde --- .travis.yml | 2 +- Cargo.toml | 6 +++++- src/lib.rs | 21 +++++++++++++++++++++ tests/test.rs | 10 ++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff 
--git a/.travis.yml b/.travis.yml index d848914cf9ce..56abf368957c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,4 @@ language: rust script: - - cargo test + - cargo test --all-features diff --git a/Cargo.toml b/Cargo.toml index 2cec274c8f13..0eba48e8c2f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,14 @@ [package] name = "smol_str" -version = "0.1.4" +version = "0.1.5" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" license = "MIT OR Apache-2.0" +[dependencies] +serde = { version = "1", optional = true } + [dev-dependencies] proptest = "0.8.3" +serde_json = "1" diff --git a/src/lib.rs b/src/lib.rs index 1862396a03e8..cdf013519ddb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -183,3 +183,24 @@ impl Repr { } } } + +#[cfg(feature = "serde")] +mod serde { + extern crate serde; + + use SmolStr; + + impl serde::Serialize for SmolStr { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer + { self.as_str().serialize(serializer) } + } + + impl<'de> serde::Deserialize<'de> for SmolStr { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de> + { <&'de str>::deserialize(deserializer).map(SmolStr::from) } + } +} diff --git a/tests/test.rs b/tests/test.rs index 44a0f57e4250..d66d6ab742b3 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,4 +1,5 @@ extern crate smol_str; +extern crate serde_json; #[macro_use] extern crate proptest; @@ -44,3 +45,12 @@ proptest! 
{ prop_assert_eq!(smol.as_str(), s.as_str()); } } + +#[test] +fn test_serde() { + let s = SmolStr::new("Hello, World"); + let s = serde_json::to_string(&s).unwrap(); + assert_eq!(s, "\"Hello, World\""); + let s: SmolStr = serde_json::from_str(&s).unwrap(); + assert_eq!(s, "Hello, World"); +} From 8bb17646df454d8572e49502bd17d0f78ad28782 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 3 Sep 2018 21:47:04 +0300 Subject: [PATCH 017/322] add len method --- Cargo.toml | 2 +- src/lib.rs | 19 +++++++++++++++++++ tests/test.rs | 19 +++++++++++-------- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0eba48e8c2f6..09978f0a9853 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.5" +version = "0.1.6" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" diff --git a/src/lib.rs b/src/lib.rs index cdf013519ddb..c41389f38e68 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,11 @@ impl SmolStr { pub fn to_string(&self) -> String { self.as_str().to_string() } + + #[inline(always)] + pub fn len(&self) -> usize { + self.0.len() + } } impl Deref for SmolStr { @@ -165,6 +170,20 @@ impl Repr { Repr::Heap(text.into().into_boxed_str().into()) } + fn len(&self) -> usize { + match self { + Repr::Heap(data) => data.len(), + Repr::Inline { len, buf } => { + if *len == WS_TAG { + let newlines = buf[0] as usize; + let spaces = buf[1] as usize; + return newlines + spaces; + } + *len as usize + } + } + } + fn as_str(&self) -> &str { match self { Repr::Heap(data) => &*data, diff --git a/tests/test.rs b/tests/test.rs index d66d6ab742b3..56837b4c33f2 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -20,29 +20,32 @@ fn assert_traits() { f::(); } +fn check_props(s: &str) -> Result<(), proptest::test_runner::TestCaseError> { + let smol = SmolStr::new(s); + prop_assert_eq!(smol.as_str(), s); + 
prop_assert_eq!(smol.len(), s.len()); + Ok(()) +} + proptest! { #[test] fn roundtrip(s: String) { - let smol = SmolStr::new(s.as_str()); - prop_assert_eq!(smol.as_str(), s.as_str()); + check_props(s.as_str())?; } #[test] fn roundtrip_spaces(s in r"( )*") { - let smol = SmolStr::new(s.as_str()); - prop_assert_eq!(smol.as_str(), s.as_str()); + check_props(s.as_str())?; } #[test] fn roundtrip_newlines(s in r"\n*") { - let smol = SmolStr::new(s.as_str()); - prop_assert_eq!(smol.as_str(), s.as_str()); + check_props(s.as_str())?; } #[test] fn roundtrip_ws(s in r"( |\n)*") { - let smol = SmolStr::new(s.as_str()); - prop_assert_eq!(smol.as_str(), s.as_str()); + check_props(s.as_str())?; } } From e4cd3c2cc9bbb3fcd7b54aca562b340fe038f7a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= Date: Tue, 9 Oct 2018 18:29:37 +0200 Subject: [PATCH 018/322] Fix some stuff in the readme I don't understand the sentence that starts with *Runs of `\n` and space symbols*... What do you mean by that? --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 14215cf54fd4..53b33aa27379 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![API reference](https://docs.rs/smol_str/badge.svg)](https://docs.rs/smol_str/) -A `SmolStr` is a string type that has the following properties +A `SmolStr` is a string type that has the following properties: * `size_of::() == size_of::()` * Strings up to 22 bytes long do not use heap allocations @@ -15,6 +15,6 @@ A `SmolStr` is a string type that has the following properties Unlike `String`, however, `SmolStr` is immutable. The primary use-case for `SmolStr` is a good enough default storage for tokens of typical programming -languages. A specialized interner might be a better solution for some use-cases. +languages. A specialized interner might be a better solution for some use cases. -Intenrally, `SmolStr` is roughly an `enum { Heap>, Inline([u8; 22]) }`. 
+Internally, `SmolStr` is roughly an `enum { Heap(Arc), Inline([u8; 22]) }`. From 24192bec48d730514a706a0115dda364f98711ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= Date: Tue, 9 Oct 2018 20:56:18 +0200 Subject: [PATCH 019/322] Clarify docs --- README.md | 22 ++++++++++++---------- src/lib.rs | 52 ++++++++++++++++++++++------------------------------ 2 files changed, 34 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 53b33aa27379..5ba92a637b35 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# typed_key +# smol_str [![Build Status](https://travis-ci.org/matklad/smol_str.svg?branch=master)](https://travis-ci.org/matklad/smol_str) [![Crates.io](https://img.shields.io/crates/v/smol_str.svg)](https://crates.io/crates/smol_str) @@ -7,14 +7,16 @@ A `SmolStr` is a string type that has the following properties: - * `size_of::() == size_of::()` - * Strings up to 22 bytes long do not use heap allocations - * Runs of `\n` and space symbols (typical whitespace pattern of indentation - in programming laguages) do not use heap allocations - * `Clone` is `O(1)` +* `size_of::() == size_of::()` +* `Clone` is `O(1)` +* Strings are stack-allocated if they are: + * Up to 22 bytes long + * Longer than 22 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist + solely of consecutive newlines, followed by consecutive spaces +* If a string does not satisfy the aforementioned conditions, it is heap-allocated -Unlike `String`, however, `SmolStr` is immutable. The primary use-case for +Unlike `String`, however, `SmolStr` is immutable. The primary use case for `SmolStr` is a good enough default storage for tokens of typical programming -languages. A specialized interner might be a better solution for some use cases. - -Internally, `SmolStr` is roughly an `enum { Heap(Arc), Inline([u8; 22]) }`. +languages. 
Strings consisting of a series of newlines, followed by a series of +whitespace are a typical pattern in computer programms because of indentation. +Note that a specialized interner might be a better solution for some use cases. diff --git a/src/lib.rs b/src/lib.rs index c41389f38e68..49d26bbf9d54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,18 +1,20 @@ use std::{fmt, hash, ops::Deref, sync::Arc}; -/// A `SmolStr` is a string type that has the following properties +/// A `SmolStr` is a string type that has the following properties: /// -/// * `size_of::() == size_of::()` -/// * Strings up to 22 bytes long do not use heap allocations -/// * Runs of `\n` and space symbols (typical whitespace pattern of indentation -/// in programming laguages) do not use heap allocations -/// * `Clone` is `O(1)` +/// * `size_of::() == size_of::()` +/// * `Clone` is `O(1)` +/// * Strings are stack-allocated if they are: +/// * Up to 22 bytes long +/// * Longer than 22 bytes, but substrings of `WS` (see below). Such strings consist +/// solely of consecutive newlines, followed by consecutive spaces +/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated /// -/// Unlike `String`, however, `SmolStr` is immutable. The primary use-case for +/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for /// `SmolStr` is a good enough default storage for tokens of typical programming -/// languages. A specialized interner might be a better solution for some use-cases. -/// -/// Intenrally, `SmolStr` is roughly an `enum { Heap>, Inline([u8; 22]) }`. +/// languages. Strings consisting of a series of newlines, followed by a series of +/// whitespace are a typical pattern in computer programms because of indentation. +/// Note that a specialized interner might be a better solution for some use cases. 
#[derive(Clone)] pub struct SmolStr(Repr); @@ -128,7 +130,6 @@ impl From for SmolStr } const INLINE_CAP: usize = 22; -const WS_TAG: u8 = (INLINE_CAP + 1) as u8; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; const WS: &str = @@ -138,6 +139,7 @@ const WS: &str = enum Repr { Heap(Arc), Inline { len: u8, buf: [u8; INLINE_CAP] }, + Substring { newlines: usize, spaces: usize }, } impl Repr { @@ -160,10 +162,7 @@ impl Repr { let newlines = text.bytes().take_while(|&b| b == b'\n').count(); let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { - let mut buf = [0; INLINE_CAP]; - buf[0] = newlines as u8; - buf[1] = spaces as u8; - return Repr::Inline { len: WS_TAG, buf }; + return Repr::Substring { newlines, spaces }; } } @@ -173,14 +172,8 @@ impl Repr { fn len(&self) -> usize { match self { Repr::Heap(data) => data.len(), - Repr::Inline { len, buf } => { - if *len == WS_TAG { - let newlines = buf[0] as usize; - let spaces = buf[1] as usize; - return newlines + spaces; - } - *len as usize - } + Repr::Inline { len, .. 
} => *len as usize, + Repr::Substring { newlines, spaces } => *newlines + *spaces } } @@ -188,17 +181,16 @@ impl Repr { match self { Repr::Heap(data) => &*data, Repr::Inline { len, buf } => { - if *len == WS_TAG { - let newlines = buf[0] as usize; - let spaces = buf[1] as usize; - assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); - return &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]; - } - let len = *len as usize; let buf = &buf[..len]; unsafe { ::std::str::from_utf8_unchecked(buf) } } + Repr::Substring { newlines, spaces } => { + let newlines = *newlines; + let spaces = *spaces; + assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); + &WS[N_NEWLINES - newlines..N_NEWLINES + spaces] + } } } } From 21e78e817ccde938c7292916aeaa51aab27f71cc Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 12 Oct 2018 21:17:31 +0700 Subject: [PATCH 020/322] Fix typos. --- README.md | 2 +- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5ba92a637b35..949f6e6ebf68 100644 --- a/README.md +++ b/README.md @@ -18,5 +18,5 @@ A `SmolStr` is a string type that has the following properties: Unlike `String`, however, `SmolStr` is immutable. The primary use case for `SmolStr` is a good enough default storage for tokens of typical programming languages. Strings consisting of a series of newlines, followed by a series of -whitespace are a typical pattern in computer programms because of indentation. +whitespace are a typical pattern in computer programs because of indentation. Note that a specialized interner might be a better solution for some use cases. diff --git a/src/lib.rs b/src/lib.rs index 49d26bbf9d54..a62cbddb436d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ use std::{fmt, hash, ops::Deref, sync::Arc}; /// Unlike `String`, however, `SmolStr` is immutable. The primary use case for /// `SmolStr` is a good enough default storage for tokens of typical programming /// languages. 
Strings consisting of a series of newlines, followed by a series of -/// whitespace are a typical pattern in computer programms because of indentation. +/// whitespace are a typical pattern in computer programs because of indentation. /// Note that a specialized interner might be a better solution for some use cases. #[derive(Clone)] pub struct SmolStr(Repr); From 1c3c947e3304c67b4732f842c457d0dc7bb40780 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 12 Oct 2018 21:18:49 +0700 Subject: [PATCH 021/322] rustfmt with stable. --- src/lib.rs | 23 +++++++++++++++-------- tests/test.rs | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 49d26bbf9d54..c13a19d8f0b3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,8 @@ pub struct SmolStr(Repr); impl SmolStr { pub fn new<T>(text: T) -> SmolStr - where T: Into<String> + AsRef<str> + where + T: Into<String> + AsRef<str>, { SmolStr(Repr::new(text)) } @@ -122,7 +123,8 @@ impl fmt::Display for SmolStr { } impl<T> From<T> for SmolStr - where T: Into<String> + AsRef<str> +where + T: Into<String> + AsRef<str>, { fn from(text: T) -> Self { Self::new(text) @@ -144,7 +146,8 @@ enum Repr { impl Repr { fn new<T>(text: T) -> Self - where T: Into<String> + AsRef<str> + where + T: Into<String> + AsRef<str>, { { let text = text.as_ref(); @@ -173,7 +176,7 @@ impl Repr { match self { Repr::Heap(data) => data.len(), Repr::Inline { len, ..
} => *len as usize, - Repr::Substring { newlines, spaces } => *newlines + *spaces + Repr::Substring { newlines, spaces } => *newlines + *spaces, } } @@ -204,14 +207,18 @@ mod serde { impl serde::Serialize for SmolStr { fn serialize(&self, serializer: S) -> Result where - S: serde::Serializer - { self.as_str().serialize(serializer) } + S: serde::Serializer, + { + self.as_str().serialize(serializer) + } } impl<'de> serde::Deserialize<'de> for SmolStr { fn deserialize(deserializer: D) -> Result where - D: serde::Deserializer<'de> - { <&'de str>::deserialize(deserializer).map(SmolStr::from) } + D: serde::Deserializer<'de>, + { + <&'de str>::deserialize(deserializer).map(SmolStr::from) + } } } diff --git a/tests/test.rs b/tests/test.rs index 56837b4c33f2..011a9d733d5b 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,5 +1,5 @@ -extern crate smol_str; extern crate serde_json; +extern crate smol_str; #[macro_use] extern crate proptest; From 038da7d074d909d95c6a76ae57a621aa006ea306 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 12 Oct 2018 21:25:25 +0700 Subject: [PATCH 022/322] Fix test compilation without serde feature. --- tests/test.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test.rs b/tests/test.rs index 011a9d733d5b..eebdc205a080 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -49,6 +49,7 @@ proptest! { } } +#[cfg(feature = "serde")] #[test] fn test_serde() { let s = SmolStr::new("Hello, World"); From a96deaa7a5aebe333a475358dc7eaa9129a5cb0a Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 12 Oct 2018 21:28:44 +0700 Subject: [PATCH 023/322] Fix typo in Cargo.toml. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 09978f0a9853..45a3f2cac86a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "smol_str" version = "0.1.6" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" -description = "small-string optimized stirng type with O(1) clone" +description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" [dependencies] From 5644a67ed782fbb8c3a1a284abe9f925b2555366 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 12 Oct 2018 21:27:49 +0700 Subject: [PATCH 024/322] clippy: Add (and test) is_empty method. Since there is a `len` method, clippy suggests having an `is_empty` method as well. --- src/lib.rs | 14 ++++++++++++++ tests/test.rs | 1 + 2 files changed, 15 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 5df411a56db0..bedb6fb5b3a3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,6 +38,11 @@ impl SmolStr { pub fn len(&self) -> usize { self.0.len() } + + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } } impl Deref for SmolStr { @@ -180,6 +185,15 @@ impl Repr { } } + fn is_empty(&self) -> bool { + match self { + Repr::Heap(data) => data.is_empty(), + Repr::Inline { len, .. } => *len == 0, + // A substring isn't created for an empty string. + Repr::Substring { .. 
} => false, + } + } + fn as_str(&self) -> &str { match self { Repr::Heap(data) => &*data, diff --git a/tests/test.rs b/tests/test.rs index eebdc205a080..f015f2fe7749 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -24,6 +24,7 @@ fn check_props(s: &str) -> Result<(), proptest::test_runner::TestCaseError> { let smol = SmolStr::new(s); prop_assert_eq!(smol.as_str(), s); prop_assert_eq!(smol.len(), s.len()); + prop_assert_eq!(smol.is_empty(), s.is_empty()); Ok(()) } From 6a17181ee222d51aec63868d421676ca5aeac88e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 13 Oct 2018 11:36:06 +0300 Subject: [PATCH 025/322] Propagate inline to inner wrapper --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index bedb6fb5b3a3..85381b31fd5b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -177,6 +177,7 @@ impl Repr { Repr::Heap(text.into().into_boxed_str().into()) } + #[inline(always)] fn len(&self) -> usize { match self { Repr::Heap(data) => data.len(), @@ -185,6 +186,7 @@ impl Repr { } } + #[inline(always)] fn is_empty(&self) -> bool { match self { Repr::Heap(data) => data.is_empty(), From 13e75c28a1f90cac0b443e8d2e84d481f4f717b6 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 13 Oct 2018 11:36:56 +0300 Subject: [PATCH 026/322] bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 45a3f2cac86a..051ee6dcfffe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.6" +version = "0.1.7" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" From 69123863a133fb699257fb51b9f570256e3e2f84 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 30 Oct 2018 21:13:51 +0300 Subject: [PATCH 027/322] a couple of utility methods --- Cargo.toml | 2 +- src/lib.rs | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 
deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 99fed3d7289b..274d6e64bd51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text_unit" -version = "0.1.4" +version = "0.1.5" authors = ["Aleksey Kladov "] description = "Newtypes for text offsets" license = "MIT OR Apache-2.0" diff --git a/src/lib.rs b/src/lib.rs index 28b2e4d10e1a..179fde214235 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ use std::{fmt, ops, iter}; pub struct TextUnit(u32); impl TextUnit { + //TODO: rename to `from_char`: this is not ocaml! /// `TextUnit` equal to the length of this char. #[inline(always)] pub fn of_char(c: char) -> TextUnit { @@ -33,6 +34,21 @@ impl TextUnit { pub fn checked_sub(self, other: TextUnit) -> Option { self.0.checked_sub(other.0).map(TextUnit) } + + #[inline(always)] + pub fn from_usize(size: usize) -> TextUnit { + #[cfg(debug_assertions)] { + if size > u32::max_value() as usize { + panic!("overflow when converting to TextUnit: {}", size) + } + } + (size as u32).into() + } + + #[inline(always)] + pub fn to_usize(self) -> usize { + u32::from(self) as usize + } } impl fmt::Debug for TextUnit { @@ -244,6 +260,7 @@ impl TextRange { TextRange::from_to(offset, offset + len) } + // TODO: pass by value /// The inclusive start of this range #[inline(always)] pub fn start(&self) -> TextUnit { @@ -267,6 +284,12 @@ impl TextRange { pub fn is_empty(&self) -> bool { self.start() == self.end() } + + #[inline(always)] + pub fn is_subrange(&self, other: &TextRange) -> bool { + other.start() <= self.start() + && self.end() <= other.end() + } } impl ops::Index for str { @@ -352,4 +375,13 @@ mod tests { assert_eq!(r.checked_sub(1.into()), Some(TextRange::from_to(0.into(), 1.into()))); assert_eq!(x.checked_sub(2.into()), None); } + + #[test] + fn test_subrange() { + let r1 = TextRange::from_to(2.into(), 4.into()); + let r2 = TextRange::from_to(2.into(), 3.into()); + let r3 = TextRange::from_to(1.into(), 3.into()); + assert!(r2.is_subrange(&r1)); 
+ assert!(!r3.is_subrange(&r1)); + } } From 30fcaf120f38e2c743d19d2b2c0fd7ab371ee485 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 21 Dec 2018 14:16:23 +0300 Subject: [PATCH 028/322] add From for String --- src/lib.rs | 6 ++++++ tests/test.rs | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 85381b31fd5b..78b0103ef5f2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -136,6 +136,12 @@ where } } +impl From for String { + fn from(text: SmolStr) -> Self { + text.to_string() + } +} + const INLINE_CAP: usize = 22; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; diff --git a/tests/test.rs b/tests/test.rs index f015f2fe7749..94ab66dacb39 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -20,6 +20,13 @@ fn assert_traits() { f::(); } +#[test] +fn conversions() { + let s: SmolStr = "Hello, World!".into(); + let s: String = s.into(); + assert_eq!(s, "Hello, World!") +} + fn check_props(s: &str) -> Result<(), proptest::test_runner::TestCaseError> { let smol = SmolStr::new(s); prop_assert_eq!(smol.as_str(), s); From b4e1418b3292ffff2506492a69363a3f98dad0cb Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 21 Dec 2018 14:16:57 +0300 Subject: [PATCH 029/322] bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 051ee6dcfffe..c01c378c2e6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.7" +version = "0.1.8" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" From 65484e0f86c4612b513694005f36a974b19b53dc Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 11 Jan 2019 13:15:38 +0300 Subject: [PATCH 030/322] add Default for SmolStr --- Cargo.toml | 2 +- src/lib.rs | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c01c378c2e6d..dc99ab007af8 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.8" +version = "0.1.9" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" diff --git a/src/lib.rs b/src/lib.rs index 78b0103ef5f2..e355630e4754 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,6 +45,12 @@ impl SmolStr { } } +impl Default for SmolStr { + fn default() -> SmolStr { + SmolStr::new("") + } +} + impl Deref for SmolStr { type Target = str; From ce9802618bf1fa093fcd777a7bac41752cc3e5c9 Mon Sep 17 00:00:00 2001 From: "Evgeniy A. Dushistov" Date: Sun, 10 Mar 2019 04:27:42 +0300 Subject: [PATCH 031/322] implement Borrow to make possible search str in HashMap for String { } } +impl Borrow for SmolStr { + fn borrow(&self) -> &str { + self.as_str() + } +} + const INLINE_CAP: usize = 22; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; diff --git a/tests/test.rs b/tests/test.rs index 94ab66dacb39..f5b7cf3daf7a 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -66,3 +66,10 @@ fn test_serde() { let s: SmolStr = serde_json::from_str(&s).unwrap(); assert_eq!(s, "Hello, World"); } + +#[test] +fn test_search_in_hashmap() { + let mut m = ::std::collections::HashMap::::new(); + m.insert("aaa".into(), 17); + assert_eq!(17, *m.get("aaa").unwrap()); +} From fd109355863d1267f1f7ce697882d90aec422e8e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 31 Mar 2019 14:15:00 +0300 Subject: [PATCH 032/322] add is_heap_allocated --- src/lib.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index eb05dc67eb05..b4240c9efb74 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,10 +26,12 @@ impl SmolStr { SmolStr(Repr::new(text)) } + #[inline(always)] pub fn as_str(&self) -> &str { self.0.as_str() } + #[inline(always)] pub fn to_string(&self) -> String { self.as_str().to_string() } @@ -43,6 +45,14 @@ impl SmolStr { pub fn is_empty(&self) -> bool { 
self.0.is_empty() } + + #[inline(always)] + pub fn is_heap_allocated(&self) -> bool { + match self.0 { + Repr::Heap(..) => true, + _ => false + } + } } impl Default for SmolStr { @@ -214,6 +224,7 @@ impl Repr { } } + #[inline] fn as_str(&self) -> &str { match self { Repr::Heap(data) => &*data, From 77aae544d1fa2806e6381cab54c4937cb34f9c99 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 31 Mar 2019 14:15:41 +0300 Subject: [PATCH 033/322] v0.1.10 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index dc99ab007af8..acf5f3cf6390 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.9" +version = "0.1.10" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" From 3826349e3c32c0f991a2eded01e340c0c8f049ed Mon Sep 17 00:00:00 2001 From: Kevin Stenerson Date: Tue, 23 Apr 2019 13:56:07 -0600 Subject: [PATCH 034/322] Implement `FromIterator` for `SmolStr` --- src/lib.rs | 23 ++++++++++++++++++++++- tests/test.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index b4240c9efb74..e35787220506 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -use std::{borrow::Borrow, fmt, hash, ops::Deref, sync::Arc}; +use std::{borrow::Borrow, fmt, hash, iter, ops::Deref, sync::Arc}; /// A `SmolStr` is a string type that has the following properties: /// @@ -143,6 +143,27 @@ impl fmt::Display for SmolStr { } } +impl iter::FromIterator for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + let mut iter = iter.into_iter(); + while let Some(ch) = iter.next() { + let size = ch.len_utf8(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push(ch); + heap.extend(iter); 
+ return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + ch.encode_utf8(&mut buf[len..]); + len += size; + } + SmolStr(Repr::Inline { len: len as u8, buf }) + } +} + impl From for SmolStr where T: Into + AsRef, diff --git a/tests/test.rs b/tests/test.rs index f5b7cf3daf7a..beab0780a80a 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -73,3 +73,29 @@ fn test_search_in_hashmap() { m.insert("aaa".into(), 17); assert_eq!(17, *m.get("aaa").unwrap()); } + +#[test] +fn test_from_iterator() { + let examples = [ + // Simple keyword-like strings + ("if", false), + ("for", false), + ("impl", false), + + // Strings containing two-byte characters + ("パーティーへ行かないか", true), + ("パーティーへ行か", true), + ("パーティーへ行_", false), + ("和製漢語", false), + ("部落格", false), + ("사회과학원 어학연구소", true), + + // String containin diverse characters + ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), + ]; + for (raw, is_heap) in &examples { + let s: SmolStr = raw.chars().collect(); + assert_eq!(s.as_str(), *raw); + assert_eq!(s.is_heap_allocated(), *is_heap); + } +} From 4832de6aaee33c453982f5764130d15f158f4f8a Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 15 May 2019 18:59:55 +0300 Subject: [PATCH 035/322] restore lost code =/ --- src/lib.rs | 90 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 179fde214235..73e076923ff6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,7 @@ #[cfg(feature = "serde")] extern crate serde; -use std::{fmt, ops, iter}; - +use std::{fmt, iter, ops}; /// An offset into text. 
/// Offset is represented as `u32` storing number of utf8-bytes, @@ -37,7 +36,8 @@ impl TextUnit { #[inline(always)] pub fn from_usize(size: usize) -> TextUnit { - #[cfg(debug_assertions)] { + #[cfg(debug_assertions)] + { if size > u32::max_value() as usize { panic!("overflow when converting to TextUnit: {}", size) } @@ -198,13 +198,13 @@ range_ops_impls!(Add, add, +, AddAssign, add_assign); range_ops_impls!(Sub, sub, -, SubAssign, sub_assign); impl<'a> iter::Sum<&'a TextUnit> for TextUnit { - fn sum>(iter: I) -> TextUnit { + fn sum>(iter: I) -> TextUnit { iter.fold(TextUnit::from(0), ops::Add::add) } } impl iter::Sum for TextUnit { - fn sum>(iter: I) -> TextUnit { + fn sum>(iter: I) -> TextUnit { iter.fold(TextUnit::from(0), ops::Add::add) } } @@ -223,10 +223,7 @@ pub struct TextRange { impl TextRange { #[inline(always)] pub fn checked_sub(self, other: TextUnit) -> Option { - let res = TextRange::offset_len( - self.start().checked_sub(other)?, - self.len() - ); + let res = TextRange::offset_len(self.start().checked_sub(other)?, self.len()); Some(res) } } @@ -287,8 +284,28 @@ impl TextRange { #[inline(always)] pub fn is_subrange(&self, other: &TextRange) -> bool { - other.start() <= self.start() - && self.end() <= other.end() + other.start() <= self.start() && self.end() <= other.end() + } + + #[inline(always)] + pub fn intersection(&self, other: &TextRange) -> Option { + let start = self.start.max(other.start()); + let end = self.end.min(other.end()); + if start <= end { + Some(TextRange::from_to(start, end)) + } else { + None + } + } + + #[inline(always)] + pub fn contains(&self, offset: TextUnit) -> bool { + self.start() <= offset && offset < self.end() + } + + #[inline(always)] + pub fn contains_inclusive(&self, offset: TextUnit) -> bool { + self.start() <= offset && offset <= self.end() } } @@ -310,8 +327,8 @@ impl ops::Index for String { #[cfg(feature = "serde")] mod serde_impls { - use serde::{Serialize, Serializer, Deserialize, Deserializer}; - use 
{TextUnit, TextRange}; + use serde::{Deserialize, Deserializer, Serialize, Serializer}; + use {TextRange, TextUnit}; impl Serialize for TextUnit { fn serialize(&self, serializer: S) -> Result { @@ -344,6 +361,10 @@ mod serde_impls { mod tests { use super::*; + fn r(from: u32, to: u32) -> TextRange { + TextRange::from_to(from.into(), to.into()) + } + #[test] fn test_sum() { let xs: Vec = vec![0.into(), 1.into(), 2.into()]; @@ -353,16 +374,10 @@ mod tests { #[test] fn test_ops() { - let r = TextRange::from_to(10.into(), 20.into()); + let range = r(10, 20); let u: TextUnit = 5.into(); - assert_eq!( - r + u, - TextRange::from_to(15.into(), 25.into()), - ); - assert_eq!( - r - u, - TextRange::from_to(5.into(), 15.into()), - ); + assert_eq!(range + u, r(15, 25)); + assert_eq!(range - u, r(5, 15)); } #[test] @@ -371,17 +386,38 @@ mod tests { assert_eq!(x.checked_sub(1.into()), Some(0.into())); assert_eq!(x.checked_sub(2.into()), None); - let r = TextRange::from_to(1.into(), 2.into()); - assert_eq!(r.checked_sub(1.into()), Some(TextRange::from_to(0.into(), 1.into()))); + assert_eq!(r(1, 2).checked_sub(1.into()), Some(r(0, 1))); assert_eq!(x.checked_sub(2.into()), None); } #[test] fn test_subrange() { - let r1 = TextRange::from_to(2.into(), 4.into()); - let r2 = TextRange::from_to(2.into(), 3.into()); - let r3 = TextRange::from_to(1.into(), 3.into()); + let r1 = r(2, 4); + let r2 = r(2, 3); + let r3 = r(1, 3); assert!(r2.is_subrange(&r1)); assert!(!r3.is_subrange(&r1)); } + + #[test] + fn check_intersection() { + assert_eq!(r(1, 2).intersection(&r(2, 3)), Some(r(2, 2))); + assert_eq!(r(1, 5).intersection(&r(2, 3)), Some(r(2, 3))); + assert_eq!(r(1, 2).intersection(&r(3, 4)), None); + } + + #[test] + fn check_contains() { + assert!(!r(1, 3).contains(0.into())); + assert!(r(1, 3).contains(1.into())); + assert!(r(1, 3).contains(2.into())); + assert!(!r(1, 3).contains(3.into())); + assert!(!r(1, 3).contains(4.into())); + + assert!(!r(1, 3).contains_inclusive(0.into())); + 
assert!(r(1, 3).contains_inclusive(1.into())); + assert!(r(1, 3).contains_inclusive(2.into())); + assert!(r(1, 3).contains_inclusive(3.into())); + assert!(!r(1, 3).contains_inclusive(4.into())); + } } From c64484f68a4f4addca0ef122e3be1488287800a3 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 15 May 2019 18:45:58 +0300 Subject: [PATCH 036/322] implement RangeBounds for TextRange --- src/lib.rs | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 73e076923ff6..6d0864975197 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -220,14 +220,6 @@ pub struct TextRange { end: TextUnit, } -impl TextRange { - #[inline(always)] - pub fn checked_sub(self, other: TextUnit) -> Option { - let res = TextRange::offset_len(self.start().checked_sub(other)?, self.len()); - Some(res) - } -} - impl fmt::Debug for TextRange { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { ::fmt(self, f) @@ -307,6 +299,25 @@ impl TextRange { pub fn contains_inclusive(&self, offset: TextUnit) -> bool { self.start() <= offset && offset <= self.end() } + + #[inline(always)] + pub fn checked_sub(self, other: TextUnit) -> Option { + let res = TextRange::offset_len( + self.start().checked_sub(other)?, + self.len() + ); + Some(res) + } +} + +impl ops::RangeBounds for TextRange { + fn start_bound(&self) -> ops::Bound<&TextUnit> { + ops::Bound::Included(&self.start) + } + + fn end_bound(&self) -> ops::Bound<&TextUnit> { + ops::Bound::Excluded(&self.end) + } } impl ops::Index for str { From d4a9b0e5a765eaa75cf4b5023fc9ee7ffbfcc3b8 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 15 May 2019 19:01:08 +0300 Subject: [PATCH 037/322] bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 274d6e64bd51..ea1881e6e62f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text_unit" -version = "0.1.5" +version = "0.1.8" authors = ["Aleksey Kladov 
"] description = "Newtypes for text offsets" license = "MIT OR Apache-2.0" From ebd1146c51da7bcfc3ff40204cdfdb4732f4ce48 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 21 May 2019 12:51:58 +0300 Subject: [PATCH 038/322] SmolStr: Ord --- Cargo.toml | 2 +- src/lib.rs | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index acf5f3cf6390..ae17a208f4d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.10" +version = "0.1.11" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" diff --git a/src/lib.rs b/src/lib.rs index e35787220506..ca64e9077cf2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,10 @@ -use std::{borrow::Borrow, fmt, hash, iter, ops::Deref, sync::Arc}; +use std::{ + fmt, hash, iter, + borrow::Borrow, + cmp::Ordering, + ops::Deref, + sync::Arc, +}; /// A `SmolStr` is a string type that has the following properties: /// @@ -125,6 +131,18 @@ impl<'a> PartialEq for &'a String { } } +impl Ord for SmolStr { + fn cmp(&self, other: &SmolStr) -> Ordering { + self.as_str().cmp(other.as_str()) + } +} + +impl PartialOrd for SmolStr { + fn partial_cmp(&self, other: &SmolStr) -> Option { + Some(self.cmp(other)) + } +} + impl hash::Hash for SmolStr { fn hash(&self, hasher: &mut H) { self.as_str().hash(hasher) From 4176c29f98e2c6fdb0a17daaf04dab8b667da98a Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sat, 25 May 2019 17:39:32 +0200 Subject: [PATCH 039/322] Add FromIterator with &str/&String/String items --- src/lib.rs | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++ tests/test.rs | 35 +++++++++++++++++++++++----------- 2 files changed, 76 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ca64e9077cf2..344c4ae05102 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -182,6 +182,58 @@ impl iter::FromIterator for SmolStr { } } 
+impl iter::FromIterator for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + use std::io::prelude::*; + + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + let mut iter = iter.into_iter(); + while let Some(slice) = iter.next() { + let size = slice.len(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(&slice); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); + len += size; + } + SmolStr(Repr::Inline { len: len as u8, buf }) + } +} + +impl<'a> iter::FromIterator<&'a String> for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + SmolStr::from_iter(iter.into_iter().map(|x| x.as_str())) + } +} + +impl<'a> iter::FromIterator<&'a str> for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + use std::io::prelude::*; + + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + let mut iter = iter.into_iter(); + while let Some(slice) = iter.next() { + let size = slice.len(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(slice); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); + len += size; + } + SmolStr(Repr::Inline { len: len as u8, buf }) + } +} + impl From for SmolStr where T: Into + AsRef, diff --git a/tests/test.rs b/tests/test.rs index beab0780a80a..28081bc3d144 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -27,33 +27,46 @@ fn conversions() { assert_eq!(s, "Hello, World!") } -fn check_props(s: &str) -> Result<(), proptest::test_runner::TestCaseError> { - let smol = SmolStr::new(s); - prop_assert_eq!(smol.as_str(), s); - prop_assert_eq!(smol.len(), s.len()); - prop_assert_eq!(smol.is_empty(), s.is_empty()); +fn check_props(std_str: &str, smol: SmolStr) -> 
Result<(), proptest::test_runner::TestCaseError> { + prop_assert_eq!(smol.as_str(), std_str); + prop_assert_eq!(smol.len(), std_str.len()); + prop_assert_eq!(smol.is_empty(), std_str.is_empty()); Ok(()) } proptest! { #[test] fn roundtrip(s: String) { - check_props(s.as_str())?; + check_props(s.as_str(), SmolStr::new(s.clone()))?; } #[test] fn roundtrip_spaces(s in r"( )*") { - check_props(s.as_str())?; + check_props(s.as_str(), SmolStr::new(s.clone()))?; } #[test] fn roundtrip_newlines(s in r"\n*") { - check_props(s.as_str())?; + check_props(s.as_str(), SmolStr::new(s.clone()))?; } #[test] fn roundtrip_ws(s in r"( |\n)*") { - check_props(s.as_str())?; + check_props(s.as_str(), SmolStr::new(s.clone()))?; + } + + #[test] + fn from_string_iter(slices in proptest::collection::vec(".*", 1..100)) { + let string: String = slices.iter().map(|x| x.as_str()).collect(); + let smol: SmolStr = slices.into_iter().collect(); + check_props(string.as_str(), smol)?; + } + + #[test] + fn from_str_iter(slices in proptest::collection::vec(".*", 1..100)) { + let string: String = slices.iter().map(|x| x.as_str()).collect(); + let smol: SmolStr = slices.iter().collect(); + check_props(string.as_str(), smol)?; } } @@ -75,7 +88,7 @@ fn test_search_in_hashmap() { } #[test] -fn test_from_iterator() { +fn test_from_char_iterator() { let examples = [ // Simple keyword-like strings ("if", false), @@ -90,7 +103,7 @@ fn test_from_iterator() { ("部落格", false), ("사회과학원 어학연구소", true), - // String containin diverse characters + // String containing diverse characters ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), ]; for (raw, is_heap) in &examples { From 34e1d42643dd0efe08e8a8d08720af30ceb0020b Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sat, 25 May 2019 21:28:32 +0200 Subject: [PATCH 040/322] Deduplicate FromIterator code Using a private function that is overly generic. 
--- src/lib.rs | 63 +++++++++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 344c4ae05102..af8001ced9b6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -182,26 +182,34 @@ impl iter::FromIterator for SmolStr { } } +fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr +where + T: AsRef, + std::string::String: std::iter::Extend, +{ + use std::io::prelude::*; + + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + while let Some(slice) = iter.next() { + let slice = slice.as_ref(); + let size = slice.len(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(&slice); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); + len += size; + } + SmolStr(Repr::Inline { len: len as u8, buf }) +} + impl iter::FromIterator for SmolStr { fn from_iter>(iter: I) -> SmolStr { - use std::io::prelude::*; - - let mut len = 0; - let mut buf = [0u8; INLINE_CAP]; - let mut iter = iter.into_iter(); - while let Some(slice) = iter.next() { - let size = slice.len(); - if size + len > INLINE_CAP { - let mut heap = String::with_capacity(size + len); - heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); - heap.push_str(&slice); - heap.extend(iter); - return SmolStr(Repr::Heap(heap.into_boxed_str().into())); - } - (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); - len += size; - } - SmolStr(Repr::Inline { len: len as u8, buf }) + build_from_str_iter(iter.into_iter()) } } @@ -213,24 +221,7 @@ impl<'a> iter::FromIterator<&'a String> for SmolStr { impl<'a> iter::FromIterator<&'a str> for SmolStr { fn from_iter>(iter: I) -> SmolStr { - use std::io::prelude::*; - - let mut len = 0; - let mut buf = [0u8; INLINE_CAP]; - let mut iter = iter.into_iter(); - while let Some(slice) = iter.next() { - let size = 
slice.len(); - if size + len > INLINE_CAP { - let mut heap = String::with_capacity(size + len); - heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); - heap.push_str(slice); - heap.extend(iter); - return SmolStr(Repr::Heap(heap.into_boxed_str().into())); - } - (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); - len += size; - } - SmolStr(Repr::Inline { len: len as u8, buf }) + build_from_str_iter(iter.into_iter()) } } From 83e23229f0b180b967c0355bca8502c9ce35d480 Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sun, 26 May 2019 14:16:37 +0200 Subject: [PATCH 041/322] Explicitly use copy_from_slice --- src/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index af8001ced9b6..684cb8d0fdde 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -187,8 +187,6 @@ where T: AsRef, std::string::String: std::iter::Extend, { - use std::io::prelude::*; - let mut len = 0; let mut buf = [0u8; INLINE_CAP]; while let Some(slice) = iter.next() { @@ -201,7 +199,7 @@ where heap.extend(iter); return SmolStr(Repr::Heap(heap.into_boxed_str().into())); } - (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); + (&mut buf[len..len + size]).copy_from_slice(slice.as_bytes()); len += size; } SmolStr(Repr::Inline { len: len as u8, buf }) From 1626f4707d6a17d4b1c97898d711bc1d6a625390 Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sun, 26 May 2019 14:16:49 +0200 Subject: [PATCH 042/322] Simplify type names --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 684cb8d0fdde..e075b200aaee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -185,7 +185,7 @@ impl iter::FromIterator for SmolStr { fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr where T: AsRef, - std::string::String: std::iter::Extend, + String: iter::Extend, { let mut len = 0; let mut buf = [0u8; INLINE_CAP]; From d89fdf75736fa8721057e6e0ae6e787e6f363cbb Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sun, 26 
May 2019 14:17:07 +0200 Subject: [PATCH 043/322] Explicitly test for heap allocation, too --- tests/test.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test.rs b/tests/test.rs index 28081bc3d144..853c0da67326 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -31,6 +31,9 @@ fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner prop_assert_eq!(smol.as_str(), std_str); prop_assert_eq!(smol.len(), std_str.len()); prop_assert_eq!(smol.is_empty(), std_str.is_empty()); + if smol.len() <= 22 { + prop_assert!(!smol.is_heap_allocated()); + } Ok(()) } From 50ece5cb31a731107d6418ddcc1205fa2eda2ed2 Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sun, 26 May 2019 14:17:13 +0200 Subject: [PATCH 044/322] Add simple benchmark --- Cargo.toml | 5 +++++ benches/building.rs | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 benches/building.rs diff --git a/Cargo.toml b/Cargo.toml index ae17a208f4d8..81f23ff60b27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,8 @@ serde = { version = "1", optional = true } [dev-dependencies] proptest = "0.8.3" serde_json = "1" +criterion = "0.2" + +[[bench]] +name = "building" +harness = false diff --git a/benches/building.rs b/benches/building.rs new file mode 100644 index 000000000000..19833146766d --- /dev/null +++ b/benches/building.rs @@ -0,0 +1,44 @@ +#[macro_use] +extern crate criterion; +extern crate smol_str; + +use criterion::{Criterion, ParameterizedBenchmark, Throughput}; +use smol_str::SmolStr; + +fn from_str_iter(c: &mut Criterion) { + use std::iter::FromIterator; + + const SIZES: &[usize] = &[0, 5, 10, 15, 20, 2 << 4, 2 << 5, 2 << 6, 2 << 7, 2 << 8]; + + fn test_data(input: &str, size: usize) -> Vec<&str> { + std::iter::repeat(input).take(size / input.len()).collect() + } + + c.bench( + "FromIterator", + ParameterizedBenchmark::new( + "SmolStr, one byte elements", + |b, &&size| { + let src = test_data("x", size); + b.iter(|| 
SmolStr::from_iter(src.iter().cloned()).len()) + }, + SIZES, + ) + .with_function("SmolStr, five byte elements", |b, &&size| { + let src = test_data("helloo", size); + b.iter(|| SmolStr::from_iter(src.iter().cloned()).len()) + }) + .with_function("String, one byte elements", |b, &&size| { + let src = test_data("x", size); + b.iter(|| String::from_iter(src.iter().cloned()).len()) + }) + .with_function("String, five byte elements", |b, &&size| { + let src = test_data("hello", size); + b.iter(|| String::from_iter(src.iter().cloned()).len()) + }) + .throughput(|elems| Throughput::Bytes(**elems as u32)), + ); +} + +criterion_group!(benches, from_str_iter); +criterion_main!(benches); From 15676e37dbbde916d3cbbd97de5bdbc95a2d9a8a Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sun, 26 May 2019 14:27:46 +0200 Subject: [PATCH 045/322] Simplify slicing (no visible perf impact) --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index e075b200aaee..b5892cf45502 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -199,7 +199,7 @@ where heap.extend(iter); return SmolStr(Repr::Heap(heap.into_boxed_str().into())); } - (&mut buf[len..len + size]).copy_from_slice(slice.as_bytes()); + (&mut buf[len..][..size]).copy_from_slice(slice.as_bytes()); len += size; } SmolStr(Repr::Inline { len: len as u8, buf }) From 81d62e944e385abe979debd67bb5ea32fad8f5bb Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 27 May 2019 09:29:17 +0300 Subject: [PATCH 046/322] add bors --- bors.toml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 bors.toml diff --git a/bors.toml b/bors.toml new file mode 100644 index 000000000000..574c56320ff0 --- /dev/null +++ b/bors.toml @@ -0,0 +1,4 @@ +status = [ + "continuous-integration/travis-ci/push", +] +delete_merged_branches = true From bad3837b265677fc5a1c45b5a3bb313fdb375d48 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 2 Jun 2019 14:36:29 +0300 Subject: [PATCH 047/322] support 
deepsize --- Cargo.toml | 5 +++-- src/lib.rs | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ea1881e6e62f..40afa4311c7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text_unit" -version = "0.1.8" +version = "0.1.9" authors = ["Aleksey Kladov "] description = "Newtypes for text offsets" license = "MIT OR Apache-2.0" @@ -8,4 +8,5 @@ repository = "https://github.com/matklad/text_unit" documentation = "https://docs.rs/text_unit" [dependencies] -serde = { version = "1", optional = true } +serde = { version = "1", optional = true, default_features = false } +deepsize = { version = "0.1", optional = true, default_features = false } diff --git a/src/lib.rs b/src/lib.rs index 6d0864975197..48e1920b48d2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -368,6 +368,11 @@ mod serde_impls { } } +#[cfg(feature = "deepsize")] +mod deepsize_impls { + deepsize::known_deep_size!(0, crate::TextUnit, crate::TextRange); +} + #[cfg(test)] mod tests { use super::*; From 49ab2b75ec1bfea42c9fdc086da5b5871081feee Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 2 Jun 2019 14:38:18 +0300 Subject: [PATCH 048/322] don't use derive --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 81f23ff60b27..559596966587 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" [dependencies] -serde = { version = "1", optional = true } +serde = { version = "1", optional = true, default_features = false } [dev-dependencies] proptest = "0.8.3" From c7ce079a24cdd67eaf972f070874c6be7c166082 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 7 Jul 2019 13:59:33 +0300 Subject: [PATCH 049/322] add cosnt-fn ctor --- src/lib.rs | 48 +++++++++++++++++++++++++++++++++++++----------- tests/test.rs | 18 ++++++++++++++++-- 2 files changed, 53 insertions(+), 13 deletions(-) diff 
--git a/src/lib.rs b/src/lib.rs index b5892cf45502..449c199682c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,4 @@ -use std::{ - fmt, hash, iter, - borrow::Borrow, - cmp::Ordering, - ops::Deref, - sync::Arc, -}; +use std::{borrow::Borrow, cmp::Ordering, fmt, hash, iter, ops::Deref, sync::Arc}; /// A `SmolStr` is a string type that has the following properties: /// @@ -25,6 +19,32 @@ use std::{ pub struct SmolStr(Repr); impl SmolStr { + /// Constructs an inline variant of `SmolStr` at compile time. + /// + /// `len` must be short (<= 22), `bytes` must be ascii. If `len` is smaller + /// than the actual len of `bytes`, the string is truncated. + pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { + let _len_is_short = [(); INLINE_CAP + 1][len]; + + const ZEROS: &[u8] = &[0; INLINE_CAP]; + + let mut buf = [0; INLINE_CAP]; + macro_rules! s { + ($($idx:literal),*) => ( $(s!(set $idx);)* ); + (set $idx:literal) => ({ + let src: &[u8] = [ZEROS, bytes][($idx < len) as usize]; + let b = src[$idx]; + let _is_ascii = [(); 128][b as usize]; + buf[$idx] = b + }); + } + s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + SmolStr(Repr::Inline { + len: len as u8, + buf, + }) + } + pub fn new(text: T) -> SmolStr where T: Into + AsRef, @@ -56,7 +76,7 @@ impl SmolStr { pub fn is_heap_allocated(&self) -> bool { match self.0 { Repr::Heap(..) 
=> true, - _ => false + _ => false, } } } @@ -178,11 +198,14 @@ impl iter::FromIterator for SmolStr { ch.encode_utf8(&mut buf[len..]); len += size; } - SmolStr(Repr::Inline { len: len as u8, buf }) + SmolStr(Repr::Inline { + len: len as u8, + buf, + }) } } -fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr +fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr where T: AsRef, String: iter::Extend, @@ -202,7 +225,10 @@ where (&mut buf[len..][..size]).copy_from_slice(slice.as_bytes()); len += size; } - SmolStr(Repr::Inline { len: len as u8, buf }) + SmolStr(Repr::Inline { + len: len as u8, + buf, + }) } impl iter::FromIterator for SmolStr { diff --git a/tests/test.rs b/tests/test.rs index 853c0da67326..b2a2ea0c7ecb 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -27,6 +27,22 @@ fn conversions() { assert_eq!(s, "Hello, World!") } +#[test] +fn const_fn_ctor() { + const EMPTY: SmolStr = SmolStr::new_inline_from_ascii(0, b""); + const A: SmolStr = SmolStr::new_inline_from_ascii(1, b"A"); + const HELLO: SmolStr = SmolStr::new_inline_from_ascii(5, b"HELLO"); + const LONG: SmolStr = SmolStr::new_inline_from_ascii(22, b"ABCDEFGHIZKLMNOPQRSTUV"); + + // const TOO_LONG: SmolStr = SmolStr::new_inline_from_ascii(23, b"ABCDEFGHIZKLMNOPQRSTUVW"); + // const NON_ASCII: SmolStr = SmolStr::new_inline_from_ascii(2, &[209, 139]); + + assert_eq!(EMPTY, SmolStr::from("")); + assert_eq!(A, SmolStr::from("A")); + assert_eq!(HELLO, SmolStr::from("HELLO")); + assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUV")); +} + fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> { prop_assert_eq!(smol.as_str(), std_str); prop_assert_eq!(smol.len(), std_str.len()); @@ -97,7 +113,6 @@ fn test_from_char_iterator() { ("if", false), ("for", false), ("impl", false), - // Strings containing two-byte characters ("パーティーへ行かないか", true), ("パーティーへ行か", true), @@ -105,7 +120,6 @@ fn test_from_char_iterator() { ("和製漢語", false), ("部落格", false), ("사회과학원 
어학연구소", true), - // String containing diverse characters ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), ]; From 2cc30a544d14abe77a0d4dbdf61c7e6278636703 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 7 Jul 2019 14:00:33 +0300 Subject: [PATCH 050/322] pubish v0.1.12 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 559596966587..1fd084132424 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.11" +version = "0.1.12" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" From 56330d5a6edda16d32be0a86aa7a9beaf5fdc7e7 Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Mon, 22 Jul 2019 13:28:12 +0200 Subject: [PATCH 051/322] new_inline_from_ascii: Docs and compile-fail tests --- src/lib.rs | 67 +++++++++++++++++++++++++++++++++++++++++++++++---- tests/test.rs | 3 --- 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 449c199682c3..c7bbb4a875c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,8 +21,65 @@ pub struct SmolStr(Repr); impl SmolStr { /// Constructs an inline variant of `SmolStr` at compile time. /// - /// `len` must be short (<= 22), `bytes` must be ascii. If `len` is smaller - /// than the actual len of `bytes`, the string is truncated. + /// # Parameters + /// + /// - `len`: Must be short (≤ 22 bytes) + /// - `bytes`: Must be ASCII bytes, and there must be at least `len` of + /// them. If `len` is smaller than the actual len of `bytes`, the string + /// is truncated. + /// + /// # Returns + /// + /// A constant `SmolStr` with inline data. 
+ /// + /// # Examples + /// + /// ```rust + /// # use smol_str::SmolStr; + /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii(5, b"hello"); + /// ``` + /// + /// Given a `len` smaller than the number of bytes in `bytes`, the string is + /// cut off: + /// + /// ```rust + /// # use smol_str::SmolStr; + /// const SHORT: SmolStr = SmolStr::new_inline_from_ascii(5, b"hello world"); + /// assert_eq!(SHORT.as_str(), "hello"); + /// ``` + /// + /// ## Compile-time errors + /// + /// This will **fail** at compile-time with a message like "index out of + /// bounds" on a `_len_is_short` because the string is too large: + /// + /// ```rust,compile_fail + /// # use smol_str::SmolStr; + /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii( + /// 49, + /// b"hello world, how are you doing this fine morning?", + /// ); + /// ``` + /// + /// Similarly, this will **fail** to compile with "index out of bounds" on + /// an `_is_ascii` binding because it contains non-ASCII characters: + /// + /// ```rust,compile_fail + /// # use smol_str::SmolStr; + /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii( + /// 2, + /// &[209, 139], + /// ); + /// ``` + /// + /// Last but not least, given a `len` that is larger than the number of + /// bytes in `bytes`, it will fail to compile with "index out of bounds: the + /// len is 5 but the index is 5" on a binding called `byte`: + /// + /// ```rust,compile_fail + /// # use smol_str::SmolStr; + /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii(10, b"hello"); + /// ``` pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { let _len_is_short = [(); INLINE_CAP + 1][len]; @@ -33,9 +90,9 @@ impl SmolStr { ($($idx:literal),*) => ( $(s!(set $idx);)* ); (set $idx:literal) => ({ let src: &[u8] = [ZEROS, bytes][($idx < len) as usize]; - let b = src[$idx]; - let _is_ascii = [(); 128][b as usize]; - buf[$idx] = b + let byte = src[$idx]; + let _is_ascii = [(); 128][byte as usize]; + buf[$idx] = byte }); } s!(0, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); diff --git a/tests/test.rs b/tests/test.rs index b2a2ea0c7ecb..13e0f01e2f9c 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -34,9 +34,6 @@ fn const_fn_ctor() { const HELLO: SmolStr = SmolStr::new_inline_from_ascii(5, b"HELLO"); const LONG: SmolStr = SmolStr::new_inline_from_ascii(22, b"ABCDEFGHIZKLMNOPQRSTUV"); - // const TOO_LONG: SmolStr = SmolStr::new_inline_from_ascii(23, b"ABCDEFGHIZKLMNOPQRSTUVW"); - // const NON_ASCII: SmolStr = SmolStr::new_inline_from_ascii(2, &[209, 139]); - assert_eq!(EMPTY, SmolStr::from("")); assert_eq!(A, SmolStr::from("A")); assert_eq!(HELLO, SmolStr::from("HELLO")); From e9480f45b7d6e9902ac83fdcc314ce5bf8e7ad9d Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Wed, 25 Sep 2019 14:46:30 +0200 Subject: [PATCH 052/322] Demonstrate bug with serde from_reader --- Cargo.toml | 2 ++ tests/test.rs | 94 +++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 89 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1fd084132424..2a795643fa6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ serde = { version = "1", optional = true, default_features = false } [dev-dependencies] proptest = "0.8.3" serde_json = "1" +serde_derive = "1" +serde = "1" criterion = "0.2" [[bench]] diff --git a/tests/test.rs b/tests/test.rs index 13e0f01e2f9c..8814036d1f27 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -2,6 +2,8 @@ extern crate serde_json; extern crate smol_str; #[macro_use] extern crate proptest; +#[cfg(feature = "serde")] +extern crate serde_derive; use smol_str::SmolStr; @@ -87,13 +89,91 @@ proptest! 
{ } #[cfg(feature = "serde")] -#[test] -fn test_serde() { - let s = SmolStr::new("Hello, World"); - let s = serde_json::to_string(&s).unwrap(); - assert_eq!(s, "\"Hello, World\""); - let s: SmolStr = serde_json::from_str(&s).unwrap(); - assert_eq!(s, "Hello, World"); +mod serde_tests { + use super::*; + use std::collections::HashMap; + use serde_derive::{Serialize, Deserialize}; + + #[derive(Serialize, Deserialize)] + struct SmolStrStruct { + pub(crate) s: SmolStr, + pub(crate) vec: Vec, + pub(crate) map: HashMap + } + + #[test] + fn test_serde() { + let s = SmolStr::new("Hello, World"); + let s = serde_json::to_string(&s).unwrap(); + assert_eq!(s, "\"Hello, World\""); + let s: SmolStr = serde_json::from_str(&s).unwrap(); + assert_eq!(s, "Hello, World"); + } + + #[test] + fn test_serde_reader() { + let s = SmolStr::new("Hello, World"); + let s = serde_json::to_string(&s).unwrap(); + assert_eq!(s, "\"Hello, World\""); + let s: SmolStr = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + assert_eq!(s, "Hello, World"); + } + + #[test] + fn test_serde_struct() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let struct_ = SmolStrStruct { + s: SmolStr::new("Hello, World"), + vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")], + map, + }; + let s = serde_json::to_string(&struct_).unwrap(); + let _new_struct: SmolStrStruct = serde_json::from_str(&s).unwrap(); + } + + #[test] + fn test_serde_struct_reader() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let struct_ = SmolStrStruct { + s: SmolStr::new("Hello, World"), + vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")], + map, + }; + let s = serde_json::to_string(&struct_).unwrap(); + let _new_struct: SmolStrStruct = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + } + + #[test] + fn test_serde_hashmap() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), 
SmolStr::new("ohno")); + let s = serde_json::to_string(&map).unwrap(); + let _s: HashMap = serde_json::from_str(&s).unwrap(); + } + + #[test] + fn test_serde_hashmap_reader() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let s = serde_json::to_string(&map).unwrap(); + let _s: HashMap = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + } + + #[test] + fn test_serde_vec() { + let vec = vec![SmolStr::new(""), SmolStr::new("b")]; + let s = serde_json::to_string(&vec).unwrap(); + let _s: Vec = serde_json::from_str(&s).unwrap(); + } + + #[test] + fn test_serde_vec_reader() { + let vec = vec![SmolStr::new(""), SmolStr::new("b")]; + let s = serde_json::to_string(&vec).unwrap(); + let _s: Vec = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + } } #[test] From 0bf0169c0ee29fb1bfe55eda10435596e92f70a0 Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Thu, 26 Sep 2019 10:17:56 +0200 Subject: [PATCH 053/322] Implement visitor --- Cargo.toml | 4 +-- src/lib.rs | 78 +++++++++++++++++++++++++++++++++++++++++++++++++-- tests/test.rs | 11 +++----- 3 files changed, 81 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2a795643fa6a..8d4373357a57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" +edition = "2018" [dependencies] serde = { version = "1", optional = true, default_features = false } @@ -12,8 +13,7 @@ serde = { version = "1", optional = true, default_features = false } [dev-dependencies] proptest = "0.8.3" serde_json = "1" -serde_derive = "1" -serde = "1" +serde = { version = "1", features = [ "derive" ] } criterion = "0.2" [[bench]] diff --git a/src/lib.rs b/src/lib.rs index c7bbb4a875c1..9e3515817787 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -408,9 +408,81 @@ impl Repr { #[cfg(feature = "serde")] 
mod serde { - extern crate serde; + use ::serde::de::{Deserializer, Error, Unexpected, Visitor}; + use std::fmt; + use super::SmolStr; - use SmolStr; + // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 + fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct SmolStrVisitor; + + impl<'a> Visitor<'a> for SmolStrVisitor { + type Value = SmolStr; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string") + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_borrowed_str(self, v: &'a str) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + match std::str::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_borrowed_bytes(self, v: &'a [u8]) -> Result + where + E: Error, + { + match std::str::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + match String::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(e) => Err(Error::invalid_value( + Unexpected::Bytes(&e.into_bytes()), + &self, + )), + } + } + } + + deserializer.deserialize_str(SmolStrVisitor) + } impl serde::Serialize for SmolStr { fn serialize(&self, serializer: S) -> Result @@ -426,7 +498,7 @@ mod serde { where D: serde::Deserializer<'de>, { - <&'de str>::deserialize(deserializer).map(SmolStr::from) + smol_str(deserializer) } } } diff --git a/tests/test.rs b/tests/test.rs index 8814036d1f27..ab2235d3c5c8 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,9 +1,5 @@ -extern crate serde_json; -extern 
crate smol_str; #[macro_use] extern crate proptest; -#[cfg(feature = "serde")] -extern crate serde_derive; use smol_str::SmolStr; @@ -91,14 +87,14 @@ proptest! { #[cfg(feature = "serde")] mod serde_tests { use super::*; + use serde::{Serialize, Deserialize}; use std::collections::HashMap; - use serde_derive::{Serialize, Deserialize}; #[derive(Serialize, Deserialize)] struct SmolStrStruct { pub(crate) s: SmolStr, pub(crate) vec: Vec, - pub(crate) map: HashMap + pub(crate) map: HashMap, } #[test] @@ -158,7 +154,8 @@ mod serde_tests { let mut map = HashMap::new(); map.insert(SmolStr::new("a"), SmolStr::new("ohno")); let s = serde_json::to_string(&map).unwrap(); - let _s: HashMap = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + let _s: HashMap = + serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); } #[test] From f052fc059c8bf1cf8367bc68dfe80cb07dc05fe0 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 26 Sep 2019 11:48:37 +0300 Subject: [PATCH 054/322] publish v0.1.13 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8d4373357a57..219b95fdf53d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.12" +version = "0.1.13" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" From 0d3c2a9b275a8f9e0f971737fd103fd30fe2f9cf Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 9 Oct 2019 14:24:56 +0300 Subject: [PATCH 055/322] enable std feature for serde --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 219b95fdf53d..35b98f0e4dd2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.13" +version = "0.1.14" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type 
with O(1) clone" @@ -13,7 +13,7 @@ serde = { version = "1", optional = true, default_features = false } [dev-dependencies] proptest = "0.8.3" serde_json = "1" -serde = { version = "1", features = [ "derive" ] } +serde = { version = "1", features = [ "derive", "std" ] } criterion = "0.2" [[bench]] From 662ea38b3169c9543b52123e8c486b1b21328179 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 9 Oct 2019 14:48:03 +0300 Subject: [PATCH 056/322] actually enabled serde std feature --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 35b98f0e4dd2..b691d2d2bb62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.14" +version = "0.1.15" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" @@ -8,12 +8,12 @@ license = "MIT OR Apache-2.0" edition = "2018" [dependencies] -serde = { version = "1", optional = true, default_features = false } +serde = { version = "1", optional = true, default_features = false, features = [ "std" ] } [dev-dependencies] proptest = "0.8.3" serde_json = "1" -serde = { version = "1", features = [ "derive", "std" ] } +serde = { version = "1", features = [ "derive" ] } criterion = "0.2" [[bench]] From 1812698f7b4c361799f5633817106dbdd8138cfe Mon Sep 17 00:00:00 2001 From: Shotaro Yamada Date: Fri, 10 Jan 2020 00:49:00 +0900 Subject: [PATCH 057/322] Do not count spaces --- src/lib.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9e3515817787..c0c61c80d76d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -359,9 +359,11 @@ impl Repr { } let newlines = text.bytes().take_while(|&b| b == b'\n').count(); - let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); - if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { - return Repr::Substring { newlines, 
spaces }; + if text[newlines..].bytes().all(|b| b == b' ') { + let spaces = len - newlines; + if newlines <= N_NEWLINES && spaces <= N_SPACES { + return Repr::Substring { newlines, spaces }; + } } } From 312b3a0cdc47c63c484ddc4ce1a45e100803c7c9 Mon Sep 17 00:00:00 2001 From: Shotaro Yamada Date: Fri, 10 Jan 2020 00:50:08 +0900 Subject: [PATCH 058/322] Improve `Arc` creation While using `Into` could avoid an allocation in `String` -> `Box`, converting `Box` into `Arc` deallocates and re-allocates anyway. --- src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c0c61c80d76d..e4cca4717681 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -104,7 +104,7 @@ impl SmolStr { pub fn new(text: T) -> SmolStr where - T: Into + AsRef, + T: AsRef, { SmolStr(Repr::new(text)) } @@ -343,7 +343,7 @@ enum Repr { impl Repr { fn new(text: T) -> Self where - T: Into + AsRef, + T: AsRef, { { let text = text.as_ref(); @@ -367,7 +367,7 @@ impl Repr { } } - Repr::Heap(text.into().into_boxed_str().into()) + Repr::Heap(text.as_ref().into()) } #[inline(always)] From 0258341dcfb5e7f3074490e058652b36e0631abf Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 13 Feb 2020 23:09:23 +0100 Subject: [PATCH 059/322] Add TODOs --- src/lib.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 48e1920b48d2..c00954c55cb9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,17 +6,19 @@ use std::{fmt, iter, ops}; /// An offset into text. /// Offset is represented as `u32` storing number of utf8-bytes, /// but most of the clients should treat it like opaque measure. +// BREAK: TextSize(u32) #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] pub struct TextUnit(u32); impl TextUnit { - //TODO: rename to `from_char`: this is not ocaml! + // BREAK: consider renaming? /// `TextUnit` equal to the length of this char. 
#[inline(always)] pub fn of_char(c: char) -> TextUnit { TextUnit(c.len_utf8() as u32) } + // BREAK: consider renaming? /// `TextUnit` equal to the length of this string. /// /// # Panics @@ -233,6 +235,8 @@ impl fmt::Display for TextRange { } impl TextRange { + // BREAK: TextRange::new(from..to)? + // BREAK: TextRange(from, to)? /// The left-inclusive range (`[from..to)`) between to points in the text #[inline(always)] pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange { @@ -249,36 +253,41 @@ impl TextRange { TextRange::from_to(offset, offset + len) } - // TODO: pass by value + // BREAK: pass by value /// The inclusive start of this range #[inline(always)] pub fn start(&self) -> TextUnit { self.start } + // BREAK: pass by value /// The exclusive end of this range #[inline(always)] pub fn end(&self) -> TextUnit { self.end } + // BREAK: pass by value /// The length of this range #[inline(always)] pub fn len(&self) -> TextUnit { self.end - self.start } + // BREAK: pass by value /// Is this range empty of any content? 
#[inline(always)] pub fn is_empty(&self) -> bool { self.start() == self.end() } + // BREAK: pass by value #[inline(always)] pub fn is_subrange(&self, other: &TextRange) -> bool { other.start() <= self.start() && self.end() <= other.end() } + // BREAK: pass by value #[inline(always)] pub fn intersection(&self, other: &TextRange) -> Option { let start = self.start.max(other.start()); @@ -290,11 +299,13 @@ impl TextRange { } } + // BREAK: pass by value #[inline(always)] pub fn contains(&self, offset: TextUnit) -> bool { self.start() <= offset && offset < self.end() } + // BREAK: pass by value #[inline(always)] pub fn contains_inclusive(&self, offset: TextUnit) -> bool { self.start() <= offset && offset <= self.end() From c06f0046b7daf2343d3d92fca385ae051b803d0f Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Tue, 25 Feb 2020 22:10:21 +0800 Subject: [PATCH 060/322] Add convex_hull for TextRange --- src/lib.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index c00954c55cb9..2ca54859feb3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -299,6 +299,15 @@ impl TextRange { } } + // BREAK: pass by value + #[inline(always)] + /// The smallest convex set that contains both ranges + pub fn convex_hull(&self, other: &TextRange) -> TextRange { + let start = self.start().min(other.start()); + let end = self.end().max(other.end()); + TextRange::from_to(start, end) + } + // BREAK: pass by value #[inline(always)] pub fn contains(&self, offset: TextUnit) -> bool { @@ -433,6 +442,13 @@ mod tests { assert_eq!(r(1, 2).intersection(&r(3, 4)), None); } + #[test] + fn check_convex_hull() { + assert_eq!(r(1, 2).convex_hull(&r(2, 3)), r(1, 3)); + assert_eq!(r(1, 5).convex_hull(&r(2, 3)), r(1, 5)); + assert_eq!(r(1, 2).convex_hull(&r(4, 5)), r(1, 5)); + } + #[test] fn check_contains() { assert!(!r(1, 3).contains(0.into())); From 8d0d3e9b0d8281396a4c926e1cf76025a6fff2b1 Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Tue, 25 Feb 2020 22:52:04 +0800 
Subject: [PATCH 061/322] Rename convex_hull to extend_to --- src/lib.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2ca54859feb3..bd8e820e2982 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -301,8 +301,8 @@ impl TextRange { // BREAK: pass by value #[inline(always)] - /// The smallest convex set that contains both ranges - pub fn convex_hull(&self, other: &TextRange) -> TextRange { + /// The smallest range that contains both ranges + pub fn extend_to(&self, other: &TextRange) -> TextRange { let start = self.start().min(other.start()); let end = self.end().max(other.end()); TextRange::from_to(start, end) @@ -443,10 +443,10 @@ mod tests { } #[test] - fn check_convex_hull() { - assert_eq!(r(1, 2).convex_hull(&r(2, 3)), r(1, 3)); - assert_eq!(r(1, 5).convex_hull(&r(2, 3)), r(1, 5)); - assert_eq!(r(1, 2).convex_hull(&r(4, 5)), r(1, 5)); + fn check_extend_to() { + assert_eq!(r(1, 2).extend_to(&r(2, 3)), r(1, 3)); + assert_eq!(r(1, 5).extend_to(&r(2, 3)), r(1, 5)); + assert_eq!(r(1, 2).extend_to(&r(4, 5)), r(1, 5)); } #[test] From af901ad23af20fb8b32440e610d23bc34e2901b0 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 26 Feb 2020 10:58:36 +0100 Subject: [PATCH 062/322] Publish v0.1.10 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 40afa4311c7c..9080b3fad8d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text_unit" -version = "0.1.9" +version = "0.1.10" authors = ["Aleksey Kladov "] description = "Newtypes for text offsets" license = "MIT OR Apache-2.0" From 5ee8fea628212aa416172f4dcb2bb37b42047703 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Sat, 7 Mar 2020 17:10:37 -0500 Subject: [PATCH 063/322] Aggressive refactor --- Cargo.toml | 21 +- src/lib.rs | 468 +-------------------------------------------- src/range.rs | 355 ++++++++++++++++++++++++++++++++++ src/serde_impls.rs | 40 ++++ src/size.rs | 246 
++++++++++++++++++++++++ src/traits.rs | 29 +++ tests/main.rs | 67 +++++++ tests/serde.rs | 49 +++++ 8 files changed, 812 insertions(+), 463 deletions(-) create mode 100644 src/range.rs create mode 100644 src/serde_impls.rs create mode 100644 src/size.rs create mode 100644 src/traits.rs create mode 100644 tests/main.rs create mode 100644 tests/serde.rs diff --git a/Cargo.toml b/Cargo.toml index 9080b3fad8d1..b5d42f018209 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,25 @@ [package] -name = "text_unit" -version = "0.1.10" -authors = ["Aleksey Kladov "] +name = "text-size" +version = "0.99.0-dev.1" +edition = "2018" + +authors = [ + "Aleksey Kladov ", + "Christopher Durham (CAD97) " +] description = "Newtypes for text offsets" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/text_unit" documentation = "https://docs.rs/text_unit" [dependencies] -serde = { version = "1", optional = true, default_features = false } +serde = { version = "1.0", optional = true, default_features = false } deepsize = { version = "0.1", optional = true, default_features = false } + +[dev-dependencies] +serde_test = "1.0" + +[[test]] +name = "serde" +path = "tests/serde.rs" +required-features = ["serde"] diff --git a/src/lib.rs b/src/lib.rs index bd8e820e2982..66bc65379026 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,466 +1,16 @@ -#[cfg(feature = "serde")] -extern crate serde; - -use std::{fmt, iter, ops}; - -/// An offset into text. -/// Offset is represented as `u32` storing number of utf8-bytes, -/// but most of the clients should treat it like opaque measure. -// BREAK: TextSize(u32) -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] -pub struct TextUnit(u32); - -impl TextUnit { - // BREAK: consider renaming? - /// `TextUnit` equal to the length of this char. - #[inline(always)] - pub fn of_char(c: char) -> TextUnit { - TextUnit(c.len_utf8() as u32) - } - - // BREAK: consider renaming? - /// `TextUnit` equal to the length of this string. 
- /// - /// # Panics - /// Panics if the length of the string is greater than `u32::max_value()` - #[inline(always)] - pub fn of_str(s: &str) -> TextUnit { - if s.len() > u32::max_value() as usize { - panic!("string is to long") - } - TextUnit(s.len() as u32) - } - - #[inline(always)] - pub fn checked_sub(self, other: TextUnit) -> Option { - self.0.checked_sub(other.0).map(TextUnit) - } - - #[inline(always)] - pub fn from_usize(size: usize) -> TextUnit { - #[cfg(debug_assertions)] - { - if size > u32::max_value() as usize { - panic!("overflow when converting to TextUnit: {}", size) - } - } - (size as u32).into() - } - - #[inline(always)] - pub fn to_usize(self) -> usize { - u32::from(self) as usize - } -} - -impl fmt::Debug for TextUnit { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - ::fmt(self, f) - } -} - -impl fmt::Display for TextUnit { - #[inline(always)] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.0.fmt(f) - } -} - -impl From for u32 { - #[inline(always)] - fn from(tu: TextUnit) -> u32 { - tu.0 - } -} - -impl From for TextUnit { - #[inline(always)] - fn from(tu: u32) -> TextUnit { - TextUnit(tu) - } -} - -macro_rules! 
unit_ops_impls { - ($T:ident, $f:ident, $op:tt, $AT:ident, $af:ident) => { - -impl ops::$T for TextUnit { - type Output = TextUnit; - #[inline(always)] - fn $f(self, rhs: TextUnit) -> TextUnit { - TextUnit(self.0 $op rhs.0) - } -} - -impl<'a> ops::$T<&'a TextUnit> for TextUnit { - type Output = TextUnit; - #[inline(always)] - fn $f(self, rhs: &'a TextUnit) -> TextUnit { - ops::$T::$f(self, *rhs) - } -} - -impl<'a> ops::$T for &'a TextUnit { - type Output = TextUnit; - #[inline(always)] - fn $f(self, rhs: TextUnit) -> TextUnit { - ops::$T::$f(*self, rhs) - } -} - -impl<'a, 'b> ops::$T<&'a TextUnit> for &'b TextUnit { - type Output = TextUnit; - #[inline(always)] - fn $f(self, rhs: &'a TextUnit) -> TextUnit { - ops::$T::$f(*self, *rhs) - } -} - -impl ops::$AT for TextUnit { - #[inline(always)] - fn $af(&mut self, rhs: TextUnit) { - self.0 = self.0 $op rhs.0 - } -} - -impl<'a> ops::$AT<&'a TextUnit> for TextUnit { - #[inline(always)] - fn $af(&mut self, rhs: &'a TextUnit) { - ops::$AT::$af(self, *rhs) - } -} - }; -} - -macro_rules! 
range_ops_impls { - ($T:ident, $f:ident, $op:tt, $AT:ident, $af:ident) => { - -impl ops::$T for TextRange { - type Output = TextRange; - #[inline(always)] - fn $f(self, rhs: TextUnit) -> TextRange { - TextRange::from_to( - self.start() $op rhs, - self.end() $op rhs, - ) - } -} - -impl<'a> ops::$T<&'a TextUnit> for TextRange { - type Output = TextRange; - #[inline(always)] - fn $f(self, rhs: &'a TextUnit) -> TextRange { - TextRange::from_to( - self.start() $op rhs, - self.end() $op rhs, - ) - } -} - -impl<'a> ops::$T for &'a TextRange { - type Output = TextRange; - #[inline(always)] - fn $f(self, rhs: TextUnit) -> TextRange { - TextRange::from_to( - self.start() $op rhs, - self.end() $op rhs, - ) - } -} - -impl<'a, 'b> ops::$T<&'a TextUnit> for &'b TextRange { - type Output = TextRange; - #[inline(always)] - fn $f(self, rhs: &'a TextUnit) -> TextRange { - TextRange::from_to( - self.start() $op rhs, - self.end() $op rhs, - ) - } -} - -impl ops::$AT for TextRange { - #[inline(always)] - fn $af(&mut self, rhs: TextUnit) { - *self = *self $op rhs - } -} - -impl<'a> ops::$AT<&'a TextUnit> for TextRange { - #[inline(always)] - fn $af(&mut self, rhs: &'a TextUnit) { - *self = *self $op rhs - } -} - }; -} - -unit_ops_impls!(Add, add, +, AddAssign, add_assign); -unit_ops_impls!(Sub, sub, -, SubAssign, sub_assign); -range_ops_impls!(Add, add, +, AddAssign, add_assign); -range_ops_impls!(Sub, sub, -, SubAssign, sub_assign); - -impl<'a> iter::Sum<&'a TextUnit> for TextUnit { - fn sum>(iter: I) -> TextUnit { - iter.fold(TextUnit::from(0), ops::Add::add) - } -} - -impl iter::Sum for TextUnit { - fn sum>(iter: I) -> TextUnit { - iter.fold(TextUnit::from(0), ops::Add::add) - } -} - -/// A range in the text, represented as a pair of `TextUnit`s. -/// -/// # Panics -/// Slicing a `&str` with `TextRange` panics if the result is -/// not a valid utf8 string. 
-#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub struct TextRange { - start: TextUnit, - end: TextUnit, -} - -impl fmt::Debug for TextRange { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - ::fmt(self, f) - } -} +//! Newtypes for working with text sizes/ranges in a more type-safe manner. -impl fmt::Display for TextRange { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "[{}; {})", self.start(), self.end()) - } -} +#![forbid(unsafe_code)] +#![warn(missing_debug_implementations, missing_docs)] -impl TextRange { - // BREAK: TextRange::new(from..to)? - // BREAK: TextRange(from, to)? - /// The left-inclusive range (`[from..to)`) between to points in the text - #[inline(always)] - pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange { - assert!(from <= to, "Invalid text range [{}; {})", from, to); - TextRange { - start: from, - end: to, - } - } - - /// The left-inclusive range (`[offset..offset + len)`) between to points in the text - #[inline(always)] - pub fn offset_len(offset: TextUnit, len: TextUnit) -> TextRange { - TextRange::from_to(offset, offset + len) - } - - // BREAK: pass by value - /// The inclusive start of this range - #[inline(always)] - pub fn start(&self) -> TextUnit { - self.start - } - - // BREAK: pass by value - /// The exclusive end of this range - #[inline(always)] - pub fn end(&self) -> TextUnit { - self.end - } - - // BREAK: pass by value - /// The length of this range - #[inline(always)] - pub fn len(&self) -> TextUnit { - self.end - self.start - } - - // BREAK: pass by value - /// Is this range empty of any content? 
- #[inline(always)] - pub fn is_empty(&self) -> bool { - self.start() == self.end() - } - - // BREAK: pass by value - #[inline(always)] - pub fn is_subrange(&self, other: &TextRange) -> bool { - other.start() <= self.start() && self.end() <= other.end() - } - - // BREAK: pass by value - #[inline(always)] - pub fn intersection(&self, other: &TextRange) -> Option { - let start = self.start.max(other.start()); - let end = self.end.min(other.end()); - if start <= end { - Some(TextRange::from_to(start, end)) - } else { - None - } - } - - // BREAK: pass by value - #[inline(always)] - /// The smallest range that contains both ranges - pub fn extend_to(&self, other: &TextRange) -> TextRange { - let start = self.start().min(other.start()); - let end = self.end().max(other.end()); - TextRange::from_to(start, end) - } - - // BREAK: pass by value - #[inline(always)] - pub fn contains(&self, offset: TextUnit) -> bool { - self.start() <= offset && offset < self.end() - } - - // BREAK: pass by value - #[inline(always)] - pub fn contains_inclusive(&self, offset: TextUnit) -> bool { - self.start() <= offset && offset <= self.end() - } - - #[inline(always)] - pub fn checked_sub(self, other: TextUnit) -> Option { - let res = TextRange::offset_len( - self.start().checked_sub(other)?, - self.len() - ); - Some(res) - } -} - -impl ops::RangeBounds for TextRange { - fn start_bound(&self) -> ops::Bound<&TextUnit> { - ops::Bound::Included(&self.start) - } - - fn end_bound(&self) -> ops::Bound<&TextUnit> { - ops::Bound::Excluded(&self.end) - } -} - -impl ops::Index for str { - type Output = str; - - fn index(&self, index: TextRange) -> &str { - &self[index.start().0 as usize..index.end().0 as usize] - } -} - -impl ops::Index for String { - type Output = str; - - fn index(&self, index: TextRange) -> &str { - &self.as_str()[index] - } -} +mod range; +mod size; +mod traits; #[cfg(feature = "serde")] -mod serde_impls { - use serde::{Deserialize, Deserializer, Serialize, Serializer}; - use 
{TextRange, TextUnit}; - - impl Serialize for TextUnit { - fn serialize(&self, serializer: S) -> Result { - self.0.serialize(serializer) - } - } - - impl<'de> Deserialize<'de> for TextUnit { - fn deserialize>(deserializer: D) -> Result { - let value = Deserialize::deserialize(deserializer)?; - Ok(TextUnit(value)) - } - } - - impl Serialize for TextRange { - fn serialize(&self, serializer: S) -> Result { - (self.start, self.end).serialize(serializer) - } - } +mod serde_impls; - impl<'de> Deserialize<'de> for TextRange { - fn deserialize>(deserializer: D) -> Result { - let (start, end) = Deserialize::deserialize(deserializer)?; - Ok(TextRange { start, end }) - } - } -} +pub use crate::{range::TextRange, size::TextSize, traits::TextSized}; #[cfg(feature = "deepsize")] -mod deepsize_impls { - deepsize::known_deep_size!(0, crate::TextUnit, crate::TextRange); -} - -#[cfg(test)] -mod tests { - use super::*; - - fn r(from: u32, to: u32) -> TextRange { - TextRange::from_to(from.into(), to.into()) - } - - #[test] - fn test_sum() { - let xs: Vec = vec![0.into(), 1.into(), 2.into()]; - assert_eq!(xs.iter().sum::(), 3.into()); - assert_eq!(xs.into_iter().sum::(), 3.into()); - } - - #[test] - fn test_ops() { - let range = r(10, 20); - let u: TextUnit = 5.into(); - assert_eq!(range + u, r(15, 25)); - assert_eq!(range - u, r(5, 15)); - } - - #[test] - fn test_checked_ops() { - let x: TextUnit = 1.into(); - assert_eq!(x.checked_sub(1.into()), Some(0.into())); - assert_eq!(x.checked_sub(2.into()), None); - - assert_eq!(r(1, 2).checked_sub(1.into()), Some(r(0, 1))); - assert_eq!(x.checked_sub(2.into()), None); - } - - #[test] - fn test_subrange() { - let r1 = r(2, 4); - let r2 = r(2, 3); - let r3 = r(1, 3); - assert!(r2.is_subrange(&r1)); - assert!(!r3.is_subrange(&r1)); - } - - #[test] - fn check_intersection() { - assert_eq!(r(1, 2).intersection(&r(2, 3)), Some(r(2, 2))); - assert_eq!(r(1, 5).intersection(&r(2, 3)), Some(r(2, 3))); - assert_eq!(r(1, 2).intersection(&r(3, 4)), 
None); - } - - #[test] - fn check_extend_to() { - assert_eq!(r(1, 2).extend_to(&r(2, 3)), r(1, 3)); - assert_eq!(r(1, 5).extend_to(&r(2, 3)), r(1, 5)); - assert_eq!(r(1, 2).extend_to(&r(4, 5)), r(1, 5)); - } - - #[test] - fn check_contains() { - assert!(!r(1, 3).contains(0.into())); - assert!(r(1, 3).contains(1.into())); - assert!(r(1, 3).contains(2.into())); - assert!(!r(1, 3).contains(3.into())); - assert!(!r(1, 3).contains(4.into())); - - assert!(!r(1, 3).contains_inclusive(0.into())); - assert!(r(1, 3).contains_inclusive(1.into())); - assert!(r(1, 3).contains_inclusive(2.into())); - assert!(r(1, 3).contains_inclusive(3.into())); - assert!(!r(1, 3).contains_inclusive(4.into())); - } -} +deepsize::known_deep_size!(0, TextSize, TextRange); diff --git a/src/range.rs b/src/range.rs new file mode 100644 index 000000000000..89012f277920 --- /dev/null +++ b/src/range.rs @@ -0,0 +1,355 @@ +use { + crate::{TextSize, TextSized}, + std::{ + cmp, + convert::{TryFrom, TryInto}, + fmt, + num::TryFromIntError, + ops::{ + Add, AddAssign, Bound, Index, IndexMut, Range, RangeBounds, RangeInclusive, RangeTo, + RangeToInclusive, Sub, SubAssign, + }, + }, +}; + +/// A range in text, represented as a pair of [`TextSize`][struct@TextSize]. +/// +/// It is a logical error to have `end() < start()`, but +/// code must not assume this is true for `unsafe` guarantees. 
+/// +/// # Translation from `text_unit` +/// +/// - `TextRange::from_to(from, to)` ⟹ `TextRange::from(from..to)` +/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::at(offset).with_len(size)` +/// - `range.start()` ⟹ `range.start()` +/// - `range.end()` ⟹ `range.end()` +/// - `range.len()` ⟹ `range.len()` +/// - `range.is_empty()` ⟹ `range.is_empty()` +/// - `a.is_subrange(b)` ⟹ `b.contains(a)` +/// - `a.intersection(b)` ⟹ `TextRange::intersection(a, b)` +/// - `a.extend_to(b)` ⟹ `TextRange::covering(a, b)` +/// - `range.contains(offset)` ⟹ `range.contains_point(point)` +/// - `range.contains_inclusive(offset)` ⟹ `range.contains_point_inclusive(point)` +/// +/// † See the note on [`TextRange::len`] for differing behavior for incorrect reverse ranges. +#[derive(Copy, Clone, Eq, PartialEq, Hash)] +pub struct TextRange { + start: TextSize, + end: TextSize, +} + +#[allow(non_snake_case)] +pub(crate) const fn TextRange(start: TextSize, end: TextSize) -> TextRange { + TextRange { start, end } +} + +impl fmt::Debug for TextRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self, f) + } +} + +impl fmt::Display for TextRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[{}..{})", self.start(), self.end()) + } +} + +/// Identity methods. +impl TextRange { + /// The start point of this range. + pub const fn start(self) -> TextSize { + self.start + } + + /// The end point of this range. + pub const fn end(self) -> TextSize { + self.end + } + + /// The size of this range. + /// + /// # Panics + /// + /// When `end() < start()`, triggers a subtraction overflow. + /// This will panic with debug assertions, and overflow without. + pub const fn len(self) -> TextSize { + // HACK for const fn: math on primitives only + TextSize(self.end().raw - self.start().raw) + } + + /// Check if this range empty or reversed. + /// + /// When `end() < start()`, this returns false. 
+ /// Code should prefer `is_empty()` to `len() == 0`, + /// as this safeguards against incorrect reverse ranges. + pub const fn is_empty(self) -> bool { + // HACK for const fn: math on primitives only + self.start().raw >= self.end().raw + } +} + +/// Manipulation methods. +impl TextRange { + /// A range covering the text size of some text-like object. + pub fn of(size: impl TextSized) -> TextRange { + TextRange(0.into(), size.text_size()) + } + + /// An empty range at some text size offset. + pub fn at(size: impl Into) -> TextRange { + let size = size.into(); + TextRange(size, size) + } + + /// Set the length without changing the starting offset. + pub fn with_len(self, len: impl Into) -> TextRange { + TextRange(self.start(), self.start() + len.into()) + } + + /// Set the starting offset without changing the length. + pub fn with_offset(self, offset: impl Into) -> TextRange { + TextRange::at(offset).with_len(self.len()) + } + + /// Check if this range completely contains another range. + pub fn contains(self, other: TextRange) -> bool { + self.start() <= other.start() && other.end() <= self.end() + } + + /// The range covered by both ranges, if it exists. + /// If the ranges touch but do not overlap, the output range is empty. + pub fn intersection(lhs: TextRange, rhs: TextRange) -> Option { + let start = cmp::max(lhs.start(), rhs.start()); + let end = cmp::min(lhs.end(), rhs.end()); + Some(TextRange(start, end)).filter(|_| start <= end) + } + + /// The smallest range that completely contains both ranges. + pub fn covering(lhs: TextRange, rhs: TextRange) -> TextRange { + let start = cmp::min(lhs.start(), rhs.start()); + let end = cmp::max(lhs.end(), rhs.end()); + TextRange(start, end) + } + + /// Check if this range contains a point. + /// + /// The end index is considered excluded. + pub fn contains_point(self, point: impl Into) -> bool { + let point = point.into(); + self.start() <= point && point < self.end() + } + + /// Check if this range contains a point. 
+ /// + /// The end index is considered included. + pub fn contains_point_inclusive(self, point: impl Into) -> bool { + let point = point.into(); + self.start() <= point && point <= self.end() + } + + /// Offset the entire range by some text size. + pub fn checked_add(self, rhs: impl TryInto) -> Option { + let rhs = rhs.try_into().ok()?; + Some(TextRange( + self.start().checked_add(rhs)?, + self.end().checked_add(rhs)?, + )) + } + + /// Offset the entire range by some text size. + pub fn checked_sub(self, rhs: impl TryInto) -> Option { + let rhs = rhs.try_into().ok()?; + Some(TextRange( + self.start().checked_sub(rhs)?, + self.end().checked_sub(rhs)?, + )) + } +} + +impl Index for str { + type Output = str; + fn index(&self, index: TextRange) -> &Self::Output { + &self[index.start().ix()..index.end().ix()] + } +} + +impl IndexMut for str { + fn index_mut(&mut self, index: TextRange) -> &mut Self::Output { + &mut self[index.start().ix()..index.end().ix()] + } +} + +impl RangeBounds for TextRange { + fn start_bound(&self) -> Bound<&TextSize> { + Bound::Included(&self.start) + } + + fn end_bound(&self) -> Bound<&TextSize> { + Bound::Excluded(&self.end) + } +} + +macro_rules! conversions { + (From<$lte:ident> for TextRange) => { + impl From> for TextRange { + fn from(value: Range<$lte>) -> TextRange { + TextRange(value.start.into(), value.end.into()) + } + } + impl TryFrom> for TextRange { + type Error = TryFromIntError; + fn try_from(value: RangeInclusive<$lte>) -> Result { + let (start, end) = value.into_inner(); + let end: TextSize = end.into(); + // This is the only way to get a TryFromIntError currently. 
+ let end = end.checked_add(1).ok_or_else(|| u8::try_from(-1).unwrap_err())?; + Ok(TextRange(start.into(), end)) + } + } + impl From> for TextRange { + fn from(value: RangeTo<$lte>) -> TextRange { + TextRange(0.into(), value.end.into()) + } + } + impl TryFrom> for TextRange { + type Error = TryFromIntError; + fn try_from(value: RangeToInclusive<$lte>) -> Result { + let start: TextSize = 0.into(); + let end: TextSize = value.end.into(); + TextRange::try_from(start..=end) + } + } + }; + (TryFrom<$gt:ident> for TextRange) => { + impl TryFrom> for TextRange { + type Error = <$gt as TryInto>::Error; + fn try_from(value: Range<$gt>) -> Result { + Ok(TextRange(value.start.try_into()?, value.end.try_into()?)) + } + } + impl TryFrom> for TextRange { + type Error = TryFromIntError; + fn try_from(value: RangeInclusive<$gt>) -> Result { + let (start, end) = value.into_inner(); + let end: TextSize = end.try_into()?; + // This is the only way to get a TryFromIntError currently. + let end = end.checked_add(1).ok_or_else(|| u8::try_from(-1).unwrap_err())?; + Ok(TextRange(start.try_into()?, end)) + } + } + impl TryFrom> for TextRange { + type Error = TryFromIntError; + fn try_from(value: RangeTo<$gt>) -> Result { + Ok(TextRange(0.into(), value.end.try_into()?)) + } + } + impl TryFrom> for TextRange { + type Error = TryFromIntError; + fn try_from(value: RangeToInclusive<$gt>) -> Result { + let start: TextSize = 0.into(); + let end: TextSize = value.end.try_into()?; + TextRange::try_from(start..=end) + } + } + }; + { + lt TextSize [$($lt:ident)*] + eq TextSize [$($eq:ident)*] + gt TextSize [$($gt:ident)*] + varries [$($var:ident)*] + } => { + $( + // Not `From` yet because of integer type fallback. We want e.g. + // `TextRange::from(0)` and `range + 1` to work, and more `From` + // impls means that this will try (and fail) to use i32 rather + // than one of the unsigned integer types that actually work. 
+ conversions!(TryFrom<$lt> for TextRange); + )* + + $( + conversions!(From<$eq> for TextRange); + )* + + $( + conversions!(TryFrom<$gt> for TextRange); + )* + + $( + conversions!(TryFrom<$var> for TextRange); + )* + }; +} + +// FIXME: when `default impl` is usable, change to blanket impls for [Try]Into instead +conversions! { + lt TextSize [u8 u16] + eq TextSize [u32 TextSize] + gt TextSize [u64] + varries [usize] +} + +impl Into for &'_ TextRange { + fn into(self) -> TextRange { + *self + } +} + +impl Into for &'_ mut TextRange { + fn into(self) -> TextRange { + *self + } +} + +macro_rules! op { + (impl $Op:ident for TextRange by fn $f:ident = $op:tt) => { + impl $Op for TextRange + where + TextSize: $Op, + { + type Output = TextRange; + fn $f(self, rhs: IntoSize) -> TextRange { + TextRange(self.start() $op rhs, self.end() $op rhs) + } + } + impl $Op for &'_ TextRange + where + TextRange: $Op, + { + type Output = TextRange; + fn $f(self, rhs: IntoSize) -> TextRange { + *self $op rhs + } + } + impl $Op for &'_ mut TextRange + where + TextRange: $Op, + { + type Output = TextRange; + fn $f(self, rhs: IntoSize) -> TextRange { + *self $op rhs + } + } + }; +} + +op!(impl Add for TextRange by fn add = +); +op!(impl Sub for TextRange by fn sub = -); + +impl AddAssign for TextRange +where + TextRange: Add, +{ + fn add_assign(&mut self, rhs: A) { + *self = *self + rhs + } +} + +impl SubAssign for TextRange +where + TextRange: Sub, +{ + fn sub_assign(&mut self, rhs: S) { + *self = *self - rhs + } +} diff --git a/src/serde_impls.rs b/src/serde_impls.rs new file mode 100644 index 000000000000..1963413fd83f --- /dev/null +++ b/src/serde_impls.rs @@ -0,0 +1,40 @@ +use { + crate::{TextRange, TextSize}, + serde::{Deserialize, Deserializer, Serialize, Serializer}, +}; + +impl Serialize for TextSize { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.raw.serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for TextSize { + fn 
deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + Deserialize::deserialize(deserializer).map(TextSize) + } +} + +impl Serialize for TextRange { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + (self.start(), self.end()).serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for TextRange { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + Deserialize::deserialize(deserializer).map(|(start, end)| TextRange(start, end)) + } +} diff --git a/src/size.rs b/src/size.rs new file mode 100644 index 000000000000..80dba0aba6ea --- /dev/null +++ b/src/size.rs @@ -0,0 +1,246 @@ +use { + crate::TextSized, + std::{ + convert::{TryFrom, TryInto}, + fmt, iter, + num::TryFromIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + u32, + }, +}; + +/// A measure of text length. Also, equivalently, an index into text. +/// +/// This is a utf8-bytes-offset stored as `u32`, but +/// most clients should treat it as an opaque measure. +/// +/// # Translation from `text_unit` +/// +/// - `TextUnit::of_char(c)` ⟹ `TextSize::of(c)` +/// - `TextUnit::of_str(s)` ⟹ `TextSize:of(s)` +/// - `TextUnit::from_usize(size)` ⟹ `TextSize::new(size)` +/// - `unit.to_usize()` ⟹ `size.ix()` +#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TextSize { + pub(crate) raw: u32, +} + +#[allow(non_snake_case)] +pub(crate) const fn TextSize(raw: u32) -> TextSize { + TextSize { raw } +} + +impl fmt::Debug for TextSize { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self, f) + } +} + +impl fmt::Display for TextSize { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.raw, f) + } +} + +impl TextSize { + /// The text size of some text-like object. + pub fn of(text: &impl TextSized) -> TextSize { + text.text_size() + } + + /// A text size for some `usize`. 
+ /// + /// # Panics + /// + /// Panics if the size is greater than `u32::MAX` and debug assertions are + /// enabled. If debug assertions are not enabled, wraps into `u32` space. + pub fn new(size: usize) -> TextSize { + if let Ok(size) = size.try_into() { + size + } else if cfg!(debug_assertions) { + panic!("overflow when converting to TextSize"); + } else { + TextSize(size as u32) + } + } + + /// Convert this text size into the standard indexing type. + /// + /// # Panics + /// + /// Panics if the size is greater than `usize::MAX`. This can only + /// occur on targets where `size_of::() < size_of::()`. + pub fn ix(self) -> usize { + if let Ok(ix) = self.try_into() { + ix + } else { + panic!("overflow when converting TextSize to usize index") + } + } +} + +/// Methods to act like a primitive integer type, where reasonably applicable. +// Last updated for parity with Rust 1.42.0. +impl TextSize { + /// The smallest representable text size. (`u32::MIN`) + pub const MIN: TextSize = TextSize(u32::MIN); + /// The largest representable text size. (`u32::MAX`) + pub const MAX: TextSize = TextSize(u32::MAX); + + #[allow(missing_docs)] + pub fn checked_add(self, rhs: impl TryInto) -> Option { + let rhs = rhs.try_into().ok()?; + self.raw.checked_add(rhs.raw).map(TextSize) + } + + #[allow(missing_docs)] + pub fn checked_sub(self, rhs: impl TryInto) -> Option { + let rhs = rhs.try_into().ok()?; + self.raw.checked_sub(rhs.raw).map(TextSize) + } +} + +macro_rules! 
conversions { + (From for $gte:ident) => { + impl From for $gte { + fn from(value: TextSize) -> $gte { + value.raw.into() + } + } + }; + (From<$lte:ident> for TextSize) => { + impl From<$lte> for TextSize { + fn from(value: $lte) -> TextSize { + TextSize(value.into()) + } + } + }; + (TryFrom for $lt:ident) => { + impl TryFrom for $lt { + type Error = TryFromIntError; + fn try_from(value: TextSize) -> Result<$lt, Self::Error> { + value.raw.try_into() + } + } + }; + (TryFrom<$gt:ident> for TextSize) => { + impl TryFrom<$gt> for TextSize { + type Error = <$gt as TryInto>::Error; + fn try_from(value: $gt) -> Result { + value.try_into().map(TextSize) + } + } + }; + { + lt u32 [$($lt:ident)*] + eq u32 [$($eq:ident)*] + gt u32 [$($gt:ident)*] + varries [$($var:ident)*] + } => { + $( + // Not `From` yet because of integer type fallback. We want e.g. + // `TextSize::from(0)` and `size + 1` to work, and more `From` + // impls means that this will try (and fail) to use i32 rather + // than one of the unsigned integer types that actually work. + conversions!(TryFrom<$lt> for TextSize); + conversions!(TryFrom for $lt); + )* + + $( + conversions!(From<$eq> for TextSize); + conversions!(From for $eq); + )* + + $( + conversions!(TryFrom<$gt> for TextSize); + conversions!(From for $gt); + )* + + $( + conversions!(TryFrom<$var> for TextSize); + conversions!(TryFrom for $var); + )* + }; +} + +conversions! { + lt u32 [u8 u16] + eq u32 [u32] + gt u32 [u64] + varries [usize i32] // i32 so that `checked_add($lit)` (`try_from($lit)`) can work + // this will unfortunately have to hang around even if integer literal type fallback improves +} + +impl Into for &'_ TextSize { + fn into(self) -> TextSize { + *self + } +} + +impl Into for &'_ mut TextSize { + fn into(self) -> TextSize { + *self + } +} + +macro_rules! 
op { + (impl $Op:ident for TextSize by fn $f:ident = $op:tt) => { + impl> $Op for TextSize { + type Output = TextSize; + fn $f(self, rhs: IntoSize) -> TextSize { + TextSize(self.raw $op rhs.into().raw) + } + } + impl $Op for &'_ TextSize + where + TextSize: $Op, + { + type Output = TextSize; + fn $f(self, rhs: IntoSize) -> TextSize { + *self $op rhs + } + } + impl $Op for &'_ mut TextSize + where + TextSize: $Op, + { + type Output = TextSize; + fn $f(self, rhs: IntoSize) -> TextSize { + *self $op rhs + } + } + }; +} + +op!(impl Add for TextSize by fn add = +); +op!(impl Sub for TextSize by fn sub = -); + +impl AddAssign for TextSize +where + TextSize: Add, +{ + fn add_assign(&mut self, rhs: A) { + *self = *self + rhs + } +} + +impl SubAssign for TextSize +where + TextSize: Sub, +{ + fn sub_assign(&mut self, rhs: S) { + *self = *self - rhs + } +} + +impl iter::Sum for TextSize { + fn sum>(iter: I) -> TextSize { + iter.fold(TextSize::default(), Add::add) + } +} + +impl<'a> iter::Sum<&'a Self> for TextSize { + fn sum>(iter: I) -> Self { + iter.fold(TextSize::default(), Add::add) + } +} diff --git a/src/traits.rs b/src/traits.rs new file mode 100644 index 000000000000..52601534d2da --- /dev/null +++ b/src/traits.rs @@ -0,0 +1,29 @@ +use { + crate::{TextRange, TextSize}, + std::convert::TryInto, +}; + +/// Text-like structures that have a text size. +pub trait TextSized { + /// The size of this text-alike. 
+ fn text_size(&self) -> TextSize; +} + +impl TextSized for str { + fn text_size(&self) -> TextSize { + let len = self.len(); + TextSize::new(len) + } +} + +impl TextSized for char { + fn text_size(&self) -> TextSize { + self.len_utf8().try_into().unwrap() + } +} + +impl TextSized for TextRange { + fn text_size(&self) -> TextSize { + self.len() + } +} diff --git a/tests/main.rs b/tests/main.rs new file mode 100644 index 000000000000..a7eef0a2cd96 --- /dev/null +++ b/tests/main.rs @@ -0,0 +1,67 @@ +use text_size::*; + +fn r(from: u32, to: u32) -> TextRange { + TextRange::from(from..to) +} + +#[test] +fn sum() { + let xs: Vec = vec![0.into(), 1.into(), 2.into()]; + assert_eq!(xs.iter().sum::(), 3.into()); + assert_eq!(xs.into_iter().sum::(), 3.into()); +} + +#[test] +fn math() { + let range = r(10, 20); + assert_eq!(range + 5, r(15, 25)); + assert_eq!(range - 5, r(5, 15)); +} + +#[test] +fn checked_math() { + let x: TextSize = 1.into(); + assert_eq!(x.checked_sub(1), Some(0.into())); + assert_eq!(x.checked_sub(2), None); + + assert_eq!(r(1, 2).checked_sub(1), Some(r(0, 1))); + assert_eq!(x.checked_sub(2), None); +} + +#[test] +fn contains() { + let r1 = r(2, 4); + let r2 = r(2, 3); + let r3 = r(1, 3); + assert!(r1.contains(r2)); + assert!(!r1.contains(r3)); +} + +#[test] +fn intersection() { + assert_eq!(TextRange::intersection(r(1, 2), r(2, 3)), Some(r(2, 2))); + assert_eq!(TextRange::intersection(r(1, 5), r(2, 3)), Some(r(2, 3))); + assert_eq!(TextRange::intersection(r(1, 2), r(3, 4)), None); +} + +#[test] +fn covering() { + assert_eq!(TextRange::covering(r(1, 2), r(2, 3)), r(1, 3)); + assert_eq!(TextRange::covering(r(1, 5), r(2, 3)), r(1, 5)); + assert_eq!(TextRange::covering(r(1, 2), r(4, 5)), r(1, 5)); +} + +#[test] +fn contains_point() { + assert!(!r(1, 3).contains_point(0)); + assert!(r(1, 3).contains_point(1)); + assert!(r(1, 3).contains_point(2)); + assert!(!r(1, 3).contains_point(3)); + assert!(!r(1, 3).contains_point(4)); + + assert!(!r(1, 
3).contains_point_inclusive(0)); + assert!(r(1, 3).contains_point_inclusive(1)); + assert!(r(1, 3).contains_point_inclusive(2)); + assert!(r(1, 3).contains_point_inclusive(3)); + assert!(!r(1, 3).contains_point_inclusive(4)); +} diff --git a/tests/serde.rs b/tests/serde.rs new file mode 100644 index 000000000000..439b9d71f57b --- /dev/null +++ b/tests/serde.rs @@ -0,0 +1,49 @@ +use {serde_test::*, text_size::*}; + +#[test] +fn size() { + assert_tokens(&TextSize::new(00), &[Token::U32(00)]); + assert_tokens(&TextSize::new(10), &[Token::U32(10)]); + assert_tokens(&TextSize::new(20), &[Token::U32(20)]); + assert_tokens(&TextSize::new(30), &[Token::U32(30)]); +} + +#[test] +fn range() { + assert_tokens( + &TextRange::from(00..10), + &[ + Token::Tuple { len: 2 }, + Token::U32(00), + Token::U32(10), + Token::TupleEnd, + ], + ); + assert_tokens( + &TextRange::from(10..20), + &[ + Token::Tuple { len: 2 }, + Token::U32(10), + Token::U32(20), + Token::TupleEnd, + ], + ); + assert_tokens( + &TextRange::from(20..30), + &[ + Token::Tuple { len: 2 }, + Token::U32(20), + Token::U32(30), + Token::TupleEnd, + ], + ); + assert_tokens( + &TextRange::from(30..40), + &[ + Token::Tuple { len: 2 }, + Token::U32(30), + Token::U32(40), + Token::TupleEnd, + ], + ); +} From 7a1373e6624c2eca901e09f02aee5c1224c0b6c3 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Sat, 7 Mar 2020 22:33:21 -0500 Subject: [PATCH 064/322] Scale back the exposed API surface --- Cargo.toml | 3 +- src/lib.rs | 3 - src/range.rs | 207 ++++++------------------------------------------- src/size.rs | 125 ++++++++--------------------- src/traits.rs | 22 ++++-- tests/main.rs | 78 ++++++++++--------- tests/serde.rs | 30 ++++--- 7 files changed, 133 insertions(+), 335 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b5d42f018209..95c11472cd74 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-size" -version = "0.99.0-dev.1" +version = "0.99.0-dev.2" edition = "2018" authors = [ @@ -14,7 +14,6 @@ 
documentation = "https://docs.rs/text_unit" [dependencies] serde = { version = "1.0", optional = true, default_features = false } -deepsize = { version = "0.1", optional = true, default_features = false } [dev-dependencies] serde_test = "1.0" diff --git a/src/lib.rs b/src/lib.rs index 66bc65379026..dc1a09b22bf2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,3 @@ mod traits; mod serde_impls; pub use crate::{range::TextRange, size::TextSize, traits::TextSized}; - -#[cfg(feature = "deepsize")] -deepsize::known_deep_size!(0, TextSize, TextRange); diff --git a/src/range.rs b/src/range.rs index 89012f277920..fe227cdaff06 100644 --- a/src/range.rs +++ b/src/range.rs @@ -1,14 +1,10 @@ use { - crate::{TextSize, TextSized}, + crate::TextSize, std::{ cmp, convert::{TryFrom, TryInto}, fmt, - num::TryFromIntError, - ops::{ - Add, AddAssign, Bound, Index, IndexMut, Range, RangeBounds, RangeInclusive, RangeTo, - RangeToInclusive, Sub, SubAssign, - }, + ops::{Bound, Index, IndexMut, Range, RangeBounds}, }, }; @@ -19,17 +15,17 @@ use { /// /// # Translation from `text_unit` /// -/// - `TextRange::from_to(from, to)` ⟹ `TextRange::from(from..to)` -/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::at(offset).with_len(size)` -/// - `range.start()` ⟹ `range.start()` -/// - `range.end()` ⟹ `range.end()` -/// - `range.len()` ⟹ `range.len()` -/// - `range.is_empty()` ⟹ `range.is_empty()` -/// - `a.is_subrange(b)` ⟹ `b.contains(a)` -/// - `a.intersection(b)` ⟹ `TextRange::intersection(a, b)` -/// - `a.extend_to(b)` ⟹ `TextRange::covering(a, b)` -/// - `range.contains(offset)` ⟹ `range.contains_point(point)` -/// - `range.contains_inclusive(offset)` ⟹ `range.contains_point_inclusive(point)` +/// - `TextRange::from_to(from, to)` ⟹ `TextRange::from(from..to)` +/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::from(offset..offset + size)` +/// - `range.start()` ⟹ `range.start()` +/// - `range.end()` ⟹ `range.end()` +/// - `range.len()` ⟹ `range.len()` +/// - 
`range.is_empty()` ⟹ `range.is_empty()` +/// - `a.is_subrange(b)` ⟹ `b.contains(a)` +/// - `a.intersection(b)` ⟹ `TextRange::intersection(a, b)` +/// - `a.extend_to(b)` ⟹ `TextRange::covering(a, b)` +/// - `range.contains(offset)` ⟹ `range.contains_exclusive(point)` +/// - `range.contains_inclusive(offset)` ⟹ `range.contains_inclusive(point)` /// /// † See the note on [`TextRange::len`] for differing behavior for incorrect reverse ranges. #[derive(Copy, Clone, Eq, PartialEq, Hash)] @@ -44,12 +40,6 @@ pub(crate) const fn TextRange(start: TextSize, end: TextSize) -> TextRange { } impl fmt::Debug for TextRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -impl fmt::Display for TextRange { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "[{}..{})", self.start(), self.end()) } @@ -91,27 +81,6 @@ impl TextRange { /// Manipulation methods. impl TextRange { - /// A range covering the text size of some text-like object. - pub fn of(size: impl TextSized) -> TextRange { - TextRange(0.into(), size.text_size()) - } - - /// An empty range at some text size offset. - pub fn at(size: impl Into) -> TextRange { - let size = size.into(); - TextRange(size, size) - } - - /// Set the length without changing the starting offset. - pub fn with_len(self, len: impl Into) -> TextRange { - TextRange(self.start(), self.start() + len.into()) - } - - /// Set the starting offset without changing the length. - pub fn with_offset(self, offset: impl Into) -> TextRange { - TextRange::at(offset).with_len(self.len()) - } - /// Check if this range completely contains another range. pub fn contains(self, other: TextRange) -> bool { self.start() <= other.start() && other.end() <= self.end() @@ -135,7 +104,7 @@ impl TextRange { /// Check if this range contains a point. /// /// The end index is considered excluded. 
- pub fn contains_point(self, point: impl Into) -> bool { + pub fn contains_exclusive(self, point: impl Into) -> bool { let point = point.into(); self.start() <= point && point < self.end() } @@ -143,40 +112,27 @@ impl TextRange { /// Check if this range contains a point. /// /// The end index is considered included. - pub fn contains_point_inclusive(self, point: impl Into) -> bool { + pub fn contains_inclusive(self, point: impl Into) -> bool { let point = point.into(); self.start() <= point && point <= self.end() } +} - /// Offset the entire range by some text size. - pub fn checked_add(self, rhs: impl TryInto) -> Option { - let rhs = rhs.try_into().ok()?; - Some(TextRange( - self.start().checked_add(rhs)?, - self.end().checked_add(rhs)?, - )) - } - - /// Offset the entire range by some text size. - pub fn checked_sub(self, rhs: impl TryInto) -> Option { - let rhs = rhs.try_into().ok()?; - Some(TextRange( - self.start().checked_sub(rhs)?, - self.end().checked_sub(rhs)?, - )) - } +fn ix(size: TextSize) -> usize { + size.try_into() + .unwrap_or_else(|_| panic!("overflow when converting TextSize to usize index")) } impl Index for str { type Output = str; fn index(&self, index: TextRange) -> &Self::Output { - &self[index.start().ix()..index.end().ix()] + &self[ix(index.start())..ix(index.end())] } } impl IndexMut for str { fn index_mut(&mut self, index: TextRange) -> &mut Self::Output { - &mut self[index.start().ix()..index.end().ix()] + &mut self[ix(index.start())..ix(index.end())] } } @@ -197,29 +153,7 @@ macro_rules! conversions { TextRange(value.start.into(), value.end.into()) } } - impl TryFrom> for TextRange { - type Error = TryFromIntError; - fn try_from(value: RangeInclusive<$lte>) -> Result { - let (start, end) = value.into_inner(); - let end: TextSize = end.into(); - // This is the only way to get a TryFromIntError currently. 
- let end = end.checked_add(1).ok_or_else(|| u8::try_from(-1).unwrap_err())?; - Ok(TextRange(start.into(), end)) - } - } - impl From> for TextRange { - fn from(value: RangeTo<$lte>) -> TextRange { - TextRange(0.into(), value.end.into()) - } - } - impl TryFrom> for TextRange { - type Error = TryFromIntError; - fn try_from(value: RangeToInclusive<$lte>) -> Result { - let start: TextSize = 0.into(); - let end: TextSize = value.end.into(); - TextRange::try_from(start..=end) - } - } + // Just support `start..end` for now, not `..end`, `start..=end`, `..=end`. }; (TryFrom<$gt:ident> for TextRange) => { impl TryFrom> for TextRange { @@ -228,30 +162,7 @@ macro_rules! conversions { Ok(TextRange(value.start.try_into()?, value.end.try_into()?)) } } - impl TryFrom> for TextRange { - type Error = TryFromIntError; - fn try_from(value: RangeInclusive<$gt>) -> Result { - let (start, end) = value.into_inner(); - let end: TextSize = end.try_into()?; - // This is the only way to get a TryFromIntError currently. - let end = end.checked_add(1).ok_or_else(|| u8::try_from(-1).unwrap_err())?; - Ok(TextRange(start.try_into()?, end)) - } - } - impl TryFrom> for TextRange { - type Error = TryFromIntError; - fn try_from(value: RangeTo<$gt>) -> Result { - Ok(TextRange(0.into(), value.end.try_into()?)) - } - } - impl TryFrom> for TextRange { - type Error = TryFromIntError; - fn try_from(value: RangeToInclusive<$gt>) -> Result { - let start: TextSize = 0.into(); - let end: TextSize = value.end.try_into()?; - TextRange::try_from(start..=end) - } - } + // Just support `start..end` for now, not `..end`, `start..=end`, `..=end`. }; { lt TextSize [$($lt:ident)*] @@ -260,11 +171,8 @@ macro_rules! conversions { varries [$($var:ident)*] } => { $( - // Not `From` yet because of integer type fallback. We want e.g. 
- // `TextRange::from(0)` and `range + 1` to work, and more `From` - // impls means that this will try (and fail) to use i32 rather - // than one of the unsigned integer types that actually work. - conversions!(TryFrom<$lt> for TextRange); + conversions!(From<$lt> for TextRange); + // unlike TextSize, we do not provide conversions in the "out" direction. )* $( @@ -288,68 +196,3 @@ conversions! { gt TextSize [u64] varries [usize] } - -impl Into for &'_ TextRange { - fn into(self) -> TextRange { - *self - } -} - -impl Into for &'_ mut TextRange { - fn into(self) -> TextRange { - *self - } -} - -macro_rules! op { - (impl $Op:ident for TextRange by fn $f:ident = $op:tt) => { - impl $Op for TextRange - where - TextSize: $Op, - { - type Output = TextRange; - fn $f(self, rhs: IntoSize) -> TextRange { - TextRange(self.start() $op rhs, self.end() $op rhs) - } - } - impl $Op for &'_ TextRange - where - TextRange: $Op, - { - type Output = TextRange; - fn $f(self, rhs: IntoSize) -> TextRange { - *self $op rhs - } - } - impl $Op for &'_ mut TextRange - where - TextRange: $Op, - { - type Output = TextRange; - fn $f(self, rhs: IntoSize) -> TextRange { - *self $op rhs - } - } - }; -} - -op!(impl Add for TextRange by fn add = +); -op!(impl Sub for TextRange by fn sub = -); - -impl AddAssign for TextRange -where - TextRange: Add, -{ - fn add_assign(&mut self, rhs: A) { - *self = *self + rhs - } -} - -impl SubAssign for TextRange -where - TextRange: Sub, -{ - fn sub_assign(&mut self, rhs: S) { - *self = *self - rhs - } -} diff --git a/src/size.rs b/src/size.rs index 80dba0aba6ea..43bf19dac905 100644 --- a/src/size.rs +++ b/src/size.rs @@ -16,10 +16,10 @@ use { /// /// # Translation from `text_unit` /// -/// - `TextUnit::of_char(c)` ⟹ `TextSize::of(c)` -/// - `TextUnit::of_str(s)` ⟹ `TextSize:of(s)` -/// - `TextUnit::from_usize(size)` ⟹ `TextSize::new(size)` -/// - `unit.to_usize()` ⟹ `size.ix()` +/// - `TextUnit::of_char(c)` ⟹ `TextSize::of(c)` +/// - `TextUnit::of_str(s)` ⟹ 
`TextSize:of(s)` +/// - `TextUnit::from_usize(size)` ⟹ `TextSize::try_from(size).unwrap_or_else(|| panic!(_))` +/// - `unit.to_usize()` ⟹ `usize::try_from(size).unwrap_or_else(|| panic!(_))` #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct TextSize { pub(crate) raw: u32, @@ -44,38 +44,16 @@ impl fmt::Display for TextSize { impl TextSize { /// The text size of some text-like object. - pub fn of(text: &impl TextSized) -> TextSize { + pub fn of(text: impl TextSized) -> TextSize { text.text_size() } - /// A text size for some `usize`. + /// A size of zero. /// - /// # Panics - /// - /// Panics if the size is greater than `u32::MAX` and debug assertions are - /// enabled. If debug assertions are not enabled, wraps into `u32` space. - pub fn new(size: usize) -> TextSize { - if let Ok(size) = size.try_into() { - size - } else if cfg!(debug_assertions) { - panic!("overflow when converting to TextSize"); - } else { - TextSize(size as u32) - } - } - - /// Convert this text size into the standard indexing type. - /// - /// # Panics - /// - /// Panics if the size is greater than `usize::MAX`. This can only - /// occur on targets where `size_of::() < size_of::()`. - pub fn ix(self) -> usize { - if let Ok(ix) = self.try_into() { - ix - } else { - panic!("overflow when converting TextSize to usize index") - } + /// This is equivalent to `TextSize::default()` or [`TextSize::MIN`], + /// but is more explicit on intent. 
+ pub const fn zero() -> TextSize { + TextSize(0) } } @@ -88,14 +66,12 @@ impl TextSize { pub const MAX: TextSize = TextSize(u32::MAX); #[allow(missing_docs)] - pub fn checked_add(self, rhs: impl TryInto) -> Option { - let rhs = rhs.try_into().ok()?; + pub fn checked_add(self, rhs: TextSize) -> Option { self.raw.checked_add(rhs.raw).map(TextSize) } #[allow(missing_docs)] - pub fn checked_sub(self, rhs: impl TryInto) -> Option { - let rhs = rhs.try_into().ok()?; + pub fn checked_sub(self, rhs: TextSize) -> Option { self.raw.checked_sub(rhs.raw).map(TextSize) } } @@ -138,11 +114,7 @@ macro_rules! conversions { varries [$($var:ident)*] } => { $( - // Not `From` yet because of integer type fallback. We want e.g. - // `TextSize::from(0)` and `size + 1` to work, and more `From` - // impls means that this will try (and fail) to use i32 rather - // than one of the unsigned integer types that actually work. - conversions!(TryFrom<$lt> for TextSize); + conversions!(From<$lt> for TextSize); conversions!(TryFrom for $lt); )* @@ -167,54 +139,17 @@ conversions! { lt u32 [u8 u16] eq u32 [u32] gt u32 [u64] - varries [usize i32] // i32 so that `checked_add($lit)` (`try_from($lit)`) can work - // this will unfortunately have to hang around even if integer literal type fallback improves + varries [usize] } -impl Into for &'_ TextSize { - fn into(self) -> TextSize { - *self +// NB: We do not provide the transparent-ref impls like the stdlib does. +impl Add for TextSize { + type Output = TextSize; + fn add(self, rhs: TextSize) -> TextSize { + TextSize(self.raw + rhs.raw) } } -impl Into for &'_ mut TextSize { - fn into(self) -> TextSize { - *self - } -} - -macro_rules! 
op { - (impl $Op:ident for TextSize by fn $f:ident = $op:tt) => { - impl> $Op for TextSize { - type Output = TextSize; - fn $f(self, rhs: IntoSize) -> TextSize { - TextSize(self.raw $op rhs.into().raw) - } - } - impl $Op for &'_ TextSize - where - TextSize: $Op, - { - type Output = TextSize; - fn $f(self, rhs: IntoSize) -> TextSize { - *self $op rhs - } - } - impl $Op for &'_ mut TextSize - where - TextSize: $Op, - { - type Output = TextSize; - fn $f(self, rhs: IntoSize) -> TextSize { - *self $op rhs - } - } - }; -} - -op!(impl Add for TextSize by fn add = +); -op!(impl Sub for TextSize by fn sub = -); - impl AddAssign for TextSize where TextSize: Add, @@ -224,6 +159,13 @@ where } } +impl Sub for TextSize { + type Output = TextSize; + fn sub(self, rhs: TextSize) -> TextSize { + TextSize(self.raw - rhs.raw) + } +} + impl SubAssign for TextSize where TextSize: Sub, @@ -233,14 +175,11 @@ where } } -impl iter::Sum for TextSize { - fn sum>(iter: I) -> TextSize { - iter.fold(TextSize::default(), Add::add) - } -} - -impl<'a> iter::Sum<&'a Self> for TextSize { - fn sum>(iter: I) -> Self { - iter.fold(TextSize::default(), Add::add) +impl iter::Sum for TextSize +where + TextSize: Add, +{ + fn sum>(iter: I) -> TextSize { + iter.fold(TextSize::zero(), Add::add) } } diff --git a/src/traits.rs b/src/traits.rs index 52601534d2da..877f05789552 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -4,26 +4,32 @@ use { }; /// Text-like structures that have a text size. -pub trait TextSized { +pub trait TextSized: Copy { /// The size of this text-alike. 
- fn text_size(&self) -> TextSize; + fn text_size(self) -> TextSize; } -impl TextSized for str { - fn text_size(&self) -> TextSize { +impl TextSized for &'_ str { + fn text_size(self) -> TextSize { let len = self.len(); - TextSize::new(len) + if let Ok(size) = len.try_into() { + size + } else if cfg!(debug_assertions) { + panic!("overflow when converting to TextSize"); + } else { + TextSize(len as u32) + } } } impl TextSized for char { - fn text_size(&self) -> TextSize { - self.len_utf8().try_into().unwrap() + fn text_size(self) -> TextSize { + TextSize(self.len_utf8() as u32) } } impl TextSized for TextRange { - fn text_size(&self) -> TextSize { + fn text_size(self) -> TextSize { self.len() } } diff --git a/tests/main.rs b/tests/main.rs index a7eef0a2cd96..3288c2732042 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -1,67 +1,73 @@ -use text_size::*; +use {std::ops, text_size::*}; -fn r(from: u32, to: u32) -> TextRange { - TextRange::from(from..to) +fn size(x: u32) -> TextSize { + TextSize::from(x) +} + +fn range(x: ops::Range) -> TextRange { + TextRange::from(x) } #[test] fn sum() { - let xs: Vec = vec![0.into(), 1.into(), 2.into()]; - assert_eq!(xs.iter().sum::(), 3.into()); - assert_eq!(xs.into_iter().sum::(), 3.into()); + let xs: Vec = vec![size(0), size(1), size(2)]; + assert_eq!(xs.iter().copied().sum::(), size(3)); + assert_eq!(xs.into_iter().sum::(), size(3)); } #[test] fn math() { - let range = r(10, 20); - assert_eq!(range + 5, r(15, 25)); - assert_eq!(range - 5, r(5, 15)); + assert_eq!(size(10) + size(5), size(15)); + assert_eq!(size(10) - size(5), size(5)); } #[test] fn checked_math() { - let x: TextSize = 1.into(); - assert_eq!(x.checked_sub(1), Some(0.into())); - assert_eq!(x.checked_sub(2), None); - - assert_eq!(r(1, 2).checked_sub(1), Some(r(0, 1))); - assert_eq!(x.checked_sub(2), None); + assert_eq!(size(1).checked_add(size(1)), Some(size(2))); + assert_eq!(size(1).checked_sub(size(1)), Some(size(0))); + assert_eq!(size(1).checked_sub(size(2)), 
None); + assert_eq!(TextSize::MAX.checked_add(size(1)), None); } #[test] +#[rustfmt::skip] fn contains() { - let r1 = r(2, 4); - let r2 = r(2, 3); - let r3 = r(1, 3); - assert!(r1.contains(r2)); - assert!(!r1.contains(r3)); + assert!( range(2..4).contains(range(2..3))); + assert!( ! range(2..4).contains(range(1..3))); } #[test] fn intersection() { - assert_eq!(TextRange::intersection(r(1, 2), r(2, 3)), Some(r(2, 2))); - assert_eq!(TextRange::intersection(r(1, 5), r(2, 3)), Some(r(2, 3))); - assert_eq!(TextRange::intersection(r(1, 2), r(3, 4)), None); + assert_eq!( + TextRange::intersection(range(1..2), range(2..3)), + Some(range(2..2)) + ); + assert_eq!( + TextRange::intersection(range(1..5), range(2..3)), + Some(range(2..3)) + ); + assert_eq!(TextRange::intersection(range(1..2), range(3..4)), None); } #[test] fn covering() { - assert_eq!(TextRange::covering(r(1, 2), r(2, 3)), r(1, 3)); - assert_eq!(TextRange::covering(r(1, 5), r(2, 3)), r(1, 5)); - assert_eq!(TextRange::covering(r(1, 2), r(4, 5)), r(1, 5)); + assert_eq!(TextRange::covering(range(1..2), range(2..3)), range(1..3)); + assert_eq!(TextRange::covering(range(1..5), range(2..3)), range(1..5)); + assert_eq!(TextRange::covering(range(1..2), range(4..5)), range(1..5)); } #[test] +#[rustfmt::skip] fn contains_point() { - assert!(!r(1, 3).contains_point(0)); - assert!(r(1, 3).contains_point(1)); - assert!(r(1, 3).contains_point(2)); - assert!(!r(1, 3).contains_point(3)); - assert!(!r(1, 3).contains_point(4)); + assert!( ! range(1..3).contains_exclusive(size(0))); + assert!( range(1..3).contains_exclusive(size(1))); + assert!( range(1..3).contains_exclusive(size(2))); + assert!( ! range(1..3).contains_exclusive(size(3))); + assert!( ! 
range(1..3).contains_exclusive(size(4))); - assert!(!r(1, 3).contains_point_inclusive(0)); - assert!(r(1, 3).contains_point_inclusive(1)); - assert!(r(1, 3).contains_point_inclusive(2)); - assert!(r(1, 3).contains_point_inclusive(3)); - assert!(!r(1, 3).contains_point_inclusive(4)); + assert!( ! range(1..3).contains_inclusive(size(0))); + assert!( range(1..3).contains_inclusive(size(1))); + assert!( range(1..3).contains_inclusive(size(2))); + assert!( range(1..3).contains_inclusive(size(3))); + assert!( ! range(1..3).contains_inclusive(size(4))); } diff --git a/tests/serde.rs b/tests/serde.rs index 439b9d71f57b..62254634dd3a 100644 --- a/tests/serde.rs +++ b/tests/serde.rs @@ -1,17 +1,25 @@ -use {serde_test::*, text_size::*}; +use {serde_test::*, std::ops, text_size::*}; + +fn size(x: u32) -> TextSize { + TextSize::from(x) +} + +fn range(x: ops::Range) -> TextRange { + TextRange::from(x) +} #[test] -fn size() { - assert_tokens(&TextSize::new(00), &[Token::U32(00)]); - assert_tokens(&TextSize::new(10), &[Token::U32(10)]); - assert_tokens(&TextSize::new(20), &[Token::U32(20)]); - assert_tokens(&TextSize::new(30), &[Token::U32(30)]); +fn size_serialization() { + assert_tokens(&size(00), &[Token::U32(00)]); + assert_tokens(&size(10), &[Token::U32(10)]); + assert_tokens(&size(20), &[Token::U32(20)]); + assert_tokens(&size(30), &[Token::U32(30)]); } #[test] -fn range() { +fn range_serialization() { assert_tokens( - &TextRange::from(00..10), + &range(00..10), &[ Token::Tuple { len: 2 }, Token::U32(00), @@ -20,7 +28,7 @@ fn range() { ], ); assert_tokens( - &TextRange::from(10..20), + &range(10..20), &[ Token::Tuple { len: 2 }, Token::U32(10), @@ -29,7 +37,7 @@ fn range() { ], ); assert_tokens( - &TextRange::from(20..30), + &range(20..30), &[ Token::Tuple { len: 2 }, Token::U32(20), @@ -38,7 +46,7 @@ fn range() { ], ); assert_tokens( - &TextRange::from(30..40), + &range(30..40), &[ Token::Tuple { len: 2 }, Token::U32(30), From cd1f821c61d9488827482227f2a1ea73dbed5e6e Mon 
Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 9 Mar 2020 08:37:26 +0100 Subject: [PATCH 065/322] Enforce invariant that ranges are well-formed This is in contrast to `std::ops::Range`, which are not guaranteed to be proper. For this reason, we have to lose `From` impls as well. --- src/range.rs | 88 ++++++++++++--------------------------------------- tests/main.rs | 2 +- 2 files changed, 21 insertions(+), 69 deletions(-) diff --git a/src/range.rs b/src/range.rs index fe227cdaff06..8febcad08209 100644 --- a/src/range.rs +++ b/src/range.rs @@ -2,9 +2,9 @@ use { crate::TextSize, std::{ cmp, - convert::{TryFrom, TryInto}, + convert::TryInto, fmt, - ops::{Bound, Index, IndexMut, Range, RangeBounds}, + ops::{Bound, Index, IndexMut, RangeBounds}, }, }; @@ -30,15 +30,11 @@ use { /// † See the note on [`TextRange::len`] for differing behavior for incorrect reverse ranges. #[derive(Copy, Clone, Eq, PartialEq, Hash)] pub struct TextRange { + // Invariant: start <= end start: TextSize, end: TextSize, } -#[allow(non_snake_case)] -pub(crate) const fn TextRange(start: TextSize, end: TextSize) -> TextRange { - TextRange { start, end } -} - impl fmt::Debug for TextRange { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "[{}..{})", self.start(), self.end()) @@ -47,6 +43,16 @@ impl fmt::Debug for TextRange { /// Identity methods. impl TextRange { + /// Creates a new `TextRange` with given `start` and `end. + /// + /// # Panics + /// + /// Panics if `end < start`. + pub fn new(start: TextSize, end: TextSize) -> TextRange { + assert!(start <= end); + TextRange { start, end } + } + /// The start point of this range. pub const fn start(self) -> TextSize { self.start @@ -58,11 +64,6 @@ impl TextRange { } /// The size of this range. - /// - /// # Panics - /// - /// When `end() < start()`, triggers a subtraction overflow. - /// This will panic with debug assertions, and overflow without. 
pub const fn len(self) -> TextSize { // HACK for const fn: math on primitives only TextSize(self.end().raw - self.start().raw) @@ -71,11 +72,10 @@ impl TextRange { /// Check if this range empty or reversed. /// /// When `end() < start()`, this returns false. - /// Code should prefer `is_empty()` to `len() == 0`, - /// as this safeguards against incorrect reverse ranges. + /// Code should prefer `is_empty()` to `len() == 0`. pub const fn is_empty(self) -> bool { // HACK for const fn: math on primitives only - self.start().raw >= self.end().raw + self.start().raw == self.end().raw } } @@ -91,14 +91,17 @@ impl TextRange { pub fn intersection(lhs: TextRange, rhs: TextRange) -> Option { let start = cmp::max(lhs.start(), rhs.start()); let end = cmp::min(lhs.end(), rhs.end()); - Some(TextRange(start, end)).filter(|_| start <= end) + if end < start { + return None; + } + Some(TextRange::new(start, end)) } /// The smallest range that completely contains both ranges. pub fn covering(lhs: TextRange, rhs: TextRange) -> TextRange { let start = cmp::min(lhs.start(), rhs.start()); let end = cmp::max(lhs.end(), rhs.end()); - TextRange(start, end) + TextRange::new(start, end) } /// Check if this range contains a point. @@ -145,54 +148,3 @@ impl RangeBounds for TextRange { Bound::Excluded(&self.end) } } - -macro_rules! conversions { - (From<$lte:ident> for TextRange) => { - impl From> for TextRange { - fn from(value: Range<$lte>) -> TextRange { - TextRange(value.start.into(), value.end.into()) - } - } - // Just support `start..end` for now, not `..end`, `start..=end`, `..=end`. - }; - (TryFrom<$gt:ident> for TextRange) => { - impl TryFrom> for TextRange { - type Error = <$gt as TryInto>::Error; - fn try_from(value: Range<$gt>) -> Result { - Ok(TextRange(value.start.try_into()?, value.end.try_into()?)) - } - } - // Just support `start..end` for now, not `..end`, `start..=end`, `..=end`. 
- }; - { - lt TextSize [$($lt:ident)*] - eq TextSize [$($eq:ident)*] - gt TextSize [$($gt:ident)*] - varries [$($var:ident)*] - } => { - $( - conversions!(From<$lt> for TextRange); - // unlike TextSize, we do not provide conversions in the "out" direction. - )* - - $( - conversions!(From<$eq> for TextRange); - )* - - $( - conversions!(TryFrom<$gt> for TextRange); - )* - - $( - conversions!(TryFrom<$var> for TextRange); - )* - }; -} - -// FIXME: when `default impl` is usable, change to blanket impls for [Try]Into instead -conversions! { - lt TextSize [u8 u16] - eq TextSize [u32 TextSize] - gt TextSize [u64] - varries [usize] -} diff --git a/tests/main.rs b/tests/main.rs index 3288c2732042..e25f59bacf4e 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -5,7 +5,7 @@ fn size(x: u32) -> TextSize { } fn range(x: ops::Range) -> TextRange { - TextRange::from(x) + TextRange::new(x.start.into(), x.end.into()) } #[test] From da935943cc9d5198bf42d5d6b705fbf8672c17ae Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 9 Mar 2020 16:27:52 +0100 Subject: [PATCH 066/322] Tuple-struct ctor is the canonical way to create a ranges --- src/range.rs | 25 +++++++++++++------------ tests/main.rs | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/range.rs b/src/range.rs index 8febcad08209..7e01b12e9f40 100644 --- a/src/range.rs +++ b/src/range.rs @@ -41,18 +41,19 @@ impl fmt::Debug for TextRange { } } +/// Creates a new `TextRange` with given `start` and `end. +/// +/// # Panics +/// +/// Panics if `end < start`. +#[allow(non_snake_case)] +pub fn TextRange(start: TextSize, end: TextSize) -> TextRange { + assert!(start <= end); + TextRange { start, end } +} + /// Identity methods. impl TextRange { - /// Creates a new `TextRange` with given `start` and `end. - /// - /// # Panics - /// - /// Panics if `end < start`. 
- pub fn new(start: TextSize, end: TextSize) -> TextRange { - assert!(start <= end); - TextRange { start, end } - } - /// The start point of this range. pub const fn start(self) -> TextSize { self.start @@ -94,14 +95,14 @@ impl TextRange { if end < start { return None; } - Some(TextRange::new(start, end)) + Some(TextRange(start, end)) } /// The smallest range that completely contains both ranges. pub fn covering(lhs: TextRange, rhs: TextRange) -> TextRange { let start = cmp::min(lhs.start(), rhs.start()); let end = cmp::max(lhs.end(), rhs.end()); - TextRange::new(start, end) + TextRange(start, end) } /// Check if this range contains a point. diff --git a/tests/main.rs b/tests/main.rs index e25f59bacf4e..9a20cf9819de 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -5,7 +5,7 @@ fn size(x: u32) -> TextSize { } fn range(x: ops::Range) -> TextRange { - TextRange::new(x.start.into(), x.end.into()) + TextRange(x.start.into(), x.end.into()) } #[test] From 578ec6f11dc0911bf802b978b23253ffec01e90f Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 9 Mar 2020 16:56:35 +0100 Subject: [PATCH 067/322] Remove stray Intos --- src/range.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/range.rs b/src/range.rs index 7e01b12e9f40..16e52e1e4e15 100644 --- a/src/range.rs +++ b/src/range.rs @@ -105,19 +105,18 @@ impl TextRange { TextRange(start, end) } - /// Check if this range contains a point. + /// Check if this range contains an offset. /// /// The end index is considered excluded. - pub fn contains_exclusive(self, point: impl Into) -> bool { - let point = point.into(); - self.start() <= point && point < self.end() + pub fn contains_exclusive(self, offset: TextSize) -> bool { + self.start() <= offset && offset < self.end() } - /// Check if this range contains a point. + /// Check if this range contains an offset. /// /// The end index is considered included. 
- pub fn contains_inclusive(self, point: impl Into) -> bool { - let point = point.into(); + pub fn contains_inclusive(self, offset: TextSize) -> bool { + let point = offset.into(); self.start() <= point && point <= self.end() } } From c3e4019ad4c8a98a71bb4ed2cf2a3938ec94614d Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 9 Mar 2020 08:22:41 +0100 Subject: [PATCH 068/322] Drop unnecessary impl The impl is valid, but probably not too useful, we can always add it later --- src/traits.rs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/traits.rs b/src/traits.rs index 877f05789552..d90f3d431c39 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -1,7 +1,4 @@ -use { - crate::{TextRange, TextSize}, - std::convert::TryInto, -}; +use {crate::TextSize, std::convert::TryInto}; /// Text-like structures that have a text size. pub trait TextSized: Copy { @@ -27,9 +24,3 @@ impl TextSized for char { TextSize(self.len_utf8() as u32) } } - -impl TextSized for TextRange { - fn text_size(self) -> TextSize { - self.len() - } -} From 3920cac1192825f356303813dde83723b6a3ff0e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 9 Mar 2020 08:44:47 +0100 Subject: [PATCH 069/322] Document MSRV --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index dc1a09b22bf2..32262de12cb6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,6 @@ //! Newtypes for working with text sizes/ranges in a more type-safe manner. +//! +//! Minimal Supported Rust Version: latest stable. 
#![forbid(unsafe_code)] #![warn(missing_debug_implementations, missing_docs)] From 260aa8a118e356e8cf84a3f09957b5909e825ddd Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 9 Mar 2020 16:47:50 +0100 Subject: [PATCH 070/322] Add TextRange::empty --- src/range.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/range.rs b/src/range.rs index 7e01b12e9f40..f30b90141ef6 100644 --- a/src/range.rs +++ b/src/range.rs @@ -54,6 +54,14 @@ pub fn TextRange(start: TextSize, end: TextSize) -> TextRange { /// Identity methods. impl TextRange { + /// Creates a zero-length range at the specified offset. + pub const fn empty(self, offset: TextSize) -> TextRange { + TextRange { + start: offset, + end: offset, + } + } + /// The start point of this range. pub const fn start(self) -> TextSize { self.start From 0f4acdbdbe2fcdec8123a4a6c01707fecad98ef4 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 10 Mar 2020 13:56:50 +0100 Subject: [PATCH 071/322] Alternative set of conversions --- src/size.rs | 85 ++++++++++++++--------------------------------------- 1 file changed, 22 insertions(+), 63 deletions(-) diff --git a/src/size.rs b/src/size.rs index 43bf19dac905..47de00c4e300 100644 --- a/src/size.rs +++ b/src/size.rs @@ -1,7 +1,7 @@ use { crate::TextSized, std::{ - convert::{TryFrom, TryInto}, + convert::TryFrom, fmt, iter, num::TryFromIntError, ops::{Add, AddAssign, Sub, SubAssign}, @@ -76,70 +76,29 @@ impl TextSize { } } -macro_rules! 
conversions { - (From for $gte:ident) => { - impl From for $gte { - fn from(value: TextSize) -> $gte { - value.raw.into() - } - } - }; - (From<$lte:ident> for TextSize) => { - impl From<$lte> for TextSize { - fn from(value: $lte) -> TextSize { - TextSize(value.into()) - } - } - }; - (TryFrom for $lt:ident) => { - impl TryFrom for $lt { - type Error = TryFromIntError; - fn try_from(value: TextSize) -> Result<$lt, Self::Error> { - value.raw.try_into() - } - } - }; - (TryFrom<$gt:ident> for TextSize) => { - impl TryFrom<$gt> for TextSize { - type Error = <$gt as TryInto>::Error; - fn try_from(value: $gt) -> Result { - value.try_into().map(TextSize) - } - } - }; - { - lt u32 [$($lt:ident)*] - eq u32 [$($eq:ident)*] - gt u32 [$($gt:ident)*] - varries [$($var:ident)*] - } => { - $( - conversions!(From<$lt> for TextSize); - conversions!(TryFrom for $lt); - )* - - $( - conversions!(From<$eq> for TextSize); - conversions!(From for $eq); - )* - - $( - conversions!(TryFrom<$gt> for TextSize); - conversions!(From for $gt); - )* - - $( - conversions!(TryFrom<$var> for TextSize); - conversions!(TryFrom for $var); - )* - }; +impl From for TextSize { + fn from(raw: u32) -> Self { + TextSize { raw } + } +} + +impl From for u32 { + fn from(value: TextSize) -> Self { + value.raw + } } -conversions! { - lt u32 [u8 u16] - eq u32 [u32] - gt u32 [u64] - varries [usize] +impl TryFrom for TextSize { + type Error = TryFromIntError; + fn try_from(value: usize) -> Result { + Ok(u32::try_from(value)?.into()) + } +} + +impl From for usize { + fn from(value: TextSize) -> Self { + value.raw as usize + } } // NB: We do not provide the transparent-ref impls like the stdlib does. 
From 83719155e8bea3982427b930f775944fa0910294 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 10 Mar 2020 14:12:47 +0100 Subject: [PATCH 072/322] Provide the same set of impls as stdlib --- src/size.rs | 66 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/src/size.rs b/src/size.rs index 43bf19dac905..a51a279e9eb9 100644 --- a/src/size.rs +++ b/src/size.rs @@ -142,38 +142,46 @@ conversions! { varries [usize] } -// NB: We do not provide the transparent-ref impls like the stdlib does. -impl Add for TextSize { - type Output = TextSize; - fn add(self, rhs: TextSize) -> TextSize { - TextSize(self.raw + rhs.raw) - } -} - -impl AddAssign for TextSize -where - TextSize: Add, -{ - fn add_assign(&mut self, rhs: A) { - *self = *self + rhs - } -} +macro_rules! arith { + ($Op:ident $op:ident, $OpAssign:ident $op_assign:ident) => { + impl $Op for TextSize { + type Output = TextSize; + fn $op(self, rhs: TextSize) -> TextSize { + TextSize($Op::$op(self.raw, rhs.raw)) + } + } + impl $Op for &'_ TextSize { + type Output = TextSize; + fn $op(self, rhs: TextSize) -> TextSize { + TextSize($Op::$op(self.raw, rhs.raw)) + } + } + impl $Op<&'_ TextSize> for TextSize { + type Output = TextSize; + fn $op(self, rhs: &TextSize) -> TextSize { + TextSize($Op::$op(self.raw, rhs.raw)) + } + } + impl $Op<&'_ TextSize> for &'_ TextSize { + type Output = TextSize; + fn $op(self, rhs: &TextSize) -> TextSize { + TextSize($Op::$op(self.raw, rhs.raw)) + } + } -impl Sub for TextSize { - type Output = TextSize; - fn sub(self, rhs: TextSize) -> TextSize { - TextSize(self.raw - rhs.raw) - } + impl $OpAssign for TextSize + where + TextSize: $Op, + { + fn $op_assign(&mut self, rhs: A) { + *self = $Op::$op(*self, rhs) + } + } + }; } -impl SubAssign for TextSize -where - TextSize: Sub, -{ - fn sub_assign(&mut self, rhs: S) { - *self = *self - rhs - } -} +arith!(Add add, AddAssign add_assign); +arith!(Sub sub, SubAssign sub_assign); impl 
iter::Sum for TextSize where From ac45c857fd414e28c3739ccdb2e086b4b9d74c83 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Sun, 8 Mar 2020 14:46:52 -0400 Subject: [PATCH 073/322] Remove Display for TextSize --- src/range.rs | 2 +- src/size.rs | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/range.rs b/src/range.rs index 7fb4987149e2..e70a7b6a3391 100644 --- a/src/range.rs +++ b/src/range.rs @@ -37,7 +37,7 @@ pub struct TextRange { impl fmt::Debug for TextRange { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "[{}..{})", self.start(), self.end()) + write!(f, "[{}..{})", self.start().raw, self.end().raw) } } diff --git a/src/size.rs b/src/size.rs index 43bf19dac905..26434beabd73 100644 --- a/src/size.rs +++ b/src/size.rs @@ -32,13 +32,7 @@ pub(crate) const fn TextSize(raw: u32) -> TextSize { impl fmt::Debug for TextSize { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -impl fmt::Display for TextSize { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.raw, f) + write!(f, "{}", self.raw) } } From 64dbce039c45f7940d72245e9745a70a67f8f730 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Wed, 11 Mar 2020 23:27:12 -0400 Subject: [PATCH 074/322] fix tests --- tests/serde.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/serde.rs b/tests/serde.rs index 62254634dd3a..6be1c93815ef 100644 --- a/tests/serde.rs +++ b/tests/serde.rs @@ -5,7 +5,7 @@ fn size(x: u32) -> TextSize { } fn range(x: ops::Range) -> TextRange { - TextRange::from(x) + TextRange(x.start.into(), x.end.into()) } #[test] From b8621776f11d2c7ba43df9206fa6a0a109f8acc8 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 12 Mar 2020 12:31:37 +0100 Subject: [PATCH 075/322] Be 16-bit clean --- src/size.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/size.rs b/src/size.rs index 47de00c4e300..5e1c2edfd803 100644 --- a/src/size.rs 
+++ b/src/size.rs @@ -97,7 +97,12 @@ impl TryFrom for TextSize { impl From for usize { fn from(value: TextSize) -> Self { - value.raw as usize + assert_lossless_conversion(); + return value.raw as usize; + + const fn assert_lossless_conversion() { + [()][(std::mem::size_of::() < std::mem::size_of::()) as usize] + } } } From d0560c7002c994b9d224080cd7b25b65918edec3 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 12 Mar 2020 15:57:07 +0100 Subject: [PATCH 076/322] Fix ix --- src/range.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/range.rs b/src/range.rs index e70a7b6a3391..c06c19dfe43f 100644 --- a/src/range.rs +++ b/src/range.rs @@ -1,9 +1,7 @@ use { crate::TextSize, std::{ - cmp, - convert::TryInto, - fmt, + cmp, fmt, ops::{Bound, Index, IndexMut, RangeBounds}, }, }; @@ -130,8 +128,7 @@ impl TextRange { } fn ix(size: TextSize) -> usize { - size.try_into() - .unwrap_or_else(|_| panic!("overflow when converting TextSize to usize index")) + size.into() } impl Index for str { From 85b96cf817ddafde2ed121ae0a44deae4190b8d6 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 12 Mar 2020 16:16:53 +0100 Subject: [PATCH 077/322] Alternative set of contains function Motivation: TextRange is a set, it contains elements (TextSize). For this reason, for range-range op we use a more verbose `contains_range` name. In stdlib, there's `HashSet::is_subset`. We used a similar design with `is_subrage` before, but it was very confusing in practice -- you'll have to lookup docs for which of lhs and rhs is sub and super set. Additionally, exclusive semantics is a clear default with better properties (if you have a partitioning of a range into subranges, only one of the parts contains any given offset), so it make sense to call it `contains` and reserve `contains_inclusive` for another op. 
--- src/range.rs | 32 ++++++++++++++++---------------- tests/main.rs | 14 +++++++------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/range.rs b/src/range.rs index c06c19dfe43f..4fba210cfbc5 100644 --- a/src/range.rs +++ b/src/range.rs @@ -88,8 +88,23 @@ impl TextRange { /// Manipulation methods. impl TextRange { + /// Check if this range contains an offset. + /// + /// The end index is considered excluded. + pub fn contains(self, offset: TextSize) -> bool { + self.start() <= offset && offset < self.end() + } + + /// Check if this range contains an offset. + /// + /// The end index is considered included. + pub fn contains_inclusive(self, offset: TextSize) -> bool { + let point = offset.into(); + self.start() <= point && point <= self.end() + } + /// Check if this range completely contains another range. - pub fn contains(self, other: TextRange) -> bool { + pub fn contains_range(self, other: TextRange) -> bool { self.start() <= other.start() && other.end() <= self.end() } @@ -110,21 +125,6 @@ impl TextRange { let end = cmp::max(lhs.end(), rhs.end()); TextRange(start, end) } - - /// Check if this range contains an offset. - /// - /// The end index is considered excluded. - pub fn contains_exclusive(self, offset: TextSize) -> bool { - self.start() <= offset && offset < self.end() - } - - /// Check if this range contains an offset. - /// - /// The end index is considered included. - pub fn contains_inclusive(self, offset: TextSize) -> bool { - let point = offset.into(); - self.start() <= point && point <= self.end() - } } fn ix(size: TextSize) -> usize { diff --git a/tests/main.rs b/tests/main.rs index 9a20cf9819de..cd299d0b664d 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -32,8 +32,8 @@ fn checked_math() { #[test] #[rustfmt::skip] fn contains() { - assert!( range(2..4).contains(range(2..3))); - assert!( ! range(2..4).contains(range(1..3))); + assert!( range(2..4).contains_range(range(2..3))); + assert!( ! 
range(2..4).contains_range(range(1..3))); } #[test] @@ -59,11 +59,11 @@ fn covering() { #[test] #[rustfmt::skip] fn contains_point() { - assert!( ! range(1..3).contains_exclusive(size(0))); - assert!( range(1..3).contains_exclusive(size(1))); - assert!( range(1..3).contains_exclusive(size(2))); - assert!( ! range(1..3).contains_exclusive(size(3))); - assert!( ! range(1..3).contains_exclusive(size(4))); + assert!( ! range(1..3).contains(size(0))); + assert!( range(1..3).contains(size(1))); + assert!( range(1..3).contains(size(2))); + assert!( ! range(1..3).contains(size(3))); + assert!( ! range(1..3).contains(size(4))); assert!( ! range(1..3).contains_inclusive(size(0))); assert!( range(1..3).contains_inclusive(size(1))); From a93dd2aa2eab267b7b5f66880c47222ef31e568b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 13 Mar 2020 10:26:24 +0100 Subject: [PATCH 078/322] Add a `From` impl --- src/range.rs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/range.rs b/src/range.rs index c06c19dfe43f..60a34450c742 100644 --- a/src/range.rs +++ b/src/range.rs @@ -2,7 +2,7 @@ use { crate::TextSize, std::{ cmp, fmt, - ops::{Bound, Index, IndexMut, RangeBounds}, + ops::{Bound, Index, IndexMut, Range, RangeBounds}, }, }; @@ -127,20 +127,16 @@ impl TextRange { } } -fn ix(size: TextSize) -> usize { - size.into() -} - impl Index for str { type Output = str; fn index(&self, index: TextRange) -> &Self::Output { - &self[ix(index.start())..ix(index.end())] + &self[Range::::from(index)] } } impl IndexMut for str { fn index_mut(&mut self, index: TextRange) -> &mut Self::Output { - &mut self[ix(index.start())..ix(index.end())] + &mut self[Range::::from(index)] } } @@ -153,3 +149,12 @@ impl RangeBounds for TextRange { Bound::Excluded(&self.end) } } + +impl From for Range +where + T: From, +{ + fn from(r: TextRange) -> Self { + r.start().into()..r.end().into() + } +} From e5bb71d61c9829350f413df7cd6b662fcf163ae5 Mon Sep 17 00:00:00 2001 From: 
Aleksey Kladov Date: Thu, 12 Mar 2020 15:53:47 +0100 Subject: [PATCH 079/322] Switch to Rust range notation --- src/range.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/range.rs b/src/range.rs index e70a7b6a3391..73af2896ffbf 100644 --- a/src/range.rs +++ b/src/range.rs @@ -37,7 +37,7 @@ pub struct TextRange { impl fmt::Debug for TextRange { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "[{}..{})", self.start().raw, self.end().raw) + write!(f, "{}..{}", self.start().raw, self.end().raw) } } From ed056c48a408f9b7967cb7668d9396010f53c03e Mon Sep 17 00:00:00 2001 From: CAD97 Date: Sun, 8 Mar 2020 14:34:47 -0400 Subject: [PATCH 080/322] Provide generic ref-removing ops for TextSize --- src/size.rs | 58 ++++++++++++++++++++++++++++----------------------- tests/main.rs | 2 +- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/size.rs b/src/size.rs index 867a0004ae72..454327a01592 100644 --- a/src/size.rs +++ b/src/size.rs @@ -100,46 +100,52 @@ impl From for usize { } } -macro_rules! arith { - ($Op:ident $op:ident, $OpAssign:ident $op_assign:ident) => { +macro_rules! 
ops { + (impl $Op:ident for TextSize by fn $f:ident = $op:tt) => { impl $Op for TextSize { type Output = TextSize; - fn $op(self, rhs: TextSize) -> TextSize { - TextSize($Op::$op(self.raw, rhs.raw)) + fn $f(self, other: TextSize) -> TextSize { + TextSize(self.raw $op other.raw) } } - impl $Op for &'_ TextSize { + impl $Op<&TextSize> for TextSize { type Output = TextSize; - fn $op(self, rhs: TextSize) -> TextSize { - TextSize($Op::$op(self.raw, rhs.raw)) + fn $f(self, other: &TextSize) -> TextSize { + self $op *other } } - impl $Op<&'_ TextSize> for TextSize { - type Output = TextSize; - fn $op(self, rhs: &TextSize) -> TextSize { - TextSize($Op::$op(self.raw, rhs.raw)) - } - } - impl $Op<&'_ TextSize> for &'_ TextSize { - type Output = TextSize; - fn $op(self, rhs: &TextSize) -> TextSize { - TextSize($Op::$op(self.raw, rhs.raw)) - } - } - - impl $OpAssign for TextSize + impl $Op for &TextSize where - TextSize: $Op, + TextSize: $Op, { - fn $op_assign(&mut self, rhs: A) { - *self = $Op::$op(*self, rhs) + type Output = TextSize; + fn $f(self, other: T) -> TextSize { + *self $op other } } }; } -arith!(Add add, AddAssign add_assign); -arith!(Sub sub, SubAssign sub_assign); +ops!(impl Add for TextSize by fn add = +); +ops!(impl Sub for TextSize by fn sub = -); + +impl AddAssign for TextSize +where + TextSize: Add, +{ + fn add_assign(&mut self, rhs: A) { + *self = *self + rhs + } +} + +impl SubAssign for TextSize +where + TextSize: Sub, +{ + fn sub_assign(&mut self, rhs: S) { + *self = *self - rhs + } +} impl iter::Sum for TextSize where diff --git a/tests/main.rs b/tests/main.rs index 9a20cf9819de..66b6106671a4 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -11,7 +11,7 @@ fn range(x: ops::Range) -> TextRange { #[test] fn sum() { let xs: Vec = vec![size(0), size(1), size(2)]; - assert_eq!(xs.iter().copied().sum::(), size(3)); + assert_eq!(xs.iter().sum::(), size(3)); assert_eq!(xs.into_iter().sum::(), size(3)); } From e07fb7cc5f6e944069b5d4096c03d95df45b2a99 Mon Sep 17 
00:00:00 2001 From: Aleksey Kladov Date: Fri, 13 Mar 2020 21:28:14 +0100 Subject: [PATCH 081/322] Check invariant during deserialization --- src/serde_impls.rs | 11 +++++++++-- tests/serde.rs | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/serde_impls.rs b/src/serde_impls.rs index 1963413fd83f..617e99cc027d 100644 --- a/src/serde_impls.rs +++ b/src/serde_impls.rs @@ -1,6 +1,6 @@ use { crate::{TextRange, TextSize}, - serde::{Deserialize, Deserializer, Serialize, Serializer}, + serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer}, }; impl Serialize for TextSize { @@ -35,6 +35,13 @@ impl<'de> Deserialize<'de> for TextRange { where D: Deserializer<'de>, { - Deserialize::deserialize(deserializer).map(|(start, end)| TextRange(start, end)) + let (start, end) = Deserialize::deserialize(deserializer)?; + if !(start <= end) { + return Err(Error::custom(format!( + "invalid range: {:?}..{:?}", + start, end + ))); + } + Ok(TextRange(start, end)) } } diff --git a/tests/serde.rs b/tests/serde.rs index 6be1c93815ef..a32f89e865b0 100644 --- a/tests/serde.rs +++ b/tests/serde.rs @@ -55,3 +55,25 @@ fn range_serialization() { ], ); } + +#[test] +fn invalid_range_deserialization() { + assert_tokens::( + &range(62..92), + &[ + Token::Tuple { len: 2 }, + Token::U32(62), + Token::U32(92), + Token::TupleEnd, + ], + ); + assert_de_tokens_error::( + &[ + Token::Tuple { len: 2 }, + Token::U32(92), + Token::U32(62), + Token::TupleEnd, + ], + "invalid range: 92..62", + ); +} From d05705c4f6f1f3039de620899f22b6c384a3ceca Mon Sep 17 00:00:00 2001 From: CAD97 Date: Sat, 14 Mar 2020 21:50:15 -0400 Subject: [PATCH 082/322] minor serde style fix --- src/serde_impls.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/serde_impls.rs b/src/serde_impls.rs index 617e99cc027d..6a0d040bfaa9 100644 --- a/src/serde_impls.rs +++ b/src/serde_impls.rs @@ -1,6 +1,6 @@ use { crate::{TextRange, TextSize}, - serde::{de::Error, 
Deserialize, Deserializer, Serialize, Serializer}, + serde::{de, Deserialize, Deserializer, Serialize, Serializer}, }; impl Serialize for TextSize { @@ -31,13 +31,14 @@ impl Serialize for TextRange { } impl<'de> Deserialize<'de> for TextRange { + #[allow(clippy::nonminimal_bool)] fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let (start, end) = Deserialize::deserialize(deserializer)?; if !(start <= end) { - return Err(Error::custom(format!( + return Err(de::Error::custom(format!( "invalid range: {:?}..{:?}", start, end ))); From 4b7dd1a15dacb2e85158f64c85e26eba047805b2 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Sat, 14 Mar 2020 22:17:24 -0400 Subject: [PATCH 083/322] Don't silently wrap for too-large str --- src/traits.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/traits.rs b/src/traits.rs index d90f3d431c39..7bfb586bad93 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -8,14 +8,9 @@ pub trait TextSized: Copy { impl TextSized for &'_ str { fn text_size(self) -> TextSize { - let len = self.len(); - if let Ok(size) = len.try_into() { - size - } else if cfg!(debug_assertions) { - panic!("overflow when converting to TextSize"); - } else { - TextSize(len as u32) - } + self.len() + .try_into() + .unwrap_or_else(|| panic!("string too large ({}) for TextSize", self.len())) } } From 0467db52ed016fa17f1f8d7665d90b475ab865f6 Mon Sep 17 00:00:00 2001 From: Christopher Durham Date: Sun, 15 Mar 2020 11:15:37 -0400 Subject: [PATCH 084/322] Add missing _ --- src/traits.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/traits.rs b/src/traits.rs index 7bfb586bad93..ca4b7d9b42d3 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -10,7 +10,7 @@ impl TextSized for &'_ str { fn text_size(self) -> TextSize { self.len() .try_into() - .unwrap_or_else(|| panic!("string too large ({}) for TextSize", self.len())) + .unwrap_or_else(|_| panic!("string too large ({}) for TextSize", self.len())) } } From 
8d19701f6c6a673256bbf44f354183c147580e72 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Fri, 13 Mar 2020 18:29:22 -0400 Subject: [PATCH 085/322] minor improvements everywhere --- src/lib.rs | 3 +++ src/range.rs | 64 +++++++++++++++++++++++++++++++++++----------------- src/size.rs | 30 ++++++++++++++++-------- 3 files changed, 66 insertions(+), 31 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 32262de12cb6..e194e2317bb5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,3 +13,6 @@ mod traits; mod serde_impls; pub use crate::{range::TextRange, size::TextSize, traits::TextSized}; + +#[cfg(target_pointer_width = "16")] +compile_error!("text-size assumes usize >= u32 and does not work on 16-bit targets"); diff --git a/src/range.rs b/src/range.rs index df5296af147d..35c487afed47 100644 --- a/src/range.rs +++ b/src/range.rs @@ -2,30 +2,25 @@ use { crate::TextSize, std::{ cmp, fmt, - ops::{Bound, Index, IndexMut, Range, RangeBounds}, + ops::{Bound, Index, IndexMut, Range, RangeBounds, RangeFrom}, }, }; /// A range in text, represented as a pair of [`TextSize`][struct@TextSize]. /// -/// It is a logical error to have `end() < start()`, but -/// code must not assume this is true for `unsafe` guarantees. 
-/// /// # Translation from `text_unit` /// -/// - `TextRange::from_to(from, to)` ⟹ `TextRange::from(from..to)` -/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::from(offset..offset + size)` +/// - `TextRange::from_to(from, to)` ⟹ `TextRange(from, to)` +/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::to(size).offset(offset)` /// - `range.start()` ⟹ `range.start()` /// - `range.end()` ⟹ `range.end()` -/// - `range.len()` ⟹ `range.len()` +/// - `range.len()` ⟹ `range.len()` /// - `range.is_empty()` ⟹ `range.is_empty()` -/// - `a.is_subrange(b)` ⟹ `b.contains(a)` +/// - `a.is_subrange(b)` ⟹ `b.contains_range(a)` /// - `a.intersection(b)` ⟹ `TextRange::intersection(a, b)` /// - `a.extend_to(b)` ⟹ `TextRange::covering(a, b)` -/// - `range.contains(offset)` ⟹ `range.contains_exclusive(point)` +/// - `range.contains(offset)` ⟹ `range.contains(point)` /// - `range.contains_inclusive(offset)` ⟹ `range.contains_inclusive(point)` -/// -/// † See the note on [`TextRange::len`] for differing behavior for incorrect reverse ranges. #[derive(Copy, Clone, Eq, PartialEq, Hash)] pub struct TextRange { // Invariant: start <= end @@ -39,7 +34,7 @@ impl fmt::Debug for TextRange { } } -/// Creates a new `TextRange` with given `start` and `end. +/// Creates a new `TextRange` with the given `start` and `end` (`start..end`). /// /// # Panics /// @@ -50,16 +45,47 @@ pub fn TextRange(start: TextSize, end: TextSize) -> TextRange { TextRange { start, end } } -/// Identity methods. impl TextRange { - /// Creates a zero-length range at the specified offset. - pub const fn empty(self, offset: TextSize) -> TextRange { + /// Create a zero-length range at the specified offset (`offset..offset`). + pub const fn empty(offset: TextSize) -> TextRange { TextRange { start: offset, end: offset, } } + /// Create a range up to the given end (`..end`). 
+ pub const fn before(end: TextSize) -> TextRange { + TextRange { + start: TextSize::zero(), + end, + } + } + + /// Create a range after the given start (`start..`). + /// + /// This returns a std [`RangeFrom`] rather than `TextRange` because + /// `TextRange` does not support right-unbounded ranges. As such, this + /// should only be used for direct indexing, and bounded ranges should be + /// used for persistent ranges (`TextRange(start, TextSize::of(text))`). + pub const fn after(start: TextSize) -> RangeFrom { + start.raw as usize.. + } + + /// Offset this range by some amount. + /// + /// This is typically used to convert a range from one coordinate space to + /// another, such as from within a substring to within an entire document. + pub fn offset(self, offset: TextSize) -> TextRange { + TextRange( + self.start().checked_add(offset).unwrap(), + self.end().checked_add(offset).unwrap(), + ) + } +} + +/// Identity methods. +impl TextRange { /// The start point of this range. pub const fn start(self) -> TextSize { self.start @@ -76,10 +102,7 @@ impl TextRange { TextSize(self.end().raw - self.start().raw) } - /// Check if this range empty or reversed. - /// - /// When `end() < start()`, this returns false. - /// Code should prefer `is_empty()` to `len() == 0`. + /// Check if this range is empty. pub const fn is_empty(self) -> bool { // HACK for const fn: math on primitives only self.start().raw == self.end().raw @@ -99,8 +122,7 @@ impl TextRange { /// /// The end index is considered included. pub fn contains_inclusive(self, offset: TextSize) -> bool { - let point = offset.into(); - self.start() <= point && point <= self.end() + self.start() <= offset && offset <= self.end() } /// Check if this range completely contains another range. diff --git a/src/size.rs b/src/size.rs index 454327a01592..5b435e896cff 100644 --- a/src/size.rs +++ b/src/size.rs @@ -11,15 +11,23 @@ use { /// A measure of text length. Also, equivalently, an index into text. 
/// -/// This is a utf8-bytes-offset stored as `u32`, but +/// This is a UTF-8 bytes offset stored as `u32`, but /// most clients should treat it as an opaque measure. /// +/// For cases that need to escape `TextSize` and return to working directly +/// with primitive integers, `TextSize` can be converted losslessly to/from +/// `u32` via [`From`] conversions as well as losslessly be converted [`Into`] +/// `usize`. The `usize -> TextSize` direction can be done via [`TryFrom`]. +/// +/// These escape hatches are primarily required for unit testing and when +/// converting from UTF-8 size to another coordinate space, such as UTF-16. +/// /// # Translation from `text_unit` /// /// - `TextUnit::of_char(c)` ⟹ `TextSize::of(c)` -/// - `TextUnit::of_str(s)` ⟹ `TextSize:of(s)` +/// - `TextUnit::of_str(s)` ⟹ `TextSize::of(s)` /// - `TextUnit::from_usize(size)` ⟹ `TextSize::try_from(size).unwrap_or_else(|| panic!(_))` -/// - `unit.to_usize()` ⟹ `usize::try_from(size).unwrap_or_else(|| panic!(_))` +/// - `unit.to_usize()` ⟹ `usize::from(size)` #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct TextSize { pub(crate) raw: u32, @@ -49,6 +57,11 @@ impl TextSize { pub const fn zero() -> TextSize { TextSize(0) } + + /// A size of one. + pub const fn one() -> TextSize { + TextSize(1) + } } /// Methods to act like a primitive integer type, where reasonably applicable. @@ -58,6 +71,8 @@ impl TextSize { pub const MIN: TextSize = TextSize(u32::MIN); /// The largest representable text size. (`u32::MAX`) pub const MAX: TextSize = TextSize(u32::MAX); + /// The text size of a single ASCII character. 
+ pub const ONE: TextSize = TextSize(1); #[allow(missing_docs)] pub fn checked_add(self, rhs: TextSize) -> Option { @@ -72,7 +87,7 @@ impl TextSize { impl From for TextSize { fn from(raw: u32) -> Self { - TextSize { raw } + TextSize(raw) } } @@ -91,12 +106,7 @@ impl TryFrom for TextSize { impl From for usize { fn from(value: TextSize) -> Self { - assert_lossless_conversion(); - return value.raw as usize; - - const fn assert_lossless_conversion() { - [()][(std::mem::size_of::() < std::mem::size_of::()) as usize] - } + value.raw as usize } } From f9d70057bcd32fd290fdcc2e26a9ad712a12fd20 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Sat, 14 Mar 2020 22:14:43 -0400 Subject: [PATCH 086/322] Add #[inline] to most things --- src/range.rs | 12 ++++++++++++ src/size.rs | 19 +++++++++++++++++-- src/traits.rs | 4 ++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/range.rs b/src/range.rs index 35c487afed47..6484d2ea3411 100644 --- a/src/range.rs +++ b/src/range.rs @@ -40,6 +40,7 @@ impl fmt::Debug for TextRange { /// /// Panics if `end < start`. #[allow(non_snake_case)] +#[inline] pub fn TextRange(start: TextSize, end: TextSize) -> TextRange { assert!(start <= end); TextRange { start, end } @@ -47,6 +48,7 @@ pub fn TextRange(start: TextSize, end: TextSize) -> TextRange { impl TextRange { /// Create a zero-length range at the specified offset (`offset..offset`). + #[inline] pub const fn empty(offset: TextSize) -> TextRange { TextRange { start: offset, @@ -55,6 +57,7 @@ impl TextRange { } /// Create a range up to the given end (`..end`). + #[inline] pub const fn before(end: TextSize) -> TextRange { TextRange { start: TextSize::zero(), @@ -68,6 +71,7 @@ impl TextRange { /// `TextRange` does not support right-unbounded ranges. As such, this /// should only be used for direct indexing, and bounded ranges should be /// used for persistent ranges (`TextRange(start, TextSize::of(text))`). 
+ #[inline] pub const fn after(start: TextSize) -> RangeFrom { start.raw as usize.. } @@ -76,6 +80,7 @@ impl TextRange { /// /// This is typically used to convert a range from one coordinate space to /// another, such as from within a substring to within an entire document. + #[inline] pub fn offset(self, offset: TextSize) -> TextRange { TextRange( self.start().checked_add(offset).unwrap(), @@ -87,22 +92,26 @@ impl TextRange { /// Identity methods. impl TextRange { /// The start point of this range. + #[inline] pub const fn start(self) -> TextSize { self.start } /// The end point of this range. + #[inline] pub const fn end(self) -> TextSize { self.end } /// The size of this range. + #[inline] pub const fn len(self) -> TextSize { // HACK for const fn: math on primitives only TextSize(self.end().raw - self.start().raw) } /// Check if this range is empty. + #[inline] pub const fn is_empty(self) -> bool { // HACK for const fn: math on primitives only self.start().raw == self.end().raw @@ -151,12 +160,14 @@ impl TextRange { impl Index for str { type Output = str; + #[inline] fn index(&self, index: TextRange) -> &Self::Output { &self[Range::::from(index)] } } impl IndexMut for str { + #[inline] fn index_mut(&mut self, index: TextRange) -> &mut Self::Output { &mut self[Range::::from(index)] } @@ -176,6 +187,7 @@ impl From for Range where T: From, { + #[inline] fn from(r: TextRange) -> Self { r.start().into()..r.end().into() } diff --git a/src/size.rs b/src/size.rs index 5b435e896cff..4436b9f6f2e1 100644 --- a/src/size.rs +++ b/src/size.rs @@ -46,6 +46,7 @@ impl fmt::Debug for TextSize { impl TextSize { /// The text size of some text-like object. + #[inline] pub fn of(text: impl TextSized) -> TextSize { text.text_size() } @@ -54,11 +55,13 @@ impl TextSize { /// /// This is equivalent to `TextSize::default()` or [`TextSize::MIN`], /// but is more explicit on intent. + #[inline] pub const fn zero() -> TextSize { TextSize(0) } /// A size of one. 
+ #[inline] pub const fn one() -> TextSize { TextSize(1) } @@ -74,24 +77,28 @@ impl TextSize { /// The text size of a single ASCII character. pub const ONE: TextSize = TextSize(1); - #[allow(missing_docs)] + /// Checked addition. Returns `None` if overflow occurred. + #[inline] pub fn checked_add(self, rhs: TextSize) -> Option { self.raw.checked_add(rhs.raw).map(TextSize) } - #[allow(missing_docs)] + /// Checked subtraction. Returns `None` if overflow occurred. + #[inline] pub fn checked_sub(self, rhs: TextSize) -> Option { self.raw.checked_sub(rhs.raw).map(TextSize) } } impl From for TextSize { + #[inline] fn from(raw: u32) -> Self { TextSize(raw) } } impl From for u32 { + #[inline] fn from(value: TextSize) -> Self { value.raw } @@ -99,12 +106,14 @@ impl From for u32 { impl TryFrom for TextSize { type Error = TryFromIntError; + #[inline] fn try_from(value: usize) -> Result { Ok(u32::try_from(value)?.into()) } } impl From for usize { + #[inline] fn from(value: TextSize) -> Self { value.raw as usize } @@ -114,12 +123,14 @@ macro_rules! ops { (impl $Op:ident for TextSize by fn $f:ident = $op:tt) => { impl $Op for TextSize { type Output = TextSize; + #[inline] fn $f(self, other: TextSize) -> TextSize { TextSize(self.raw $op other.raw) } } impl $Op<&TextSize> for TextSize { type Output = TextSize; + #[inline] fn $f(self, other: &TextSize) -> TextSize { self $op *other } @@ -129,6 +140,7 @@ macro_rules! 
ops { TextSize: $Op, { type Output = TextSize; + #[inline] fn $f(self, other: T) -> TextSize { *self $op other } @@ -143,6 +155,7 @@ impl AddAssign for TextSize where TextSize: Add, { + #[inline] fn add_assign(&mut self, rhs: A) { *self = *self + rhs } @@ -152,6 +165,7 @@ impl SubAssign for TextSize where TextSize: Sub, { + #[inline] fn sub_assign(&mut self, rhs: S) { *self = *self - rhs } @@ -161,6 +175,7 @@ impl iter::Sum for TextSize where TextSize: Add, { + #[inline] fn sum>(iter: I) -> TextSize { iter.fold(TextSize::zero(), Add::add) } diff --git a/src/traits.rs b/src/traits.rs index ca4b7d9b42d3..fac1eccdc681 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -6,7 +6,10 @@ pub trait TextSized: Copy { fn text_size(self) -> TextSize; } +/// This will panic for strings larger than `TextSize::MAX` when +/// debug assertions are enabled, and wrap when they are disabled. impl TextSized for &'_ str { + #[inline] fn text_size(self) -> TextSize { self.len() .try_into() @@ -15,6 +18,7 @@ impl TextSized for &'_ str { } impl TextSized for char { + #[inline] fn text_size(self) -> TextSize { TextSize(self.len_utf8() as u32) } From 283cb688d500cfa1f684784b8523864fb3a33afc Mon Sep 17 00:00:00 2001 From: CAD97 Date: Wed, 18 Mar 2020 17:22:00 -0400 Subject: [PATCH 087/322] replace `one` with `ascii` --- src/size.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/size.rs b/src/size.rs index 4436b9f6f2e1..ed4e849d51ff 100644 --- a/src/size.rs +++ b/src/size.rs @@ -59,12 +59,6 @@ impl TextSize { pub const fn zero() -> TextSize { TextSize(0) } - - /// A size of one. - #[inline] - pub const fn one() -> TextSize { - TextSize(1) - } } /// Methods to act like a primitive integer type, where reasonably applicable. @@ -75,7 +69,7 @@ impl TextSize { /// The largest representable text size. (`u32::MAX`) pub const MAX: TextSize = TextSize(u32::MAX); /// The text size of a single ASCII character. 
- pub const ONE: TextSize = TextSize(1); + pub const ASCII: TextSize = TextSize(1); /// Checked addition. Returns `None` if overflow occurred. #[inline] From 9995dccc5c6863dc5cb276e4c00039fac09252b3 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Wed, 18 Mar 2020 17:23:37 -0400 Subject: [PATCH 088/322] remove TextRange::after --- src/range.rs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/range.rs b/src/range.rs index 6484d2ea3411..507de3a5fbaa 100644 --- a/src/range.rs +++ b/src/range.rs @@ -11,7 +11,7 @@ use { /// # Translation from `text_unit` /// /// - `TextRange::from_to(from, to)` ⟹ `TextRange(from, to)` -/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::to(size).offset(offset)` +/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::before(size).offset(offset)` /// - `range.start()` ⟹ `range.start()` /// - `range.end()` ⟹ `range.end()` /// - `range.len()` ⟹ `range.len()` @@ -65,17 +65,6 @@ impl TextRange { } } - /// Create a range after the given start (`start..`). - /// - /// This returns a std [`RangeFrom`] rather than `TextRange` because - /// `TextRange` does not support right-unbounded ranges. As such, this - /// should only be used for direct indexing, and bounded ranges should be - /// used for persistent ranges (`TextRange(start, TextSize::of(text))`). - #[inline] - pub const fn after(start: TextSize) -> RangeFrom { - start.raw as usize.. - } - /// Offset this range by some amount. 
/// /// This is typically used to convert a range from one coordinate space to From a3188565bda4fd975d03f310a8f0b0ef1ccfb407 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 19 Mar 2020 11:29:36 -0400 Subject: [PATCH 089/322] rename TextRange::before to up_to --- src/range.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/range.rs b/src/range.rs index 507de3a5fbaa..08d8d3e2ccfd 100644 --- a/src/range.rs +++ b/src/range.rs @@ -11,7 +11,7 @@ use { /// # Translation from `text_unit` /// /// - `TextRange::from_to(from, to)` ⟹ `TextRange(from, to)` -/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::before(size).offset(offset)` +/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::up_to(size).offset(offset)` /// - `range.start()` ⟹ `range.start()` /// - `range.end()` ⟹ `range.end()` /// - `range.len()` ⟹ `range.len()` @@ -58,7 +58,7 @@ impl TextRange { /// Create a range up to the given end (`..end`). #[inline] - pub const fn before(end: TextSize) -> TextRange { + pub const fn up_to(end: TextSize) -> TextRange { TextRange { start: TextSize::zero(), end, From 0fe47a3efbbefb063fdd0080b57f9656453ba2ff Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 19 Mar 2020 11:29:48 -0400 Subject: [PATCH 090/322] Remove incorrect doc comment --- src/traits.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/traits.rs b/src/traits.rs index fac1eccdc681..8d197db8c16a 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -6,8 +6,6 @@ pub trait TextSized: Copy { fn text_size(self) -> TextSize; } -/// This will panic for strings larger than `TextSize::MAX` when -/// debug assertions are enabled, and wrap when they are disabled. 
impl TextSized for &'_ str { #[inline] fn text_size(self) -> TextSize { From 9d2b3c9a9ae4331ddb871be755e7c429e2c0fe6d Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 19 Mar 2020 11:30:40 -0400 Subject: [PATCH 091/322] Remove TextSize::ASCII --- src/size.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/size.rs b/src/size.rs index ed4e849d51ff..5239ea669198 100644 --- a/src/size.rs +++ b/src/size.rs @@ -68,8 +68,6 @@ impl TextSize { pub const MIN: TextSize = TextSize(u32::MIN); /// The largest representable text size. (`u32::MAX`) pub const MAX: TextSize = TextSize(u32::MAX); - /// The text size of a single ASCII character. - pub const ASCII: TextSize = TextSize(1); /// Checked addition. Returns `None` if overflow occurred. #[inline] From a9ed5d3fe6158230cc5d3332a4e03b0eee735f26 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 19 Mar 2020 11:35:52 -0400 Subject: [PATCH 092/322] Remove TextRange::offset (for now?) --- src/range.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/range.rs b/src/range.rs index 08d8d3e2ccfd..339acf859956 100644 --- a/src/range.rs +++ b/src/range.rs @@ -64,18 +64,6 @@ impl TextRange { end, } } - - /// Offset this range by some amount. - /// - /// This is typically used to convert a range from one coordinate space to - /// another, such as from within a substring to within an entire document. - #[inline] - pub fn offset(self, offset: TextSize) -> TextRange { - TextRange( - self.start().checked_add(offset).unwrap(), - self.end().checked_add(offset).unwrap(), - ) - } } /// Identity methods. 
From a55a2f525fc818df935809503aaaf75e121f4e80 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 19 Mar 2020 12:38:54 -0400 Subject: [PATCH 093/322] Remove unneeded import --- src/range.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/range.rs b/src/range.rs index 339acf859956..789d30066522 100644 --- a/src/range.rs +++ b/src/range.rs @@ -2,7 +2,7 @@ use { crate::TextSize, std::{ cmp, fmt, - ops::{Bound, Index, IndexMut, Range, RangeBounds, RangeFrom}, + ops::{Bound, Index, IndexMut, Range, RangeBounds}, }, }; From af394662df6542e4213cde0af76efb50ff1762c9 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 19 Mar 2020 13:51:01 -0400 Subject: [PATCH 094/322] Add ops for TextRange --- src/range.rs | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/src/range.rs b/src/range.rs index 789d30066522..e1a7bdf86738 100644 --- a/src/range.rs +++ b/src/range.rs @@ -2,7 +2,7 @@ use { crate::TextSize, std::{ cmp, fmt, - ops::{Bound, Index, IndexMut, Range, RangeBounds}, + ops::{Add, AddAssign, Bound, Index, IndexMut, Range, RangeBounds, Sub, SubAssign}, }, }; @@ -133,6 +133,38 @@ impl TextRange { let end = cmp::max(lhs.end(), rhs.end()); TextRange(start, end) } + + /// Add an offset to this range. + /// + /// Note that this is not appropriate for changing where a `TextRange` is + /// within some string; rather, it is for changing the reference anchor + /// that the `TextRange` is measured against. + /// + /// The unchecked version (`Add::add`) will _always_ panic on overflow, + /// in contrast to primitive integers, which check in debug mode only. + #[inline] + pub fn checked_add(self, offset: TextSize) -> Option { + Some(TextRange { + start: self.start.checked_add(offset)?, + end: self.end.checked_add(offset)?, + }) + } + + /// Subtract an offset from this range. 
+ /// + /// Note that this is not appropriate for changing where a `TextRange` is + /// within some string; rather, it is for changing the reference anchor + /// that the `TextRange` is measured against. + /// + /// The unchecked version (`Sub::sub`) will _always_ panic on overflow, + /// in contrast to primitive integers, which check in debug mode only. + #[inline] + pub fn checked_sub(self, offset: TextSize) -> Option { + Some(TextRange { + start: self.start.checked_sub(offset)?, + end: self.end.checked_sub(offset)?, + }) + } } impl Index for str { @@ -169,3 +201,66 @@ where r.start().into()..r.end().into() } } + +macro_rules! ops { + (impl $Op:ident for TextRange by fn $f:ident = $op:tt) => { + impl $Op<&TextSize> for TextRange { + type Output = TextRange; + #[inline] + fn $f(self, other: &TextSize) -> TextRange { + self $op *other + } + } + impl $Op for &TextRange + where + TextRange: $Op, + { + type Output = TextRange; + #[inline] + fn $f(self, other: T) -> TextRange { + *self $op other + } + } + }; +} + +impl Add for TextRange { + type Output = TextRange; + #[inline] + fn add(self, offset: TextSize) -> TextRange { + self.checked_add(offset) + .expect("TextRange +offset overflowed") + } +} + +impl Sub for TextRange { + type Output = TextRange; + #[inline] + fn sub(self, offset: TextSize) -> TextRange { + self.checked_sub(offset) + .expect("TextRange -offset overflowed") + } +} + +ops!(impl Add for TextRange by fn add = +); +ops!(impl Sub for TextRange by fn sub = -); + +impl AddAssign for TextRange +where + TextRange: Add, +{ + #[inline] + fn add_assign(&mut self, rhs: A) { + *self = *self + rhs + } +} + +impl SubAssign for TextRange +where + TextRange: Sub, +{ + #[inline] + fn sub_assign(&mut self, rhs: S) { + *self = *self - rhs + } +} From e05750ee572df39a042d006d7931fca2f8cf8fab Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 19 Mar 2020 13:55:11 -0400 Subject: [PATCH 095/322] correct translation table --- src/range.rs | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/range.rs b/src/range.rs index e1a7bdf86738..ebafbc7140cd 100644 --- a/src/range.rs +++ b/src/range.rs @@ -11,7 +11,7 @@ use { /// # Translation from `text_unit` /// /// - `TextRange::from_to(from, to)` ⟹ `TextRange(from, to)` -/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::up_to(size).offset(offset)` +/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::up_to(size) + offset` /// - `range.start()` ⟹ `range.start()` /// - `range.end()` ⟹ `range.end()` /// - `range.len()` ⟹ `range.len()` From e8dcd41140f6cdd30082d4c535dd0d5c3fbdc084 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 20 Mar 2020 09:34:24 +0100 Subject: [PATCH 096/322] Make intersect and cover into methods --- src/range.rs | 19 ++++++++++++------- tests/main.rs | 31 +++++++++++++++++-------------- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/range.rs b/src/range.rs index ebafbc7140cd..bc899e71a09c 100644 --- a/src/range.rs +++ b/src/range.rs @@ -118,22 +118,27 @@ impl TextRange { /// The range covered by both ranges, if it exists. /// If the ranges touch but do not overlap, the output range is empty. - pub fn intersection(lhs: TextRange, rhs: TextRange) -> Option { - let start = cmp::max(lhs.start(), rhs.start()); - let end = cmp::min(lhs.end(), rhs.end()); + pub fn intersect(self, other: TextRange) -> Option { + let start = cmp::max(self.start(), other.start()); + let end = cmp::min(self.end(), other.end()); if end < start { return None; } Some(TextRange(start, end)) } - /// The smallest range that completely contains both ranges. - pub fn covering(lhs: TextRange, rhs: TextRange) -> TextRange { - let start = cmp::min(lhs.start(), rhs.start()); - let end = cmp::max(lhs.end(), rhs.end()); + /// Extends the range to cover `other` as well. 
+ pub fn cover(self, other: TextRange) -> TextRange { + let start = cmp::min(self.start(), other.start()); + let end = cmp::max(self.end(), other.end()); TextRange(start, end) } + /// Extends the range to cover `other` offsets as well. + pub fn cover_offset(self, other: TextSize) -> TextRange { + self.cover(TextRange::empty(other)) + } + /// Add an offset to this range. /// /// Note that this is not appropriate for changing where a `TextRange` is diff --git a/tests/main.rs b/tests/main.rs index 073c2ebaa3a8..5a9e67861810 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -37,23 +37,26 @@ fn contains() { } #[test] -fn intersection() { - assert_eq!( - TextRange::intersection(range(1..2), range(2..3)), - Some(range(2..2)) - ); - assert_eq!( - TextRange::intersection(range(1..5), range(2..3)), - Some(range(2..3)) - ); - assert_eq!(TextRange::intersection(range(1..2), range(3..4)), None); +fn intersect() { + assert_eq!(range(1..2).intersect(range(2..3)), Some(range(2..2))); + assert_eq!(range(1..5).intersect(range(2..3)), Some(range(2..3))); + assert_eq!(range(1..2).intersect(range(3..4)), None); } #[test] -fn covering() { - assert_eq!(TextRange::covering(range(1..2), range(2..3)), range(1..3)); - assert_eq!(TextRange::covering(range(1..5), range(2..3)), range(1..5)); - assert_eq!(TextRange::covering(range(1..2), range(4..5)), range(1..5)); +fn cover() { + assert_eq!(range(1..2).cover(range(2..3)), range(1..3)); + assert_eq!(range(1..5).cover(range(2..3)), range(1..5)); + assert_eq!(range(1..2).cover(range(4..5)), range(1..5)); +} + +#[test] +fn cover_offset() { + assert_eq!(range(1..3).cover_offset(size(0)), range(0..3)); + assert_eq!(range(1..3).cover_offset(size(1)), range(1..3)); + assert_eq!(range(1..3).cover_offset(size(2)), range(1..3)); + assert_eq!(range(1..3).cover_offset(size(3)), range(1..3)); + assert_eq!(range(1..3).cover_offset(size(4)), range(1..4)); } #[test] From f2eeea642ff1ef247748b37d34fe33db71e561ae Mon Sep 17 00:00:00 2001 From: Aleksey Kladov 
Date: Fri, 20 Mar 2020 17:12:32 +0100 Subject: [PATCH 097/322] Rename repo --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 95c11472cd74..08f5d4f7198c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,8 +9,8 @@ authors = [ ] description = "Newtypes for text offsets" license = "MIT OR Apache-2.0" -repository = "https://github.com/matklad/text_unit" -documentation = "https://docs.rs/text_unit" +repository = "https://github.com/rust-analyzer/text-size" +documentation = "https://docs.rs/text-size" [dependencies] serde = { version = "1.0", optional = true, default_features = false } From 3bae1bdc8043d81f8b30543e90996f7ef4454125 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 20 Mar 2020 17:17:40 +0100 Subject: [PATCH 098/322] Swithc to GitHub actions --- .github/workflows/ci.yaml | 36 ++++++++++++++++++++++++++++++++++++ .travis.yml | 2 -- bors.toml | 6 ++++++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/ci.yaml delete mode 100644 .travis.yml create mode 100644 bors.toml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 000000000000..aa0587bd98eb --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,36 @@ +name: CI +on: + pull_request: + push: + branches: + - master + - staging + - trying + +jobs: + rust: + name: Rust + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + + env: + RUSTFLAGS: -D warnings + RUSTUP_MAX_RETRIES: 10 + CARGO_NET_RETRY: 10 + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Install Rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + + - name: Test + run: cargo test --features serde diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index a7d0a0bc81ab..000000000000 --- a/.travis.yml +++ /dev/null @@ -1,2 +0,0 @@ -language: rust - 
diff --git a/bors.toml b/bors.toml new file mode 100644 index 000000000000..932be8d0909c --- /dev/null +++ b/bors.toml @@ -0,0 +1,6 @@ +status = [ + "Rust (ubuntu-latest)", + "Rust (windows-latest)", + "Rust (macos-latest)", +] +delete_merged_branches = true From 8c16c6a3582fa8b99ab1db32f742715cc646bb32 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 21 Mar 2020 00:43:20 +0100 Subject: [PATCH 099/322] Update src/range.rs Co-Authored-By: Christopher Durham --- src/range.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/range.rs b/src/range.rs index bc899e71a09c..b52a64af3933 100644 --- a/src/range.rs +++ b/src/range.rs @@ -135,8 +135,8 @@ impl TextRange { } /// Extends the range to cover `other` offsets as well. - pub fn cover_offset(self, other: TextSize) -> TextRange { - self.cover(TextRange::empty(other)) + pub fn cover_offset(self, offset: TextSize) -> TextRange { + self.cover(TextRange::empty(offset)) } /// Add an offset to this range. From d789405d93faedd6e4514d0e360aa4cdca050d05 Mon Sep 17 00:00:00 2001 From: Christopher Durham Date: Fri, 20 Mar 2020 20:21:44 -0400 Subject: [PATCH 100/322] Add licensing text to README --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index 6887ccf032c3..482a055fc364 100644 --- a/README.md +++ b/README.md @@ -8,3 +8,20 @@ A library that provides newtype wrappers for `u32` and `(u32, u32)` for use as text offsets. See the [docs](https://docs.rs/text_unit/) for more. + +## License + +Licensed under either of + + * Apache License, Version 2.0 + ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) + * MIT license + ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. 
+ +## Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. From 6d75655e17cb1cfbd91f271cb0bbdfb18e3972ec Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 21 Mar 2020 09:15:28 +0100 Subject: [PATCH 101/322] Make TextRange constructors more boring Remove `fn TextRange(` as that's slightly unusual and surprising, which will add up to a lot of confusion over the long run. Instead add: * `new` as the biased, canonical way to create range from bounds * `from_len` as an alternative ctor from starting position and len * `empty` for empty ranges at a given offset * `up_to` for ranges at zero offset with given length * `default` for an empty range at zero --- src/range.rs | 53 +++++++++++++++++++++++++--------------------- src/serde_impls.rs | 4 ++-- src/size.rs | 19 ++++++----------- src/traits.rs | 2 +- tests/main.rs | 2 +- tests/serde.rs | 2 +- 6 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src/range.rs b/src/range.rs index b52a64af3933..0e5809114a7d 100644 --- a/src/range.rs +++ b/src/range.rs @@ -10,18 +10,18 @@ use { /// /// # Translation from `text_unit` /// -/// - `TextRange::from_to(from, to)` ⟹ `TextRange(from, to)` -/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::up_to(size) + offset` +/// - `TextRange::from_to(from, to)` ⟹ `TextRange::new(from, to)` +/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::from_len(offset, size)` /// - `range.start()` ⟹ `range.start()` /// - `range.end()` ⟹ `range.end()` /// - `range.len()` ⟹ `range.len()` /// - `range.is_empty()` ⟹ `range.is_empty()` /// - `a.is_subrange(b)` ⟹ `b.contains_range(a)` -/// - `a.intersection(b)` ⟹ `TextRange::intersection(a, b)` -/// - `a.extend_to(b)` ⟹ `TextRange::covering(a, b)` +/// - `a.intersection(b)` ⟹ `a.intersect(b)` +/// - `a.extend_to(b)` ⟹ `a.cover(b)` /// - 
`range.contains(offset)` ⟹ `range.contains(point)` /// - `range.contains_inclusive(offset)` ⟹ `range.contains_inclusive(point)` -#[derive(Copy, Clone, Eq, PartialEq, Hash)] +#[derive(Default, Copy, Clone, Eq, PartialEq, Hash)] pub struct TextRange { // Invariant: start <= end start: TextSize, @@ -34,19 +34,24 @@ impl fmt::Debug for TextRange { } } -/// Creates a new `TextRange` with the given `start` and `end` (`start..end`). -/// -/// # Panics -/// -/// Panics if `end < start`. -#[allow(non_snake_case)] -#[inline] -pub fn TextRange(start: TextSize, end: TextSize) -> TextRange { - assert!(start <= end); - TextRange { start, end } -} - impl TextRange { + /// Creates a new `TextRange` with the given `start` and `end` (`start..end`). + /// + /// # Panics + /// + /// Panics if `end < start`. + #[inline] + pub fn new(start: TextSize, end: TextSize) -> TextRange { + assert!(start <= end); + TextRange { start, end } + } + + /// Create a new `TextRange` with the given `start` and `len` (`start..start + len`). + #[inline] + pub fn from_len(start: TextSize, len: TextSize) -> TextRange { + TextRange::new(start, start + len) + } + /// Create a zero-length range at the specified offset (`offset..offset`). #[inline] pub const fn empty(offset: TextSize) -> TextRange { @@ -59,10 +64,8 @@ impl TextRange { /// Create a range up to the given end (`..end`). #[inline] pub const fn up_to(end: TextSize) -> TextRange { - TextRange { - start: TextSize::zero(), - end, - } + let start = TextSize::zero(); + TextRange { start, end } } } @@ -84,7 +87,9 @@ impl TextRange { #[inline] pub const fn len(self) -> TextSize { // HACK for const fn: math on primitives only - TextSize(self.end().raw - self.start().raw) + TextSize { + raw: self.end().raw - self.start().raw, + } } /// Check if this range is empty. @@ -124,14 +129,14 @@ impl TextRange { if end < start { return None; } - Some(TextRange(start, end)) + Some(TextRange::new(start, end)) } /// Extends the range to cover `other` as well. 
pub fn cover(self, other: TextRange) -> TextRange { let start = cmp::min(self.start(), other.start()); let end = cmp::max(self.end(), other.end()); - TextRange(start, end) + TextRange::new(start, end) } /// Extends the range to cover `other` offsets as well. diff --git a/src/serde_impls.rs b/src/serde_impls.rs index 6a0d040bfaa9..a94bee9567a2 100644 --- a/src/serde_impls.rs +++ b/src/serde_impls.rs @@ -17,7 +17,7 @@ impl<'de> Deserialize<'de> for TextSize { where D: Deserializer<'de>, { - Deserialize::deserialize(deserializer).map(TextSize) + u32::deserialize(deserializer).map(TextSize::from) } } @@ -43,6 +43,6 @@ impl<'de> Deserialize<'de> for TextRange { start, end ))); } - Ok(TextRange(start, end)) + Ok(TextRange::new(start, end)) } } diff --git a/src/size.rs b/src/size.rs index 5239ea669198..14f7b37df0ef 100644 --- a/src/size.rs +++ b/src/size.rs @@ -33,11 +33,6 @@ pub struct TextSize { pub(crate) raw: u32, } -#[allow(non_snake_case)] -pub(crate) const fn TextSize(raw: u32) -> TextSize { - TextSize { raw } -} - impl fmt::Debug for TextSize { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.raw) @@ -57,7 +52,7 @@ impl TextSize { /// but is more explicit on intent. #[inline] pub const fn zero() -> TextSize { - TextSize(0) + TextSize { raw: 0 } } } @@ -65,27 +60,27 @@ impl TextSize { // Last updated for parity with Rust 1.42.0. impl TextSize { /// The smallest representable text size. (`u32::MIN`) - pub const MIN: TextSize = TextSize(u32::MIN); + pub const MIN: TextSize = TextSize { raw: u32::MIN }; /// The largest representable text size. (`u32::MAX`) - pub const MAX: TextSize = TextSize(u32::MAX); + pub const MAX: TextSize = TextSize { raw: u32::MAX }; /// Checked addition. Returns `None` if overflow occurred. #[inline] pub fn checked_add(self, rhs: TextSize) -> Option { - self.raw.checked_add(rhs.raw).map(TextSize) + self.raw.checked_add(rhs.raw).map(|raw| TextSize { raw }) } /// Checked subtraction. 
Returns `None` if overflow occurred. #[inline] pub fn checked_sub(self, rhs: TextSize) -> Option { - self.raw.checked_sub(rhs.raw).map(TextSize) + self.raw.checked_sub(rhs.raw).map(|raw| TextSize { raw }) } } impl From for TextSize { #[inline] fn from(raw: u32) -> Self { - TextSize(raw) + TextSize { raw } } } @@ -117,7 +112,7 @@ macro_rules! ops { type Output = TextSize; #[inline] fn $f(self, other: TextSize) -> TextSize { - TextSize(self.raw $op other.raw) + TextSize { raw: self.raw $op other.raw } } } impl $Op<&TextSize> for TextSize { diff --git a/src/traits.rs b/src/traits.rs index 8d197db8c16a..7064dbc65808 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -18,6 +18,6 @@ impl TextSized for &'_ str { impl TextSized for char { #[inline] fn text_size(self) -> TextSize { - TextSize(self.len_utf8() as u32) + (self.len_utf8() as u32).into() } } diff --git a/tests/main.rs b/tests/main.rs index 5a9e67861810..f8eb6d67352e 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -5,7 +5,7 @@ fn size(x: u32) -> TextSize { } fn range(x: ops::Range) -> TextRange { - TextRange(x.start.into(), x.end.into()) + TextRange::new(x.start.into(), x.end.into()) } #[test] diff --git a/tests/serde.rs b/tests/serde.rs index a32f89e865b0..874258a35f7e 100644 --- a/tests/serde.rs +++ b/tests/serde.rs @@ -5,7 +5,7 @@ fn size(x: u32) -> TextSize { } fn range(x: ops::Range) -> TextRange { - TextRange(x.start.into(), x.end.into()) + TextRange::new(x.start.into(), x.end.into()) } #[test] From 63cbf15486f5aef3a1f59edc232723a317b44b85 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Fri, 20 Mar 2020 19:43:26 -0400 Subject: [PATCH 102/322] add recursive impl for TextSized --- src/traits.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/traits.rs b/src/traits.rs index 7064dbc65808..6c30549edc1a 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -1,4 +1,7 @@ -use {crate::TextSize, std::convert::TryInto}; +use { + crate::TextSize, + std::{convert::TryInto, ops::Deref}, +}; 
/// Text-like structures that have a text size. pub trait TextSized: Copy { @@ -15,6 +18,17 @@ impl TextSized for &'_ str { } } +impl TextSized for &'_ D +where + D: Deref, + for<'a> &'a D::Target: TextSized, +{ + #[inline] + fn text_size(self) -> TextSize { + self.deref().text_size() + } +} + impl TextSized for char { #[inline] fn text_size(self) -> TextSize { From be985b7c445e7b26a2d0f85bf1d0f3cd811c2767 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Fri, 20 Mar 2020 20:01:14 -0400 Subject: [PATCH 103/322] target the TextSized blanket impl more tightly" --- src/traits.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/traits.rs b/src/traits.rs index 6c30549edc1a..6f3462bee5b0 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -20,8 +20,7 @@ impl TextSized for &'_ str { impl TextSized for &'_ D where - D: Deref, - for<'a> &'a D::Target: TextSized, + D: Deref, { #[inline] fn text_size(self) -> TextSize { From f1250948c1a3f0732a1496af3e19430c05b33d32 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Sat, 21 Mar 2020 19:23:59 -0400 Subject: [PATCH 104/322] Add assertion for TextSized impls --- src/traits.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/traits.rs b/src/traits.rs index 6f3462bee5b0..018a62cb7667 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -34,3 +34,23 @@ impl TextSized for char { (self.len_utf8() as u32).into() } } + +// assertion shape from static_assertions::assert_impl_all! 
+const _: fn() = || { + use std::borrow::Cow; + + fn assert_impl() {} + + assert_impl::<&String>(); + assert_impl::<&Cow>(); + + struct StringLike {} + impl Deref for StringLike { + type Target = str; + fn deref(&self) -> &str { + unreachable!() + } + } + + assert_impl::<&StringLike>(); +}; From 8a6f5e3cc97f3762ac9841ce870edcee82df1dba Mon Sep 17 00:00:00 2001 From: CAD97 Date: Mon, 23 Mar 2020 16:45:18 -0400 Subject: [PATCH 105/322] Move construction tests into a test --- src/traits.rs | 20 -------------------- tests/constructors.rs | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 20 deletions(-) create mode 100644 tests/constructors.rs diff --git a/src/traits.rs b/src/traits.rs index 018a62cb7667..6f3462bee5b0 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -34,23 +34,3 @@ impl TextSized for char { (self.len_utf8() as u32).into() } } - -// assertion shape from static_assertions::assert_impl_all! -const _: fn() = || { - use std::borrow::Cow; - - fn assert_impl() {} - - assert_impl::<&String>(); - assert_impl::<&Cow>(); - - struct StringLike {} - impl Deref for StringLike { - type Target = str; - fn deref(&self) -> &str { - unreachable!() - } - } - - assert_impl::<&StringLike>(); -}; diff --git a/tests/constructors.rs b/tests/constructors.rs new file mode 100644 index 000000000000..eba587b53901 --- /dev/null +++ b/tests/constructors.rs @@ -0,0 +1,31 @@ +use { + std::{borrow::Cow, ops::Deref}, + text_size::*, +}; + +struct StringLike<'a>(&'a str); + +impl Deref for StringLike<'_> { + type Target = str; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[test] +fn main() { + let s = ""; + let _ = TextSize::of(&s); + + let s = String::new(); + let _ = TextSize::of(&s); + + let s = Cow::Borrowed(""); + let _ = TextSize::of(&s); + + let s = Cow::Owned(String::new()); + let _ = TextSize::of(&s); + + let s = StringLike(""); + let _ = TextSize::of(&s); +} From 9ffd8c4afddd461eadac7084dddc466770e8c35e Mon Sep 17 00:00:00 2001 From: CAD97 
Date: Mon, 23 Mar 2020 17:03:43 -0400 Subject: [PATCH 106/322] Tests to ensure common and auto trait presence --- Cargo.toml | 1 + tests/auto_traits.rs | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 tests/auto_traits.rs diff --git a/Cargo.toml b/Cargo.toml index 08f5d4f7198c..a0126f7d6d25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ serde = { version = "1.0", optional = true, default_features = false } [dev-dependencies] serde_test = "1.0" +static_assertions = "1.1" [[test]] name = "serde" diff --git a/tests/auto_traits.rs b/tests/auto_traits.rs new file mode 100644 index 000000000000..6e6236953383 --- /dev/null +++ b/tests/auto_traits.rs @@ -0,0 +1,18 @@ +use { + static_assertions::*, + std::{ + fmt::Debug, + hash::Hash, + marker::{Send, Sync}, + panic::{RefUnwindSafe, UnwindSafe}, + }, + text_size::*, +}; + +// auto traits +assert_impl_all!(TextSize: Send, Sync, Unpin, UnwindSafe, RefUnwindSafe); +assert_impl_all!(TextRange: Send, Sync, Unpin, UnwindSafe, RefUnwindSafe); + +// common traits +assert_impl_all!(TextSize: Copy, Debug, Default, Hash, Ord); +assert_impl_all!(TextRange: Copy, Debug, Default, Hash, Eq); From 1d2de420c4c17f917e60464356104a066efd4883 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Mon, 23 Mar 2020 21:30:39 -0400 Subject: [PATCH 107/322] Add doc examples to things --- src/range.rs | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/size.rs | 15 +++++++ 2 files changed, 140 insertions(+) diff --git a/src/range.rs b/src/range.rs index 0e5809114a7d..2ed608aa5322 100644 --- a/src/range.rs +++ b/src/range.rs @@ -8,6 +8,8 @@ use { /// A range in text, represented as a pair of [`TextSize`][struct@TextSize]. /// +/// It is a logic error for `start` to be greater than `end`. +/// /// # Translation from `text_unit` /// /// - `TextRange::from_to(from, to)` ⟹ `TextRange::new(from, to)` @@ -40,6 +42,19 @@ impl TextRange { /// # Panics /// /// Panics if `end < start`. 
+ /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// let start = TextSize::from(5); + /// let end = TextSize::from(10); + /// let range = TextRange::new(start, end); + /// + /// assert_eq!(range.start(), start); + /// assert_eq!(range.end(), end); + /// assert_eq!(range.len(), end - start); + /// ``` #[inline] pub fn new(start: TextSize, end: TextSize) -> TextRange { assert!(start <= end); @@ -47,12 +62,37 @@ impl TextRange { } /// Create a new `TextRange` with the given `start` and `len` (`start..start + len`). + /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// let text = "0123456789"; + /// + /// let start = TextSize::from(2); + /// let length = TextSize::from(5); + /// let range = TextRange::from_len(start, length); + /// + /// assert_eq!(range, TextRange::new(start, start + length)); + /// assert_eq!(&text[range], "23456") + /// ``` #[inline] pub fn from_len(start: TextSize, len: TextSize) -> TextRange { TextRange::new(start, start + len) } /// Create a zero-length range at the specified offset (`offset..offset`). + /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// let point: TextSize; + /// # point = TextSize::from(3); + /// let range = TextRange::empty(point); + /// assert!(range.is_empty()); + /// assert_eq!(range, TextRange::new(point, point)); + /// ``` #[inline] pub const fn empty(offset: TextSize) -> TextRange { TextRange { @@ -62,6 +102,19 @@ impl TextRange { } /// Create a range up to the given end (`..end`). 
+ /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// let point: TextSize; + /// # point = TextSize::from(12); + /// let range = TextRange::up_to(point); + /// + /// assert_eq!(range.len(), point); + /// assert_eq!(range, TextRange::new(TextSize::zero(), point)); + /// assert_eq!(range, TextRange::from_len(TextSize::zero(), point)); + /// ``` #[inline] pub const fn up_to(end: TextSize) -> TextRange { let start = TextSize::zero(); @@ -105,6 +158,17 @@ impl TextRange { /// Check if this range contains an offset. /// /// The end index is considered excluded. + /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// let (start, end): (TextSize, TextSize); + /// # start = 10.into(); end = 20.into(); + /// let range = TextRange::new(start, end); + /// assert!(range.contains(start)); + /// assert!(!range.contains(end)); + /// ``` pub fn contains(self, offset: TextSize) -> bool { self.start() <= offset && offset < self.end() } @@ -112,17 +176,55 @@ impl TextRange { /// Check if this range contains an offset. /// /// The end index is considered included. + /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// let (start, end): (TextSize, TextSize); + /// # start = 10.into(); end = 20.into(); + /// let range = TextRange::new(start, end); + /// assert!(range.contains_inclusive(start)); + /// assert!(range.contains_inclusive(end)); + /// ``` pub fn contains_inclusive(self, offset: TextSize) -> bool { self.start() <= offset && offset <= self.end() } /// Check if this range completely contains another range. 
+ /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// let larger = TextRange::new(0.into(), 20.into()); + /// let smaller = TextRange::new(5.into(), 15.into()); + /// assert!(larger.contains_range(smaller)); + /// assert!(!smaller.contains_range(larger)); + /// + /// // a range always contains itself + /// assert!(larger.contains_range(larger)); + /// assert!(smaller.contains_range(smaller)); + /// ``` pub fn contains_range(self, other: TextRange) -> bool { self.start() <= other.start() && other.end() <= self.end() } /// The range covered by both ranges, if it exists. /// If the ranges touch but do not overlap, the output range is empty. + /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// assert_eq!( + /// TextRange::intersect( + /// TextRange::new(0.into(), 10.into()), + /// TextRange::new(5.into(), 15.into()), + /// ), + /// Some(TextRange::new(5.into(), 10.into())), + /// ); + /// ``` pub fn intersect(self, other: TextRange) -> Option { let start = cmp::max(self.start(), other.start()); let end = cmp::min(self.end(), other.end()); @@ -133,6 +235,19 @@ impl TextRange { } /// Extends the range to cover `other` as well. + /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// assert_eq!( + /// TextRange::cover( + /// TextRange::new(0.into(), 5.into()), + /// TextRange::new(15.into(), 20.into()), + /// ), + /// TextRange::new(0.into(), 20.into()), + /// ); + /// ``` pub fn cover(self, other: TextRange) -> TextRange { let start = cmp::min(self.start(), other.start()); let end = cmp::max(self.end(), other.end()); @@ -140,6 +255,16 @@ impl TextRange { } /// Extends the range to cover `other` offsets as well. 
+ /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// assert_eq!( + /// TextRange::empty(TextSize::zero()).cover_offset(20.into()), + /// TextRange::new(0.into(), 20.into()), + /// ) + /// ``` pub fn cover_offset(self, offset: TextSize) -> TextRange { self.cover(TextRange::empty(offset)) } diff --git a/src/size.rs b/src/size.rs index 14f7b37df0ef..9a0256e8b262 100644 --- a/src/size.rs +++ b/src/size.rs @@ -41,6 +41,21 @@ impl fmt::Debug for TextSize { impl TextSize { /// The text size of some text-like object. + /// + /// Accepts `char`, `&str`, and references to any custom string-like type + /// that dereferences to `str`. Types that don't dereference to `str` but + /// want to be usable in this constructor can implement [`TextSized`]. + /// + /// # Examples + /// + /// ```rust + /// # use text_size::*; + /// let char_size = TextSize::of('🦀'); + /// assert_eq!(char_size, TextSize::from(4)); + /// + /// let str_size = TextSize::of("rust-analyzer"); + /// assert_eq!(str_size, TextSize::from(13)); + /// ``` #[inline] pub fn of(text: impl TextSized) -> TextSize { text.text_size() From 2c39fd6e331f406814ad6fe73a77d67f20c847fe Mon Sep 17 00:00:00 2001 From: CAD97 Date: Mon, 23 Mar 2020 21:31:14 -0400 Subject: [PATCH 108/322] Make sure docs build on CI --- .github/workflows/ci.yaml | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index aa0587bd98eb..1bd88df68e6c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -33,4 +33,26 @@ jobs: profile: minimal - name: Test - run: cargo test --features serde + run: cargo test --all-features + + rustdoc: + name: Docs + runs-on: ubuntu-latest + + env: + RUSTFLAGS: -D warnings + RUSTUP_MAX_RETRIES: 10 + CARGO_NET_RETRY: 10 + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Install Rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: nightly + profile: 
minimal + + - name: Rustdoc + run: cargo doc --all-features From 8951ec18f21cc7f4ee2baab2c41541367f4d4d95 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Mon, 23 Mar 2020 21:36:09 -0400 Subject: [PATCH 109/322] TextSized is not meant to be used directly... so rename it to a name more distinct from TextSize. --- src/lib.rs | 2 +- src/size.rs | 6 +++--- src/traits.rs | 18 +++++++++--------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e194e2317bb5..aea1591003c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,7 @@ mod traits; #[cfg(feature = "serde")] mod serde_impls; -pub use crate::{range::TextRange, size::TextSize, traits::TextSized}; +pub use crate::{range::TextRange, size::TextSize, traits::LenTextSize}; #[cfg(target_pointer_width = "16")] compile_error!("text-size assumes usize >= u32 and does not work on 16-bit targets"); diff --git a/src/size.rs b/src/size.rs index 14f7b37df0ef..16fd7d648db7 100644 --- a/src/size.rs +++ b/src/size.rs @@ -1,5 +1,5 @@ use { - crate::TextSized, + crate::LenTextSize, std::{ convert::TryFrom, fmt, iter, @@ -42,8 +42,8 @@ impl fmt::Debug for TextSize { impl TextSize { /// The text size of some text-like object. #[inline] - pub fn of(text: impl TextSized) -> TextSize { - text.text_size() + pub fn of(text: impl LenTextSize) -> TextSize { + text.len_text_size() } /// A size of zero. diff --git a/src/traits.rs b/src/traits.rs index 6f3462bee5b0..745675fda7e0 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -4,33 +4,33 @@ use { }; /// Text-like structures that have a text size. -pub trait TextSized: Copy { +pub trait LenTextSize: Copy { /// The size of this text-alike. 
- fn text_size(self) -> TextSize; + fn len_text_size(self) -> TextSize; } -impl TextSized for &'_ str { +impl LenTextSize for &'_ str { #[inline] - fn text_size(self) -> TextSize { + fn len_text_size(self) -> TextSize { self.len() .try_into() .unwrap_or_else(|_| panic!("string too large ({}) for TextSize", self.len())) } } -impl TextSized for &'_ D +impl LenTextSize for &'_ D where D: Deref, { #[inline] - fn text_size(self) -> TextSize { - self.deref().text_size() + fn len_text_size(self) -> TextSize { + self.deref().len_text_size() } } -impl TextSized for char { +impl LenTextSize for char { #[inline] - fn text_size(self) -> TextSize { + fn len_text_size(self) -> TextSize { (self.len_utf8() as u32).into() } } From e88e4aa7bbc9963c5158c5ac7116fda1a5bca5ce Mon Sep 17 00:00:00 2001 From: CAD97 Date: Wed, 25 Mar 2020 20:05:18 -0400 Subject: [PATCH 110/322] Fix broken intra doc link --- src/size.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/size.rs b/src/size.rs index 234e54d888d4..e20763f4bc35 100644 --- a/src/size.rs +++ b/src/size.rs @@ -44,7 +44,7 @@ impl TextSize { /// /// Accepts `char`, `&str`, and references to any custom string-like type /// that dereferences to `str`. Types that don't dereference to `str` but - /// want to be usable in this constructor can implement [`TextSized`]. + /// want to be usable in this constructor can implement [`LenTextSize`]. 
/// /// # Examples /// From 9332ee9efba634a2f832103e479b2b767428a70e Mon Sep 17 00:00:00 2001 From: CAD97 Date: Mon, 23 Mar 2020 21:44:08 -0400 Subject: [PATCH 111/322] Just unwrap in LenTextSize for &str --- src/traits.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/traits.rs b/src/traits.rs index 745675fda7e0..65f6445a1ed6 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -12,9 +12,7 @@ pub trait LenTextSize: Copy { impl LenTextSize for &'_ str { #[inline] fn len_text_size(self) -> TextSize { - self.len() - .try_into() - .unwrap_or_else(|_| panic!("string too large ({}) for TextSize", self.len())) + self.len().try_into().unwrap() } } From 85935e0026cf93c24f9690234e15440eb3ab8ab7 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Mon, 23 Mar 2020 21:47:35 -0400 Subject: [PATCH 112/322] Use standard generics for TextSize::of There is no specific reason to use APIT here, so prefer the form that allows more control for the user, in the form of the turbofish. --- src/size.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/size.rs b/src/size.rs index e20763f4bc35..171f33777748 100644 --- a/src/size.rs +++ b/src/size.rs @@ -57,7 +57,7 @@ impl TextSize { /// assert_eq!(str_size, TextSize::from(13)); /// ``` #[inline] - pub fn of(text: impl LenTextSize) -> TextSize { + pub fn of(text: T) -> TextSize { text.len_text_size() } From fdf4345cdda5e3a67bcc152652c7747a895d8298 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Wed, 25 Mar 2020 19:16:20 -0400 Subject: [PATCH 113/322] Actually fail CI for doc warnings --- .github/workflows/ci.yaml | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1bd88df68e6c..4538ca8479d9 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -7,6 +7,11 @@ on: - staging - trying +env: + RUSTFLAGS: -D warnings + RUSTUP_MAX_RETRIES: 10 + CARGO_NET_RETRY: 10 + jobs: rust: name: Rust @@ -17,11 +22,6 @@ 
jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] - env: - RUSTFLAGS: -D warnings - RUSTUP_MAX_RETRIES: 10 - CARGO_NET_RETRY: 10 - steps: - name: Checkout repository uses: actions/checkout@v2 @@ -39,11 +39,6 @@ jobs: name: Docs runs-on: ubuntu-latest - env: - RUSTFLAGS: -D warnings - RUSTUP_MAX_RETRIES: 10 - CARGO_NET_RETRY: 10 - steps: - name: Checkout repository uses: actions/checkout@v2 @@ -53,6 +48,7 @@ jobs: with: toolchain: nightly profile: minimal + override: true - name: Rustdoc - run: cargo doc --all-features + run: cargo rustdoc --all-features -- -D warnings From aa8e477c16ee597048d7b52137b44f318977aac9 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Wed, 25 Mar 2020 20:39:02 -0400 Subject: [PATCH 114/322] Bump dev version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a0126f7d6d25..4ccbfdcb7b25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-size" -version = "0.99.0-dev.2" +version = "0.99.0-dev.3" edition = "2018" authors = [ From 5ef8391b680be9ab20f3a1457c68d9dd5122a16e Mon Sep 17 00:00:00 2001 From: CAD97 Date: Wed, 25 Mar 2020 21:41:48 -0400 Subject: [PATCH 115/322] Re-add Index for String --- src/range.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/range.rs b/src/range.rs index 2ed608aa5322..11fe1e702dcb 100644 --- a/src/range.rs +++ b/src/range.rs @@ -305,14 +305,29 @@ impl TextRange { impl Index for str { type Output = str; #[inline] - fn index(&self, index: TextRange) -> &Self::Output { + fn index(&self, index: TextRange) -> &str { + &self[Range::::from(index)] + } +} + +impl Index for String { + type Output = str; + #[inline] + fn index(&self, index: TextRange) -> &str { &self[Range::::from(index)] } } impl IndexMut for str { #[inline] - fn index_mut(&mut self, index: TextRange) -> &mut Self::Output { + fn index_mut(&mut self, index: TextRange) -> &mut str { + &mut self[Range::::from(index)] + } 
+} + +impl IndexMut for String { + #[inline] + fn index_mut(&mut self, index: TextRange) -> &mut str { &mut self[Range::::from(index)] } } From daa70c8c73c47cb0ded543f42a913c53651e8dab Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 26 Mar 2020 16:58:49 -0400 Subject: [PATCH 116/322] Don't blanket impl LenTextSize --- src/traits.rs | 43 ++++++++++++++++++++++++++++++++++--------- tests/constructors.rs | 41 ++++++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/src/traits.rs b/src/traits.rs index 65f6445a1ed6..3944030c62eb 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -1,6 +1,6 @@ use { crate::TextSize, - std::{convert::TryInto, ops::Deref}, + std::{borrow::Cow, convert::TryInto, rc::Rc, sync::Arc}, }; /// Text-like structures that have a text size. @@ -16,19 +16,44 @@ impl LenTextSize for &'_ str { } } -impl LenTextSize for &'_ D -where - D: Deref, -{ +impl LenTextSize for char { #[inline] fn len_text_size(self) -> TextSize { - self.deref().len_text_size() + (self.len_utf8() as u32).into() } } -impl LenTextSize for char { - #[inline] +impl LenTextSize for &'_ D +where + D: LenTextSize + Copy, +{ fn len_text_size(self) -> TextSize { - (self.len_utf8() as u32).into() + D::len_text_size(*self) } } + +// Because we could not find a smart blanket impl to do this automatically and +// cleanly (rust-analyzer/text-size#36), just provide a bunch of manual impls. +// If a type fits in this macro and you need it to impl LenTextSize, just open +// a PR and we are likely to accept it. Or use `TextSize::of::<&str>` for now. +macro_rules! impl_lentextsize_for_string { + ($($ty:ty),+ $(,)?) => {$( + impl LenTextSize for $ty { + #[inline] + fn len_text_size(self) -> TextSize { + <&str>::len_text_size(self) + } + } + )+}; +} + +impl_lentextsize_for_string! 
{ + &Box, + &'_ String, + &Cow<'_, str>, + &Cow<'_, String>, + &Arc, + &Arc, + &Rc, + &Rc, +} diff --git a/tests/constructors.rs b/tests/constructors.rs index eba587b53901..52f17922185f 100644 --- a/tests/constructors.rs +++ b/tests/constructors.rs @@ -1,31 +1,34 @@ use { - std::{borrow::Cow, ops::Deref}, + std::{borrow::Cow, sync::Arc}, text_size::*, }; -struct StringLike<'a>(&'a str); +#[derive(Copy, Clone)] +struct BadRope<'a>(&'a [&'a str]); -impl Deref for StringLike<'_> { - type Target = str; - fn deref(&self) -> &Self::Target { - &self.0 +impl LenTextSize for BadRope<'_> { + fn len_text_size(self) -> TextSize { + self.0.iter().copied().map(LenTextSize::len_text_size).sum() } } #[test] fn main() { - let s = ""; - let _ = TextSize::of(&s); - - let s = String::new(); - let _ = TextSize::of(&s); - - let s = Cow::Borrowed(""); - let _ = TextSize::of(&s); - - let s = Cow::Owned(String::new()); - let _ = TextSize::of(&s); + macro_rules! test { + ($($expr:expr),+ $(,)?) => { + $(let _ = TextSize::of($expr);)+ + }; + } - let s = StringLike(""); - let _ = TextSize::of(&s); + test! 
{ + "", + &"", + 'a', + &'a', + &String::new(), + &String::new().into_boxed_str(), + &Arc::new(String::new()), + &Cow::Borrowed(""), + BadRope(&[""]), + } } From b1b7dc101aabf13edfcb97cd756bc659334cd79e Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 26 Mar 2020 17:03:04 -0400 Subject: [PATCH 117/322] Add test for indexing --- tests/indexing.rs | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 tests/indexing.rs diff --git a/tests/indexing.rs b/tests/indexing.rs new file mode 100644 index 000000000000..ebbed7700d77 --- /dev/null +++ b/tests/indexing.rs @@ -0,0 +1,8 @@ +use text_size::*; + +#[test] +fn main() { + let range = TextRange::default(); + &""[range]; + &String::new()[range]; +} From 8061a90ee0323c36e77e70be93241347414039a8 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Fri, 27 Mar 2020 16:51:58 -0400 Subject: [PATCH 118/322] Imporove test's impl LenTextSize for BadRope --- tests/constructors.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/constructors.rs b/tests/constructors.rs index 52f17922185f..1022a4168ba4 100644 --- a/tests/constructors.rs +++ b/tests/constructors.rs @@ -8,7 +8,7 @@ struct BadRope<'a>(&'a [&'a str]); impl LenTextSize for BadRope<'_> { fn len_text_size(self) -> TextSize { - self.0.iter().copied().map(LenTextSize::len_text_size).sum() + self.0.iter().map(TextSize::of).sum() } } From c94e26f9126978091379ec88830762bc1d10aae8 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Tue, 7 Apr 2020 16:36:32 -0400 Subject: [PATCH 119/322] Rename TextRange::from_len => from_offset --- src/range.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/range.rs b/src/range.rs index 11fe1e702dcb..a7f47f9d2f20 100644 --- a/src/range.rs +++ b/src/range.rs @@ -61,7 +61,7 @@ impl TextRange { TextRange { start, end } } - /// Create a new `TextRange` with the given `start` and `len` (`start..start + len`). + /// Create a new `TextRange` with the given `offset` and `len` (`offset..offset + len`). 
/// /// # Examples /// @@ -69,16 +69,16 @@ impl TextRange { /// # use text_size::*; /// let text = "0123456789"; /// - /// let start = TextSize::from(2); + /// let offset = TextSize::from(2); /// let length = TextSize::from(5); - /// let range = TextRange::from_len(start, length); + /// let range = TextRange::from_offset(offset, length); /// - /// assert_eq!(range, TextRange::new(start, start + length)); + /// assert_eq!(range, TextRange::new(offset, offset + length)); /// assert_eq!(&text[range], "23456") /// ``` #[inline] - pub fn from_len(start: TextSize, len: TextSize) -> TextRange { - TextRange::new(start, start + len) + pub fn from_offset(offset: TextSize, len: TextSize) -> TextRange { + TextRange::new(offset, offset + len) } /// Create a zero-length range at the specified offset (`offset..offset`). @@ -113,7 +113,7 @@ impl TextRange { /// /// assert_eq!(range.len(), point); /// assert_eq!(range, TextRange::new(TextSize::zero(), point)); - /// assert_eq!(range, TextRange::from_len(TextSize::zero(), point)); + /// assert_eq!(range, TextRange::from_offset(TextSize::zero(), point)); /// ``` #[inline] pub const fn up_to(end: TextSize) -> TextRange { From 951aaa454114d8c1a62af5cc741faafd84bbdf56 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Tue, 7 Apr 2020 16:49:16 -0400 Subject: [PATCH 120/322] Rename TextRange::from_offset => at --- src/range.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/range.rs b/src/range.rs index a7f47f9d2f20..c80e5353ed6e 100644 --- a/src/range.rs +++ b/src/range.rs @@ -71,13 +71,13 @@ impl TextRange { /// /// let offset = TextSize::from(2); /// let length = TextSize::from(5); - /// let range = TextRange::from_offset(offset, length); + /// let range = TextRange::at(offset, length); /// /// assert_eq!(range, TextRange::new(offset, offset + length)); /// assert_eq!(&text[range], "23456") /// ``` #[inline] - pub fn from_offset(offset: TextSize, len: TextSize) -> TextRange { + pub fn at(offset: TextSize, len: 
TextSize) -> TextRange { TextRange::new(offset, offset + len) } @@ -113,7 +113,7 @@ impl TextRange { /// /// assert_eq!(range.len(), point); /// assert_eq!(range, TextRange::new(TextSize::zero(), point)); - /// assert_eq!(range, TextRange::from_offset(TextSize::zero(), point)); + /// assert_eq!(range, TextRange::at(TextSize::zero(), point)); /// ``` #[inline] pub const fn up_to(end: TextSize) -> TextRange { From 9c3a298268d52ebd3271469e4fe8c101a0b5d014 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Tue, 7 Apr 2020 17:04:54 -0400 Subject: [PATCH 121/322] Rename LenTextSize => TextLen This way, it's a reasonable name to use. --- src/lib.rs | 2 +- src/size.rs | 8 ++++---- src/traits.rs | 36 ++++++++++++++++++------------------ tests/constructors.rs | 4 ++-- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index aea1591003c9..b39cb186c148 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,7 @@ mod traits; #[cfg(feature = "serde")] mod serde_impls; -pub use crate::{range::TextRange, size::TextSize, traits::LenTextSize}; +pub use crate::{range::TextRange, size::TextSize, traits::TextLen}; #[cfg(target_pointer_width = "16")] compile_error!("text-size assumes usize >= u32 and does not work on 16-bit targets"); diff --git a/src/size.rs b/src/size.rs index 171f33777748..3a2a281329a7 100644 --- a/src/size.rs +++ b/src/size.rs @@ -1,5 +1,5 @@ use { - crate::LenTextSize, + crate::TextLen, std::{ convert::TryFrom, fmt, iter, @@ -44,7 +44,7 @@ impl TextSize { /// /// Accepts `char`, `&str`, and references to any custom string-like type /// that dereferences to `str`. Types that don't dereference to `str` but - /// want to be usable in this constructor can implement [`LenTextSize`]. + /// want to be usable in this constructor can implement [`TextLen`]. 
/// /// # Examples /// @@ -57,8 +57,8 @@ impl TextSize { /// assert_eq!(str_size, TextSize::from(13)); /// ``` #[inline] - pub fn of(text: T) -> TextSize { - text.len_text_size() + pub fn of(text: T) -> TextSize { + text.text_len() } /// A size of zero. diff --git a/src/traits.rs b/src/traits.rs index 3944030c62eb..a19396c133cb 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -4,52 +4,52 @@ use { }; /// Text-like structures that have a text size. -pub trait LenTextSize: Copy { +pub trait TextLen: Copy { /// The size of this text-alike. - fn len_text_size(self) -> TextSize; + fn text_len(self) -> TextSize; } -impl LenTextSize for &'_ str { +impl TextLen for &'_ str { #[inline] - fn len_text_size(self) -> TextSize { + fn text_len(self) -> TextSize { self.len().try_into().unwrap() } } -impl LenTextSize for char { +impl TextLen for char { #[inline] - fn len_text_size(self) -> TextSize { + fn text_len(self) -> TextSize { (self.len_utf8() as u32).into() } } -impl LenTextSize for &'_ D +impl TextLen for &'_ D where - D: LenTextSize + Copy, + D: TextLen + Copy, { - fn len_text_size(self) -> TextSize { - D::len_text_size(*self) + fn text_len(self) -> TextSize { + D::text_len(*self) } } // Because we could not find a smart blanket impl to do this automatically and // cleanly (rust-analyzer/text-size#36), just provide a bunch of manual impls. -// If a type fits in this macro and you need it to impl LenTextSize, just open -// a PR and we are likely to accept it. Or use `TextSize::of::<&str>` for now. -macro_rules! impl_lentextsize_for_string { +// If a standard type fits in this macro and you need it to impl TextLen, just +// open a PR and we are likely to accept it. Or convince Rust to deref to &str. +macro_rules! impl_textlen_for_string { ($($ty:ty),+ $(,)?) 
=> {$( - impl LenTextSize for $ty { + impl TextLen for $ty { #[inline] - fn len_text_size(self) -> TextSize { - <&str>::len_text_size(self) + fn text_len(self) -> TextSize { + <&str>::text_len(self) } } )+}; } -impl_lentextsize_for_string! { +impl_textlen_for_string! { &Box, - &'_ String, + &String, &Cow<'_, str>, &Cow<'_, String>, &Arc, diff --git a/tests/constructors.rs b/tests/constructors.rs index 1022a4168ba4..829a28e477e9 100644 --- a/tests/constructors.rs +++ b/tests/constructors.rs @@ -6,8 +6,8 @@ use { #[derive(Copy, Clone)] struct BadRope<'a>(&'a [&'a str]); -impl LenTextSize for BadRope<'_> { - fn len_text_size(self) -> TextSize { +impl TextLen for BadRope<'_> { + fn text_len(self) -> TextSize { self.0.iter().map(TextSize::of).sum() } } From 53d29fd18a3c754adc5d3e324479f9fa6bb7ec13 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 11 Apr 2020 14:21:53 +0200 Subject: [PATCH 122/322] Add more docs --- CHANGELOG.md | 22 ++++++++++++++++++++++ src/lib.rs | 14 ++++++++++++++ src/range.rs | 14 -------------- src/size.rs | 7 ------- 4 files changed, 36 insertions(+), 21 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000000..2eb012a6d94c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,22 @@ +# Changelog + +## 1.0.0 :tada: + +* the carate is renmaed to `text-size` from `text_unit` + +Transition table: +- `TextUnit::of_char(c)` ⟹ `TextSize::of(c)` +- `TextUnit::of_str(s)` ⟹ `TextSize::of(s)` +- `TextUnit::from_usize(size)` ⟹ `TextSize::try_from(size).unwrap_or_else(|| panic!(_))` +- `unit.to_usize()` ⟹ `usize::from(size)` +- `TextRange::from_to(from, to)` ⟹ `TextRange::new(from, to)` +- `TextRange::offset_len(offset, size)` ⟹ `TextRange::from_len(offset, size)` +- `range.start()` ⟹ `range.start()` +- `range.end()` ⟹ `range.end()` +- `range.len()` ⟹ `range.len()` +- `range.is_empty()` ⟹ `range.is_empty()` +- `a.is_subrange(b)` ⟹ `b.contains_range(a)` +- `a.intersection(b)` ⟹ `a.intersect(b)` 
+- `a.extend_to(b)` ⟹ `a.cover(b)` +- `range.contains(offset)` ⟹ `range.contains(point)` +- `range.contains_inclusive(offset)` ⟹ `range.contains_inclusive(point)` diff --git a/src/lib.rs b/src/lib.rs index b39cb186c148..07dc5e80f4d8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,19 @@ //! Newtypes for working with text sizes/ranges in a more type-safe manner. //! +//! This library can help with two things: +//! * Reducing storage requirenments for offsets and ranges, under the +//! assumption that 32 bits is enough. +//! * Providing standard vocabulary types for applications where text ranges +//! are pervasive. +//! +//! However, you should not use this library simply because you work with +//! strings. In the overhelming majority of cases, using `usize` and +//! `std::ops::Range` is better. In particular, if you are publishing a +//! library, using only std types in the interface would make it more +//! interoperable. Similarly, if you are writing something like a lexer, which +//! produces, but does not *store* text ranges, than sticking to `usize` would +//! be better. +//! //! Minimal Supported Rust Version: latest stable. #![forbid(unsafe_code)] diff --git a/src/range.rs b/src/range.rs index c80e5353ed6e..50fcf8257886 100644 --- a/src/range.rs +++ b/src/range.rs @@ -9,20 +9,6 @@ use { /// A range in text, represented as a pair of [`TextSize`][struct@TextSize]. /// /// It is a logic error for `start` to be greater than `end`. 
-/// -/// # Translation from `text_unit` -/// -/// - `TextRange::from_to(from, to)` ⟹ `TextRange::new(from, to)` -/// - `TextRange::offset_len(offset, size)` ⟹ `TextRange::from_len(offset, size)` -/// - `range.start()` ⟹ `range.start()` -/// - `range.end()` ⟹ `range.end()` -/// - `range.len()` ⟹ `range.len()` -/// - `range.is_empty()` ⟹ `range.is_empty()` -/// - `a.is_subrange(b)` ⟹ `b.contains_range(a)` -/// - `a.intersection(b)` ⟹ `a.intersect(b)` -/// - `a.extend_to(b)` ⟹ `a.cover(b)` -/// - `range.contains(offset)` ⟹ `range.contains(point)` -/// - `range.contains_inclusive(offset)` ⟹ `range.contains_inclusive(point)` #[derive(Default, Copy, Clone, Eq, PartialEq, Hash)] pub struct TextRange { // Invariant: start <= end diff --git a/src/size.rs b/src/size.rs index 3a2a281329a7..3a0d34b80845 100644 --- a/src/size.rs +++ b/src/size.rs @@ -21,13 +21,6 @@ use { /// /// These escape hatches are primarily required for unit testing and when /// converting from UTF-8 size to another coordinate space, such as UTF-16. -/// -/// # Translation from `text_unit` -/// -/// - `TextUnit::of_char(c)` ⟹ `TextSize::of(c)` -/// - `TextUnit::of_str(s)` ⟹ `TextSize::of(s)` -/// - `TextUnit::from_usize(size)` ⟹ `TextSize::try_from(size).unwrap_or_else(|| panic!(_))` -/// - `unit.to_usize()` ⟹ `usize::from(size)` #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct TextSize { pub(crate) raw: u32, From d7ef23ad453337a2e79d44440a22d28efe7bb4d5 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 11 Apr 2020 14:22:16 +0200 Subject: [PATCH 123/322] Ready to launch :rocket:! 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4ccbfdcb7b25..9b36860ea95d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-size" -version = "0.99.0-dev.3" +version = "0.99.0" edition = "2018" authors = [ From a43ca1c55ea549cd052d4b4e1621f7d480210e19 Mon Sep 17 00:00:00 2001 From: Coenen Benjamin Date: Tue, 14 Apr 2020 09:46:19 +0200 Subject: [PATCH 124/322] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 482a055fc364..03a9e8169f5b 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ -# text_unit +# text_size -[![Build Status](https://travis-ci.org/matklad/text_unit.svg?branch=master)](https://travis-ci.org/matklad/text_unit) -[![Crates.io](https://img.shields.io/crates/v/text_unit.svg)](https://crates.io/crates/text_unit) -[![API reference](https://docs.rs/text_unit/badge.svg)](https://docs.rs/text_unit/) +[![Build Status](https://travis-ci.org/matklad/text_size.svg?branch=master)](https://travis-ci.org/matklad/text_size) +[![Crates.io](https://img.shields.io/crates/v/text_size.svg)](https://crates.io/crates/text_size) +[![API reference](https://docs.rs/text_size/badge.svg)](https://docs.rs/text_size/) A library that provides newtype wrappers for `u32` and `(u32, u32)` for use as text offsets. -See the [docs](https://docs.rs/text_unit/) for more. +See the [docs](https://docs.rs/text_size/) for more. ## License From d0896025433ea6a39eab9ef8657eeb05ebb93783 Mon Sep 17 00:00:00 2001 From: ConnorSkees <39542938+ConnorSkees@users.noreply.github.com> Date: Tue, 14 Apr 2020 23:35:02 -0400 Subject: [PATCH 125/322] fix typos --- src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 07dc5e80f4d8..92bd36b192fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,17 +1,17 @@ //! 
Newtypes for working with text sizes/ranges in a more type-safe manner. //! //! This library can help with two things: -//! * Reducing storage requirenments for offsets and ranges, under the +//! * Reducing storage requirements for offsets and ranges, under the //! assumption that 32 bits is enough. //! * Providing standard vocabulary types for applications where text ranges //! are pervasive. //! //! However, you should not use this library simply because you work with -//! strings. In the overhelming majority of cases, using `usize` and +//! strings. In the overwhelming majority of cases, using `usize` and //! `std::ops::Range` is better. In particular, if you are publishing a //! library, using only std types in the interface would make it more //! interoperable. Similarly, if you are writing something like a lexer, which -//! produces, but does not *store* text ranges, than sticking to `usize` would +//! produces, but does not *store* text ranges, then sticking to `usize` would //! be better. //! //! Minimal Supported Rust Version: latest stable. 
From 4c8a10e8fc2e15a4f550fa4597874aceb02b455b Mon Sep 17 00:00:00 2001 From: Coenen Benjamin Date: Wed, 15 Apr 2020 10:54:27 +0200 Subject: [PATCH 126/322] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 03a9e8169f5b..365b6028a337 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ -# text_size +# text-size -[![Build Status](https://travis-ci.org/matklad/text_size.svg?branch=master)](https://travis-ci.org/matklad/text_size) -[![Crates.io](https://img.shields.io/crates/v/text_size.svg)](https://crates.io/crates/text_size) -[![API reference](https://docs.rs/text_size/badge.svg)](https://docs.rs/text_size/) +[![Build Status](https://travis-ci.org/matklad/text-size.svg?branch=master)](https://travis-ci.org/matklad/text-size) +[![Crates.io](https://img.shields.io/crates/v/text-size.svg)](https://crates.io/crates/text-size) +[![API reference](https://docs.rs/text-size/badge.svg)](https://docs.rs/text-size/) A library that provides newtype wrappers for `u32` and `(u32, u32)` for use as text offsets. -See the [docs](https://docs.rs/text_size/) for more. +See the [docs](https://docs.rs/text-size/) for more. 
## License From 408e2047d782f1b45ecf607f1faec031d3a77641 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Fri, 17 Apr 2020 20:10:36 -0400 Subject: [PATCH 127/322] Scope trait TextLen just for primitives --- Cargo.toml | 2 +- src/traits.rs | 15 ++++++++++++--- tests/constructors.rs | 5 +++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9b36860ea95d..9c75a9af0442 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-size" -version = "0.99.0" +version = "1.0.0-pre.1" edition = "2018" authors = [ diff --git a/src/traits.rs b/src/traits.rs index a19396c133cb..c0adacd92b08 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -3,12 +3,18 @@ use { std::{borrow::Cow, convert::TryInto, rc::Rc, sync::Arc}, }; -/// Text-like structures that have a text size. -pub trait TextLen: Copy { - /// The size of this text-alike. +use priv_in_pub::Sealed; +mod priv_in_pub { + pub trait Sealed {} +} + +/// Primitives with a textual length that can be passed to [`TextSize::of`]. +pub trait TextLen: Copy + Sealed { + /// The textual length of this primitive. fn text_len(self) -> TextSize; } +impl Sealed for &'_ str {} impl TextLen for &'_ str { #[inline] fn text_len(self) -> TextSize { @@ -16,6 +22,7 @@ impl TextLen for &'_ str { } } +impl Sealed for char {} impl TextLen for char { #[inline] fn text_len(self) -> TextSize { @@ -23,6 +30,7 @@ impl TextLen for char { } } +impl Sealed for &'_ D where D: TextLen + Copy {} impl TextLen for &'_ D where D: TextLen + Copy, @@ -38,6 +46,7 @@ where // open a PR and we are likely to accept it. Or convince Rust to deref to &str. macro_rules! impl_textlen_for_string { ($($ty:ty),+ $(,)?) 
=> {$( + impl Sealed for $ty {} impl TextLen for $ty { #[inline] fn text_len(self) -> TextSize { diff --git a/tests/constructors.rs b/tests/constructors.rs index 829a28e477e9..9c9d0801caf0 100644 --- a/tests/constructors.rs +++ b/tests/constructors.rs @@ -6,7 +6,7 @@ use { #[derive(Copy, Clone)] struct BadRope<'a>(&'a [&'a str]); -impl TextLen for BadRope<'_> { +impl BadRope<'_> { fn text_len(self) -> TextSize { self.0.iter().map(TextSize::of).sum() } @@ -29,6 +29,7 @@ fn main() { &String::new().into_boxed_str(), &Arc::new(String::new()), &Cow::Borrowed(""), - BadRope(&[""]), } + + let _ = BadRope(&[""]).text_len(); } From ccae7686c1d8869ce94d82b7855eee3d32a2c4dd Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 24 Apr 2020 23:57:02 +0200 Subject: [PATCH 128/322] Drop unused API --- src/range.rs | 10 ++++++---- src/size.rs | 16 +--------------- src/traits.rs | 48 ++++++++---------------------------------------- 3 files changed, 15 insertions(+), 59 deletions(-) diff --git a/src/range.rs b/src/range.rs index 50fcf8257886..ffb71d3e0205 100644 --- a/src/range.rs +++ b/src/range.rs @@ -80,7 +80,7 @@ impl TextRange { /// assert_eq!(range, TextRange::new(point, point)); /// ``` #[inline] - pub const fn empty(offset: TextSize) -> TextRange { + pub fn empty(offset: TextSize) -> TextRange { TextRange { start: offset, end: offset, @@ -102,9 +102,11 @@ impl TextRange { /// assert_eq!(range, TextRange::at(TextSize::zero(), point)); /// ``` #[inline] - pub const fn up_to(end: TextSize) -> TextRange { - let start = TextSize::zero(); - TextRange { start, end } + pub fn up_to(end: TextSize) -> TextRange { + TextRange { + start: 0.into(), + end, + } } } diff --git a/src/size.rs b/src/size.rs index 3a0d34b80845..105e158ca4e2 100644 --- a/src/size.rs +++ b/src/size.rs @@ -53,25 +53,11 @@ impl TextSize { pub fn of(text: T) -> TextSize { text.text_len() } - - /// A size of zero. 
- /// - /// This is equivalent to `TextSize::default()` or [`TextSize::MIN`], - /// but is more explicit on intent. - #[inline] - pub const fn zero() -> TextSize { - TextSize { raw: 0 } - } } /// Methods to act like a primitive integer type, where reasonably applicable. // Last updated for parity with Rust 1.42.0. impl TextSize { - /// The smallest representable text size. (`u32::MIN`) - pub const MIN: TextSize = TextSize { raw: u32::MIN }; - /// The largest representable text size. (`u32::MAX`) - pub const MAX: TextSize = TextSize { raw: u32::MAX }; - /// Checked addition. Returns `None` if overflow occurred. #[inline] pub fn checked_add(self, rhs: TextSize) -> Option { @@ -172,6 +158,6 @@ where { #[inline] fn sum>(iter: I) -> TextSize { - iter.fold(TextSize::zero(), Add::add) + iter.fold(0.into(), Add::add) } } diff --git a/src/traits.rs b/src/traits.rs index c0adacd92b08..d0bb6c1f669a 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -1,7 +1,4 @@ -use { - crate::TextSize, - std::{borrow::Cow, convert::TryInto, rc::Rc, sync::Arc}, -}; +use {crate::TextSize, std::convert::TryInto}; use priv_in_pub::Sealed; mod priv_in_pub { @@ -22,47 +19,18 @@ impl TextLen for &'_ str { } } -impl Sealed for char {} -impl TextLen for char { +impl Sealed for &'_ String {} +impl TextLen for &'_ String { #[inline] fn text_len(self) -> TextSize { - (self.len_utf8() as u32).into() + self.as_str().text_len() } } -impl Sealed for &'_ D where D: TextLen + Copy {} -impl TextLen for &'_ D -where - D: TextLen + Copy, -{ +impl Sealed for char {} +impl TextLen for char { + #[inline] fn text_len(self) -> TextSize { - D::text_len(*self) + (self.len_utf8() as u32).into() } } - -// Because we could not find a smart blanket impl to do this automatically and -// cleanly (rust-analyzer/text-size#36), just provide a bunch of manual impls. -// If a standard type fits in this macro and you need it to impl TextLen, just -// open a PR and we are likely to accept it. Or convince Rust to deref to &str. 
-macro_rules! impl_textlen_for_string { - ($($ty:ty),+ $(,)?) => {$( - impl Sealed for $ty {} - impl TextLen for $ty { - #[inline] - fn text_len(self) -> TextSize { - <&str>::text_len(self) - } - } - )+}; -} - -impl_textlen_for_string! { - &Box, - &String, - &Cow<'_, str>, - &Cow<'_, String>, - &Arc, - &Arc, - &Rc, - &Rc, -} From 806f884b777f8bcdf848e7fe70edae5e42abc103 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 25 Apr 2020 11:32:14 +0200 Subject: [PATCH 129/322] Sprinkle inline --- src/range.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/range.rs b/src/range.rs index ffb71d3e0205..3b946ab43644 100644 --- a/src/range.rs +++ b/src/range.rs @@ -157,6 +157,7 @@ impl TextRange { /// assert!(range.contains(start)); /// assert!(!range.contains(end)); /// ``` + #[inline] pub fn contains(self, offset: TextSize) -> bool { self.start() <= offset && offset < self.end() } @@ -175,6 +176,7 @@ impl TextRange { /// assert!(range.contains_inclusive(start)); /// assert!(range.contains_inclusive(end)); /// ``` + #[inline] pub fn contains_inclusive(self, offset: TextSize) -> bool { self.start() <= offset && offset <= self.end() } @@ -194,6 +196,7 @@ impl TextRange { /// assert!(larger.contains_range(larger)); /// assert!(smaller.contains_range(smaller)); /// ``` + #[inline] pub fn contains_range(self, other: TextRange) -> bool { self.start() <= other.start() && other.end() <= self.end() } @@ -213,6 +216,7 @@ impl TextRange { /// Some(TextRange::new(5.into(), 10.into())), /// ); /// ``` + #[inline] pub fn intersect(self, other: TextRange) -> Option { let start = cmp::max(self.start(), other.start()); let end = cmp::min(self.end(), other.end()); @@ -236,6 +240,7 @@ impl TextRange { /// TextRange::new(0.into(), 20.into()), /// ); /// ``` + #[inline] pub fn cover(self, other: TextRange) -> TextRange { let start = cmp::min(self.start(), other.start()); let end = cmp::max(self.end(), other.end()); @@ -253,6 +258,7 @@ impl TextRange { /// 
TextRange::new(0.into(), 20.into()), /// ) /// ``` + #[inline] pub fn cover_offset(self, offset: TextSize) -> TextRange { self.cover(TextRange::empty(offset)) } From 253f122f16e6e5c4400c22bf9531bc36093746fa Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 25 Apr 2020 11:38:10 +0200 Subject: [PATCH 130/322] Fix docs --- src/range.rs | 6 +++--- tests/constructors.rs | 29 +++++++++-------------------- tests/main.rs | 2 +- 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/src/range.rs b/src/range.rs index 3b946ab43644..fcf286d62e98 100644 --- a/src/range.rs +++ b/src/range.rs @@ -98,8 +98,8 @@ impl TextRange { /// let range = TextRange::up_to(point); /// /// assert_eq!(range.len(), point); - /// assert_eq!(range, TextRange::new(TextSize::zero(), point)); - /// assert_eq!(range, TextRange::at(TextSize::zero(), point)); + /// assert_eq!(range, TextRange::new(0.into(), point)); + /// assert_eq!(range, TextRange::at(0.into(), point)); /// ``` #[inline] pub fn up_to(end: TextSize) -> TextRange { @@ -254,7 +254,7 @@ impl TextRange { /// ```rust /// # use text_size::*; /// assert_eq!( - /// TextRange::empty(TextSize::zero()).cover_offset(20.into()), + /// TextRange::empty(0.into()).cover_offset(20.into()), /// TextRange::new(0.into(), 20.into()), /// ) /// ``` diff --git a/tests/constructors.rs b/tests/constructors.rs index 9c9d0801caf0..9ff4e19c6211 100644 --- a/tests/constructors.rs +++ b/tests/constructors.rs @@ -1,35 +1,24 @@ -use { - std::{borrow::Cow, sync::Arc}, - text_size::*, -}; +use text_size::TextSize; #[derive(Copy, Clone)] struct BadRope<'a>(&'a [&'a str]); impl BadRope<'_> { fn text_len(self) -> TextSize { - self.0.iter().map(TextSize::of).sum() + self.0.iter().copied().map(TextSize::of).sum() } } #[test] fn main() { - macro_rules! test { - ($($expr:expr),+ $(,)?) => { - $(let _ = TextSize::of($expr);)+ - }; - } + let x: char = 'c'; + let _ = TextSize::of(x); - test! 
{ - "", - &"", - 'a', - &'a', - &String::new(), - &String::new().into_boxed_str(), - &Arc::new(String::new()), - &Cow::Borrowed(""), - } + let x: &str = "hello"; + let _ = TextSize::of(x); + + let x: &String = &"hello".into(); + let _ = TextSize::of(x); let _ = BadRope(&[""]).text_len(); } diff --git a/tests/main.rs b/tests/main.rs index f8eb6d67352e..5e6b86d6598b 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -26,7 +26,7 @@ fn checked_math() { assert_eq!(size(1).checked_add(size(1)), Some(size(2))); assert_eq!(size(1).checked_sub(size(1)), Some(size(0))); assert_eq!(size(1).checked_sub(size(2)), None); - assert_eq!(TextSize::MAX.checked_add(size(1)), None); + assert_eq!(size(!0).checked_add(size(1)), None); } #[test] From 5c24bbb969b5058626eba1742653630c5d02f39e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 25 Apr 2020 11:46:20 +0200 Subject: [PATCH 131/322] :tada: 1.0.0 :tada: --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9c75a9af0442..010e3bb4c6d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-size" -version = "1.0.0-pre.1" +version = "1.0.0" edition = "2018" authors = [ From 4bf6b19e182b7cb344f14dbda739527959910172 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Fri, 1 May 2020 13:09:52 -0400 Subject: [PATCH 132/322] Fix TextSize::of docs to no longer suggest implementing TextLen --- src/size.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/size.rs b/src/size.rs index 105e158ca4e2..ab2ec9a73076 100644 --- a/src/size.rs +++ b/src/size.rs @@ -33,11 +33,9 @@ impl fmt::Debug for TextSize { } impl TextSize { - /// The text size of some text-like object. + /// The text size of some primitive text-like object. /// - /// Accepts `char`, `&str`, and references to any custom string-like type - /// that dereferences to `str`. Types that don't dereference to `str` but - /// want to be usable in this constructor can implement [`TextLen`]. 
+ /// Accepts `char`, `&str`, and `&String`. /// /// # Examples /// From 633320cc7cf44b3f216c04a93eeda8e865c247ff Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 6 Jul 2020 15:31:35 +0200 Subject: [PATCH 133/322] Speed up From<SmolStr> for String Thanks https://github.com/fasterthanlime! --- Cargo.toml | 2 +- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b691d2d2bb62..eeb3b1b0925a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.15" +version = "0.1.16" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" diff --git a/src/lib.rs b/src/lib.rs index e4cca4717681..bdd228b89a4a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -317,7 +317,7 @@ where impl From for String { fn from(text: SmolStr) -> Self { - text.to_string() + text.as_str().into() } } From af015790d036b0767ac94b5e902cea001300a1c0 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 6 Jul 2020 13:12:41 -0400 Subject: [PATCH 134/322] Rustfmt --- src/lib.rs | 2 +- tests/test.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bdd228b89a4a..45f1cb254283 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -410,9 +410,9 @@ impl Repr { #[cfg(feature = "serde")] mod serde { + use super::SmolStr; use ::serde::de::{Deserializer, Error, Unexpected, Visitor}; use std::fmt; - use super::SmolStr; // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result diff --git a/tests/test.rs b/tests/test.rs index ab2235d3c5c8..57c0e8447fa3 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -87,7 +87,7 @@ proptest! 
{ #[cfg(feature = "serde")] mod serde_tests { use super::*; - use serde::{Serialize, Deserialize}; + use serde::{Deserialize, Serialize}; use std::collections::HashMap; #[derive(Serialize, Deserialize)] From 90dc211ed369c7592b141a48aa8c73f10cfd1065 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 6 Jul 2020 16:07:29 -0400 Subject: [PATCH 135/322] Avoid checking long strings for matching against whitespace Previously, the string was checked for starting with newlines and ending with spaces, then ensuring that the length of those substrings were short enough to use our constant. Instead, only do the check for as many items as we have in the WS constant. In the worst case, this avoids an O(n) check if the input is a long string of `\n`, possibly followed by a long string of spaces. --- src/lib.rs | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 45f1cb254283..7314d270f322 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,10 @@ -use std::{borrow::Borrow, cmp::Ordering, fmt, hash, iter, ops::Deref, sync::Arc}; +use std::{ + borrow::Borrow, + cmp::{self, Ordering}, + fmt, hash, iter, + ops::Deref, + sync::Arc, +}; /// A `SmolStr` is a string type that has the following properties: /// @@ -358,10 +364,17 @@ impl Repr { }; } - let newlines = text.bytes().take_while(|&b| b == b'\n').count(); - if text[newlines..].bytes().all(|b| b == b' ') { - let spaces = len - newlines; - if newlines <= N_NEWLINES && spaces <= N_SPACES { + if len <= N_NEWLINES + N_SPACES { + let bytes = text.as_bytes(); + let possible_newline_count = cmp::min(len, N_NEWLINES); + let newlines = bytes[..possible_newline_count] + .iter() + .take_while(|&&b| b == b'\n') + .count(); + let possible_space_count = len - newlines; + if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') + { + let spaces = possible_space_count; return Repr::Substring { newlines, spaces }; } } From 
25ffc3004d146810f425d93fb3b0080e52137e29 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 6 Jul 2020 18:16:33 -0400 Subject: [PATCH 136/322] Avoid possible extra monomorphization By pulling `from_char_iter()` into a function, we can avoid multiple monomorphizations of `FromIterator` when multiple `IntoIterator`s result in the same iterator type. --- src/lib.rs | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bdd228b89a4a..60fc81e2279a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -136,6 +136,27 @@ impl SmolStr { _ => false, } } + + fn from_char_iter>(mut iter: I) -> SmolStr { + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + while let Some(ch) = iter.next() { + let size = ch.len_utf8(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push(ch); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + ch.encode_utf8(&mut buf[len..]); + len += size; + } + SmolStr(Repr::Inline { + len: len as u8, + buf, + }) + } } impl Default for SmolStr { @@ -240,25 +261,8 @@ impl fmt::Display for SmolStr { impl iter::FromIterator for SmolStr { fn from_iter>(iter: I) -> SmolStr { - let mut len = 0; - let mut buf = [0u8; INLINE_CAP]; - let mut iter = iter.into_iter(); - while let Some(ch) = iter.next() { - let size = ch.len_utf8(); - if size + len > INLINE_CAP { - let mut heap = String::with_capacity(size + len); - heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); - heap.push(ch); - heap.extend(iter); - return SmolStr(Repr::Heap(heap.into_boxed_str().into())); - } - ch.encode_utf8(&mut buf[len..]); - len += size; - } - SmolStr(Repr::Inline { - len: len as u8, - buf, - }) + let iter = iter.into_iter(); + Self::from_char_iter(iter) } } From d22b15ac9c2f335afd7fac9fb72fce3abe0fb836 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 6 Jul 
2020 18:23:01 -0400 Subject: [PATCH 137/322] Use the number of remaining elements in the char iter for allocation When collecting from an iterator of chars, when expanding past INLINE_CAP, include extra space for at least one byte per char for any remaining known size. --- src/lib.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 60fc81e2279a..c449f3611c74 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -138,12 +138,18 @@ impl SmolStr { } fn from_char_iter>(mut iter: I) -> SmolStr { + let (min_size, _) = iter.size_hint(); + if min_size > INLINE_CAP { + let heap: String = iter.collect(); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } let mut len = 0; let mut buf = [0u8; INLINE_CAP]; while let Some(ch) = iter.next() { let size = ch.len_utf8(); if size + len > INLINE_CAP { - let mut heap = String::with_capacity(size + len); + let (min_remaining, _) = iter.size_hint(); + let mut heap = String::with_capacity(size + len + min_remaining); heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); heap.push(ch); heap.extend(iter); From 6349e32c61b20c262b436c699f98fff1ab302ca9 Mon Sep 17 00:00:00 2001 From: Atul Bhosale Date: Sun, 5 Jul 2020 23:12:18 +0530 Subject: [PATCH 138/322] Add fmt tidy test --- tests/tidy.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/tidy.rs diff --git a/tests/tidy.rs b/tests/tidy.rs new file mode 100644 index 000000000000..a716e35b2f14 --- /dev/null +++ b/tests/tidy.rs @@ -0,0 +1,46 @@ +use std::{ + env, + path::{Path, PathBuf}, + process::{Command, Stdio}, +}; + +fn project_root() -> PathBuf { + PathBuf::from( + env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| env!("CARGO_MANIFEST_DIR").to_owned()), + ) +} + +fn run(cmd: &str, dir: impl AsRef) -> Result<(), ()> { + let mut args: Vec<_> = cmd.split_whitespace().collect(); + let bin = args.remove(0); + println!("> {}", cmd); + let output = Command::new(bin) + 
.args(args) + .current_dir(dir) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .output() + .map_err(drop)?; + if output.status.success() { + Ok(()) + } else { + let stdout = String::from_utf8(output.stdout).map_err(drop)?; + print!("{}", stdout); + Err(()) + } +} + +#[test] +fn check_code_formatting() { + let dir = project_root(); + if run("rustfmt +stable --version", &dir).is_err() { + panic!( + "failed to run rustfmt from toolchain 'stable'; \ + please run `rustup component add rustfmt --toolchain stable` to install it.", + ); + } + if run("cargo +stable fmt -- --check", &dir).is_err() { + panic!("code is not properly formatted; please format the code by running `cargo fmt`") + } +} From e94ba4494124078a4a6426a74938274030f11c88 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 28 Jul 2020 23:46:26 +0200 Subject: [PATCH 139/322] 1.0.0 --- .gitignore | 2 + Cargo.toml | 11 + README.md | 5 + rust.ungram | 618 +++++++++++++++++++++++++++++++++++++++++++++++ src/error.rs | 45 ++++ src/lexer.rs | 129 ++++++++++ src/lib.rs | 89 +++++++ src/parser.rs | 217 +++++++++++++++++ ungrammar.ungram | 16 ++ 9 files changed, 1132 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 rust.ungram create mode 100644 src/error.rs create mode 100644 src/lexer.rs create mode 100644 src/lib.rs create mode 100644 src/parser.rs create mode 100644 ungrammar.ungram diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000000..9c71cc94a03b --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/Cargo.lock +/target \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000000..53e92506e966 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "ungrammar" +description = "A DSL for describing concrete syntax trees" +version = "1.0.0" +license = "MIT OR Apache-2.0" +repository = "https://github.com/matklad/ungrammar" +authors = 
["Aleksey Kladov "] +edition = "2018" + +[dependencies] +# nope diff --git a/README.md b/README.md new file mode 100644 index 000000000000..b4f8f375ecde --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# ungrammar + +A DLS for specifying concrete syntax tree. + +See [./rust.ungram](./rust.ungram) for an example. diff --git a/rust.ungram b/rust.ungram new file mode 100644 index 000000000000..ef5da726229a --- /dev/null +++ b/rust.ungram @@ -0,0 +1,618 @@ +// Rust Un-Grammar. +// +// This grammar specifies the structure of Rust's concrete sytnax tree. +// It does not specify parsing rules (ambiguities, precedence, etc are out of scope). +// Tokens are processed -- contextual keywords are recogniesed, compound operators glued. +// +// Legend: +// +// // -- comment +// Name = -- non-termial defition +// 'ident' -- token (terminal) +// A B -- sequence +// A | B -- alternation +// A* -- zero or more repetition +// A? -- zero or one repetition +// (A) -- same as A +// label:A -- suggested name for field of AST node + +//*************************// +// Names, Paths and Macros // +//*************************// + +Name = + 'ident' + +NameRef = + 'ident' | 'int_number' + +Path = + (qualifier:Path '::')? segment:PathSegment + +PathSegment = + 'crate' | 'self' | 'super' +| '::'? NameRef +| NameRef GenericArgList? +| NameRef ParamList RetType? +| '<' PathType ('as' PathType)? '>' + +GenericArgList = + '::'? '<' (GenericArg (',' GenericArg)* ','?)? '>' + +GenericArg = + TypeArg +| AssocTypeArg +| LifetimeArg +| ConstArg + +TypeArg = + Type + +AssocTypeArg = + NameRef (':' TypeBoundList | '=' Type) + +LifetimeArg = + 'lifetime' + +ConstArg = + Expr + +MacroCall = + Attr* Path '!' Name? TokenTree ';'? + +TokenTree = + '(' ')' +| '{' '}' +| '[' ']' + +MacroItems = + Item* + +MacroStmts = + statements:Stmt* + Expr? + +//*************************// +// Items // +//*************************// + +SourceFile = + 'shebang'? 
+ Attr* + Item* + +Item = + Const +| Enum +| ExternBlock +| ExternCrate +| Fn +| Impl +| MacroCall +| Module +| Static +| Struct +| Trait +| TypeAlias +| Union +| Use + +Module = + Attr* Visibility? + 'mod' Name + (ItemList | ';') + +ItemList = + '{' Attr* Item* '}' + +ExternCrate = + Attr* Visibility? + 'extern' 'crate' (NameRef | 'self') Rename? ';' + +Rename = + 'as' (Name | '_') + +Use = + Attr* Visibility? + 'use' UseTree ';' + +UseTree = + (Path? '::')? ('*' | UseTreeList) +| Path Rename? + +UseTreeList = + '{' (UseTree (',' UseTree)* ','?)? '}' + +Fn = + Attr* Visibility? + 'default'? ('async' | 'const')? 'unsafe'? Abi? + 'fn' Name GenericParamList? ParamList RetType? WhereClause? + (body:BlockExpr | ';') + +Abi = + 'extern' 'string'? + +ParamList = + '('( + SelfParam + | (SelfParam ',')? (Param (',' Param)* ','?)? + )')' + +SelfParam = + Attr* ( + ('&' 'lifetime'?)? 'mut'? 'self' + | 'mut'? 'self' ':' Type + ) + +Param = + Attr* ( + Pat (':' Type) + | Type + | '...' + ) + +RetType = + '->' Type + +TypeAlias = + Attr* Visibility? + 'default'? + 'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause? + '=' Type ';' + +Struct = + Attr* Visibility? + 'struct' Name GenericParamList? ( + WhereClause? (RecordFieldList | ';') + | TupleFieldList WhereClause? ';' + ) + +RecordFieldList = + '{' fields:(RecordField (',' RecordField)* ','?)? '}' + +RecordField = + Attr* Visibility? + Name ':' Type + +TupleFieldList = + '(' fields:(TupleField (',' TupleField)* ','?)? ')' + +TupleField = + Attr* Visibility? + Type + +FieldList = + RecordFieldList +| TupleFieldList + +Enum = + Attr* Visibility? + 'enum' Name GenericParamList? WhereClause? + VariantList + +VariantList = + '{' (Variant (',' Variant)* ','?)? '}' + +Variant = + Attr* Visibility? + Name FieldList ('=' Expr)? + +Union = + Attr* Visibility? + 'union' Name GenericParamList? WhereClause? + RecordFieldList + +AdtDef = + Enum +| Struct +| Union + +Const = + Attr* Visibility? + 'default'? 
+ 'const' (Name | '_') ':' Type + '=' body:Expr ';' + +Static = + Attr* Visibility? + 'static'? 'mut'? Name ':' Type + '=' body:Expr ';' + +Trait = + Attr* Visibility? + 'unsafe'? 'auto'? + 'trait' Name GenericParamList (':' TypeBoundList?)? WhereClause + AssocItemList + +AssocItemList = + '{' Attr* AssocItem* '}' + +AssocItem = + Const +| Fn +| MacroCall +| TypeAlias + +Impl = + Attr* Visibility? + 'default'? 'unsafe'? + 'impl' 'const'? GenericParamList? ('!'? trait:Type 'for')? self_ty:Type WhereClause? + AssocItemList + +ExternBlock = + Attr* Abi ExternItemList + +ExternItemList = + '{' Attr* ExternItem* '}' + +ExternItem = + Fn | Static | MacroCall + +GenericParamList = + '<' (GenericParam (',' GenericParam)* ','?)? '>' + +GenericParam = + ConstParam +| LifetimeParam +| TypeParam + +TypeParam = + Attr* Name (':' TypeBoundList?)? + ('=' default_type:Type)? + +ConstParam = + Attr* 'const' Name ':' Type + ('=' default_val:Expr)? + +LifetimeParam = + Attr* 'lifetime' (':' TypeBoundList?)? + +WhereClause = + 'where' predicates:(WherePred (',' WherePred)* ','?) + +WherePred = + ('for' GenericParamList)? ('lifetime' | Type) ':' TypeBoundList + +Visibility = + 'pub' ('(' + 'super' + | 'self' + | 'crate' + | 'in' Path + ')')? + +Attr = + '#' '!'? '[' Path ('=' Literal | TokenTree)? ']' + +//****************************// +// Statements and Expressions // +//****************************// + +Stmt = + ExprStmt +| Item +| LetStmt + +LetStmt = + Attr* 'let' Pat (':' Type)? + '=' initializer:Expr ';' + +ExprStmt = + Attr* Expr ';'? 
+ +Expr = + ArrayExpr +| AwaitExpr +| BinExpr +| BlockExpr +| BoxExpr +| BreakExpr +| CallExpr +| CastExpr +| ClosureExpr +| ContinueExpr +| EffectExpr +| FieldExpr +| ForExpr +| IfExpr +| IndexExpr +| Literal +| LoopExpr +| MacroCall +| MatchExpr +| MethodCallExpr +| ParenExpr +| PathExpr +| PrefixExpr +| RangeExpr +| RecordExpr +| RefExpr +| ReturnExpr +| TryExpr +| TupleExpr +| WhileExpr + +Literal = + Attr* value:( + 'int_number' | 'float_number' + | 'string' | 'raw_string' + | 'byte_string' | 'raw_byte_string' + | 'true' | 'false' + | 'char' | 'byte' + ) + +PathExpr = + Attr* Path + +BlockExpr = + '{' + Attr* + statements:Stmt* + Expr? + '}' + +RefExpr = + Attr* '&' ('raw' |'mut' | 'const') Expr + +TryExpr = + Attr* Expr '?' + +EffectExpr = + Attr* Label? ('try' | 'unsafe' | 'async') BlockExpr + +PrefixExpr = + Attr* op:('-' | '!' | '*') Expr + +BinExpr = + Attr* + lhs:Expr + op:( + '||' | '&&' + | '==' | '!=' | '<=' | '>=' | '<' | '>' + | '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&' + | '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^=' + ) + rhs:Expr + +CastExpr = + Attr* Expr 'as' Type + +ParenExpr = + Attr* '(' Attr* Expr ')' + +ArrayExpr = + Attr* '[' Attr* ( + (Expr (',' Expr)* ','?)? + | Expr ';' Expr + ) ']' + +IndexExpr = + Attr* base:Expr '[' index:Expr ']' + +TupleExpr = + Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')' + +RecordExpr = + Path RecordExprFieldList + +RecordExprFieldList = + '{' + Attr* + fields:(RecordExprField (',' RecordExprField)* ','?) + ('..' spread:Expr)? + '}' + +RecordExprField = + Attr* NameRef (':' Expr)? + +CallExpr = + Attr* Expr ArgList + +ArgList = + '(' args:(Expr (',' Expr)* ','?)? ')' + +MethodCallExpr = + Attr* Expr '.' NameRef GenericArgList? ArgList + +FieldExpr = + Attr* Expr '.' NameRef + +ClosureExpr = + Attr* 'static'? 'async'? 'move'? ParamList RetType? + body:Expr + +IfExpr = + Attr* 'if' Condition then_branch:BlockExpr + ('else' else_branch:(IfExpr | BlockExpr))? 
+ +Condition = + 'let' Pat '=' Expr +| Expr + +LoopExpr = + Attr* Label? 'loop' + loop_body:BlockExpr + +ForExpr = + Attr* Label? 'for' Pat 'in' iterable:Expr + loop_body:BlockExpr + +WhileExpr = + Attr* Label? 'while' Condition + loop_body:BlockExpr + +Label = + 'lifetime' + +BreakExpr = + Attr* 'break' 'lifetime'? Expr? + +ContinueExpr = + Attr* 'continue' 'lifetime'? + +RangeExpr = + Attr* start:Expr? op:('..' | '..=') end:Expr? + +MatchExpr = + Attr* 'match' Expr MatchArmList + +MatchArmList = + '{' + Attr* + arms:MatchArm* + '}' + +MatchArm = + Attr* Pat guard:MatchGuard? '=>' Expr ','? + +MatchGuard = + 'if' Expr + +ReturnExpr = + Attr* 'return' Expr? + +AwaitExpr = + Attr* Expr '.' 'await' + +BoxExpr = + Attr* 'box' Expr + +//*************************// +// Types // +//*************************// + +Type = + ArrayType +| DynTraitType +| FnPointerType +| ForType +| ImplTraitType +| InferType +| NeverType +| ParenType +| PathType +| PointerType +| ReferenceType +| SliceType +| TupleType + +ParenType = + '(' Type ')' + +NeverType = + '!' + +PathType = + Path + +TupleType = + '(' fields:(Type (',' Type)* ','?)? ')' + +PointerType = + '*' ('const' | 'mut') Type + +ReferenceType = + '&' 'lifetime'? 'mut'? Type + +ArrayType = + '[' Type ';' Expr ']' + +SliceType = + '[' Type ']' + +InferType = + '_' + +FnPointerType = + 'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType? + +ForType = + 'for' GenericParamList Type + +ImplTraitType = + 'impl' TypeBoundList + +DynTraitType = + 'dyn' TypeBoundList + +TypeBoundList = + bounds:(TypeBound ('+' TypeBound)* '+'?) + +TypeBound = + 'lifetime' +| '?'? Type + +//************************// +// Patterns // +//************************// + +Pat = + IdentPat +| BoxPat +| RestPat +| LiteralPat +| MacroPat +| OrPat +| ParenPat +| PathPat +| WildcardPat +| RangePat +| RecordPat +| RefPat +| SlicePat +| TuplePat +| TupleStructPat + +LiteralPat = + Literal + +IdentPat = + Attr* 'ref'? 'mut'? Name ('@' Pat)? 
+ +WildcardPat = + '_' + +RangePat = + start:Pat op:('..' | '..=') end:Pat + +RefPat = + '&' 'mut'? Pat + +RecordPat = + Path RecordPatFieldList + +RecordPatFieldList = + '{' + fields:(RecordPatField (',' RecordPatField)* ','?) + '..'? + '}' + +RecordPatField = + Attr* (NameRef ':')? Pat + +TupleStructPat = + Path '(' fields:(Pat (',' Pat)* ','?)? ')' + +TuplePat = + '(' fields:(Pat (',' Pat)* ','?)? ')' + +ParenPat = + '(' Pat ')' + +SlicePat = + '[' (Pat (',' Pat)* ','?)? ']' + +PathPat = + Path + +OrPat = + (Pat ('|' Pat)* '|'?) + +BoxPat = + 'box' Pat + +RestPat = + '..' + +MacroPat = + MacroCall diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 000000000000..8d1738dbf3a7 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,45 @@ +//! Boilerplate error definitions. +use std::fmt; + +use crate::lexer::Location; + +pub type Result = std::result::Result; + +#[derive(Debug)] +pub struct Error { + pub(crate) message: String, + pub(crate) location: Option, +} + +impl Error { + pub(crate) fn with_location(self, location: Location) -> Error { + Error { + location: Some(location), + ..self + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(loc) = self.location { + write!(f, "{}:{}: ", loc.line, loc.column)? + } + write!(f, "{}", self.message) + } +} + +macro_rules! _format_err { + ($($tt:tt)*) => { + $crate::error::Error { + message: format!($($tt)*), + location: None, + } + }; +} +pub(crate) use _format_err as format_err; + +macro_rules! _bail { + ($($tt:tt)*) => { return Err($crate::error::format_err!($($tt)*)) }; +} +pub(crate) use _bail as bail; diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 000000000000..f4c979b5bdc7 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,129 @@ +//! 
Simple hand-written ungrammar lexer +use crate::error::{bail, Result}; + +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum TokenKind { + Node(String), + Token(String), + Eq, + Star, + Pipe, + QMark, + Colon, + LParen, + RParen, +} + +#[derive(Debug)] +pub(crate) struct Token { + pub(crate) kind: TokenKind, + pub(crate) loc: Location, +} + +#[derive(Copy, Clone, Default, Debug)] +pub(crate) struct Location { + pub(crate) line: usize, + pub(crate) column: usize, +} + +impl Location { + fn advance(&mut self, text: &str) { + match text.rfind('\n') { + Some(idx) => { + self.line += text.chars().filter(|&it| it == '\n').count(); + self.column = text[idx + 1..].chars().count(); + } + None => self.column += text.chars().count(), + } + } +} + +pub(crate) fn tokenize(mut input: &str) -> Result> { + let mut res = Vec::new(); + let mut loc = Location::default(); + while !input.is_empty() { + let old_input = input; + skip_ws(&mut input); + skip_comment(&mut input); + if old_input.len() == input.len() { + match advance(&mut input) { + Ok(kind) => { + res.push(Token { kind, loc }); + } + Err(err) => return Err(err.with_location(loc)), + } + } + let consumed = old_input.len() - input.len(); + loc.advance(&old_input[..consumed]); + } + + Ok(res) +} + +fn skip_ws(input: &mut &str) { + *input = input.trim_start_matches(is_whitespace) +} +fn skip_comment(input: &mut &str) { + if input.starts_with("//") { + let idx = input.find('\n').map_or(input.len(), |it| it + 1); + *input = &input[idx..] + } +} + +fn advance(input: &mut &str) -> Result { + let mut chars = input.chars(); + let c = chars.next().unwrap(); + let res = match c { + '=' => TokenKind::Eq, + '*' => TokenKind::Star, + '?' 
=> TokenKind::QMark, + '(' => TokenKind::LParen, + ')' => TokenKind::RParen, + '|' => TokenKind::Pipe, + ':' => TokenKind::Colon, + '\'' => { + let mut buf = String::new(); + loop { + match chars.next() { + None => bail!("unclosed token literal"), + Some('\\') => match chars.next() { + Some(c) if is_escapable(c) => buf.push(c), + _ => bail!("invalid escape in token literal"), + }, + Some('\'') => break, + Some(c) => buf.push(c), + } + } + TokenKind::Token(buf) + } + c if is_ident_char(c) => { + let mut buf = String::new(); + buf.push(c); + loop { + match chars.clone().next() { + Some(c) if is_ident_char(c) => { + chars.next(); + buf.push(c); + } + _ => break, + } + } + TokenKind::Node(buf) + } + '\r' => bail!("unexpected `\\r`, only Unix-style line endings allowed"), + c => bail!("unexpected character: `{}`", c), + }; + + *input = chars.as_str(); + Ok(res) +} + +fn is_escapable(c: char) -> bool { + matches!(c, '\\' | '\'') +} +fn is_whitespace(c: char) -> bool { + matches!(c, ' ' | '\t' | '\n') +} +fn is_ident_char(c: char) -> bool { + matches!(c, 'a'..='z' | 'A'..='Z' | '_') +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 000000000000..ff56cae9ee63 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,89 @@ +//! Ungrammar -- a DSL for specifying concrete syntax tree grammar. +//! +//! Producing a parser is an explicit non-goal -- it's ok for this grammar to be +//! ambiguous, non LL, non LR, etc. 
+mod error; +mod lexer; +mod parser; + +use std::{ops, str::FromStr}; + +pub use error::{Error, Result}; + +pub fn rust_grammar() -> Grammar { + let src = include_str!("../rust.ungram"); + src.parse().unwrap() +} + +#[derive(Eq, PartialEq, Debug, Copy, Clone)] +pub struct Node(usize); +#[derive(Eq, PartialEq, Debug, Copy, Clone)] +pub struct Token(usize); + +#[derive(Default, Debug)] +pub struct Grammar { + nodes: Vec, + tokens: Vec, +} + +impl FromStr for Grammar { + type Err = Error; + fn from_str(s: &str) -> Result { + let tokens = lexer::tokenize(s)?; + parser::parse(tokens) + } +} + +impl Grammar { + pub fn iter(&self) -> impl Iterator + '_ { + (0..self.nodes.len()).map(Node) + } +} + +impl ops::Index for Grammar { + type Output = NodeData; + fn index(&self, Node(index): Node) -> &NodeData { + &self.nodes[index] + } +} + +impl ops::Index for Grammar { + type Output = TokenData; + fn index(&self, Token(index): Token) -> &TokenData { + &self.tokens[index] + } +} + +#[derive(Debug)] +pub struct NodeData { + pub name: String, + pub rule: Rule, +} + +#[derive(Debug)] +pub struct TokenData { + pub name: String, +} + +#[derive(Debug, Eq, PartialEq)] +pub enum Rule { + Labeled { label: String, rule: Box }, + Node(Node), + Token(Token), + Seq(Vec), + Alt(Vec), + Opt(Box), + Rep(Box), +} + +#[test] +fn smoke() { + let grammar = include_str!("../ungrammar.ungram"); + let grammar = grammar.parse::().unwrap(); + drop(grammar) +} + +#[test] +fn test_rust_grammar() { + let _ = rust_grammar(); +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 000000000000..bd067f22a5b6 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,217 @@ +//! Simple hand-written ungrammar parser. +use std::collections::HashMap; + +use crate::{ + error::{bail, format_err, Result}, + lexer::{self, TokenKind}, + Grammar, Node, NodeData, Rule, Token, TokenData, +}; + +macro_rules! 
bail { + ($loc:expr, $($tt:tt)*) => {{ + let err = $crate::error::format_err!($($tt)*) + .with_location($loc); + return Err(err); + }}; +} + +pub(crate) fn parse(tokens: Vec) -> Result { + let mut p = Parser::new(tokens); + while !p.is_eof() { + node(&mut p)?; + } + p.finish() +} + +#[derive(Default)] +struct Parser { + grammar: Grammar, + tokens: Vec, + node_table: HashMap, + token_table: HashMap, +} + +const DUMMY_RULE: Rule = Rule::Node(Node(!0)); + +impl Parser { + fn new(mut tokens: Vec) -> Parser { + tokens.reverse(); + Parser { + tokens, + ..Parser::default() + } + } + + fn peek(&self) -> Option<&lexer::Token> { + self.peek_n(0) + } + fn peek_n(&self, n: usize) -> Option<&lexer::Token> { + self.tokens.iter().nth_back(n) + } + fn bump(&mut self) -> Result { + self.tokens + .pop() + .ok_or_else(|| format_err!("unexpected EOF")) + } + fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> { + let token = self.bump()?; + if token.kind != kind { + bail!(token.loc, "unexpected token, expected `{}`", what); + } + Ok(()) + } + fn is_eof(&self) -> bool { + self.tokens.is_empty() + } + fn finish(self) -> Result { + for node_data in &self.grammar.nodes { + if matches!(node_data.rule, DUMMY_RULE) { + crate::error::bail!("Undefined node: {}", node_data.name) + } + } + Ok(self.grammar) + } + fn intern_node(&mut self, name: String) -> Node { + let len = self.node_table.len(); + let grammar = &mut self.grammar; + *self.node_table.entry(name.clone()).or_insert_with(|| { + grammar.nodes.push(NodeData { + name, + rule: DUMMY_RULE, + }); + Node(len) + }) + } + fn intern_token(&mut self, name: String) -> Token { + let len = self.token_table.len(); + let grammar = &mut self.grammar; + *self.token_table.entry(name.clone()).or_insert_with(|| { + grammar.tokens.push(TokenData { name }); + Token(len) + }) + } +} + +fn node(p: &mut Parser) -> Result<()> { + let token = p.bump()?; + let node = match token.kind { + TokenKind::Node(it) => p.intern_node(it), + _ => 
bail!(token.loc, "expected ident"), + }; + p.expect(TokenKind::Eq, "=")?; + if !matches!(p.grammar[node].rule, DUMMY_RULE) { + bail!(token.loc, "duplicate rule: `{}`", p.grammar[node].name) + } + + let rule = rule(p)?; + p.grammar.nodes[node.0].rule = rule; + Ok(()) +} + +fn rule(p: &mut Parser) -> Result { + let lhs = seq_rule(p)?; + let mut alt = vec![lhs]; + while let Some(token) = p.peek() { + if token.kind != TokenKind::Pipe { + break; + } + p.bump()?; + let rule = seq_rule(p)?; + alt.push(rule) + } + let res = if alt.len() == 1 { + alt.pop().unwrap() + } else { + Rule::Alt(alt) + }; + Ok(res) +} + +fn seq_rule(p: &mut Parser) -> Result { + let lhs = atom_rule(p)?; + + let mut seq = vec![lhs]; + while let Some(rule) = opt_atom_rule(p)? { + seq.push(rule) + } + let res = if seq.len() == 1 { + seq.pop().unwrap() + } else { + Rule::Seq(seq) + }; + Ok(res) +} + +fn atom_rule(p: &mut Parser) -> Result { + match opt_atom_rule(p)? { + Some(it) => Ok(it), + None => { + let token = p.bump()?; + bail!(token.loc, "unexpected token") + } + } +} + +fn opt_atom_rule(p: &mut Parser) -> Result> { + let token = match p.peek() { + Some(it) => it, + None => return Ok(None), + }; + let mut res = match &token.kind { + TokenKind::Node(name) => { + if let Some(lookahead) = p.peek_n(1) { + match lookahead.kind { + TokenKind::Eq => return Ok(None), + TokenKind::Colon => { + let label = name.clone(); + p.bump()?; + p.bump()?; + let rule = atom_rule(p)?; + let res = Rule::Labeled { + label, + rule: Box::new(rule), + }; + return Ok(Some(res)); + } + _ => (), + } + } + match p.peek_n(1) { + Some(token) if token.kind == TokenKind::Eq => return Ok(None), + _ => (), + } + let name = name.clone(); + p.bump()?; + let node = p.intern_node(name); + Rule::Node(node) + } + TokenKind::Token(name) => { + let name = name.clone(); + p.bump()?; + let token = p.intern_token(name); + Rule::Token(token) + } + TokenKind::LParen => { + p.bump()?; + let rule = rule(p)?; + p.expect(TokenKind::RParen, ")")?; + 
rule + } + _ => return Ok(None), + }; + + if let Some(token) = p.peek() { + match &token.kind { + TokenKind::QMark => { + p.bump()?; + res = Rule::Opt(Box::new(res)); + } + TokenKind::Star => { + p.bump()?; + res = Rule::Rep(Box::new(res)); + } + _ => (), + } + } + Ok(Some(res)) +} diff --git a/ungrammar.ungram b/ungrammar.ungram new file mode 100644 index 000000000000..6cb4e10fb14b --- /dev/null +++ b/ungrammar.ungram @@ -0,0 +1,16 @@ +/// ungrammar for ungrammar +Grammar = + Node * + +Node = + name:'ident' '=' Rule + +Rule = + 'ident' +| 'token_ident' +| Rule * +| Rule ( '|' Rule) * +| Rule '?' +| Rule '*' +| '(' Rule ')' +| label:'ident' ':' From a5cf3c3422ccd8895581e33741abcae90f04ac31 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 1 Aug 2020 01:50:19 +0200 Subject: [PATCH 140/322] ci --- .github/workflows/ci.yaml | 38 ++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 ++ bors.toml | 2 ++ 3 files changed, 42 insertions(+) create mode 100644 .github/workflows/ci.yaml create mode 100644 bors.toml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 000000000000..8b170eb62e2e --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,38 @@ +name: CI +on: + pull_request: + push: + branches: + - master + - staging + - trying + +env: + CARGO_INCREMENTAL: 0 + CARGO_NET_RETRY: 10 + CI: 1 + RUST_BACKTRACE: short + RUSTFLAGS: -D warnings + RUSTUP_MAX_RETRIES: 10 + +jobs: + rust: + name: Rust + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Install Rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + override: true + + - name: Compile + run: cargo test --no-run + + - name: Test + run: cargo test diff --git a/Cargo.toml b/Cargo.toml index 53e92506e966..32a03ef44bbd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,5 +7,7 @@ repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] edition = "2018" 
+exclude = ["/bors.toml", "/.github"] + [dependencies] # nope diff --git a/bors.toml b/bors.toml new file mode 100644 index 000000000000..b92b99ac3020 --- /dev/null +++ b/bors.toml @@ -0,0 +1,2 @@ +status = [ "Rust" ] +delete_merged_branches = true From 7e073b08bc29621e4fc51bfbdc3742e54cc102e4 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 1 Aug 2020 02:02:21 +0200 Subject: [PATCH 141/322] Allow empty statement --- rust.ungram | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index ef5da726229a..c0855f5fbd61 100644 --- a/rust.ungram +++ b/rust.ungram @@ -289,7 +289,8 @@ Attr = //****************************// Stmt = - ExprStmt + ';' +| ExprStmt | Item | LetStmt From d7a03cae259be1920b4f19d1faee3de515088d6d Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 1 Aug 2020 04:44:24 +0200 Subject: [PATCH 142/322] Add ungrammar2json tool --- .github/workflows/ci.yaml | 4 +- Cargo.toml | 5 ++- src/error.rs | 20 +++++----- ungrammar2json/Cargo.toml | 12 ++++++ ungrammar2json/src/main.rs | 77 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 12 deletions(-) create mode 100644 ungrammar2json/Cargo.toml create mode 100644 ungrammar2json/src/main.rs diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8b170eb62e2e..f03f3a9fd66c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -32,7 +32,7 @@ jobs: override: true - name: Compile - run: cargo test --no-run + run: cargo test --workspace --no-run - name: Test - run: cargo test + run: cargo test --workspace diff --git a/Cargo.toml b/Cargo.toml index 32a03ef44bbd..cd62884e03b7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.0.0" +version = "1.1.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] @@ -9,5 +9,8 @@ edition = "2018" exclude = 
["/bors.toml", "/.github"] +[workspace] +members = ["ungrammar2json"] + [dependencies] # nope diff --git a/src/error.rs b/src/error.rs index 8d1738dbf3a7..a7a62d0cc06b 100644 --- a/src/error.rs +++ b/src/error.rs @@ -11,15 +11,6 @@ pub struct Error { pub(crate) location: Option, } -impl Error { - pub(crate) fn with_location(self, location: Location) -> Error { - Error { - location: Some(location), - ..self - } - } -} - impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(loc) = self.location { @@ -29,6 +20,17 @@ impl fmt::Display for Error { } } +impl std::error::Error for Error {} + +impl Error { + pub(crate) fn with_location(self, location: Location) -> Error { + Error { + location: Some(location), + ..self + } + } +} + macro_rules! _format_err { ($($tt:tt)*) => { $crate::error::Error { diff --git a/ungrammar2json/Cargo.toml b/ungrammar2json/Cargo.toml new file mode 100644 index 000000000000..19ca3d832430 --- /dev/null +++ b/ungrammar2json/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "ungrammar2json" +description = "Convert ungrammar files to JSON" +version = "1.0.0" +license = "MIT OR Apache-2.0" +repository = "https://github.com/matklad/ungrammar" +authors = ["Aleksey Kladov "] +edition = "2018" + +[dependencies] +write-json = "0.1.1" +ungrammar = { path = "../", version = "1.1.0" } diff --git a/ungrammar2json/src/main.rs b/ungrammar2json/src/main.rs new file mode 100644 index 000000000000..f588ed5eb624 --- /dev/null +++ b/ungrammar2json/src/main.rs @@ -0,0 +1,77 @@ +use std::{ + env, + io::{self, Read}, + process, +}; + +use ungrammar::{Grammar, Rule}; + +fn main() { + if let Err(err) = try_main() { + eprintln!("{}", err); + process::exit(101); + } +} + +fn try_main() -> io::Result<()> { + if env::args().count() != 1 { + eprintln!("Usage: ungrammar2json < grammar.ungram > grammar.json"); + return Ok(()); + } + let grammar = read_stdin()?; + let grammar = grammar + .parse::() + .map_err(|err| 
io::Error::new(io::ErrorKind::InvalidData, err))?; + + let mut buf = String::new(); + grammar_to_json(&grammar, write_json::object(&mut buf)); + println!("{}", buf); + Ok(()) +} + +fn read_stdin() -> io::Result { + let mut buf = String::new(); + io::stdin().lock().read_to_string(&mut buf)?; + Ok(buf) +} + +fn grammar_to_json(grammar: &Grammar, mut obj: write_json::Object<'_>) { + for node in grammar.iter() { + let node = &grammar[node]; + rule_to_json(grammar, &node.rule, obj.object(&node.name)); + } +} + +fn rule_to_json(grammar: &Grammar, rule: &Rule, mut obj: write_json::Object) { + match rule { + Rule::Labeled { label, rule } => { + obj.string("label", label); + rule_to_json(grammar, rule, obj.object("rule")) + } + Rule::Node(node) => { + obj.string("node", &grammar[*node].name); + } + Rule::Token(token) => { + obj.string("token", &grammar[*token].name); + } + Rule::Seq(rules) | Rule::Alt(rules) => { + let tag = match rule { + Rule::Seq(_) => "seq", + Rule::Alt(_) => "alt", + _ => unreachable!(), + }; + let mut array = obj.array(tag); + for rule in rules { + rule_to_json(grammar, rule, array.object()); + } + } + Rule::Opt(arg) | Rule::Rep(arg) => { + let tag = match rule { + Rule::Opt(_) => "opt", + Rule::Rep(_) => "rep", + _ => unreachable!(), + }; + rule_to_json(grammar, arg, obj.object(tag)); + } + } +} From 7671fa26df0086cb6c212f256f6f40754dc490e5 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 1 Aug 2020 13:05:26 +0200 Subject: [PATCH 143/322] Fmt --- rust.ungram | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index c0855f5fbd61..b4c4f93ec783 100644 --- a/rust.ungram +++ b/rust.ungram @@ -246,7 +246,9 @@ ExternItemList = '{' Attr* ExternItem* '}' ExternItem = - Fn | Static | MacroCall + Fn +| MacroCall +| Static GenericParamList = '<' (GenericParam (',' GenericParam)* ','?)? 
'>' From eacfce808cec9d0ab5921f7cc870092f3fe119ca Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 1 Aug 2020 13:06:08 +0200 Subject: [PATCH 144/322] Allow empty fields --- rust.ungram | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust.ungram b/rust.ungram index b4c4f93ec783..5b6dccd769b6 100644 --- a/rust.ungram +++ b/rust.ungram @@ -401,7 +401,7 @@ RecordExpr = RecordExprFieldList = '{' Attr* - fields:(RecordExprField (',' RecordExprField)* ','?) + fields:(RecordExprField (',' RecordExprField)* ','?)? ('..' spread:Expr)? '}' @@ -586,7 +586,7 @@ RecordPat = RecordPatFieldList = '{' - fields:(RecordPatField (',' RecordPatField)* ','?) + fields:(RecordPatField (',' RecordPatField)* ','?)? '..'? '}' From 2ad534aa4e88df5ab87cb104177e1e6709e640bc Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 1 Aug 2020 13:13:00 +0200 Subject: [PATCH 145/322] Shorten Pointer -> Ptr --- rust.ungram | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rust.ungram b/rust.ungram index 5b6dccd769b6..f39a12012285 100644 --- a/rust.ungram +++ b/rust.ungram @@ -487,14 +487,14 @@ BoxExpr = Type = ArrayType | DynTraitType -| FnPointerType +| FnPtrType | ForType | ImplTraitType | InferType | NeverType | ParenType | PathType -| PointerType +| PtrType | ReferenceType | SliceType | TupleType @@ -511,7 +511,7 @@ PathType = TupleType = '(' fields:(Type (',' Type)* ','?)? ')' -PointerType = +PtrType = '*' ('const' | 'mut') Type ReferenceType = @@ -526,7 +526,7 @@ SliceType = InferType = '_' -FnPointerType = +FnPtrType = 'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType? 
ForType = From d16b5cb88417b2c2aa445ab41883db420cb97e24 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 1 Aug 2020 13:23:32 +0200 Subject: [PATCH 146/322] Shorten ReferenceType -> RefType --- rust.ungram | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust.ungram b/rust.ungram index f39a12012285..a7dcce4789ad 100644 --- a/rust.ungram +++ b/rust.ungram @@ -495,7 +495,7 @@ Type = | ParenType | PathType | PtrType -| ReferenceType +| RefType | SliceType | TupleType @@ -514,7 +514,7 @@ TupleType = PtrType = '*' ('const' | 'mut') Type -ReferenceType = +RefType = '&' 'lifetime'? 'mut'? Type ArrayType = From da1aa8a99738c1a4fe5e321e39aae857c1eb54c8 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 1 Aug 2020 13:46:46 +0200 Subject: [PATCH 147/322] Bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cd62884e03b7..af7e29238fa0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.1.0" +version = "1.1.1" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] From 71350085009cad6ed195d6e8e398b94b40d02153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Duarte?= Date: Mon, 3 Aug 2020 22:32:36 +0100 Subject: [PATCH 148/322] Typo fix --- rust.ungram | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust.ungram b/rust.ungram index a7dcce4789ad..533539fed51d 100644 --- a/rust.ungram +++ b/rust.ungram @@ -1,8 +1,8 @@ // Rust Un-Grammar. // -// This grammar specifies the structure of Rust's concrete sytnax tree. +// This grammar specifies the structure of Rust's concrete syntax tree. // It does not specify parsing rules (ambiguities, precedence, etc are out of scope). -// Tokens are processed -- contextual keywords are recogniesed, compound operators glued. 
+// Tokens are processed -- contextual keywords are recognised, compound operators glued. // // Legend: // From e5430cd24328acffbd069bc7ce386021308f1366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Duarte?= Date: Mon, 3 Aug 2020 22:40:54 +0100 Subject: [PATCH 149/322] Another typo fix --- rust.ungram | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index 533539fed51d..94890002d5c2 100644 --- a/rust.ungram +++ b/rust.ungram @@ -7,7 +7,7 @@ // Legend: // // // -- comment -// Name = -- non-termial defition +// Name = -- non-terminal definition // 'ident' -- token (terminal) // A B -- sequence // A | B -- alternation From 047f7c2de448024f31ccf5e94e8b532c978a3d86 Mon Sep 17 00:00:00 2001 From: Jeff Smits Date: Fri, 7 Aug 2020 14:47:52 +0200 Subject: [PATCH 150/322] Make type in Param optional I assume that's why there were already parentheses around the `':' Type`? --- rust.ungram | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index 94890002d5c2..49379322be98 100644 --- a/rust.ungram +++ b/rust.ungram @@ -146,7 +146,7 @@ SelfParam = Param = Attr* ( - Pat (':' Type) + Pat (':' Type)? | Type | '...' ) From fe7ac06f08b3ed5aae7d4f03d11ba97f7d86c6d9 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 21 Aug 2020 19:04:57 +0200 Subject: [PATCH 151/322] Allow pipes in parameters closes #8 --- rust.ungram | 1 + 1 file changed, 1 insertion(+) diff --git a/rust.ungram b/rust.ungram index 49379322be98..7343c39251b3 100644 --- a/rust.ungram +++ b/rust.ungram @@ -137,6 +137,7 @@ ParamList = SelfParam | (SelfParam ',')? (Param (',' Param)* ','?)? )')' +| '|' (Param (',' Param)* ','?)? 
'|' SelfParam = Attr* ( From 01ab701c16c9a85106b05cfc7b6e08cf8971c949 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 21 Aug 2020 19:07:34 +0200 Subject: [PATCH 152/322] Name MethodCall's receiver --- rust.ungram | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index 7343c39251b3..1f3a70529be3 100644 --- a/rust.ungram +++ b/rust.ungram @@ -416,7 +416,7 @@ ArgList = '(' args:(Expr (',' Expr)* ','?)? ')' MethodCallExpr = - Attr* Expr '.' NameRef GenericArgList? ArgList + Attr* receiver:Expr '.' NameRef GenericArgList? ArgList FieldExpr = Attr* Expr '.' NameRef From f9a230c9e8ae415565c6eddb0ef55c239a81915e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 21 Aug 2020 19:08:56 +0200 Subject: [PATCH 153/322] v1.1.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index af7e29238fa0..da42bb449c31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.1.1" +version = "1.1.2" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] From ebf2403a66969c4d24001fd4120b63fda347f426 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Mon, 24 Aug 2020 21:19:59 +0200 Subject: [PATCH 154/322] Allow type aliases in extern blocks --- rust.ungram | 1 + 1 file changed, 1 insertion(+) diff --git a/rust.ungram b/rust.ungram index 1f3a70529be3..9e9f82a43e96 100644 --- a/rust.ungram +++ b/rust.ungram @@ -250,6 +250,7 @@ ExternItem = Fn | MacroCall | Static +| TypeAlias GenericParamList = '<' (GenericParam (',' GenericParam)* ','?)? 
'>' From 4640b2ef74cf07cd3d9500f08e8dcf2ef4765d52 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Mon, 24 Aug 2020 21:50:33 +0200 Subject: [PATCH 155/322] Bump to 1.1.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index da42bb449c31..f17b3cda5313 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.1.2" +version = "1.1.3" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] From b6d0d743f2ac28e9dba78fd04cb11aec9a5fa02a Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 24 Aug 2020 23:29:43 +0200 Subject: [PATCH 156/322] Continuous release --- .github/ci.rs | 114 ++++++++++++++++++++++++++++++++++++++ .github/workflows/ci.yaml | 8 +-- 2 files changed, 117 insertions(+), 5 deletions(-) create mode 100644 .github/ci.rs diff --git a/.github/ci.rs b/.github/ci.rs new file mode 100644 index 000000000000..ff37c746bdc5 --- /dev/null +++ b/.github/ci.rs @@ -0,0 +1,114 @@ +use std::{ + env, fs, + process::{self, Command, ExitStatus, Stdio}, + time::Instant, +}; + +type Error = Box<dyn std::error::Error>; +type Result<T> = std::result::Result<T, Error>; + +fn main() { + if let Err(err) = try_main() { + eprintln!("{}", err); + process::exit(1); + } +} + +fn try_main() -> Result<()> { + let cwd = env::current_dir()?; + let cargo_toml = cwd.join("Cargo.toml"); + assert!( + cargo_toml.exists(), + "Cargo.toml not found, cwd: {}", + cwd.display() + ); + + { + let _s = Section::new("BUILD"); + shell("cargo test --workspace --no-run")?; + } + + { + let _s = Section::new("TEST"); + shell("cargo test")?; + } + + let current_branch = shell_output("git branch --show-current")?; + if &current_branch == "master" { + let _s = Section::new("PUBLISH"); + let manifest = fs::read_to_string(&cargo_toml)?; + let version = get_field(&manifest, "version")?; + let tag = format!("v{}", version); + let tags = 
shell_output("git tag --list")?; + + if !tags.contains(&tag) { + let token = env::var("CRATES_IO_TOKEN").unwrap(); + shell(&format!("git tag v{}", version))?; + shell(&format!("cargo publish --token {}", token))?; + shell("git push --tags")?; + } + } + Ok(()) +} + +fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> { + for line in text.lines() { + let words = line.split_ascii_whitespace().collect::<Vec<_>>(); + match words.as_slice() { + [n, "=", v, ..] if n.trim() == name => { + assert!(v.starts_with('"') && v.ends_with('"')); + return Ok(&v[1..v.len() - 1]); + } + _ => (), + } + } + Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))? +} + +fn shell(cmd: &str) -> Result<()> { + let status = command(cmd).status()?; + check_status(status) +} + +fn shell_output(cmd: &str) -> Result<String> { + let output = command(cmd).stderr(Stdio::inherit()).output()?; + check_status(output.status)?; + let res = String::from_utf8(output.stdout)?; + Ok(res.trim().to_string()) +} + +fn command(cmd: &str) -> Command { + eprintln!("> {}", cmd); + let words = cmd.split_ascii_whitespace().collect::<Vec<_>>(); + let (cmd, args) = words.split_first().unwrap(); + let mut res = Command::new(cmd); + res.args(args); + res +} + +fn check_status(status: ExitStatus) -> Result<()> { + if !status.success() { + Err(format!("$status: {}", status))?; + } + Ok(()) +} + +struct Section { + name: &'static str, + start: Instant, +} + +impl Section { + fn new(name: &'static str) -> Section { + println!("::group::{}", name); + let start = Instant::now(); + Section { name, start } + } +} + +impl Drop for Section { + fn drop(&mut self) { + eprintln!("{}: {:.2?}", self.name, self.start.elapsed()); + println!("::endgroup::"); + } +} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f03f3a9fd66c..88f133867e71 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -31,8 +31,6 @@ jobs: profile: minimal override: true - - name: Compile - run: cargo test --workspace 
--no-run - - - name: Test - run: cargo test --workspace + - run: rustc ./.github/ci.rs && ./ci + env: + CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} From 84597d486ce0abe64fe2ec03fcb2f2ef36b7dc2b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 24 Aug 2020 23:31:46 +0200 Subject: [PATCH 157/322] Allow both const & async modifiers closes #9 --- Cargo.toml | 2 +- rust.ungram | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f17b3cda5313..3f40dcc64870 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.1.3" +version = "1.1.4" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 9e9f82a43e96..6ecdb7bfb055 100644 --- a/rust.ungram +++ b/rust.ungram @@ -125,7 +125,7 @@ UseTreeList = Fn = Attr* Visibility? - 'default'? ('async' | 'const')? 'unsafe'? Abi? + 'default'? 'const'? 'async'? 'unsafe'? Abi? 'fn' Name GenericParamList? ParamList RetType? WhereClause? 
(body:BlockExpr | ';') From aa77837c05cfb84fa7f28c45ba9dab4a25f7d896 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 24 Aug 2020 23:33:18 +0200 Subject: [PATCH 158/322] Fix .gitignore --- .github/ci.rs | 2 +- .gitignore | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/ci.rs b/.github/ci.rs index ff37c746bdc5..87eb307d633d 100644 --- a/.github/ci.rs +++ b/.github/ci.rs @@ -30,7 +30,7 @@ fn try_main() -> Result<()> { { let _s = Section::new("TEST"); - shell("cargo test")?; + shell("cargo test --workspace")?; } let current_branch = shell_output("git branch --show-current")?; diff --git a/.gitignore b/.gitignore index 9c71cc94a03b..e3bd43f693fa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ +/ci /Cargo.lock -/target \ No newline at end of file +/target From c30083d7779f4a6c95d1f3a2af239feaad9d9e31 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 20 Sep 2020 09:43:24 +0200 Subject: [PATCH 159/322] Add new_inline const-fn constructor --- src/lib.rs | 79 ++++++++++++--------------------------------------- tests/test.rs | 14 +++++++++ 2 files changed, 32 insertions(+), 61 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 945bbc9d7054..6136878b3eca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,67 +25,7 @@ use std::{ pub struct SmolStr(Repr); impl SmolStr { - /// Constructs an inline variant of `SmolStr` at compile time. - /// - /// # Parameters - /// - /// - `len`: Must be short (≤ 22 bytes) - /// - `bytes`: Must be ASCII bytes, and there must be at least `len` of - /// them. If `len` is smaller than the actual len of `bytes`, the string - /// is truncated. - /// - /// # Returns - /// - /// A constant `SmolStr` with inline data. 
- /// - /// # Examples - /// - /// ```rust - /// # use smol_str::SmolStr; - /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii(5, b"hello"); - /// ``` - /// - /// Given a `len` smaller than the number of bytes in `bytes`, the string is - /// cut off: - /// - /// ```rust - /// # use smol_str::SmolStr; - /// const SHORT: SmolStr = SmolStr::new_inline_from_ascii(5, b"hello world"); - /// assert_eq!(SHORT.as_str(), "hello"); - /// ``` - /// - /// ## Compile-time errors - /// - /// This will **fail** at compile-time with a message like "index out of - /// bounds" on a `_len_is_short` because the string is too large: - /// - /// ```rust,compile_fail - /// # use smol_str::SmolStr; - /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii( - /// 49, - /// b"hello world, how are you doing this fine morning?", - /// ); - /// ``` - /// - /// Similarly, this will **fail** to compile with "index out of bounds" on - /// an `_is_ascii` binding because it contains non-ASCII characters: - /// - /// ```rust,compile_fail - /// # use smol_str::SmolStr; - /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii( - /// 2, - /// &[209, 139], - /// ); - /// ``` - /// - /// Last but not least, given a `len` that is larger than the number of - /// bytes in `bytes`, it will fail to compile with "index out of bounds: the - /// len is 5 but the index is 5" on a binding called `byte`: - /// - /// ```rust,compile_fail - /// # use smol_str::SmolStr; - /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii(10, b"hello"); - /// ``` + #[deprecated = "Use `new_inline` instead"] pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { let _len_is_short = [(); INLINE_CAP + 1][len]; @@ -108,6 +48,23 @@ impl SmolStr { }) } + /// Constructs inline variant of `SmolStr`. + /// + /// Panics if `text.len() > 22`. 
+ #[inline] + pub const fn new_inline(text: &str) -> SmolStr { + let mut buf = [0; INLINE_CAP]; + let mut i = 0; + while i < text.len() { + buf[i] = text.as_bytes()[i]; + i += 1 + } + SmolStr(Repr::Inline { + len: text.len() as u8, + buf, + }) + } + pub fn new(text: T) -> SmolStr where T: AsRef, diff --git a/tests/test.rs b/tests/test.rs index 57c0e8447fa3..b067e009048f 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -27,6 +27,20 @@ fn conversions() { #[test] fn const_fn_ctor() { + const EMPTY: SmolStr = SmolStr::new_inline(""); + const A: SmolStr = SmolStr::new_inline("A"); + const HELLO: SmolStr = SmolStr::new_inline("HELLO"); + const LONG: SmolStr = SmolStr::new_inline("ABCDEFGHIZKLMNOPQRSTUV"); + + assert_eq!(EMPTY, SmolStr::from("")); + assert_eq!(A, SmolStr::from("A")); + assert_eq!(HELLO, SmolStr::from("HELLO")); + assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUV")); +} + +#[allow(deprecated)] +#[test] +fn old_const_fn_ctor() { const EMPTY: SmolStr = SmolStr::new_inline_from_ascii(0, b""); const A: SmolStr = SmolStr::new_inline_from_ascii(1, b"A"); const HELLO: SmolStr = SmolStr::new_inline_from_ascii(5, b"HELLO"); From e61b5ac76faeff2c8b186223a6ce5277b84ce694 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 20 Sep 2020 09:49:57 +0200 Subject: [PATCH 160/322] :arrow_up: proptest --- Cargo.toml | 2 +- tests/test.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index eeb3b1b0925a..8f760011d98c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ edition = "2018" serde = { version = "1", optional = true, default_features = false, features = [ "std" ] } [dev-dependencies] -proptest = "0.8.3" +proptest = "0.10" serde_json = "1" serde = { version = "1", features = [ "derive" ] } criterion = "0.2" diff --git a/tests/test.rs b/tests/test.rs index b067e009048f..537df8ddb216 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,5 +1,4 @@ -#[macro_use] -extern crate proptest; +use 
proptest::{prop_assert, prop_assert_eq, proptest}; use smol_str::SmolStr; From 1150aa92ab1fa6ba489f603f1969fb7f37d1ac8d Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 20 Sep 2020 09:54:54 +0200 Subject: [PATCH 161/322] Switch CI to actions --- .github/ci.rs | 116 ++++++++++++++++++++++++++++++++++++++ .github/workflows/ci.yaml | 38 +++++++++++++ .gitignore | 3 +- .travis.yml | 4 -- bors.toml | 4 +- 5 files changed, 157 insertions(+), 8 deletions(-) create mode 100644 .github/ci.rs create mode 100644 .github/workflows/ci.yaml delete mode 100644 .travis.yml diff --git a/.github/ci.rs b/.github/ci.rs new file mode 100644 index 000000000000..b293ebbcb784 --- /dev/null +++ b/.github/ci.rs @@ -0,0 +1,116 @@ +use std::{ + env, fs, + process::{self, Command, ExitStatus, Stdio}, + time::Instant, +}; + +type Error = Box<dyn std::error::Error>; +type Result<T> = std::result::Result<T, Error>; + +fn main() { + if let Err(err) = try_main() { + eprintln!("{}", err); + process::exit(1); + } +} + +fn try_main() -> Result<()> { + let cwd = env::current_dir()?; + let cargo_toml = cwd.join("Cargo.toml"); + assert!( + cargo_toml.exists(), + "Cargo.toml not found, cwd: {}", + cwd.display() + ); + + { + let _s = Section::new("BUILD"); + shell("cargo test --all-features --workspace --no-run")?; + } + + { + let _s = Section::new("TEST"); + shell("cargo test --all-features --workspace")?; + } + + let current_branch = shell_output("git branch --show-current")?; + if &current_branch == "master" { + let _s = Section::new("PUBLISH"); + let manifest = fs::read_to_string(&cargo_toml)?; + let version = get_field(&manifest, "version")?; + let tag = format!("v{}", version); + let tags = shell_output("git tag --list")?; + + if !tags.contains(&tag) { + let token = env::var("CRATES_IO_TOKEN").unwrap(); + shell(&format!("git tag v{}", version))?; + shell(&format!("cargo publish --token {}", token))?; + shell("git push --tags")?; + } + } + Ok(()) +} + +fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> { + for line in 
text.lines() { + let words = line.split_ascii_whitespace().collect::<Vec<_>>(); + match words.as_slice() { + [n, "=", v, ..] if n.trim() == name => { + assert!(v.starts_with('"') && v.ends_with('"')); + return Ok(&v[1..v.len() - 1]); + } + _ => (), + } + } + Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))? +} + +fn shell(cmd: &str) -> Result<()> { + let status = command(cmd).status()?; + check_status(status) +} + +fn shell_output(cmd: &str) -> Result<String> { + let output = command(cmd).stderr(Stdio::inherit()).output()?; + check_status(output.status)?; + let res = String::from_utf8(output.stdout)?; + let res = res.trim().to_string(); + println!("{}", res); + Ok(res) +} + +fn command(cmd: &str) -> Command { + eprintln!("> {}", cmd); + let words = cmd.split_ascii_whitespace().collect::<Vec<_>>(); + let (cmd, args) = words.split_first().unwrap(); + let mut res = Command::new(cmd); + res.args(args); + res +} + +fn check_status(status: ExitStatus) -> Result<()> { + if !status.success() { + Err(format!("$status: {}", status))?; + } + Ok(()) +} + +struct Section { + name: &'static str, + start: Instant, +} + +impl Section { + fn new(name: &'static str) -> Section { + println!("::group::{}", name); + let start = Instant::now(); + Section { name, start } + } +} + +impl Drop for Section { + fn drop(&mut self) { + eprintln!("{}: {:.2?}", self.name, self.start.elapsed()); + println!("::endgroup::"); + } +} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 000000000000..b1bc2175caf2 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,38 @@ +name: CI +on: + pull_request: + push: + branches: + - master + - staging + - trying + +env: + CARGO_INCREMENTAL: 0 + CARGO_NET_RETRY: 10 + CI: 1 + RUST_BACKTRACE: short + RUSTFLAGS: -D warnings + RUSTUP_MAX_RETRIES: 10 + +jobs: + rust: + name: Rust + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Install Rust 
toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + override: true + + - run: rustc ./.github/ci.rs && ./ci + env: + CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} diff --git a/.gitignore b/.gitignore index 4470988469a6..6b500aacba8f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ -target/ +/target +/ci Cargo.lock \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 56abf368957c..000000000000 --- a/.travis.yml +++ /dev/null @@ -1,4 +0,0 @@ -language: rust - -script: - - cargo test --all-features diff --git a/bors.toml b/bors.toml index 574c56320ff0..b92b99ac3020 100644 --- a/bors.toml +++ b/bors.toml @@ -1,4 +1,2 @@ -status = [ - "continuous-integration/travis-ci/push", -] +status = [ "Rust" ] delete_merged_branches = true From 639343b4a244199558d343e5c75abde232cc3d9f Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 20 Sep 2020 09:58:53 +0200 Subject: [PATCH 162/322] Bump major version --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8f760011d98c..683aeddd55c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "smol_str" -version = "0.1.16" -authors = ["Aleksey Kladov "] -repository = "https://github.com/matklad/smol_str" +version = "0.1.17" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" +repository = "https://github.com/matklad/smol_str" +authors = ["Aleksey Kladov "] edition = "2018" [dependencies] From 24553199ff0d812cb4ce8c3373234b3e3111fb50 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 20 Sep 2020 09:59:31 +0200 Subject: [PATCH 163/322] Drop benchmarking I don't really look at the results of the benchmarks anyway, so having them in the repo creates a false sense of benchmarkdness. 
If I get to implementing proper benchmarking, I'd probably stay away from criterion -- we need something much much simpler for this crate. --- Cargo.toml | 5 ----- benches/building.rs | 44 -------------------------------------------- 2 files changed, 49 deletions(-) delete mode 100644 benches/building.rs diff --git a/Cargo.toml b/Cargo.toml index 683aeddd55c0..fee00ec2ba4a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,8 +14,3 @@ serde = { version = "1", optional = true, default_features = false, features = [ proptest = "0.10" serde_json = "1" serde = { version = "1", features = [ "derive" ] } -criterion = "0.2" - -[[bench]] -name = "building" -harness = false diff --git a/benches/building.rs b/benches/building.rs deleted file mode 100644 index 19833146766d..000000000000 --- a/benches/building.rs +++ /dev/null @@ -1,44 +0,0 @@ -#[macro_use] -extern crate criterion; -extern crate smol_str; - -use criterion::{Criterion, ParameterizedBenchmark, Throughput}; -use smol_str::SmolStr; - -fn from_str_iter(c: &mut Criterion) { - use std::iter::FromIterator; - - const SIZES: &[usize] = &[0, 5, 10, 15, 20, 2 << 4, 2 << 5, 2 << 6, 2 << 7, 2 << 8]; - - fn test_data(input: &str, size: usize) -> Vec<&str> { - std::iter::repeat(input).take(size / input.len()).collect() - } - - c.bench( - "FromIterator", - ParameterizedBenchmark::new( - "SmolStr, one byte elements", - |b, &&size| { - let src = test_data("x", size); - b.iter(|| SmolStr::from_iter(src.iter().cloned()).len()) - }, - SIZES, - ) - .with_function("SmolStr, five byte elements", |b, &&size| { - let src = test_data("helloo", size); - b.iter(|| SmolStr::from_iter(src.iter().cloned()).len()) - }) - .with_function("String, one byte elements", |b, &&size| { - let src = test_data("x", size); - b.iter(|| String::from_iter(src.iter().cloned()).len()) - }) - .with_function("String, five byte elements", |b, &&size| { - let src = test_data("hello", size); - b.iter(|| String::from_iter(src.iter().cloned()).len()) - }) - 
.throughput(|elems| Throughput::Bytes(**elems as u32)), - ); -} - -criterion_group!(benches, from_str_iter); -criterion_main!(benches); From de2c360ccb21ba91dedad1207ff994a7635cca5b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 21 Sep 2020 17:15:24 +0200 Subject: [PATCH 164/322] Document MSRV --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 949f6e6ebf68..ad57a10f90fd 100644 --- a/README.md +++ b/README.md @@ -20,3 +20,9 @@ Unlike `String`, however, `SmolStr` is immutable. The primary use case for languages. Strings consisting of a series of newlines, followed by a series of whitespace are a typical pattern in computer programs because of indentation. Note that a specialized interner might be a better solution for some use cases. + +## MSRV Policy + +Minimal Supported Rust Version: latest stable. + +Bumping MSRV is not considered a semver-breaking change. From 45a50fd92978dd77d6198e54da06d7547eeeb3d0 Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Tue, 6 Oct 2020 19:43:15 -0700 Subject: [PATCH 165/322] Update CI badge in readme to point to Github Actions The Travis workflow was deleted in 1150aa92ab1fa6ba489f603f1969fb7f37d1ac8d --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ad57a10f90fd..2e61b9ee4261 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # smol_str -[![Build Status](https://travis-ci.org/matklad/smol_str.svg?branch=master)](https://travis-ci.org/matklad/smol_str) +[![CI](https://github.com/rust-analyzer/smol_str/workflows/CI/badge.svg)](https://github.com/rust-analyzer/smol_str/actions?query=branch%3Amaster+workflow%3ACI) [![Crates.io](https://img.shields.io/crates/v/smol_str.svg)](https://crates.io/crates/smol_str) [![API reference](https://docs.rs/smol_str/badge.svg)](https://docs.rs/smol_str/) From 784f345e5e799e828650da1b1acbb947f1e49a52 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 24 Oct 2020 02:05:24 +0200 
Subject: [PATCH 166/322] Minor --- rust.ungram | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index 6ecdb7bfb055..d8950d2debf0 100644 --- a/rust.ungram +++ b/rust.ungram @@ -164,7 +164,7 @@ TypeAlias = Struct = Attr* Visibility? 'struct' Name GenericParamList? ( - WhereClause? (RecordFieldList | ';') + WhereClause? (RecordFieldList | ';') | TupleFieldList WhereClause? ';' ) From eb7e474d641fdb5210318479696cb7b362190cfd Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 24 Oct 2020 11:52:21 +0200 Subject: [PATCH 167/322] Link post --- README.md | 2 +- src/lib.rs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b4f8f375ecde..ea47622f2208 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # ungrammar -A DLS for specifying concrete syntax tree. +A DLS for specifying concrete syntax tree. See this [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html). See [./rust.ungram](./rust.ungram) for an example. diff --git a/src/lib.rs b/src/lib.rs index ff56cae9ee63..2d51dcc5d276 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,10 @@ //! //! Producing a parser is an explicit non-goal -- it's ok for this grammar to be //! ambiguous, non LL, non LR, etc. +//! +//! See this +//! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html) +//! for details. mod error; mod lexer; mod parser; From 9f9a6f0bc96e53961973f848cd218d3474ee0d52 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Fri, 27 Nov 2020 18:50:39 +0100 Subject: [PATCH 168/322] Move towards upstream `macro_rules!` model --- rust.ungram | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index d8950d2debf0..1d8769fe84fc 100644 --- a/rust.ungram +++ b/rust.ungram @@ -58,7 +58,7 @@ ConstArg = Expr MacroCall = - Attr* Path '!' Name? TokenTree ';'? + Attr* Path '!' TokenTree ';'? 
TokenTree = '(' ')' @@ -89,6 +89,7 @@ Item = | Fn | Impl | MacroCall +| MacroRules | Module | Static | Struct @@ -97,6 +98,14 @@ Item = | Union | Use +MacroRules = + Attr* Visibility? + 'macro_rules' '!' Name + '{' MacroArm (';' MacroArm)* ';'? '}' + +MacroArm = + TokenTree '=>' TokenTree + Module = Attr* Visibility? 'mod' Name From fab616288a6ccf2d16a4e6915efc633d8b9e5583 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Mon, 14 Dec 2020 14:54:29 +0100 Subject: [PATCH 169/322] Bump to 1.2.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3f40dcc64870..e4f058883ba4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.1.4" +version = "1.2.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] From d41a4110554501314bdfcabe5b41971abda99397 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Mon, 14 Dec 2020 15:13:59 +0100 Subject: [PATCH 170/322] Fixup MacroRules to work with xtask codegen --- Cargo.toml | 2 +- rust.ungram | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e4f058883ba4..6c66b66e5b21 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.2.0" +version = "1.2.1" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 1d8769fe84fc..46257f375f27 100644 --- a/rust.ungram +++ b/rust.ungram @@ -101,7 +101,7 @@ Item = MacroRules = Attr* Visibility? 'macro_rules' '!' Name - '{' MacroArm (';' MacroArm)* ';'? '}' + '{' (MacroArm (';' MacroArm)* ';'?)? 
'}' MacroArm = TokenTree '=>' TokenTree From 143cc528b11290bf2463de1fb5a97a15f9b2ed44 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Tue, 15 Dec 2020 15:15:28 +0100 Subject: [PATCH 171/322] Roll back `MacroArm` change It's unclear if this is worthwhile, and this requires a lot of changes in r-a --- Cargo.toml | 2 +- rust.ungram | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6c66b66e5b21..c49486f08ded 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.2.1" +version = "1.2.2" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 46257f375f27..ca3cf9292a6c 100644 --- a/rust.ungram +++ b/rust.ungram @@ -101,10 +101,7 @@ Item = MacroRules = Attr* Visibility? 'macro_rules' '!' Name - '{' (MacroArm (';' MacroArm)* ';'?)? '}' - -MacroArm = - TokenTree '=>' TokenTree + TokenTree Module = Attr* Visibility? From 728247759e38e791f4a398c61cdaa433d22ebadc Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Tue, 15 Dec 2020 18:41:02 +0100 Subject: [PATCH 172/322] Add `MacroDef` for "Macros 2.0" --- rust.ungram | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rust.ungram b/rust.ungram index ca3cf9292a6c..bfa2ade2dfbd 100644 --- a/rust.ungram +++ b/rust.ungram @@ -90,6 +90,7 @@ Item = | Impl | MacroCall | MacroRules +| MacroDef | Module | Static | Struct @@ -103,6 +104,11 @@ MacroRules = 'macro_rules' '!' Name TokenTree +MacroDef = + Attr* Visibility? + 'macro' Name args:TokenTree? + body:TokenTree + Module = Attr* Visibility? 
'mod' Name From a188a93e693d875658cc0eaa1843406d819ed3a9 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 14 Dec 2020 18:06:40 +0100 Subject: [PATCH 173/322] Fix labels missing the colon token in rust ungrammar --- rust.ungram | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index ca3cf9292a6c..e0142b64cdf0 100644 --- a/rust.ungram +++ b/rust.ungram @@ -453,7 +453,7 @@ WhileExpr = loop_body:BlockExpr Label = - 'lifetime' + 'lifetime' ':' BreakExpr = Attr* 'break' 'lifetime'? Expr? From 9e81b8bb79dd5c98be9bc7320efcd29cf07785c1 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 14 Dec 2020 18:14:49 +0100 Subject: [PATCH 174/322] Node-ify lifetime --- Cargo.toml | 2 +- rust.ungram | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c49486f08ded..533abba2fee9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.2.2" +version = "1.3.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index e0142b64cdf0..fdff06a0f231 100644 --- a/rust.ungram +++ b/rust.ungram @@ -26,6 +26,9 @@ Name = NameRef = 'ident' | 'int_number' +Lifetime = + 'lifetime_ident' + Path = (qualifier:Path '::')? segment:PathSegment @@ -52,7 +55,7 @@ AssocTypeArg = NameRef (':' TypeBoundList | '=' Type) LifetimeArg = - 'lifetime' + Lifetime ConstArg = Expr @@ -147,7 +150,7 @@ ParamList = SelfParam = Attr* ( - ('&' 'lifetime'?)? 'mut'? 'self' + ('&' Lifetime?)? 'mut'? 'self' | 'mut'? 'self' ':' Type ) @@ -275,13 +278,13 @@ ConstParam = ('=' default_val:Expr)? LifetimeParam = - Attr* 'lifetime' (':' TypeBoundList?)? + Attr* Lifetime (':' TypeBoundList?)? WhereClause = 'where' predicates:(WherePred (',' WherePred)* ','?) WherePred = - ('for' GenericParamList)? 
('lifetime' | Type) ':' TypeBoundList + ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList Visibility = 'pub' ('(' @@ -453,13 +456,13 @@ WhileExpr = loop_body:BlockExpr Label = - 'lifetime' ':' + Lifetime ':' BreakExpr = - Attr* 'break' 'lifetime'? Expr? + Attr* 'break' Lifetime? Expr? ContinueExpr = - Attr* 'continue' 'lifetime'? + Attr* 'continue' Lifetime? RangeExpr = Attr* start:Expr? op:('..' | '..=') end:Expr? @@ -523,7 +526,7 @@ PtrType = '*' ('const' | 'mut') Type RefType = - '&' 'lifetime'? 'mut'? Type + '&' Lifetime? 'mut'? Type ArrayType = '[' Type ';' Expr ']' @@ -550,7 +553,7 @@ TypeBoundList = bounds:(TypeBound ('+' TypeBound)* '+'?) TypeBound = - 'lifetime' + Lifetime | '?'? Type //************************// From ccc54ede2345dfc3e272c568163275181f5c4708 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Wed, 16 Dec 2020 12:47:15 +0100 Subject: [PATCH 175/322] Bump to 1.3.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c49486f08ded..533abba2fee9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.2.2" +version = "1.3.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] From c87f01ec3c2f8af11a0f51445b92889ee5787fb5 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Wed, 16 Dec 2020 13:33:00 +0100 Subject: [PATCH 176/322] Bump to 1.4.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 533abba2fee9..1ba385676d40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.3.0" +version = "1.4.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] From a2900d258a4ade4b32f924b84bcbe5f0de8ca901 Mon 
Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 15 Dec 2020 20:58:03 +0100 Subject: [PATCH 177/322] Add inline const expression and pattern --- Cargo.toml | 2 +- rust.ungram | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1ba385676d40..10aff5502528 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.4.0" +version = "1.5.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index fdb381f98d91..a954d9efd2e7 100644 --- a/rust.ungram +++ b/rust.ungram @@ -372,13 +372,13 @@ BlockExpr = '}' RefExpr = - Attr* '&' ('raw' |'mut' | 'const') Expr + Attr* '&' ('raw' | 'mut' | 'const') Expr TryExpr = Attr* Expr '?' EffectExpr = - Attr* Label? ('try' | 'unsafe' | 'async') BlockExpr + Attr* Label? ('try' | 'unsafe' | 'async' | 'const') BlockExpr PrefixExpr = Attr* op:('-' | '!' 
| '*') Expr @@ -582,6 +582,7 @@ Pat = | SlicePat | TuplePat | TupleStructPat +| ConstBlockPat LiteralPat = Literal @@ -636,3 +637,6 @@ RestPat = MacroPat = MacroCall + +ConstBlockPat = + 'const' BlockExpr From fe4a8e6f7b6497d700f3f77ca8308ac580101122 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 5 Jan 2021 15:44:45 +0300 Subject: [PATCH 178/322] Rename tail_expr --- Cargo.toml | 2 +- rust.ungram | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 10aff5502528..d2d67e9249de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.5.0" +version = "1.6.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index a954d9efd2e7..d6471d227989 100644 --- a/rust.ungram +++ b/rust.ungram @@ -368,7 +368,7 @@ BlockExpr = '{' Attr* statements:Stmt* - Expr? + tail_expr:Expr? '}' RefExpr = From bf2dc9934912e68a9f8471b4eb2316e83b0db640 Mon Sep 17 00:00:00 2001 From: Daiki Ihara Date: Mon, 11 Jan 2021 20:57:14 +0900 Subject: [PATCH 179/322] Add YieldExpr --- rust.ungram | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rust.ungram b/rust.ungram index d6471d227989..13d5968e3579 100644 --- a/rust.ungram +++ b/rust.ungram @@ -351,6 +351,7 @@ Expr = | TryExpr | TupleExpr | WhileExpr +| YieldExpr Literal = Attr* value:( @@ -491,6 +492,9 @@ MatchGuard = ReturnExpr = Attr* 'return' Expr? +YieldExpr = + Attr* 'yield' Expr? + AwaitExpr = Attr* Expr '.' 
'await' From c332d9d8d7558f58c53dc850679be596b0054e5f Mon Sep 17 00:00:00 2001 From: Daiki Ihara Date: Tue, 12 Jan 2021 23:39:35 +0900 Subject: [PATCH 180/322] Bump to 1.7.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d2d67e9249de..ffc01e3aca10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.6.0" +version = "1.7.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] From e4d0f2b35cab6779eb0dd28297bf53cb13c8c944 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 13 Jan 2021 20:08:43 +0300 Subject: [PATCH 181/322] Allow binary-searching an array of disjoint ranges --- CHANGELOG.md | 6 +++++- Cargo.toml | 2 +- src/range.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2eb012a6d94c..0167599e5580 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,12 @@ # Changelog +## 1.1.0 + +* add `TextRange::ordering` method + ## 1.0.0 :tada: -* the carate is renmaed to `text-size` from `text_unit` +* the carate is renamed to `text-size` from `text_unit` Transition table: - `TextUnit::of_char(c)` ⟹ `TextSize::of(c)` diff --git a/Cargo.toml b/Cargo.toml index 010e3bb4c6d9..19c5a92670d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-size" -version = "1.0.0" +version = "1.1.0" edition = "2018" authors = [ diff --git a/src/range.rs b/src/range.rs index fcf286d62e98..4a98deec5683 100644 --- a/src/range.rs +++ b/src/range.rs @@ -1,3 +1,5 @@ +use cmp::Ordering; + use { crate::TextSize, std::{ @@ -294,6 +296,50 @@ impl TextRange { end: self.end.checked_sub(offset)?, }) } + + /// Relative order of the two ranges (overlapping ranges are considered + /// equal). 
+ /// + /// + /// This is useful when, for example, binary searching an array of disjoint + /// ranges. + /// + /// # Examples + /// + /// ``` + /// # use text_size::*; + /// # use std::cmp::Ordering; + /// + /// let a = TextRange::new(0.into(), 3.into()); + /// let b = TextRange::new(4.into(), 5.into()); + /// assert_eq!(a.ordering(b), Ordering::Less); + /// + /// let a = TextRange::new(0.into(), 3.into()); + /// let b = TextRange::new(3.into(), 5.into()); + /// assert_eq!(a.ordering(b), Ordering::Less); + /// + /// let a = TextRange::new(0.into(), 3.into()); + /// let b = TextRange::new(2.into(), 5.into()); + /// assert_eq!(a.ordering(b), Ordering::Equal); + /// + /// let a = TextRange::new(0.into(), 3.into()); + /// let b = TextRange::new(2.into(), 2.into()); + /// assert_eq!(a.ordering(b), Ordering::Equal); + /// + /// let a = TextRange::new(2.into(), 3.into()); + /// let b = TextRange::new(2.into(), 2.into()); + /// assert_eq!(a.ordering(b), Ordering::Greater); + /// ``` + #[inline] + pub fn ordering(self, other: TextRange) -> Ordering { + if self.end() <= other.start() { + Ordering::Less + } else if other.end() <= self.start() { + Ordering::Greater + } else { + Ordering::Equal + } + } } impl Index for str { From 8ba7eadb46e02575c822be345e1883bf81798e03 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 15 Jan 2021 18:55:01 +0100 Subject: [PATCH 182/322] Replace self/super/crate in PathSegment with NameRef --- Cargo.toml | 2 +- rust.ungram | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ffc01e3aca10..2a86a58d8315 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.7.0" +version = "1.8.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 13d5968e3579..cb55060a8815 100644 --- a/rust.ungram +++ 
b/rust.ungram @@ -24,7 +24,7 @@ Name = 'ident' NameRef = - 'ident' | 'int_number' + 'ident' | 'int_number' | 'self' | 'super' | 'crate' Lifetime = 'lifetime_ident' @@ -33,8 +33,7 @@ Path = (qualifier:Path '::')? segment:PathSegment PathSegment = - 'crate' | 'self' | 'super' -| '::'? NameRef + '::'? NameRef | NameRef GenericArgList? | NameRef ParamList RetType? | '<' PathType ('as' PathType)? '>' From 54df3c1e4930463d1518e9ee84cd19c0cfd1beb0 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 15 Jan 2021 20:19:56 +0100 Subject: [PATCH 183/322] Replace other self/super/crate usages with NameRef --- Cargo.toml | 2 +- rust.ungram | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2a86a58d8315..dcb1aea5eaa6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.8.0" +version = "1.9.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index cb55060a8815..34377f5e92fb 100644 --- a/rust.ungram +++ b/rust.ungram @@ -21,7 +21,7 @@ //*************************// Name = - 'ident' + 'ident' | 'self' NameRef = 'ident' | 'int_number' | 'self' | 'super' | 'crate' @@ -121,7 +121,7 @@ ItemList = ExternCrate = Attr* Visibility? - 'extern' 'crate' (NameRef | 'self') Rename? ';' + 'extern' 'crate' NameRef Rename? ';' Rename = 'as' (Name | '_') @@ -155,8 +155,8 @@ ParamList = SelfParam = Attr* ( - ('&' Lifetime?)? 'mut'? 'self' - | 'mut'? 'self' ':' Type + ('&' Lifetime?)? 'mut'? Name + | 'mut'? Name ':' Type ) Param = @@ -292,12 +292,7 @@ WherePred = ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList Visibility = - 'pub' ('(' - 'super' - | 'self' - | 'crate' - | 'in' Path - ')')? + 'pub' ('(' 'in'? Path ')')? Attr = '#' '!'? '[' Path ('=' Literal | TokenTree)? 
']' From af30e7c517b5b47a4813ee26d227ac3a8d2dbf02 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Mon, 18 Jan 2021 15:15:35 +0100 Subject: [PATCH 184/322] `Type` can also be `MacroCall` --- Cargo.toml | 2 +- rust.ungram | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index dcb1aea5eaa6..80b6af77abd3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.9.0" +version = "1.9.1" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 34377f5e92fb..8dfed606eb71 100644 --- a/rust.ungram +++ b/rust.ungram @@ -506,6 +506,7 @@ Type = | ForType | ImplTraitType | InferType +| MacroCall | NeverType | ParenType | PathType From 1959fdfe4ac43642bb24c6b55538ffbdb88e1392 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Mon, 18 Jan 2021 16:36:22 +0100 Subject: [PATCH 185/322] Wrap type macro in new `MacroType` node --- Cargo.toml | 2 +- rust.ungram | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 80b6af77abd3..252ce0eb07c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.9.1" +version = "1.9.2" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 8dfed606eb71..0e91b7a849fa 100644 --- a/rust.ungram +++ b/rust.ungram @@ -506,7 +506,7 @@ Type = | ForType | ImplTraitType | InferType -| MacroCall +| MacroType | NeverType | ParenType | PathType @@ -521,6 +521,9 @@ ParenType = NeverType = '!' 
+MacroType = + MacroCall + PathType = Path From 075778d56da3be5c67507ad1d29c06d5b5bfc805 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 18 Jan 2021 19:39:19 +0100 Subject: [PATCH 186/322] Swap RecordExprField optional part --- rust.ungram | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index 0e91b7a849fa..c3b31cb6715b 100644 --- a/rust.ungram +++ b/rust.ungram @@ -418,7 +418,7 @@ RecordExprFieldList = '}' RecordExprField = - Attr* NameRef (':' Expr)? + Attr* (NameRef ':')? Expr CallExpr = Attr* Expr ArgList From effe644c66163c78b49404cc2c3e0bc990bac475 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 18 Jan 2021 20:03:04 +0100 Subject: [PATCH 187/322] Correct `const` keyword position in `Impl` rule --- Cargo.toml | 2 +- rust.ungram | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 252ce0eb07c5..bf4a6fa0abf4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.9.2" +version = "1.9.3" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index c3b31cb6715b..fbbc3161c01f 100644 --- a/rust.ungram +++ b/rust.ungram @@ -251,7 +251,7 @@ AssocItem = Impl = Attr* Visibility? 'default'? 'unsafe'? - 'impl' 'const'? GenericParamList? ('!'? trait:Type 'for')? self_ty:Type WhereClause? + 'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause? 
AssocItemList ExternBlock = From 66763c849cce0a3ff42e1a7afa53ba327ebe9573 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 21 Jan 2021 18:46:51 -0800 Subject: [PATCH 188/322] Derive more for Node and Token --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2d51dcc5d276..d4a5e6a44c35 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,9 +19,9 @@ pub fn rust_grammar() -> Grammar { src.parse().unwrap() } -#[derive(Eq, PartialEq, Debug, Copy, Clone)] +#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash, PartialOrd, Ord)] pub struct Node(usize); -#[derive(Eq, PartialEq, Debug, Copy, Clone)] +#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash, PartialOrd, Ord)] pub struct Token(usize); #[derive(Default, Debug)] From d873e88012aa8d4d35ae0484296b4aa2c7b6a136 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 21 Jan 2021 18:47:52 -0800 Subject: [PATCH 189/322] Add a tokens iterator --- src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index d4a5e6a44c35..228952389f00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,6 +42,10 @@ impl Grammar { pub fn iter(&self) -> impl Iterator + '_ { (0..self.nodes.len()).map(Node) } + + pub fn tokens(&self) -> impl Iterator + '_ { + (0..self.tokens.len()).map(Token) + } } impl ops::Index for Grammar { From 3beaff7e387b0eb194ea31522919ad2fb8993324 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 21 Jan 2021 18:59:29 -0800 Subject: [PATCH 190/322] Write docs --- src/error.rs | 2 ++ src/lib.rs | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/error.rs b/src/error.rs index a7a62d0cc06b..6cc86f52f98b 100644 --- a/src/error.rs +++ b/src/error.rs @@ -3,8 +3,10 @@ use std::fmt; use crate::lexer::Location; +/// A type alias for std's Result with the Error as our error type. pub type Result = std::result::Result; +/// An error encountered when parsing a Grammar. 
#[derive(Debug)] pub struct Error { pub(crate) message: String, diff --git a/src/lib.rs b/src/lib.rs index 228952389f00..7d7c14f535f2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,11 @@ //! See this //! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html) //! for details. + +#![deny(missing_debug_implementations)] +#![deny(missing_docs)] +#![deny(rust_2018_idioms)] + mod error; mod lexer; mod parser; @@ -14,16 +19,21 @@ use std::{ops, str::FromStr}; pub use error::{Error, Result}; +/// Returns a Rust grammar. pub fn rust_grammar() -> Grammar { let src = include_str!("../rust.ungram"); src.parse().unwrap() } +/// A node, like `A = 'b' | 'c'`. #[derive(Eq, PartialEq, Debug, Copy, Clone, Hash, PartialOrd, Ord)] pub struct Node(usize); + +/// A token, denoted with single quotes, like `'+'` or `'struct'`. #[derive(Eq, PartialEq, Debug, Copy, Clone, Hash, PartialOrd, Ord)] pub struct Token(usize); +/// An Ungrammar grammar. #[derive(Default, Debug)] pub struct Grammar { nodes: Vec, @@ -39,10 +49,12 @@ impl FromStr for Grammar { } impl Grammar { + /// Returns an iterator over all nodes in the grammar. pub fn iter(&self) -> impl Iterator + '_ { (0..self.nodes.len()).map(Node) } + /// Returns an iterator over all tokens in the grammar. pub fn tokens(&self) -> impl Iterator + '_ { (0..self.tokens.len()).map(Token) } @@ -62,25 +74,47 @@ impl ops::Index for Grammar { } } +/// Data about a node. #[derive(Debug)] pub struct NodeData { + /// The name of the node. + /// + /// In the rule `A = 'b' | 'c'`, this is `"A"`. pub name: String, + /// The rule for this node. + /// + /// In the rule `A = 'b' | 'c'`, this represents `'b' | 'c'`. pub rule: Rule, } +/// Data about a token. #[derive(Debug)] pub struct TokenData { + /// The name of the token. pub name: String, } +/// A production rule. 
#[derive(Debug, Eq, PartialEq)] pub enum Rule { - Labeled { label: String, rule: Box }, + /// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule). + Labeled { + /// The label. + label: String, + /// The rule. + rule: Box, + }, + /// A node, like `A`. Node(Node), + /// A token, like `'struct'`. Token(Token), + /// A sequence of rules, like `'while' '(' Expr ')' Stmt`. Seq(Vec), + /// An alternative between many rules, like `'+' | '-' | '*' | '/'`. Alt(Vec), + /// An optional rule, like `A?`. Opt(Box), + /// An repeated rule, like `A*`. Rep(Box), } From 63617418291fa757f0a519eadb5f26ac7151a70f Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 21 Jan 2021 18:59:59 -0800 Subject: [PATCH 191/322] Reorder --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7d7c14f535f2..eb1feaa340cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,11 +26,11 @@ pub fn rust_grammar() -> Grammar { } /// A node, like `A = 'b' | 'c'`. -#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash, PartialOrd, Ord)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Node(usize); /// A token, denoted with single quotes, like `'+'` or `'struct'`. -#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash, PartialOrd, Ord)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Token(usize); /// An Ungrammar grammar. From 02be05795f7c2fff200c9df1b3fe4b3b5a4c5099 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 21 Jan 2021 19:07:50 -0800 Subject: [PATCH 192/322] Fix typo --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ea47622f2208..a0990d747b36 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # ungrammar -A DLS for specifying concrete syntax tree. See this [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html). +A DSL for specifying concrete syntax tree. 
See this +[introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html). See [./rust.ungram](./rust.ungram) for an example. From ab160cda347d2d068ea2b38f4b8d91c1478b18fd Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 21 Jan 2021 19:20:08 -0800 Subject: [PATCH 193/322] Pluralize --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a0990d747b36..b5a3f48ab3d2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ungrammar -A DSL for specifying concrete syntax tree. See this +A DSL for specifying concrete syntax trees. See this [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html). See [./rust.ungram](./rust.ungram) for an example. From e4ab990f5579933e945f537213fd12b16c3bbcd4 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 21 Jan 2021 19:25:10 -0800 Subject: [PATCH 194/322] Add more doc --- src/lib.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index eb1feaa340cf..858b680c81ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,10 +26,16 @@ pub fn rust_grammar() -> Grammar { } /// A node, like `A = 'b' | 'c'`. +/// +/// Indexing into a [`Grammar`] with a [`Node`] returns a reference to a +/// [`NodeData`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Node(usize); /// A token, denoted with single quotes, like `'+'` or `'struct'`. +/// +/// Indexing into a [`Grammar`] with a [`Token`] returns a reference to a +/// [`TokenData`]. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Token(usize); From b82d2fc7a33d05ff6bfdf5a0b44d2a4c70abf741 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 21 Jan 2021 19:26:35 -0800 Subject: [PATCH 195/322] Fix typo --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 858b680c81ae..7aa0ce9c88df 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -120,7 +120,7 @@ pub enum Rule { Alt(Vec), /// An optional rule, like `A?`. Opt(Box), - /// An repeated rule, like `A*`. + /// A repeated rule, like `A*`. Rep(Box), } From e4e81fe54d93774d71091232a06bfa8e81c23234 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Fri, 22 Jan 2021 01:25:03 -0800 Subject: [PATCH 196/322] Bump minor version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bf4a6fa0abf4..539abe6ee3c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.9.3" +version = "1.10.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] From d0d1fc0cf134996c72f406212ee129db68cbfdaa Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 7 Feb 2021 14:10:38 +0300 Subject: [PATCH 197/322] adt --- Cargo.toml | 2 +- rust.ungram | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 539abe6ee3c4..108ef72d7ccb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.10.0" +version = "1.11.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index fbbc3161c01f..3d2e189edf77 100644 --- a/rust.ungram +++ b/rust.ungram @@ -217,7 +217,10 @@ Union = 'union' Name 
GenericParamList? WhereClause? RecordFieldList -AdtDef = +// A Data Type. +// +// Not used directly in the grammar, but handy to have anyway. +Adt = Enum | Struct | Union From 723a57a8ddb6ff62f1db3dcfc46d182eb30ec96b Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Thu, 18 Feb 2021 14:47:43 +0100 Subject: [PATCH 198/322] Split out macro calls in item position --- Cargo.toml | 2 +- rust.ungram | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 108ef72d7ccb..b983cb414a04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.11.0" +version = "1.12.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 3d2e189edf77..9a332398b251 100644 --- a/rust.ungram +++ b/rust.ungram @@ -90,7 +90,7 @@ Item = | ExternCrate | Fn | Impl -| MacroCall +| MacroItem | MacroRules | MacroDef | Module @@ -101,6 +101,9 @@ Item = | Union | Use +MacroItem = + MacroCall + MacroRules = Attr* Visibility? 'macro_rules' '!' 
Name From 190981decd4fb47dfee736377c7f8b4b55ad1949 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Thu, 18 Feb 2021 19:18:10 +0100 Subject: [PATCH 199/322] Make ExternItem and AssocItem use MacroItem --- Cargo.toml | 2 +- rust.ungram | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b983cb414a04..ec3b5dd7a574 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.12.0" +version = "1.12.1" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 9a332398b251..4dbf43c7bd35 100644 --- a/rust.ungram +++ b/rust.ungram @@ -251,7 +251,7 @@ AssocItemList = AssocItem = Const | Fn -| MacroCall +| MacroItem | TypeAlias Impl = @@ -268,7 +268,7 @@ ExternItemList = ExternItem = Fn -| MacroCall +| MacroItem | Static | TypeAlias From 49294fec86b64be03d38e9ddbd3defa792c83d9e Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Mon, 8 Mar 2021 12:59:34 +0800 Subject: [PATCH 200/322] Macro Statements should be expr --- Cargo.toml | 2 +- rust.ungram | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ec3b5dd7a574..002a38651b47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.12.1" +version = "1.12.2" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 4dbf43c7bd35..ab65fee7ef88 100644 --- a/rust.ungram +++ b/rust.ungram @@ -339,6 +339,7 @@ Expr = | Literal | LoopExpr | MacroCall +| MacroStmts | MatchExpr | MethodCallExpr | ParenExpr From 928c6459424e4625ffd07b7294f6bb6353221163 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Mon, 15 Mar 2021 14:44:27 +0100 Subject: [PATCH 
201/322] Revert "Make ExternItem and AssocItem use MacroItem" This reverts commit 190981decd4fb47dfee736377c7f8b4b55ad1949. --- Cargo.toml | 2 +- rust.ungram | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ec3b5dd7a574..b983cb414a04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.12.1" +version = "1.12.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 4dbf43c7bd35..9a332398b251 100644 --- a/rust.ungram +++ b/rust.ungram @@ -251,7 +251,7 @@ AssocItemList = AssocItem = Const | Fn -| MacroItem +| MacroCall | TypeAlias Impl = @@ -268,7 +268,7 @@ ExternItemList = ExternItem = Fn -| MacroItem +| MacroCall | Static | TypeAlias From 113a7ac93a1dfb29cc90ec7b19a2d717118ecbfe Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Mon, 15 Mar 2021 14:44:32 +0100 Subject: [PATCH 202/322] Revert "Split out macro calls in item position" This reverts commit 723a57a8ddb6ff62f1db3dcfc46d182eb30ec96b. --- Cargo.toml | 2 +- rust.ungram | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b983cb414a04..108ef72d7ccb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.12.0" +version = "1.11.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 9a332398b251..3d2e189edf77 100644 --- a/rust.ungram +++ b/rust.ungram @@ -90,7 +90,7 @@ Item = | ExternCrate | Fn | Impl -| MacroItem +| MacroCall | MacroRules | MacroDef | Module @@ -101,9 +101,6 @@ Item = | Union | Use -MacroItem = - MacroCall - MacroRules = Attr* Visibility? 'macro_rules' '!' 
Name From fab562aabe3a4305cedfb814073864c1455c5e9d Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Mon, 15 Mar 2021 14:46:34 +0100 Subject: [PATCH 203/322] Restore current version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 108ef72d7ccb..ec3b5dd7a574 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.11.0" +version = "1.12.1" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] From 9abe6453ccccde2833da97dc5038c4efd895828c Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 18 Mar 2021 22:12:32 +0100 Subject: [PATCH 204/322] extended_key_value_attributes --- Cargo.toml | 2 +- rust.ungram | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 002a38651b47..e5c1d501a7a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.12.2" +version = "1.13.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index e7542b182a1f..3ebe1cbd3269 100644 --- a/rust.ungram +++ b/rust.ungram @@ -298,7 +298,7 @@ Visibility = 'pub' ('(' 'in'? Path ')')? Attr = - '#' '!'? '[' Path ('=' Literal | TokenTree)? ']' + '#' '!'? '[' Path ('=' Expr | TokenTree)? ']' //****************************// // Statements and Expressions // From 0f5b22ff18043b9fa8ec5417ce94ada0418f136f Mon Sep 17 00:00:00 2001 From: Christopher Durham Date: Sat, 1 May 2021 22:56:42 -0500 Subject: [PATCH 205/322] Report 1-based indices in Error's Display impl This matches every (mainstream) text editor's use of the line:column format. 
--- src/error.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/error.rs b/src/error.rs index 6cc86f52f98b..355e0b7ebc25 100644 --- a/src/error.rs +++ b/src/error.rs @@ -16,7 +16,8 @@ pub struct Error { impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(loc) = self.location { - write!(f, "{}:{}: ", loc.line, loc.column)? + // Report 1-based indices, to match text editors + write!(f, "{}:{}: ", loc.line + 1, loc.column + 1)? } write!(f, "{}", self.message) } From 3c3095f2e971a2cfc9848bf1c6f5777965e8cc56 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Fri, 11 Jun 2021 18:12:35 +0200 Subject: [PATCH 206/322] Add a `Meta` node representing attribute contents --- rust.ungram | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index 3ebe1cbd3269..d08ae24905c7 100644 --- a/rust.ungram +++ b/rust.ungram @@ -298,7 +298,10 @@ Visibility = 'pub' ('(' 'in'? Path ')')? Attr = - '#' '!'? '[' Path ('=' Expr | TokenTree)? ']' + '#' '!'? '[' Meta ']' + +Meta = + Path ('=' Expr | TokenTree)? 
//****************************// // Statements and Expressions // From b344c9a41be74bc3ba6e641bad76690c951e9692 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Fri, 11 Jun 2021 18:27:11 +0200 Subject: [PATCH 207/322] Bump to 1.14.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e5c1d501a7a9..56dbc825fa3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.13.0" +version = "1.14.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] From bcb24cd3b9cc8d75a6c9634267d9790fdbf0bc6e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 19 Jun 2021 17:30:37 +0300 Subject: [PATCH 208/322] fix indentation --- rust.ungram | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust.ungram b/rust.ungram index d08ae24905c7..ce8e9367ec0e 100644 --- a/rust.ungram +++ b/rust.ungram @@ -390,9 +390,9 @@ BinExpr = lhs:Expr op:( '||' | '&&' - | '==' | '!=' | '<=' | '>=' | '<' | '>' - | '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&' - | '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^=' + | '==' | '!=' | '<=' | '>=' | '<' | '>' + | '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&' + | '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^=' ) rhs:Expr From 29ab704b75e71ebede40ccf51837a5466055b9dd Mon Sep 17 00:00:00 2001 From: ammkrn Date: Sat, 19 Jun 2021 11:29:31 -0500 Subject: [PATCH 209/322] More specific error for leading pipes --- Cargo.toml | 2 +- src/parser.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 56dbc825fa3c..18c9dffecfc3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.14.0" +version = 
"1.14.1" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/src/parser.rs b/src/parser.rs index bd067f22a5b6..a4ce9c120298 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -109,6 +109,14 @@ fn node(p: &mut Parser) -> Result<()> { } fn rule(p: &mut Parser) -> Result { + if let Some(lexer::Token { kind: TokenKind::Pipe, loc }) = p.peek() { + bail!( + *loc, + "The first element in a sequence of productions or alternatives \ + must not have a leading pipe (`|`)" + ); + } + let lhs = seq_rule(p)?; let mut alt = vec![lhs]; while let Some(token) = p.peek() { From 538c29e06bd3ff559b93ee2f5c250d07354b7696 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Sat, 26 Jun 2021 08:27:17 +0100 Subject: [PATCH 210/322] Implement arbitrary behind a feature flag --- Cargo.toml | 3 ++- src/lib.rs | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fee00ec2ba4a..fe2497cfd646 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.17" +version = "0.1.18" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/smol_str" @@ -9,6 +9,7 @@ edition = "2018" [dependencies] serde = { version = "1", optional = true, default_features = false, features = [ "std" ] } +arbitrary = { version = "1", optional = true } [dev-dependencies] proptest = "0.10" diff --git a/src/lib.rs b/src/lib.rs index 6136878b3eca..1583dfe07777 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -300,6 +300,14 @@ impl Borrow for SmolStr { } } +#[cfg(feature = "arbitrary")] +impl<'a> arbitrary::Arbitrary<'a> for SmolStr { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result { + let s = <&str>::arbitrary(u)?; + Ok(SmolStr::new(s)) + } +} + const INLINE_CAP: usize = 22; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; From 683747fdede75816d7035db250a0187094f03d78 
Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 17 Jul 2021 03:42:37 +0200 Subject: [PATCH 211/322] Add GenericParamList to AssocTypeArg --- Cargo.toml | 2 +- rust.ungram | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 18c9dffecfc3..99b020548bd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.14.1" +version = "1.14.2" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index ce8e9367ec0e..2ac52ddcb578 100644 --- a/rust.ungram +++ b/rust.ungram @@ -51,7 +51,7 @@ TypeArg = Type AssocTypeArg = - NameRef (':' TypeBoundList | '=' Type) + NameRef GenericParamList? (':' TypeBoundList | '=' Type) LifetimeArg = Lifetime From c306ed34ed0e21caaadccd7808a505c35ab0d354 Mon Sep 17 00:00:00 2001 From: Arsenii Lyashenko Date: Thu, 12 Aug 2021 15:00:02 +0300 Subject: [PATCH 212/322] Add `#![no_std]` support --- .github/ci.rs | 5 +++++ Cargo.toml | 6 +++++- src/lib.rs | 16 ++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/.github/ci.rs b/.github/ci.rs index b293ebbcb784..98017ad97f1f 100644 --- a/.github/ci.rs +++ b/.github/ci.rs @@ -23,6 +23,11 @@ fn try_main() -> Result<()> { cwd.display() ); + { + let _s = Section::new("BUILD_NO_DEFAULT_FEATURES"); + shell("cargo test --all-features --workspace --no-run --no-default-features")?; + } + { let _s = Section::new("BUILD"); shell("cargo test --all-features --workspace --no-run")?; diff --git a/Cargo.toml b/Cargo.toml index fe2497cfd646..f9c3a350f2ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,10 +8,14 @@ authors = ["Aleksey Kladov "] edition = "2018" [dependencies] -serde = { version = "1", optional = true, default_features = false, features = [ "std" ] } +serde = { version = "1", optional = true, default_features = false } arbitrary = { 
version = "1", optional = true } [dev-dependencies] proptest = "0.10" serde_json = "1" serde = { version = "1", features = [ "derive" ] } + +[features] +default = ["std"] +std = ["serde/std"] diff --git a/src/lib.rs b/src/lib.rs index 1583dfe07777..9f99153f2a0b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,27 @@ +#![cfg_attr(not(feature = "std"), no_std)] + +#[cfg(not(feature = "std"))] +extern crate core as std; + +#[cfg(not(feature = "std"))] +extern crate alloc; + use std::{ borrow::Borrow, cmp::{self, Ordering}, fmt, hash, iter, ops::Deref, +}; + +#[cfg(not(feature = "std"))] +use alloc::{ + string::{String, ToString}, sync::Arc, }; +#[cfg(feature = "std")] +use std::sync::Arc; + /// A `SmolStr` is a string type that has the following properties: /// /// * `size_of::() == size_of::()` From 19176cd726b5959438419de802f3a2c4d0c19aca Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Fri, 13 Aug 2021 00:07:57 +0200 Subject: [PATCH 213/322] Add syntax elements for `if let` match guards --- Cargo.toml | 2 +- rust.ungram | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 99b020548bd8..067b9b302f20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.14.2" +version = "1.14.3" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] diff --git a/rust.ungram b/rust.ungram index 2ac52ddcb578..6bb123a3e275 100644 --- a/rust.ungram +++ b/rust.ungram @@ -488,7 +488,7 @@ MatchArm = Attr* Pat guard:MatchGuard? '=>' Expr ','? MatchGuard = - 'if' Expr + 'if' ('let' Pat '=')? Expr ReturnExpr = Attr* 'return' Expr? 
From 50019bae42f8bb4357ea52b1c783b92af8717a06 Mon Sep 17 00:00:00 2001 From: Jade Date: Tue, 3 Aug 2021 21:35:16 -0700 Subject: [PATCH 214/322] Add both variants of half open range patterns to the grammar This is prompted by https://github.com/rust-analyzer/rust-analyzer/issues/9779, but it is not actually a prerequisite of making that one happen as this commit doesn't change the generated code on the r-a side. --- rust.ungram | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index 6bb123a3e275..52bdf323af33 100644 --- a/rust.ungram +++ b/rust.ungram @@ -603,7 +603,12 @@ WildcardPat = '_' RangePat = - start:Pat op:('..' | '..=') end:Pat + // 1.. + start:Pat op:('..' | '..=') + // 1..2 + | start:Pat op:('..' | '..=') end:Pat + // ..2 + | op:('..' | '..=') end:Pat RefPat = '&' 'mut'? Pat From 1fec3b1017c7a7115b2a10f280e8c0abe557c942 Mon Sep 17 00:00:00 2001 From: Jade Date: Wed, 4 Aug 2021 20:31:22 -0700 Subject: [PATCH 215/322] Bump version and remove soft-deprecated authors field --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 067b9b302f20..79f3c39e9304 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,9 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.14.3" +version = "1.14.4" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" -authors = ["Aleksey Kladov "] edition = "2018" exclude = ["/bors.toml", "/.github"] From 81876f7b56f05001e8ec090db1fb4980fa0f8a1c Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Mon, 13 Sep 2021 20:37:39 -0700 Subject: [PATCH 216/322] Add ungrammar extensions --- README.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b5a3f48ab3d2..a5e130fedf10 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,21 @@ # ungrammar -A DSL for specifying concrete syntax trees. 
See this -[introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html). +A DSL for specifying concrete syntax trees. + +See the [blog post][post] for an introduction. See [./rust.ungram](./rust.ungram) for an example. + +## Editor support + +- Vim + - [vim-ungrammar][] + - [ungrammar.vim][] +- VSCode + - [ungrammar-tools][] + +[post]: + https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html +[vim-ungrammar]: https://github.com/Iron-E/vim-ungrammar +[ungrammar.vim]: https://github.com/drtychai/ungrammar.vim +[ungrammar-tools]: https://github.com/azdavis/ungrammar-tools From 1523dde223b7f4cee84ef62f5dbcc84e6eb2fcb2 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 26 Sep 2021 12:00:56 +0300 Subject: [PATCH 217/322] avoid attribute ambiguity in ExprStmt Both expr stmt and expr can have attributes, which doesn't make sense. Let's say that statement's attrs are those of the expression. --- rust.ungram | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index 52bdf323af33..5a8e11613750 100644 --- a/rust.ungram +++ b/rust.ungram @@ -318,7 +318,7 @@ LetStmt = '=' initializer:Expr ';' ExprStmt = - Attr* Expr ';'? + Expr ';'? Expr = ArrayExpr From 2da46ff08c55a5940ce3005fba37b774ed6d023b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 26 Sep 2021 12:02:26 +0300 Subject: [PATCH 218/322] canonical grammar for block expressions Historically, we struggled with formulating the right grammar for block expressions. Today's EffectExpr is the best we've come up so far, but, if you are thinking "WTF is an effect expression?", you are not wrong. I think in this commit I've come up with what can be called a reasonable grammar for block expressions. Observe that *all* things in `{}` we call list: item list, assoc item list, match arm list, record field list, record expr field list. In fact, `BlockExpr` is the only exception. 
So, let's just call the stuff in `{}` a statement list. This isn't forced: *all* things inside a block are statements, and `;` is a statement separator, just like `,`. Trailing `;` is allowed, but not required. Then, statement list with modifiers such as `async` or attributes or labels is just a block expression. Why haven't I thought of it from the start? --- rust.ungram | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rust.ungram b/rust.ungram index 5a8e11613750..938843ffce32 100644 --- a/rust.ungram +++ b/rust.ungram @@ -331,7 +331,6 @@ Expr = | CastExpr | ClosureExpr | ContinueExpr -| EffectExpr | FieldExpr | ForExpr | IfExpr @@ -366,7 +365,7 @@ Literal = PathExpr = Attr* Path -BlockExpr = +StmtList = '{' Attr* statements:Stmt* @@ -379,8 +378,8 @@ RefExpr = TryExpr = Attr* Expr '?' -EffectExpr = - Attr* Label? ('try' | 'unsafe' | 'async' | 'const') BlockExpr +BlockExpr = + Attr* Label? ('try' | 'unsafe' | 'async' | 'const') StmtList PrefixExpr = Attr* op:('-' | '!' 
| '*') Expr From 91eb93c349b7a441baa1524915b84428585554e8 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 26 Sep 2021 19:13:22 +0300 Subject: [PATCH 219/322] bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 79f3c39e9304..af7ec004fd18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.14.4" +version = "1.14.5" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" edition = "2018" From aee806c64c2daca6c6009ba4e97958342cb20187 Mon Sep 17 00:00:00 2001 From: zhoufan <1247714429@qq.com> Date: Sun, 3 Oct 2021 08:49:18 +0800 Subject: [PATCH 220/322] add Attr to RestPat --- Cargo.toml | 2 +- rust.ungram | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index af7ec004fd18..25a10b3a5424 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.14.5" +version = "1.14.6" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" edition = "2018" diff --git a/rust.ungram b/rust.ungram index 938843ffce32..81323d27cad1 100644 --- a/rust.ungram +++ b/rust.ungram @@ -618,7 +618,7 @@ RecordPat = RecordPatFieldList = '{' fields:(RecordPatField (',' RecordPatField)* ','?)? - '..'? + RestPat? '}' RecordPatField = @@ -646,7 +646,7 @@ BoxPat = 'box' Pat RestPat = - '..' + Attr* '..' MacroPat = MacroCall From 7fdea9fdc978a9419682e3319bcb047a5540fb9d Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Wed, 6 Oct 2021 18:15:06 +0200 Subject: [PATCH 221/322] Add support for `let ... 
else` --- Cargo.toml | 2 +- rust.ungram | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 25a10b3a5424..f0172821bfca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.14.6" +version = "1.14.7" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" edition = "2018" diff --git a/rust.ungram b/rust.ungram index 81323d27cad1..c9a36079f275 100644 --- a/rust.ungram +++ b/rust.ungram @@ -315,7 +315,9 @@ Stmt = LetStmt = Attr* 'let' Pat (':' Type)? - '=' initializer:Expr ';' + '=' initializer:Expr + ('else' else_branch:BlockExpr)? + ';' ExprStmt = Expr ';'? From f1db2cac1a4498eff74dc8541f754b48d1a0265b Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Thu, 7 Oct 2021 16:59:49 +0200 Subject: [PATCH 222/322] Make `LetElse` its own node --- Cargo.toml | 2 +- rust.ungram | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f0172821bfca..050459e109c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.14.7" +version = "1.14.8" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" edition = "2018" diff --git a/rust.ungram b/rust.ungram index c9a36079f275..0af0f2521cfd 100644 --- a/rust.ungram +++ b/rust.ungram @@ -316,9 +316,12 @@ Stmt = LetStmt = Attr* 'let' Pat (':' Type)? '=' initializer:Expr - ('else' else_branch:BlockExpr)? + LetElse? ';' +LetElse = + 'else' BlockExpr + ExprStmt = Expr ';'? 
From c0478a4433923af678b2f9b20ef916061be294ae Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 19 Oct 2021 14:14:31 +0200 Subject: [PATCH 223/322] Add `~const` bounds to rust grammar --- Cargo.toml | 2 +- rust.ungram | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 050459e109c3..8e732c8dd9f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.14.8" +version = "1.14.9" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" edition = "2018" diff --git a/rust.ungram b/rust.ungram index 0af0f2521cfd..d2ef552ae912 100644 --- a/rust.ungram +++ b/rust.ungram @@ -573,7 +573,7 @@ TypeBoundList = TypeBound = Lifetime -| '?'? Type +| ('?' | '~' 'const')? Type //************************// // Patterns // From e3edb41dc01ff0e5affeb4d9ba9d03bb8a54bb11 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 1 Nov 2021 14:33:22 +0300 Subject: [PATCH 224/322] implement FromStr closes #31 --- Cargo.toml | 2 +- src/lib.rs | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f9c3a350f2ed..d00ca3112348 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.18" +version = "0.1.19" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/smol_str" diff --git a/src/lib.rs b/src/lib.rs index 9f99153f2a0b..d819fe2dd90c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,11 +6,13 @@ extern crate core as std; #[cfg(not(feature = "std"))] extern crate alloc; +use core::convert::Infallible; use std::{ borrow::Borrow, cmp::{self, Ordering}, fmt, hash, iter, ops::Deref, + str::FromStr, }; #[cfg(not(feature = "std"))] @@ -316,6 +318,15 @@ impl Borrow for SmolStr { } } +impl FromStr for SmolStr { + type Err = Infallible; + + #[inline] + 
fn from_str(s: &str) -> Result { + Ok(SmolStr::from(s)) + } +} + #[cfg(feature = "arbitrary")] impl<'a> arbitrary::Arbitrary<'a> for SmolStr { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result { From 2cdb8d79582786e726a078289de0dfe92c24f42d Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 1 Nov 2021 16:04:40 +0300 Subject: [PATCH 225/322] fix no_std support --- .github/ci.rs | 1 + Cargo.toml | 2 +- src/lib.rs | 43 ++++++++++++++++++------------------------- 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/.github/ci.rs b/.github/ci.rs index 98017ad97f1f..21c8584fb9f7 100644 --- a/.github/ci.rs +++ b/.github/ci.rs @@ -36,6 +36,7 @@ fn try_main() -> Result<()> { { let _s = Section::new("TEST"); shell("cargo test --all-features --workspace")?; + shell("cargo test --no-default-features --workspace")?; } let current_branch = shell_output("git branch --show-current")?; diff --git a/Cargo.toml b/Cargo.toml index d00ca3112348..912b7f13458f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.19" +version = "0.1.20" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/smol_str" diff --git a/src/lib.rs b/src/lib.rs index d819fe2dd90c..c542fe639f0c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,29 +1,19 @@ -#![cfg_attr(not(feature = "std"), no_std)] - -#[cfg(not(feature = "std"))] -extern crate core as std; - -#[cfg(not(feature = "std"))] +#![no_std] extern crate alloc; -use core::convert::Infallible; -use std::{ +use alloc::{ + string::{String, ToString}, + sync::Arc, +}; +use core::{ borrow::Borrow, cmp::{self, Ordering}, + convert::Infallible, fmt, hash, iter, ops::Deref, str::FromStr, }; -#[cfg(not(feature = "std"))] -use alloc::{ - string::{String, ToString}, - sync::Arc, -}; - -#[cfg(feature = "std")] -use std::sync::Arc; - /// A `SmolStr` is a string type that has the following properties: /// /// * 
`size_of::() == size_of::()` @@ -131,7 +121,7 @@ impl SmolStr { if size + len > INLINE_CAP { let (min_remaining, _) = iter.size_hint(); let mut heap = String::with_capacity(size + len + min_remaining); - heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); heap.push(ch); heap.extend(iter); return SmolStr(Repr::Heap(heap.into_boxed_str().into())); @@ -265,7 +255,7 @@ where let size = slice.len(); if size + len > INLINE_CAP { let mut heap = String::with_capacity(size + len); - heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); heap.push_str(&slice); heap.extend(iter); return SmolStr(Repr::Heap(heap.into_boxed_str().into())); @@ -411,7 +401,7 @@ impl Repr { Repr::Inline { len, buf } => { let len = *len as usize; let buf = &buf[..len]; - unsafe { ::std::str::from_utf8_unchecked(buf) } + unsafe { ::core::str::from_utf8_unchecked(buf) } } Repr::Substring { newlines, spaces } => { let newlines = *newlines; @@ -425,9 +415,12 @@ impl Repr { #[cfg(feature = "serde")] mod serde { - use super::SmolStr; - use ::serde::de::{Deserializer, Error, Unexpected, Visitor}; - use std::fmt; + use alloc::{string::String, vec::Vec}; + use core::fmt; + + use serde::de::{Deserializer, Error, Unexpected, Visitor}; + + use crate::SmolStr; // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result @@ -468,7 +461,7 @@ mod serde { where E: Error, { - match std::str::from_utf8(v) { + match core::str::from_utf8(v) { Ok(s) => Ok(SmolStr::from(s)), Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), } @@ -478,7 +471,7 @@ mod serde { where E: Error, { - match std::str::from_utf8(v) { + match core::str::from_utf8(v) { Ok(s) => Ok(SmolStr::from(s)), Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), } From 
c46ffc99e30dd316d1a4451f69f977a1410fae67 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 4 Nov 2021 18:42:22 +0100 Subject: [PATCH 226/322] Remove unnecessary Into bound from From impl --- Cargo.toml | 6 +++--- src/lib.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 912b7f13458f..4efaaf90ef91 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,9 @@ [package] name = "smol_str" -version = "0.1.20" +version = "0.1.21" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" -repository = "https://github.com/matklad/smol_str" +repository = "https://github.com/rust-analyzer/smol_str" authors = ["Aleksey Kladov "] edition = "2018" @@ -14,7 +14,7 @@ arbitrary = { version = "1", optional = true } [dev-dependencies] proptest = "0.10" serde_json = "1" -serde = { version = "1", features = [ "derive" ] } +serde = { version = "1", features = [ "derive" ] } [features] default = ["std"] diff --git a/src/lib.rs b/src/lib.rs index c542fe639f0c..45ec174a2014 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -289,7 +289,7 @@ impl<'a> iter::FromIterator<&'a str> for SmolStr { impl From for SmolStr where - T: Into + AsRef, + T: AsRef, { fn from(text: T) -> Self { Self::new(text) From 7479c18dc9ca8c2f5fda60f4b19dc8595a611427 Mon Sep 17 00:00:00 2001 From: Chayim Refael Friedman Date: Mon, 24 Jan 2022 03:30:36 +0200 Subject: [PATCH 227/322] Support if- and while-let chains RFC 2497 https://github.com/rust-lang/rfcs/blob/master/text/2497-if-let-chains.md. 
--- Cargo.toml | 2 +- rust.ungram | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8e732c8dd9f0..bbfb0f58552b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.14.9" +version = "1.15.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" edition = "2018" diff --git a/rust.ungram b/rust.ungram index d2ef552ae912..7d7f1848526b 100644 --- a/rust.ungram +++ b/rust.ungram @@ -357,6 +357,7 @@ Expr = | TupleExpr | WhileExpr | YieldExpr +| LetExpr Literal = Attr* value:( @@ -448,13 +449,9 @@ ClosureExpr = body:Expr IfExpr = - Attr* 'if' Condition then_branch:BlockExpr + Attr* 'if' condition:Expr then_branch:BlockExpr ('else' else_branch:(IfExpr | BlockExpr))? -Condition = - 'let' Pat '=' Expr -| Expr - LoopExpr = Attr* Label? 'loop' loop_body:BlockExpr @@ -464,7 +461,7 @@ ForExpr = loop_body:BlockExpr WhileExpr = - Attr* Label? 'while' Condition + Attr* Label? 'while' condition:Expr loop_body:BlockExpr Label = @@ -492,7 +489,7 @@ MatchArm = Attr* Pat guard:MatchGuard? '=>' Expr ','? MatchGuard = - 'if' ('let' Pat '=')? Expr + 'if' condition:Expr ReturnExpr = Attr* 'return' Expr? @@ -500,6 +497,9 @@ ReturnExpr = YieldExpr = Attr* 'yield' Expr? +LetExpr = + Attr* 'let' Pat '=' Expr + AwaitExpr = Attr* Expr '.' 'await' From 800de5830d99c1fd7dade0440c5bfc464e09d566 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 8 Feb 2022 13:51:44 +0100 Subject: [PATCH 228/322] Fix some rust.ungram inconsistencies --- rust.ungram | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rust.ungram b/rust.ungram index 7d7f1848526b..6b31f98ec09a 100644 --- a/rust.ungram +++ b/rust.ungram @@ -102,7 +102,7 @@ Item = | Use MacroRules = - Attr* Visibility? + Attr* 'macro_rules' '!' Name TokenTree @@ -173,7 +173,7 @@ TypeAlias = Attr* Visibility? 'default'? 
'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause? - '=' Type ';' + ('=' Type)? ';' Struct = Attr* Visibility? @@ -210,7 +210,7 @@ VariantList = Variant = Attr* Visibility? - Name FieldList ('=' Expr)? + Name FieldList? ('=' Expr)? Union = Attr* Visibility? @@ -229,17 +229,17 @@ Const = Attr* Visibility? 'default'? 'const' (Name | '_') ':' Type - '=' body:Expr ';' + ('=' body:Expr)? ';' Static = Attr* Visibility? - 'static'? 'mut'? Name ':' Type - '=' body:Expr ';' + 'static' 'mut'? Name ':' Type + ('=' body:Expr)? ';' Trait = Attr* Visibility? 'unsafe'? 'auto'? - 'trait' Name GenericParamList (':' TypeBoundList?)? WhereClause + 'trait' Name GenericParamList? (':' TypeBoundList?)? WhereClause? AssocItemList AssocItemList = @@ -258,7 +258,7 @@ Impl = AssocItemList ExternBlock = - Attr* Abi ExternItemList + Attr* 'unsafe'? Abi ExternItemList ExternItemList = '{' Attr* ExternItem* '}' @@ -292,7 +292,7 @@ WhereClause = 'where' predicates:(WherePred (',' WherePred)* ','?) WherePred = - ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList + ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList? Visibility = 'pub' ('(' 'in'? Path ')')? From 11206f1fb048301e2f907a6474c84976df4a004d Mon Sep 17 00:00:00 2001 From: Chayim Refael Friedman Date: Thu, 24 Feb 2022 14:20:36 +0200 Subject: [PATCH 229/322] Revert the visibility removal of macro_rules visibility (#46) As discussed in https://github.com/rust-analyzer/ungrammar/pull/46#issuecomment-1049758027. --- rust.ungram | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust.ungram b/rust.ungram index 6b31f98ec09a..572afe2e555b 100644 --- a/rust.ungram +++ b/rust.ungram @@ -102,7 +102,7 @@ Item = | Use MacroRules = - Attr* + Attr* Visibility? 'macro_rules' '!' 
Name TokenTree From 039a27440743b068f811c6f0bc06dbdd0d1de503 Mon Sep 17 00:00:00 2001 From: Chayim Refael Friedman Date: Thu, 24 Feb 2022 02:34:24 +0000 Subject: [PATCH 230/322] Support destructuring assignments (RFC 2909) The supported patterns are already valid as expressions, except the rest pattern (`..`) and the wildcard pattern (`_`). --- Cargo.toml | 2 +- rust.ungram | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bbfb0f58552b..e9a51457791c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.15.0" +version = "1.16.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" edition = "2018" diff --git a/rust.ungram b/rust.ungram index 572afe2e555b..8c85c45ec4da 100644 --- a/rust.ungram +++ b/rust.ungram @@ -358,6 +358,7 @@ Expr = | WhileExpr | YieldExpr | LetExpr +| UnderscoreExpr Literal = Attr* value:( @@ -426,7 +427,7 @@ RecordExprFieldList = '{' Attr* fields:(RecordExprField (',' RecordExprField)* ','?)? - ('..' spread:Expr)? + ('..' spread:Expr?)? '}' RecordExprField = @@ -500,6 +501,9 @@ YieldExpr = LetExpr = Attr* 'let' Pat '=' Expr +UnderscoreExpr = + Attr* '_' + AwaitExpr = Attr* Expr '.' 
'await' From 6d2f3ec8b0339baf8508b5faf2f917b7627d5cfc Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 5 Mar 2022 22:48:00 +0100 Subject: [PATCH 231/322] Include `Self` specifically in NameRef --- Cargo.toml | 2 +- rust.ungram | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e9a51457791c..833d47990aea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" -version = "1.16.0" +version = "1.16.1" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" edition = "2018" diff --git a/rust.ungram b/rust.ungram index 8c85c45ec4da..cb58486eff01 100644 --- a/rust.ungram +++ b/rust.ungram @@ -24,7 +24,7 @@ Name = 'ident' | 'self' NameRef = - 'ident' | 'int_number' | 'self' | 'super' | 'crate' + 'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self' Lifetime = 'lifetime_ident' From 6edb52592ecd0f6df4a9f3b1299f215471e63926 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 7 Jul 2020 09:57:19 -0400 Subject: [PATCH 232/322] Add a new test for bad size hint Changes in PR #20 allow for an incorrect size hint to create a non-canonical SmolStr. Add a new test which will fail if we ever rely on SmolStrs to be canonical when comparing for equality. 
--- tests/test.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/test.rs b/tests/test.rs index 537df8ddb216..cdcc9bf67035 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -209,6 +209,8 @@ fn test_from_char_iterator() { ("사회과학원 어학연구소", true), // String containing diverse characters ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), + // String which has too many characters to even consider inlining + ("☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺", true), ]; for (raw, is_heap) in &examples { let s: SmolStr = raw.chars().collect(); @@ -216,3 +218,32 @@ fn test_from_char_iterator() { assert_eq!(s.is_heap_allocated(), *is_heap); } } + +#[test] +fn test_bad_size_hint_char_iter() { + struct BadSizeHint(I); + + impl> Iterator for BadSizeHint { + type Item = T; + + fn next(&mut self) -> Option { + self.0.next() + } + + fn size_hint(&self) -> (usize, Option) { + (1024, None) + } + } + + let data = "testing"; + let collected: SmolStr = BadSizeHint(data.chars()).collect(); + let new = SmolStr::new(data); + + // Because of the bad size hint, `collected` will be heap allocated, but `new` will be inline + + // If we try to use the type of the string (inline/heap) to quickly test for equality, we need to ensure + // `collected` is inline allocated instead + assert!(collected.is_heap_allocated()); + assert!(!new.is_heap_allocated()); + assert_eq!(new, collected); +} From e197fd15e91e924719544338118f7a5a99b4b6dc Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Thu, 10 Mar 2022 19:20:59 -0500 Subject: [PATCH 233/322] Use ASCII to get the "too big" char iterator Additionally, make the construction of the string mechanical --- tests/test.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test.rs b/tests/test.rs index cdcc9bf67035..934cfa3c0517 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -209,14 +209,19 @@ fn test_from_char_iterator() { ("사회과학원 어학연구소", true), // String containing diverse characters ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), - // 
String which has too many characters to even consider inlining - ("☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺", true), ]; for (raw, is_heap) in &examples { let s: SmolStr = raw.chars().collect(); assert_eq!(s.as_str(), *raw); assert_eq!(s.is_heap_allocated(), *is_heap); } + // String which has too many characters to even consider inlining: Chars::size_hint uses + // (`len` + 3) / 4. With `len` = 89, this results in 23, so `from_iter` will immediately + // heap allocate + let raw: String = std::iter::repeat('a').take(22 * 4 + 1).collect(); + let s: SmolStr = raw.chars().collect(); + assert_eq!(s.as_str(), raw); + assert!(s.is_heap_allocated()); } #[test] From b8248e28d77c4df106d55ed556204abc9dad8d24 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 3 Apr 2022 16:10:03 +0200 Subject: [PATCH 234/322] Clarify what `WS` is --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index c542fe639f0c..96d464fec386 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,8 @@ use core::{ /// languages. Strings consisting of a series of newlines, followed by a series of /// whitespace are a typical pattern in computer programs because of indentation. /// Note that a specialized interner might be a better solution for some use cases. +/// +/// `WS`: A string of 32 newlines followed by 128 spaces. 
#[derive(Clone)] pub struct SmolStr(Repr); From 756fa7de7472f6acc43ec753fb1f8de88e1feb68 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 3 Apr 2022 16:17:08 +0200 Subject: [PATCH 235/322] Bump dependencies --- Cargo.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4efaaf90ef91..5bfe18cf1f45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,13 +8,13 @@ authors = ["Aleksey Kladov "] edition = "2018" [dependencies] -serde = { version = "1", optional = true, default_features = false } -arbitrary = { version = "1", optional = true } +serde = { version = "1.0.136", optional = true, default_features = false } +arbitrary = { version = "1.1.0", optional = true } [dev-dependencies] -proptest = "0.10" -serde_json = "1" -serde = { version = "1", features = [ "derive" ] } +proptest = "1.0.0" +serde_json = "1.0.79" +serde = { version = "1.0.136", features = ["derive"] } [features] default = ["std"] From 3d05659c6542e8a9425801b1dfa8e6637656d52f Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 8 Apr 2022 21:04:22 +0200 Subject: [PATCH 236/322] Use new optional dependency feature syntax making serde truly optional --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5bfe18cf1f45..ea4c8c09f696 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.21" +version = "0.1.22" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" @@ -18,4 +18,4 @@ serde = { version = "1.0.136", features = ["derive"] } [features] default = ["std"] -std = ["serde/std"] +std = ["serde?/std"] From d6b8d67003969022ab2c9e7cd61560495bd14833 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 26 Apr 2022 23:57:07 +0200 Subject: [PATCH 237/322] Revert usage of optional dependency feature syntax --- Cargo.toml | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ea4c8c09f696..b7bd8f731486 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.22" +version = "0.1.23" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" @@ -18,4 +18,4 @@ serde = { version = "1.0.136", features = ["derive"] } [features] default = ["std"] -std = ["serde?/std"] +std = ["serde/std"] From 8651ce01689f09df96d32a4011afd2c779efa94d Mon Sep 17 00:00:00 2001 From: austaras Date: Sun, 22 Jan 2023 23:55:58 +0800 Subject: [PATCH 238/322] One more byte for inlined --- src/lib.rs | 62 ++++++++++++++++++++++++++++++++++++++++----------- tests/test.rs | 8 +++---- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8c92e51e1c1a..296945d4d3d0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,7 @@ use core::{ cmp::{self, Ordering}, convert::Infallible, fmt, hash, iter, + mem::transmute, ops::Deref, str::FromStr, }; @@ -19,8 +20,8 @@ use core::{ /// * `size_of::() == size_of::()` /// * `Clone` is `O(1)` /// * Strings are stack-allocated if they are: -/// * Up to 22 bytes long -/// * Longer than 22 bytes, but substrings of `WS` (see below). Such strings consist +/// * Up to 23 bytes long +/// * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist /// solely of consecutive newlines, followed by consecutive spaces /// * If a string does not satisfy the aforementioned conditions, it is heap-allocated /// @@ -51,16 +52,16 @@ impl SmolStr { buf[$idx] = byte }); } - s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22); SmolStr(Repr::Inline { - len: len as u8, + len: unsafe { transmute(len as u8) }, buf, }) } /// Constructs inline variant of `SmolStr`. 
/// - /// Panics if `text.len() > 22`. + /// Panics if `text.len() > 23`. #[inline] pub const fn new_inline(text: &str) -> SmolStr { let mut buf = [0; INLINE_CAP]; @@ -70,7 +71,7 @@ impl SmolStr { i += 1 } SmolStr(Repr::Inline { - len: text.len() as u8, + len: unsafe { transmute(text.len() as u8) }, buf, }) } @@ -132,7 +133,7 @@ impl SmolStr { len += size; } SmolStr(Repr::Inline { - len: len as u8, + len: unsafe { transmute(len as u8) }, buf, }) } @@ -266,7 +267,7 @@ where len += size; } SmolStr(Repr::Inline { - len: len as u8, + len: unsafe { transmute(len as u8) }, buf, }) } @@ -327,17 +328,52 @@ impl<'a> arbitrary::Arbitrary<'a> for SmolStr { } } -const INLINE_CAP: usize = 22; +const INLINE_CAP: usize = 23; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; const WS: &str = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; +#[derive(Clone, Copy, Debug)] +#[repr(u8)] +enum InlineSize { + _V0 = 0, + _V1 = 1, + _V2 = 2, + _V3 = 3, + _V4 = 4, + _V5 = 5, + _V6 = 6, + _V7 = 7, + _V8 = 8, + _V9 = 9, + _V10 = 10, + _V11 = 11, + _V12 = 12, + _V13 = 13, + _V14 = 14, + _V15 = 15, + _V16 = 16, + _V17 = 17, + _V18 = 18, + _V19 = 19, + _V20 = 20, + _V21 = 21, + _V22 = 22, + _V23 = 23, +} + #[derive(Clone, Debug)] enum Repr { Heap(Arc), - Inline { len: u8, buf: [u8; INLINE_CAP] }, - Substring { newlines: usize, spaces: usize }, + Inline { + len: InlineSize, + buf: [u8; INLINE_CAP], + }, + Substring { + newlines: usize, + spaces: usize, + }, } impl Repr { @@ -353,7 +389,7 @@ impl Repr { let mut buf = [0; INLINE_CAP]; buf[..len].copy_from_slice(text.as_bytes()); return Repr::Inline { - len: len as u8, + len: unsafe { transmute(len as u8) }, buf, }; } @@ -390,7 +426,7 @@ impl Repr { fn is_empty(&self) -> bool { match self { Repr::Heap(data) => data.is_empty(), - Repr::Inline { len, .. } => *len == 0, + Repr::Inline { len, .. } => *len as u8 == 0, // A substring isn't created for an empty string. Repr::Substring { .. 
} => false, } diff --git a/tests/test.rs b/tests/test.rs index 934cfa3c0517..609a8f7e6e1f 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -29,12 +29,12 @@ fn const_fn_ctor() { const EMPTY: SmolStr = SmolStr::new_inline(""); const A: SmolStr = SmolStr::new_inline("A"); const HELLO: SmolStr = SmolStr::new_inline("HELLO"); - const LONG: SmolStr = SmolStr::new_inline("ABCDEFGHIZKLMNOPQRSTUV"); + const LONG: SmolStr = SmolStr::new_inline("ABCDEFGHIZKLMNOPQRSTUVW"); assert_eq!(EMPTY, SmolStr::from("")); assert_eq!(A, SmolStr::from("A")); assert_eq!(HELLO, SmolStr::from("HELLO")); - assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUV")); + assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); } #[allow(deprecated)] @@ -43,12 +43,12 @@ fn old_const_fn_ctor() { const EMPTY: SmolStr = SmolStr::new_inline_from_ascii(0, b""); const A: SmolStr = SmolStr::new_inline_from_ascii(1, b"A"); const HELLO: SmolStr = SmolStr::new_inline_from_ascii(5, b"HELLO"); - const LONG: SmolStr = SmolStr::new_inline_from_ascii(22, b"ABCDEFGHIZKLMNOPQRSTUV"); + const LONG: SmolStr = SmolStr::new_inline_from_ascii(23, b"ABCDEFGHIZKLMNOPQRSTUVW"); assert_eq!(EMPTY, SmolStr::from("")); assert_eq!(A, SmolStr::from("A")); assert_eq!(HELLO, SmolStr::from("HELLO")); - assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUV")); + assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); } fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> { From 14badf187ad526e0a90523ddd64000a3c00fef67 Mon Sep 17 00:00:00 2001 From: austaras Date: Mon, 23 Jan 2023 18:57:29 +0800 Subject: [PATCH 239/322] 0.1.24 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b7bd8f731486..e46b14fa8340 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.23" +version = "0.1.24" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" 
repository = "https://github.com/rust-analyzer/smol_str" From 10dcf6ba748483696a139225660ca0b722879bf1 Mon Sep 17 00:00:00 2001 From: austaras Date: Mon, 23 Jan 2023 21:19:58 +0800 Subject: [PATCH 240/322] Update README.MD --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2e61b9ee4261..0cc191018139 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ A `SmolStr` is a string type that has the following properties: * `size_of::() == size_of::()` * `Clone` is `O(1)` * Strings are stack-allocated if they are: - * Up to 22 bytes long - * Longer than 22 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist + * Up to 23 bytes long + * Longer than 23 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist solely of consecutive newlines, followed by consecutive spaces * If a string does not satisfy the aforementioned conditions, it is heap-allocated From e3b4ae934a4ebb4ae7d3349db5465c51e584d73a Mon Sep 17 00:00:00 2001 From: austaras Date: Tue, 24 Jan 2023 18:03:45 +0800 Subject: [PATCH 241/322] Remove redundant enum value --- src/lib.rs | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 296945d4d3d0..775c5d8f2240 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -338,29 +338,29 @@ const WS: &str = #[repr(u8)] enum InlineSize { _V0 = 0, - _V1 = 1, - _V2 = 2, - _V3 = 3, - _V4 = 4, - _V5 = 5, - _V6 = 6, - _V7 = 7, - _V8 = 8, - _V9 = 9, - _V10 = 10, - _V11 = 11, - _V12 = 12, - _V13 = 13, - _V14 = 14, - _V15 = 15, - _V16 = 16, - _V17 = 17, - _V18 = 18, - _V19 = 19, - _V20 = 20, - _V21 = 21, - _V22 = 22, - _V23 = 23, + _V1, + _V2, + _V3, + _V4, + _V5, + _V6, + _V7, + _V8, + _V9, + _V10, + _V11, + _V12, + _V13, + _V14, + _V15, + _V16, + _V17, + _V18, + _V19, + _V20, + _V21, + _V22, + _V23, } #[derive(Clone, Debug)] From 8a35102805eefe455dd39c892f350e8176c4bb15 Mon Sep 17 00:00:00 2001 From: 
austaras Date: Sun, 5 Feb 2023 08:13:08 +0800 Subject: [PATCH 242/322] Update test --- tests/test.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test.rs b/tests/test.rs index 609a8f7e6e1f..187b39f00110 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -55,7 +55,7 @@ fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner prop_assert_eq!(smol.as_str(), std_str); prop_assert_eq!(smol.len(), std_str.len()); prop_assert_eq!(smol.is_empty(), std_str.is_empty()); - if smol.len() <= 22 { + if smol.len() <= 23 { prop_assert!(!smol.is_heap_allocated()); } Ok(()) @@ -218,7 +218,7 @@ fn test_from_char_iterator() { // String which has too many characters to even consider inlining: Chars::size_hint uses // (`len` + 3) / 4. With `len` = 89, this results in 23, so `from_iter` will immediately // heap allocate - let raw: String = std::iter::repeat('a').take(22 * 4 + 1).collect(); + let raw: String = std::iter::repeat('a').take(23 * 4 + 1).collect(); let s: SmolStr = raw.chars().collect(); assert_eq!(s.as_str(), raw); assert!(s.is_heap_allocated()); From 456160cfce2a64b65bcbaccc556aeb4b317b497b Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 30 Mar 2023 17:36:41 +0200 Subject: [PATCH 243/322] Use optional dependency feature syntax to make serde actually optional --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e46b14fa8340..aa729865aaa3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.24" +version = "0.1.25" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" @@ -18,4 +18,4 @@ serde = { version = "1.0.136", features = ["derive"] } [features] default = ["std"] -std = ["serde/std"] +std = ["serde?/std"] From 2a2b7df56861d98307b83ef8f8861e01bdfb9142 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 31 
Mar 2023 07:32:55 +0200 Subject: [PATCH 244/322] Implement AsRef in favor of generic From impls --- src/lib.rs | 54 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 775c5d8f2240..a67a7fc096c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,8 @@ extern crate alloc; use alloc::{ + borrow::Cow, + boxed::Box, string::{String, ToString}, sync::Arc, }; @@ -290,22 +292,64 @@ impl<'a> iter::FromIterator<&'a str> for SmolStr { } } -impl From for SmolStr -where - T: AsRef, -{ - fn from(text: T) -> Self { +impl AsRef for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl From<&str> for SmolStr { + #[inline] + fn from(s: &str) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<&mut str> for SmolStr { + #[inline] + fn from(s: &mut str) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<&String> for SmolStr { + #[inline] + fn from(s: &String) -> SmolStr { + SmolStr::new(s) + } +} + +impl From for SmolStr { + #[inline(always)] + fn from(text: String) -> Self { Self::new(text) } } +impl From> for SmolStr { + #[inline] + fn from(s: Box) -> SmolStr { + SmolStr::new(s) + } +} + +impl<'a> From> for SmolStr { + #[inline] + fn from(s: Cow<'a, str>) -> SmolStr { + SmolStr::new(s) + } +} + impl From for String { + #[inline(always)] fn from(text: SmolStr) -> Self { text.as_str().into() } } impl Borrow for SmolStr { + #[inline(always)] fn borrow(&self) -> &str { self.as_str() } From 60091177b3fcb65494e253855f8365cc608c3191 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 31 Mar 2023 07:37:08 +0200 Subject: [PATCH 245/322] Clarify size of SmolStr better --- README.md | 2 +- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0cc191018139..610726a216b1 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A `SmolStr` is a string type that has the following properties: -* `size_of::() == 
size_of::()` +* `size_of::() == 24 (therefor == size_of::() on 64 bit platforms) * `Clone` is `O(1)` * Strings are stack-allocated if they are: * Up to 23 bytes long diff --git a/src/lib.rs b/src/lib.rs index a67a7fc096c2..91dc6252cc5c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,7 @@ use core::{ /// A `SmolStr` is a string type that has the following properties: /// -/// * `size_of::() == size_of::()` +/// * `size_of::() == 24 (therefor == size_of::() on 64 bit platforms) /// * `Clone` is `O(1)` /// * Strings are stack-allocated if they are: /// * Up to 23 bytes long From aeae7198dd80e61f7cbac7e8cf23ca0e14702a2f Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 31 Mar 2023 07:37:30 +0200 Subject: [PATCH 246/322] Release 0.2.0 --- Cargo.toml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index aa729865aaa3..c7a646e52721 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.25" +version = "0.2.0" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" diff --git a/README.md b/README.md index 610726a216b1..5e3506846fca 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A `SmolStr` is a string type that has the following properties: -* `size_of::() == 24 (therefor == size_of::() on 64 bit platforms) +* `size_of::() == 24 (therefore == size_of::() on 64 bit platforms) * `Clone` is `O(1)` * Strings are stack-allocated if they are: * Up to 23 bytes long From 11fecff8dec9879129083d89de64864f8a5cb903 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Mon, 19 Jun 2023 08:42:25 -0700 Subject: [PATCH 247/322] Update ungrammar.ungram with proper labeled rule Labels are followed by rules The ungrammar listed in https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html seems to be correct --- ungrammar.ungram | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/ungrammar.ungram b/ungrammar.ungram index 6cb4e10fb14b..856a6cede0db 100644 --- a/ungrammar.ungram +++ b/ungrammar.ungram @@ -13,4 +13,4 @@ Rule = | Rule '?' | Rule '*' | '(' Rule ')' -| label:'ident' ':' +| label:'ident' ':' Rule From 5289a56013181cbdbf31e99c7b2ff611e0d0200a Mon Sep 17 00:00:00 2001 From: Alona Enraght-Moony Date: Thu, 29 Jun 2023 14:51:33 +0100 Subject: [PATCH 248/322] implement `Clone` for `Rule` --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 7aa0ce9c88df..6adf8ef8ea6a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -101,7 +101,7 @@ pub struct TokenData { } /// A production rule. -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq)] pub enum Rule { /// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule). Labeled { From 5f22fbe12550a2c70caff9a8c5347d6722370469 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 30 Jun 2023 09:18:45 +0200 Subject: [PATCH 249/322] Constify some constructors --- Cargo.toml | 2 +- src/range.rs | 14 +++++++------- src/size.rs | 20 ++++++++++++++++---- tests/indexing.rs | 4 ++-- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 19c5a92670d6..7882f7cc3526 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-size" -version = "1.1.0" +version = "1.1.1" edition = "2018" authors = [ diff --git a/src/range.rs b/src/range.rs index 4a98deec5683..9b981642d16d 100644 --- a/src/range.rs +++ b/src/range.rs @@ -44,8 +44,8 @@ impl TextRange { /// assert_eq!(range.len(), end - start); /// ``` #[inline] - pub fn new(start: TextSize, end: TextSize) -> TextRange { - assert!(start <= end); + pub const fn new(start: TextSize, end: TextSize) -> TextRange { + assert!(start.raw <= end.raw); TextRange { start, end } } @@ -65,8 +65,8 @@ impl TextRange { /// assert_eq!(&text[range], "23456") /// ``` #[inline] - pub fn at(offset: TextSize, 
len: TextSize) -> TextRange { - TextRange::new(offset, offset + len) + pub const fn at(offset: TextSize, len: TextSize) -> TextRange { + TextRange::new(offset, TextSize::new(offset.raw + len.raw)) } /// Create a zero-length range at the specified offset (`offset..offset`). @@ -82,7 +82,7 @@ impl TextRange { /// assert_eq!(range, TextRange::new(point, point)); /// ``` #[inline] - pub fn empty(offset: TextSize) -> TextRange { + pub const fn empty(offset: TextSize) -> TextRange { TextRange { start: offset, end: offset, @@ -104,9 +104,9 @@ impl TextRange { /// assert_eq!(range, TextRange::at(0.into(), point)); /// ``` #[inline] - pub fn up_to(end: TextSize) -> TextRange { + pub const fn up_to(end: TextSize) -> TextRange { TextRange { - start: 0.into(), + start: TextSize::new(0), end, } } diff --git a/src/size.rs b/src/size.rs index ab2ec9a73076..c950d2edd041 100644 --- a/src/size.rs +++ b/src/size.rs @@ -33,6 +33,12 @@ impl fmt::Debug for TextSize { } impl TextSize { + /// Creates a new instance of `TextSize` from a raw `u32`. + #[inline] + pub const fn new(raw: u32) -> TextSize { + TextSize { raw } + } + /// The text size of some primitive text-like object. /// /// Accepts `char`, `&str`, and `&String`. @@ -58,14 +64,20 @@ impl TextSize { impl TextSize { /// Checked addition. Returns `None` if overflow occurred. #[inline] - pub fn checked_add(self, rhs: TextSize) -> Option { - self.raw.checked_add(rhs.raw).map(|raw| TextSize { raw }) + pub const fn checked_add(self, rhs: TextSize) -> Option { + match self.raw.checked_add(rhs.raw) { + Some(raw) => Some(TextSize { raw }), + None => None, + } } /// Checked subtraction. Returns `None` if overflow occurred. 
#[inline] - pub fn checked_sub(self, rhs: TextSize) -> Option { - self.raw.checked_sub(rhs.raw).map(|raw| TextSize { raw }) + pub const fn checked_sub(self, rhs: TextSize) -> Option { + match self.raw.checked_sub(rhs.raw) { + Some(raw) => Some(TextSize { raw }), + None => None, + } } } diff --git a/tests/indexing.rs b/tests/indexing.rs index ebbed7700d77..93ba3c7cab1b 100644 --- a/tests/indexing.rs +++ b/tests/indexing.rs @@ -3,6 +3,6 @@ use text_size::*; #[test] fn main() { let range = TextRange::default(); - &""[range]; - &String::new()[range]; + _ = &""[range]; + _ = &String::new()[range]; } From 1d4189c0afa157dbff7d8f9efa73cd77020f3fe5 Mon Sep 17 00:00:00 2001 From: tison Date: Sat, 1 Jul 2023 15:13:32 +0800 Subject: [PATCH 250/322] Update repo link in Cargo.toml --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 833d47990aea..920d9ef49d0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "ungrammar" description = "A DSL for describing concrete syntax trees" version = "1.16.1" license = "MIT OR Apache-2.0" -repository = "https://github.com/matklad/ungrammar" +repository = "https://github.com/rust-analyzer/ungrammar" edition = "2018" exclude = ["/bors.toml", "/.github"] From 37f399184ee833743de424ae89f1995c4c6cc8c9 Mon Sep 17 00:00:00 2001 From: Scott Driggers Date: Fri, 25 Aug 2023 09:39:14 -0400 Subject: [PATCH 251/322] Implementing `From> for SmolStr` and `From for Arc` Also adding one test to verify --- src/lib.rs | 17 +++++++++++++++++ tests/test.rs | 8 +++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 91dc6252cc5c..f09d0010c4d8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -334,6 +334,13 @@ impl From> for SmolStr { } } +impl From> for SmolStr { + #[inline] + fn from(s: Arc) -> SmolStr { + SmolStr(Repr::Heap(s)) + } +} + impl<'a> From> for SmolStr { #[inline] fn from(s: Cow<'a, str>) -> SmolStr { @@ -341,6 +348,16 @@ impl<'a> From> for 
SmolStr { } } +impl From for Arc { + #[inline(always)] + fn from(text: SmolStr) -> Self { + match text.0 { + Repr::Heap(data) => data, + _ => text.as_str().into(), + } + } +} + impl From for String { #[inline(always)] fn from(text: SmolStr) -> Self { diff --git a/tests/test.rs b/tests/test.rs index 187b39f00110..1fbe7d667d75 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use proptest::{prop_assert, prop_assert_eq, proptest}; use smol_str::SmolStr; @@ -21,7 +23,11 @@ fn assert_traits() { fn conversions() { let s: SmolStr = "Hello, World!".into(); let s: String = s.into(); - assert_eq!(s, "Hello, World!") + assert_eq!(s, "Hello, World!"); + + let s: SmolStr = Arc::::from("Hello, World!").into(); + let s: Arc = s.into(); + assert_eq!(s.as_ref(), "Hello, World!"); } #[test] From 2c54bb602cc5cc2dca6d05c19e31f407bdae3429 Mon Sep 17 00:00:00 2001 From: Scott Driggers Date: Tue, 5 Sep 2023 14:45:32 -0400 Subject: [PATCH 252/322] Enforcing stack if can be put on stack --- src/lib.rs | 62 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f09d0010c4d8..5f0431d41999 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -337,7 +337,8 @@ impl From> for SmolStr { impl From> for SmolStr { #[inline] fn from(s: Arc) -> SmolStr { - SmolStr(Repr::Heap(s)) + let repr = Repr::new_on_stack(s.as_ref()).unwrap_or_else(|| Repr::Heap(s)); + Self(repr) } } @@ -438,40 +439,45 @@ enum Repr { } impl Repr { - fn new(text: T) -> Self + /// This function tries to create a new Repr::Inline or Repr::Substring + /// If it isn't possible, this function returns None + fn new_on_stack(text: T) -> Option where T: AsRef, { - { - let text = text.as_ref(); - - let len = text.len(); - if len <= INLINE_CAP { - let mut buf = [0; INLINE_CAP]; - buf[..len].copy_from_slice(text.as_bytes()); - return Repr::Inline { - len: unsafe { transmute(len as u8) }, - buf, - }; - } + let text = 
text.as_ref(); + + let len = text.len(); + if len <= INLINE_CAP { + let mut buf = [0; INLINE_CAP]; + buf[..len].copy_from_slice(text.as_bytes()); + return Some(Repr::Inline { + len: unsafe { transmute(len as u8) }, + buf, + }); + } - if len <= N_NEWLINES + N_SPACES { - let bytes = text.as_bytes(); - let possible_newline_count = cmp::min(len, N_NEWLINES); - let newlines = bytes[..possible_newline_count] - .iter() - .take_while(|&&b| b == b'\n') - .count(); - let possible_space_count = len - newlines; - if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') - { - let spaces = possible_space_count; - return Repr::Substring { newlines, spaces }; - } + if len <= N_NEWLINES + N_SPACES { + let bytes = text.as_bytes(); + let possible_newline_count = cmp::min(len, N_NEWLINES); + let newlines = bytes[..possible_newline_count] + .iter() + .take_while(|&&b| b == b'\n') + .count(); + let possible_space_count = len - newlines; + if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') { + let spaces = possible_space_count; + return Some(Repr::Substring { newlines, spaces }); } } + None + } - Repr::Heap(text.as_ref().into()) + fn new(text: T) -> Self + where + T: AsRef, + { + Self::new_on_stack(text.as_ref()).unwrap_or_else(|| Repr::Heap(text.as_ref().into())) } #[inline(always)] From 0b0514bfeabb5886594015a99ba0adb16796d900 Mon Sep 17 00:00:00 2001 From: MultisampledNight Date: Wed, 20 Sep 2023 18:26:08 +0200 Subject: [PATCH 253/322] docs: fix missing codeblock backtick --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 5f0431d41999..692803e44763 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,7 @@ use core::{ /// A `SmolStr` is a string type that has the following properties: /// -/// * `size_of::() == 24 (therefor == size_of::() on 64 bit platforms) +/// * `size_of::() == 24` (therefor `== size_of::()` on 64 bit platforms) /// * `Clone` is `O(1)` /// * 
Strings are stack-allocated if they are: /// * Up to 23 bytes long From f8ed96180d7c3c8dab5cd8e7589dac61ef2a3d7e Mon Sep 17 00:00:00 2001 From: Moulins Date: Tue, 2 Jan 2024 01:13:21 +0100 Subject: [PATCH 254/322] feat: Add `SmolStr::from_static` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows creating `SmolStr`s longer than 23 bytes in constant contexts. This is done by replacing the `Repr::Substring` variant by a more general `Repr::Static(&'static str)` variant, and borrowing from ̀`WS` directly instead of storing two `usize`s. As a bonus, it also simplifies the `as_str` implementation, hopefully saving an extra branch. --- README.md | 3 ++- src/lib.rs | 39 ++++++++++++++++++++++++--------------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 5e3506846fca..ce16759e812c 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,14 @@ A `SmolStr` is a string type that has the following properties: -* `size_of::() == 24 (therefore == size_of::() on 64 bit platforms) +* `size_of::() == 24` (therefore `== size_of::()` on 64 bit platforms) * `Clone` is `O(1)` * Strings are stack-allocated if they are: * Up to 23 bytes long * Longer than 23 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist solely of consecutive newlines, followed by consecutive spaces * If a string does not satisfy the aforementioned conditions, it is heap-allocated +* Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation Unlike `String`, however, `SmolStr` is immutable. The primary use case for `SmolStr` is a good enough default storage for tokens of typical programming diff --git a/src/lib.rs b/src/lib.rs index 692803e44763..a27b14c0ad71 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,7 @@ use core::{ /// * Longer than 23 bytes, but substrings of `WS` (see below). 
Such strings consist /// solely of consecutive newlines, followed by consecutive spaces /// * If a string does not satisfy the aforementioned conditions, it is heap-allocated +/// * Additionally, a `SmolStr` can be explicitely created from a `&'static str` without allocation /// /// Unlike `String`, however, `SmolStr` is immutable. The primary use case for /// `SmolStr` is a good enough default storage for tokens of typical programming @@ -78,6 +79,17 @@ impl SmolStr { }) } + /// Constructs a `SmolStr` from a statically allocated string. + /// + /// This never allocates. + #[inline(always)] + pub const fn new_static(text: &'static str) -> SmolStr { + // NOTE: this never uses the inline storage; if a canonical + // representation is needed, we could check for `len() < INLINE_CAP` + // and call `new_inline`, but this would mean an extra branch. + SmolStr(Repr::Static(text)) + } + pub fn new(text: T) -> SmolStr where T: AsRef, @@ -395,6 +407,11 @@ const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; const WS: &str = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; +const _: () = { + assert!(WS.len() == N_NEWLINES + N_SPACES); + assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n'); + assert!(WS.as_bytes()[N_NEWLINES] == b' '); +}; #[derive(Clone, Copy, Debug)] #[repr(u8)] @@ -428,18 +445,15 @@ enum InlineSize { #[derive(Clone, Debug)] enum Repr { Heap(Arc), + Static(&'static str), Inline { len: InlineSize, buf: [u8; INLINE_CAP], }, - Substring { - newlines: usize, - spaces: usize, - }, } impl Repr { - /// This function tries to create a new Repr::Inline or Repr::Substring + /// This function tries to create a new Repr::Inline or Repr::Static /// If it isn't possible, this function returns None fn new_on_stack(text: T) -> Option where @@ -467,7 +481,8 @@ impl Repr { let possible_space_count = len - newlines; if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') { let spaces = possible_space_count; - return 
Some(Repr::Substring { newlines, spaces }); + let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]; + return Some(Repr::Static(substring)); } } None @@ -484,8 +499,8 @@ impl Repr { fn len(&self) -> usize { match self { Repr::Heap(data) => data.len(), + Repr::Static(data) => data.len(), Repr::Inline { len, .. } => *len as usize, - Repr::Substring { newlines, spaces } => *newlines + *spaces, } } @@ -493,9 +508,8 @@ impl Repr { fn is_empty(&self) -> bool { match self { Repr::Heap(data) => data.is_empty(), + Repr::Static(data) => data.is_empty(), Repr::Inline { len, .. } => *len as u8 == 0, - // A substring isn't created for an empty string. - Repr::Substring { .. } => false, } } @@ -503,17 +517,12 @@ impl Repr { fn as_str(&self) -> &str { match self { Repr::Heap(data) => &*data, + Repr::Static(data) => data, Repr::Inline { len, buf } => { let len = *len as usize; let buf = &buf[..len]; unsafe { ::core::str::from_utf8_unchecked(buf) } } - Repr::Substring { newlines, spaces } => { - let newlines = *newlines; - let spaces = *spaces; - assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); - &WS[N_NEWLINES - newlines..N_NEWLINES + spaces] - } } } } From 11925144454cd09c1a3c2effe8561b28aa50ceef Mon Sep 17 00:00:00 2001 From: novacrazy Date: Mon, 15 Jan 2024 17:35:11 -0600 Subject: [PATCH 255/322] Add Writer and ToSmolStr --- src/lib.rs | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++ tests/test.rs | 12 +++++++ 2 files changed, 101 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index a27b14c0ad71..c81d6ed5e5d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -527,6 +527,95 @@ impl Repr { } } +/// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating. +/// +/// Almost identical to [`ToString`], but converts to `SmolStr` instead. +pub trait ToSmolStr { + fn to_smolstr(&self) -> SmolStr; +} + +/// Formats arguments to a [`SmolStr`], potentially without allocating. 
+/// +/// See [`alloc::format!`] or [`format_args!`] for syntax documentation. +#[macro_export] +macro_rules! format_smolstr { + ($($tt:tt)*) => {{ + use ::core::fmt::Write; + let mut w = $crate::Writer::new(); + w.write_fmt(format_args!($($tt)*)).expect("a formatting trait implementation returned an error"); + $crate::SmolStr::from(w) + }}; +} + +#[doc(hidden)] +pub struct Writer { + inline: [u8; INLINE_CAP], + heap: String, + len: usize, +} + +impl Writer { + pub const fn new() -> Self { + Writer { + inline: [0; INLINE_CAP], + heap: String::new(), + len: 0, + } + } +} + +impl fmt::Write for Writer { + fn write_str(&mut self, s: &str) -> fmt::Result { + // if currently on the stack + if self.len <= INLINE_CAP { + let old_len = self.len; + self.len += s.len(); + + // if the new length will fit on the stack (even if it fills it entirely) + if self.len <= INLINE_CAP { + self.inline[old_len..self.len].copy_from_slice(s.as_bytes()); + + return Ok(()); // skip the heap push below + } else { + self.heap.reserve(self.len); + + // copy existing inline bytes over to the heap + unsafe { + self.heap + .as_mut_vec() + .extend_from_slice(&self.inline[..old_len]); + } + } + } + + self.heap.push_str(s); + + Ok(()) + } +} + +impl From for SmolStr { + fn from(value: Writer) -> Self { + SmolStr(if value.len <= INLINE_CAP { + Repr::Inline { + len: unsafe { transmute(value.len as u8) }, + buf: value.inline, + } + } else { + Repr::new(value.heap) + }) + } +} + +impl ToSmolStr for T +where + T: fmt::Display + ?Sized, +{ + fn to_smolstr(&self) -> SmolStr { + format_smolstr!("{}", self) + } +} + #[cfg(feature = "serde")] mod serde { use alloc::{string::String, vec::Vec}; diff --git a/tests/test.rs b/tests/test.rs index 1fbe7d667d75..ef5749ac9cf5 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -258,3 +258,15 @@ fn test_bad_size_hint_char_iter() { assert!(!new.is_heap_allocated()); assert_eq!(new, collected); } + +#[test] +fn test_to_smolstr() { + use smol_str::ToSmolStr; + + for i in 
0..26 { + let a = &"abcdefghijklmnopqrstuvwxyz"[i..]; + + assert_eq!(a, a.to_smolstr()); + assert_eq!(a, smol_str::format_smolstr!("{}", a)); + } +} From 0ca4ff7adc6df246f1115d73eefe523ce8d3c890 Mon Sep 17 00:00:00 2001 From: novacrazy Date: Tue, 16 Jan 2024 03:30:06 -0600 Subject: [PATCH 256/322] Cleanup --- src/lib.rs | 55 +++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c81d6ed5e5d3..37151adcdd4d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,7 @@ #![no_std] extern crate alloc; -use alloc::{ - borrow::Cow, - boxed::Box, - string::{String, ToString}, - sync::Arc, -}; +use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc}; use core::{ borrow::Borrow, cmp::{self, Ordering}, @@ -41,7 +36,7 @@ pub struct SmolStr(Repr); impl SmolStr { #[deprecated = "Use `new_inline` instead"] pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { - let _len_is_short = [(); INLINE_CAP + 1][len]; + assert!(len <= INLINE_CAP); const ZEROS: &[u8] = &[0; INLINE_CAP]; @@ -102,9 +97,12 @@ impl SmolStr { self.0.as_str() } + #[allow(clippy::inherent_to_string_shadow_display)] #[inline(always)] pub fn to_string(&self) -> String { - self.as_str().to_string() + use alloc::borrow::ToOwned; + + self.as_str().to_owned() } #[inline(always)] @@ -118,11 +116,8 @@ impl SmolStr { } #[inline(always)] - pub fn is_heap_allocated(&self) -> bool { - match self.0 { - Repr::Heap(..) 
=> true, - _ => false, - } + pub const fn is_heap_allocated(&self) -> bool { + matches!(self.0, Repr::Heap(..)) } fn from_char_iter>(mut iter: I) -> SmolStr { @@ -154,14 +149,19 @@ impl SmolStr { } impl Default for SmolStr { + #[inline(always)] fn default() -> SmolStr { - SmolStr::new("") + SmolStr(Repr::Inline { + len: InlineSize::_V0, + buf: [0; INLINE_CAP], + }) } } impl Deref for SmolStr { type Target = str; + #[inline(always)] fn deref(&self) -> &str { self.as_str() } @@ -237,7 +237,7 @@ impl PartialOrd for SmolStr { impl hash::Hash for SmolStr { fn hash(&self, hasher: &mut H) { - self.as_str().hash(hasher) + self.as_str().hash(hasher); } } @@ -273,11 +273,11 @@ where if size + len > INLINE_CAP { let mut heap = String::with_capacity(size + len); heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); - heap.push_str(&slice); + heap.push_str(slice); heap.extend(iter); return SmolStr(Repr::Heap(heap.into_boxed_str().into())); } - (&mut buf[len..][..size]).copy_from_slice(slice.as_bytes()); + buf[len..][..size].copy_from_slice(slice.as_bytes()); len += size; } SmolStr(Repr::Inline { @@ -516,7 +516,7 @@ impl Repr { #[inline] fn as_str(&self) -> &str { match self { - Repr::Heap(data) => &*data, + Repr::Heap(data) => data, Repr::Static(data) => data, Repr::Inline { len, buf } => { let len = *len as usize; @@ -555,6 +555,7 @@ pub struct Writer { } impl Writer { + #[must_use] pub const fn new() -> Self { Writer { inline: [0; INLINE_CAP], @@ -576,15 +577,15 @@ impl fmt::Write for Writer { self.inline[old_len..self.len].copy_from_slice(s.as_bytes()); return Ok(()); // skip the heap push below - } else { - self.heap.reserve(self.len); - - // copy existing inline bytes over to the heap - unsafe { - self.heap - .as_mut_vec() - .extend_from_slice(&self.inline[..old_len]); - } + } + + self.heap.reserve(self.len); + + // copy existing inline bytes over to the heap + unsafe { + self.heap + .as_mut_vec() + .extend_from_slice(&self.inline[..old_len]); } } From 
69ff33c83dd56a118d20a2e6cb18e56ea99c998d Mon Sep 17 00:00:00 2001 From: novacrazy Date: Tue, 16 Jan 2024 03:37:11 -0600 Subject: [PATCH 257/322] Fix new_inline codegen --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 37151adcdd4d..24b3254fd420 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -62,6 +62,8 @@ impl SmolStr { /// Panics if `text.len() > 23`. #[inline] pub const fn new_inline(text: &str) -> SmolStr { + assert!(text.len() <= INLINE_CAP); // avoids checks in loop + let mut buf = [0; INLINE_CAP]; let mut i = 0; while i < text.len() { From 01114d2f9c1b0403f819f1eb42a6726c9b5ab1bd Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 16 Jan 2024 10:55:38 +0100 Subject: [PATCH 258/322] Publish 0.2.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c7a646e52721..1d04d437678d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.2.0" +version = "0.2.1" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" From 14cbe540317f598aff822aaf8b28bdcd8e0af8ea Mon Sep 17 00:00:00 2001 From: Anton Sol Date: Wed, 17 Jan 2024 19:30:57 +0100 Subject: [PATCH 259/322] add benchmarks This regressed from a previous attempt. The worst of the old results were in the range 450.000 current: test bench::bench_derive_clone ... bench: 1,653,247 ns/iter (+/- 32,781) test bench::bench_match_clone ... bench: 1,716,482 ns/iter (+/- 34,192) test bench::bench_new_clone ... 
bench: 1,717,985 ns/iter (+/- 52,137) --- src/lib.rs | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 24b3254fd420..e5521944c31c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(core_intrinsics, test)] #![no_std] extern crate alloc; @@ -33,7 +34,63 @@ use core::{ #[derive(Clone)] pub struct SmolStr(Repr); +mod bench { + extern crate test; + use test::Bencher; + fn test_strings() -> [crate::SmolStr; 200] { + [0; 200].map(|_| crate::SmolStr::new("0123456780")) + } + #[bench] + fn bench_derive_clone(b: &mut Bencher) { + let it = test::black_box(test_strings()); + b.iter(|| { + (0..1000) + .map(|_| it.iter().map(|e| e.clone())) + .flatten() + .filter(|o| o.is_heap_allocated()) + .count() + }) + } + #[bench] + fn bench_new_clone(b: &mut Bencher) { + let it = test::black_box(test_strings()); + b.iter(|| { + (0..1000) + .map(|_| it.iter().map(|e| e.new_clone())) + .flatten() + .filter(|o| o.is_heap_allocated()) + .count() + }) + } + #[bench] + fn bench_match_clone(b: &mut Bencher) { + let it = test::black_box(test_strings()); + b.iter(|| { + (0..1000) + .map(|_| it.iter().map(|e| e.match_clone())) + .flatten() + .filter(|o| o.is_heap_allocated()) + .count() + }) + } +} + impl SmolStr { + + #[inline(always)] + pub fn new_clone(&self) -> Self { + if !self.is_heap_allocated() { + return unsafe { core::mem::transmute_copy(self) }; + } + Self(self.0.clone()) + } + #[inline(always)] + pub fn match_clone(&self) -> Self { + match &self.0 { + Repr::Heap(h) => return Self(Repr::Heap(h.clone())), + _ => unsafe { core::mem::transmute_copy(self) }, + } + } #[deprecated = "Use `new_inline` instead"] pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { assert!(len <= INLINE_CAP); From d5994d5069398ab97d3e2fec970365d94e189bb6 Mon Sep 17 00:00:00 2001 From: Anton Sol Date: Wed, 17 Jan 2024 19:31:42 +0100 Subject: [PATCH 260/322] fix clone regression test 
bench::bench_derive_clone ... bench: 454,318 ns/iter (+/- 11,401) test bench::bench_match_clone ... bench: 183,570 ns/iter (+/- 10,652) test bench::bench_new_clone ... bench: 177,907 ns/iter (+/- 2,234) --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e5521944c31c..654ed660ef3b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -503,12 +503,12 @@ enum InlineSize { #[derive(Clone, Debug)] enum Repr { - Heap(Arc), - Static(&'static str), Inline { len: InlineSize, buf: [u8; INLINE_CAP], }, + Static(&'static str), + Heap(Arc), } impl Repr { From e1a98b154262b698db3c56d0186f019725a26104 Mon Sep 17 00:00:00 2001 From: Anton Sol Date: Wed, 17 Jan 2024 19:40:14 +0100 Subject: [PATCH 261/322] Manually impl clone --- src/lib.rs | 60 +++++++----------------------------------------------- 1 file changed, 7 insertions(+), 53 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 654ed660ef3b..eda9aa32033a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,66 +31,20 @@ use core::{ /// Note that a specialized interner might be a better solution for some use cases. /// /// `WS`: A string of 32 newlines followed by 128 spaces. 
-#[derive(Clone)] pub struct SmolStr(Repr); -mod bench { - extern crate test; - use test::Bencher; - fn test_strings() -> [crate::SmolStr; 200] { - [0; 200].map(|_| crate::SmolStr::new("0123456780")) - } - #[bench] - fn bench_derive_clone(b: &mut Bencher) { - let it = test::black_box(test_strings()); - b.iter(|| { - (0..1000) - .map(|_| it.iter().map(|e| e.clone())) - .flatten() - .filter(|o| o.is_heap_allocated()) - .count() - }) - } - #[bench] - fn bench_new_clone(b: &mut Bencher) { - let it = test::black_box(test_strings()); - b.iter(|| { - (0..1000) - .map(|_| it.iter().map(|e| e.new_clone())) - .flatten() - .filter(|o| o.is_heap_allocated()) - .count() - }) - } - #[bench] - fn bench_match_clone(b: &mut Bencher) { - let it = test::black_box(test_strings()); - b.iter(|| { - (0..1000) - .map(|_| it.iter().map(|e| e.match_clone())) - .flatten() - .filter(|o| o.is_heap_allocated()) - .count() - }) +impl Clone for SmolStr{ + #[inline] + fn clone(&self) -> Self { + if !self.is_heap_allocated() { + return unsafe { core::ptr::read(self as *const SmolStr) }; + } + Self(self.0.clone()) } } impl SmolStr { - #[inline(always)] - pub fn new_clone(&self) -> Self { - if !self.is_heap_allocated() { - return unsafe { core::mem::transmute_copy(self) }; - } - Self(self.0.clone()) - } - #[inline(always)] - pub fn match_clone(&self) -> Self { - match &self.0 { - Repr::Heap(h) => return Self(Repr::Heap(h.clone())), - _ => unsafe { core::mem::transmute_copy(self) }, - } - } #[deprecated = "Use `new_inline` instead"] pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { assert!(len <= INLINE_CAP); From a063f407993b25611461653447cb9ede9e1af14d Mon Sep 17 00:00:00 2001 From: Anton Sol Date: Wed, 17 Jan 2024 20:01:51 +0100 Subject: [PATCH 262/322] remove nightly benchmark feature --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index eda9aa32033a..b78a86b1bccc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,3 @@ 
-#![feature(core_intrinsics, test)] #![no_std] extern crate alloc; From 0fb3a130ceef2644f9da32e40a29189de54f11a6 Mon Sep 17 00:00:00 2001 From: Anton Sol Date: Wed, 31 Jan 2024 17:40:49 +0100 Subject: [PATCH 263/322] reformat --- src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b78a86b1bccc..e40323369470 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,7 +32,7 @@ use core::{ /// `WS`: A string of 32 newlines followed by 128 spaces. pub struct SmolStr(Repr); -impl Clone for SmolStr{ +impl Clone for SmolStr { #[inline] fn clone(&self) -> Self { if !self.is_heap_allocated() { @@ -43,7 +43,6 @@ impl Clone for SmolStr{ } impl SmolStr { - #[deprecated = "Use `new_inline` instead"] pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { assert!(len <= INLINE_CAP); From fe7064e3ec4ab94e6cf140d7298d49d5eb35ae08 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Tue, 30 Jan 2024 01:04:53 +0000 Subject: [PATCH 264/322] Add StrExt, to_lowercase_smolstr & friends --- src/lib.rs | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ tests/test.rs | 45 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index e40323369470..79a22b6021b3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -545,6 +545,61 @@ pub trait ToSmolStr { fn to_smolstr(&self) -> SmolStr; } +/// [`str`] methods producing [`SmolStr`]s. +pub trait StrExt: private::Sealed { + /// Returns the lowercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_lowercase`]. + fn to_lowercase_smolstr(&self) -> SmolStr; + + /// Returns the uppercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_uppercase`]. 
+ fn to_uppercase_smolstr(&self) -> SmolStr; + + /// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_ascii_lowercase`]. + fn to_ascii_lowercase_smolstr(&self) -> SmolStr; + + /// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_ascii_uppercase`]. + fn to_ascii_uppercase_smolstr(&self) -> SmolStr; +} + +impl StrExt for str { + #[inline] + fn to_lowercase_smolstr(&self) -> SmolStr { + SmolStr::from_char_iter(self.chars().flat_map(|c| c.to_lowercase())) + } + + #[inline] + fn to_uppercase_smolstr(&self) -> SmolStr { + SmolStr::from_char_iter(self.chars().flat_map(|c| c.to_uppercase())) + } + + #[inline] + fn to_ascii_lowercase_smolstr(&self) -> SmolStr { + SmolStr::from_char_iter(self.chars().map(|c| c.to_ascii_lowercase())) + } + + #[inline] + fn to_ascii_uppercase_smolstr(&self) -> SmolStr { + SmolStr::from_char_iter(self.chars().map(|c| c.to_ascii_uppercase())) + } +} + +mod private { + /// No downstream impls allowed. + pub trait Sealed {} + impl Sealed for str {} +} + /// Formats arguments to a [`SmolStr`], potentially without allocating. /// /// See [`alloc::format!`] or [`format_args!`] for syntax documentation. diff --git a/tests/test.rs b/tests/test.rs index ef5749ac9cf5..11b7df710a9d 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -224,7 +224,7 @@ fn test_from_char_iterator() { // String which has too many characters to even consider inlining: Chars::size_hint uses // (`len` + 3) / 4. 
With `len` = 89, this results in 23, so `from_iter` will immediately // heap allocate - let raw: String = std::iter::repeat('a').take(23 * 4 + 1).collect(); + let raw = "a".repeat(23 * 4 + 1); let s: SmolStr = raw.chars().collect(); assert_eq!(s.as_str(), raw); assert!(s.is_heap_allocated()); @@ -270,3 +270,46 @@ fn test_to_smolstr() { assert_eq!(a, smol_str::format_smolstr!("{}", a)); } } + +#[cfg(test)] +mod test_str_ext { + use smol_str::StrExt; + + #[test] + fn large() { + let lowercase = "aaaaaaAAAAAaaaaaaaaaaaaaaaaaaaaaAAAAaaaaaaaaaaaaaa".to_lowercase_smolstr(); + assert_eq!( + lowercase, + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + ); + assert!(lowercase.is_heap_allocated()); + } + + #[test] + fn to_lowercase() { + let lowercase = "aßΔC".to_lowercase_smolstr(); + assert_eq!(lowercase, "aßδc"); + assert!(!lowercase.is_heap_allocated()); + } + + #[test] + fn to_uppercase() { + let uppercase = "aßΔC".to_uppercase_smolstr(); + assert_eq!(uppercase, "ASSΔC"); + assert!(!uppercase.is_heap_allocated()); + } + + #[test] + fn to_ascii_lowercase() { + let uppercase = "aßΔC".to_ascii_lowercase_smolstr(); + assert_eq!(uppercase, "aßΔc"); + assert!(!uppercase.is_heap_allocated()); + } + + #[test] + fn to_ascii_uppercase() { + let uppercase = "aßΔC".to_ascii_uppercase_smolstr(); + assert_eq!(uppercase, "AßΔC"); + assert!(!uppercase.is_heap_allocated()); + } +} From 0078fdf295e18699b083617da4068b0a674b95a7 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Thu, 8 Feb 2024 19:02:39 +0000 Subject: [PATCH 265/322] Add must_use to StrExt methods --- src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 79a22b6021b3..192f9723538d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -551,24 +551,28 @@ pub trait StrExt: private::Sealed { /// potentially without allocating. /// /// See [`str::to_lowercase`]. 
+ #[must_use = "this returns a new SmolStr without modifying the original"] fn to_lowercase_smolstr(&self) -> SmolStr; /// Returns the uppercase equivalent of this string slice as a new [`SmolStr`], /// potentially without allocating. /// /// See [`str::to_uppercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] fn to_uppercase_smolstr(&self) -> SmolStr; /// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`], /// potentially without allocating. /// /// See [`str::to_ascii_lowercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] fn to_ascii_lowercase_smolstr(&self) -> SmolStr; /// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`], /// potentially without allocating. /// /// See [`str::to_ascii_uppercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] fn to_ascii_uppercase_smolstr(&self) -> SmolStr; } From 9971a3f59ab1f0ca862d98cb37e457eb30cfa579 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Wed, 17 Jan 2024 21:55:59 +0100 Subject: [PATCH 266/322] Add `PartialEq` shortcut for `ptr_eq` strings This first compares the `Repr` before falling back to actually comparing the raw `as_str` itself. In some micro-benchmarks, this speeds up inline and heap string comparisons when equal by ~70%. There is a tiny hit in the non-equal case however. It is also noteworthy that the assembly generated for `Repr` is horrible, and looks like its above the inlining threshold now. 
--- src/lib.rs | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 192f9723538d..375a4a5b6b2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -180,7 +180,7 @@ impl Deref for SmolStr { impl PartialEq for SmolStr { fn eq(&self, other: &SmolStr) -> bool { - self.as_str() == other.as_str() + self.0.ptr_eq(&other.0) || self.as_str() == other.as_str() } } @@ -424,7 +424,7 @@ const _: () = { assert!(WS.as_bytes()[N_NEWLINES] == b' '); }; -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq)] #[repr(u8)] enum InlineSize { _V0 = 0, @@ -536,6 +536,24 @@ impl Repr { } } } + + fn ptr_eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Heap(l0), Self::Heap(r0)) => Arc::ptr_eq(l0, r0), + (Self::Static(l0), Self::Static(r0)) => core::ptr::eq(l0, r0), + ( + Self::Inline { + len: l_len, + buf: l_buf, + }, + Self::Inline { + len: r_len, + buf: r_buf, + }, + ) => l_len == r_len && l_buf == r_buf, + _ => false, + } + } } /// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating. From 1e2145d1dccf7f0418b6fac7b9b587d964da5ca2 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Thu, 8 Feb 2024 19:00:25 +0000 Subject: [PATCH 267/322] Add StrExt::replace_smolstr, replacen_smolstr --- src/lib.rs | 47 ++++++++++++++++++++++++++++++++++++++++++----- tests/test.rs | 14 ++++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 375a4a5b6b2a..9afe2a932c36 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -592,6 +592,22 @@ pub trait StrExt: private::Sealed { /// See [`str::to_ascii_uppercase`]. #[must_use = "this returns a new SmolStr without modifying the original"] fn to_ascii_uppercase_smolstr(&self) -> SmolStr; + + /// Replaces all matches of a &str with another &str returning a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::replace`]. + // TODO: Use `Pattern` when stable. 
+ #[must_use = "this returns a new SmolStr without modifying the original"] + fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr; + + /// Replaces first N matches of a &str with another &str returning a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::replacen`]. + // TODO: Use `Pattern` when stable. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr; } impl StrExt for str { @@ -614,6 +630,24 @@ impl StrExt for str { fn to_ascii_uppercase_smolstr(&self) -> SmolStr { SmolStr::from_char_iter(self.chars().map(|c| c.to_ascii_uppercase())) } + + #[inline] + fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr { + self.replacen_smolstr(from, to, usize::MAX) + } + + #[inline] + fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr { + let mut result = Writer::new(); + let mut last_end = 0; + for (start, part) in self.match_indices(from).take(count) { + result.push_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_str(to); + last_end = start + part.len(); + } + result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); + SmolStr::from(result) + } } mod private { @@ -651,10 +685,8 @@ impl Writer { len: 0, } } -} -impl fmt::Write for Writer { - fn write_str(&mut self, s: &str) -> fmt::Result { + fn push_str(&mut self, s: &str) { // if currently on the stack if self.len <= INLINE_CAP { let old_len = self.len; @@ -663,8 +695,7 @@ impl fmt::Write for Writer { // if the new length will fit on the stack (even if it fills it entirely) if self.len <= INLINE_CAP { self.inline[old_len..self.len].copy_from_slice(s.as_bytes()); - - return Ok(()); // skip the heap push below + return; // skip the heap push below } self.heap.reserve(self.len); @@ -678,7 +709,13 @@ impl fmt::Write for Writer { } self.heap.push_str(s); + } +} +impl fmt::Write for Writer { + #[inline] + fn write_str(&mut self, s: &str) 
-> fmt::Result { + self.push_str(s); Ok(()) } } diff --git a/tests/test.rs b/tests/test.rs index 11b7df710a9d..655f30cbb0e2 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -312,4 +312,18 @@ mod test_str_ext { assert_eq!(uppercase, "AßΔC"); assert!(!uppercase.is_heap_allocated()); } + + #[test] + fn replace() { + let result = "foo_bar_baz".replace_smolstr("ba", "do"); + assert_eq!(result, "foo_dor_doz"); + assert!(!result.is_heap_allocated()); + } + + #[test] + fn replacen() { + let result = "foo_bar_baz".replacen_smolstr("ba", "do", 1); + assert_eq!(result, "foo_dor_baz"); + assert!(!result.is_heap_allocated()); + } } From 73678ce217b4b78df4484d9c32eca6c7dff34496 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 18 Apr 2024 17:12:18 +0200 Subject: [PATCH 268/322] Update rust.ungram --- rust.ungram | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust.ungram b/rust.ungram index cb58486eff01..7781e719e2af 100644 --- a/rust.ungram +++ b/rust.ungram @@ -1,3 +1,5 @@ +// Note this grammar file does not reflect the current language as this file is no longer maintained. + // Rust Un-Grammar. // // This grammar specifies the structure of Rust's concrete syntax tree. From 673755b50736b424e8b01a983da50e7bcc8974b0 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 14 May 2024 10:09:33 +0200 Subject: [PATCH 269/322] Document unsafe blocks --- src/lib.rs | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9afe2a932c36..78c4e9a74e2f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -61,7 +61,9 @@ impl SmolStr { } s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22); SmolStr(Repr::Inline { - len: unsafe { transmute(len as u8) }, + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we asserted it. 
+ len: unsafe { InlineSize::transmute_from_u8(len as u8) }, buf, }) } @@ -80,7 +82,9 @@ impl SmolStr { i += 1 } SmolStr(Repr::Inline { - len: unsafe { transmute(text.len() as u8) }, + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we asserted it. + len: unsafe { InlineSize::transmute_from_u8(text.len() as u8) }, buf, }) } @@ -153,7 +157,9 @@ impl SmolStr { len += size; } SmolStr(Repr::Inline { - len: unsafe { transmute(len as u8) }, + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we otherwise return early. + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, buf, }) } @@ -292,7 +298,9 @@ where len += size; } SmolStr(Repr::Inline { - len: unsafe { transmute(len as u8) }, + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we otherwise return early. + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, buf, }) } @@ -413,7 +421,7 @@ impl<'a> arbitrary::Arbitrary<'a> for SmolStr { } } -const INLINE_CAP: usize = 23; +const INLINE_CAP: usize = InlineSize::_V23 as usize; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; const WS: &str = @@ -453,6 +461,14 @@ enum InlineSize { _V23, } +impl InlineSize { + #[inline(always)] + const unsafe fn transmute_from_u8(value: u8) -> Self { + debug_assert!(value <= InlineSize::_V23 as u8); + unsafe { transmute::(value) } + } +} + #[derive(Clone, Debug)] enum Repr { Inline { @@ -477,7 +493,8 @@ impl Repr { let mut buf = [0; INLINE_CAP]; buf[..len].copy_from_slice(text.as_bytes()); return Some(Repr::Inline { - len: unsafe { transmute(len as u8) }, + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, buf, }); } @@ -532,6 +549,7 @@ impl Repr { Repr::Inline { len, buf } => { let len = *len as usize; let buf = &buf[..len]; + // SAFETY: buf is guaranteed to be valid utf8 for 
..len bytes unsafe { ::core::str::from_utf8_unchecked(buf) } } } @@ -641,10 +659,14 @@ impl StrExt for str { let mut result = Writer::new(); let mut last_end = 0; for (start, part) in self.match_indices(from).take(count) { + // SAFETY: `start` is guaranteed to be within the bounds of `self` as per + // `match_indices` and last_end is always less than or equal to `start` result.push_str(unsafe { self.get_unchecked(last_end..start) }); result.push_str(to); last_end = start + part.len(); } + // SAFETY: `self.len()` is guaranteed to be within the bounds of `self` and last_end is + // always less than or equal to `self.len()` result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); SmolStr::from(result) } @@ -701,6 +723,7 @@ impl Writer { self.heap.reserve(self.len); // copy existing inline bytes over to the heap + // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes unsafe { self.heap .as_mut_vec() @@ -724,7 +747,8 @@ impl From for SmolStr { fn from(value: Writer) -> Self { SmolStr(if value.len <= INLINE_CAP { Repr::Inline { - len: unsafe { transmute(value.len as u8) }, + // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize` + len: unsafe { InlineSize::transmute_from_u8(value.len as u8) }, buf: value.inline, } } else { From aea39b4974f55e9380556272b1335e2f6fd13cc7 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 14 May 2024 10:15:12 +0200 Subject: [PATCH 270/322] Publish 0.2.2 --- Cargo.toml | 2 +- tests/test.rs | 3 +++ tests/tidy.rs | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1d04d437678d..dcefb03231c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.2.1" +version = "0.2.2" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" diff --git a/tests/test.rs b/tests/test.rs index 
655f30cbb0e2..2e2914d79771 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +#[cfg(not(miri))] use proptest::{prop_assert, prop_assert_eq, proptest}; use smol_str::SmolStr; @@ -57,6 +58,7 @@ fn old_const_fn_ctor() { assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); } +#[cfg(not(miri))] fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> { prop_assert_eq!(smol.as_str(), std_str); prop_assert_eq!(smol.len(), std_str.len()); @@ -67,6 +69,7 @@ fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner Ok(()) } +#[cfg(not(miri))] proptest! { #[test] fn roundtrip(s: String) { diff --git a/tests/tidy.rs b/tests/tidy.rs index a716e35b2f14..e2d809e40fee 100644 --- a/tests/tidy.rs +++ b/tests/tidy.rs @@ -1,3 +1,4 @@ +#![cfg(not(miri))] use std::{ env, path::{Path, PathBuf}, From 2a021890218626a94f17f17b15274726cb84f052 Mon Sep 17 00:00:00 2001 From: Rob Ede Date: Sat, 1 Jun 2024 05:41:34 +0100 Subject: [PATCH 271/322] Document crate feature guards --- Cargo.toml | 6 +++++- src/lib.rs | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dcefb03231c9..b04a6f8e5b51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,12 @@ repository = "https://github.com/rust-analyzer/smol_str" authors = ["Aleksey Kladov "] edition = "2018" +[package.metadata.docs.rs] +rustdoc-args = ["--cfg", "docsrs"] +all-features = true + [dependencies] -serde = { version = "1.0.136", optional = true, default_features = false } +serde = { version = "1.0.136", optional = true, default-features = false } arbitrary = { version = "1.1.0", optional = true } [dev-dependencies] diff --git a/src/lib.rs b/src/lib.rs index 78c4e9a74e2f..f49cfbfe40d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,6 @@ #![no_std] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + extern crate alloc; use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc}; @@ -21,7 +23,7 
@@ use core::{ /// * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist /// solely of consecutive newlines, followed by consecutive spaces /// * If a string does not satisfy the aforementioned conditions, it is heap-allocated -/// * Additionally, a `SmolStr` can be explicitely created from a `&'static str` without allocation +/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation /// /// Unlike `String`, however, `SmolStr` is immutable. The primary use case for /// `SmolStr` is a good enough default storage for tokens of typical programming From fde86a5c0cb8f397c8a59e3bb156a39fbeb754d5 Mon Sep 17 00:00:00 2001 From: Rob Ede Date: Sat, 1 Jun 2024 05:47:05 +0100 Subject: [PATCH 272/322] Migrate CI to actions-rust-lang/setup-rust-toolchain --- .github/workflows/ci.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b1bc2175caf2..1c2e347374ae 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -27,11 +27,9 @@ jobs: fetch-depth: 0 - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 + uses: actions-rust-lang/setup-rust-toolchain@v1 with: - toolchain: stable - profile: minimal - override: true + cache: false - run: rustc ./.github/ci.rs && ./ci env: From 5e7b3c4337693199d21e67346a752d2ef0d3703f Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 2 Sep 2024 20:40:49 +0200 Subject: [PATCH 273/322] Bump deps --- .gitignore | 3 +- Cargo.toml | 13 +++---- src/lib.rs | 99 +--------------------------------------------------- src/serde.rs | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 105 deletions(-) create mode 100644 src/serde.rs diff --git a/.gitignore b/.gitignore index 6b500aacba8f..0c8227b253a5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target /ci -Cargo.lock \ No newline at end of file +/.vscode +Cargo.lock diff --git 
a/Cargo.toml b/Cargo.toml index b04a6f8e5b51..659c797f5f23 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,21 +5,22 @@ description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" authors = ["Aleksey Kladov "] -edition = "2018" +edition = "2021" [package.metadata.docs.rs] rustdoc-args = ["--cfg", "docsrs"] all-features = true [dependencies] -serde = { version = "1.0.136", optional = true, default-features = false } -arbitrary = { version = "1.1.0", optional = true } +serde = { version = "1.0", optional = true, default-features = false } +arbitrary = { version = "1.3", optional = true } [dev-dependencies] -proptest = "1.0.0" -serde_json = "1.0.79" -serde = { version = "1.0.136", features = ["derive"] } +proptest = "1.5" +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } [features] default = ["std"] std = ["serde?/std"] +serde = ["dep:serde"] diff --git a/src/lib.rs b/src/lib.rs index f49cfbfe40d9..ca9944ce9b47 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -769,101 +769,4 @@ where } #[cfg(feature = "serde")] -mod serde { - use alloc::{string::String, vec::Vec}; - use core::fmt; - - use serde::de::{Deserializer, Error, Unexpected, Visitor}; - - use crate::SmolStr; - - // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 - fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct SmolStrVisitor; - - impl<'a> Visitor<'a> for SmolStrVisitor { - type Value = SmolStr; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a string") - } - - fn visit_str(self, v: &str) -> Result - where - E: Error, - { - Ok(SmolStr::from(v)) - } - - fn visit_borrowed_str(self, v: &'a str) -> Result - where - E: Error, - { - Ok(SmolStr::from(v)) - } - - fn visit_string(self, v: String) -> Result - where - E: Error, - { - 
Ok(SmolStr::from(v)) - } - - fn visit_bytes(self, v: &[u8]) -> Result - where - E: Error, - { - match core::str::from_utf8(v) { - Ok(s) => Ok(SmolStr::from(s)), - Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), - } - } - - fn visit_borrowed_bytes(self, v: &'a [u8]) -> Result - where - E: Error, - { - match core::str::from_utf8(v) { - Ok(s) => Ok(SmolStr::from(s)), - Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), - } - } - - fn visit_byte_buf(self, v: Vec) -> Result - where - E: Error, - { - match String::from_utf8(v) { - Ok(s) => Ok(SmolStr::from(s)), - Err(e) => Err(Error::invalid_value( - Unexpected::Bytes(&e.into_bytes()), - &self, - )), - } - } - } - - deserializer.deserialize_str(SmolStrVisitor) - } - - impl serde::Serialize for SmolStr { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - self.as_str().serialize(serializer) - } - } - - impl<'de> serde::Deserialize<'de> for SmolStr { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - smol_str(deserializer) - } - } -} +mod serde; diff --git a/src/serde.rs b/src/serde.rs new file mode 100644 index 000000000000..05b8fecacc33 --- /dev/null +++ b/src/serde.rs @@ -0,0 +1,96 @@ +use alloc::{string::String, vec::Vec}; +use core::fmt; + +use serde::de::{Deserializer, Error, Unexpected, Visitor}; + +use crate::SmolStr; + +// https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 +fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + struct SmolStrVisitor; + + impl<'a> Visitor<'a> for SmolStrVisitor { + type Value = SmolStr; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string") + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_borrowed_str(self, v: &'a str) -> Result + where + E: Error, + { + 
Ok(SmolStr::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + match core::str::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_borrowed_bytes(self, v: &'a [u8]) -> Result + where + E: Error, + { + match core::str::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + match String::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(e) => Err(Error::invalid_value( + Unexpected::Bytes(&e.into_bytes()), + &self, + )), + } + } + } + + deserializer.deserialize_str(SmolStrVisitor) +} + +impl serde::Serialize for SmolStr { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.as_str().serialize(serializer) + } +} + +impl<'de> serde::Deserialize<'de> for SmolStr { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + smol_str(deserializer) + } +} From eb00f33ab4f409020d47acc2eba17d6bbb69e3be Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 2 Sep 2024 21:36:34 +0200 Subject: [PATCH 274/322] Various cleanups --- Cargo.toml | 1 - src/lib.rs | 200 ++++++++++++++++++++++++-------------------------- tests/test.rs | 20 +---- 3 files changed, 95 insertions(+), 126 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 659c797f5f23..c1e34e7d7b21 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,4 +23,3 @@ serde = { version = "1.0", features = ["derive"] } [features] default = ["std"] std = ["serde?/std"] -serde = ["dep:serde"] diff --git a/src/lib.rs b/src/lib.rs index ca9944ce9b47..9845e4da055c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,9 +8,7 @@ use core::{ borrow::Borrow, cmp::{self, Ordering}, convert::Infallible, - fmt, hash, iter, - 
mem::transmute, - ops::Deref, + fmt, hash, iter, mem, ops, str::FromStr, }; @@ -34,53 +32,23 @@ use core::{ /// `WS`: A string of 32 newlines followed by 128 spaces. pub struct SmolStr(Repr); -impl Clone for SmolStr { - #[inline] - fn clone(&self) -> Self { - if !self.is_heap_allocated() { - return unsafe { core::ptr::read(self as *const SmolStr) }; - } - Self(self.0.clone()) - } -} - impl SmolStr { - #[deprecated = "Use `new_inline` instead"] - pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { - assert!(len <= INLINE_CAP); - - const ZEROS: &[u8] = &[0; INLINE_CAP]; - - let mut buf = [0; INLINE_CAP]; - macro_rules! s { - ($($idx:literal),*) => ( $(s!(set $idx);)* ); - (set $idx:literal) => ({ - let src: &[u8] = [ZEROS, bytes][($idx < len) as usize]; - let byte = src[$idx]; - let _is_ascii = [(); 128][byte as usize]; - buf[$idx] = byte - }); - } - s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22); - SmolStr(Repr::Inline { - // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` - // as we asserted it. - len: unsafe { InlineSize::transmute_from_u8(len as u8) }, - buf, - }) - } - - /// Constructs inline variant of `SmolStr`. + /// Constructs an inline variant of `SmolStr`. + /// + /// This never allocates. + /// + /// # Panics /// /// Panics if `text.len() > 23`. #[inline] pub const fn new_inline(text: &str) -> SmolStr { - assert!(text.len() <= INLINE_CAP); // avoids checks in loop + assert!(text.len() <= INLINE_CAP); // avoids bounds checks in loop + let text = text.as_bytes(); let mut buf = [0; INLINE_CAP]; let mut i = 0; while i < text.len() { - buf[i] = text.as_bytes()[i]; + buf[i] = text[i]; i += 1 } SmolStr(Repr::Inline { @@ -102,68 +70,45 @@ impl SmolStr { SmolStr(Repr::Static(text)) } - pub fn new(text: T) -> SmolStr - where - T: AsRef, - { - SmolStr(Repr::new(text)) + /// Constructs a `SmolStr` from a `str`, heap-allocating if necessary. 
+ #[inline(always)] + pub fn new(text: impl AsRef) -> SmolStr { + SmolStr(Repr::new(text.as_ref())) } + /// Returns a `&str` slice of this `SmolStr`. #[inline(always)] pub fn as_str(&self) -> &str { self.0.as_str() } - #[allow(clippy::inherent_to_string_shadow_display)] - #[inline(always)] - pub fn to_string(&self) -> String { - use alloc::borrow::ToOwned; - - self.as_str().to_owned() - } - + /// Returns the length of `self` in bytes. #[inline(always)] pub fn len(&self) -> usize { self.0.len() } + /// Returns `true` if `self` has a length of zero bytes. #[inline(always)] pub fn is_empty(&self) -> bool { self.0.is_empty() } + /// Returns `true` if `self` is heap-allocated. #[inline(always)] pub const fn is_heap_allocated(&self) -> bool { matches!(self.0, Repr::Heap(..)) } +} - fn from_char_iter>(mut iter: I) -> SmolStr { - let (min_size, _) = iter.size_hint(); - if min_size > INLINE_CAP { - let heap: String = iter.collect(); - return SmolStr(Repr::Heap(heap.into_boxed_str().into())); - } - let mut len = 0; - let mut buf = [0u8; INLINE_CAP]; - while let Some(ch) = iter.next() { - let size = ch.len_utf8(); - if size + len > INLINE_CAP { - let (min_remaining, _) = iter.size_hint(); - let mut heap = String::with_capacity(size + len + min_remaining); - heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); - heap.push(ch); - heap.extend(iter); - return SmolStr(Repr::Heap(heap.into_boxed_str().into())); - } - ch.encode_utf8(&mut buf[len..]); - len += size; +impl Clone for SmolStr { + #[inline] + fn clone(&self) -> Self { + if !self.is_heap_allocated() { + // SAFETY: We verified that the payload of `Repr` is a POD + return unsafe { core::ptr::read(self as *const SmolStr) }; } - SmolStr(Repr::Inline { - // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` - // as we otherwise return early. 
- len: unsafe { InlineSize::transmute_from_u8(len as u8) }, - buf, - }) + Self(self.0.clone()) } } @@ -177,7 +122,7 @@ impl Default for SmolStr { } } -impl Deref for SmolStr { +impl ops::Deref for SmolStr { type Target = str; #[inline(always)] @@ -186,61 +131,71 @@ impl Deref for SmolStr { } } +// region: PartialEq implementations + +impl Eq for SmolStr {} impl PartialEq for SmolStr { fn eq(&self, other: &SmolStr) -> bool { self.0.ptr_eq(&other.0) || self.as_str() == other.as_str() } } -impl Eq for SmolStr {} - impl PartialEq for SmolStr { + #[inline(always)] fn eq(&self, other: &str) -> bool { self.as_str() == other } } impl PartialEq for str { + #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { other == self } } impl<'a> PartialEq<&'a str> for SmolStr { + #[inline(always)] fn eq(&self, other: &&'a str) -> bool { self == *other } } impl<'a> PartialEq for &'a str { + #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { *self == other } } impl PartialEq for SmolStr { + #[inline(always)] fn eq(&self, other: &String) -> bool { self.as_str() == other } } impl PartialEq for String { + #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { other == self } } impl<'a> PartialEq<&'a String> for SmolStr { + #[inline(always)] fn eq(&self, other: &&'a String) -> bool { self == *other } } impl<'a> PartialEq for &'a String { + #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { *self == other } } +// endregion: PartialEq implementations impl Ord for SmolStr { fn cmp(&self, other: &SmolStr) -> Ordering { @@ -274,9 +229,41 @@ impl fmt::Display for SmolStr { impl iter::FromIterator for SmolStr { fn from_iter>(iter: I) -> SmolStr { - let iter = iter.into_iter(); - Self::from_char_iter(iter) + from_char_iter(iter.into_iter()) + } +} + +fn from_char_iter(mut iter: impl Iterator) -> SmolStr { + let (min_size, _) = iter.size_hint(); + if min_size > INLINE_CAP { + let heap: String = iter.collect(); + if heap.len() <= INLINE_CAP { + // size hint lied + return 
SmolStr::new_inline(&heap); + } + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + while let Some(ch) = iter.next() { + let size = ch.len_utf8(); + if size + len > INLINE_CAP { + let (min_remaining, _) = iter.size_hint(); + let mut heap = String::with_capacity(size + len + min_remaining); + heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); + heap.push(ch); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + ch.encode_utf8(&mut buf[len..]); + len += size; } + SmolStr(Repr::Inline { + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we otherwise return early. + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) } fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr @@ -415,14 +402,6 @@ impl FromStr for SmolStr { } } -#[cfg(feature = "arbitrary")] -impl<'a> arbitrary::Arbitrary<'a> for SmolStr { - fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result { - let s = <&str>::arbitrary(u)?; - Ok(SmolStr::new(s)) - } -} - const INLINE_CAP: usize = InlineSize::_V23 as usize; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; @@ -434,6 +413,7 @@ const _: () = { assert!(WS.as_bytes()[N_NEWLINES] == b' '); }; +/// A [`u8`] with a bunch of niches. 
#[derive(Clone, Copy, Debug, PartialEq)] #[repr(u8)] enum InlineSize { @@ -464,10 +444,12 @@ enum InlineSize { } impl InlineSize { + /// SAFETY: `value` must be less than or equal to [`INLINE_CAP`] #[inline(always)] const unsafe fn transmute_from_u8(value: u8) -> Self { debug_assert!(value <= InlineSize::_V23 as u8); - unsafe { transmute::(value) } + // SAFETY: The caller is responsible to uphold this invariant + unsafe { mem::transmute::(value) } } } @@ -518,11 +500,8 @@ impl Repr { None } - fn new(text: T) -> Self - where - T: AsRef, - { - Self::new_on_stack(text.as_ref()).unwrap_or_else(|| Repr::Heap(text.as_ref().into())) + fn new(text: &str) -> Self { + Self::new_on_stack(text).unwrap_or_else(|| Repr::Heap(Arc::from(text))) } #[inline(always)] @@ -539,7 +518,7 @@ impl Repr { match self { Repr::Heap(data) => data.is_empty(), Repr::Static(data) => data.is_empty(), - Repr::Inline { len, .. } => *len as u8 == 0, + &Repr::Inline { len, .. } => len as u8 == 0, } } @@ -550,7 +529,8 @@ impl Repr { Repr::Static(data) => data, Repr::Inline { len, buf } => { let len = *len as usize; - let buf = &buf[..len]; + // SAFETY: len is guaranteed to be <= INLINE_CAP + let buf = unsafe { buf.get_unchecked(..len) }; // SAFETY: buf is guaranteed to be valid utf8 for ..len bytes unsafe { ::core::str::from_utf8_unchecked(buf) } } @@ -633,22 +613,22 @@ pub trait StrExt: private::Sealed { impl StrExt for str { #[inline] fn to_lowercase_smolstr(&self) -> SmolStr { - SmolStr::from_char_iter(self.chars().flat_map(|c| c.to_lowercase())) + from_char_iter(self.chars().flat_map(|c| c.to_lowercase())) } #[inline] fn to_uppercase_smolstr(&self) -> SmolStr { - SmolStr::from_char_iter(self.chars().flat_map(|c| c.to_uppercase())) + from_char_iter(self.chars().flat_map(|c| c.to_uppercase())) } #[inline] fn to_ascii_lowercase_smolstr(&self) -> SmolStr { - SmolStr::from_char_iter(self.chars().map(|c| c.to_ascii_lowercase())) + from_char_iter(self.chars().map(|c| c.to_ascii_lowercase())) } #[inline] fn 
to_ascii_uppercase_smolstr(&self) -> SmolStr { - SmolStr::from_char_iter(self.chars().map(|c| c.to_ascii_uppercase())) + from_char_iter(self.chars().map(|c| c.to_ascii_uppercase())) } #[inline] @@ -754,7 +734,7 @@ impl From for SmolStr { buf: value.inline, } } else { - Repr::new(value.heap) + Repr::new(&value.heap) }) } } @@ -768,5 +748,13 @@ where } } +#[cfg(feature = "arbitrary")] +impl<'a> arbitrary::Arbitrary<'a> for SmolStr { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result { + let s = <&str>::arbitrary(u)?; + Ok(SmolStr::new(s)) + } +} + #[cfg(feature = "serde")] mod serde; diff --git a/tests/test.rs b/tests/test.rs index 2e2914d79771..631f7d78bfb5 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -44,20 +44,6 @@ fn const_fn_ctor() { assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); } -#[allow(deprecated)] -#[test] -fn old_const_fn_ctor() { - const EMPTY: SmolStr = SmolStr::new_inline_from_ascii(0, b""); - const A: SmolStr = SmolStr::new_inline_from_ascii(1, b"A"); - const HELLO: SmolStr = SmolStr::new_inline_from_ascii(5, b"HELLO"); - const LONG: SmolStr = SmolStr::new_inline_from_ascii(23, b"ABCDEFGHIZKLMNOPQRSTUVW"); - - assert_eq!(EMPTY, SmolStr::from("")); - assert_eq!(A, SmolStr::from("A")); - assert_eq!(HELLO, SmolStr::from("HELLO")); - assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); -} - #[cfg(not(miri))] fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> { prop_assert_eq!(smol.as_str(), std_str); @@ -253,11 +239,7 @@ fn test_bad_size_hint_char_iter() { let collected: SmolStr = BadSizeHint(data.chars()).collect(); let new = SmolStr::new(data); - // Because of the bad size hint, `collected` will be heap allocated, but `new` will be inline - - // If we try to use the type of the string (inline/heap) to quickly test for equality, we need to ensure - // `collected` is inline allocated instead - assert!(collected.is_heap_allocated()); + assert!(!collected.is_heap_allocated()); 
assert!(!new.is_heap_allocated()); assert_eq!(new, collected); } From 593d89fa899e62b03da428fd1dfb6d651b78f461 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 2 Sep 2024 21:45:25 +0200 Subject: [PATCH 275/322] Add more trait impls --- src/lib.rs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 9845e4da055c..73e757bcad1f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -#![no_std] +#![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_auto_cfg))] extern crate alloc; @@ -319,6 +319,29 @@ impl AsRef for SmolStr { } } +impl AsRef<[u8]> for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &[u8] { + self.as_str().as_bytes() + } +} + +#[cfg(feature = "std")] +impl AsRef for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &std::ffi::OsStr { + AsRef::::as_ref(self.as_str()) + } +} + +#[cfg(feature = "std")] +impl AsRef for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &std::path::Path { + AsRef::::as_ref(self.as_str()) + } +} + impl From<&str> for SmolStr { #[inline] fn from(s: &str) -> SmolStr { From de2af0d34c3752e2b9192d7da28fe0c138bcf4d3 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:11:57 +0200 Subject: [PATCH 276/322] Expose SmolStrBuilder --- src/lib.rs | 130 ++++++++++++++++++++++++++++---------------------- tests/test.rs | 38 ++++++++++++++- 2 files changed, 110 insertions(+), 58 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 73e757bcad1f..448315c33831 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -620,7 +620,6 @@ pub trait StrExt: private::Sealed { /// potentially without allocating. /// /// See [`str::replace`]. - // TODO: Use `Pattern` when stable. #[must_use = "this returns a new SmolStr without modifying the original"] fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr; @@ -628,7 +627,6 @@ pub trait StrExt: private::Sealed { /// potentially without allocating. /// /// See [`str::replacen`]. 
- // TODO: Use `Pattern` when stable. #[must_use = "this returns a new SmolStr without modifying the original"] fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr; } @@ -661,7 +659,7 @@ impl StrExt for str { #[inline] fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr { - let mut result = Writer::new(); + let mut result = SmolStrBuilder::new(); let mut last_end = 0; for (start, part) in self.match_indices(from).take(count) { // SAFETY: `start` is guaranteed to be within the bounds of `self` as per @@ -677,6 +675,15 @@ impl StrExt for str { } } +impl ToSmolStr for T +where + T: fmt::Display + ?Sized, +{ + fn to_smolstr(&self) -> SmolStr { + format_smolstr!("{}", self) + } +} + mod private { /// No downstream impls allowed. pub trait Sealed {} @@ -689,58 +696,84 @@ mod private { #[macro_export] macro_rules! format_smolstr { ($($tt:tt)*) => {{ - use ::core::fmt::Write; - let mut w = $crate::Writer::new(); - w.write_fmt(format_args!($($tt)*)).expect("a formatting trait implementation returned an error"); - $crate::SmolStr::from(w) + let mut w = $crate::SmolStrBuilder::new(); + ::core::fmt::Write::write_fmt(&mut w, format_args!($($tt)*)).expect("a formatting trait implementation returned an error"); + w.finish() }}; } -#[doc(hidden)] -pub struct Writer { - inline: [u8; INLINE_CAP], - heap: String, - len: usize, +/// A builder that can be used to efficiently build a [`SmolStr`]. +/// +/// This won't allocate if the final string fits into the inline buffer. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum SmolStrBuilder { + Inline { len: usize, buf: [u8; INLINE_CAP] }, + Heap(String), +} + +impl Default for SmolStrBuilder { + #[inline] + fn default() -> Self { + Self::new() + } } -impl Writer { +impl SmolStrBuilder { + /// Creates a new empty [`SmolStrBuilder`]. 
#[must_use] pub const fn new() -> Self { - Writer { - inline: [0; INLINE_CAP], - heap: String::new(), + SmolStrBuilder::Inline { + buf: [0; INLINE_CAP], len: 0, } } - fn push_str(&mut self, s: &str) { - // if currently on the stack - if self.len <= INLINE_CAP { - let old_len = self.len; - self.len += s.len(); - - // if the new length will fit on the stack (even if it fills it entirely) - if self.len <= INLINE_CAP { - self.inline[old_len..self.len].copy_from_slice(s.as_bytes()); - return; // skip the heap push below + /// Builds a [`SmolStr`] from `self`. + #[must_use] + pub fn finish(&self) -> SmolStr { + SmolStr(match self { + &SmolStrBuilder::Inline { len, buf } => { + debug_assert!(len <= INLINE_CAP); + Repr::Inline { + // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize` + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + } } + SmolStrBuilder::Heap(heap) => Repr::new(heap), + }) + } - self.heap.reserve(self.len); - - // copy existing inline bytes over to the heap - // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes - unsafe { - self.heap - .as_mut_vec() - .extend_from_slice(&self.inline[..old_len]); + /// Appends a given string slice onto the end of `self`'s buffer. 
+ pub fn push_str(&mut self, s: &str) { + // if currently on the stack + match self { + Self::Inline { len, buf } => { + let old_len = *len; + *len += s.len(); + + // if the new length will fit on the stack (even if it fills it entirely) + if *len <= INLINE_CAP { + buf[old_len..*len].copy_from_slice(s.as_bytes()); + return; // skip the heap push below + } + + let mut heap = String::with_capacity(*len); + + // copy existing inline bytes over to the heap + // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes + unsafe { + heap.as_mut_vec().extend_from_slice(&buf[..old_len]); + } + heap.push_str(s); + *self = SmolStrBuilder::Heap(heap); } + SmolStrBuilder::Heap(heap) => heap.push_str(s), } - - self.heap.push_str(s); } } -impl fmt::Write for Writer { +impl fmt::Write for SmolStrBuilder { #[inline] fn write_str(&mut self, s: &str) -> fmt::Result { self.push_str(s); @@ -748,26 +781,9 @@ impl fmt::Write for Writer { } } -impl From for SmolStr { - fn from(value: Writer) -> Self { - SmolStr(if value.len <= INLINE_CAP { - Repr::Inline { - // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize` - len: unsafe { InlineSize::transmute_from_u8(value.len as u8) }, - buf: value.inline, - } - } else { - Repr::new(&value.heap) - }) - } -} - -impl ToSmolStr for T -where - T: fmt::Display + ?Sized, -{ - fn to_smolstr(&self) -> SmolStr { - format_smolstr!("{}", self) +impl From for SmolStr { + fn from(value: SmolStrBuilder) -> Self { + value.finish() } } diff --git a/tests/test.rs b/tests/test.rs index 631f7d78bfb5..0d553caabc82 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -3,7 +3,7 @@ use std::sync::Arc; #[cfg(not(miri))] use proptest::{prop_assert, prop_assert_eq, proptest}; -use smol_str::SmolStr; +use smol_str::{SmolStr, SmolStrBuilder}; #[test] #[cfg(target_pointer_width = "64")] @@ -255,6 +255,42 @@ fn test_to_smolstr() { assert_eq!(a, smol_str::format_smolstr!("{}", a)); } } +#[test] +fn test_builder() { + //empty + 
let builder = SmolStrBuilder::new(); + assert_eq!("", builder.finish()); + + // inline push + let mut builder = SmolStrBuilder::new(); + builder.push_str("a"); + builder.push_str("b"); + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("ab", s); + + // inline max push + let mut builder = SmolStrBuilder::new(); + builder.push_str(&"a".repeat(23)); + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("a".repeat(23), s); + + // heap push immediate + let mut builder = SmolStrBuilder::new(); + builder.push_str(&"a".repeat(24)); + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("a".repeat(24), s); + + // heap push succession + let mut builder = SmolStrBuilder::new(); + builder.push_str(&"a".repeat(23)); + builder.push_str(&"a".repeat(23)); + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("a".repeat(46), s); +} #[cfg(test)] mod test_str_ext { From 5ee10574e81a0f0d5622c6ed6cf6bd3b834530b4 Mon Sep 17 00:00:00 2001 From: Corvin Paul Date: Wed, 17 Apr 2024 12:28:14 +0100 Subject: [PATCH 277/322] Add support for borsh --- Cargo.toml | 3 ++- src/borsh.rs | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 ++ tests/test.rs | 52 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 src/borsh.rs diff --git a/Cargo.toml b/Cargo.toml index c1e34e7d7b21..7dd7a5f9bbd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ all-features = true [dependencies] serde = { version = "1.0", optional = true, default-features = false } +borsh = { version = "1.4.0", optional = true, default-features = false } arbitrary = { version = "1.3", optional = true } [dev-dependencies] @@ -22,4 +23,4 @@ serde = { version = "1.0", features = ["derive"] } [features] default = ["std"] -std = ["serde?/std"] +std = ["serde?/std", "borsh?/std"] diff --git a/src/borsh.rs b/src/borsh.rs new file mode 100644 index 
000000000000..12580cb4f27a --- /dev/null +++ b/src/borsh.rs @@ -0,0 +1,58 @@ +use crate::{Repr, SmolStr, INLINE_CAP}; +use alloc::string::{String, ToString}; +use borsh::io::{Error, ErrorKind, Read, Write}; +use borsh::{BorshDeserialize, BorshSerialize}; +use core::intrinsics::transmute; + +impl BorshSerialize for SmolStr { + fn serialize(&self, writer: &mut W) -> borsh::io::Result<()> { + self.as_str().serialize(writer) + } +} + +impl BorshDeserialize for SmolStr { + #[inline] + fn deserialize_reader(reader: &mut R) -> borsh::io::Result { + let len = u32::deserialize_reader(reader)?; + if (len as usize) < INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + reader.read_exact(&mut buf[..len as usize])?; + _ = core::str::from_utf8(&buf[..len as usize]).map_err(|err| { + let msg = err.to_string(); + Error::new(ErrorKind::InvalidData, msg) + })?; + Ok(SmolStr(Repr::Inline { + len: unsafe { transmute(len as u8) }, + buf, + })) + } else { + // u8::vec_from_reader always returns Some on success in current implementation + let vec = u8::vec_from_reader(len, reader)?.ok_or_else(|| { + Error::new( + ErrorKind::Other, + "u8::vec_from_reader unexpectedly returned None".to_string(), + ) + })?; + Ok(SmolStr::from(String::from_utf8(vec).map_err(|err| { + let msg = err.to_string(); + Error::new(ErrorKind::InvalidData, msg) + })?)) + } + } +} + +#[cfg(feature = "borsh/unstable__schema")] +mod schema { + use alloc::collections::BTreeMap; + use borsh::schema::{Declaration, Definition}; + use borsh::BorshSchema; + impl BorshSchema for SmolStr { + fn add_definitions_recursively(definitions: &mut BTreeMap) { + str::add_definitions_recursively(definitions) + } + + fn declaration() -> Declaration { + str::declaration() + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 448315c33831..cc8612ee4512 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -795,5 +795,7 @@ impl<'a> arbitrary::Arbitrary<'a> for SmolStr { } } +#[cfg(feature = "borsh")] +mod borsh; #[cfg(feature = "serde")] mod serde; diff 
--git a/tests/test.rs b/tests/test.rs index 0d553caabc82..22b9df2afd5b 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -348,3 +348,55 @@ mod test_str_ext { assert!(!result.is_heap_allocated()); } } +#[cfg(feature = "borsh")] + +mod borsh_tests { + use borsh::BorshDeserialize; + use smol_str::{SmolStr, ToSmolStr}; + use std::io::Cursor; + + #[test] + fn borsh_serialize_stack() { + let smolstr_on_stack = "aßΔCaßδc".to_smolstr(); + let mut buffer = Vec::new(); + borsh::BorshSerialize::serialize(&smolstr_on_stack, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap(); + assert_eq!(smolstr_on_stack, decoded); + } + #[test] + fn borsh_serialize_heap() { + let smolstr_on_heap = "aßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδc".to_smolstr(); + let mut buffer = Vec::new(); + borsh::BorshSerialize::serialize(&smolstr_on_heap, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap(); + assert_eq!(smolstr_on_heap, decoded); + } + #[test] + fn borsh_non_utf8_stack() { + let invalid_utf8: Vec = vec![0xF0, 0x9F, 0x8F]; // Incomplete UTF-8 sequence + + let wrong_utf8 = SmolStr::from(unsafe { String::from_utf8_unchecked(invalid_utf8) }); + let mut buffer = Vec::new(); + borsh::BorshSerialize::serialize(&wrong_utf8, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let result = SmolStr::deserialize_reader(&mut cursor); + assert!(result.is_err()); + } + + #[test] + fn borsh_non_utf8_heap() { + let invalid_utf8: Vec = vec![ + 0xC1, 0x8A, 0x5F, 0xE2, 0x3A, 0x9E, 0x3B, 0xAA, 0x01, 0x08, 0x6F, 0x2F, 0xC0, 0x32, + 0xAB, 0xE1, 0x9A, 0x2F, 0x4A, 0x3F, 0x25, 0x0D, 0x8A, 0x2A, 0x19, 0x11, 0xF0, 0x7F, + 0x0E, 0x80, + ]; + let wrong_utf8 = SmolStr::from(unsafe { String::from_utf8_unchecked(invalid_utf8) }); + let mut buffer = Vec::new(); + 
borsh::BorshSerialize::serialize(&wrong_utf8, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let result = SmolStr::deserialize_reader(&mut cursor); + assert!(result.is_err()); + } +} From db4d3c054f575c394bb103e74a917f00e3d35b04 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:18:31 +0200 Subject: [PATCH 278/322] Drop unstable__schema --- src/borsh.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/borsh.rs b/src/borsh.rs index 12580cb4f27a..5617bce3713a 100644 --- a/src/borsh.rs +++ b/src/borsh.rs @@ -40,19 +40,3 @@ impl BorshDeserialize for SmolStr { } } } - -#[cfg(feature = "borsh/unstable__schema")] -mod schema { - use alloc::collections::BTreeMap; - use borsh::schema::{Declaration, Definition}; - use borsh::BorshSchema; - impl BorshSchema for SmolStr { - fn add_definitions_recursively(definitions: &mut BTreeMap) { - str::add_definitions_recursively(definitions) - } - - fn declaration() -> Declaration { - str::declaration() - } - } -} From 24bec15bed1eb69671f0baa4dc2f81b8e7489ce4 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:27:28 +0200 Subject: [PATCH 279/322] SmolStrBuilder::push --- src/lib.rs | 27 +++++++++++++++++++++++---- tests/test.rs | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index cc8612ee4512..9cc21ec78092 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -744,9 +744,30 @@ impl SmolStrBuilder { }) } + /// Appends the given [`char`] to the end of `self`'s buffer. 
+ pub fn push(&mut self, c: char) { + match self { + SmolStrBuilder::Inline { len, buf } => { + let char_len = c.len_utf8(); + let new_len = *len + char_len; + if new_len <= INLINE_CAP { + c.encode_utf8(&mut buf[*len..]); + *len += char_len; + } else { + let mut heap = String::with_capacity(new_len); + // copy existing inline bytes over to the heap + // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes + unsafe { heap.as_mut_vec().extend_from_slice(buf) }; + heap.push(c); + *self = SmolStrBuilder::Heap(heap); + } + } + SmolStrBuilder::Heap(h) => h.push(c), + } + } + /// Appends a given string slice onto the end of `self`'s buffer. pub fn push_str(&mut self, s: &str) { - // if currently on the stack match self { Self::Inline { len, buf } => { let old_len = *len; @@ -762,9 +783,7 @@ impl SmolStrBuilder { // copy existing inline bytes over to the heap // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes - unsafe { - heap.as_mut_vec().extend_from_slice(&buf[..old_len]); - } + unsafe { heap.as_mut_vec().extend_from_slice(&buf[..old_len]) }; heap.push_str(s); *self = SmolStrBuilder::Heap(heap); } diff --git a/tests/test.rs b/tests/test.rs index 22b9df2afd5b..81bccf106ece 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -256,7 +256,7 @@ fn test_to_smolstr() { } } #[test] -fn test_builder() { +fn test_builder_push_str() { //empty let builder = SmolStrBuilder::new(); assert_eq!("", builder.finish()); @@ -292,6 +292,39 @@ fn test_builder() { assert_eq!("a".repeat(46), s); } +#[test] +fn test_builder_push() { + //empty + let builder = SmolStrBuilder::new(); + assert_eq!("", builder.finish()); + + // inline push + let mut builder = SmolStrBuilder::new(); + builder.push('a'); + builder.push('b'); + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("ab", s); + + // inline max push + let mut builder = SmolStrBuilder::new(); + for _ in 0..23 { + builder.push('a'); + } + let s = builder.finish(); + 
assert!(!s.is_heap_allocated()); + assert_eq!("a".repeat(23), s); + + // heap push + let mut builder = SmolStrBuilder::new(); + for _ in 0..24 { + builder.push('a'); + } + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("a".repeat(24), s); +} + #[cfg(test)] mod test_str_ext { use smol_str::StrExt; From b38a9735ef8fba3ac3a917dd7ceac65c5c1836a3 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:30:04 +0200 Subject: [PATCH 280/322] Publish 0.3.0 --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7dd7a5f9bbd8..1f6b0f72d7b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "smol_str" -version = "0.2.2" +version = "0.3.0" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" -authors = ["Aleksey Kladov "] +authors = ["Aleksey Kladov ", "Lukas Wirth "] edition = "2021" [package.metadata.docs.rs] From 68bc4ed85f233e7c8dd8a120ea6241b74fa25044 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:39:50 +0200 Subject: [PATCH 281/322] Make SmolStrBuilder fields private --- src/lib.rs | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9cc21ec78092..e3a8ef8b8aad 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -705,16 +705,22 @@ macro_rules! format_smolstr { /// A builder that can be used to efficiently build a [`SmolStr`]. /// /// This won't allocate if the final string fits into the inline buffer. 
+#[derive(Clone, Default, Debug, PartialEq, Eq)] +pub struct SmolStrBuilder(SmolStrBuilderRepr); + #[derive(Clone, Debug, PartialEq, Eq)] -pub enum SmolStrBuilder { +enum SmolStrBuilderRepr { Inline { len: usize, buf: [u8; INLINE_CAP] }, Heap(String), } -impl Default for SmolStrBuilder { +impl Default for SmolStrBuilderRepr { #[inline] fn default() -> Self { - Self::new() + SmolStrBuilderRepr::Inline { + buf: [0; INLINE_CAP], + len: 0, + } } } @@ -722,17 +728,17 @@ impl SmolStrBuilder { /// Creates a new empty [`SmolStrBuilder`]. #[must_use] pub const fn new() -> Self { - SmolStrBuilder::Inline { + Self(SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0, - } + }) } /// Builds a [`SmolStr`] from `self`. #[must_use] pub fn finish(&self) -> SmolStr { - SmolStr(match self { - &SmolStrBuilder::Inline { len, buf } => { + SmolStr(match &self.0 { + &SmolStrBuilderRepr::Inline { len, buf } => { debug_assert!(len <= INLINE_CAP); Repr::Inline { // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize` @@ -740,14 +746,14 @@ impl SmolStrBuilder { buf, } } - SmolStrBuilder::Heap(heap) => Repr::new(heap), + SmolStrBuilderRepr::Heap(heap) => Repr::new(heap), }) } /// Appends the given [`char`] to the end of `self`'s buffer. pub fn push(&mut self, c: char) { - match self { - SmolStrBuilder::Inline { len, buf } => { + match &mut self.0 { + SmolStrBuilderRepr::Inline { len, buf } => { let char_len = c.len_utf8(); let new_len = *len + char_len; if new_len <= INLINE_CAP { @@ -759,17 +765,17 @@ impl SmolStrBuilder { // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes unsafe { heap.as_mut_vec().extend_from_slice(buf) }; heap.push(c); - *self = SmolStrBuilder::Heap(heap); + self.0 = SmolStrBuilderRepr::Heap(heap); } } - SmolStrBuilder::Heap(h) => h.push(c), + SmolStrBuilderRepr::Heap(h) => h.push(c), } } /// Appends a given string slice onto the end of `self`'s buffer. 
pub fn push_str(&mut self, s: &str) { - match self { - Self::Inline { len, buf } => { + match &mut self.0 { + SmolStrBuilderRepr::Inline { len, buf } => { let old_len = *len; *len += s.len(); @@ -785,9 +791,9 @@ impl SmolStrBuilder { // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes unsafe { heap.as_mut_vec().extend_from_slice(&buf[..old_len]) }; heap.push_str(s); - *self = SmolStrBuilder::Heap(heap); + self.0 = SmolStrBuilderRepr::Heap(heap); } - SmolStrBuilder::Heap(heap) => heap.push_str(s), + SmolStrBuilderRepr::Heap(heap) => heap.push_str(s), } } } From 91e95813cd1cafe8bc95926b212214d129a321ca Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:40:01 +0200 Subject: [PATCH 282/322] Publish 0.3.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1f6b0f72d7b4..18506059d889 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.3.0" +version = "0.3.1" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" From 4a25cbe70414dbbb017774d5dd5d1a916ef039ee Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 10:03:47 +0200 Subject: [PATCH 283/322] Add CHANGELOG.md --- CHANGELOG.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000000..82e66e1ed470 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,14 @@ +# Changelog + +## 0.3.1 - 2024-09-04 + +- Fix `SmolStrBuilder` leaking implementation details + +## 0.3.0 - 2024-09-04 + +- Removed deprecated `SmolStr::new_inline_from_ascii` function +- Removed `SmolStr::to_string` in favor of `ToString::to_string` +- Added `impl AsRef<[u8]> for SmolStr` impl +- Added `impl AsRef for SmolStr` impl +- Added `impl AsRef for SmolStr` impl +- Added `SmolStrBuilder` From 
32e8936a78c2ac2868aff367a86fef157d23d076 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Tue, 3 Sep 2024 09:21:42 +0100 Subject: [PATCH 284/322] Add 0.2.2 changelog & fix lints --- CHANGELOG.md | 17 +++++++++++------ src/borsh.rs | 2 +- src/lib.rs | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 82e66e1ed470..f407bc11a51b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,14 @@ ## 0.3.0 - 2024-09-04 -- Removed deprecated `SmolStr::new_inline_from_ascii` function -- Removed `SmolStr::to_string` in favor of `ToString::to_string` -- Added `impl AsRef<[u8]> for SmolStr` impl -- Added `impl AsRef for SmolStr` impl -- Added `impl AsRef for SmolStr` impl -- Added `SmolStrBuilder` +- Remove deprecated `SmolStr::new_inline_from_ascii` function +- Remove `SmolStr::to_string` in favor of `ToString::to_string` +- Add `impl AsRef<[u8]> for SmolStr` impl +- Add `impl AsRef for SmolStr` impl +- Add `impl AsRef for SmolStr` impl +- Add `SmolStrBuilder` + +## 0.2.2 - 2024-05-14 + +- Add `StrExt` trait providing `to_lowercase_smolstr`, `replace_smolstr` and similar +- Add `PartialEq` optimisation for `ptr_eq`-able representations diff --git a/src/borsh.rs b/src/borsh.rs index 5617bce3713a..362c288d0170 100644 --- a/src/borsh.rs +++ b/src/borsh.rs @@ -22,7 +22,7 @@ impl BorshDeserialize for SmolStr { Error::new(ErrorKind::InvalidData, msg) })?; Ok(SmolStr(Repr::Inline { - len: unsafe { transmute(len as u8) }, + len: unsafe { transmute::(len as u8) }, buf, })) } else { diff --git a/src/lib.rs b/src/lib.rs index e3a8ef8b8aad..d00ec98915e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,7 @@ use core::{ /// * Strings are stack-allocated if they are: /// * Up to 23 bytes long /// * Longer than 23 bytes, but substrings of `WS` (see below). 
Such strings consist -/// solely of consecutive newlines, followed by consecutive spaces +/// solely of consecutive newlines, followed by consecutive spaces /// * If a string does not satisfy the aforementioned conditions, it is heap-allocated /// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation /// From 811ac438d4eca7c2517a562ac6294c37e0dbb689 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Tue, 3 Sep 2024 09:47:53 +0100 Subject: [PATCH 285/322] Update CHANGELOG.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Laurențiu Nicola --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f407bc11a51b..c1346a28a460 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,4 +16,4 @@ ## 0.2.2 - 2024-05-14 - Add `StrExt` trait providing `to_lowercase_smolstr`, `replace_smolstr` and similar -- Add `PartialEq` optimisation for `ptr_eq`-able representations +- Add `PartialEq` optimization for `ptr_eq`-able representations From 7235aa171f52641f4d7cc88a5e90d2ef8b92ca5b Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Wed, 23 Oct 2024 22:53:32 +0200 Subject: [PATCH 286/322] Fix SmoLStrBuilder pushing null bytes on heap spill --- CHANGELOG.md | 5 +++++ src/lib.rs | 2 +- tests/test.rs | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1346a28a460..41f137743059 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 0.3.2 - 2024-10-23 + +- Fix `SmolStrBuilder::push` incorrectly padding null bytes when spilling onto the heap on a + multibyte character push + ## 0.3.1 - 2024-09-04 - Fix `SmolStrBuilder` leaking implementation details diff --git a/src/lib.rs b/src/lib.rs index d00ec98915e9..bf88f57cf81e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -763,7 +763,7 @@ impl SmolStrBuilder { let mut heap = String::with_capacity(new_len); // copy 
existing inline bytes over to the heap // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes - unsafe { heap.as_mut_vec().extend_from_slice(buf) }; + unsafe { heap.as_mut_vec().extend_from_slice(&buf[..*len]) }; heap.push(c); self.0 = SmolStrBuilderRepr::Heap(heap); } diff --git a/tests/test.rs b/tests/test.rs index 81bccf106ece..96b8b8f7f0b8 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -255,6 +255,7 @@ fn test_to_smolstr() { assert_eq!(a, smol_str::format_smolstr!("{}", a)); } } + #[test] fn test_builder_push_str() { //empty @@ -290,6 +291,14 @@ fn test_builder_push_str() { let s = builder.finish(); assert!(s.is_heap_allocated()); assert_eq!("a".repeat(46), s); + + // heap push on multibyte char + let mut builder = SmolStrBuilder::new(); + builder.push_str("ohnonononononononono!"); + builder.push('🤯'); + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("ohnonononononononono!🤯", s); } #[test] From 5ffc90069f545c0444447cd08c2a29c6abb97fbb Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Wed, 23 Oct 2024 22:54:26 +0200 Subject: [PATCH 287/322] Publish 0.3.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 18506059d889..e89e0e8e020a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.3.1" +version = "0.3.2" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" From 484153df602ec3dfc4feb45ff110c915fb21e79d Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 12:34:12 +0100 Subject: [PATCH 288/322] Fix lints --- src/borsh.rs | 16 +++++++--------- src/lib.rs | 6 +++--- tests/test.rs | 2 +- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/borsh.rs b/src/borsh.rs index 362c288d0170..ebb20d71a005 100644 --- a/src/borsh.rs +++ b/src/borsh.rs @@ -1,8 +1,10 @@ use crate::{Repr, SmolStr, 
INLINE_CAP}; use alloc::string::{String, ToString}; -use borsh::io::{Error, ErrorKind, Read, Write}; -use borsh::{BorshDeserialize, BorshSerialize}; -use core::intrinsics::transmute; +use borsh::{ + io::{Error, ErrorKind, Read, Write}, + BorshDeserialize, BorshSerialize, +}; +use core::mem::transmute; impl BorshSerialize for SmolStr { fn serialize(&self, writer: &mut W) -> borsh::io::Result<()> { @@ -27,12 +29,8 @@ impl BorshDeserialize for SmolStr { })) } else { // u8::vec_from_reader always returns Some on success in current implementation - let vec = u8::vec_from_reader(len, reader)?.ok_or_else(|| { - Error::new( - ErrorKind::Other, - "u8::vec_from_reader unexpectedly returned None".to_string(), - ) - })?; + let vec = u8::vec_from_reader(len, reader)? + .ok_or_else(|| Error::other("u8::vec_from_reader unexpectedly returned None"))?; Ok(SmolStr::from(String::from_utf8(vec).map_err(|err| { let msg = err.to_string(); Error::new(ErrorKind::InvalidData, msg) diff --git a/src/lib.rs b/src/lib.rs index bf88f57cf81e..d76f029dbe4c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -161,7 +161,7 @@ impl<'a> PartialEq<&'a str> for SmolStr { } } -impl<'a> PartialEq for &'a str { +impl PartialEq for &str { #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { *self == other @@ -189,7 +189,7 @@ impl<'a> PartialEq<&'a String> for SmolStr { } } -impl<'a> PartialEq for &'a String { +impl PartialEq for &String { #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { *self == other @@ -380,7 +380,7 @@ impl From> for SmolStr { impl From> for SmolStr { #[inline] fn from(s: Arc) -> SmolStr { - let repr = Repr::new_on_stack(s.as_ref()).unwrap_or_else(|| Repr::Heap(s)); + let repr = Repr::new_on_stack(s.as_ref()).unwrap_or(Repr::Heap(s)); Self(repr) } } diff --git a/tests/test.rs b/tests/test.rs index 96b8b8f7f0b8..0070b3a5ecb0 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -390,8 +390,8 @@ mod test_str_ext { assert!(!result.is_heap_allocated()); } } -#[cfg(feature = "borsh")] 
+#[cfg(feature = "borsh")] mod borsh_tests { use borsh::BorshDeserialize; use smol_str::{SmolStr, ToSmolStr}; From 0e432a0b7c1f537d3bdafd1ec2afb103745b15ec Mon Sep 17 00:00:00 2001 From: james7132 Date: Mon, 22 Sep 2025 01:29:52 -0700 Subject: [PATCH 289/322] Use serde_core over serde --- Cargo.toml | 5 +++-- src/serde.rs | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e89e0e8e020a..d938e40ac2c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ rustdoc-args = ["--cfg", "docsrs"] all-features = true [dependencies] -serde = { version = "1.0", optional = true, default-features = false } +serde_core = { version = "1.0.220", optional = true, default-features = false } borsh = { version = "1.4.0", optional = true, default-features = false } arbitrary = { version = "1.3", optional = true } @@ -23,4 +23,5 @@ serde = { version = "1.0", features = ["derive"] } [features] default = ["std"] -std = ["serde?/std", "borsh?/std"] +std = ["serde_core?/std", "borsh?/std"] +serde = ["dep:serde_core"] diff --git a/src/serde.rs b/src/serde.rs index 05b8fecacc33..4f08b444c58e 100644 --- a/src/serde.rs +++ b/src/serde.rs @@ -2,6 +2,7 @@ use alloc::{string::String, vec::Vec}; use core::fmt; use serde::de::{Deserializer, Error, Unexpected, Visitor}; +use serde_core as serde; use crate::SmolStr; From 4f0658f5d334ea22067a48b9c16bf63f37f45f6f Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 00:36:31 +0100 Subject: [PATCH 290/322] Optimise to_ascii_{upper,lower}case_smolstr --- src/lib.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d76f029dbe4c..ff25651f54bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -644,12 +644,36 @@ impl StrExt for str { #[inline] fn to_ascii_lowercase_smolstr(&self) -> SmolStr { - from_char_iter(self.chars().map(|c| c.to_ascii_lowercase())) + let len = self.len(); + if len <= INLINE_CAP { + let mut buf = [0u8; 
INLINE_CAP]; + buf[..len].copy_from_slice(self.as_bytes()); + buf[..len].make_ascii_lowercase(); + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) + } else { + self.to_ascii_lowercase().into() + } } #[inline] fn to_ascii_uppercase_smolstr(&self) -> SmolStr { - from_char_iter(self.chars().map(|c| c.to_ascii_uppercase())) + let len = self.len(); + if len <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + buf[..len].copy_from_slice(self.as_bytes()); + buf[..len].make_ascii_uppercase(); + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) + } else { + self.to_ascii_uppercase().into() + } } #[inline] From b3ac1b56a4c89cf875ce87a37397f5533fe85165 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 01:38:41 +0100 Subject: [PATCH 291/322] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41f137743059..190d6e83098d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## Unreleased + +- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` + ~2x speedup inline, ~4-22x for heap. 
+ ## 0.3.2 - 2024-10-23 - Fix `SmolStrBuilder::push` incorrectly padding null bytes when spilling onto the heap on a From df41c50c37d4b3ef3edb59bc7a2c875f9d6bc203 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sat, 13 Sep 2025 16:34:55 +0100 Subject: [PATCH 292/322] Add SmolStr vs String benchmarks --- Cargo.toml | 9 +++ README.md | 6 ++ benches/bench.rs | 157 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+) create mode 100644 benches/bench.rs diff --git a/Cargo.toml b/Cargo.toml index d938e40ac2c4..e6f10a2715e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,8 +20,17 @@ arbitrary = { version = "1.3", optional = true } proptest = "1.5" serde_json = "1.0" serde = { version = "1.0", features = ["derive"] } +criterion = "0.7" +rand = "0.9.2" [features] default = ["std"] std = ["serde_core?/std", "borsh?/std"] serde = ["dep:serde_core"] + +[[bench]] +name = "bench" +harness = false + +[profile.bench] +lto = "fat" diff --git a/README.md b/README.md index ce16759e812c..56296fb53f7d 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,12 @@ languages. Strings consisting of a series of newlines, followed by a series of whitespace are a typical pattern in computer programs because of indentation. Note that a specialized interner might be a better solution for some use cases. +## Benchmarks +Run criterion benches with +```sh +cargo bench --bench \* -- --quick +``` + ## MSRV Policy Minimal Supported Rust Version: latest stable. diff --git a/benches/bench.rs b/benches/bench.rs new file mode 100644 index 000000000000..fa4c58832de7 --- /dev/null +++ b/benches/bench.rs @@ -0,0 +1,157 @@ +//! SmolStr vs String benchmarks. 
+use criterion::{criterion_group, criterion_main, Criterion}; +use rand::distr::{Alphanumeric, SampleString}; +use smol_str::{format_smolstr, SmolStr, StrExt, ToSmolStr}; +use std::hint::black_box; + +/// 12: small (inline) +/// 50: medium (heap) +/// 1000: large (heap) +const TEST_LENS: [usize; 3] = [12, 50, 1000]; + +fn format_bench(c: &mut Criterion) { + for len in TEST_LENS { + let n = rand::random_range(10000..99999); + let str_len = len.checked_sub(n.to_smolstr().len()).unwrap(); + let str = Alphanumeric.sample_string(&mut rand::rng(), str_len); + + c.bench_function(&format!("SmolStr format_smolstr! len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = format_smolstr!("{str}-{n}")); + assert_eq!(v, format!("{str}-{n}")); + }); + c.bench_function(&format!("std format! len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = format!("{str}-{n}")); + assert_eq!(v, format!("{str}-{n}")); + }); + } +} + +fn from_str_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + + c.bench_function(&format!("SmolStr::from len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = SmolStr::from(black_box(&str))); + assert_eq!(v, str); + }); + c.bench_function(&format!("std String::from len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = String::from(black_box(&str))); + assert_eq!(v, str); + }); + } +} + +fn clone_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + let smolstr = SmolStr::new(&str); + + c.bench_function(&format!("SmolStr::clone len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = smolstr.clone()); + assert_eq!(v, str); + }); + c.bench_function(&format!("std String::clone len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.clone()); + assert_eq!(v, str); + }); + } +} + +fn eq_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut 
rand::rng(), len); + let smolstr = SmolStr::new(&str); + + c.bench_function(&format!("SmolStr::eq len={len}"), |b| { + let mut v = false; + b.iter(|| v = smolstr == black_box(&str)); + assert!(v); + }); + c.bench_function(&format!("std String::eq len={len}"), |b| { + let mut v = false; + b.iter(|| v = &str == black_box(&str)); + assert!(v); + }); + } +} + +fn to_lowercase_bench(c: &mut Criterion) { + const END_CHAR: char = 'İ'; + + for len in TEST_LENS { + // mostly ascii seq with some non-ascii at the end + let mut str = Alphanumeric.sample_string(&mut rand::rng(), len - END_CHAR.len_utf8()); + str.push(END_CHAR); + let str = str.as_str(); + + c.bench_function(&format!("SmolStr to_lowercase_smolstr len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.to_lowercase_smolstr()); + assert_eq!(v, str.to_lowercase()); + }); + c.bench_function(&format!("std to_lowercase len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.to_lowercase()); + assert_eq!(v, str.to_lowercase()); + }); + } +} + +fn to_ascii_lowercase_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + let str = str.as_str(); + + c.bench_function( + &format!("SmolStr to_ascii_lowercase_smolstr len={len}"), + |b| { + let mut v = <_>::default(); + b.iter(|| v = str.to_ascii_lowercase_smolstr()); + assert_eq!(v, str.to_ascii_lowercase()); + }, + ); + c.bench_function(&format!("std to_ascii_lowercase len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.to_ascii_lowercase()); + assert_eq!(v, str.to_ascii_lowercase()); + }); + } +} + +fn replace_bench(c: &mut Criterion) { + for len in TEST_LENS { + let s_dash_s = Alphanumeric.sample_string(&mut rand::rng(), len / 2) + + "-" + + &Alphanumeric.sample_string(&mut rand::rng(), len - 1 - len / 2); + let str = s_dash_s.as_str(); + + c.bench_function(&format!("SmolStr replace_smolstr len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = 
str.replace_smolstr("-", "_")); + assert_eq!(v, str.replace("-", "_")); + }); + c.bench_function(&format!("std replace len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.replace("-", "_")); + assert_eq!(v, str.replace("-", "_")); + }); + } +} + +criterion_group!( + benches, + format_bench, + from_str_bench, + clone_bench, + eq_bench, + to_lowercase_bench, + to_ascii_lowercase_bench, + replace_bench, +); +criterion_main!(benches); From b4220df6734f2454991ff5c89d0fac30e39c136d Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 00:30:49 +0100 Subject: [PATCH 293/322] Only bench smol_str stuff --- benches/bench.rs | 49 +++++------------------------------------------- 1 file changed, 5 insertions(+), 44 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index fa4c58832de7..2643b025575d 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -1,4 +1,3 @@ -//! SmolStr vs String benchmarks. use criterion::{criterion_group, criterion_main, Criterion}; use rand::distr::{Alphanumeric, SampleString}; use smol_str::{format_smolstr, SmolStr, StrExt, ToSmolStr}; @@ -15,16 +14,11 @@ fn format_bench(c: &mut Criterion) { let str_len = len.checked_sub(n.to_smolstr().len()).unwrap(); let str = Alphanumeric.sample_string(&mut rand::rng(), str_len); - c.bench_function(&format!("SmolStr format_smolstr! len={len}"), |b| { + c.bench_function(&format!("format_smolstr! len={len}"), |b| { let mut v = <_>::default(); b.iter(|| v = format_smolstr!("{str}-{n}")); assert_eq!(v, format!("{str}-{n}")); }); - c.bench_function(&format!("std format! 
len={len}"), |b| { - let mut v = <_>::default(); - b.iter(|| v = format!("{str}-{n}")); - assert_eq!(v, format!("{str}-{n}")); - }); } } @@ -37,11 +31,6 @@ fn from_str_bench(c: &mut Criterion) { b.iter(|| v = SmolStr::from(black_box(&str))); assert_eq!(v, str); }); - c.bench_function(&format!("std String::from len={len}"), |b| { - let mut v = <_>::default(); - b.iter(|| v = String::from(black_box(&str))); - assert_eq!(v, str); - }); } } @@ -55,11 +44,6 @@ fn clone_bench(c: &mut Criterion) { b.iter(|| v = smolstr.clone()); assert_eq!(v, str); }); - c.bench_function(&format!("std String::clone len={len}"), |b| { - let mut v = <_>::default(); - b.iter(|| v = str.clone()); - assert_eq!(v, str); - }); } } @@ -73,11 +57,6 @@ fn eq_bench(c: &mut Criterion) { b.iter(|| v = smolstr == black_box(&str)); assert!(v); }); - c.bench_function(&format!("std String::eq len={len}"), |b| { - let mut v = false; - b.iter(|| v = &str == black_box(&str)); - assert!(v); - }); } } @@ -90,16 +69,11 @@ fn to_lowercase_bench(c: &mut Criterion) { str.push(END_CHAR); let str = str.as_str(); - c.bench_function(&format!("SmolStr to_lowercase_smolstr len={len}"), |b| { + c.bench_function(&format!("to_lowercase_smolstr len={len}"), |b| { let mut v = <_>::default(); b.iter(|| v = str.to_lowercase_smolstr()); assert_eq!(v, str.to_lowercase()); }); - c.bench_function(&format!("std to_lowercase len={len}"), |b| { - let mut v = <_>::default(); - b.iter(|| v = str.to_lowercase()); - assert_eq!(v, str.to_lowercase()); - }); } } @@ -108,17 +82,9 @@ fn to_ascii_lowercase_bench(c: &mut Criterion) { let str = Alphanumeric.sample_string(&mut rand::rng(), len); let str = str.as_str(); - c.bench_function( - &format!("SmolStr to_ascii_lowercase_smolstr len={len}"), - |b| { - let mut v = <_>::default(); - b.iter(|| v = str.to_ascii_lowercase_smolstr()); - assert_eq!(v, str.to_ascii_lowercase()); - }, - ); - c.bench_function(&format!("std to_ascii_lowercase len={len}"), |b| { + 
c.bench_function(&format!("to_ascii_lowercase_smolstr len={len}"), |b| { let mut v = <_>::default(); - b.iter(|| v = str.to_ascii_lowercase()); + b.iter(|| v = str.to_ascii_lowercase_smolstr()); assert_eq!(v, str.to_ascii_lowercase()); }); } @@ -131,16 +97,11 @@ fn replace_bench(c: &mut Criterion) { + &Alphanumeric.sample_string(&mut rand::rng(), len - 1 - len / 2); let str = s_dash_s.as_str(); - c.bench_function(&format!("SmolStr replace_smolstr len={len}"), |b| { + c.bench_function(&format!("replace_smolstr len={len}"), |b| { let mut v = <_>::default(); b.iter(|| v = str.replace_smolstr("-", "_")); assert_eq!(v, str.replace("-", "_")); }); - c.bench_function(&format!("std replace len={len}"), |b| { - let mut v = <_>::default(); - b.iter(|| v = str.replace("-", "_")); - assert_eq!(v, str.replace("-", "_")); - }); } } From 81c8790a175365d72040959f689abab5b4e93ca4 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 12:42:41 +0100 Subject: [PATCH 294/322] CI: Add TEST_BENCHES --- .github/ci.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/ci.rs b/.github/ci.rs index 21c8584fb9f7..c594e8973c83 100644 --- a/.github/ci.rs +++ b/.github/ci.rs @@ -39,6 +39,11 @@ fn try_main() -> Result<()> { shell("cargo test --no-default-features --workspace")?; } + { + let _s = Section::new("TEST_BENCHES"); + shell("cargo test --benches --all-features")?; + } + let current_branch = shell_output("git branch --show-current")?; if ¤t_branch == "master" { let _s = Section::new("PUBLISH"); From 3122a9a93ebc26e07e93ab1e56f9a2023ba2e8c0 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 01:23:48 +0100 Subject: [PATCH 295/322] Optimise `to_{lower,upper}case_smolstr` --- src/lib.rs | 104 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 98 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ff25651f54bf..f2f021a7b5f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -233,8 +233,17 @@ impl iter::FromIterator for 
SmolStr { } } -fn from_char_iter(mut iter: impl Iterator) -> SmolStr { - let (min_size, _) = iter.size_hint(); +#[inline] +fn from_char_iter(iter: impl Iterator) -> SmolStr { + from_buf_and_chars([0; _], 0, iter) +} + +fn from_buf_and_chars( + mut buf: [u8; INLINE_CAP], + buf_len: usize, + mut iter: impl Iterator, +) -> SmolStr { + let min_size = iter.size_hint().0 + buf_len; if min_size > INLINE_CAP { let heap: String = iter.collect(); if heap.len() <= INLINE_CAP { @@ -243,8 +252,7 @@ fn from_char_iter(mut iter: impl Iterator) -> SmolStr { } return SmolStr(Repr::Heap(heap.into_boxed_str().into())); } - let mut len = 0; - let mut buf = [0u8; INLINE_CAP]; + let mut len = buf_len; while let Some(ch) = iter.next() { let size = ch.len_utf8(); if size + len > INLINE_CAP { @@ -634,12 +642,32 @@ pub trait StrExt: private::Sealed { impl StrExt for str { #[inline] fn to_lowercase_smolstr(&self) -> SmolStr { - from_char_iter(self.chars().flat_map(|c| c.to_lowercase())) + let len = self.len(); + if len <= INLINE_CAP { + let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_lowercase); + from_buf_and_chars( + buf, + len - rest.len(), + rest.chars().flat_map(|c| c.to_lowercase()), + ) + } else { + self.to_lowercase().into() + } } #[inline] fn to_uppercase_smolstr(&self) -> SmolStr { - from_char_iter(self.chars().flat_map(|c| c.to_uppercase())) + let len = self.len(); + if len <= INLINE_CAP { + let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_uppercase); + from_buf_and_chars( + buf, + len - rest.len(), + rest.chars().flat_map(|c| c.to_uppercase()), + ) + } else { + self.to_uppercase().into() + } } #[inline] @@ -699,6 +727,70 @@ impl StrExt for str { } } +/// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23. +#[inline] +fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) { + // Process the input in chunks of 16 bytes to enable auto-vectorization. 
+ // Previously the chunk size depended on the size of `usize`, + // but on 32-bit platforms with sse or neon is also the better choice. + // The only downside on other platforms would be a bit more loop-unrolling. + const N: usize = 16; + + debug_assert!(s.len() <= INLINE_CAP, "only for inline-able strings"); + + let mut slice = s.as_bytes(); + let mut out = [0u8; INLINE_CAP]; + let mut out_slice = &mut out[..slice.len()]; + let mut is_ascii = [false; N]; + + while slice.len() >= N { + // SAFETY: checked in loop condition + let chunk = unsafe { slice.get_unchecked(..N) }; + // SAFETY: out_slice has at least same length as input slice and gets sliced with the same offsets + let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) }; + + for j in 0..N { + is_ascii[j] = chunk[j] <= 127; + } + + // Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk + // size gives the best result, specifically a pmovmsk instruction on x86. + // See https://github.com/llvm/llvm-project/issues/96395 for why llvm currently does not + // currently recognize other similar idioms. + if is_ascii.iter().map(|x| *x as u8).sum::() as usize != N { + break; + } + + for j in 0..N { + out_chunk[j] = convert(&chunk[j]); + } + + slice = unsafe { slice.get_unchecked(N..) }; + out_slice = unsafe { out_slice.get_unchecked_mut(N..) }; + } + + // handle the remainder as individual bytes + while !slice.is_empty() { + let byte = slice[0]; + if byte > 127 { + break; + } + // SAFETY: out_slice has at least same length as input slice + unsafe { + *out_slice.get_unchecked_mut(0) = convert(&byte); + } + slice = unsafe { slice.get_unchecked(1..) }; + out_slice = unsafe { out_slice.get_unchecked_mut(1..) 
}; + } + + unsafe { + // SAFETY: we know this is a valid char boundary + // since we only skipped over leading ascii bytes + let rest = core::str::from_utf8_unchecked(slice); + (out, rest) + } +} + impl ToSmolStr for T where T: fmt::Display + ?Sized, From 22feb5fe0e8eece4b1f40f28e56353a453b22fef Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 01:36:41 +0100 Subject: [PATCH 296/322] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 190d6e83098d..2577011ffe61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` ~2x speedup inline, ~4-22x for heap. +- Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap. ## 0.3.2 - 2024-10-23 From 08a9dc8b54a4df873fa603f054b7ad063d6cf882 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 15 Sep 2025 09:39:19 +0100 Subject: [PATCH 297/322] Add test from_buf_and_chars_size_hinted_heap & fix --- src/lib.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index f2f021a7b5f9..5ef6260f56d7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -245,7 +245,11 @@ fn from_buf_and_chars( ) -> SmolStr { let min_size = iter.size_hint().0 + buf_len; if min_size > INLINE_CAP { - let heap: String = iter.collect(); + let heap: String = core::str::from_utf8(&buf[..buf_len]) + .unwrap() + .chars() + .chain(iter) + .collect(); if heap.len() <= INLINE_CAP { // size hint lied return SmolStr::new_inline(&heap); @@ -940,3 +944,14 @@ impl<'a> arbitrary::Arbitrary<'a> for SmolStr { mod borsh; #[cfg(feature = "serde")] mod serde; + +#[test] +fn from_buf_and_chars_size_hinted_heap() { + let str = from_buf_and_chars( + *b"abcdefghijklmnopqr00000", + 18, + "_0x1x2x3x4x5x6x7x8x9x10x11x12x13".chars(), + ); + + assert_eq!(str, "abcdefghijklmnopqr_0x1x2x3x4x5x6x7x8x9x10x11x12x13"); +} 
From 104784a50ebdb7e59ea0ad5059676935913b94a4 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 15 Sep 2025 20:26:43 +0100 Subject: [PATCH 298/322] Optimise replacen_smolstr for single ascii replace --- CHANGELOG.md | 2 ++ src/lib.rs | 35 +++++++++++++++++++++++++++++++++++ tests/test.rs | 7 +++++++ 3 files changed, 44 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2577011ffe61..c0193f6fcb53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ - Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` ~2x speedup inline, ~4-22x for heap. - Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap. +- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace. + ~3x speedup inline, ~1.8x for heap (len=50). ## 0.3.2 - 2024-10-23 diff --git a/src/lib.rs b/src/lib.rs index 5ef6260f56d7..d55ba2052265 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -715,6 +715,13 @@ impl StrExt for str { #[inline] fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr { + // Fast path for replacing a single ASCII character with another inline. 
+ if let [from_u8] = from.as_bytes() { + if let [to_u8] = to.as_bytes() { + return replacen_1_ascii(self, *from_u8, *to_u8, count); + } + } + let mut result = SmolStrBuilder::new(); let mut last_end = 0; for (start, part) in self.match_indices(from).take(count) { @@ -731,6 +738,34 @@ impl StrExt for str { } } +#[inline] +fn replacen_1_ascii(src: &str, from: u8, to: u8, count: usize) -> SmolStr { + let mut replaced = 0; + let mut ascii_replace = |b: &u8| { + if *b == from && replaced != count { + replaced += 1; + to + } else { + *b + } + }; + if src.len() <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + for (idx, b) in src.as_bytes().iter().enumerate() { + buf[idx] = ascii_replace(b); + } + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(src.len() as u8) }, + buf, + }) + } else { + let out = src.as_bytes().iter().map(ascii_replace).collect(); + // SAFETY: We replaced ascii with ascii on valid utf8 strings. + unsafe { String::from_utf8_unchecked(out).into() } + } +} + /// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23. 
#[inline] fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) { diff --git a/tests/test.rs b/tests/test.rs index 0070b3a5ecb0..8f7d9ec39ac7 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -389,6 +389,13 @@ mod test_str_ext { assert_eq!(result, "foo_dor_baz"); assert!(!result.is_heap_allocated()); } + + #[test] + fn replacen_1_ascii() { + let result = "foo_bar_baz".replacen_smolstr("o", "u", 1); + assert_eq!(result, "fuo_bar_baz"); + assert!(!result.is_heap_allocated()); + } } #[cfg(feature = "borsh")] From 026103b3c16f5239d325c6c1463c83977d43de42 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 15 Sep 2025 20:46:47 +0100 Subject: [PATCH 299/322] Optimise replacen 1-ascii when count >= len --- CHANGELOG.md | 2 +- src/lib.rs | 29 +++++++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0193f6fcb53..1dff469b8a8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ ~2x speedup inline, ~4-22x for heap. - Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap. - Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace. - ~3x speedup inline, ~1.8x for heap (len=50). + ~3.7x speedup inline, ~2.4x for heap. ## 0.3.2 - 2024-10-23 diff --git a/src/lib.rs b/src/lib.rs index d55ba2052265..3a6442eaaba7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -714,11 +714,21 @@ impl StrExt for str { } #[inline] - fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr { + fn replacen_smolstr(&self, from: &str, to: &str, mut count: usize) -> SmolStr { // Fast path for replacing a single ASCII character with another inline. 
if let [from_u8] = from.as_bytes() { if let [to_u8] = to.as_bytes() { - return replacen_1_ascii(self, *from_u8, *to_u8, count); + return match self.len() <= count { + true => replacen_1_ascii(self, |b| if b == *from_u8 { *to_u8 } else { b }), + _ => replacen_1_ascii(self, |b| { + if b == *from_u8 && count != 0 { + count -= 1; + *to_u8 + } else { + b + } + }), + }; } } @@ -739,20 +749,11 @@ impl StrExt for str { } #[inline] -fn replacen_1_ascii(src: &str, from: u8, to: u8, count: usize) -> SmolStr { - let mut replaced = 0; - let mut ascii_replace = |b: &u8| { - if *b == from && replaced != count { - replaced += 1; - to - } else { - *b - } - }; +fn replacen_1_ascii(src: &str, mut map: impl FnMut(u8) -> u8) -> SmolStr { if src.len() <= INLINE_CAP { let mut buf = [0u8; INLINE_CAP]; for (idx, b) in src.as_bytes().iter().enumerate() { - buf[idx] = ascii_replace(b); + buf[idx] = map(*b); } SmolStr(Repr::Inline { // SAFETY: `len` is in bounds @@ -760,7 +761,7 @@ fn replacen_1_ascii(src: &str, from: u8, to: u8, count: usize) -> SmolStr { buf, }) } else { - let out = src.as_bytes().iter().map(ascii_replace).collect(); + let out = src.as_bytes().iter().map(|b| map(*b)).collect(); // SAFETY: We replaced ascii with ascii on valid utf8 strings. unsafe { String::from_utf8_unchecked(out).into() } } From 97ef5f3f2d8b9e580d9a192f140354173857a4e1 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 15 Sep 2025 21:22:01 +0100 Subject: [PATCH 300/322] Mark replacen_1_ascii as unsafe --- src/lib.rs | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3a6442eaaba7..ded07c61c6df 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -718,16 +718,20 @@ impl StrExt for str { // Fast path for replacing a single ASCII character with another inline. 
if let [from_u8] = from.as_bytes() { if let [to_u8] = to.as_bytes() { - return match self.len() <= count { - true => replacen_1_ascii(self, |b| if b == *from_u8 { *to_u8 } else { b }), - _ => replacen_1_ascii(self, |b| { - if b == *from_u8 && count != 0 { - count -= 1; - *to_u8 - } else { - b - } - }), + return if self.len() <= count { + // SAFETY: `from_u8` & `to_u8` are ascii + unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) } + } else { + unsafe { + replacen_1_ascii(self, |b| { + if b == from_u8 && count != 0 { + count -= 1; + *to_u8 + } else { + *b + } + }) + } }; } } @@ -748,12 +752,13 @@ impl StrExt for str { } } +/// SAFETY: `map` fn must only replace ascii with ascii or return unchanged bytes. #[inline] -fn replacen_1_ascii(src: &str, mut map: impl FnMut(u8) -> u8) -> SmolStr { +unsafe fn replacen_1_ascii(src: &str, mut map: impl FnMut(&u8) -> u8) -> SmolStr { if src.len() <= INLINE_CAP { let mut buf = [0u8; INLINE_CAP]; for (idx, b) in src.as_bytes().iter().enumerate() { - buf[idx] = map(*b); + buf[idx] = map(b); } SmolStr(Repr::Inline { // SAFETY: `len` is in bounds @@ -761,7 +766,7 @@ fn replacen_1_ascii(src: &str, mut map: impl FnMut(u8) -> u8) -> SmolStr { buf, }) } else { - let out = src.as_bytes().iter().map(|b| map(*b)).collect(); + let out = src.as_bytes().iter().map(map).collect(); // SAFETY: We replaced ascii with ascii on valid utf8 strings. unsafe { String::from_utf8_unchecked(out).into() } } From 8f4deabcc05787951f47c5c620fbcd7941f9626e Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 15 Sep 2025 21:30:34 +0100 Subject: [PATCH 301/322] Update changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dff469b8a8a..c46000eb7341 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,8 @@ - Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` ~2x speedup inline, ~4-22x for heap. 
- Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap. -- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace. - ~3.7x speedup inline, ~2.4x for heap. +- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace, + ~3x speedup inline & heap. ## 0.3.2 - 2024-10-23 From bf2e3d7a7f8b094ce5bab9ef0a4cc5b8e6cde904 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 23 Oct 2025 13:15:01 +0200 Subject: [PATCH 302/322] Publish 0.3.3 --- CHANGELOG.md | 6 ++++-- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c46000eb7341..bd29acc6a64a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,10 @@ # Changelog -## Unreleased +## Unreleased -- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` +## 0.3.3 - 2025-10-23 + +- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` ~2x speedup inline, ~4-22x for heap. - Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap. 
- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace, diff --git a/Cargo.toml b/Cargo.toml index e6f10a2715e7..277bb0b98c42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.3.2" +version = "0.3.3" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" From 02336c59bed57d292997c0b5a69e436785aa3be3 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 23 Oct 2025 17:10:05 +0200 Subject: [PATCH 303/322] Publish 0.3.4 --- CHANGELOG.md | 4 ++++ Cargo.toml | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd29acc6a64a..fb65d88ad191 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +## 0.3.4 - 2025-10-23 + +- Added `rust-version` field to `Cargo.toml` + ## 0.3.3 - 2025-10-23 - Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` diff --git a/Cargo.toml b/Cargo.toml index 277bb0b98c42..4752a84ed419 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,12 @@ [package] name = "smol_str" -version = "0.3.3" +version = "0.3.4" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" authors = ["Aleksey Kladov ", "Lukas Wirth "] edition = "2021" +rust-version = "1.89" [package.metadata.docs.rs] rustdoc-args = ["--cfg", "docsrs"] From 291cc5d2c7e635c06e08b8734855377223b7d842 Mon Sep 17 00:00:00 2001 From: Taj Pereira Date: Sat, 25 Oct 2025 05:52:37 +1000 Subject: [PATCH 304/322] Rename `downcast_[ref|mut]_unchecked` -> `downcast_unchecked_[ref|mut]` --- crates/stdx/src/anymap.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/stdx/src/anymap.rs b/crates/stdx/src/anymap.rs index a3f6ab89510e..b82eeaac0be4 100644 --- a/crates/stdx/src/anymap.rs +++ 
b/crates/stdx/src/anymap.rs @@ -117,7 +117,7 @@ impl Map { #[inline] #[must_use] pub fn get>(&self) -> Option<&T> { - self.raw.get(&TypeId::of::()).map(|any| unsafe { any.downcast_ref_unchecked::() }) + self.raw.get(&TypeId::of::()).map(|any| unsafe { any.downcast_unchecked_ref::() }) } /// Gets the entry for the given type in the collection for in-place manipulation @@ -172,7 +172,7 @@ impl<'map, A: ?Sized + Downcast, V: IntoBox> OccupiedEntry<'map, A, V> { #[inline] #[must_use] pub fn into_mut(self) -> &'map mut V { - unsafe { self.inner.into_mut().downcast_mut_unchecked() } + unsafe { self.inner.into_mut().downcast_unchecked_mut() } } } @@ -181,7 +181,7 @@ impl<'map, A: ?Sized + Downcast, V: IntoBox> VacantEntry<'map, A, V> { /// and returns a mutable reference to it #[inline] pub fn insert(self, value: V) -> &'map mut V { - unsafe { self.inner.insert(value.into_box()).downcast_mut_unchecked() } + unsafe { self.inner.insert(value.into_box()).downcast_unchecked_mut() } } } @@ -244,14 +244,14 @@ pub trait Downcast { /// # Safety /// /// The caller must ensure that `T` matches the trait object, on pain of *undefined behavior*. - unsafe fn downcast_ref_unchecked(&self) -> &T; + unsafe fn downcast_unchecked_ref(&self) -> &T; /// Downcast from `&mut Any` to `&mut T`, without checking the type matches. /// /// # Safety /// /// The caller must ensure that `T` matches the trait object, on pain of *undefined behavior*. - unsafe fn downcast_mut_unchecked(&mut self) -> &mut T; + unsafe fn downcast_unchecked_mut(&mut self) -> &mut T; } /// A trait for the conversion of an object into a boxed trait object. @@ -269,12 +269,12 @@ macro_rules! 
implement { } #[inline] - unsafe fn downcast_ref_unchecked(&self) -> &T { + unsafe fn downcast_unchecked_ref(&self) -> &T { unsafe { &*std::ptr::from_ref::(self).cast::() } } #[inline] - unsafe fn downcast_mut_unchecked(&mut self) -> &mut T { + unsafe fn downcast_unchecked_mut(&mut self) -> &mut T { unsafe { &mut *std::ptr::from_mut::(self).cast::() } } } From e92e6792cfdff177541b16966a70a7dbcfc9a833 Mon Sep 17 00:00:00 2001 From: A4-Tacks Date: Wed, 5 Nov 2025 16:33:02 +0800 Subject: [PATCH 305/322] Fix not parse never type in inherent impl --- crates/parser/src/grammar/items/traits.rs | 11 ++++++----- crates/parser/test_data/generated/runner.rs | 4 ++++ .../parser/inline/ok/impl_item_never_type.rast | 11 +++++++++++ .../parser/inline/ok/impl_item_never_type.rs | 1 + 4 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 crates/parser/test_data/parser/inline/ok/impl_item_never_type.rast create mode 100644 crates/parser/test_data/parser/inline/ok/impl_item_never_type.rs diff --git a/crates/parser/src/grammar/items/traits.rs b/crates/parser/src/grammar/items/traits.rs index c1b1a3fc8a94..4e48a4c50671 100644 --- a/crates/parser/src/grammar/items/traits.rs +++ b/crates/parser/src/grammar/items/traits.rs @@ -54,12 +54,13 @@ pub(super) fn impl_(p: &mut Parser<'_>, m: Marker) { // impl const Send for S {} p.eat(T![const]); - // FIXME: never type + // test impl_item_never_type // impl ! 
{} - - // test impl_item_neg - // impl !Send for S {} - p.eat(T![!]); + if p.at(T![!]) && !p.nth_at(1, T!['{']) { + // test impl_item_neg + // impl !Send for S {} + p.eat(T![!]); + } impl_type(p); if p.eat(T![for]) { impl_type(p); diff --git a/crates/parser/test_data/generated/runner.rs b/crates/parser/test_data/generated/runner.rs index 7f5ff0ec0735..7b0d32d9d1e4 100644 --- a/crates/parser/test_data/generated/runner.rs +++ b/crates/parser/test_data/generated/runner.rs @@ -322,6 +322,10 @@ mod ok { #[test] fn impl_item_neg() { run_and_expect_no_errors("test_data/parser/inline/ok/impl_item_neg.rs"); } #[test] + fn impl_item_never_type() { + run_and_expect_no_errors("test_data/parser/inline/ok/impl_item_never_type.rs"); + } + #[test] fn impl_trait_type() { run_and_expect_no_errors("test_data/parser/inline/ok/impl_trait_type.rs"); } diff --git a/crates/parser/test_data/parser/inline/ok/impl_item_never_type.rast b/crates/parser/test_data/parser/inline/ok/impl_item_never_type.rast new file mode 100644 index 000000000000..fa4575e0ce0c --- /dev/null +++ b/crates/parser/test_data/parser/inline/ok/impl_item_never_type.rast @@ -0,0 +1,11 @@ +SOURCE_FILE + IMPL + IMPL_KW "impl" + WHITESPACE " " + NEVER_TYPE + BANG "!" + WHITESPACE " " + ASSOC_ITEM_LIST + L_CURLY "{" + R_CURLY "}" + WHITESPACE "\n" diff --git a/crates/parser/test_data/parser/inline/ok/impl_item_never_type.rs b/crates/parser/test_data/parser/inline/ok/impl_item_never_type.rs new file mode 100644 index 000000000000..ed8057b04f2a --- /dev/null +++ b/crates/parser/test_data/parser/inline/ok/impl_item_never_type.rs @@ -0,0 +1 @@ +impl ! 
{} From f96a01b79aab18a6c776b4f37685e89b4d265b5d Mon Sep 17 00:00:00 2001 From: A4-Tacks Date: Tue, 26 Aug 2025 18:37:29 +0800 Subject: [PATCH 306/322] Add guard support for replace_if_let_with_match - Fix loses comments - Fix bad indentation Example --- ```rust fn main() { if $0let true = true && true && false { code() } } ``` **Before this PR** Assist not applicable **After this PR** ```rust fn main() { match true { true if true && false => code(), _ => (), } } ``` --- ```rust pub fn foo(foo: i32) { $0if let 1 = foo { // some comment self.foo(); } else if let 2 = foo { // some comment 2 self.bar() } } ``` **Before this PR** ```rust pub fn foo(foo: i32) { match foo { 1 => { self.foo(); } 2 => self.bar(), _ => (), } } ``` **After this PR** ```rust pub fn foo(foo: i32) { match foo { 1 => { // some comment self.foo(); } 2 => { // some comment 2 self.bar() }, _ => (), } } ``` --- .../src/handlers/replace_if_let_with_match.rs | 438 +++++++++++++++--- crates/ide-assists/src/utils.rs | 8 + 2 files changed, 382 insertions(+), 64 deletions(-) diff --git a/crates/ide-assists/src/handlers/replace_if_let_with_match.rs b/crates/ide-assists/src/handlers/replace_if_let_with_match.rs index 3b815a467bc0..b7e5344712eb 100644 --- a/crates/ide-assists/src/handlers/replace_if_let_with_match.rs +++ b/crates/ide-assists/src/handlers/replace_if_let_with_match.rs @@ -1,15 +1,10 @@ use std::iter::successors; -use either::Either; -use ide_db::{ - RootDatabase, - defs::NameClass, - syntax_helpers::node_ext::{is_pattern_cond, single_let}, - ty_filter::TryEnum, -}; +use ide_db::{RootDatabase, defs::NameClass, ty_filter::TryEnum}; use syntax::{ - AstNode, Edition, T, TextRange, + AstNode, Edition, SyntaxKind, T, TextRange, ast::{self, HasName, edit::IndentLevel, edit_in_place::Indent, syntax_factory::SyntaxFactory}, + syntax_editor::SyntaxEditor, }; use crate::{ @@ -54,42 +49,46 @@ pub(crate) fn replace_if_let_with_match(acc: &mut Assists, ctx: &AssistContext<' return None; } let mut else_block 
= None; + let indent = if_expr.indent_level(); let if_exprs = successors(Some(if_expr.clone()), |expr| match expr.else_branch()? { ast::ElseBranch::IfExpr(expr) => Some(expr), ast::ElseBranch::Block(block) => { + let block = unwrap_trivial_block(block).clone_for_update(); + block.reindent_to(IndentLevel(1)); else_block = Some(block); None } }); let scrutinee_to_be_expr = if_expr.condition()?; - let scrutinee_to_be_expr = match single_let(scrutinee_to_be_expr.clone()) { - Some(cond) => cond.expr()?, - None => scrutinee_to_be_expr, + let scrutinee_to_be_expr = match let_and_guard(&scrutinee_to_be_expr) { + (Some(let_expr), _) => let_expr.expr()?, + (None, cond) => cond?, }; let mut pat_seen = false; let mut cond_bodies = Vec::new(); for if_expr in if_exprs { let cond = if_expr.condition()?; - let cond = match single_let(cond.clone()) { - Some(let_) => { + let (cond, guard) = match let_and_guard(&cond) { + (None, guard) => (None, Some(guard?)), + (Some(let_), guard) => { let pat = let_.pat()?; let expr = let_.expr()?; - // FIXME: If one `let` is wrapped in parentheses and the second is not, - // we'll exit here. if scrutinee_to_be_expr.syntax().text() != expr.syntax().text() { // Only if all condition expressions are equal we can merge them into a match return None; } pat_seen = true; - Either::Left(pat) + (Some(pat), guard) } - // Multiple `let`, unsupported. 
- None if is_pattern_cond(cond.clone()) => return None, - None => Either::Right(cond), }; - let body = if_expr.then_branch()?; - cond_bodies.push((cond, body)); + if let Some(guard) = &guard { + guard.dedent(indent); + guard.indent(IndentLevel(1)); + } + let body = if_expr.then_branch()?.clone_for_update(); + body.indent(IndentLevel(1)); + cond_bodies.push((cond, guard, body)); } if !pat_seen && cond_bodies.len() != 1 { @@ -106,27 +105,25 @@ pub(crate) fn replace_if_let_with_match(acc: &mut Assists, ctx: &AssistContext<' available_range, move |builder| { let make = SyntaxFactory::with_mappings(); - let match_expr = { + let match_expr: ast::Expr = { let else_arm = make_else_arm(ctx, &make, else_block, &cond_bodies); - let make_match_arm = |(pat, body): (_, ast::BlockExpr)| { - let body = make.block_expr(body.statements(), body.tail_expr()); - body.indent(IndentLevel::from(1)); - let body = unwrap_trivial_block(body); - match pat { - Either::Left(pat) => make.match_arm(pat, None, body), - Either::Right(_) if !pat_seen => { - make.match_arm(make.literal_pat("true").into(), None, body) + let make_match_arm = + |(pat, guard, body): (_, Option, ast::BlockExpr)| { + body.reindent_to(IndentLevel::single()); + let body = unwrap_trivial_block(body); + match (pat, guard.map(|it| make.match_guard(it))) { + (Some(pat), guard) => make.match_arm(pat, guard, body), + (None, _) if !pat_seen => { + make.match_arm(make.literal_pat("true").into(), None, body) + } + (None, guard) => { + make.match_arm(make.wildcard_pat().into(), guard, body) + } } - Either::Right(expr) => make.match_arm( - make.wildcard_pat().into(), - Some(make.match_guard(expr)), - body, - ), - } - }; + }; let arms = cond_bodies.into_iter().map(make_match_arm).chain([else_arm]); let match_expr = make.expr_match(scrutinee_to_be_expr, make.match_arm_list(arms)); - match_expr.indent(IndentLevel::from_node(if_expr.syntax())); + match_expr.indent(indent); match_expr.into() }; @@ -134,7 +131,11 @@ pub(crate) fn 
replace_if_let_with_match(acc: &mut Assists, ctx: &AssistContext<' if_expr.syntax().parent().is_some_and(|it| ast::IfExpr::can_cast(it.kind())); let expr = if has_preceding_if_expr { // make sure we replace the `else if let ...` with a block so we don't end up with `else expr` - make.block_expr([], Some(match_expr)).into() + match_expr.dedent(indent); + match_expr.indent(IndentLevel(1)); + let block_expr = make.block_expr([], Some(match_expr)); + block_expr.indent(indent); + block_expr.into() } else { match_expr }; @@ -150,13 +151,13 @@ pub(crate) fn replace_if_let_with_match(acc: &mut Assists, ctx: &AssistContext<' fn make_else_arm( ctx: &AssistContext<'_>, make: &SyntaxFactory, - else_block: Option, - conditionals: &[(Either, ast::BlockExpr)], + else_expr: Option, + conditionals: &[(Option, Option, ast::BlockExpr)], ) -> ast::MatchArm { - let (pattern, expr) = if let Some(else_block) = else_block { + let (pattern, expr) = if let Some(else_expr) = else_expr { let pattern = match conditionals { - [(Either::Right(_), _)] => make.literal_pat("false").into(), - [(Either::Left(pat), _)] => match ctx + [(None, Some(_), _)] => make.literal_pat("false").into(), + [(Some(pat), _, _)] => match ctx .sema .type_of_pat(pat) .and_then(|ty| TryEnum::from_ty(&ctx.sema, &ty.adjusted())) @@ -174,10 +175,10 @@ fn make_else_arm( }, _ => make.wildcard_pat().into(), }; - (pattern, unwrap_trivial_block(else_block)) + (pattern, else_expr) } else { let pattern = match conditionals { - [(Either::Right(_), _)] => make.literal_pat("false").into(), + [(None, Some(_), _)] => make.literal_pat("false").into(), _ => make.wildcard_pat().into(), }; (pattern, make.expr_unit()) @@ -266,7 +267,10 @@ pub(crate) fn replace_match_with_if_let(acc: &mut Assists, ctx: &AssistContext<' // wrap them in another BlockExpr. 
match expr { ast::Expr::BlockExpr(block) if block.modifier().is_none() => block, - expr => make.block_expr([], Some(expr)), + expr => { + expr.indent(IndentLevel(1)); + make.block_expr([], Some(expr)) + } } }; @@ -289,7 +293,9 @@ pub(crate) fn replace_match_with_if_let(acc: &mut Assists, ctx: &AssistContext<' condition }; let then_expr = then_expr.clone_for_update(); + let else_expr = else_expr.clone_for_update(); then_expr.reindent_to(IndentLevel::single()); + else_expr.reindent_to(IndentLevel::single()); let then_block = make_block_expr(then_expr); let else_expr = if is_empty_expr(&else_expr) { None } else { Some(else_expr) }; let if_let_expr = make.expr_if( @@ -382,6 +388,48 @@ fn is_sad_pat(sema: &hir::Semantics<'_, RootDatabase>, pat: &ast::Pat) -> bool { .is_some_and(|it| does_pat_match_variant(pat, &it.sad_pattern())) } +fn let_and_guard(cond: &ast::Expr) -> (Option, Option) { + if let ast::Expr::ParenExpr(expr) = cond + && let Some(sub_expr) = expr.expr() + { + let_and_guard(&sub_expr) + } else if let ast::Expr::LetExpr(let_expr) = cond { + (Some(let_expr.clone()), None) + } else if let ast::Expr::BinExpr(bin_expr) = cond + && let Some(ast::Expr::LetExpr(let_expr)) = and_bin_expr_left(bin_expr).lhs() + { + let new_expr = bin_expr.clone_subtree(); + let mut edit = SyntaxEditor::new(new_expr.syntax().clone()); + + let left_bin = and_bin_expr_left(&new_expr); + if let Some(rhs) = left_bin.rhs() { + edit.replace(left_bin.syntax(), rhs.syntax()); + } else { + if let Some(next) = left_bin.syntax().next_sibling_or_token() + && next.kind() == SyntaxKind::WHITESPACE + { + edit.delete(next); + } + edit.delete(left_bin.syntax()); + } + + let new_expr = edit.finish().new_root().clone(); + (Some(let_expr), ast::Expr::cast(new_expr)) + } else { + (None, Some(cond.clone())) + } +} + +fn and_bin_expr_left(expr: &ast::BinExpr) -> ast::BinExpr { + if expr.op_kind() == Some(ast::BinaryOp::LogicOp(ast::LogicOp::And)) + && let Some(ast::Expr::BinExpr(left)) = expr.lhs() + { + 
and_bin_expr_left(&left) + } else { + expr.clone() + } +} + #[cfg(test)] mod tests { use super::*; @@ -452,6 +500,45 @@ pub fn foo(foo: bool) { ) } + #[test] + fn test_if_with_match_comments() { + check_assist( + replace_if_let_with_match, + r#" +pub fn foo(foo: i32) { + $0if let 1 = foo { + // some comment + self.foo(); + } else if let 2 = foo { + // some comment 2 + self.bar() + } else { + // some comment 3 + self.baz(); + } +} +"#, + r#" +pub fn foo(foo: i32) { + match foo { + 1 => { + // some comment + self.foo(); + } + 2 => { + // some comment 2 + self.bar() + } + _ => { + // some comment 3 + self.baz(); + } + } +} +"#, + ) + } + #[test] fn test_if_let_with_match_no_else() { check_assist( @@ -514,14 +601,151 @@ impl VariantData { #[test] fn test_if_let_with_match_let_chain() { - check_assist_not_applicable( + check_assist( replace_if_let_with_match, r#" +#![feature(if_let_guard)] +fn main() { + if $0let true = true && let Some(1) = None {} else { other() } +} +"#, + r#" +#![feature(if_let_guard)] +fn main() { + match true { + true if let Some(1) = None => {} + _ => other(), + } +} +"#, + ); + + check_assist( + replace_if_let_with_match, + r#" +#![feature(if_let_guard)] +fn main() { + if true { + $0if let ParenExpr(expr) = cond + && let Some(sub_expr) = expr.expr() + { + branch1( + "..." + ) + } else if let LetExpr(let_expr) = cond { + branch2( + "..." + ) + } else if let BinExpr(bin_expr) = cond + && let Some(kind) = bin_expr.op_kind() + && let Some(LetExpr(let_expr)) = foo(bin_expr) + { + branch3() + } else { + branch4( + "..." + ) + } + } +} +"#, + r#" +#![feature(if_let_guard)] +fn main() { + if true { + match cond { + ParenExpr(expr) if let Some(sub_expr) = expr.expr() => { + branch1( + "..." + ) + } + LetExpr(let_expr) => { + branch2( + "..." + ) + } + BinExpr(bin_expr) if let Some(kind) = bin_expr.op_kind() + && let Some(LetExpr(let_expr)) = foo(bin_expr) => branch3(), + _ => { + branch4( + "..." 
+ ) + } + } + } +} +"#, + ); + + check_assist( + replace_if_let_with_match, + r#" +fn main() { + if $0let true = true + && true + && false + { + code() + } else { + other() + } +} +"#, + r#" +fn main() { + match true { + true if true + && false => code(), + _ => other(), + } +} +"#, + ); + } + + #[test] + fn test_if_let_with_match_let_chain_no_else() { + check_assist( + replace_if_let_with_match, + r#" +#![feature(if_let_guard)] fn main() { if $0let true = true && let Some(1) = None {} } "#, - ) + r#" +#![feature(if_let_guard)] +fn main() { + match true { + true if let Some(1) = None => {} + _ => (), + } +} +"#, + ); + + check_assist( + replace_if_let_with_match, + r#" +fn main() { + if $0let true = true + && true + && false + { + code() + } +} +"#, + r#" +fn main() { + match true { + true if true + && false => code(), + _ => (), + } +} +"#, + ); } #[test] @@ -553,10 +777,10 @@ impl VariantData { VariantData::Tuple(..) => false, _ if cond() => true, _ => { - bar( - 123 - ) - } + bar( + 123 + ) + } } } } @@ -587,11 +811,11 @@ impl VariantData { if let VariantData::Struct(..) = *self { true } else { - match *self { - VariantData::Tuple(..) => false, - _ => false, + match *self { + VariantData::Tuple(..) 
=> false, + _ => false, + } } -} } } "#, @@ -706,9 +930,12 @@ fn foo(x: Result) { fn main() { if true { $0if let Ok(rel_path) = path.strip_prefix(root_path) { - let rel_path = RelativePathBuf::from_path(rel_path).ok()?; + let rel_path = RelativePathBuf::from_path(rel_path) + .ok()?; Some((*id, rel_path)) } else { + let _ = some_code() + .clone(); None } } @@ -719,10 +946,52 @@ fn main() { if true { match path.strip_prefix(root_path) { Ok(rel_path) => { - let rel_path = RelativePathBuf::from_path(rel_path).ok()?; + let rel_path = RelativePathBuf::from_path(rel_path) + .ok()?; Some((*id, rel_path)) } - _ => None, + _ => { + let _ = some_code() + .clone(); + None + } + } + } +} +"#, + ); + + check_assist( + replace_if_let_with_match, + r#" +fn main() { + if true { + $0if let Ok(rel_path) = path.strip_prefix(root_path) { + Foo { + x: 1 + } + } else { + Foo { + x: 2 + } + } + } +} +"#, + r#" +fn main() { + if true { + match path.strip_prefix(root_path) { + Ok(rel_path) => { + Foo { + x: 1 + } + } + _ => { + Foo { + x: 2 + } + } } } } @@ -1581,13 +1850,51 @@ fn foo(x: Result) { replace_match_with_if_let, r#" fn main() { + if true { + $0match path.strip_prefix(root_path) { + Ok(rel_path) => Foo { + x: 2 + } + _ => Foo { + x: 3 + }, + } + } +} +"#, + r#" +fn main() { + if true { + if let Ok(rel_path) = path.strip_prefix(root_path) { + Foo { + x: 2 + } + } else { + Foo { + x: 3 + } + } + } +} +"#, + ); + + check_assist( + replace_match_with_if_let, + r#" +fn main() { if true { $0match path.strip_prefix(root_path) { Ok(rel_path) => { - let rel_path = RelativePathBuf::from_path(rel_path).ok()?; + let rel_path = RelativePathBuf::from_path(rel_path) + .ok()?; Some((*id, rel_path)) } - _ => None, + _ => { + let _ = some_code() + .clone(); + None + }, } } } @@ -1596,15 +1903,18 @@ fn main() { fn main() { if true { if let Ok(rel_path) = path.strip_prefix(root_path) { - let rel_path = RelativePathBuf::from_path(rel_path).ok()?; + let rel_path = RelativePathBuf::from_path(rel_path) 
+ .ok()?; Some((*id, rel_path)) } else { + let _ = some_code() + .clone(); None } } } "#, - ) + ); } #[test] diff --git a/crates/ide-assists/src/utils.rs b/crates/ide-assists/src/utils.rs index e43516f6b963..fbdd0667b94c 100644 --- a/crates/ide-assists/src/utils.rs +++ b/crates/ide-assists/src/utils.rs @@ -57,6 +57,14 @@ pub fn extract_trivial_expression(block_expr: &ast::BlockExpr) -> Option Date: Fri, 7 Nov 2025 20:01:23 +0100 Subject: [PATCH 307/322] Update Cargo.toml and lockfile --- Cargo.lock | 516 ++++++++++++++++++++++++++++++++++++++-- lib/smol_str/Cargo.toml | 5 +- 2 files changed, 502 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 12b5f8a9a64e..643b0f7e1d38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,12 +17,27 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "allocator-api2" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstyle" version = "1.0.11" @@ -107,6 +122,21 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + 
"bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "1.3.2" @@ -134,6 +164,12 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36f64beae40a84da1b4b26ff2761a5b895c12adc41dc25aaee1c4f2bbfe97a6e" +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + [[package]] name = "byteorder" version = "1.5.0" @@ -220,6 +256,12 @@ dependencies = [ "thiserror 2.0.16", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.38" @@ -258,6 +300,33 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "4.5.48" @@ -313,6 +382,39 @@ dependencies = [ "cfg-if", ] +[[package]] +name = 
"criterion" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "itertools 0.13.0", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" +dependencies = [ + "cast", + "itertools 0.13.0", +] + [[package]] name = "critical-section" version = "1.2.0" @@ -362,6 +464,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "ctrlc" version = "3.5.0" @@ -526,6 +634,16 @@ dependencies = [ "typeid", ] +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.0", +] + [[package]] name = "expect-test" version = "1.5.1" @@ -536,6 +654,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "find-msvc-tools" version = "0.1.2" @@ -558,6 +682,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" @@ -599,12 +729,35 @@ dependencies = [ "wasi", ] +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + [[package]] name = "gimli" version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hash32" version = "0.2.1" @@ -680,7 +833,7 @@ dependencies = [ "hir-ty", "indexmap", "intern", - "itertools", + "itertools 0.14.0", "ra-ap-rustc_type_ir", "rustc-hash 2.1.1", "smallvec", @@ -713,7 +866,7 @@ dependencies = [ "hir-expand", "indexmap", "intern", - "itertools", + "itertools 0.14.0", "la-arena 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "mbe", "query-group-macro", @@ -747,7 +900,7 @@ dependencies = [ "either", "expect-test", "intern", - "itertools", + "itertools 0.14.0", "mbe", "parser", "query-group-macro", @@ -779,7 +932,7 @@ dependencies = [ "hir-expand", "indexmap", "intern", - "itertools", + "itertools 0.14.0", "la-arena 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "macros", "oorandom", @@ -921,7 +1074,7 @@ dependencies = [ "ide-db", "ide-diagnostics", "ide-ssr", - "itertools", + "itertools 0.14.0", "macros", "nohash-hasher", "oorandom", @@ -950,7 +1103,7 @@ dependencies = [ "expect-test", "hir", "ide-db", - "itertools", + "itertools 0.14.0", "smallvec", "stdx", "syntax", @@ -968,7 +1121,7 @@ dependencies = [ "expect-test", "hir", 
"ide-db", - "itertools", + "itertools 0.14.0", "macros", "smallvec", "stdx", @@ -992,7 +1145,7 @@ dependencies = [ "fst", "hir", "indexmap", - "itertools", + "itertools 0.14.0", "line-index 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "macros", "memchr", @@ -1025,7 +1178,7 @@ dependencies = [ "expect-test", "hir", "ide-db", - "itertools", + "itertools 0.14.0", "paths", "serde_json", "stdx", @@ -1043,7 +1196,7 @@ dependencies = [ "expect-test", "hir", "ide-db", - "itertools", + "itertools 0.14.0", "parser", "syntax", "test-fixture", @@ -1132,6 +1285,15 @@ dependencies = [ "rustversion", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -1153,6 +1315,16 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a037eddb7d28de1d0fc42411f501b53b75838d313908078d6698d064f3029b24" +[[package]] +name = "js-sys" +version = "0.3.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + [[package]] name = "kqueue" version = "1.1.1" @@ -1244,6 +1416,12 @@ dependencies = [ "text-size", ] +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "litemap" version = "0.8.0" @@ -1259,7 +1437,7 @@ dependencies = [ "hir-expand", "ide-db", "intern", - "itertools", + "itertools 0.14.0", "proc-macro-api", "project-model", "span", @@ -1639,6 +1817,34 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "portable-atomic" version = "1.11.1" @@ -1671,6 +1877,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "proc-macro-api" version = "0.0.0" @@ -1767,7 +1982,7 @@ dependencies = [ "cfg", "expect-test", "intern", - "itertools", + "itertools 0.14.0", "la-arena 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "paths", "rustc-hash 2.1.1", @@ -1783,6 +1998,25 @@ dependencies = [ "triomphe", ] +[[package]] +name = "proptest" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags 2.9.4", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + 
"unarray", +] + [[package]] name = "protobuf" version = "3.7.1" @@ -1835,6 +2069,12 @@ dependencies = [ "syn", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.40" @@ -1844,6 +2084,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "ra-ap-rustc_abi" version = "0.137.0" @@ -1971,6 +2217,44 @@ dependencies = [ "synstructure", ] +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + [[package]] name = "rayon" version = "1.11.0" @@ -2006,11 +2290,40 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ - "getrandom", + "getrandom 0.2.16", 
"libredox", "thiserror 2.0.16", ] +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + [[package]] name = "rowan" version = "0.15.15" @@ -2046,7 +2359,7 @@ dependencies = [ "ide-ssr", "indexmap", "intern", - "itertools", + "itertools 0.14.0", "load-cargo", "lsp-server 0.7.9 (registry+https://github.com/rust-lang/crates.io-index)", "lsp-types", @@ -2147,12 +2460,37 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags 2.9.4", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.0", +] + [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + [[package]] name = "ryu" version = "1.0.20" @@ -2359,6 +2697,20 @@ dependencies = [ "serde", ] +[[package]] +name = 
"smol_str" +version = "0.3.4" +dependencies = [ + "arbitrary", + "borsh", + "criterion", + "proptest", + "rand", + "serde", + "serde_core", + "serde_json", +] + [[package]] name = "span" version = "0.0.0" @@ -2395,7 +2747,7 @@ dependencies = [ "backtrace", "crossbeam-channel", "crossbeam-utils", - "itertools", + "itertools 0.14.0", "jod-thread", "libc", "miow", @@ -2431,14 +2783,14 @@ version = "0.0.0" dependencies = [ "either", "expect-test", - "itertools", + "itertools 0.14.0", "parser", "rayon", "rowan", "rustc-hash 2.1.1", "rustc-literal-escaper 0.0.4", "rustc_apfloat", - "smol_str", + "smol_str 0.3.2", "stdx", "test-utils", "tracing", @@ -2465,6 +2817,19 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83176759e9416cf81ee66cb6508dbfe9c96f20b8b56265a39917551c23c70964" +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys 0.61.0", +] + [[package]] name = "tenthash" version = "1.1.0" @@ -2641,6 +3006,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "toml" version = "0.8.23" @@ -2787,6 +3162,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "ungrammar" version = "1.16.1" @@ -2870,6 +3251,15 @@ 
dependencies = [ "walkdir", ] +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -2886,6 +3276,70 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.82" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi-util" version = "0.1.11" @@ -3168,6 +3622,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + [[package]] name = "write-json" version = "0.1.4" @@ -3218,7 +3678,7 @@ dependencies = [ "edition", "either", "flate2", - "itertools", + "itertools 0.14.0", "proc-macro2", "quote", "stdx", @@ -3254,6 +3714,26 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zerofrom" version = "0.1.6" diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 4752a84ed419..814781f05a5d 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -3,7 +3,7 @@ name = "smol_str" version = "0.3.4" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" -repository = "https://github.com/rust-analyzer/smol_str" +repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/smol_str" authors = ["Aleksey Kladov ", "Lukas Wirth "] edition = "2021" rust-version = "1.89" @@ -35,3 +35,6 @@ harness = false [profile.bench] lto = "fat" + +[lints] +workspace = true From f12388c02372aff88c7e3fc0a26aa6d6f3520468 Mon Sep 17 00:00:00 2001 
From: Lukas Wirth Date: Fri, 7 Nov 2025 20:27:20 +0100 Subject: [PATCH 308/322] Reformat --- Cargo.lock | 38 +++++++++++++++++++++++------ lib/text-size/src/range.rs | 20 ++++----------- lib/text-size/src/serde_impls.rs | 5 +--- lib/text-size/src/size.rs | 1 - lib/text-size/tests/serde.rs | 42 +++++--------------------------- 5 files changed, 43 insertions(+), 63 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 12b5f8a9a64e..75197405fb1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -730,7 +730,7 @@ dependencies = [ "syntax-bridge", "test-fixture", "test-utils", - "text-size", + "text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "thin-vec", "tracing", "triomphe", @@ -1231,7 +1231,7 @@ version = "0.1.2" dependencies = [ "nohash-hasher", "oorandom", - "text-size", + "text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1241,7 +1241,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e27e0ed5a392a7f5ba0b3808a2afccff16c64933312c84b57618b49d1209bd2" dependencies = [ "nohash-hasher", - "text-size", + "text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2021,7 +2021,7 @@ dependencies = [ "hashbrown 0.14.5", "memoffset", "rustc-hash 1.1.0", - "text-size", + "text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2328,6 +2328,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_test" +version = "1.0.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f901ee573cab6b3060453d2d5f0bae4e6d628c23c0a962ff9b5f1d7c8d4f1ed" +dependencies = [ + "serde", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -2369,7 +2378,7 @@ dependencies = [ "salsa", "stdx", "syntax", - "text-size", + "text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "vfs", ] @@ -2388,6 +2397,12 @@ version = "1.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "stdx" version = "0.0.0" @@ -2497,7 +2512,16 @@ dependencies = [ "profile", "rustc-hash 2.1.1", "stdx", - "text-size", + "text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "text-size" +version = "1.1.1" +dependencies = [ + "serde", + "serde_test", + "static_assertions", ] [[package]] @@ -2772,7 +2796,7 @@ dependencies = [ "intern", "ra-ap-rustc_lexer", "stdx", - "text-size", + "text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] diff --git a/lib/text-size/src/range.rs b/lib/text-size/src/range.rs index 9b981642d16d..6067115207e6 100644 --- a/lib/text-size/src/range.rs +++ b/lib/text-size/src/range.rs @@ -83,10 +83,7 @@ impl TextRange { /// ``` #[inline] pub const fn empty(offset: TextSize) -> TextRange { - TextRange { - start: offset, - end: offset, - } + TextRange { start: offset, end: offset } } /// Create a range up to the given end (`..end`). @@ -105,10 +102,7 @@ impl TextRange { /// ``` #[inline] pub const fn up_to(end: TextSize) -> TextRange { - TextRange { - start: TextSize::new(0), - end, - } + TextRange { start: TextSize::new(0), end } } } @@ -130,9 +124,7 @@ impl TextRange { #[inline] pub const fn len(self) -> TextSize { // HACK for const fn: math on primitives only - TextSize { - raw: self.end().raw - self.start().raw, - } + TextSize { raw: self.end().raw - self.start().raw } } /// Check if this range is empty. 
@@ -418,8 +410,7 @@ impl Add for TextRange { type Output = TextRange; #[inline] fn add(self, offset: TextSize) -> TextRange { - self.checked_add(offset) - .expect("TextRange +offset overflowed") + self.checked_add(offset).expect("TextRange +offset overflowed") } } @@ -427,8 +418,7 @@ impl Sub for TextRange { type Output = TextRange; #[inline] fn sub(self, offset: TextSize) -> TextRange { - self.checked_sub(offset) - .expect("TextRange -offset overflowed") + self.checked_sub(offset).expect("TextRange -offset overflowed") } } diff --git a/lib/text-size/src/serde_impls.rs b/lib/text-size/src/serde_impls.rs index a94bee9567a2..7f3f75751041 100644 --- a/lib/text-size/src/serde_impls.rs +++ b/lib/text-size/src/serde_impls.rs @@ -38,10 +38,7 @@ impl<'de> Deserialize<'de> for TextRange { { let (start, end) = Deserialize::deserialize(deserializer)?; if !(start <= end) { - return Err(de::Error::custom(format!( - "invalid range: {:?}..{:?}", - start, end - ))); + return Err(de::Error::custom(format!("invalid range: {:?}..{:?}", start, end))); } Ok(TextRange::new(start, end)) } diff --git a/lib/text-size/src/size.rs b/lib/text-size/src/size.rs index c950d2edd041..05328b45ea56 100644 --- a/lib/text-size/src/size.rs +++ b/lib/text-size/src/size.rs @@ -5,7 +5,6 @@ use { fmt, iter, num::TryFromIntError, ops::{Add, AddAssign, Sub, SubAssign}, - u32, }, }; diff --git a/lib/text-size/tests/serde.rs b/lib/text-size/tests/serde.rs index 874258a35f7e..cc4d538958de 100644 --- a/lib/text-size/tests/serde.rs +++ b/lib/text-size/tests/serde.rs @@ -20,39 +20,19 @@ fn size_serialization() { fn range_serialization() { assert_tokens( &range(00..10), - &[ - Token::Tuple { len: 2 }, - Token::U32(00), - Token::U32(10), - Token::TupleEnd, - ], + &[Token::Tuple { len: 2 }, Token::U32(00), Token::U32(10), Token::TupleEnd], ); assert_tokens( &range(10..20), - &[ - Token::Tuple { len: 2 }, - Token::U32(10), - Token::U32(20), - Token::TupleEnd, - ], + &[Token::Tuple { len: 2 }, Token::U32(10), 
Token::U32(20), Token::TupleEnd], ); assert_tokens( &range(20..30), - &[ - Token::Tuple { len: 2 }, - Token::U32(20), - Token::U32(30), - Token::TupleEnd, - ], + &[Token::Tuple { len: 2 }, Token::U32(20), Token::U32(30), Token::TupleEnd], ); assert_tokens( &range(30..40), - &[ - Token::Tuple { len: 2 }, - Token::U32(30), - Token::U32(40), - Token::TupleEnd, - ], + &[Token::Tuple { len: 2 }, Token::U32(30), Token::U32(40), Token::TupleEnd], ); } @@ -60,20 +40,10 @@ fn range_serialization() { fn invalid_range_deserialization() { assert_tokens::( &range(62..92), - &[ - Token::Tuple { len: 2 }, - Token::U32(62), - Token::U32(92), - Token::TupleEnd, - ], + &[Token::Tuple { len: 2 }, Token::U32(62), Token::U32(92), Token::TupleEnd], ); assert_de_tokens_error::( - &[ - Token::Tuple { len: 2 }, - Token::U32(92), - Token::U32(62), - Token::TupleEnd, - ], + &[Token::Tuple { len: 2 }, Token::U32(92), Token::U32(62), Token::TupleEnd], "invalid range: 92..62", ); } From df009eef0b9a582d5062e6b37b98463f9846c257 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 7 Nov 2025 20:15:29 +0100 Subject: [PATCH 309/322] Reformat --- Cargo.lock | 14 ++++++++++- Cargo.toml | 4 +-- lib/ungrammar/Cargo.toml | 2 -- lib/ungrammar/src/error.rs | 5 +--- lib/ungrammar/src/parser.rs | 32 ++++++------------------ lib/ungrammar/ungrammar2json/src/main.rs | 1 + 6 files changed, 24 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 12b5f8a9a64e..829746b660f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2787,12 +2787,24 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" +[[package]] +name = "ungrammar" +version = "1.16.1" + [[package]] name = "ungrammar" version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e5df347f0bf3ec1d670aad6ca5c6a1859cd9ea61d2113125794654ccced68f" +[[package]] +name = "ungrammar2json" +version = 
"1.0.0" +dependencies = [ + "ungrammar 1.16.1", + "write-json", +] + [[package]] name = "unicase" version = "2.8.1" @@ -3223,7 +3235,7 @@ dependencies = [ "quote", "stdx", "time", - "ungrammar", + "ungrammar 1.16.1 (registry+https://github.com/rust-lang/crates.io-index)", "write-json", "xflags", "xshell", diff --git a/Cargo.toml b/Cargo.toml index ecb2686a2277..6f5ea44401fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["xtask/", "lib/*", "crates/*"] +members = ["xtask/", "lib/*", "lib/ungrammar/ungrammar2json", "crates/*"] exclude = ["crates/proc-macro-srv/proc-macro-test/imp"] resolver = "2" @@ -42,7 +42,7 @@ debug = 2 # lsp-server = { path = "lib/lsp-server" } -# ungrammar = { path = "../ungrammar" } +# ungrammar = { path = "lin/ungrammar" } # salsa = { path = "../salsa" } # salsa-macros = { path = "../salsa/components/salsa-macros" } diff --git a/lib/ungrammar/Cargo.toml b/lib/ungrammar/Cargo.toml index 920d9ef49d0e..6e9dec7d6b27 100644 --- a/lib/ungrammar/Cargo.toml +++ b/lib/ungrammar/Cargo.toml @@ -8,8 +8,6 @@ edition = "2018" exclude = ["/bors.toml", "/.github"] -[workspace] -members = ["ungrammar2json"] [dependencies] # nope diff --git a/lib/ungrammar/src/error.rs b/lib/ungrammar/src/error.rs index 355e0b7ebc25..144f9fc0e981 100644 --- a/lib/ungrammar/src/error.rs +++ b/lib/ungrammar/src/error.rs @@ -27,10 +27,7 @@ impl std::error::Error for Error {} impl Error { pub(crate) fn with_location(self, location: Location) -> Error { - Error { - location: Some(location), - ..self - } + Error { location: Some(location), ..self } } } diff --git a/lib/ungrammar/src/parser.rs b/lib/ungrammar/src/parser.rs index a4ce9c120298..70fbe1ac0b66 100644 --- a/lib/ungrammar/src/parser.rs +++ b/lib/ungrammar/src/parser.rs @@ -1,4 +1,5 @@ //! Simple hand-written ungrammar parser. 
+#![allow(clippy::disallowed_types)] use std::collections::HashMap; use crate::{ @@ -36,10 +37,7 @@ const DUMMY_RULE: Rule = Rule::Node(Node(!0)); impl Parser { fn new(mut tokens: Vec) -> Parser { tokens.reverse(); - Parser { - tokens, - ..Parser::default() - } + Parser { tokens, ..Parser::default() } } fn peek(&self) -> Option<&lexer::Token> { @@ -49,9 +47,7 @@ impl Parser { self.tokens.iter().nth_back(n) } fn bump(&mut self) -> Result { - self.tokens - .pop() - .ok_or_else(|| format_err!("unexpected EOF")) + self.tokens.pop().ok_or_else(|| format_err!("unexpected EOF")) } fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> { let token = self.bump()?; @@ -75,10 +71,7 @@ impl Parser { let len = self.node_table.len(); let grammar = &mut self.grammar; *self.node_table.entry(name.clone()).or_insert_with(|| { - grammar.nodes.push(NodeData { - name, - rule: DUMMY_RULE, - }); + grammar.nodes.push(NodeData { name, rule: DUMMY_RULE }); Node(len) }) } @@ -127,11 +120,7 @@ fn rule(p: &mut Parser) -> Result { let rule = seq_rule(p)?; alt.push(rule) } - let res = if alt.len() == 1 { - alt.pop().unwrap() - } else { - Rule::Alt(alt) - }; + let res = if alt.len() == 1 { alt.pop().unwrap() } else { Rule::Alt(alt) }; Ok(res) } @@ -142,11 +131,7 @@ fn seq_rule(p: &mut Parser) -> Result { while let Some(rule) = opt_atom_rule(p)? 
{ seq.push(rule) } - let res = if seq.len() == 1 { - seq.pop().unwrap() - } else { - Rule::Seq(seq) - }; + let res = if seq.len() == 1 { seq.pop().unwrap() } else { Rule::Seq(seq) }; Ok(res) } @@ -175,10 +160,7 @@ fn opt_atom_rule(p: &mut Parser) -> Result> { p.bump()?; p.bump()?; let rule = atom_rule(p)?; - let res = Rule::Labeled { - label, - rule: Box::new(rule), - }; + let res = Rule::Labeled { label, rule: Box::new(rule) }; return Ok(Some(res)); } _ => (), diff --git a/lib/ungrammar/ungrammar2json/src/main.rs b/lib/ungrammar/ungrammar2json/src/main.rs index f588ed5eb624..3e884580697f 100644 --- a/lib/ungrammar/ungrammar2json/src/main.rs +++ b/lib/ungrammar/ungrammar2json/src/main.rs @@ -1,3 +1,4 @@ +#![allow(clippy::print_stderr, clippy::print_stdout)] use std::{ env, io::{self, Read}, From 64b00e77a2282252ae24fad20d21113a1fc42916 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 7 Nov 2025 20:08:21 +0100 Subject: [PATCH 310/322] Reformat --- lib/smol_str/Cargo.toml | 3 -- lib/smol_str/src/lib.rs | 62 +++++++++----------------------------- lib/smol_str/src/serde.rs | 5 +-- lib/smol_str/tests/test.rs | 11 ++----- lib/smol_str/tests/tidy.rs | 1 + xtask/src/tidy.rs | 21 ++++++++----- 6 files changed, 33 insertions(+), 70 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 814781f05a5d..ee3263594a3b 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -33,8 +33,5 @@ serde = ["dep:serde_core"] name = "bench" harness = false -[profile.bench] -lto = "fat" - [lints] workspace = true diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index ded07c61c6df..effaba211df9 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -115,10 +115,7 @@ impl Clone for SmolStr { impl Default for SmolStr { #[inline(always)] fn default() -> SmolStr { - SmolStr(Repr::Inline { - len: InlineSize::_V0, - buf: [0; INLINE_CAP], - }) + SmolStr(Repr::Inline { len: InlineSize::_V0, buf: [0; INLINE_CAP] }) } } @@ -216,13 
+213,13 @@ impl hash::Hash for SmolStr { } impl fmt::Debug for SmolStr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Debug::fmt(self.as_str(), f) } } impl fmt::Display for SmolStr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } @@ -245,11 +242,8 @@ fn from_buf_and_chars( ) -> SmolStr { let min_size = iter.size_hint().0 + buf_len; if min_size > INLINE_CAP { - let heap: String = core::str::from_utf8(&buf[..buf_len]) - .unwrap() - .chars() - .chain(iter) - .collect(); + let heap: String = + core::str::from_utf8(&buf[..buf_len]).unwrap().chars().chain(iter).collect(); if heap.len() <= INLINE_CAP { // size hint lied return SmolStr::new_inline(&heap); @@ -490,10 +484,7 @@ impl InlineSize { #[derive(Clone, Debug)] enum Repr { - Inline { - len: InlineSize, - buf: [u8; INLINE_CAP], - }, + Inline { len: InlineSize, buf: [u8; INLINE_CAP] }, Static(&'static str), Heap(Arc), } @@ -521,10 +512,8 @@ impl Repr { if len <= N_NEWLINES + N_SPACES { let bytes = text.as_bytes(); let possible_newline_count = cmp::min(len, N_NEWLINES); - let newlines = bytes[..possible_newline_count] - .iter() - .take_while(|&&b| b == b'\n') - .count(); + let newlines = + bytes[..possible_newline_count].iter().take_while(|&&b| b == b'\n').count(); let possible_space_count = len - newlines; if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') { let spaces = possible_space_count; @@ -576,16 +565,9 @@ impl Repr { match (self, other) { (Self::Heap(l0), Self::Heap(r0)) => Arc::ptr_eq(l0, r0), (Self::Static(l0), Self::Static(r0)) => core::ptr::eq(l0, r0), - ( - Self::Inline { - len: l_len, - buf: l_buf, - }, - Self::Inline { - len: r_len, - buf: r_buf, - }, - ) => l_len == r_len && l_buf == r_buf, + (Self::Inline { len: l_len, buf: l_buf }, Self::Inline { len: r_len, buf: r_buf }) => { + 
l_len == r_len && l_buf == r_buf + } _ => false, } } @@ -649,11 +631,7 @@ impl StrExt for str { let len = self.len(); if len <= INLINE_CAP { let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_lowercase); - from_buf_and_chars( - buf, - len - rest.len(), - rest.chars().flat_map(|c| c.to_lowercase()), - ) + from_buf_and_chars(buf, len - rest.len(), rest.chars().flat_map(|c| c.to_lowercase())) } else { self.to_lowercase().into() } @@ -664,11 +642,7 @@ impl StrExt for str { let len = self.len(); if len <= INLINE_CAP { let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_uppercase); - from_buf_and_chars( - buf, - len - rest.len(), - rest.chars().flat_map(|c| c.to_uppercase()), - ) + from_buf_and_chars(buf, len - rest.len(), rest.chars().flat_map(|c| c.to_uppercase())) } else { self.to_uppercase().into() } @@ -878,10 +852,7 @@ enum SmolStrBuilderRepr { impl Default for SmolStrBuilderRepr { #[inline] fn default() -> Self { - SmolStrBuilderRepr::Inline { - buf: [0; INLINE_CAP], - len: 0, - } + SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0 } } } @@ -889,10 +860,7 @@ impl SmolStrBuilder { /// Creates a new empty [`SmolStrBuilder`]. #[must_use] pub const fn new() -> Self { - Self(SmolStrBuilderRepr::Inline { - buf: [0; INLINE_CAP], - len: 0, - }) + Self(SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0 }) } /// Builds a [`SmolStr`] from `self`. 
diff --git a/lib/smol_str/src/serde.rs b/lib/smol_str/src/serde.rs index 4f08b444c58e..66cbcd3badc1 100644 --- a/lib/smol_str/src/serde.rs +++ b/lib/smol_str/src/serde.rs @@ -67,10 +67,7 @@ where { match String::from_utf8(v) { Ok(s) => Ok(SmolStr::from(s)), - Err(e) => Err(Error::invalid_value( - Unexpected::Bytes(&e.into_bytes()), - &self, - )), + Err(e) => Err(Error::invalid_value(Unexpected::Bytes(&e.into_bytes()), &self)), } } } diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 8f7d9ec39ac7..640e7df681c9 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -1,3 +1,4 @@ +#![allow(clippy::disallowed_types)] use std::sync::Arc; #[cfg(not(miri))] @@ -8,10 +9,7 @@ use smol_str::{SmolStr, SmolStrBuilder}; #[test] #[cfg(target_pointer_width = "64")] fn smol_str_is_smol() { - assert_eq!( - ::std::mem::size_of::(), - ::std::mem::size_of::(), - ); + assert_eq!(::std::mem::size_of::(), ::std::mem::size_of::(),); } #[test] @@ -341,10 +339,7 @@ mod test_str_ext { #[test] fn large() { let lowercase = "aaaaaaAAAAAaaaaaaaaaaaaaaaaaaaaaAAAAaaaaaaaaaaaaaa".to_lowercase_smolstr(); - assert_eq!( - lowercase, - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - ); + assert_eq!(lowercase, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); assert!(lowercase.is_heap_allocated()); } diff --git a/lib/smol_str/tests/tidy.rs b/lib/smol_str/tests/tidy.rs index e2d809e40fee..743fa5add900 100644 --- a/lib/smol_str/tests/tidy.rs +++ b/lib/smol_str/tests/tidy.rs @@ -1,3 +1,4 @@ +#![allow(clippy::disallowed_methods, clippy::print_stdout)] #![cfg(not(miri))] use std::{ env, diff --git a/xtask/src/tidy.rs b/xtask/src/tidy.rs index 40997eb93d35..ebfc7d0a9430 100644 --- a/xtask/src/tidy.rs +++ b/xtask/src/tidy.rs @@ -127,21 +127,24 @@ fn check_cargo_toml(path: &Path, text: String) { } fn check_licenses(sh: &Shell) { - const EXPECTED: [&str; 20] = [ + const EXPECTED: &[&str] = &[ "(MIT OR Apache-2.0) AND Unicode-3.0", "0BSD OR MIT OR 
Apache-2.0", - "Apache-2.0", + "Apache-2.0 / MIT", "Apache-2.0 OR BSL-1.0", "Apache-2.0 OR MIT", - "Apache-2.0 WITH LLVM-exception", "Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT", + "Apache-2.0 WITH LLVM-exception", + "Apache-2.0", "Apache-2.0/MIT", + "BSD-2-Clause OR Apache-2.0 OR MIT", "CC0-1.0", "ISC", - "MIT", "MIT / Apache-2.0", + "MIT OR Apache-2.0 OR LGPL-2.1-or-later", "MIT OR Apache-2.0", "MIT OR Zlib OR Apache-2.0", + "MIT", "MIT/Apache-2.0", "MPL-2.0", "Unicode-3.0", @@ -159,18 +162,20 @@ fn check_licenses(sh: &Shell) { .collect::>(); licenses.sort_unstable(); licenses.dedup(); - if licenses != EXPECTED { + let mut expected = EXPECTED.to_vec(); + expected.sort_unstable(); + if licenses != expected { let mut diff = String::new(); diff.push_str("New Licenses:\n"); for &l in licenses.iter() { - if !EXPECTED.contains(&l) { + if !expected.contains(&l) { diff += &format!(" {l}\n") } } diff.push_str("\nMissing Licenses:\n"); - for l in EXPECTED { + for l in expected { if !licenses.contains(&l) { diff += &format!(" {l}\n") } @@ -178,7 +183,7 @@ fn check_licenses(sh: &Shell) { panic!("different set of licenses!\n{diff}"); } - assert_eq!(licenses, EXPECTED); + assert_eq!(licenses, expected); } fn check_test_attrs(path: &Path, text: &str) { From 6ba7d245d4c17f30b5254208d0d7c582ba675ce1 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 8 Nov 2025 13:13:07 +0100 Subject: [PATCH 311/322] Replace `SmolStr` usage with `Symbol` in `FileSymbol` --- crates/hir/src/symbols.rs | 16 ++++++++-------- crates/ide-db/src/ra_fixture.rs | 3 ++- crates/ide/src/navigation_target.rs | 27 ++++++++++++++------------- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/crates/hir/src/symbols.rs b/crates/hir/src/symbols.rs index d8c624e5c689..e472de98c682 100644 --- a/crates/hir/src/symbols.rs +++ b/crates/hir/src/symbols.rs @@ -18,7 +18,7 @@ use hir_ty::{ }; use intern::Symbol; use rustc_hash::FxHashMap; -use syntax::{AstNode, AstPtr, SmolStr, SyntaxNode, 
SyntaxNodePtr, ToSmolStr, ast::HasName}; +use syntax::{AstNode, AstPtr, SyntaxNode, SyntaxNodePtr, ToSmolStr, ast::HasName}; use crate::{HasCrate, Module, ModuleDef, Semantics}; @@ -29,7 +29,7 @@ pub struct FileSymbol { pub name: Symbol, pub def: ModuleDef, pub loc: DeclarationLocation, - pub container_name: Option, + pub container_name: Option, /// Whether this symbol is a doc alias for the original symbol. pub is_alias: bool, pub is_assoc: bool, @@ -65,7 +65,7 @@ pub struct SymbolCollector<'a> { db: &'a dyn HirDatabase, symbols: FxIndexSet, work: Vec, - current_container_name: Option, + current_container_name: Option, } /// Given a [`ModuleId`] and a [`HirDatabase`], use the DefMap for the module's crate to collect @@ -108,7 +108,7 @@ impl<'a> SymbolCollector<'a> { tracing::info!(?work, "SymbolCollector::do_work"); self.db.unwind_if_revision_cancelled(); - let parent_name = work.parent.map(|name| name.as_str().to_smolstr()); + let parent_name = work.parent.map(|name| Symbol::intern(name.as_str())); self.with_container_name(parent_name, |s| s.collect_from_module(work.module_id)); } @@ -125,7 +125,7 @@ impl<'a> SymbolCollector<'a> { } ModuleDefId::AdtId(AdtId::EnumId(id)) => { this.push_decl(id, name, false, None); - let enum_name = this.db.enum_signature(id).name.as_str().to_smolstr(); + let enum_name = Symbol::intern(this.db.enum_signature(id).name.as_str()); this.with_container_name(Some(enum_name), |this| { let variants = id.enum_variants(this.db); for (variant_id, variant_name, _) in &variants.variants { @@ -328,7 +328,7 @@ impl<'a> SymbolCollector<'a> { ) .to_smolstr(), ); - self.with_container_name(impl_name, |s| { + self.with_container_name(impl_name.as_deref().map(Symbol::intern), |s| { for &(ref name, assoc_item_id) in &impl_id.impl_items(self.db).items { s.push_assoc_item(assoc_item_id, name, None) } @@ -337,14 +337,14 @@ impl<'a> SymbolCollector<'a> { fn collect_from_trait(&mut self, trait_id: TraitId, trait_do_not_complete: Complete) { let trait_data = 
self.db.trait_signature(trait_id); - self.with_container_name(Some(trait_data.name.as_str().into()), |s| { + self.with_container_name(Some(Symbol::intern(trait_data.name.as_str())), |s| { for &(ref name, assoc_item_id) in &trait_id.trait_items(self.db).items { s.push_assoc_item(assoc_item_id, name, Some(trait_do_not_complete)); } }); } - fn with_container_name(&mut self, container_name: Option, f: impl FnOnce(&mut Self)) { + fn with_container_name(&mut self, container_name: Option, f: impl FnOnce(&mut Self)) { if let Some(container_name) = container_name { let prev = self.current_container_name.replace(container_name); f(self); diff --git a/crates/ide-db/src/ra_fixture.rs b/crates/ide-db/src/ra_fixture.rs index 1f056a835bc6..a9d596d8f5f4 100644 --- a/crates/ide-db/src/ra_fixture.rs +++ b/crates/ide-db/src/ra_fixture.rs @@ -2,7 +2,7 @@ use std::hash::{BuildHasher, Hash}; -use hir::{CfgExpr, FilePositionWrapper, FileRangeWrapper, Semantics}; +use hir::{CfgExpr, FilePositionWrapper, FileRangeWrapper, Semantics, Symbol}; use smallvec::SmallVec; use span::{TextRange, TextSize}; use syntax::{ @@ -524,6 +524,7 @@ impl_empty_upmap_from_ra_fixture!( f64, &str, String, + Symbol, SmolStr, Documentation, SymbolKind, diff --git a/crates/ide/src/navigation_target.rs b/crates/ide/src/navigation_target.rs index b222ff3eec0b..be0f2af15042 100644 --- a/crates/ide/src/navigation_target.rs +++ b/crates/ide/src/navigation_target.rs @@ -6,7 +6,7 @@ use arrayvec::ArrayVec; use either::Either; use hir::{ AssocItem, Crate, FieldSource, HasContainer, HasCrate, HasSource, HirDisplay, HirFileId, - InFile, LocalSource, ModuleSource, Semantics, db::ExpandDatabase, symbols::FileSymbol, + InFile, LocalSource, ModuleSource, Semantics, Symbol, db::ExpandDatabase, symbols::FileSymbol, }; use ide_db::{ FileId, FileRange, RootDatabase, SymbolKind, @@ -51,8 +51,7 @@ pub struct NavigationTarget { // FIXME: Symbol pub name: SmolStr, pub kind: Option, - // FIXME: Symbol - pub container_name: Option, + pub 
container_name: Option, pub description: Option, pub docs: Option, /// In addition to a `name` field, a `NavigationTarget` may also be aliased @@ -349,13 +348,13 @@ impl TryToNav for hir::ModuleDef { pub(crate) trait ToNavFromAst: Sized { const KIND: SymbolKind; - fn container_name(self, db: &RootDatabase) -> Option { + fn container_name(self, db: &RootDatabase) -> Option { _ = db; None } } -fn container_name(db: &RootDatabase, t: impl HasContainer, edition: Edition) -> Option { +fn container_name(db: &RootDatabase, t: impl HasContainer, edition: Edition) -> Option { match t.container(db) { hir::ItemContainer::Trait(it) => Some(it.name(db).display_no_db(edition).to_smolstr()), // FIXME: Handle owners of blocks correctly here @@ -364,36 +363,38 @@ fn container_name(db: &RootDatabase, t: impl HasContainer, edition: Edition) -> } _ => None, } + .as_deref() + .map(Symbol::intern) } impl ToNavFromAst for hir::Function { const KIND: SymbolKind = SymbolKind::Function; - fn container_name(self, db: &RootDatabase) -> Option { + fn container_name(self, db: &RootDatabase) -> Option { container_name(db, self, self.krate(db).edition(db)) } } impl ToNavFromAst for hir::Const { const KIND: SymbolKind = SymbolKind::Const; - fn container_name(self, db: &RootDatabase) -> Option { + fn container_name(self, db: &RootDatabase) -> Option { container_name(db, self, self.krate(db).edition(db)) } } impl ToNavFromAst for hir::Static { const KIND: SymbolKind = SymbolKind::Static; - fn container_name(self, db: &RootDatabase) -> Option { + fn container_name(self, db: &RootDatabase) -> Option { container_name(db, self, self.krate(db).edition(db)) } } impl ToNavFromAst for hir::Struct { const KIND: SymbolKind = SymbolKind::Struct; - fn container_name(self, db: &RootDatabase) -> Option { + fn container_name(self, db: &RootDatabase) -> Option { container_name(db, self, self.krate(db).edition(db)) } } impl ToNavFromAst for hir::Enum { const KIND: SymbolKind = SymbolKind::Enum; - fn 
container_name(self, db: &RootDatabase) -> Option { + fn container_name(self, db: &RootDatabase) -> Option { container_name(db, self, self.krate(db).edition(db)) } } @@ -402,19 +403,19 @@ impl ToNavFromAst for hir::Variant { } impl ToNavFromAst for hir::Union { const KIND: SymbolKind = SymbolKind::Union; - fn container_name(self, db: &RootDatabase) -> Option { + fn container_name(self, db: &RootDatabase) -> Option { container_name(db, self, self.krate(db).edition(db)) } } impl ToNavFromAst for hir::TypeAlias { const KIND: SymbolKind = SymbolKind::TypeAlias; - fn container_name(self, db: &RootDatabase) -> Option { + fn container_name(self, db: &RootDatabase) -> Option { container_name(db, self, self.krate(db).edition(db)) } } impl ToNavFromAst for hir::Trait { const KIND: SymbolKind = SymbolKind::Trait; - fn container_name(self, db: &RootDatabase) -> Option { + fn container_name(self, db: &RootDatabase) -> Option { container_name(db, self, self.krate(db).edition(db)) } } From abf2e3ed15ecb4a59422fd951cbf73965c57efb2 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 8 Nov 2025 13:24:10 +0100 Subject: [PATCH 312/322] Replace more smol strings with symbols --- crates/ide/src/goto_definition.rs | 13 ++- crates/ide/src/navigation_target.rs | 118 +++++++++++----------------- crates/ide/src/references.rs | 6 +- crates/ide/src/runnables.rs | 16 ++-- crates/intern/src/symbol/symbols.rs | 1 + 5 files changed, 68 insertions(+), 86 deletions(-) diff --git a/crates/ide/src/goto_definition.rs b/crates/ide/src/goto_definition.rs index 0ee9795af580..d663b703c3a1 100644 --- a/crates/ide/src/goto_definition.rs +++ b/crates/ide/src/goto_definition.rs @@ -245,7 +245,7 @@ fn try_lookup_include_path( Some(NavigationTarget { file_id, full_range: TextRange::new(0.into(), size), - name: path.into(), + name: hir::Symbol::intern(&path), alias: None, focus_range: None, kind: None, @@ -598,7 +598,13 @@ fn expr_to_nav( let value_range = value.syntax().text_range(); let navs = 
navigation_target::orig_range_with_focus_r(db, file_id, value_range, focus_range); navs.map(|(hir::FileRangeWrapper { file_id, range }, focus_range)| { - NavigationTarget::from_syntax(file_id, "".into(), focus_range, range, kind) + NavigationTarget::from_syntax( + file_id, + hir::Symbol::intern(""), + focus_range, + range, + kind, + ) }) } @@ -607,7 +613,6 @@ mod tests { use crate::{GotoDefinitionConfig, fixture}; use ide_db::{FileRange, MiniCore}; use itertools::Itertools; - use syntax::SmolStr; const TEST_CONFIG: GotoDefinitionConfig<'_> = GotoDefinitionConfig { minicore: MiniCore::default() }; @@ -658,7 +663,7 @@ mod tests { let Some(target) = navs.into_iter().next() else { panic!("expected single navigation target but encountered none"); }; - assert_eq!(target.name, SmolStr::new_inline(expected_name)); + assert_eq!(target.name, hir::Symbol::intern(expected_name)); } #[test] diff --git a/crates/ide/src/navigation_target.rs b/crates/ide/src/navigation_target.rs index be0f2af15042..7d5d9057cba9 100644 --- a/crates/ide/src/navigation_target.rs +++ b/crates/ide/src/navigation_target.rs @@ -6,7 +6,8 @@ use arrayvec::ArrayVec; use either::Either; use hir::{ AssocItem, Crate, FieldSource, HasContainer, HasCrate, HasSource, HirDisplay, HirFileId, - InFile, LocalSource, ModuleSource, Semantics, Symbol, db::ExpandDatabase, symbols::FileSymbol, + InFile, LocalSource, ModuleSource, Semantics, Symbol, db::ExpandDatabase, sym, + symbols::FileSymbol, }; use ide_db::{ FileId, FileRange, RootDatabase, SymbolKind, @@ -16,12 +17,10 @@ use ide_db::{ famous_defs::FamousDefs, ra_fixture::UpmapFromRaFixture, }; -use span::Edition; use stdx::never; use syntax::{ - AstNode, SmolStr, SyntaxNode, TextRange, ToSmolStr, + AstNode, SyntaxNode, TextRange, ast::{self, HasName}, - format_smolstr, }; /// `NavigationTarget` represents an element in the editor's UI which you can @@ -48,16 +47,14 @@ pub struct NavigationTarget { /// /// This range must be contained within [`Self::full_range`]. 
pub focus_range: Option, - // FIXME: Symbol - pub name: SmolStr, + pub name: Symbol, pub kind: Option, pub container_name: Option, pub description: Option, pub docs: Option, /// In addition to a `name` field, a `NavigationTarget` may also be aliased /// In such cases we want a `NavigationTarget` to be accessible by its alias - // FIXME: Symbol - pub alias: Option, + pub alias: Option, } impl fmt::Debug for NavigationTarget { @@ -148,9 +145,7 @@ impl NavigationTarget { db: &RootDatabase, module: hir::Module, ) -> UpmappingResult { - let edition = module.krate().edition(db); - let name = - module.name(db).map(|it| it.display_no_db(edition).to_smolstr()).unwrap_or_default(); + let name = module.name(db).map(|it| it.symbol().clone()).unwrap_or_else(|| sym::underscore); match module.declaration_source(db) { Some(InFile { value, file_id }) => { orig_range_with_focus(db, file_id, value.syntax(), value.name()).map( @@ -198,7 +193,8 @@ impl NavigationTarget { InFile { file_id, value }: InFile<&dyn ast::HasName>, kind: SymbolKind, ) -> UpmappingResult { - let name: SmolStr = value.name().map(|it| it.text().into()).unwrap_or_else(|| "_".into()); + let name = + value.name().map(|it| Symbol::intern(&it.text())).unwrap_or_else(|| sym::underscore); orig_range_with_focus(db, file_id, value.syntax(), value.name()).map( |(FileRange { file_id, range: full_range }, focus_range)| { @@ -209,7 +205,7 @@ impl NavigationTarget { pub(crate) fn from_syntax( file_id: FileId, - name: SmolStr, + name: Symbol, focus_range: Option, full_range: TextRange, kind: SymbolKind, @@ -234,8 +230,6 @@ impl TryToNav for FileSymbol { sema: &Semantics<'_, RootDatabase>, ) -> Option> { let db = sema.db; - let edition = - self.def.module(db).map(|it| it.krate().edition(db)).unwrap_or(Edition::CURRENT); let display_target = self.def.krate(db).to_display_target(db); Some( orig_range_with_focus_r( @@ -247,11 +241,12 @@ impl TryToNav for FileSymbol { .map(|(FileRange { file_id, range: full_range }, focus_range)| { 
NavigationTarget { file_id, - name: self.is_alias.then(|| self.def.name(db)).flatten().map_or_else( - || self.name.as_str().into(), - |it| it.display_no_db(edition).to_smolstr(), - ), - alias: self.is_alias.then(|| self.name.as_str().into()), + name: self + .is_alias + .then(|| self.def.name(db)) + .flatten() + .map_or_else(|| self.name.clone(), |it| it.symbol().clone()), + alias: self.is_alias.then(|| self.name.clone()), kind: Some(self.def.into()), full_range, focus_range, @@ -354,48 +349,44 @@ pub(crate) trait ToNavFromAst: Sized { } } -fn container_name(db: &RootDatabase, t: impl HasContainer, edition: Edition) -> Option { +fn container_name(db: &RootDatabase, t: impl HasContainer) -> Option { match t.container(db) { - hir::ItemContainer::Trait(it) => Some(it.name(db).display_no_db(edition).to_smolstr()), + hir::ItemContainer::Trait(it) => Some(it.name(db).symbol().clone()), // FIXME: Handle owners of blocks correctly here - hir::ItemContainer::Module(it) => { - it.name(db).map(|name| name.display_no_db(edition).to_smolstr()) - } + hir::ItemContainer::Module(it) => it.name(db).map(|name| name.symbol().clone()), _ => None, } - .as_deref() - .map(Symbol::intern) } impl ToNavFromAst for hir::Function { const KIND: SymbolKind = SymbolKind::Function; fn container_name(self, db: &RootDatabase) -> Option { - container_name(db, self, self.krate(db).edition(db)) + container_name(db, self) } } impl ToNavFromAst for hir::Const { const KIND: SymbolKind = SymbolKind::Const; fn container_name(self, db: &RootDatabase) -> Option { - container_name(db, self, self.krate(db).edition(db)) + container_name(db, self) } } impl ToNavFromAst for hir::Static { const KIND: SymbolKind = SymbolKind::Static; fn container_name(self, db: &RootDatabase) -> Option { - container_name(db, self, self.krate(db).edition(db)) + container_name(db, self) } } impl ToNavFromAst for hir::Struct { const KIND: SymbolKind = SymbolKind::Struct; fn container_name(self, db: &RootDatabase) -> Option { - 
container_name(db, self, self.krate(db).edition(db)) + container_name(db, self) } } impl ToNavFromAst for hir::Enum { const KIND: SymbolKind = SymbolKind::Enum; fn container_name(self, db: &RootDatabase) -> Option { - container_name(db, self, self.krate(db).edition(db)) + container_name(db, self) } } impl ToNavFromAst for hir::Variant { @@ -404,19 +395,19 @@ impl ToNavFromAst for hir::Variant { impl ToNavFromAst for hir::Union { const KIND: SymbolKind = SymbolKind::Union; fn container_name(self, db: &RootDatabase) -> Option { - container_name(db, self, self.krate(db).edition(db)) + container_name(db, self) } } impl ToNavFromAst for hir::TypeAlias { const KIND: SymbolKind = SymbolKind::TypeAlias; fn container_name(self, db: &RootDatabase) -> Option { - container_name(db, self, self.krate(db).edition(db)) + container_name(db, self) } } impl ToNavFromAst for hir::Trait { const KIND: SymbolKind = SymbolKind::Trait; fn container_name(self, db: &RootDatabase) -> Option { - container_name(db, self, self.krate(db).edition(db)) + container_name(db, self) } } @@ -452,10 +443,8 @@ where impl ToNav for hir::Module { fn to_nav(&self, db: &RootDatabase) -> UpmappingResult { let InFile { file_id, value } = self.definition_source(db); - let edition = self.krate(db).edition(db); - let name = - self.name(db).map(|it| it.display_no_db(edition).to_smolstr()).unwrap_or_default(); + let name = self.name(db).map(|it| it.symbol().clone()).unwrap_or_else(|| sym::underscore); let (syntax, focus) = match &value { ModuleSource::SourceFile(node) => (node.syntax(), None), ModuleSource::Module(node) => (node.syntax(), node.name()), @@ -500,7 +489,7 @@ impl TryToNav for hir::Impl { |(FileRange { file_id, range: full_range }, focus_range)| { NavigationTarget::from_syntax( file_id, - "impl".into(), + sym::kw_impl, focus_range, full_range, SymbolKind::Impl, @@ -522,16 +511,12 @@ impl TryToNav for hir::ExternCrateDecl { .rename() .map_or_else(|| value.name_ref().map(Either::Left), |it| 
it.name().map(Either::Right)); let krate = self.module(db).krate(); - let edition = krate.edition(db); Some(orig_range_with_focus(db, file_id, value.syntax(), focus).map( |(FileRange { file_id, range: full_range }, focus_range)| { let mut res = NavigationTarget::from_syntax( file_id, - self.alias_or_name(db) - .unwrap_or_else(|| self.name(db)) - .display_no_db(edition) - .to_smolstr(), + self.alias_or_name(db).unwrap_or_else(|| self.name(db)).symbol().clone(), focus_range, full_range, SymbolKind::Module, @@ -539,7 +524,7 @@ impl TryToNav for hir::ExternCrateDecl { res.docs = self.docs(db); res.description = Some(self.display(db, krate.to_display_target(db)).to_string()); - res.container_name = container_name(db, *self, edition); + res.container_name = container_name(db, *self); res }, )) @@ -571,7 +556,7 @@ impl TryToNav for hir::Field { |(FileRange { file_id, range: full_range }, focus_range)| { NavigationTarget::from_syntax( file_id, - format_smolstr!("{}", self.index()), + Symbol::integer(self.index()), focus_range, full_range, SymbolKind::Field, @@ -656,11 +641,10 @@ impl ToNav for LocalSource { Either::Left(bind_pat) => (bind_pat.syntax(), bind_pat.name()), Either::Right(it) => (it.syntax(), it.name()), }; - let edition = self.local.parent(db).module(db).krate().edition(db); orig_range_with_focus(db, file_id, node, name).map( |(FileRange { file_id, range: full_range }, focus_range)| { - let name = local.name(db).display_no_db(edition).to_smolstr(); + let name = local.name(db).symbol().clone(); let kind = if local.is_self(db) { SymbolKind::SelfParam } else if local.is_param(db) { @@ -697,8 +681,7 @@ impl TryToNav for hir::Label { ) -> Option> { let db = sema.db; let InFile { file_id, value } = self.source(db)?; - // Labels can't be keywords, so no escaping needed. 
- let name = self.name(db).display_no_db(Edition::Edition2015).to_smolstr(); + let name = self.name(db).symbol().clone(); Some(orig_range_with_focus(db, file_id, value.syntax(), value.lifetime()).map( |(FileRange { file_id, range: full_range }, focus_range)| NavigationTarget { @@ -723,8 +706,7 @@ impl TryToNav for hir::TypeParam { ) -> Option> { let db = sema.db; let InFile { file_id, value } = self.merge().source(db)?; - let edition = self.module(db).krate().edition(db); - let name = self.name(db).display_no_db(edition).to_smolstr(); + let name = self.name(db).symbol().clone(); let value = match value { Either::Left(ast::TypeOrConstParam::Type(x)) => Either::Left(x), @@ -773,8 +755,7 @@ impl TryToNav for hir::LifetimeParam { ) -> Option> { let db = sema.db; let InFile { file_id, value } = self.source(db)?; - // Lifetimes cannot be keywords, so not escaping needed. - let name = self.name(db).display_no_db(Edition::Edition2015).to_smolstr(); + let name = self.name(db).symbol().clone(); Some(orig_range(db, file_id, value.syntax()).map( |(FileRange { file_id, range: full_range }, focus_range)| NavigationTarget { @@ -799,8 +780,7 @@ impl TryToNav for hir::ConstParam { ) -> Option> { let db = sema.db; let InFile { file_id, value } = self.merge().source(db)?; - let edition = self.module(db).krate().edition(db); - let name = self.name(db).display_no_db(edition).to_smolstr(); + let name = self.name(db).symbol().clone(); let value = match value { Either::Left(ast::TypeOrConstParam::Const(x)) => x, @@ -835,21 +815,17 @@ impl TryToNav for hir::InlineAsmOperand { let InFile { file_id, value } = &self.source(db)?; let file_id = *file_id; Some(orig_range_with_focus(db, file_id, value.syntax(), value.name()).map( - |(FileRange { file_id, range: full_range }, focus_range)| { - let edition = self.parent(db).module(db).krate().edition(db); - NavigationTarget { - file_id, - name: self - .name(db) - .map_or_else(|| "_".into(), |it| it.display(db, edition).to_smolstr()), - alias: None, 
- kind: Some(SymbolKind::Local), - full_range, - focus_range, - container_name: None, - description: None, - docs: None, - } + |(FileRange { file_id, range: full_range }, focus_range)| NavigationTarget { + file_id, + name: + self.name(db).map_or_else(|| sym::underscore.clone(), |it| it.symbol().clone()), + alias: None, + kind: Some(SymbolKind::Local), + full_range, + focus_range, + container_name: None, + description: None, + docs: None, }, )) } diff --git a/crates/ide/src/references.rs b/crates/ide/src/references.rs index a53a19299727..516cc7fe6032 100644 --- a/crates/ide/src/references.rs +++ b/crates/ide/src/references.rs @@ -1058,7 +1058,7 @@ use self$0; use self$0; "#, expect![[r#" - Module FileId(0) 0..10 + _ Module FileId(0) 0..10 FileId(0) 4..8 import "#]], @@ -3130,7 +3130,7 @@ fn foo<'r#fn>(s: &'r#fn str) { } "#, expect![[r#" - 'r#break Label FileId(0) 87..96 87..95 + 'break Label FileId(0) 87..96 87..95 FileId(0) 113..121 "#]], @@ -3146,7 +3146,7 @@ fn foo<'r#fn$0>(s: &'r#fn str) { } "#, expect![[r#" - 'r#fn LifetimeParam FileId(0) 7..12 + 'fn LifetimeParam FileId(0) 7..12 FileId(0) 18..23 FileId(0) 44..49 diff --git a/crates/ide/src/runnables.rs b/crates/ide/src/runnables.rs index 494701d97def..2086a199b862 100644 --- a/crates/ide/src/runnables.rs +++ b/crates/ide/src/runnables.rs @@ -231,7 +231,7 @@ fn cmp_runnables( .cmp(&nav_b.focus_range.map_or_else(t_0, |it| it.start())) }) .then_with(|| kind.disc().cmp(&kind_b.disc())) - .then_with(|| nav.name.cmp(&nav_b.name)) + .then_with(|| nav.name.as_str().cmp(nav_b.name.as_str())) } fn find_related_tests( @@ -817,7 +817,7 @@ mod not_a_root { "#, expect![[r#" [ - "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..331, name: \"\", kind: Module })", + "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..331, name: \"_\", kind: Module })", "(Bin, NavigationTarget { file_id: FileId(0), full_range: 1..13, focus_range: 4..8, name: \"main\", kind: Function })", "(Bin, NavigationTarget { 
file_id: FileId(0), full_range: 15..76, focus_range: 42..71, name: \"__cortex_m_rt_main_trampoline\", kind: Function })", "(Bin, NavigationTarget { file_id: FileId(0), full_range: 78..154, focus_range: 113..149, name: \"__cortex_m_rt_main_trampoline_unsafe\", kind: Function })", @@ -1138,7 +1138,7 @@ fn test_foo1() {} "#, expect![[r#" [ - "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..51, name: \"\", kind: Module })", + "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..51, name: \"_\", kind: Module })", "(Test, NavigationTarget { file_id: FileId(0), full_range: 1..50, focus_range: 36..45, name: \"test_foo1\", kind: Function }, Atom(KeyValue { key: \"feature\", value: \"foo\" }))", ] "#]], @@ -1157,7 +1157,7 @@ fn test_foo1() {} "#, expect![[r#" [ - "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..73, name: \"\", kind: Module })", + "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..73, name: \"_\", kind: Module })", "(Test, NavigationTarget { file_id: FileId(0), full_range: 1..72, focus_range: 58..67, name: \"test_foo1\", kind: Function }, All([Atom(KeyValue { key: \"feature\", value: \"foo\" }), Atom(KeyValue { key: \"feature\", value: \"bar\" })]))", ] "#]], @@ -1236,7 +1236,7 @@ generate_main!(); "#, expect![[r#" [ - "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..345, name: \"\", kind: Module })", + "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..345, name: \"_\", kind: Module })", "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 282..312, focus_range: 286..291, name: \"tests\", kind: Module, description: \"mod tests\" })", "(Test, NavigationTarget { file_id: FileId(0), full_range: 298..307, name: \"foo_test\", kind: Function })", "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 313..323, name: \"tests2\", kind: Module, description: \"mod tests2\" }, true)", @@ -1679,10 +1679,10 @@ mod r#mod { "#, expect![[r#" [ - "(TestMod, 
NavigationTarget { file_id: FileId(0), full_range: 1..461, focus_range: 5..10, name: \"r#mod\", kind: Module, description: \"mod r#mod\" })", + "(TestMod, NavigationTarget { file_id: FileId(0), full_range: 1..461, focus_range: 5..10, name: \"mod\", kind: Module, description: \"mod r#mod\" })", "(Test, NavigationTarget { file_id: FileId(0), full_range: 17..41, focus_range: 32..36, name: \"r#fn\", kind: Function })", - "(DocTest, NavigationTarget { file_id: FileId(0), full_range: 47..84, name: \"r#for\", container_name: \"r#mod\" })", - "(DocTest, NavigationTarget { file_id: FileId(0), full_range: 90..146, name: \"r#struct\", container_name: \"r#mod\" })", + "(DocTest, NavigationTarget { file_id: FileId(0), full_range: 47..84, name: \"r#for\", container_name: \"mod\" })", + "(DocTest, NavigationTarget { file_id: FileId(0), full_range: 90..146, name: \"r#struct\", container_name: \"mod\" })", "(DocTest, NavigationTarget { file_id: FileId(0), full_range: 152..266, focus_range: 189..205, name: \"impl\", kind: Impl })", "(DocTest, NavigationTarget { file_id: FileId(0), full_range: 216..260, name: \"r#fn\" })", "(DocTest, NavigationTarget { file_id: FileId(0), full_range: 323..367, name: \"r#fn\" })", diff --git a/crates/intern/src/symbol/symbols.rs b/crates/intern/src/symbol/symbols.rs index 756377fe56f7..5a9d4510ef4f 100644 --- a/crates/intern/src/symbol/symbols.rs +++ b/crates/intern/src/symbol/symbols.rs @@ -85,6 +85,7 @@ define_symbols! 
{ false_ = "false", let_ = "let", const_ = "const", + kw_impl = "impl", proc_dash_macro = "proc-macro", aapcs_dash_unwind = "aapcs-unwind", avr_dash_interrupt = "avr-interrupt", From 3c19ebf63a1c97641499c7e0d139872aaffd93e2 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 8 Nov 2025 14:11:39 +0100 Subject: [PATCH 313/322] Disassemble `SymbolsDatabase` --- crates/hir/src/lib.rs | 7 +- crates/ide-db/src/apply_change.rs | 12 ++- crates/ide-db/src/items_locator.rs | 4 +- crates/ide-db/src/lib.rs | 10 +- crates/ide-db/src/prime_caches.rs | 7 +- crates/ide-db/src/symbol_index.rs | 154 ++++++++++++++++------------ crates/ide-ssr/src/lib.rs | 6 +- crates/ide-ssr/src/search.rs | 4 +- crates/ide-ssr/src/tests.rs | 7 +- crates/ide/src/ssr.rs | 6 +- crates/rust-analyzer/src/cli/ssr.rs | 3 +- 11 files changed, 125 insertions(+), 95 deletions(-) diff --git a/crates/hir/src/lib.rs b/crates/hir/src/lib.rs index f2faf99fc9e8..bb1741a0833e 100644 --- a/crates/hir/src/lib.rs +++ b/crates/hir/src/lib.rs @@ -35,6 +35,9 @@ pub mod term_search; mod display; +#[doc(hidden)] +pub use hir_def::ModuleId; + use std::{ fmt, mem::discriminant, @@ -48,8 +51,8 @@ use hir_def::{ AdtId, AssocItemId, AssocItemLoc, AttrDefId, CallableDefId, ConstId, ConstParamId, CrateRootModuleId, DefWithBodyId, EnumId, EnumVariantId, ExternBlockId, ExternCrateId, FunctionId, GenericDefId, GenericParamId, HasModule, ImplId, ItemContainerId, LifetimeParamId, - LocalFieldId, Lookup, MacroExpander, MacroId, ModuleId, StaticId, StructId, SyntheticSyntax, - TupleId, TypeAliasId, TypeOrConstParamId, TypeParamId, UnionId, + LocalFieldId, Lookup, MacroExpander, MacroId, StaticId, StructId, SyntheticSyntax, TupleId, + TypeAliasId, TypeOrConstParamId, TypeParamId, UnionId, expr_store::{ExpressionStoreDiagnostics, ExpressionStoreSourceMap}, hir::{ BindingAnnotation, BindingId, Expr, ExprId, ExprOrPatId, LabelId, Pat, diff --git a/crates/ide-db/src/apply_change.rs b/crates/ide-db/src/apply_change.rs index 
008b6fdbe2c6..6a85c6e54838 100644 --- a/crates/ide-db/src/apply_change.rs +++ b/crates/ide-db/src/apply_change.rs @@ -3,10 +3,12 @@ use base_db::SourceRootId; use profile::Bytes; use rustc_hash::FxHashSet; -use salsa::{Database as _, Durability}; -use triomphe::Arc; +use salsa::{Database as _, Durability, Setter as _}; -use crate::{ChangeWithProcMacros, RootDatabase, symbol_index::SymbolsDatabase}; +use crate::{ + ChangeWithProcMacros, RootDatabase, + symbol_index::{LibraryRoots, LocalRoots}, +}; impl RootDatabase { pub fn request_cancellation(&mut self) { @@ -29,8 +31,8 @@ impl RootDatabase { local_roots.insert(root_id); } } - self.set_local_roots_with_durability(Arc::new(local_roots), Durability::MEDIUM); - self.set_library_roots_with_durability(Arc::new(library_roots), Durability::MEDIUM); + LocalRoots::get(self).set_roots(self).to(local_roots); + LibraryRoots::get(self).set_roots(self).to(library_roots); } change.apply(self); } diff --git a/crates/ide-db/src/items_locator.rs b/crates/ide-db/src/items_locator.rs index 4b0a84a559e2..0d305530d925 100644 --- a/crates/ide-db/src/items_locator.rs +++ b/crates/ide-db/src/items_locator.rs @@ -10,7 +10,7 @@ use hir::{Complete, Crate, ItemInNs, Module, import_map}; use crate::{ RootDatabase, imports::import_assets::NameToImport, - symbol_index::{self, SymbolsDatabase as _}, + symbol_index::{self, SymbolIndex}, }; /// A value to use, when uncertain which limit to pick. 
@@ -110,7 +110,7 @@ pub fn items_with_name_in_module( local_query } }; - local_query.search(&[db.module_symbols(module)], |local_candidate| { + local_query.search(&[SymbolIndex::module_symbols(db, module)], |local_candidate| { cb(match local_candidate.def { hir::ModuleDef::Macro(macro_def) => ItemInNs::Macros(macro_def), def => ItemInNs::from(def), diff --git a/crates/ide-db/src/lib.rs b/crates/ide-db/src/lib.rs index 7efa97be5573..0301b5020862 100644 --- a/crates/ide-db/src/lib.rs +++ b/crates/ide-db/src/lib.rs @@ -64,7 +64,7 @@ use hir::{ }; use triomphe::Arc; -use crate::{line_index::LineIndex, symbol_index::SymbolsDatabase}; +use crate::line_index::LineIndex; pub use rustc_hash::{FxHashMap, FxHashSet, FxHasher}; pub use ::line_index; @@ -195,8 +195,12 @@ impl RootDatabase { db.set_all_crates(Arc::new(Box::new([]))); CrateGraphBuilder::default().set_in_db(&mut db); db.set_proc_macros_with_durability(Default::default(), Durability::MEDIUM); - db.set_local_roots_with_durability(Default::default(), Durability::MEDIUM); - db.set_library_roots_with_durability(Default::default(), Durability::MEDIUM); + _ = crate::symbol_index::LibraryRoots::builder(Default::default()) + .durability(Durability::MEDIUM) + .new(&db); + _ = crate::symbol_index::LocalRoots::builder(Default::default()) + .durability(Durability::MEDIUM) + .new(&db); db.set_expand_proc_attr_macros_with_durability(false, Durability::HIGH); db.update_base_query_lru_capacities(lru_capacity); db diff --git a/crates/ide-db/src/prime_caches.rs b/crates/ide-db/src/prime_caches.rs index e6618573e091..1463fdb19563 100644 --- a/crates/ide-db/src/prime_caches.rs +++ b/crates/ide-db/src/prime_caches.rs @@ -11,7 +11,7 @@ use salsa::{Cancelled, Database}; use crate::{ FxIndexMap, RootDatabase, base_db::{Crate, RootQueryDb}, - symbol_index::SymbolsDatabase, + symbol_index::SymbolIndex, }; /// We're indexing many crates. 
@@ -107,8 +107,9 @@ pub fn parallel_prime_caches( Ok::<_, crossbeam_channel::SendError<_>>(()) }; let handle_symbols = |module| { - let cancelled = - Cancelled::catch(AssertUnwindSafe(|| _ = db.module_symbols(module))); + let cancelled = Cancelled::catch(AssertUnwindSafe(|| { + _ = SymbolIndex::module_symbols(&db, module) + })); match cancelled { Ok(()) => progress_sender diff --git a/crates/ide-db/src/symbol_index.rs b/crates/ide-db/src/symbol_index.rs index c5ea9bcf5f10..ae9588330254 100644 --- a/crates/ide-db/src/symbol_index.rs +++ b/crates/ide-db/src/symbol_index.rs @@ -27,7 +27,7 @@ use std::{ ops::ControlFlow, }; -use base_db::{RootQueryDb, SourceDatabase, SourceRootId}; +use base_db::{RootQueryDb, SourceRootId}; use fst::{Automaton, Streamer, raw::IndexedValue}; use hir::{ Crate, Module, @@ -37,7 +37,6 @@ use hir::{ }; use rayon::prelude::*; use rustc_hash::FxHashSet; -use triomphe::Arc; use crate::RootDatabase; @@ -102,63 +101,26 @@ impl Query { } } -#[query_group::query_group] -pub trait SymbolsDatabase: HirDatabase + SourceDatabase { - /// The symbol index for a given module. These modules should only be in source roots that - /// are inside local_roots. - // FIXME: Is it worth breaking the encapsulation boundary of `hir`, and make this take a `ModuleId`, - // in order for it to be a non-interned query? - #[salsa::invoke_interned(module_symbols)] - fn module_symbols(&self, module: Module) -> Arc; - - /// The symbol index for a given source root within library_roots. - #[salsa::invoke_interned(library_symbols)] - fn library_symbols(&self, source_root_id: SourceRootId) -> Arc; - - #[salsa::transparent] - /// The symbol indices of modules that make up a given crate. - fn crate_symbols(&self, krate: Crate) -> Box<[Arc]>; - - /// The set of "local" (that is, from the current workspace) roots. - /// Files in local roots are assumed to change frequently. - #[salsa::input] - fn local_roots(&self) -> Arc>; - - /// The set of roots for crates.io libraries. 
- /// Files in libraries are assumed to never change. - #[salsa::input] - fn library_roots(&self) -> Arc>; -} - -fn library_symbols(db: &dyn SymbolsDatabase, source_root_id: SourceRootId) -> Arc { - let _p = tracing::info_span!("library_symbols").entered(); - - // We call this without attaching because this runs in parallel, so we need to attach here. - hir::attach_db(db, || { - let mut symbol_collector = SymbolCollector::new(db); - - db.source_root_crates(source_root_id) - .iter() - .flat_map(|&krate| Crate::from(krate).modules(db)) - // we specifically avoid calling other SymbolsDatabase queries here, even though they do the same thing, - // as the index for a library is not going to really ever change, and we do not want to store each - // the module or crate indices for those in salsa unless we need to. - .for_each(|module| symbol_collector.collect(module)); - - Arc::new(SymbolIndex::new(symbol_collector.finish())) - }) +/// The set of roots for crates.io libraries. +/// Files in libraries are assumed to never change. +#[salsa::input(singleton, debug)] +pub struct LibraryRoots { + #[returns(ref)] + pub roots: FxHashSet, } -fn module_symbols(db: &dyn SymbolsDatabase, module: Module) -> Arc { - let _p = tracing::info_span!("module_symbols").entered(); - - // We call this without attaching because this runs in parallel, so we need to attach here. - hir::attach_db(db, || Arc::new(SymbolIndex::new(SymbolCollector::new_module(db, module)))) +/// The set of "local" (that is, from the current workspace) roots. +/// Files in local roots are assumed to change frequently. +#[salsa::input(singleton, debug)] +pub struct LocalRoots { + #[returns(ref)] + pub roots: FxHashSet, } -pub fn crate_symbols(db: &dyn SymbolsDatabase, krate: Crate) -> Box<[Arc]> { +/// The symbol indices of modules that make up a given crate. 
+pub fn crate_symbols(db: &dyn HirDatabase, krate: Crate) -> Box<[&SymbolIndex]> { let _p = tracing::info_span!("crate_symbols").entered(); - krate.modules(db).into_iter().map(|module| db.module_symbols(module)).collect() + krate.modules(db).into_iter().map(|module| SymbolIndex::module_symbols(db, module)).collect() } // Feature: Workspace Symbol @@ -190,20 +152,26 @@ pub fn world_symbols(db: &RootDatabase, query: Query) -> Vec { let _p = tracing::info_span!("world_symbols", query = ?query.query).entered(); let indices: Vec<_> = if query.libs { - db.library_roots() + LibraryRoots::get(db) + .roots(db) .par_iter() - .map_with(db.clone(), |snap, &root| snap.library_symbols(root)) + .for_each_with(db.clone(), |snap, &root| _ = SymbolIndex::library_symbols(snap, root)); + LibraryRoots::get(db) + .roots(db) + .iter() + .map(|&root| SymbolIndex::library_symbols(db, root)) .collect() } else { let mut crates = Vec::new(); - for &root in db.local_roots().iter() { + for &root in LocalRoots::get(db).roots(db).iter() { crates.extend(db.source_root_crates(root).iter().copied()) } - let indices: Vec<_> = crates - .into_par_iter() - .map_with(db.clone(), |snap, krate| snap.crate_symbols(krate.into())) - .collect(); + crates + .par_iter() + .for_each_with(db.clone(), |snap, &krate| _ = crate_symbols(snap, krate.into())); + let indices: Vec<_> = + crates.into_iter().map(|krate| crate_symbols(db, krate.into())).collect(); indices.iter().flat_map(|indices| indices.iter().cloned()).collect() }; @@ -221,6 +189,62 @@ pub struct SymbolIndex { map: fst::Map>, } +impl SymbolIndex { + /// The symbol index for a given source root within library_roots. 
+ pub fn library_symbols(db: &dyn HirDatabase, source_root_id: SourceRootId) -> &SymbolIndex { + // FIXME: + #[salsa::interned] + struct InternedSourceRootId { + id: SourceRootId, + } + #[salsa::tracked(returns(ref))] + fn library_symbols( + db: &dyn HirDatabase, + source_root_id: InternedSourceRootId<'_>, + ) -> SymbolIndex { + let _p = tracing::info_span!("library_symbols").entered(); + + // We call this without attaching because this runs in parallel, so we need to attach here. + hir::attach_db(db, || { + let mut symbol_collector = SymbolCollector::new(db); + + db.source_root_crates(source_root_id.id(db)) + .iter() + .flat_map(|&krate| Crate::from(krate).modules(db)) + // we specifically avoid calling other SymbolsDatabase queries here, even though they do the same thing, + // as the index for a library is not going to really ever change, and we do not want to store each + // the module or crate indices for those in salsa unless we need to. + .for_each(|module| symbol_collector.collect(module)); + + SymbolIndex::new(symbol_collector.finish()) + }) + } + library_symbols(db, InternedSourceRootId::new(db, source_root_id)) + } + + /// The symbol index for a given module. These modules should only be in source roots that + /// are inside local_roots. + pub fn module_symbols(db: &dyn HirDatabase, module: Module) -> &SymbolIndex { + // FIXME: + #[salsa::interned] + struct InternedModuleId { + id: hir::ModuleId, + } + + #[salsa::tracked(returns(ref))] + fn module_symbols(db: &dyn HirDatabase, module: InternedModuleId<'_>) -> SymbolIndex { + let _p = tracing::info_span!("module_symbols").entered(); + + // We call this without attaching because this runs in parallel, so we need to attach here. 
+ hir::attach_db(db, || { + SymbolIndex::new(SymbolCollector::new_module(db, module.id(db).into())) + }) + } + + module_symbols(db, InternedModuleId::new(db, hir::ModuleId::from(module))) + } +} + impl fmt::Debug for SymbolIndex { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish() @@ -309,7 +333,7 @@ impl SymbolIndex { impl Query { pub(crate) fn search<'sym, T>( self, - indices: &'sym [Arc], + indices: &'sym [&SymbolIndex], cb: impl FnMut(&'sym FileSymbol) -> ControlFlow, ) -> Option { let _p = tracing::info_span!("symbol_index::Query::search").entered(); @@ -344,7 +368,7 @@ impl Query { fn search_maps<'sym, T>( &self, - indices: &'sym [Arc], + indices: &'sym [&SymbolIndex], mut stream: fst::map::Union<'_>, mut cb: impl FnMut(&'sym FileSymbol) -> ControlFlow, ) -> Option { @@ -397,7 +421,7 @@ impl Query { mod tests { use expect_test::expect_file; - use salsa::Durability; + use salsa::Setter; use test_fixture::{WORKSPACE, WithFixture}; use super::*; @@ -535,7 +559,7 @@ pub struct Foo; let mut local_roots = FxHashSet::default(); local_roots.insert(WORKSPACE); - db.set_local_roots_with_durability(Arc::new(local_roots), Durability::HIGH); + LocalRoots::get(&db).set_roots(&mut db).to(local_roots); let mut query = Query::new("Foo".to_owned()); let mut symbols = world_symbols(&db, query.clone()); diff --git a/crates/ide-ssr/src/lib.rs b/crates/ide-ssr/src/lib.rs index 43ad12c1f699..66ece4e4f04d 100644 --- a/crates/ide-ssr/src/lib.rs +++ b/crates/ide-ssr/src/lib.rs @@ -80,7 +80,7 @@ pub use crate::{errors::SsrError, from_comment::ssr_from_comment, matching::Matc use crate::{errors::bail, matching::MatchFailureReason}; use hir::{FileRange, Semantics}; -use ide_db::symbol_index::SymbolsDatabase; +use ide_db::symbol_index::LocalRoots; use ide_db::text_edit::TextEdit; use ide_db::{EditionedFileId, FileId, FxHashMap, RootDatabase, base_db::SourceDatabase}; use resolving::ResolvedRule; @@ 
-138,8 +138,8 @@ impl<'db> MatchFinder<'db> { /// Constructs an instance using the start of the first file in `db` as the lookup context. pub fn at_first_file(db: &'db ide_db::RootDatabase) -> Result, SsrError> { - if let Some(first_file_id) = db - .local_roots() + if let Some(first_file_id) = LocalRoots::get(db) + .roots(db) .iter() .next() .and_then(|root| db.source_root(*root).source_root(db).iter().next()) diff --git a/crates/ide-ssr/src/search.rs b/crates/ide-ssr/src/search.rs index 72f857ceda90..56484ae7a690 100644 --- a/crates/ide-ssr/src/search.rs +++ b/crates/ide-ssr/src/search.rs @@ -9,6 +9,7 @@ use ide_db::{ EditionedFileId, FileId, FxHashSet, defs::Definition, search::{SearchScope, UsageSearchResult}, + symbol_index::LocalRoots, }; use syntax::{AstNode, SyntaxKind, SyntaxNode, ast}; @@ -156,8 +157,7 @@ impl<'db> MatchFinder<'db> { if self.restrict_ranges.is_empty() { // Unrestricted search. use ide_db::base_db::SourceDatabase; - use ide_db::symbol_index::SymbolsDatabase; - for &root in self.sema.db.local_roots().iter() { + for &root in LocalRoots::get(self.sema.db).roots(self.sema.db).iter() { let sr = self.sema.db.source_root(root).source_root(self.sema.db); for file_id in sr.iter() { callback(file_id); diff --git a/crates/ide-ssr/src/tests.rs b/crates/ide-ssr/src/tests.rs index 1bb435f31f29..852033599a50 100644 --- a/crates/ide-ssr/src/tests.rs +++ b/crates/ide-ssr/src/tests.rs @@ -2,10 +2,10 @@ use expect_test::{Expect, expect}; use hir::{FilePosition, FileRange}; use ide_db::{ EditionedFileId, FxHashSet, - base_db::{SourceDatabase, salsa::Durability}, + base_db::{SourceDatabase, salsa::Setter}, + symbol_index::LocalRoots, }; use test_utils::RangeOrOffset; -use triomphe::Arc; use crate::{MatchFinder, SsrRule}; @@ -66,7 +66,6 @@ fn parser_undefined_placeholder_in_replacement() { /// `code` may optionally contain a cursor marker `$0`. If it doesn't, then the position will be /// the start of the file. 
If there's a second cursor marker, then we'll return a single range. pub(crate) fn single_file(code: &str) -> (ide_db::RootDatabase, FilePosition, Vec) { - use ide_db::symbol_index::SymbolsDatabase; use test_fixture::{WORKSPACE, WithFixture}; let (mut db, file_id, range_or_offset) = if code.contains(test_utils::CURSOR_MARKER) { ide_db::RootDatabase::with_range_or_offset(code) @@ -88,7 +87,7 @@ pub(crate) fn single_file(code: &str) -> (ide_db::RootDatabase, FilePosition, Ve } let mut local_roots = FxHashSet::default(); local_roots.insert(WORKSPACE); - db.set_local_roots_with_durability(Arc::new(local_roots), Durability::HIGH); + LocalRoots::get(&db).set_roots(&mut db).to(local_roots); (db, position, selections) } diff --git a/crates/ide/src/ssr.rs b/crates/ide/src/ssr.rs index 7df4499a0c2f..dc8f34320776 100644 --- a/crates/ide/src/ssr.rs +++ b/crates/ide/src/ssr.rs @@ -59,11 +59,9 @@ mod tests { use expect_test::expect; use ide_assists::{Assist, AssistResolveStrategy}; use ide_db::{ - FileRange, FxHashSet, RootDatabase, base_db::salsa::Durability, - symbol_index::SymbolsDatabase, + FileRange, FxHashSet, RootDatabase, base_db::salsa::Setter as _, symbol_index::LocalRoots, }; use test_fixture::WithFixture; - use triomphe::Arc; use super::ssr_assists; @@ -74,7 +72,7 @@ mod tests { let (mut db, file_id, range_or_offset) = RootDatabase::with_range_or_offset(ra_fixture); let mut local_roots = FxHashSet::default(); local_roots.insert(test_fixture::WORKSPACE); - db.set_local_roots_with_durability(Arc::new(local_roots), Durability::HIGH); + LocalRoots::get(&db).set_roots(&mut db).to(local_roots); ssr_assists( &db, &resolve, diff --git a/crates/rust-analyzer/src/cli/ssr.rs b/crates/rust-analyzer/src/cli/ssr.rs index e3e3a143de03..975e81a4af67 100644 --- a/crates/rust-analyzer/src/cli/ssr.rs +++ b/crates/rust-analyzer/src/cli/ssr.rs @@ -50,7 +50,6 @@ impl flags::Search { /// for much else. 
pub fn run(self) -> anyhow::Result<()> { use ide_db::base_db::SourceDatabase; - use ide_db::symbol_index::SymbolsDatabase; let cargo_config = CargoConfig { all_targets: true, set_test: true, ..CargoConfig::default() }; let load_cargo_config = LoadCargoConfig { @@ -69,7 +68,7 @@ impl flags::Search { match_finder.add_search_pattern(pattern)?; } if let Some(debug_snippet) = &self.debug { - for &root in db.local_roots().iter() { + for &root in ide_db::symbol_index::LocalRoots::get(db).roots(db).iter() { let sr = db.source_root(root).source_root(db); for file_id in sr.iter() { for debug_info in match_finder.debug_where_text_equal( From 7f82691f6e3bf6a829c4e86aadc6db49c5100333 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sat, 8 Nov 2025 19:32:58 +0100 Subject: [PATCH 314/322] perf: Only populate public items in dependency symbol index --- crates/hir/src/symbols.rs | 38 +++++++++++++++++++++++++------ crates/ide-db/src/symbol_index.rs | 15 ++++++++---- 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/crates/hir/src/symbols.rs b/crates/hir/src/symbols.rs index e472de98c682..bd4cff50b297 100644 --- a/crates/hir/src/symbols.rs +++ b/crates/hir/src/symbols.rs @@ -66,22 +66,28 @@ pub struct SymbolCollector<'a> { symbols: FxIndexSet, work: Vec, current_container_name: Option, + collect_pub_only: bool, } /// Given a [`ModuleId`] and a [`HirDatabase`], use the DefMap for the module's crate to collect /// all symbols that should be indexed for the given module. 
impl<'a> SymbolCollector<'a> { - pub fn new(db: &'a dyn HirDatabase) -> Self { + pub fn new(db: &'a dyn HirDatabase, collect_pub_only: bool) -> Self { SymbolCollector { db, symbols: Default::default(), work: Default::default(), current_container_name: None, + collect_pub_only, } } - pub fn new_module(db: &dyn HirDatabase, module: Module) -> Box<[FileSymbol]> { - let mut symbol_collector = SymbolCollector::new(db); + pub fn new_module( + db: &dyn HirDatabase, + module: Module, + collect_pub_only: bool, + ) -> Box<[FileSymbol]> { + let mut symbol_collector = SymbolCollector::new(db, collect_pub_only); symbol_collector.collect(module); symbol_collector.finish() } @@ -113,7 +119,11 @@ impl<'a> SymbolCollector<'a> { } fn collect_from_module(&mut self, module_id: ModuleId) { - let push_decl = |this: &mut Self, def, name| { + let collect_pub_only = self.collect_pub_only; + let push_decl = |this: &mut Self, def: ModuleDefId, name, vis| { + if collect_pub_only && vis != Visibility::Public { + return; + } match def { ModuleDefId::ModuleId(id) => this.push_module(id, name), ModuleDefId::FunctionId(id) => { @@ -175,6 +185,9 @@ impl<'a> SymbolCollector<'a> { }; let mut push_import = |this: &mut Self, i: ImportId, name: &Name, def: ModuleDefId, vis| { + if collect_pub_only && vis != Visibility::Public { + return; + } let source = import_child_source_cache .entry(i.use_) .or_insert_with(|| i.use_.child_source(this.db)); @@ -209,6 +222,9 @@ impl<'a> SymbolCollector<'a> { let push_extern_crate = |this: &mut Self, i: ExternCrateId, name: &Name, def: ModuleDefId, vis| { + if collect_pub_only && vis != Visibility::Public { + return; + } let loc = i.lookup(this.db); let source = loc.source(this.db); let rename = source.value.rename().and_then(|rename| rename.name()); @@ -258,7 +274,7 @@ impl<'a> SymbolCollector<'a> { continue; } // self is a declaration - push_decl(self, def, name) + push_decl(self, def, name, vis) } for (name, Item { def, vis, import }) in scope.macros() { @@ -271,7 
+287,7 @@ impl<'a> SymbolCollector<'a> { continue; } // self is a declaration - push_decl(self, def.into(), name) + push_decl(self, ModuleDefId::MacroId(def), name, vis) } for (name, Item { def, vis, import }) in scope.values() { @@ -283,7 +299,7 @@ impl<'a> SymbolCollector<'a> { continue; } // self is a declaration - push_decl(self, def, name) + push_decl(self, def, name, vis) } for const_id in scope.unnamed_consts() { @@ -304,6 +320,9 @@ impl<'a> SymbolCollector<'a> { } fn collect_from_body(&mut self, body_id: impl Into, name: Option) { + if self.collect_pub_only { + return; + } let body_id = body_id.into(); let body = self.db.body(body_id); @@ -330,6 +349,11 @@ impl<'a> SymbolCollector<'a> { ); self.with_container_name(impl_name.as_deref().map(Symbol::intern), |s| { for &(ref name, assoc_item_id) in &impl_id.impl_items(self.db).items { + if s.collect_pub_only && s.db.assoc_visibility(assoc_item_id) != Visibility::Public + { + continue; + } + s.push_assoc_item(assoc_item_id, name, None) } }) diff --git a/crates/ide-db/src/symbol_index.rs b/crates/ide-db/src/symbol_index.rs index ae9588330254..ef67fbf6fe00 100644 --- a/crates/ide-db/src/symbol_index.rs +++ b/crates/ide-db/src/symbol_index.rs @@ -206,13 +206,13 @@ impl SymbolIndex { // We call this without attaching because this runs in parallel, so we need to attach here. hir::attach_db(db, || { - let mut symbol_collector = SymbolCollector::new(db); + let mut symbol_collector = SymbolCollector::new(db, true); db.source_root_crates(source_root_id.id(db)) .iter() .flat_map(|&krate| Crate::from(krate).modules(db)) // we specifically avoid calling other SymbolsDatabase queries here, even though they do the same thing, - // as the index for a library is not going to really ever change, and we do not want to store each + // as the index for a library is not going to really ever change, and we do not want to store // the module or crate indices for those in salsa unless we need to. 
.for_each(|module| symbol_collector.collect(module)); @@ -237,7 +237,12 @@ impl SymbolIndex { // We call this without attaching because this runs in parallel, so we need to attach here. hir::attach_db(db, || { - SymbolIndex::new(SymbolCollector::new_module(db, module.id(db).into())) + let module: Module = module.id(db).into(); + SymbolIndex::new(SymbolCollector::new_module( + db, + module, + !module.krate().origin(db).is_local(), + )) }) } @@ -508,7 +513,7 @@ pub(self) use crate::Trait as IsThisJustATrait; .modules(&db) .into_iter() .map(|module_id| { - let mut symbols = SymbolCollector::new_module(&db, module_id); + let mut symbols = SymbolCollector::new_module(&db, module_id, false); symbols.sort_by_key(|it| it.name.as_str().to_owned()); (module_id, symbols) }) @@ -535,7 +540,7 @@ struct Duplicate; .modules(&db) .into_iter() .map(|module_id| { - let mut symbols = SymbolCollector::new_module(&db, module_id); + let mut symbols = SymbolCollector::new_module(&db, module_id, false); symbols.sort_by_key(|it| it.name.as_str().to_owned()); (module_id, symbols) }) From c012ee9a481c83e0114ba87547d6dac72461739c Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 9 Nov 2025 13:28:46 +0100 Subject: [PATCH 315/322] lib: Bump editions to 2024 and remove legacy files --- lib/smol_str/.github/ci.rs | 127 ------------------------ lib/smol_str/.github/workflows/ci.yaml | 36 ------- lib/smol_str/.gitignore | 4 - lib/smol_str/Cargo.toml | 2 +- lib/smol_str/benches/bench.rs | 4 +- lib/smol_str/bors.toml | 2 - lib/smol_str/src/borsh.rs | 4 +- lib/smol_str/src/lib.rs | 39 ++++---- lib/text-size/.github/workflows/ci.yaml | 54 ---------- lib/text-size/.gitignore | 3 - lib/text-size/Cargo.toml | 7 +- lib/text-size/bors.toml | 6 -- lib/text-size/src/serde_impls.rs | 2 +- lib/ungrammar/.github/ci.rs | 114 --------------------- lib/ungrammar/.github/workflows/ci.yaml | 36 ------- lib/ungrammar/.gitignore | 3 - lib/ungrammar/Cargo.toml | 8 +- lib/ungrammar/bors.toml | 2 - 
lib/ungrammar/src/lexer.rs | 2 +- lib/ungrammar/src/parser.rs | 4 +- lib/ungrammar/ungrammar2json/Cargo.toml | 2 +- 21 files changed, 38 insertions(+), 423 deletions(-) delete mode 100644 lib/smol_str/.github/ci.rs delete mode 100644 lib/smol_str/.github/workflows/ci.yaml delete mode 100644 lib/smol_str/.gitignore delete mode 100644 lib/smol_str/bors.toml delete mode 100644 lib/text-size/.github/workflows/ci.yaml delete mode 100644 lib/text-size/.gitignore delete mode 100644 lib/text-size/bors.toml delete mode 100644 lib/ungrammar/.github/ci.rs delete mode 100644 lib/ungrammar/.github/workflows/ci.yaml delete mode 100644 lib/ungrammar/.gitignore delete mode 100644 lib/ungrammar/bors.toml diff --git a/lib/smol_str/.github/ci.rs b/lib/smol_str/.github/ci.rs deleted file mode 100644 index c594e8973c83..000000000000 --- a/lib/smol_str/.github/ci.rs +++ /dev/null @@ -1,127 +0,0 @@ -use std::{ - env, fs, - process::{self, Command, ExitStatus, Stdio}, - time::Instant, -}; - -type Error = Box; -type Result = std::result::Result; - -fn main() { - if let Err(err) = try_main() { - eprintln!("{}", err); - process::exit(1); - } -} - -fn try_main() -> Result<()> { - let cwd = env::current_dir()?; - let cargo_toml = cwd.join("Cargo.toml"); - assert!( - cargo_toml.exists(), - "Cargo.toml not found, cwd: {}", - cwd.display() - ); - - { - let _s = Section::new("BUILD_NO_DEFAULT_FEATURES"); - shell("cargo test --all-features --workspace --no-run --no-default-features")?; - } - - { - let _s = Section::new("BUILD"); - shell("cargo test --all-features --workspace --no-run")?; - } - - { - let _s = Section::new("TEST"); - shell("cargo test --all-features --workspace")?; - shell("cargo test --no-default-features --workspace")?; - } - - { - let _s = Section::new("TEST_BENCHES"); - shell("cargo test --benches --all-features")?; - } - - let current_branch = shell_output("git branch --show-current")?; - if ¤t_branch == "master" { - let _s = Section::new("PUBLISH"); - let manifest = 
fs::read_to_string(&cargo_toml)?; - let version = get_field(&manifest, "version")?; - let tag = format!("v{}", version); - let tags = shell_output("git tag --list")?; - - if !tags.contains(&tag) { - let token = env::var("CRATES_IO_TOKEN").unwrap(); - shell(&format!("git tag v{}", version))?; - shell(&format!("cargo publish --token {}", token))?; - shell("git push --tags")?; - } - } - Ok(()) -} - -fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> { - for line in text.lines() { - let words = line.split_ascii_whitespace().collect::>(); - match words.as_slice() { - [n, "=", v, ..] if n.trim() == name => { - assert!(v.starts_with('"') && v.ends_with('"')); - return Ok(&v[1..v.len() - 1]); - } - _ => (), - } - } - Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))? -} - -fn shell(cmd: &str) -> Result<()> { - let status = command(cmd).status()?; - check_status(status) -} - -fn shell_output(cmd: &str) -> Result { - let output = command(cmd).stderr(Stdio::inherit()).output()?; - check_status(output.status)?; - let res = String::from_utf8(output.stdout)?; - let res = res.trim().to_string(); - println!("{}", res); - Ok(res) -} - -fn command(cmd: &str) -> Command { - eprintln!("> {}", cmd); - let words = cmd.split_ascii_whitespace().collect::>(); - let (cmd, args) = words.split_first().unwrap(); - let mut res = Command::new(cmd); - res.args(args); - res -} - -fn check_status(status: ExitStatus) -> Result<()> { - if !status.success() { - Err(format!("$status: {}", status))?; - } - Ok(()) -} - -struct Section { - name: &'static str, - start: Instant, -} - -impl Section { - fn new(name: &'static str) -> Section { - println!("::group::{}", name); - let start = Instant::now(); - Section { name, start } - } -} - -impl Drop for Section { - fn drop(&mut self) { - eprintln!("{}: {:.2?}", self.name, self.start.elapsed()); - println!("::endgroup::"); - } -} diff --git a/lib/smol_str/.github/workflows/ci.yaml b/lib/smol_str/.github/workflows/ci.yaml deleted file 
mode 100644 index 1c2e347374ae..000000000000 --- a/lib/smol_str/.github/workflows/ci.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: CI -on: - pull_request: - push: - branches: - - master - - staging - - trying - -env: - CARGO_INCREMENTAL: 0 - CARGO_NET_RETRY: 10 - CI: 1 - RUST_BACKTRACE: short - RUSTFLAGS: -D warnings - RUSTUP_MAX_RETRIES: 10 - -jobs: - rust: - name: Rust - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Install Rust toolchain - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - cache: false - - - run: rustc ./.github/ci.rs && ./ci - env: - CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} diff --git a/lib/smol_str/.gitignore b/lib/smol_str/.gitignore deleted file mode 100644 index 0c8227b253a5..000000000000 --- a/lib/smol_str/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -/target -/ci -/.vscode -Cargo.lock diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index ee3263594a3b..118b25993ffe 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -5,7 +5,7 @@ description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/smol_str" authors = ["Aleksey Kladov ", "Lukas Wirth "] -edition = "2021" +edition = "2024" rust-version = "1.89" [package.metadata.docs.rs] diff --git a/lib/smol_str/benches/bench.rs b/lib/smol_str/benches/bench.rs index 2643b025575d..092ee3598095 100644 --- a/lib/smol_str/benches/bench.rs +++ b/lib/smol_str/benches/bench.rs @@ -1,6 +1,6 @@ -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use rand::distr::{Alphanumeric, SampleString}; -use smol_str::{format_smolstr, SmolStr, StrExt, ToSmolStr}; +use smol_str::{SmolStr, StrExt, ToSmolStr, format_smolstr}; use std::hint::black_box; /// 12: small (inline) diff --git a/lib/smol_str/bors.toml 
b/lib/smol_str/bors.toml deleted file mode 100644 index b92b99ac3020..000000000000 --- a/lib/smol_str/bors.toml +++ /dev/null @@ -1,2 +0,0 @@ -status = [ "Rust" ] -delete_merged_branches = true diff --git a/lib/smol_str/src/borsh.rs b/lib/smol_str/src/borsh.rs index ebb20d71a005..527ce85a1746 100644 --- a/lib/smol_str/src/borsh.rs +++ b/lib/smol_str/src/borsh.rs @@ -1,8 +1,8 @@ -use crate::{Repr, SmolStr, INLINE_CAP}; +use crate::{INLINE_CAP, Repr, SmolStr}; use alloc::string::{String, ToString}; use borsh::{ - io::{Error, ErrorKind, Read, Write}, BorshDeserialize, BorshSerialize, + io::{Error, ErrorKind, Read, Write}, }; use core::mem::transmute; diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index effaba211df9..a1d2c2f06744 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -434,8 +434,7 @@ impl FromStr for SmolStr { const INLINE_CAP: usize = InlineSize::_V23 as usize; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; -const WS: &str = - "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; +const WS: &str = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; const _: () = { assert!(WS.len() == N_NEWLINES + N_SPACES); assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n'); @@ -690,24 +689,24 @@ impl StrExt for str { #[inline] fn replacen_smolstr(&self, from: &str, to: &str, mut count: usize) -> SmolStr { // Fast path for replacing a single ASCII character with another inline. 
- if let [from_u8] = from.as_bytes() { - if let [to_u8] = to.as_bytes() { - return if self.len() <= count { - // SAFETY: `from_u8` & `to_u8` are ascii - unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) } - } else { - unsafe { - replacen_1_ascii(self, |b| { - if b == from_u8 && count != 0 { - count -= 1; - *to_u8 - } else { - *b - } - }) - } - }; - } + if let [from_u8] = from.as_bytes() + && let [to_u8] = to.as_bytes() + { + return if self.len() <= count { + // SAFETY: `from_u8` & `to_u8` are ascii + unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) } + } else { + unsafe { + replacen_1_ascii(self, |b| { + if b == from_u8 && count != 0 { + count -= 1; + *to_u8 + } else { + *b + } + }) + } + }; } let mut result = SmolStrBuilder::new(); diff --git a/lib/text-size/.github/workflows/ci.yaml b/lib/text-size/.github/workflows/ci.yaml deleted file mode 100644 index 4538ca8479d9..000000000000 --- a/lib/text-size/.github/workflows/ci.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: CI -on: - pull_request: - push: - branches: - - master - - staging - - trying - -env: - RUSTFLAGS: -D warnings - RUSTUP_MAX_RETRIES: 10 - CARGO_NET_RETRY: 10 - -jobs: - rust: - name: Rust - runs-on: ${{ matrix.os }} - - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - profile: minimal - - - name: Test - run: cargo test --all-features - - rustdoc: - name: Docs - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: nightly - profile: minimal - override: true - - - name: Rustdoc - run: cargo rustdoc --all-features -- -D warnings diff --git a/lib/text-size/.gitignore b/lib/text-size/.gitignore deleted file mode 100644 index 
693699042b1a..000000000000 --- a/lib/text-size/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/target -**/*.rs.bk -Cargo.lock diff --git a/lib/text-size/Cargo.toml b/lib/text-size/Cargo.toml index 7882f7cc3526..f889009b0b29 100644 --- a/lib/text-size/Cargo.toml +++ b/lib/text-size/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "text-size" version = "1.1.1" -edition = "2018" +edition = "2024" authors = [ "Aleksey Kladov ", @@ -13,7 +13,7 @@ repository = "https://github.com/rust-analyzer/text-size" documentation = "https://docs.rs/text-size" [dependencies] -serde = { version = "1.0", optional = true, default_features = false } +serde = { version = "1.0", optional = true, default-features = false } [dev-dependencies] serde_test = "1.0" @@ -23,3 +23,6 @@ static_assertions = "1.1" name = "serde" path = "tests/serde.rs" required-features = ["serde"] + +[lints] +workspace = true diff --git a/lib/text-size/bors.toml b/lib/text-size/bors.toml deleted file mode 100644 index 932be8d0909c..000000000000 --- a/lib/text-size/bors.toml +++ /dev/null @@ -1,6 +0,0 @@ -status = [ - "Rust (ubuntu-latest)", - "Rust (windows-latest)", - "Rust (macos-latest)", -] -delete_merged_branches = true diff --git a/lib/text-size/src/serde_impls.rs b/lib/text-size/src/serde_impls.rs index 7f3f75751041..4cd41618c2d7 100644 --- a/lib/text-size/src/serde_impls.rs +++ b/lib/text-size/src/serde_impls.rs @@ -1,6 +1,6 @@ use { crate::{TextRange, TextSize}, - serde::{de, Deserialize, Deserializer, Serialize, Serializer}, + serde::{Deserialize, Deserializer, Serialize, Serializer, de}, }; impl Serialize for TextSize { diff --git a/lib/ungrammar/.github/ci.rs b/lib/ungrammar/.github/ci.rs deleted file mode 100644 index 87eb307d633d..000000000000 --- a/lib/ungrammar/.github/ci.rs +++ /dev/null @@ -1,114 +0,0 @@ -use std::{ - env, fs, - process::{self, Command, ExitStatus, Stdio}, - time::Instant, -}; - -type Error = Box; -type Result = std::result::Result; - -fn main() { - if let Err(err) = try_main() { - 
eprintln!("{}", err); - process::exit(1); - } -} - -fn try_main() -> Result<()> { - let cwd = env::current_dir()?; - let cargo_toml = cwd.join("Cargo.toml"); - assert!( - cargo_toml.exists(), - "Cargo.toml not found, cwd: {}", - cwd.display() - ); - - { - let _s = Section::new("BUILD"); - shell("cargo test --workspace --no-run")?; - } - - { - let _s = Section::new("TEST"); - shell("cargo test --workspace")?; - } - - let current_branch = shell_output("git branch --show-current")?; - if ¤t_branch == "master" { - let _s = Section::new("PUBLISH"); - let manifest = fs::read_to_string(&cargo_toml)?; - let version = get_field(&manifest, "version")?; - let tag = format!("v{}", version); - let tags = shell_output("git tag --list")?; - - if !tags.contains(&tag) { - let token = env::var("CRATES_IO_TOKEN").unwrap(); - shell(&format!("git tag v{}", version))?; - shell(&format!("cargo publish --token {}", token))?; - shell("git push --tags")?; - } - } - Ok(()) -} - -fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> { - for line in text.lines() { - let words = line.split_ascii_whitespace().collect::>(); - match words.as_slice() { - [n, "=", v, ..] if n.trim() == name => { - assert!(v.starts_with('"') && v.ends_with('"')); - return Ok(&v[1..v.len() - 1]); - } - _ => (), - } - } - Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))? 
-} - -fn shell(cmd: &str) -> Result<()> { - let status = command(cmd).status()?; - check_status(status) -} - -fn shell_output(cmd: &str) -> Result { - let output = command(cmd).stderr(Stdio::inherit()).output()?; - check_status(output.status)?; - let res = String::from_utf8(output.stdout)?; - Ok(res.trim().to_string()) -} - -fn command(cmd: &str) -> Command { - eprintln!("> {}", cmd); - let words = cmd.split_ascii_whitespace().collect::>(); - let (cmd, args) = words.split_first().unwrap(); - let mut res = Command::new(cmd); - res.args(args); - res -} - -fn check_status(status: ExitStatus) -> Result<()> { - if !status.success() { - Err(format!("$status: {}", status))?; - } - Ok(()) -} - -struct Section { - name: &'static str, - start: Instant, -} - -impl Section { - fn new(name: &'static str) -> Section { - println!("::group::{}", name); - let start = Instant::now(); - Section { name, start } - } -} - -impl Drop for Section { - fn drop(&mut self) { - eprintln!("{}: {:.2?}", self.name, self.start.elapsed()); - println!("::endgroup::"); - } -} diff --git a/lib/ungrammar/.github/workflows/ci.yaml b/lib/ungrammar/.github/workflows/ci.yaml deleted file mode 100644 index 88f133867e71..000000000000 --- a/lib/ungrammar/.github/workflows/ci.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: CI -on: - pull_request: - push: - branches: - - master - - staging - - trying - -env: - CARGO_INCREMENTAL: 0 - CARGO_NET_RETRY: 10 - CI: 1 - RUST_BACKTRACE: short - RUSTFLAGS: -D warnings - RUSTUP_MAX_RETRIES: 10 - -jobs: - rust: - name: Rust - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - profile: minimal - override: true - - - run: rustc ./.github/ci.rs && ./ci - env: - CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} diff --git a/lib/ungrammar/.gitignore b/lib/ungrammar/.gitignore deleted file mode 100644 index e3bd43f693fa..000000000000 --- 
a/lib/ungrammar/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/ci -/Cargo.lock -/target diff --git a/lib/ungrammar/Cargo.toml b/lib/ungrammar/Cargo.toml index 6e9dec7d6b27..b8dcb4abf7d3 100644 --- a/lib/ungrammar/Cargo.toml +++ b/lib/ungrammar/Cargo.toml @@ -4,10 +4,10 @@ description = "A DSL for describing concrete syntax trees" version = "1.16.1" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/ungrammar" -edition = "2018" - -exclude = ["/bors.toml", "/.github"] - +edition = "2024" [dependencies] # nope + +[lints] +workspace = true diff --git a/lib/ungrammar/bors.toml b/lib/ungrammar/bors.toml deleted file mode 100644 index b92b99ac3020..000000000000 --- a/lib/ungrammar/bors.toml +++ /dev/null @@ -1,2 +0,0 @@ -status = [ "Rust" ] -delete_merged_branches = true diff --git a/lib/ungrammar/src/lexer.rs b/lib/ungrammar/src/lexer.rs index f4c979b5bdc7..23da09abb2a1 100644 --- a/lib/ungrammar/src/lexer.rs +++ b/lib/ungrammar/src/lexer.rs @@ -1,5 +1,5 @@ //! Simple hand-written ungrammar lexer -use crate::error::{bail, Result}; +use crate::error::{Result, bail}; #[derive(Debug, Eq, PartialEq)] pub(crate) enum TokenKind { diff --git a/lib/ungrammar/src/parser.rs b/lib/ungrammar/src/parser.rs index 70fbe1ac0b66..2cc5dc54df6e 100644 --- a/lib/ungrammar/src/parser.rs +++ b/lib/ungrammar/src/parser.rs @@ -3,9 +3,9 @@ use std::collections::HashMap; use crate::{ - error::{bail, format_err, Result}, - lexer::{self, TokenKind}, Grammar, Node, NodeData, Rule, Token, TokenData, + error::{Result, bail, format_err}, + lexer::{self, TokenKind}, }; macro_rules! 
bail { diff --git a/lib/ungrammar/ungrammar2json/Cargo.toml b/lib/ungrammar/ungrammar2json/Cargo.toml index 19ca3d832430..0fa08bbbd048 100644 --- a/lib/ungrammar/ungrammar2json/Cargo.toml +++ b/lib/ungrammar/ungrammar2json/Cargo.toml @@ -5,7 +5,7 @@ version = "1.0.0" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" authors = ["Aleksey Kladov "] -edition = "2018" +edition = "2024" [dependencies] write-json = "0.1.1" From db86a0065cc43bf03c77778e4281ab8c02dd9813 Mon Sep 17 00:00:00 2001 From: A4-Tacks Date: Sun, 9 Nov 2025 21:06:50 +0800 Subject: [PATCH 316/322] Fix .const missing block on with modifier block Example --- ```rust fn main() { unsafe {1}.$0 } ``` **Before this PR** ```rust fn main() { const unsafe {{1}} } ``` **After this PR** ```rust fn main() { const { unsafe {1} } } ``` --- .../ide-completion/src/completions/postfix.rs | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/crates/ide-completion/src/completions/postfix.rs b/crates/ide-completion/src/completions/postfix.rs index ab3f619fd7f5..d62b47152adb 100644 --- a/crates/ide-completion/src/completions/postfix.rs +++ b/crates/ide-completion/src/completions/postfix.rs @@ -13,7 +13,7 @@ use ide_db::{ }; use stdx::never; use syntax::{ - SyntaxKind::{BLOCK_EXPR, EXPR_STMT, FOR_EXPR, IF_EXPR, LOOP_EXPR, STMT_LIST, WHILE_EXPR}, + SyntaxKind::{EXPR_STMT, STMT_LIST}, T, TextRange, TextSize, ast::{self, AstNode, AstToken}, match_ast, @@ -253,18 +253,15 @@ pub(crate) fn complete_postfix( } } - let mut block_should_be_wrapped = true; - if dot_receiver.syntax().kind() == BLOCK_EXPR { - block_should_be_wrapped = false; - if let Some(parent) = dot_receiver.syntax().parent() - && matches!(parent.kind(), IF_EXPR | WHILE_EXPR | LOOP_EXPR | FOR_EXPR) - { - block_should_be_wrapped = true; - } + let block_should_be_wrapped = if let ast::Expr::BlockExpr(block) = dot_receiver { + block.modifier().is_some() || !block.is_standalone() + } else { + true }; { 
let (open_brace, close_brace) = if block_should_be_wrapped { ("{ ", " }") } else { ("", "") }; + // FIXME: Why add parentheses let (open_paren, close_paren) = if is_in_cond { ("(", ")") } else { ("", "") }; let unsafe_completion_string = format!("{open_paren}unsafe {open_brace}{receiver_text}{close_brace}{close_paren}"); @@ -842,6 +839,20 @@ fn main() { &format!("fn main() {{ let x = {kind} {{ if true {{1}} else {{2}} }} }}"), ); + if kind == "const" { + check_edit( + kind, + r#"fn main() { unsafe {1}.$0 }"#, + &format!("fn main() {{ {kind} {{ unsafe {{1}} }} }}"), + ); + } else { + check_edit( + kind, + r#"fn main() { const {1}.$0 }"#, + &format!("fn main() {{ {kind} {{ const {{1}} }} }}"), + ); + } + // completion will not be triggered check_edit( kind, From 0c593bef5002540f6a85792c5332e7a4ab82bce2 Mon Sep 17 00:00:00 2001 From: fgamador Date: Sun, 9 Nov 2025 11:47:01 -0800 Subject: [PATCH 317/322] Typos and distracting word choices --- docs/book/src/contributing/testing.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/book/src/contributing/testing.md b/docs/book/src/contributing/testing.md index ccee9b847b6e..b94bc16611f4 100644 --- a/docs/book/src/contributing/testing.md +++ b/docs/book/src/contributing/testing.md @@ -1,4 +1,4 @@ -rust-analyzer's testing is based on *snapshot tests*: a test is a piece of input text, usually a Rust code, and some output text. There is then some testing helper that runs the feature on the input text and compares the result to the output text. +rust-analyzer's testing is based on *snapshot tests*: a test is a piece of input text, usually Rust code, and some output text. There is then some testing helper that runs the feature on the input text and compares the result to the output text. rust-analyzer uses a combination of the crate [`expect-test`](https://docs.rs/expect-test) and a custom testing framework. 
@@ -20,7 +20,7 @@ fn main() { } ``` -Sometimes we want to check more that there are no type mismatches. For that we use other helpers. For example, often we want to assert that the type of some expression is some specific type. For that we use the `check_types()` function. It takes a Rust code string with custom annotation, that are common in our test suite. The general scheme of annotation is: +Sometimes we want to check more than that there are no type mismatches. For that we use other helpers. For example, often we want to assert that the type of some expression is some specific type. For that we use the `check_types()` function. It takes a Rust code string with custom annotation, which are common in our test suite. The general scheme of annotation is: - `$0` marks a position. What to do with it is determined by the testing helper. Commonly it denotes the cursor position in IDE tests (for example, hover). - `$0...$0` marks a range, commonly a selection in IDE tests. @@ -31,7 +31,7 @@ Sometimes we want to check more that there are no type mismatches. For that we u // ^^^^ hey ``` -`check_types()` uses labels to assert type: when you attach a label to a range, `check_types()` assert that the type of this range will be what written in the label. +`check_types()` uses labels to assert types: when you attach a label to a range, `check_types()` asserts that the type of this range will be what is written in the label. It's all too abstract without an example: ```rust @@ -67,9 +67,9 @@ fn main() { ); } ``` -The text inside the `expect![[]]` is determined by the helper, `check_infer()` in this case. For `check_infer()`, each line is a range in the source code (the range is counted in bytes and the source is trimmed, indentation is stripped), next to it there is the text in that range, or some part of it with `...` if it's too long, and finally comes the type of that range. +The text inside the `expect![[]]` is determined by the helper, `check_infer()` in this case. 
For `check_infer()`, each line is a range in the source code (the range is counted in bytes and the source is trimmed, so indentation is stripped); next to it there is the text in that range, or some part of it with `...` if it's too long, and finally comes the type of that range. -The important feature of `expect-test` is that it allows easy update of the expectation. Say you changed something in the code, maybe fixed a bug, and the output in `expect![[]]` needs to change. Or maybe you are writing it from scratch. Writing it by hand is very tedious and prone to mistakes. But `expect-trait` has a magic. You can set the environment variable `UPDATE_EXPECT=1`, then run the test, and it will update automatically! Some editors (e.g. VSCode) make it even more convenient: on them, on the top of every test that uses `expect-test`, next to the usual `Run | Debug` buttons, rust-analyzer also shows an `Update Expect` button. Clicking it will run that test in updating mode. +The important feature of `expect-test` is that it allows easy update of the expectation. Say you changed something in the code, maybe fixed a bug, and the output in `expect![[]]` needs to change. Or maybe you are writing it from scratch. Writing it by hand is very tedious and prone to mistakes. But `expect-test` has some magic. You can set the environment variable `UPDATE_EXPECT=1`, then run the test, and it will update automatically! Some editors (e.g. VSCode) make it even more convenient: on them, at the top of every test that uses `expect-test`, next to the usual `Run | Debug` buttons, rust-analyzer also shows an `Update Expect` button. Clicking it will run that test in updating mode. ## Rust code in the tests @@ -77,13 +77,13 @@ The first thing that you probably already noticed is that the Rust code in the t The syntax highlighting is very important, not just because it's nice to the eye: it's very easy to make mistakes in test code, and debugging that can be very hard. 
Often the test will just fail, printing an `{unknown}` type, and you'll have no clue what's going wrong. The syntax is the clue; if something isn't highlighted correctly, that probably means there is an error (there is one exception to this, which we'll discuss later). You can even set the semantic highlighting tag `unresolved_reference` to e.g. red, so you will see such things clearly. -Still, often you won't know what's going wrong. Why you can't fix the test, or worse, you expect it to fail but it doesn't. You can try the code on a real IDE to be sure it works. Later we'll give some tips to fix the test. +Still, often you won't know what's going wrong. Why you can't fix the test, or worse, you expect it to fail but it doesn't. You can try the code on a real IDE to be sure it works. Later we'll give some tips for fixing the test. ### The fixture -The Rust code in a test is not, a fact, a single Rust file. It has a mini-language that allows you to express multiple files, multiple crates, different configs, and more. All options are documented in `crates/test-utils/src/fixture.rs`, but here are some of the common ones: +The Rust code in a test is not, in fact, a single Rust file. It uses a mini-language that allows you to express multiple files, multiple crates, different configs, and more. All options are documented in `crates/test-utils/src/fixture.rs`, but here are some of the common ones: - - `//- minicore: flag1, flag2, ...`. This is by far the most common flag. Tests in rust-analyzer don't have access by default to any other type - not `Option`, not `Iterator`, not even `Sized`. This flag allows you to include parts of the `crates/test-utils/src/minicore.rs` file, which mimics `core`. All possible flags are listed at the top of `minicore` along with the flags they imply, then later you can see by `// region:flag` and `// endregion:flag` what code each flag enables. + - `//- minicore: flag1, flag2, ...`. This is by far the most common option. 
Tests in rust-analyzer don't have access by default to any other type - not `Option`, not `Iterator`, not even `Sized`. This option allows you to include parts of the `crates/test-utils/src/minicore.rs` file, which mimics `core`. All possible flags are listed at the top of `minicore` along with the flags they imply, then later you can see by `// region:flag` and `// endregion:flag` what code each flag enables. - `// /path/to/file.rs crate:crate deps:dep_a,dep_b`. The first component is the filename of the code that follows (until the next file). It is required, but only if you supply this line. Other components in this line are optional. They include `crate:crate_name`, to start a new crate, or `deps:dep_a,dep_b`, to declare dependencies between crates. You can also declare modules as usual in Rust - just name your paths `/foo.rs` or `/foo/mod.rs`, declare `mod foo` and that's it! So the following snippet: @@ -96,11 +96,11 @@ pub struct Bar; // /main.rs crate:main deps:foo use foo::Bar; ``` -Declares two crates `foo` and `main` where `main` depends on `foo`, with dependency in `Sized` and the `FnX` traits from `core`, and a module of `foo` called `bar`. +declares two crates `foo` and `main`, where `main` depends on `foo`, with dependencies on the `Sized` and `FnX` traits from `core`, and a module of `foo` called `bar`. -And as promised, here are some tips to make your test work: +And as promised, here are some tips for making your test work: - - If you use some type/trait, you must *always* include it in `minicore`. Note - not all types from core/std are available there, you can add new (under flags) if you need. And import them if they are not in the prelude. + - If you use some type/trait, you must *always* include it in `minicore`. Note - not all types from core/std are available there, but you can add new ones (under flags) if you need. And import them if they are not in the prelude. 
- If you use unsized types (`dyn Trait`/slices), you may want to include some or all of the following `minicore` flags: `sized`, `unsize`, `coerce_unsized`, `dispatch_from_dyn`. - If you use closures, consider including the `fn` minicore flag. Async closures need the `async_fn` flag. - - `sized` is commonly needed, consider adding it if you're stuck. + - `sized` is commonly needed, so consider adding it if you're stuck. From 1d0ae248dc2aa1ea0be610b787b0b95972d634da Mon Sep 17 00:00:00 2001 From: Joe Polny Date: Sun, 9 Nov 2025 18:15:48 -0500 Subject: [PATCH 318/322] feat: hide "msg" and "op" parameter names "msg" adds noise to `expect` and crossbeam's `Sender`. "op" adds noise to `map_err` --- crates/ide/src/inlay_hints/param_name.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/ide/src/inlay_hints/param_name.rs b/crates/ide/src/inlay_hints/param_name.rs index 4122c16d3e38..8d0348767316 100644 --- a/crates/ide/src/inlay_hints/param_name.rs +++ b/crates/ide/src/inlay_hints/param_name.rs @@ -111,7 +111,8 @@ fn get_callable<'db>( } const INSIGNIFICANT_METHOD_NAMES: &[&str] = &["clone", "as_ref", "into"]; -const INSIGNIFICANT_PARAMETER_NAMES: &[&str] = &["predicate", "value", "pat", "rhs", "other"]; +const INSIGNIFICANT_PARAMETER_NAMES: &[&str] = + &["predicate", "value", "pat", "rhs", "other", "msg", "op"]; fn should_hide_param_name_hint( sema: &Semantics<'_, RootDatabase>, From ef05935de05aa6e1c1316e9fdb684c091c957d5b Mon Sep 17 00:00:00 2001 From: Hegui Dai Date: Fri, 7 Nov 2025 17:52:37 +0800 Subject: [PATCH 319/322] support rename after adding label --- .../src/handlers/add_label_to_loop.rs | 99 ++++++++++++------- crates/ide-assists/src/tests/generated.rs | 6 +- 2 files changed, 69 insertions(+), 36 deletions(-) diff --git a/crates/ide-assists/src/handlers/add_label_to_loop.rs b/crates/ide-assists/src/handlers/add_label_to_loop.rs index d2b903447133..b84ad24cfcef 100644 --- a/crates/ide-assists/src/handlers/add_label_to_loop.rs +++ 
b/crates/ide-assists/src/handlers/add_label_to_loop.rs @@ -1,7 +1,14 @@ -use ide_db::syntax_helpers::node_ext::for_each_break_and_continue_expr; +use ide_db::{ + source_change::SourceChangeBuilder, syntax_helpers::node_ext::for_each_break_and_continue_expr, +}; use syntax::{ - T, - ast::{self, AstNode, HasLoopBody}, + SyntaxToken, T, + ast::{ + self, AstNode, HasLoopBody, + make::{self, tokens}, + syntax_factory::SyntaxFactory, + }, + syntax_editor::{Position, SyntaxEditor}, }; use crate::{AssistContext, AssistId, Assists}; @@ -21,9 +28,9 @@ use crate::{AssistContext, AssistId, Assists}; // -> // ``` // fn main() { -// 'l: loop { -// break 'l; -// continue 'l; +// ${1:'l}: loop { +// break ${2:'l}; +// continue ${0:'l}; // } // } // ``` @@ -39,30 +46,56 @@ pub(crate) fn add_label_to_loop(acc: &mut Assists, ctx: &AssistContext<'_>) -> O "Add Label", loop_expr.syntax().text_range(), |builder| { - builder.insert(loop_kw.text_range().start(), "'l: "); + let make = SyntaxFactory::with_mappings(); + let mut editor = builder.make_editor(loop_expr.syntax()); + + let label = make.lifetime("'l"); + let elements = vec![ + label.syntax().clone().into(), + make::token(T![:]).into(), + tokens::single_space().into(), + ]; + editor.insert_all(Position::before(&loop_kw), elements); + + if let Some(cap) = ctx.config.snippet_cap { + editor.add_annotation(label.syntax(), builder.make_placeholder_snippet(cap)); + } let loop_body = loop_expr.loop_body().and_then(|it| it.stmt_list()); - for_each_break_and_continue_expr( - loop_expr.label(), - loop_body, - &mut |expr| match expr { - ast::Expr::BreakExpr(break_expr) => { - if let Some(break_token) = break_expr.break_token() { - builder.insert(break_token.text_range().end(), " 'l") - } - } - ast::Expr::ContinueExpr(continue_expr) => { - if let Some(continue_token) = continue_expr.continue_token() { - builder.insert(continue_token.text_range().end(), " 'l") - } - } - _ => {} - }, - ); + for_each_break_and_continue_expr(loop_expr.label(), 
loop_body, &mut |expr| { + let token = match expr { + ast::Expr::BreakExpr(break_expr) => break_expr.break_token(), + ast::Expr::ContinueExpr(continue_expr) => continue_expr.continue_token(), + _ => return, + }; + if let Some(token) = token { + insert_label_after_token(&mut editor, &make, &token, ctx, builder); + } + }); + + editor.add_mappings(make.finish_with_mappings()); + builder.add_file_edits(ctx.vfs_file_id(), editor); + builder.rename(); }, ) } +fn insert_label_after_token( + editor: &mut SyntaxEditor, + make: &SyntaxFactory, + token: &SyntaxToken, + ctx: &AssistContext<'_>, + builder: &mut SourceChangeBuilder, +) { + let label = make.lifetime("'l"); + let elements = vec![tokens::single_space().into(), label.syntax().clone().into()]; + editor.insert_all(Position::after(token), elements); + + if let Some(cap) = ctx.config.snippet_cap { + editor.add_annotation(label.syntax(), builder.make_placeholder_snippet(cap)); + } +} + #[cfg(test)] mod tests { use crate::tests::{check_assist, check_assist_not_applicable}; @@ -82,9 +115,9 @@ fn main() { }"#, r#" fn main() { - 'l: loop { - break 'l; - continue 'l; + ${1:'l}: loop { + break ${2:'l}; + continue ${0:'l}; } }"#, ); @@ -107,9 +140,9 @@ fn main() { }"#, r#" fn main() { - 'l: loop { - break 'l; - continue 'l; + ${1:'l}: loop { + break ${2:'l}; + continue ${0:'l}; loop { break; continue; @@ -139,9 +172,9 @@ fn main() { loop { break; continue; - 'l: loop { - break 'l; - continue 'l; + ${1:'l}: loop { + break ${2:'l}; + continue ${0:'l}; } } }"#, diff --git a/crates/ide-assists/src/tests/generated.rs b/crates/ide-assists/src/tests/generated.rs index 7f0836abdf3c..160b31af0ae9 100644 --- a/crates/ide-assists/src/tests/generated.rs +++ b/crates/ide-assists/src/tests/generated.rs @@ -183,9 +183,9 @@ fn main() { "#####, r#####" fn main() { - 'l: loop { - break 'l; - continue 'l; + ${1:'l}: loop { + break ${2:'l}; + continue ${0:'l}; } } "#####, From 48dbd8402100beac46ded4467870f6c507225f5a Mon Sep 17 00:00:00 2001 From: 
A4-Tacks Date: Tue, 11 Nov 2025 15:19:20 +0800 Subject: [PATCH 320/322] Fix removed feature `doc_auto_cfg` for smol_str lib --- lib/smol_str/src/lib.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index a1d2c2f06744..31695b811747 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,5 +1,5 @@ #![cfg_attr(not(feature = "std"), no_std)] -#![cfg_attr(docsrs, feature(doc_auto_cfg))] +#![cfg_attr(docsrs, feature(doc_cfg))] extern crate alloc; @@ -333,6 +333,7 @@ impl AsRef<[u8]> for SmolStr { } #[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl AsRef for SmolStr { #[inline(always)] fn as_ref(&self) -> &std::ffi::OsStr { @@ -341,6 +342,7 @@ impl AsRef for SmolStr { } #[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl AsRef for SmolStr { #[inline(always)] fn as_ref(&self) -> &std::path::Path { @@ -941,6 +943,7 @@ impl From for SmolStr { } #[cfg(feature = "arbitrary")] +#[cfg_attr(docsrs, doc(cfg(feature = "arbitrary")))] impl<'a> arbitrary::Arbitrary<'a> for SmolStr { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result { let s = <&str>::arbitrary(u)?; @@ -949,8 +952,10 @@ impl<'a> arbitrary::Arbitrary<'a> for SmolStr { } #[cfg(feature = "borsh")] +#[cfg_attr(docsrs, doc(cfg(feature = "borsh")))] mod borsh; #[cfg(feature = "serde")] +#[cfg_attr(docsrs, doc(cfg(feature = "serde")))] mod serde; #[test] From 78cf393276b72bb466abeaa480d8327d76473076 Mon Sep 17 00:00:00 2001 From: Colin Finck Date: Wed, 12 Nov 2025 14:15:55 +0100 Subject: [PATCH 321/322] Build releases with static CRT for `-windows-msvc` targets. This increases the binary size of `rust-analyzer.exe` from 42.4 MB to 42.6 MB. Which should be acceptable for eliminating 7 DLL dependencies. 
--- xtask/src/dist.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/xtask/src/dist.rs b/xtask/src/dist.rs index 1b1fb532cae9..c0a75cc51094 100644 --- a/xtask/src/dist.rs +++ b/xtask/src/dist.rs @@ -134,8 +134,19 @@ fn dist_server( }; let mut cmd = build_command(sh, command, &target_name, features, dev_rel); + let mut rustflags = Vec::new(); + if let Some(profile) = pgo_profile { - cmd = cmd.env("RUSTFLAGS", format!("-Cprofile-use={}", profile.to_str().unwrap())); + rustflags.push(format!("-Cprofile-use={}", profile.to_str().unwrap())); + } + + if target_name.ends_with("-windows-msvc") { + // https://github.com/rust-lang/rust-analyzer/issues/20970 + rustflags.push("-Ctarget-feature=+crt-static".to_string()); + } + + if !rustflags.is_empty() { + cmd = cmd.env("RUSTFLAGS", rustflags.join(" ")); } cmd.run().context("cannot build Rust Analyzer")?; From 6d7ff66bfb930963b04d5d871df5c247ef26d0d5 Mon Sep 17 00:00:00 2001 From: Colin Finck Date: Wed, 12 Nov 2025 14:37:06 +0100 Subject: [PATCH 322/322] Make clippy happy. --- xtask/src/dist.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xtask/src/dist.rs b/xtask/src/dist.rs index c0a75cc51094..57a6a0eae1be 100644 --- a/xtask/src/dist.rs +++ b/xtask/src/dist.rs @@ -142,7 +142,7 @@ fn dist_server( if target_name.ends_with("-windows-msvc") { // https://github.com/rust-lang/rust-analyzer/issues/20970 - rustflags.push("-Ctarget-feature=+crt-static".to_string()); + rustflags.push("-Ctarget-feature=+crt-static".to_owned()); } if !rustflags.is_empty() {