From 7aca28833007885d02e5b41d6a7affc17b92fa10 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 14 Oct 2020 11:59:56 -0700 Subject: [PATCH] Move LossyUtf8 struct out of util module --- lib/binding_rust/lib.rs | 55 ++++++++++++++++++++++++++++++++++++++-- lib/binding_rust/util.rs | 53 -------------------------------------- 2 files changed, 53 insertions(+), 55 deletions(-) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index e64833635f..0b0097f93e 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -12,8 +12,6 @@ use std::ptr::NonNull; use std::sync::atomic::AtomicUsize; use std::{char, fmt, hash, iter, ptr, slice, str, u16}; -pub use util::LossyUtf8; - /// The latest ABI version that is supported by the current version of the /// library. /// @@ -184,6 +182,13 @@ enum TextPredicate { CaptureMatchString(u32, regex::bytes::Regex, bool), } +// TODO: Remove this struct at some point, if `core::str::lossy::Utf8Lossy` +// is ever stabilized. +pub struct LossyUtf8<'a> { + bytes: &'a [u8], + in_replacement: bool, +} + impl Language { /// Get the ABI version number that indicates which version of the Tree-sitter CLI /// that was used to generate this `Language`. 
@@ -1832,6 +1837,52 @@ impl<'a> Into for &'a InputEdit { } } +impl<'a> LossyUtf8<'a> { + pub fn new(bytes: &'a [u8]) -> Self { + LossyUtf8 { + bytes, + in_replacement: false, + } + } +} + +impl<'a> Iterator for LossyUtf8<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if self.bytes.is_empty() { + return None; + } + if self.in_replacement { + self.in_replacement = false; + return Some("\u{fffd}"); + } + match std::str::from_utf8(self.bytes) { + Ok(valid) => { + self.bytes = &[]; + Some(valid) + } + Err(error) => { + if let Some(error_len) = error.error_len() { + let error_start = error.valid_up_to(); + if error_start > 0 { + let result = + unsafe { std::str::from_utf8_unchecked(&self.bytes[..error_start]) }; + self.bytes = &self.bytes[(error_start + error_len)..]; + self.in_replacement = true; + Some(result) + } else { + self.bytes = &self.bytes[error_len..]; + Some("\u{fffd}") + } + } else { + None + } + } + } + } +} + fn predicate_error(row: usize, message: String) -> QueryError { QueryError { kind: QueryErrorKind::Predicate, diff --git a/lib/binding_rust/util.rs b/lib/binding_rust/util.rs index e2660c1451..1a4ac1b77f 100644 --- a/lib/binding_rust/util.rs +++ b/lib/binding_rust/util.rs @@ -72,59 +72,6 @@ pub struct CBufferIter { i: usize, } -// TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` -// is ever stabilized. 
-pub struct LossyUtf8<'a> { - bytes: &'a [u8], - in_replacement: bool, -} - -impl<'a> LossyUtf8<'a> { - pub fn new(bytes: &'a [u8]) -> Self { - LossyUtf8 { - bytes, - in_replacement: false, - } - } -} - -impl<'a> Iterator for LossyUtf8<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option<&'a str> { - if self.bytes.is_empty() { - return None; - } - if self.in_replacement { - self.in_replacement = false; - return Some("\u{fffd}"); - } - match std::str::from_utf8(self.bytes) { - Ok(valid) => { - self.bytes = &[]; - Some(valid) - } - Err(error) => { - if let Some(error_len) = error.error_len() { - let error_start = error.valid_up_to(); - if error_start > 0 { - let result = - unsafe { std::str::from_utf8_unchecked(&self.bytes[..error_start]) }; - self.bytes = &self.bytes[(error_start + error_len)..]; - self.in_replacement = true; - Some(result) - } else { - self.bytes = &self.bytes[error_len..]; - Some("\u{fffd}") - } - } else { - None - } - } - } - } -} - impl CBufferIter { pub unsafe fn new(ptr: *mut T, count: usize) -> Self { Self { ptr, count, i: 0 }