Skip to content

Commit

Permalink
Move LossyUtf8 struct out of util module
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbrunsfeld committed Oct 14, 2020
1 parent 0a46033 commit 7aca288
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 55 deletions.
55 changes: 53 additions & 2 deletions lib/binding_rust/lib.rs
Expand Up @@ -12,8 +12,6 @@ use std::ptr::NonNull;
use std::sync::atomic::AtomicUsize;
use std::{char, fmt, hash, iter, ptr, slice, str, u16};

pub use util::LossyUtf8;

/// The latest ABI version that is supported by the current version of the
/// library.
///
Expand Down Expand Up @@ -184,6 +182,13 @@ enum TextPredicate {
CaptureMatchString(u32, regex::bytes::Regex, bool),
}

// TODO: Remove this struct at some point, if `core::str::lossy::Utf8Lossy`
// is ever stabilized.
/// An iterator over a byte slice that yields `&str` chunks, substituting
/// `"\u{fffd}"` (the Unicode replacement character) for byte sequences that
/// are not valid UTF-8.
pub struct LossyUtf8<'a> {
/// The portion of the input that has not been consumed yet.
bytes: &'a [u8],
/// Set after a valid chunk that was immediately followed by an invalid
/// sequence has been returned; tells the next call to yield `"\u{fffd}"`.
in_replacement: bool,
}

impl Language {
/// Get the ABI version number that indicates which version of the Tree-sitter CLI
/// that was used to generate this `Language`.
Expand Down Expand Up @@ -1832,6 +1837,52 @@ impl<'a> Into<ffi::TSInputEdit> for &'a InputEdit {
}
}

impl<'a> LossyUtf8<'a> {
pub fn new(bytes: &'a [u8]) -> Self {
LossyUtf8 {
bytes,
in_replacement: false,
}
}
}

impl<'a> Iterator for LossyUtf8<'a> {
    type Item = &'a str;

    /// Returns the next chunk of the input as a `&str`.
    ///
    /// Valid UTF-8 runs are returned as borrowed slices of the input. Each
    /// invalid byte sequence is reported as a single `"\u{fffd}"` chunk, so
    /// concatenating all chunks gives the same text as
    /// `String::from_utf8_lossy` on the original bytes.
    ///
    /// Returns `None` once the input is exhausted and no replacement
    /// character is pending.
    fn next(&mut self) -> Option<&'a str> {
        // Flush a pending replacement BEFORE checking for exhaustion: when the
        // invalid sequence was the last thing in the input, `bytes` is already
        // empty here but the replacement character has not been emitted yet.
        if self.in_replacement {
            self.in_replacement = false;
            return Some("\u{fffd}");
        }
        if self.bytes.is_empty() {
            return None;
        }

        let bytes: &'a [u8] = self.bytes;
        match std::str::from_utf8(bytes) {
            Ok(valid) => {
                self.bytes = &[];
                Some(valid)
            }
            Err(error) => {
                let (valid, rest) = bytes.split_at(error.valid_up_to());
                // `error_len()` is `None` when the input ends in the middle of
                // a multi-byte sequence. Treat the whole truncated tail as one
                // invalid sequence instead of silently dropping it (and the
                // valid text preceding it).
                let error_len = error.error_len().unwrap_or(rest.len());
                self.bytes = &rest[error_len..];

                if valid.is_empty() {
                    Some("\u{fffd}")
                } else {
                    self.in_replacement = true;
                    // SAFETY: `Utf8Error::valid_up_to` guarantees that
                    // `bytes[..valid_up_to]` is valid UTF-8.
                    Some(unsafe { std::str::from_utf8_unchecked(valid) })
                }
            }
        }
    }
}

fn predicate_error(row: usize, message: String) -> QueryError {
QueryError {
kind: QueryErrorKind::Predicate,
Expand Down
53 changes: 0 additions & 53 deletions lib/binding_rust/util.rs
Expand Up @@ -72,59 +72,6 @@ pub struct CBufferIter<T> {
i: usize,
}

// TODO: Remove this struct at some point, if `core::str::lossy::Utf8Lossy`
// is ever stabilized.
/// An iterator over a byte slice that yields `&str` chunks, substituting
/// `"\u{fffd}"` (the Unicode replacement character) for every byte sequence
/// that is not valid UTF-8.
pub struct LossyUtf8<'a> {
    /// The portion of the input that has not been consumed yet.
    bytes: &'a [u8],
    /// `true` when a valid chunk was just returned and the invalid sequence
    /// that followed it still has to be reported as `"\u{fffd}"`.
    in_replacement: bool,
}

impl<'a> LossyUtf8<'a> {
    /// Creates an iterator over `bytes`, starting at its first byte.
    pub fn new(bytes: &'a [u8]) -> Self {
        LossyUtf8 {
            bytes,
            in_replacement: false,
        }
    }
}

impl<'a> Iterator for LossyUtf8<'a> {
    type Item = &'a str;

    /// Returns the next chunk: either a borrowed run of valid UTF-8 or a
    /// single `"\u{fffd}"` standing in for one invalid byte sequence.
    ///
    /// Concatenating all chunks gives the same text as
    /// `String::from_utf8_lossy` on the original bytes.
    fn next(&mut self) -> Option<&'a str> {
        // The pending replacement has to be emitted before the exhaustion
        // check; otherwise an invalid sequence at the very end of the input
        // (which leaves `bytes` empty) would never be reported.
        if self.in_replacement {
            self.in_replacement = false;
            return Some("\u{fffd}");
        }
        if self.bytes.is_empty() {
            return None;
        }

        let bytes: &'a [u8] = self.bytes;
        match std::str::from_utf8(bytes) {
            Ok(valid) => {
                self.bytes = &[];
                Some(valid)
            }
            Err(error) => {
                let (valid, rest) = bytes.split_at(error.valid_up_to());
                // `error_len()` returns `None` for input that stops in the
                // middle of a multi-byte sequence; the truncated tail is then
                // consumed as a single invalid sequence rather than causing
                // the valid prefix and the replacement to be dropped.
                let error_len = error.error_len().unwrap_or(rest.len());
                self.bytes = &rest[error_len..];

                if valid.is_empty() {
                    Some("\u{fffd}")
                } else {
                    self.in_replacement = true;
                    // SAFETY: `Utf8Error::valid_up_to` guarantees that
                    // `bytes[..valid_up_to]` is valid UTF-8.
                    Some(unsafe { std::str::from_utf8_unchecked(valid) })
                }
            }
        }
    }
}

impl<T> CBufferIter<T> {
pub unsafe fn new(ptr: *mut T, count: usize) -> Self {
Self { ptr, count, i: 0 }
Expand Down

0 comments on commit 7aca288

Please sign in to comment.