Skip to content

Commit

Permalink
Merge pull request #307 from JasperDeSutter/dedupe-unescape
Browse files Browse the repository at this point in the history
  • Loading branch information
alerque authored May 5, 2024
2 parents 2aa38ae + b17e47f commit 466e100
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 50 deletions.
59 changes: 22 additions & 37 deletions fluent-syntax/src/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,16 @@ fn encode_unicode(s: Option<&str>) -> char {
/// assert_eq!(s, "Foo 😊 Bar");
/// ```
pub fn unescape_unicode<W>(w: &mut W, input: &str) -> fmt::Result
where
    W: fmt::Write,
{
    // `unescape` answers `false` when it leaves emitting `input` to the caller;
    // only in that case is the text forwarded to the sink unchanged.
    // A write error from either step is propagated to the caller.
    let handled = unescape(w, input)?;
    if !handled {
        w.write_str(input)?;
    }
    Ok(())
}

fn unescape<W>(w: &mut W, input: &str) -> Result<bool, std::fmt::Error>
where
W: fmt::Write,
{
Expand Down Expand Up @@ -100,10 +110,15 @@ where
w.write_char(new_char)?;
start = ptr;
}

if start == 0 {
return Ok(false);
}

if start != ptr {
w.write_str(&input[start..ptr])?;
}
Ok(())
Ok(true)
}

/// Unescapes to a `Cow<str>` optionally allocating.
Expand All @@ -119,41 +134,11 @@ where
/// );
/// ```
pub fn unescape_unicode_to_string(input: &str) -> Cow<str> {
let bytes = input.as_bytes();
let mut result = Cow::from(input);

let mut ptr = 0;

while let Some(b) = bytes.get(ptr) {
if b != &b'\\' {
if let Cow::Owned(ref mut s) = result {
s.push(*b as char);
}
ptr += 1;
continue;
}

if let Cow::Borrowed(_) = result {
result = Cow::from(&input[0..ptr]);
}

ptr += 1;

let new_char = match bytes.get(ptr) {
Some(b'\\') => '\\',
Some(b'"') => '"',
Some(u @ b'u') | Some(u @ b'U') => {
let start = ptr + 1;
let len = if u == &b'u' { 4 } else { 6 };
ptr += len;
input
.get(start..(start + len))
.map_or(UNKNOWN_CHAR, |slice| encode_unicode(Some(slice)))
}
_ => UNKNOWN_CHAR,
};
result.to_mut().push(new_char);
ptr += 1;
let mut result = String::new();
let owned = unescape(&mut result, input).expect("String write methods don't Err");
if owned {
Cow::Owned(result)
} else {
Cow::Borrowed(input)
}
result
}
36 changes: 23 additions & 13 deletions fluent-syntax/tests/unicode.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
use std::borrow::Cow;

use fluent_syntax::unicode::{unescape_unicode, unescape_unicode_to_string};

fn test_unescape_unicode(input: &str, output: &str) {
/// Asserts that decoding unicode escape sequences in `input` matches `output`.
/// When `borrowed` = true, asserts that the escaped value is passed back by reference.
fn test_unescape_unicode(input: &str, output: &str, borrowed: bool) {
let mut s = String::new();
unescape_unicode(&mut s, input).expect("Failed to write.");
assert_eq!(&s, output);
assert_eq!(s, output);
let result = unescape_unicode_to_string(input);
assert_eq!(&result, output);
assert_eq!(result, output);

assert_eq!(matches!(result, Cow::Borrowed(_)), borrowed);
}

#[test]
fn unescape_unicode_test() {
test_unescape_unicode("foo", "foo");
test_unescape_unicode("foo \\\\", "foo \\");
test_unescape_unicode("foo \\\"", "foo \"");
test_unescape_unicode("foo \\\\ faa", "foo \\ faa");
test_unescape_unicode("foo \\\\ faa \\\\ fii", "foo \\ faa \\ fii");
test_unescape_unicode("foo \\\\\\\" faa \\\"\\\\ fii", "foo \\\" faa \"\\ fii");
test_unescape_unicode("\\u0041\\u004F", "AO");
test_unescape_unicode("\\uA", "�");
test_unescape_unicode("\\uA0Pl", "�");
test_unescape_unicode("\\d Foo", "� Foo");
test_unescape_unicode("foo", "foo", true);
test_unescape_unicode("foo \\\\", "foo \\", false);
test_unescape_unicode("foo \\\"", "foo \"", false);
test_unescape_unicode("foo \\\\ faa", "foo \\ faa", false);
test_unescape_unicode("foo \\\\ faa \\\\ fii", "foo \\ faa \\ fii", false);
test_unescape_unicode(
"foo \\\\\\\" faa \\\"\\\\ fii",
"foo \\\" faa \"\\ fii",
false,
);
test_unescape_unicode("\\u0041\\u004F", "AO", false);
test_unescape_unicode("\\uA", "�", false);
test_unescape_unicode("\\uA0Pl", "�", false);
test_unescape_unicode("\\d Foo", "� Foo", false);
}

0 comments on commit 466e100

Please sign in to comment.