Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
[package]
name = "ucs2"
version = "0.2.0"
authors = ["Gabriel Majeri <gabriel.majeri6@gmail.com>", "Fredrik Aleksander"]
version = "0.3.0"
authors = ["Gabriel Majeri <gabriel.majeri6@gmail.com>", "Fredrik Aleksander", "Isaac Woods"]
description = "UCS-2 decoding and encoding functions"
repository = "https://github.com/GabrielMajeri/ucs2-rs"
readme = "README.md"
keywords = ["ucs2", "no-std", "encoding"]
categories = ["encoding", "no-std"]
license = "MPL-2.0"
edition = "2018"

[dependencies]
bit_field = "0.10"

[badges]
is-it-maintained-issue-resolution = { repository = "https://github.com/GabrielMajeri/ucs2-rs" }
Expand Down
60 changes: 60 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#[deny(missing_docs)]
#[deny(clippy::all)]
use bit_field::BitField;

/// Possible errors returned by the API.
#[derive(Debug, Copy, Clone)]
Expand Down Expand Up @@ -91,6 +92,52 @@ where
Ok(())
}

/// Decodes a UCS-2 string into UTF-8, writing the resulting bytes into `output`.
///
/// Every input code unit is emitted as one, two or three UTF-8 bytes,
/// depending on its numeric value.
///
/// On success, returns the number of bytes written to `output`.
///
/// # Errors
///
/// Returns `Error::BufferOverflow` as soon as a code unit does not fit in the
/// remaining space of `output`. Bytes for the preceding code units have
/// already been written at that point.
pub fn decode(input: &[u16], output: &mut [u8]) -> Result<usize> {
    let capacity = output.len();
    let mut written = 0;

    for &unit in input {
        // UCS-2 is limited to the Basic Multilingual Plane, so a code unit
        // never needs more than three UTF-8 bytes.
        let width = match unit {
            0x0000..=0x007F => 1,
            0x0080..=0x07FF => 2,
            _ => 3,
        };

        // Refuse to start a sequence that cannot be stored completely.
        if written + width > capacity {
            return Err(Error::BufferOverflow);
        }

        match width {
            1 => {
                // Plain ASCII: the code unit is the byte.
                output[written] = unit as u8;
            }
            2 => {
                // 110xxxxx 10xxxxxx
                output[written] = 0b1100_0000 | unit.get_bits(6..11) as u8;
                output[written + 1] = 0b1000_0000 | unit.get_bits(0..6) as u8;
            }
            _ => {
                // 1110xxxx 10xxxxxx 10xxxxxx
                output[written] = 0b1110_0000 | unit.get_bits(12..16) as u8;
                output[written + 1] = 0b1000_0000 | unit.get_bits(6..12) as u8;
                output[written + 2] = 0b1000_0000 | unit.get_bits(0..6) as u8;
            }
        }

        written += width;
    }

    Ok(written)
}

#[cfg(test)]
mod tests {
use super::*;
Expand All @@ -105,4 +152,17 @@ mod tests {

assert_eq!(buffer[..], [0x0151, 0x044D, 0x254B]);
}

#[test]
fn decoding() {
    // Four characters whose UTF-8 forms take 1, 2, 3 and 3 bytes respectively,
    // so every branch of `decode` is exercised and exactly 9 bytes are produced.
    let text = "$¢ह한";

    // First turn the text into UCS-2 so there is something to decode.
    let mut ucs2 = [0u16; 4];
    let unit_count = encode(text, &mut ucs2).unwrap();
    assert_eq!(unit_count, 4);

    // Decoding must fill the buffer exactly and reproduce the original text.
    let mut utf8 = [0u8; 9];
    let byte_count = decode(&ucs2, &mut utf8).unwrap();
    assert_eq!(byte_count, 9);
    assert_eq!(core::str::from_utf8(&utf8[..9]), Ok(text));
}
}