Skip to content
This repository was archived by the owner on Jul 10, 2023. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ To decode a byte sequence with invalid sequences:

~~~~ {.rust}
all::ISO_8859_6.decode([65,99,109,101,169], DecodeStrict); // => Err(...)
all::ISO_8859_6.decode([65,99,109,101,169], DecodeReplace); // => Ok(~"Acme\ufffd")
all::ISO_8859_6.decode([65,99,109,101,169], DecodeIgnore); // => Ok(~"Acme")
all::ISO_8859_6.decode([65,99,109,101,169], DecodeReplace); // => Ok(StrBuf::from_str("Acme\ufffd"))
all::ISO_8859_6.decode([65,99,109,101,169], DecodeIgnore); // => Ok(StrBuf::from_str("Acme"))
~~~~

A practical example of custom encoder traps:
Expand All @@ -56,9 +56,9 @@ fn hex_ncr_escape(_encoder: &Encoder, input: &str, output: &mut ByteWriter) -> b
}
static HexNcrEscape: EncoderTrap = EncoderTrap(hex_ncr_escape);

let orig = ~"Hello, 世界!";
let orig = "Hello, 世界!".to_owned();
let encoded = all::ASCII.encode(orig, HexNcrEscape).unwrap();
all::ASCII.decode(encoded.as_slice(), DecodeStrict); // => Ok(~"Hello, 世界!")
all::ASCII.decode(encoded.as_slice(), DecodeStrict); // => Ok(StrBuf::from_str("Hello, 世界!"))
~~~~

Getting the encoding from the string label,
Expand All @@ -69,10 +69,10 @@ let euckr = label::encoding_from_whatwg_label("euc-kr").unwrap();
euckr.name(); // => "windows-949"
euckr.whatwg_name(); // => Some("euc-kr"), for the sake of compatibility
let broken = &[0xbf, 0xec, 0xbf, 0xcd, 0xff, 0xbe, 0xd3];
euckr.decode(broken, DecodeReplace); // => Ok(~"\uc6b0\uc640\ufffd\uc559")
euckr.decode(broken, DecodeReplace); // => Ok(StrBuf::from_str("\uc6b0\uc640\ufffd\uc559"))

// corresponding rust-encoding native API:
all::WINDOWS_949.decode(broken, DecodeReplace); // => Ok(~"\uc6b0\uc640\ufffd\uc559")
all::WINDOWS_949.decode(broken, DecodeReplace); // => Ok(StrBuf::from_str("\uc6b0\uc640\ufffd\uc559"))
~~~~

Supported Encodings
Expand Down
12 changes: 6 additions & 6 deletions src/encoding/codec/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,20 @@ pub struct ASCIIEncoding;

impl Encoding for ASCIIEncoding {
fn name(&self) -> &'static str { "ascii" }
fn encoder(&self) -> ~Encoder { ASCIIEncoder::new() }
fn decoder(&self) -> ~Decoder { ASCIIDecoder::new() }
fn encoder(&self) -> Box<Encoder> { ASCIIEncoder::new() }
fn decoder(&self) -> Box<Decoder> { ASCIIDecoder::new() }
}

/// An encoder for ASCII.
#[deriving(Clone)]
pub struct ASCIIEncoder;

impl ASCIIEncoder {
pub fn new() -> ~Encoder { ~ASCIIEncoder as ~Encoder }
pub fn new() -> Box<Encoder> { box ASCIIEncoder as Box<Encoder> }
}

impl Encoder for ASCIIEncoder {
fn from_self(&self) -> ~Encoder { ASCIIEncoder::new() }
fn from_self(&self) -> Box<Encoder> { ASCIIEncoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option<CodecError>) {
Expand Down Expand Up @@ -59,11 +59,11 @@ impl Encoder for ASCIIEncoder {
pub struct ASCIIDecoder;

impl ASCIIDecoder {
pub fn new() -> ~Decoder { ~ASCIIDecoder as ~Decoder }
pub fn new() -> Box<Decoder> { box ASCIIDecoder as Box<Decoder> }
}

impl Decoder for ASCIIDecoder {
fn from_self(&self) -> ~Decoder { ASCIIDecoder::new() }
fn from_self(&self) -> Box<Decoder> { ASCIIDecoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option<CodecError>) {
Expand Down
15 changes: 8 additions & 7 deletions src/encoding/codec/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,26 @@ pub struct ErrorEncoding;

impl Encoding for ErrorEncoding {
fn name(&self) -> &'static str { "error" }
fn encoder(&self) -> ~Encoder { ErrorEncoder::new() }
fn decoder(&self) -> ~Decoder { ErrorDecoder::new() }
fn encoder(&self) -> Box<Encoder> { ErrorEncoder::new() }
fn decoder(&self) -> Box<Decoder> { ErrorDecoder::new() }
}

/// An encoder that always returns error.
#[deriving(Clone)]
pub struct ErrorEncoder;

impl ErrorEncoder {
pub fn new() -> ~Encoder { ~ErrorEncoder as ~Encoder }
pub fn new() -> Box<Encoder> { box ErrorEncoder as Box<Encoder> }
}

impl Encoder for ErrorEncoder {
fn from_self(&self) -> ~Encoder { ErrorEncoder::new() }
fn from_self(&self) -> Box<Encoder> { ErrorEncoder::new() }

fn raw_feed(&mut self, input: &str, _output: &mut ByteWriter) -> (uint, Option<CodecError>) {
if input.len() > 0 {
let str::CharRange {ch: _, next} = input.char_range_at(0);
(0, Some(CodecError { upto: next, cause: "unrepresentable character".into_maybe_owned() }))
(0, Some(CodecError { upto: next,
cause: "unrepresentable character".into_maybe_owned() }))
} else {
(0, None)
}
Expand All @@ -47,11 +48,11 @@ impl Encoder for ErrorEncoder {
pub struct ErrorDecoder;

impl ErrorDecoder {
pub fn new() -> ~Decoder { ~ErrorDecoder as ~Decoder }
pub fn new() -> Box<Decoder> { box ErrorDecoder as Box<Decoder> }
}

impl Decoder for ErrorDecoder {
fn from_self(&self) -> ~Decoder { ErrorDecoder::new() }
fn from_self(&self) -> Box<Decoder> { ErrorDecoder::new() }

fn raw_feed(&mut self, input: &[u8], _output: &mut StringWriter) -> (uint, Option<CodecError>) {
if input.len() > 0 {
Expand Down
24 changes: 12 additions & 12 deletions src/encoding/codec/japanese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,20 @@ pub struct EUCJPEncoding;
impl Encoding for EUCJPEncoding {
fn name(&self) -> &'static str { "euc-jp" }
fn whatwg_name(&self) -> Option<&'static str> { Some("euc-jp") }
fn encoder(&self) -> ~Encoder { EUCJPEncoder::new() }
fn decoder(&self) -> ~Decoder { EUCJP0212Decoder::new() }
fn encoder(&self) -> Box<Encoder> { EUCJPEncoder::new() }
fn decoder(&self) -> Box<Decoder> { EUCJP0212Decoder::new() }
}

/// An encoder for EUC-JP with unused G3 character set.
#[deriving(Clone)]
pub struct EUCJPEncoder;

impl EUCJPEncoder {
pub fn new() -> ~Encoder { ~EUCJPEncoder as ~Encoder }
pub fn new() -> Box<Encoder> { box EUCJPEncoder as Box<Encoder> }
}

impl Encoder for EUCJPEncoder {
fn from_self(&self) -> ~Encoder { EUCJPEncoder::new() }
fn from_self(&self) -> Box<Encoder> { EUCJPEncoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option<CodecError>) {
Expand Down Expand Up @@ -89,11 +89,11 @@ pub struct EUCJP0212Decoder {
}

impl EUCJP0212Decoder {
pub fn new() -> ~Decoder { ~EUCJP0212Decoder { first: 0, second: 0 } as ~Decoder }
pub fn new() -> Box<Decoder> { box EUCJP0212Decoder { first: 0, second: 0 } as Box<Decoder> }
}

impl Decoder for EUCJP0212Decoder {
fn from_self(&self) -> ~Decoder { EUCJP0212Decoder::new() }
fn from_self(&self) -> Box<Decoder> { EUCJP0212Decoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option<CodecError>) {
Expand Down Expand Up @@ -319,20 +319,20 @@ pub struct Windows31JEncoding;
impl Encoding for Windows31JEncoding {
fn name(&self) -> &'static str { "windows-31j" }
fn whatwg_name(&self) -> Option<&'static str> { Some("shift_jis") } // WHATWG compatibility
fn encoder(&self) -> ~Encoder { Windows31JEncoder::new() }
fn decoder(&self) -> ~Decoder { Windows31JDecoder::new() }
fn encoder(&self) -> Box<Encoder> { Windows31JEncoder::new() }
fn decoder(&self) -> Box<Decoder> { Windows31JDecoder::new() }
}

/// An encoder for Shift_JIS with IBM/NEC extensions.
#[deriving(Clone)]
pub struct Windows31JEncoder;

impl Windows31JEncoder {
pub fn new() -> ~Encoder { ~Windows31JEncoder as ~Encoder }
pub fn new() -> Box<Encoder> { box Windows31JEncoder as Box<Encoder> }
}

impl Encoder for Windows31JEncoder {
fn from_self(&self) -> ~Encoder { Windows31JEncoder::new() }
fn from_self(&self) -> Box<Encoder> { Windows31JEncoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option<CodecError>) {
Expand Down Expand Up @@ -376,11 +376,11 @@ pub struct Windows31JDecoder {
}

impl Windows31JDecoder {
pub fn new() -> ~Decoder { ~Windows31JDecoder { lead: 0 } as ~Decoder }
pub fn new() -> Box<Decoder> { box Windows31JDecoder { lead: 0 } as Box<Decoder> }
}

impl Decoder for Windows31JDecoder {
fn from_self(&self) -> ~Decoder { Windows31JDecoder::new() }
fn from_self(&self) -> Box<Decoder> { Windows31JDecoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option<CodecError>) {
Expand Down
12 changes: 6 additions & 6 deletions src/encoding/codec/korean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,20 @@ pub struct Windows949Encoding;
impl Encoding for Windows949Encoding {
fn name(&self) -> &'static str { "windows-949" }
fn whatwg_name(&self) -> Option<&'static str> { Some("euc-kr") } // WHATWG compatibility
fn encoder(&self) -> ~Encoder { Windows949Encoder::new() }
fn decoder(&self) -> ~Decoder { Windows949Decoder::new() }
fn encoder(&self) -> Box<Encoder> { Windows949Encoder::new() }
fn decoder(&self) -> Box<Decoder> { Windows949Decoder::new() }
}

/// An encoder for Windows code page 949.
#[deriving(Clone)]
pub struct Windows949Encoder;

impl Windows949Encoder {
pub fn new() -> ~Encoder { ~Windows949Encoder as ~Encoder }
pub fn new() -> Box<Encoder> { box Windows949Encoder as Box<Encoder> }
}

impl Encoder for Windows949Encoder {
fn from_self(&self) -> ~Encoder { Windows949Encoder::new() }
fn from_self(&self) -> Box<Encoder> { Windows949Encoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option<CodecError>) {
Expand Down Expand Up @@ -83,11 +83,11 @@ pub struct Windows949Decoder {
}

impl Windows949Decoder {
pub fn new() -> ~Decoder { ~Windows949Decoder { lead: 0 } as ~Decoder }
pub fn new() -> Box<Decoder> { box Windows949Decoder { lead: 0 } as Box<Decoder> }
}

impl Decoder for Windows949Decoder {
fn from_self(&self) -> ~Decoder { Windows949Decoder::new() }
fn from_self(&self) -> Box<Decoder> { Windows949Decoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option<CodecError>) {
Expand Down
26 changes: 14 additions & 12 deletions src/encoding/codec/simpchinese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,20 @@ pub struct GBK18030Encoding;
impl Encoding for GBK18030Encoding {
fn name(&self) -> &'static str { "gbk18030" }
fn whatwg_name(&self) -> Option<&'static str> { Some("gbk") } // WHATWG compatibility
fn encoder(&self) -> ~Encoder { GBK18030Encoder::new() }
fn decoder(&self) -> ~Decoder { GBK18030Decoder::new() }
fn encoder(&self) -> Box<Encoder> { GBK18030Encoder::new() }
fn decoder(&self) -> Box<Decoder> { GBK18030Decoder::new() }
}

/// An encoder for a one- and two-byte subset of GB 18030.
#[deriving(Clone)]
pub struct GBK18030Encoder;

impl GBK18030Encoder {
pub fn new() -> ~Encoder { ~GBK18030Encoder as ~Encoder }
pub fn new() -> Box<Encoder> { box GBK18030Encoder as Box<Encoder> }
}

impl Encoder for GBK18030Encoder {
fn from_self(&self) -> ~Encoder { GBK18030Encoder::new() }
fn from_self(&self) -> Box<Encoder> { GBK18030Encoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option<CodecError>) {
Expand Down Expand Up @@ -86,11 +86,11 @@ pub struct GBK18030Decoder {
}

impl GBK18030Decoder {
pub fn new() -> ~Decoder { ~GBK18030Decoder { first: 0 } as ~Decoder }
pub fn new() -> Box<Decoder> { box GBK18030Decoder { first: 0 } as Box<Decoder> }
}

impl Decoder for GBK18030Decoder {
fn from_self(&self) -> ~Decoder { GBK18030Decoder::new() }
fn from_self(&self) -> Box<Decoder> { GBK18030Decoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option<CodecError>) {
Expand Down Expand Up @@ -240,20 +240,20 @@ pub struct GB18030Encoding;
impl Encoding for GB18030Encoding {
fn name(&self) -> &'static str { "gb18030" }
fn whatwg_name(&self) -> Option<&'static str> { Some("gb18030") }
fn encoder(&self) -> ~Encoder { GB18030Encoder::new() }
fn decoder(&self) -> ~Decoder { GB18030Decoder::new() }
fn encoder(&self) -> Box<Encoder> { GB18030Encoder::new() }
fn decoder(&self) -> Box<Decoder> { GB18030Decoder::new() }
}

/// An encoder for GB 18030.
#[deriving(Clone)]
pub struct GB18030Encoder;

impl GB18030Encoder {
pub fn new() -> ~Encoder { ~GB18030Encoder as ~Encoder }
pub fn new() -> Box<Encoder> { box GB18030Encoder as Box<Encoder> }
}

impl Encoder for GB18030Encoder {
fn from_self(&self) -> ~Encoder { GB18030Encoder::new() }
fn from_self(&self) -> Box<Encoder> { GB18030Encoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option<CodecError>) {
Expand Down Expand Up @@ -300,11 +300,13 @@ pub struct GB18030Decoder {
}

impl GB18030Decoder {
pub fn new() -> ~Decoder { ~GB18030Decoder { first: 0, second: 0, third: 0 } as ~Decoder }
pub fn new() -> Box<Decoder> {
box GB18030Decoder { first: 0, second: 0, third: 0 } as Box<Decoder>
}
}

impl Decoder for GB18030Decoder {
fn from_self(&self) -> ~Decoder { GB18030Decoder::new() }
fn from_self(&self) -> Box<Decoder> { GB18030Decoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option<CodecError>) {
Expand Down
16 changes: 8 additions & 8 deletions src/encoding/codec/singlebyte.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ pub struct SingleByteEncoding {
impl Encoding for SingleByteEncoding {
fn name(&self) -> &'static str { self.name }
fn whatwg_name(&self) -> Option<&'static str> { self.whatwg_name }
fn encoder(&'static self) -> ~Encoder { SingleByteEncoder::new(self.index_backward) }
fn decoder(&'static self) -> ~Decoder { SingleByteDecoder::new(self.index_forward) }
fn encoder(&'static self) -> Box<Encoder> { SingleByteEncoder::new(self.index_backward) }
fn decoder(&'static self) -> Box<Decoder> { SingleByteDecoder::new(self.index_forward) }
}

/// An encoder for single-byte encodings based on ASCII.
Expand All @@ -29,13 +29,13 @@ pub struct SingleByteEncoder {
}

impl SingleByteEncoder {
pub fn new(index_backward: extern "Rust" fn(u16) -> u8) -> ~Encoder {
~SingleByteEncoder { index_backward: index_backward } as ~Encoder
pub fn new(index_backward: extern "Rust" fn(u16) -> u8) -> Box<Encoder> {
box SingleByteEncoder { index_backward: index_backward } as Box<Encoder>
}
}

impl Encoder for SingleByteEncoder {
fn from_self(&self) -> ~Encoder { SingleByteEncoder::new(self.index_backward) }
fn from_self(&self) -> Box<Encoder> { SingleByteEncoder::new(self.index_backward) }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (uint, Option<CodecError>) {
Expand Down Expand Up @@ -72,13 +72,13 @@ pub struct SingleByteDecoder {
}

impl SingleByteDecoder {
pub fn new(index_forward: extern "Rust" fn(u8) -> u16) -> ~Decoder {
~SingleByteDecoder { index_forward: index_forward } as ~Decoder
pub fn new(index_forward: extern "Rust" fn(u8) -> u16) -> Box<Decoder> {
box SingleByteDecoder { index_forward: index_forward } as Box<Decoder>
}
}

impl Decoder for SingleByteDecoder {
fn from_self(&self) -> ~Decoder { SingleByteDecoder::new(self.index_forward) }
fn from_self(&self) -> Box<Decoder> { SingleByteDecoder::new(self.index_forward) }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (uint, Option<CodecError>) {
Expand Down
Loading