diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 92f7014e..1048c1e8 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,9 +1,10 @@ -# Next +# 0.13.0 - Config methods are const - Added `EncoderStringWriter` to allow encoding directly to a String - `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work) - - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result` instead of `Result<()>`. + - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result` instead of `Result<()>`. If you were calling `finish()` explicitly, you will now need to use `let _ = foo.finish()` instead of just `foo.finish()` to avoid a warning about the unused value. +- When decoding input that has both an invalid length and an invalid symbol as the last byte, `InvalidByte` will be emitted instead of `InvalidLength` to make the problem more obvious. # 0.12.2 diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index 3d27bbb7..ddcb7349 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -130,7 +130,7 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) { b.iter(|| { let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG); stream_enc.write_all(&v).unwrap(); - stream_enc.flush().unwrap(); + stream_enc.flush().unwrap(); let _ = stream_enc.into_inner(); }); } @@ -174,7 +174,10 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark { .with_function("encode_slice", do_encode_bench_slice) .with_function("encode_reuse_buf_stream", do_encode_bench_stream) .with_function("encode_string_stream", do_encode_bench_string_stream) - .with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream) + .with_function( + "encode_string_reuse_buf_stream", + do_encode_bench_string_reuse_buf_stream, + ) } fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark { diff --git a/examples/make_tables.rs b/examples/make_tables.rs index db6fcf2b..2f27c0eb 100644 --- a/examples/make_tables.rs +++ b/examples/make_tables.rs @@ -173,5 +173,7 @@ fn check_alphabet(alphabet: &[u8]) { // must be ASCII to be valid as single UTF-8 bytes for &b in alphabet { assert!(b <= 0x7F_u8); + // = is assumed to be padding, so cannot be used as a symbol + assert_ne!(b'=', b); } } diff --git a/src/decode.rs b/src/decode.rs index a35166bb..4cc937d5 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -1,4 +1,4 @@ -use crate::{tables, Config}; +use crate::{tables, Config, PAD_BYTE}; #[cfg(any(feature = "alloc", feature = "std", test))] use crate::STANDARD; @@ -30,6 +30,9 @@ pub enum DecodeError { InvalidByte(usize, u8), /// The length of the input is invalid. /// A typical cause of this is stray trailing whitespace or other separator bytes. + /// In the case where excess trailing bytes have produced an invalid length *and* the last byte + /// is also an invalid base64 symbol (as would be the case for whitespace, etc), `InvalidByte` + /// will be emitted instead of `InvalidLength` to make the issue easier to debug. InvalidLength, /// The last non-padding input symbol's encoded 6 bits have nonzero bits that will be discarded. /// This is indicative of corrupted or truncated Base64. @@ -44,7 +47,7 @@ impl fmt::Display for DecodeError { DecodeError::InvalidByte(index, byte) => { write!(f, "Invalid byte {}, offset {}.", byte, index) } - DecodeError::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder. Trailing whitespace or other bytes?"), + DecodeError::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder."), DecodeError::InvalidLastSymbol(index, byte) => { write!(f, "Invalid last symbol {}, offset {}.", byte, index) } @@ -216,7 +219,7 @@ fn decode_helper( // trailing whitespace is so common that it's worth it to check the last byte to // possibly return a better error message if let Some(b) = input.last() { - if *b != b'=' && decode_table[*b as usize] == tables::INVALID_VALUE { + if *b != PAD_BYTE && decode_table[*b as usize] == tables::INVALID_VALUE { return Err(DecodeError::InvalidByte(input.len() - 1, *b)); } } @@ -340,7 +343,7 @@ fn decode_helper( let start_of_leftovers = input_index; for (i, b) in input[start_of_leftovers..].iter().enumerate() { // '=' padding - if *b == 0x3D { + if *b == PAD_BYTE { // There can be bad padding in a few ways: // 1 - Padding with non-padding characters after it // 2 - Padding after zero or one non-padding characters before it @@ -381,7 +384,7 @@ fn decode_helper( if padding_bytes > 0 { return Err(DecodeError::InvalidByte( start_of_leftovers + first_padding_index, - 0x3D, + PAD_BYTE, )); } last_symbol = *b; @@ -816,7 +819,7 @@ mod tests { symbols[1] = s2; for &s3 in STANDARD.char_set.encode_table().iter() { symbols[2] = s3; - symbols[3] = b'='; + symbols[3] = PAD_BYTE; match base64_to_bytes.get(&symbols[..]) { Some(bytes) => { @@ -852,8 +855,8 @@ mod tests { symbols[0] = s1; for &s2 in STANDARD.char_set.encode_table().iter() { symbols[1] = s2; - symbols[2] = b'='; - symbols[3] = b'='; + symbols[2] = PAD_BYTE; + symbols[3] = PAD_BYTE; match base64_to_bytes.get(&symbols[..]) { Some(bytes) => { @@ -867,22 +870,4 @@ mod tests { } } } - - #[test] - fn decode_imap() { - assert_eq!( - decode_config(b"+,,+", crate::IMAP_MUTF7), - decode_config(b"+//+", crate::STANDARD_NO_PAD) - ); - } - - #[test] - fn decode_invalid_trailing_bytes() { - // The case of trailing newlines is common enough to warrant a test for a good error - // message. - assert_eq!( - decode(b"Zm9vCg==\n"), - Err(DecodeError::InvalidByte(8, b'\n')) - ); - } } diff --git a/src/encode.rs b/src/encode.rs index 9a09a19d..b32bbfff 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -1,4 +1,4 @@ -use crate::Config; +use crate::{Config, PAD_BYTE}; #[cfg(any(feature = "alloc", feature = "std", test))] use crate::{chunked_encoder, STANDARD}; #[cfg(any(feature = "alloc", feature = "std", test))] @@ -312,7 +312,7 @@ pub fn add_padding(input_len: usize, output: &mut [u8]) -> usize { let rem = input_len % 3; let mut bytes_written = 0; for _ in 0..((3 - rem) % 3) { - output[bytes_written] = b'='; + output[bytes_written] = PAD_BYTE; bytes_written += 1; } diff --git a/src/lib.rs b/src/lib.rs index f1303483..6bded160 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -241,3 +241,5 @@ pub const BINHEX: Config = Config { pad: false, decode_allow_trailing_bits: false, }; + +const PAD_BYTE: u8 = b'='; diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs index a2033c4a..58b1c0ab 100644 --- a/src/write/encoder_string_writer.rs +++ b/src/write/encoder_string_writer.rs @@ -1,7 +1,7 @@ +use super::encoder::EncoderWriter; use crate::Config; use std::io; use std::io::Write; -use super::encoder::EncoderWriter; /// A `Write` implementation that base64-encodes data using the provided config and accumulates the /// resulting base64 in memory, which is then exposed as a String via `into_inner()`. @@ -56,7 +56,9 @@ pub struct EncoderStringWriter { impl EncoderStringWriter { /// Create a EncoderStringWriter that will append to the provided `StrConsumer`. pub fn from(str_consumer: S, config: Config) -> Self { - EncoderStringWriter { encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config) } + EncoderStringWriter { + encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config), + } } /// Encode all remaining buffered data, including any trailing incomplete input triples and @@ -66,7 +68,8 @@ impl EncoderStringWriter { /// /// Returns the base64-encoded form of the accumulated written data. pub fn into_inner(mut self) -> S { - self.encoder.finish() + self.encoder + .finish() .expect("Writing to a Vec should never fail") .str_consumer } @@ -79,7 +82,7 @@ impl EncoderStringWriter { } } -impl Write for EncoderStringWriter { +impl Write for EncoderStringWriter { fn write(&mut self, buf: &[u8]) -> io::Result { self.encoder.write(buf) } @@ -113,15 +116,14 @@ impl StrConsumer for String { /// /// This is safe because we only use it when writing base64, which is always valid UTF-8. struct Utf8SingleCodeUnitWriter { - str_consumer: S + str_consumer: S, } impl io::Write for Utf8SingleCodeUnitWriter { fn write(&mut self, buf: &[u8]) -> io::Result { // Because we expect all input to be valid utf-8 individual bytes, we can encode any buffer // length - let s = std::str::from_utf8(buf) - .expect("Input must be valid UTF-8"); + let s = std::str::from_utf8(buf).expect("Input must be valid UTF-8"); self.str_consumer.consume(s); @@ -138,9 +140,9 @@ impl io::Write for Utf8SingleCodeUnitWriter { mod tests { use crate::encode_config_buf; use crate::tests::random_config; + use crate::write::encoder_string_writer::EncoderStringWriter; use rand::Rng; use std::io::Write; - use crate::write::encoder_string_writer::EncoderStringWriter; #[test] fn every_possible_split_of_input() { diff --git a/tests/decode.rs b/tests/decode.rs index 44caef86..282bccd9 100644 --- a/tests/decode.rs +++ b/tests/decode.rs @@ -305,6 +305,26 @@ fn decode_reject_invalid_bytes_with_correct_error() { } } +#[test] +fn decode_imap() { + assert_eq!( + decode_config(b"+,,+", crate::IMAP_MUTF7), + decode_config(b"+//+", crate::STANDARD_NO_PAD) + ); +} + +#[test] +fn decode_invalid_trailing_bytes() { + // The case of trailing newlines is common enough to warrant a test for a good error + // message. + assert_eq!( + Err(DecodeError::InvalidByte(8, b'\n')), + decode(b"Zm9vCg==\n") + ); + // extra padding, however, is still InvalidLength + assert_eq!(Err(DecodeError::InvalidLength), decode(b"Zm9vCg===")); +} + fn config_std_pad() -> Config { Config::new(CharacterSet::Standard, true) }