Skip to content

Commit

Permalink
Add docs and other cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
marshallpierce committed Sep 30, 2020
1 parent 6bb3556 commit 4296732
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 40 deletions.
5 changes: 3 additions & 2 deletions RELEASE-NOTES.md
@@ -1,9 +1,10 @@
# Next
# 0.13.0

- Config methods are const
- Added `EncoderStringWriter` to allow encoding directly to a String
- `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work)
- As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`.
- As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`. If you were calling `finish()` explicitly, you will now need to use `let _ = foo.finish()` instead of just `foo.finish()` to avoid a warning about the unused value.
- When decoding input that has both an invalid length and an invalid symbol as the last byte, `InvalidByte` will be emitted instead of `InvalidLength` to make the problem more obvious.

# 0.12.2

Expand Down
7 changes: 5 additions & 2 deletions benches/benchmarks.rs
Expand Up @@ -130,7 +130,7 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) {
b.iter(|| {
let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG);
stream_enc.write_all(&v).unwrap();
stream_enc.flush().unwrap();
stream_enc.flush().unwrap();
let _ = stream_enc.into_inner();
});
}
Expand Down Expand Up @@ -174,7 +174,10 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
.with_function("encode_slice", do_encode_bench_slice)
.with_function("encode_reuse_buf_stream", do_encode_bench_stream)
.with_function("encode_string_stream", do_encode_bench_string_stream)
.with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream)
.with_function(
"encode_string_reuse_buf_stream",
do_encode_bench_string_reuse_buf_stream,
)
}

fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
Expand Down
2 changes: 2 additions & 0 deletions examples/make_tables.rs
Expand Up @@ -173,5 +173,7 @@ fn check_alphabet(alphabet: &[u8]) {
// must be ASCII to be valid as single UTF-8 bytes
for &b in alphabet {
assert!(b <= 0x7F_u8);
// = is assumed to be padding, so cannot be used as a symbol
assert_ne!(b'=', b);
}
}
37 changes: 11 additions & 26 deletions src/decode.rs
@@ -1,4 +1,4 @@
use crate::{tables, Config};
use crate::{tables, Config, PAD_BYTE};

#[cfg(any(feature = "alloc", feature = "std", test))]
use crate::STANDARD;
Expand Down Expand Up @@ -30,6 +30,9 @@ pub enum DecodeError {
InvalidByte(usize, u8),
/// The length of the input is invalid.
/// A typical cause of this is stray trailing whitespace or other separator bytes.
/// In the case where excess trailing bytes have produced an invalid length *and* the last byte
/// is also an invalid base64 symbol (as would be the case for whitespace, etc), `InvalidByte`
/// will be emitted instead of `InvalidLength` to make the issue easier to debug.
InvalidLength,
/// The last non-padding input symbol's encoded 6 bits have nonzero bits that will be discarded.
/// This is indicative of corrupted or truncated Base64.
Expand All @@ -44,7 +47,7 @@ impl fmt::Display for DecodeError {
DecodeError::InvalidByte(index, byte) => {
write!(f, "Invalid byte {}, offset {}.", byte, index)
}
DecodeError::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder. Trailing whitespace or other bytes?"),
DecodeError::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder."),
DecodeError::InvalidLastSymbol(index, byte) => {
write!(f, "Invalid last symbol {}, offset {}.", byte, index)
}
Expand Down Expand Up @@ -216,7 +219,7 @@ fn decode_helper(
// trailing whitespace is so common that it's worth it to check the last byte to
// possibly return a better error message
if let Some(b) = input.last() {
if *b != b'=' && decode_table[*b as usize] == tables::INVALID_VALUE {
if *b != PAD_BYTE && decode_table[*b as usize] == tables::INVALID_VALUE {
return Err(DecodeError::InvalidByte(input.len() - 1, *b));
}
}
Expand Down Expand Up @@ -340,7 +343,7 @@ fn decode_helper(
let start_of_leftovers = input_index;
for (i, b) in input[start_of_leftovers..].iter().enumerate() {
// '=' padding
if *b == 0x3D {
if *b == PAD_BYTE {
// There can be bad padding in a few ways:
// 1 - Padding with non-padding characters after it
// 2 - Padding after zero or one non-padding characters before it
Expand Down Expand Up @@ -381,7 +384,7 @@ fn decode_helper(
if padding_bytes > 0 {
return Err(DecodeError::InvalidByte(
start_of_leftovers + first_padding_index,
0x3D,
PAD_BYTE,
));
}
last_symbol = *b;
Expand Down Expand Up @@ -816,7 +819,7 @@ mod tests {
symbols[1] = s2;
for &s3 in STANDARD.char_set.encode_table().iter() {
symbols[2] = s3;
symbols[3] = b'=';
symbols[3] = PAD_BYTE;

match base64_to_bytes.get(&symbols[..]) {
Some(bytes) => {
Expand Down Expand Up @@ -852,8 +855,8 @@ mod tests {
symbols[0] = s1;
for &s2 in STANDARD.char_set.encode_table().iter() {
symbols[1] = s2;
symbols[2] = b'=';
symbols[3] = b'=';
symbols[2] = PAD_BYTE;
symbols[3] = PAD_BYTE;

match base64_to_bytes.get(&symbols[..]) {
Some(bytes) => {
Expand All @@ -867,22 +870,4 @@ mod tests {
}
}
}

#[test]
fn decode_imap() {
assert_eq!(
decode_config(b"+,,+", crate::IMAP_MUTF7),
decode_config(b"+//+", crate::STANDARD_NO_PAD)
);
}

#[test]
fn decode_invalid_trailing_bytes() {
// The case of trailing newlines is common enough to warrant a test for a good error
// message.
assert_eq!(
decode(b"Zm9vCg==\n"),
Err(DecodeError::InvalidByte(8, b'\n'))
);
}
}
4 changes: 2 additions & 2 deletions src/encode.rs
@@ -1,4 +1,4 @@
use crate::Config;
use crate::{Config, PAD_BYTE};
#[cfg(any(feature = "alloc", feature = "std", test))]
use crate::{chunked_encoder, STANDARD};
#[cfg(any(feature = "alloc", feature = "std", test))]
Expand Down Expand Up @@ -312,7 +312,7 @@ pub fn add_padding(input_len: usize, output: &mut [u8]) -> usize {
let rem = input_len % 3;
let mut bytes_written = 0;
for _ in 0..((3 - rem) % 3) {
output[bytes_written] = b'=';
output[bytes_written] = PAD_BYTE;
bytes_written += 1;
}

Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Expand Up @@ -241,3 +241,5 @@ pub const BINHEX: Config = Config {
pad: false,
decode_allow_trailing_bits: false,
};

const PAD_BYTE: u8 = b'=';
18 changes: 10 additions & 8 deletions src/write/encoder_string_writer.rs
@@ -1,7 +1,7 @@
use super::encoder::EncoderWriter;
use crate::Config;
use std::io;
use std::io::Write;
use super::encoder::EncoderWriter;

/// A `Write` implementation that base64-encodes data using the provided config and accumulates the
/// resulting base64 in memory, which is then exposed as a String via `into_inner()`.
Expand Down Expand Up @@ -56,7 +56,9 @@ pub struct EncoderStringWriter<S: StrConsumer> {
impl<S: StrConsumer> EncoderStringWriter<S> {
/// Create a EncoderStringWriter that will append to the provided `StrConsumer`.
pub fn from(str_consumer: S, config: Config) -> Self {
EncoderStringWriter { encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config) }
EncoderStringWriter {
encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config),
}
}

/// Encode all remaining buffered data, including any trailing incomplete input triples and
Expand All @@ -66,7 +68,8 @@ impl<S: StrConsumer> EncoderStringWriter<S> {
///
/// Returns the base64-encoded form of the accumulated written data.
pub fn into_inner(mut self) -> S {
self.encoder.finish()
self.encoder
.finish()
.expect("Writing to a Vec<u8> should never fail")
.str_consumer
}
Expand All @@ -79,7 +82,7 @@ impl EncoderStringWriter<String> {
}
}

impl <S: StrConsumer> Write for EncoderStringWriter<S> {
impl<S: StrConsumer> Write for EncoderStringWriter<S> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.encoder.write(buf)
}
Expand Down Expand Up @@ -113,15 +116,14 @@ impl StrConsumer for String {
///
/// This is safe because we only use it when writing base64, which is always valid UTF-8.
struct Utf8SingleCodeUnitWriter<S: StrConsumer> {
str_consumer: S
str_consumer: S,
}

impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
// Because we expect all input to be valid utf-8 individual bytes, we can encode any buffer
// length
let s = std::str::from_utf8(buf)
.expect("Input must be valid UTF-8");
let s = std::str::from_utf8(buf).expect("Input must be valid UTF-8");

self.str_consumer.consume(s);

Expand All @@ -138,9 +140,9 @@ impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> {
mod tests {
use crate::encode_config_buf;
use crate::tests::random_config;
use crate::write::encoder_string_writer::EncoderStringWriter;
use rand::Rng;
use std::io::Write;
use crate::write::encoder_string_writer::EncoderStringWriter;

#[test]
fn every_possible_split_of_input() {
Expand Down
20 changes: 20 additions & 0 deletions tests/decode.rs
Expand Up @@ -305,6 +305,26 @@ fn decode_reject_invalid_bytes_with_correct_error() {
}
}

#[test]
fn decode_imap() {
assert_eq!(
decode_config(b"+,,+", crate::IMAP_MUTF7),
decode_config(b"+//+", crate::STANDARD_NO_PAD)
);
}

#[test]
fn decode_invalid_trailing_bytes() {
// The case of trailing newlines is common enough to warrant a test for a good error
// message.
assert_eq!(
Err(DecodeError::InvalidByte(8, b'\n')),
decode(b"Zm9vCg==\n")
);
// extra padding, however, is still InvalidLength
assert_eq!(Err(DecodeError::InvalidLength), decode(b"Zm9vCg==="));
}

fn config_std_pad() -> Config {
Config::new(CharacterSet::Standard, true)
}

0 comments on commit 4296732

Please sign in to comment.