Rewrite the crate docstring

marshallpierce · Dec 4, 2023 · 40e162c · 40e162c
1 parent bfde751
commit 40e162c
Show file tree

Hide file tree

Showing 2 changed files with 165 additions and 69 deletions.
diff --git a/src/alphabet.rs b/src/alphabet.rs
@@ -160,21 +160,21 @@ impl fmt::Display for ParseAlphabetError {
 #[cfg(any(feature = "std", test))]
 impl error::Error for ParseAlphabetError {}
 
-/// The standard alphabet (uses `+` and `/`).
+/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
 ///
-/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
+/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
 pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
 );
 
-/// The URL safe alphabet (uses `-` and `_`).
+/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
 ///
-/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
+/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
 pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
 );
 
-/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values).
+/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
 ///
 /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
 pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
@@ -186,7 +186,7 @@ pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
     "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
 );
 
-/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`).
+/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
 ///
 /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
 pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(

diff --git a/src/lib.rs b/src/lib.rs
@@ -1,100 +1,196 @@
-//! # Getting started
+//! Correct, fast, and configurable [base64][] decoding and encoding. Base64
+//! transports binary data efficiently in contexts where only plain text is
+//! allowed.
 //!
-//! 1. Perhaps one of the preconfigured engines in [engine::general_purpose] will suit, e.g.
-//! [engine::general_purpose::STANDARD_NO_PAD].
-//!     - These are re-exported in [prelude] with a `BASE64_` prefix for those who prefer to
-//!       `use base64::prelude::*` or equivalent, e.g. [prelude::BASE64_STANDARD_NO_PAD]
-//! 1. If not, choose which alphabet you want. Most usage will want [alphabet::STANDARD] or [alphabet::URL_SAFE].
-//! 1. Choose which [Engine] implementation you want. For the moment there is only one: [engine::GeneralPurpose].
-//! 1. Configure the engine appropriately using the engine's `Config` type.
-//!     - This is where you'll select whether to add padding (when encoding) or expect it (when
-//!     decoding). If given the choice, prefer no padding.
-//! 1. Build the engine using the selected alphabet and config.
+//! [base64]: https://developer.mozilla.org/en-US/docs/Glossary/Base64
 //!
-//! For more detail, see below.
+//! # Usage
 //!
-//! ## Alphabets
+//! Use an [`Engine`] to decode or encode base64, configured with the base64
+//! alphabet and padding behavior best suited to your application.
 //!
-//! An [alphabet::Alphabet] defines what ASCII symbols are used to encode to or decode from.
+//! ## Engine setup
 //!
-//! Constants in [alphabet] like [alphabet::STANDARD] or [alphabet::URL_SAFE] provide commonly used
-//! alphabets, but you can also build your own custom [alphabet::Alphabet] if needed.
+//! There is more than one way to encode a stream of bytes as “base64”.
+//! Different applications use different encoding
+//! [alphabets][alphabet::Alphabet] and
+//! [padding behaviors][engine::general_purpose::GeneralPurposeConfig].
 //!
-//! ## Engines
+//! ### Encoding alphabet
 //!
-//! Once you have an `Alphabet`, you can pick which `Engine` you want. A few parts of the public
-//! API provide a default, but otherwise the user must provide an `Engine` to use.
+//! Almost all base64 [alphabets][alphabet::Alphabet] use `A-Z`, `a-z`, and
+//! `0-9`, which gives nearly 64 characters (26 + 26 + 10 = 62), but they differ
+//! in their choice of the final 2.
 //!
-//! See [Engine] for more.
+//! Most applications use the [standard][alphabet::STANDARD] alphabet specified
+//! in [RFC 4648][rfc-alphabet].  If that’s all you need, you can get started
+//! quickly by using the pre-configured
+//! [`STANDARD`][engine::general_purpose::STANDARD] engine:
 //!
-//! ## Config
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
 //!
-//! In addition to an `Alphabet`, constructing an `Engine` also requires an [engine::Config]. Each
-//! `Engine` has a corresponding `Config` implementation since different `Engine`s may offer different
-//! levels of configurability.
+//! # fn main() -> Result<(), base64::DecodeError> {
+//! assert_eq!(BASE64.decode(b"+uwgVQA=")?, b"\xFA\xEC\x20\x55\0");
+//! assert_eq!(BASE64.encode(b"\xFF\xEC\x20\x55\0"), "/+wgVQA=");
+//! # Ok(())
+//! # }
+//! ```
 //!
-//! # Encoding
+//! [rfc-alphabet]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
 //!
-//! Several different encoding methods on [Engine] are available to you depending on your desire for
-//! convenience vs performance.
+//! Other common alphabets are available in the [`alphabet`] module.
 //!
-//! | Method                   | Output                       | Allocates                      |
-//! | ------------------------ | ---------------------------- | ------------------------------ |
-//! | [Engine::encode]         | Returns a new `String`       | Always                         |
-//! | [Engine::encode_string]  | Appends to provided `String` | Only if `String` needs to grow |
-//! | [Engine::encode_slice]   | Writes to provided `&[u8]`   | Never - fastest                |
+//! #### URL-safe alphabet
 //!
-//! All of the encoding methods will pad as per the engine's config.
+//! The standard alphabet uses `+` and `/` as its two non-alphanumeric tokens,
+//! which cannot be safely used in URLs without encoding them as `%2B` and
+//! `%2F`.
 //!
-//! # Decoding
+//! To avoid that, some applications use a [“URL-safe” alphabet][alphabet::URL_SAFE],
+//! which uses `-` and `_` instead. To use that alternative alphabet, use the
+//! [`URL_SAFE`][engine::general_purpose::URL_SAFE] engine:
 //!
-//! Just as for encoding, there are different decoding methods available.
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::{engine::general_purpose::URL_SAFE as BASE64, Engine as _};
 //!
-//! | Method                   | Output                        | Allocates                      |
-//! | ------------------------ | ----------------------------- | ------------------------------ |
-//! | [Engine::decode]         | Returns a new `Vec<u8>`       | Always                         |
-//! | [Engine::decode_vec]     | Appends to provided `Vec<u8>` | Only if `Vec` needs to grow    |
-//! | [Engine::decode_slice]   | Writes to provided `&[u8]`    | Never - fastest                |
+//! # fn main() -> Result<(), base64::DecodeError> {
+//! assert_eq!(BASE64.decode(b"-uwgVQA=")?, b"\xFA\xEC\x20\x55\0");
+//! assert_eq!(BASE64.encode(b"\xFF\xEC\x20\x55\0"), "_-wgVQA=");
+//! # Ok(())
+//! # }
+//! ```
 //!
-//! Unlike encoding, where all possible input is valid, decoding can fail (see [DecodeError]).
+//! ### Padding characters
 //!
-//! Input can be invalid because it has invalid characters or invalid padding. The nature of how
-//! padding is checked depends on the engine's config.
-//! Whitespace in the input is invalid, just like any other non-base64 byte.
+//! Each base64 character represents 6 bits (2⁶ = 64) of the original binary
+//! data, and every 3 bytes of input binary data will encode to 4 base64
+//! characters (8 bits × 3 = 6 bits × 4 = 24 bits).
 //!
-//! # `Read` and `Write`
+//! When the input is not an exact multiple of 3 bytes in length, [canonical][]
+//! base64 encoders insert padding characters at the end, so that the output
+//! length is always a multiple of 4:
 //!
-//! To decode a [std::io::Read] of b64 bytes, wrap a reader (file, network socket, etc) with
-//! [read::DecoderReader].
+//! [canonical]: https://datatracker.ietf.org/doc/html/rfc4648#section-3.5
 //!
-//! To write raw bytes and have them b64 encoded on the fly, wrap a [std::io::Write] with
-//! [write::EncoderWriter].
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
 //!
-//! There is some performance overhead (15% or so) because of the necessary buffer shuffling --
-//! still fast enough that almost nobody cares. Also, these implementations do not heap allocate.
+//! assert_eq!(BASE64.encode(b""),    "");
+//! assert_eq!(BASE64.encode(b"f"),   "Zg==");
+//! assert_eq!(BASE64.encode(b"fo"),  "Zm8=");
+//! assert_eq!(BASE64.encode(b"foo"), "Zm9v");
+//! ```
 //!
-//! # `Display`
+//! Canonical encoding ensures that base64 encodings will be exactly the same,
+//! byte-for-byte, regardless of input length. But the `=` padding characters
+//! aren’t necessary for decoding, and they may be omitted by using a
+//! [`NO_PAD`][engine::general_purpose::NO_PAD] configuration:
 //!
-//! See [display] for how to transparently base64-encode data via a `Display` implementation.
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::{engine::general_purpose::STANDARD_NO_PAD as BASE64, Engine as _};
 //!
-//! # Examples
+//! assert_eq!(BASE64.encode(b""),    "");
+//! assert_eq!(BASE64.encode(b"f"),   "Zg");
+//! assert_eq!(BASE64.encode(b"fo"),  "Zm8");
+//! assert_eq!(BASE64.encode(b"foo"), "Zm9v");
+//! ```
 //!
-//! ## Using predefined engines
+//! The pre-configured `NO_PAD` engines will reject inputs containing padding
+//! `=` characters. To encode without padding and still accept padding while
+//! decoding, create an [engine][engine::general_purpose::GeneralPurpose] with
+//! that [padding mode][engine::DecodePaddingMode].
 //!
 #![cfg_attr(feature = "alloc", doc = "```")]
 #![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
-//! use base64::{Engine as _, engine::general_purpose};
+//! # use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _};
+//! assert_eq!(STANDARD_NO_PAD.decode(b"Zm8="), Err(base64::DecodeError::InvalidPadding));
+//! ```
+//!
+//! ## Memory allocation
+//!
+//! The [decode][Engine::decode()] and [encode][Engine::encode()] engine methods
+//! allocate memory for their results – `decode` returns a `Vec<u8>` and
+//! `encode` returns a `String`. To instead decode or encode into a buffer that
+//! you allocated, use one of the alternative methods:
+//!
+//! #### Decoding
+//!
+//! | Method                     | Output                        | Allocates memory              |
+//! | -------------------------- | ----------------------------- | ----------------------------- |
+//! | [`Engine::decode`]         | returns a new `Vec<u8>`       | always                        |
+//! | [`Engine::decode_vec`]     | appends to provided `Vec<u8>` | if `Vec` lacks capacity       |
+//! | [`Engine::decode_slice`]   | writes to provided `&mut [u8]` | never                         |
 //!
-//! let orig = b"data";
-//! let encoded: String = general_purpose::STANDARD_NO_PAD.encode(orig);
-//! assert_eq!("ZGF0YQ", encoded);
-//! assert_eq!(orig.as_slice(), &general_purpose::STANDARD_NO_PAD.decode(encoded).unwrap());
+//! #### Encoding
 //!
-//! // or, URL-safe
-//! let encoded_url = general_purpose::URL_SAFE_NO_PAD.encode(orig);
+//! | Method                     | Output                       | Allocates memory               |
+//! | -------------------------- | ---------------------------- | ------------------------------ |
+//! | [`Engine::encode`]         | returns a new `String`       | always                         |
+//! | [`Engine::encode_string`]  | appends to provided `String` | if `String` lacks capacity     |
+//! | [`Engine::encode_slice`]   | writes to provided `&mut [u8]` | never                          |
+//!
+//! ## Input and output
+//!
+//! The `base64` crate can [decode][Engine::decode()] and
+//! [encode][Engine::encode()] values in memory. For streaming,
+//! [`DecoderReader`][read::DecoderReader] and
+//! [`EncoderWriter`][write::EncoderWriter] decode and encode any
+//! [readable][std::io::Read] or [writable][std::io::Write] byte stream.
+//!
+//! #### Decoding
+//!
+#![cfg_attr(feature = "std", doc = "```")]
+#![cfg_attr(not(feature = "std"), doc = "```ignore")]
+//! # use std::io;
+//! use base64::{engine::general_purpose::STANDARD, read::DecoderReader};
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! let mut input = io::stdin();
+//! let mut decoder = DecoderReader::new(&mut input, &STANDARD);
+//! io::copy(&mut decoder, &mut io::stdout())?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! #### Encoding
+//!
+#![cfg_attr(feature = "std", doc = "```")]
+#![cfg_attr(not(feature = "std"), doc = "```ignore")]
+//! # use std::io;
+//! use base64::{engine::general_purpose::STANDARD, write::EncoderWriter};
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! let mut output = io::stdout();
+//! let mut encoder = EncoderWriter::new(&mut output, &STANDARD);
+//! io::copy(&mut io::stdin(), &mut encoder)?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! #### Display
+//!
+//! If you only need a base64 representation for implementing the
+//! [`Display`][std::fmt::Display] trait, use
+//! [`Base64Display`][display::Base64Display]:
+//!
+//! ```
+//! use base64::{display::Base64Display, engine::general_purpose::STANDARD};
+//!
+//! let value = Base64Display::new(b"\0\x01\x02\x03", &STANDARD);
+//! assert_eq!("base64: AAECAw==", format!("base64: {}", value));
 //! ```
 //!
-//! ## Custom alphabet, config, and engine
+//! # Configuration
+//!
+//! Decoding and encoding behavior can be customized by creating an
+//! [engine][engine::GeneralPurpose] with an [alphabet][alphabet::Alphabet] and
+//! [padding configuration][engine::GeneralPurposeConfig]:
 //!
 #![cfg_attr(feature = "alloc", doc = "```")]
 #![cfg_attr(not(feature = "alloc"), doc = "```ignore")]