From 25ed63d9fba2886df7a825c7833d237676041d13 Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Mon, 8 Sep 2025 15:36:58 +0100 Subject: [PATCH 1/5] Import tendril crate Signed-off-by: Nico Burns --- .github/workflows/main.yml | 4 + Cargo.toml | 23 +- tendril/Cargo.toml | 27 + tendril/LICENSE-APACHE | 201 +++ tendril/LICENSE-MIT | 25 + tendril/README.md | 96 ++ tendril/examples/fuzz.rs | 146 +++ tendril/src/bench.rs | 159 +++ tendril/src/buf32.rs | 120 ++ tendril/src/fmt.rs | 519 ++++++++ tendril/src/futf.rs | 565 ++++++++ tendril/src/lib.rs | 37 + tendril/src/stream.rs | 752 +++++++++++ tendril/src/tendril.rs | 2473 ++++++++++++++++++++++++++++++++++++ tendril/src/utf8_decode.rs | 98 ++ tendril/src/util.rs | 45 + 16 files changed, 5280 insertions(+), 10 deletions(-) create mode 100644 tendril/Cargo.toml create mode 100644 tendril/LICENSE-APACHE create mode 100644 tendril/LICENSE-MIT create mode 100644 tendril/README.md create mode 100644 tendril/examples/fuzz.rs create mode 100644 tendril/src/bench.rs create mode 100644 tendril/src/buf32.rs create mode 100644 tendril/src/fmt.rs create mode 100644 tendril/src/futf.rs create mode 100644 tendril/src/lib.rs create mode 100644 tendril/src/stream.rs create mode 100644 tendril/src/tendril.rs create mode 100644 tendril/src/utf8_decode.rs create mode 100644 tendril/src/util.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4ebf60c1..a6480d29 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -34,6 +34,10 @@ jobs: if: matrix.version != 'nightly' run: cargo test --all + - name: Test tendril w/encoding feature + if: matrix.version != 'nightly' + run: cargo test -p tendril --features 'encoding encoding_rs' + - name: Cargo doc if: matrix.version == 'nightly' run: cargo doc diff --git a/Cargo.toml b/Cargo.toml index 78a583ef..390a6ec9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "html5ever", "rcdom", "xml5ever", + "tendril", ] [workspace.package] @@ -18,29 +19,31 @@ rust-version = "1.70.0" [workspace.dependencies] # Repo dependencies +tendril = { version = "0.4.3", path = "tendril" } web_atoms = { version = "0.1", path = "web_atoms" } markup5ever = { version = "0.35.0", path = "markup5ever" } xml5ever = { version = "0.35.0", path = "xml5ever" } html5ever = { version = "0.35.0", path = "html5ever" } # External dependencies -syn = { version = "2", features = ["full"] } -quote = "1" -proc-macro2 = "1" +encoding = "0.2" +encoding_rs = "0.8.12" log = "0.4" mac = "0.1" -tendril = "0.4" -string_cache = "0.9.0" -string_cache_codegen = "0.6.1" +new_debug_unreachable = "1.0.2" phf = "0.13" phf_codegen = "0.13" +proc-macro2 = "1" +quote = "1" +syn = { version = "2", features = ["full"] } +string_cache = "0.9.0" +string_cache_codegen = "0.6.1" +utf-8 = "0.7" # Dev dependencies criterion = "0.6" +env_logger = "0.10" libtest-mimic = "0.8.1" +rand = "0.4" serde_json = "1.0" -env_logger = "0.10" typed-arena = "2.0.2" - - - diff --git a/tendril/Cargo.toml b/tendril/Cargo.toml new file mode 100644 index 00000000..c424ff56 --- /dev/null +++ b/tendril/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "tendril" +version = "0.4.3" +description = "Compact buffer/string type for zero-copy parsing" +authors = [ + "Keegan McAllister ", + "Simon Sapin ", + "Chris Morgan " +] +license.workspace = true +repository.workspace = true +rust-version.workspace = true +readme = "README.md" +edition = "2015" + +[dependencies] +encoding = { workspace = true, optional = true} +encoding_rs = { workspace = true, optional = 
true} +mac = { workspace = true } +new_debug_unreachable = { workspace = true } +utf-8 = { workspace = true } + +[dev-dependencies] +rand = { workspace = true } + +[features] +bench = [] diff --git a/tendril/LICENSE-APACHE b/tendril/LICENSE-APACHE new file mode 100644 index 00000000..16fe87b0 --- /dev/null +++ b/tendril/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/tendril/LICENSE-MIT b/tendril/LICENSE-MIT new file mode 100644 index 00000000..2e0fee10 --- /dev/null +++ b/tendril/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2015 Keegan McAllister + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/tendril/README.md b/tendril/README.md new file mode 100644 index 00000000..700e5952 --- /dev/null +++ b/tendril/README.md @@ -0,0 +1,96 @@ +# tendril + +**Warning**: This library is at a very early stage of development, and it +contains a substantial amount of `unsafe` code. Use at your own risk! + +[![Build Status](https://github.com/servo/html5ever/workflows/Tendril%20CI/badge.svg)](https://github.com/servo/tendril/actions) + +[API Documentation](https://doc.servo.org/tendril/index.html) + +## Introduction + +`Tendril` is a compact string/buffer type, optimized for zero-copy parsing. +Tendrils have the semantics of owned strings, but are sometimes views into +shared buffers. When you mutate a tendril, an owned copy is made if necessary. +Further mutations occur in-place until the string becomes shared, e.g. with +`clone()` or `subtendril()`. + +Buffer sharing is accomplished through thread-local (non-atomic) reference +counting, which has very low overhead. The Rust type system will prevent +you at compile time from sending a tendril between threads. (See below +for thoughts on relaxing this restriction.) + +Whereas `String` allocates in the heap for any non-empty string, `Tendril` can +store small strings (up to 8 bytes) in-line, without a heap allocation. +`Tendril` is also smaller than `String` on 64-bit platforms — 16 bytes versus +24. `Option` is the same size as `Tendril`, thanks to +[`NonZero`][NonZero]. + +The maximum length of a tendril is 4 GB. 
The library will panic if you attempt
+to go over the limit.
+
+## Formats and encoding
+
+`Tendril` uses
+[phantom types](https://doc.rust-lang.org/stable/rust-by-example/generics/phantom.html)
+to track a buffer's format. This determines at compile time which
+operations are available on a given tendril. For example, `Tendril<fmt::UTF8>` and
+`Tendril<fmt::Bytes>` can be borrowed as `&str` and `&[u8]` respectively.
+
+`Tendril` also integrates with
+[rust-encoding](https://github.com/lifthrasiir/rust-encoding) and has
+preliminary support for [WTF-8][] buffers.
+
+## Plans for the future
+
+### Ropes
+
+[html5ever][] will use `Tendril` as a zero-copy text representation. It would
+be good to preserve this all the way through to Servo's DOM. This would reduce
+memory consumption, and possibly speed up text shaping and painting. However,
+DOM text may conceivably be larger than 4 GB, and will anyway not be contiguous
+in memory around e.g. a character entity reference.
+
+*Solution:* Build a **[rope][] on top of these strings** and use that as
+Servo's representation of DOM text. We can perhaps do text shaping and/or
+painting in parallel for different chunks of a rope. html5ever can additionally
+use this rope type as a replacement for `BufferQueue`.
+
+Because the underlying buffers are reference-counted, the bulk of this rope
+is already a [persistent data structure][]. Consider what happens when
+appending two ropes to get a "new" rope. A vector-backed rope would copy a
+vector of small structs, one for each chunk, and would bump the corresponding
+refcounts. But it would not copy any of the string data.
+
+If we want more sharing, then a [2-3 finger tree][] could be a good choice.
+We would probably stick with `VecDeque` for ropes under a certain size.
+
+### UTF-16 compatibility
+
+SpiderMonkey expects text to be in UCS-2 format for the most part. The
+semantics of JavaScript strings are difficult to implement on UTF-8. This also
+applies to HTML parsing via `document.write`. Also, passing SpiderMonkey a
+string that isn't contiguous in memory will incur additional overhead and
+complexity, if not a full copy.
+
+*Solution:* Use **WTF-8 in parsing** and in the DOM. Servo will **convert to
+contiguous UTF-16 when necessary**. The conversion can easily be parallelized,
+if we find a practical need to convert huge chunks of text all at once.
+
+### Source span information
+
+Some html5ever API consumers want to know the originating location in the HTML
+source file(s) of each token or parse error. An example application would be a
+command-line HTML validator with diagnostic output similar to `rustc`'s.
+
+*Solution:* Accept **some metadata along with each input string**. The type of
+metadata is chosen by the API consumer; it defaults to `()`, which has size
+zero. For any non-inline string, we can provide the associated metadata as well
+as a byte offset.
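A minimal sketch of the sharing behaviour described above, using only calls that also appear in this crate's fuzz example and benches (`SliceExt::to_tendril`, `subtendril`, `push_slice`, `len`); the string contents and offsets are invented for illustration:

```rust
use tendril::{SliceExt, StrTendril};

fn main() {
    // Copy a &str into an owned tendril.
    let mut whole: StrTendril = "Hello, zero-copy world!".to_tendril();

    // Take a view of bytes 7..17. This shares the underlying buffer and
    // only bumps a thread-local (non-atomic) reference count.
    let view = whole.subtendril(7, 10);
    assert_eq!(&*view, "zero-copy ");

    // The buffer is now shared, so this mutation first copies `whole`
    // into its own storage; `view` is unaffected.
    whole.push_slice("!!");
    assert_eq!(&*view, "zero-copy ");
    assert_eq!(whole.len(), 25);
}
```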
+ +[NonZero]: https://doc.rust-lang.org/core/nonzero/struct.NonZero.html +[html5ever]: https://github.com/servo/html5ever +[WTF-8]: https://simonsapin.github.io/wtf-8/ +[rope]: https://en.wikipedia.org/wiki/Rope_%28data_structure%29 +[persistent data structure]: https://en.wikipedia.org/wiki/Persistent_data_structure +[2-3 finger tree]: https://www.staff.city.ac.uk/~ross/papers/FingerTree.html diff --git a/tendril/examples/fuzz.rs b/tendril/examples/fuzz.rs new file mode 100644 index 00000000..37daf560 --- /dev/null +++ b/tendril/examples/fuzz.rs @@ -0,0 +1,146 @@ +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! A simple fuzz tester for the library. + +#![deny(warnings)] + +extern crate rand; +extern crate tendril; + +use std::borrow::ToOwned; + +use rand::distributions::{IndependentSample, Range}; +use rand::Rng; +use tendril::StrTendril; + +fn fuzz() { + let mut rng = rand::thread_rng(); + let capacity = Range::new(0u32, 1 << 14).ind_sample(&mut rng); + let mut buf_string = String::with_capacity(capacity as usize); + let mut buf_tendril = StrTendril::with_capacity(capacity); + let mut string_slices = vec![]; + let mut tendril_slices = vec![]; + + for _ in 1..100_000 { + if buf_string.len() > (1 << 30) { + buf_string.truncate(0); + buf_tendril.clear(); + } + + let dist_action = Range::new(0, 100); + match dist_action.ind_sample(&mut rng) { + 0..=15 => { + let (start, end) = random_slice(&mut rng, TEXT); + let snip = &TEXT[start..end]; + buf_string.push_str(snip); + buf_tendril.push_slice(snip); + assert_eq!(&*buf_string, &*buf_tendril); + } + + 16..=31 => { + let (start, end) = random_slice(&mut rng, &buf_string); + let snip = &buf_string[start..end].to_owned(); + buf_string.push_str(&snip); + buf_tendril.push_slice(&snip); + assert_eq!(&*buf_string, &*buf_tendril); + } + + 32..=47 => { + let lenstr = format!("[length = {}]", buf_tendril.len()); + buf_string.push_str(&lenstr); + buf_tendril.push_slice(&lenstr); + assert_eq!(&*buf_string, &*buf_tendril); + } + + 48..=63 => { + let n = random_boundary(&mut rng, &buf_string); + buf_tendril.pop_front(n as u32); + buf_string = buf_string[n..].to_owned(); + assert_eq!(&*buf_string, &*buf_tendril); + } + + 64..=79 => { + let new_len = random_boundary(&mut rng, &buf_string); + let n = buf_string.len() - new_len; + buf_string.truncate(new_len); + buf_tendril.pop_back(n as u32); + assert_eq!(&*buf_string, &*buf_tendril); + } + + 80..=90 => { + let (start, end) = random_slice(&mut rng, &buf_string); + buf_string = buf_string[start..end].to_owned(); + buf_tendril = buf_tendril.subtendril(start as u32, (end - start) as u32); + assert_eq!(&*buf_string, &*buf_tendril); + } + + 91..=96 => { + let c = rng.gen(); + buf_string.push(c); + assert!(buf_tendril.try_push_char(c).is_ok()); + assert_eq!(&*buf_string, &*buf_tendril); + } + + 97 => { + buf_string.truncate(0); + buf_tendril.clear(); + assert_eq!(&*buf_string, &*buf_tendril); + } + + _ => { + let (start, end) = random_slice(&mut rng, &buf_string); + string_slices.push(buf_string[start..end].to_owned()); + tendril_slices.push(buf_tendril.subtendril(start as u32, (end - start) as u32)); + assert_eq!(string_slices.len(), tendril_slices.len()); + assert!(string_slices + .iter() + .zip(tendril_slices.iter()) + .all(|(s, t)| **s == **t)); + } + } + } +} + +fn random_boundary(rng: &mut R, text: &str) -> usize { + loop { + let i = Range::new(0, text.len() + 
1).ind_sample(rng); + if text.is_char_boundary(i) { + return i; + } + } +} + +fn random_slice(rng: &mut R, text: &str) -> (usize, usize) { + loop { + let start = Range::new(0, text.len() + 1).ind_sample(rng); + let end = Range::new(start, text.len() + 1).ind_sample(rng); + if !text.is_char_boundary(start) { + continue; + } + if end < text.len() && !text.is_char_boundary(end) { + continue; + } + return (start, end); + } +} + +static TEXT: &'static str = + "It was from the artists and poets that the pertinent answers came, and I \ + know that panic would have broken loose had they been able to compare notes. \ + As it was, lacking their original letters, I half suspected the compiler of \ + having asked leading questions, or of having edited the correspondence in \ + corroboration of what he had latently resolved to see.\ +\ + ˙ǝǝs oʇ pǝʌʃosǝɹ ʎʃʇuǝʇɐʃ pɐɥ ǝɥ ʇɐɥʍ ɟo uoıʇɐɹoqoɹɹoɔ uı ǝɔuǝpuodsǝɹɹoɔ ǝɥʇ \ + pǝʇıpǝ ƃuıʌɐɥ ɟo ɹo 'suoıʇsǝnb ƃuıpɐǝʃ pǝʞsɐ ƃuıʌɐɥ ɟo ɹǝʃıdɯoɔ ǝɥʇ pǝʇɔǝdsns \ + ɟʃɐɥ I 'sɹǝʇʇǝʃ ʃɐuıƃıɹo ɹıǝɥʇ ƃuıʞɔɐʃ 'sɐʍ ʇı s∀ ˙sǝʇou ǝɹɐdɯoɔ oʇ ǝʃqɐ uǝǝq \ + ʎǝɥʇ pɐɥ ǝsooʃ uǝʞoɹq ǝʌɐɥ pʃnoʍ ɔıuɐd ʇɐɥʇ ʍouʞ I puɐ 'ǝɯɐɔ sɹǝʍsuɐ ʇuǝuıʇɹǝd \ + ǝɥʇ ʇɐɥʇ sʇǝod puɐ sʇsıʇɹɐ ǝɥʇ ɯoɹɟ sɐʍ ʇI"; + +fn main() { + fuzz(); +} diff --git a/tendril/src/bench.rs b/tendril/src/bench.rs new file mode 100644 index 00000000..a9d2c30a --- /dev/null +++ b/tendril/src/bench.rs @@ -0,0 +1,159 @@ +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::borrow::ToOwned; +use std::collections::hash_map::{Entry, HashMap}; + +use tendril::StrTendril; + +fn index_words_string(input: &String) -> HashMap> { + let mut index = HashMap::new(); + for word in input.split(|c| c == ' ') { + if word.len() == 0 { + continue; + } + let word = word.to_owned(); + match index.entry(word.chars().next().unwrap()) { + Entry::Occupied(mut e) => { + let x: &mut Vec = e.get_mut(); + x.push(word); + } + Entry::Vacant(e) => { + e.insert(vec![word]); + } + } + } + index +} + +fn index_words_tendril(input: &StrTendril) -> HashMap> { + let mut index = HashMap::new(); + let mut t = input.clone(); + loop { + match t.pop_front_char_run(|c| c != ' ') { + None => return index, + Some((_, false)) => (), + Some((word, true)) => match index.entry(word.chars().next().unwrap()) { + Entry::Occupied(mut e) => { + e.get_mut().push(word); + } + Entry::Vacant(e) => { + e.insert(vec![word]); + } + }, + } + } +} + +static EN_1: &'static str = "Days turn to nights turn to paper into rocks into plastic"; + +static EN_2: &'static str = + "Here the notes in my laboratory journal cease. I was able to write the last \ + words only with great effort. By now it was already clear to me that LSD had \ + been the cause of the remarkable experience of the previous Friday, for the \ + altered perceptions were of the same type as before, only much more intense. I \ + had to struggle to speak intelligibly. I asked my laboratory assistant, who was \ + informed of the self-experiment, to escort me home. We went by bicycle, no \ + automobile being available because of wartime restrictions on their use. On the \ + way home, my condition began to assume threatening forms. Everything in my \ + field of vision wavered and was distorted as if seen in a curved mirror. I also \ + had the sensation of being unable to move from the spot. Nevertheless, my \ + assistant later told me that we had traveled very rapidly. 
Finally, we arrived \ + at home safe and sound, and I was just barely capable of asking my companion to \ + summon our family doctor and request milk from the neighbors.\n\n\ + In spite of my delirious, bewildered condition, I had brief periods of clear \ + and effective thinking—and chose milk as a nonspecific antidote for poisoning."; + +static KR_1: &'static str = + "러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, 메모리-안전하고 병렬 \ + 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. 아직 \ + 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다."; + +static HTML_KR_1: &'static str = + "

러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, \ + 메모리-안전하고 병렬 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. \ + 아직 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.

"; + +mod index_words { + macro_rules! bench { + ($txt:ident) => { + #[allow(non_snake_case)] + mod $txt { + const SMALL_SIZE: usize = 65536; + const LARGE_SIZE: usize = (1 << 20); + + #[bench] + fn index_words_string(b: &mut ::test::Bencher) { + let mut s = String::new(); + while s.len() < SMALL_SIZE { + s.push_str(::tendril::bench::$txt); + } + b.iter(|| ::tendril::bench::index_words_string(&s)); + } + + #[bench] + fn index_words_tendril(b: &mut ::test::Bencher) { + let mut t = ::tendril::StrTendril::new(); + while t.len() < SMALL_SIZE { + t.push_slice(::tendril::bench::$txt); + } + b.iter(|| ::tendril::bench::index_words_tendril(&t)); + } + + #[bench] + fn index_words_big_string(b: &mut ::test::Bencher) { + let mut s = String::new(); + while s.len() < LARGE_SIZE { + s.push_str(::tendril::bench::$txt); + } + b.iter(|| ::tendril::bench::index_words_string(&s)); + } + + #[bench] + fn index_words_big_tendril(b: &mut ::test::Bencher) { + let mut t = ::tendril::StrTendril::new(); + while t.len() < LARGE_SIZE { + t.push_slice(::tendril::bench::$txt); + } + b.iter(|| ::tendril::bench::index_words_tendril(&t)); + } + + #[test] + fn correctness() { + use std::borrow::ToOwned; + use tendril::bench::{index_words_string, index_words_tendril}; + use tendril::SliceExt; + + let txt = ::tendril::bench::$txt; + let input_string = txt.to_owned(); + let count_s = index_words_string(&input_string); + let mut keys: Vec = count_s.keys().cloned().collect(); + keys.sort(); + + let input_tendril = txt.to_tendril(); + let count_t = index_words_tendril(&input_tendril); + let mut keys_t: Vec = count_t.keys().cloned().collect(); + keys_t.sort(); + + assert_eq!(keys, keys_t); + + for k in &keys { + let vs = &count_s[k]; + let vt = &count_t[k]; + assert_eq!(vs.len(), vt.len()); + assert!(vs.iter().zip(vt.iter()).all(|(s, t)| **s == **t)); + } + } + } + }; + } + + bench!(EN_1); + bench!(EN_2); + bench!(KR_1); + bench!(HTML_KR_1); +} diff --git a/tendril/src/buf32.rs b/tendril/src/buf32.rs new file mode 100644 index 00000000..d60a277a --- /dev/null +++ b/tendril/src/buf32.rs @@ -0,0 +1,120 @@ +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Provides an unsafe owned buffer type, used in implementing `Tendril`. + +use std::{mem, ptr, slice, u32}; + +use OFLOW; + +pub const MIN_CAP: u32 = 16; + +pub const MAX_LEN: usize = u32::MAX as usize; + +/// A buffer points to a header of type `H`, which is followed by `MIN_CAP` or more +/// bytes of storage. 
+pub struct Buf32 { + pub ptr: *mut H, + pub len: u32, + pub cap: u32, +} + +#[inline(always)] +fn bytes_to_vec_capacity(x: u32) -> usize { + let header = mem::size_of::(); + debug_assert!(header > 0); + let x = (x as usize).checked_add(header).expect(OFLOW); + // Integer ceil https://stackoverflow.com/a/2745086/1162888 + 1 + ((x - 1) / header) +} + +impl Buf32 { + #[inline] + pub unsafe fn with_capacity(mut cap: u32, h: H) -> Buf32 { + if cap < MIN_CAP { + cap = MIN_CAP; + } + + let mut vec = Vec::::with_capacity(bytes_to_vec_capacity::(cap)); + let ptr = vec.as_mut_ptr(); + mem::forget(vec); + ptr::write(ptr, h); + + Buf32 { + ptr: ptr, + len: 0, + cap: cap, + } + } + + #[inline] + pub unsafe fn destroy(self) { + mem::drop(Vec::from_raw_parts( + self.ptr, + 1, + bytes_to_vec_capacity::(self.cap), + )); + } + + #[inline(always)] + pub unsafe fn data_ptr(&self) -> *mut u8 { + (self.ptr as *mut u8).offset(mem::size_of::() as isize) + } + + #[inline(always)] + pub unsafe fn data(&self) -> &[u8] { + slice::from_raw_parts(self.data_ptr(), self.len as usize) + } + + #[inline(always)] + pub unsafe fn data_mut(&mut self) -> &mut [u8] { + slice::from_raw_parts_mut(self.data_ptr(), self.len as usize) + } + + /// Grow the capacity to at least `new_cap`. + /// + /// This will panic if the capacity calculation overflows `u32`. + #[inline] + pub unsafe fn grow(&mut self, new_cap: u32) { + if new_cap <= self.cap { + return; + } + + let new_cap = new_cap.checked_next_power_of_two().expect(OFLOW); + let mut vec = Vec::from_raw_parts(self.ptr, 0, bytes_to_vec_capacity::(self.cap)); + vec.reserve_exact(bytes_to_vec_capacity::(new_cap)); + self.ptr = vec.as_mut_ptr(); + self.cap = new_cap; + mem::forget(vec); + } +} + +#[cfg(test)] +mod test { + use super::Buf32; + use std::ptr; + + #[test] + fn smoke_test() { + unsafe { + let mut b = Buf32::with_capacity(0, 0u8); + assert_eq!(b"", b.data()); + + b.grow(5); + ptr::copy_nonoverlapping(b"Hello".as_ptr(), b.data_ptr(), 5); + + assert_eq!(b"", b.data()); + b.len = 5; + assert_eq!(b"Hello", b.data()); + + b.grow(1337); + assert!(b.cap >= 1337); + assert_eq!(b"Hello", b.data()); + + b.destroy(); + } + } +} diff --git a/tendril/src/fmt.rs b/tendril/src/fmt.rs new file mode 100644 index 00000000..2ff04bbc --- /dev/null +++ b/tendril/src/fmt.rs @@ -0,0 +1,519 @@ +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Marker types for formats. +//! +//! This module defines the types and traits used to mark a `Tendril` +//! with the format of data it contains. It includes those formats +//! for which `Tendril` supports at least some operations without +//! conversion. +//! +//! To convert a string tendril to/from a byte tendril in an arbitrary +//! character encoding, see the `encode` and `decode` methods on +//! `Tendril`. +//! +//! `Tendril` operations may become memory-unsafe if data invalid for +//! the format sneaks in. For that reason, these traits require +//! `unsafe impl`. + +use std::default::Default; +use std::{char, mem, str}; + +use futf::{self, Codepoint, Meaning}; + +/// Implementation details. +/// +/// You don't need these unless you are implementing +/// a new format. +pub mod imp { + use std::default::Default; + use std::{iter, mem, slice}; + + /// Describes how to fix up encodings when concatenating. + /// + /// We can drop characters on either side of the splice, + /// and insert up to 4 bytes in the middle. 
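+    ///
+    /// (For example, `WTF8::fixup` below uses this to replace a lead/trail
+    /// surrogate pair that meets at a concatenation boundary with the single
+    /// code point that the pair encodes.)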
+ pub struct Fixup { + pub drop_left: u32, + pub drop_right: u32, + pub insert_len: u32, + pub insert_bytes: [u8; 4], + } + + impl Default for Fixup { + #[inline(always)] + fn default() -> Fixup { + Fixup { + drop_left: 0, + drop_right: 0, + insert_len: 0, + insert_bytes: [0; 4], + } + } + } + + #[inline(always)] + unsafe fn from_u32_unchecked(n: u32) -> char { + mem::transmute(n) + } + + pub struct SingleByteCharIndices<'a> { + inner: iter::Enumerate>, + } + + impl<'a> Iterator for SingleByteCharIndices<'a> { + type Item = (usize, char); + + #[inline] + fn next(&mut self) -> Option<(usize, char)> { + self.inner + .next() + .map(|(i, &b)| unsafe { (i, from_u32_unchecked(b as u32)) }) + } + } + + impl<'a> SingleByteCharIndices<'a> { + #[inline] + pub fn new(buf: &'a [u8]) -> SingleByteCharIndices<'a> { + SingleByteCharIndices { + inner: buf.iter().enumerate(), + } + } + } +} + +/// Trait for format marker types. +/// +/// The type implementing this trait is usually not instantiated. +/// It's used with a phantom type parameter of `Tendril`. +pub unsafe trait Format { + /// Check whether the buffer is valid for this format. + fn validate(buf: &[u8]) -> bool; + + /// Check whether the buffer is valid for this format. + /// + /// You may assume the buffer is a prefix of a valid buffer. + #[inline] + fn validate_prefix(buf: &[u8]) -> bool { + ::validate(buf) + } + + /// Check whether the buffer is valid for this format. + /// + /// You may assume the buffer is a suffix of a valid buffer. + #[inline] + fn validate_suffix(buf: &[u8]) -> bool { + ::validate(buf) + } + + /// Check whether the buffer is valid for this format. + /// + /// You may assume the buffer is a contiguous subsequence + /// of a valid buffer, but not necessarily a prefix or + /// a suffix. + #[inline] + fn validate_subseq(buf: &[u8]) -> bool { + ::validate(buf) + } + + /// Compute any fixup needed when concatenating buffers. + /// + /// The default is to do nothing. + /// + /// The function is `unsafe` because it may assume the input + /// buffers are already valid for the format. Also, no + /// bounds-checking is performed on the return value! + #[inline(always)] + unsafe fn fixup(_lhs: &[u8], _rhs: &[u8]) -> imp::Fixup { + Default::default() + } +} + +/// Indicates that one format is a subset of another. +/// +/// The subset format can be converted to the superset format +/// for free. +pub unsafe trait SubsetOf: Format +where + Super: Format, +{ + /// Validate the *other* direction of conversion; check if + /// this buffer from the superset format conforms to the + /// subset format. + /// + /// The default calls `Self::validate`, but some conversions + /// may implement a check which is cheaper than validating + /// from scratch. + fn revalidate_subset(x: &[u8]) -> bool { + Self::validate(x) + } +} + +/// Indicates a format which corresponds to a Rust slice type, +/// representing exactly the same invariants. +pub unsafe trait SliceFormat: Format + Sized { + type Slice: ?Sized + Slice; +} + +/// Indicates a format which contains characters from Unicode +/// (all of it, or some proper subset). +pub unsafe trait CharFormat<'a>: Format { + /// Iterator for characters and their byte indices. + type Iter: Iterator; + + /// Iterate over the characters of the string and their byte + /// indices. + /// + /// You may assume the buffer is *already validated* for `Format`. + unsafe fn char_indices(buf: &'a [u8]) -> Self::Iter; + + /// Encode the character as bytes and pass them to a continuation. 
+ /// + /// Returns `Err(())` iff the character cannot be represented. + fn encode_char(ch: char, cont: F) -> Result<(), ()> + where + F: FnOnce(&[u8]); +} + +/// Indicates a Rust slice type that is represented in memory as bytes. +pub unsafe trait Slice { + /// Access the raw bytes of the slice. + fn as_bytes(&self) -> &[u8]; + + /// Convert a byte slice to this kind of slice. + /// + /// You may assume the buffer is *already validated* + /// for `Format`. + unsafe fn from_bytes(x: &[u8]) -> &Self; + + /// Convert a byte slice to this kind of slice. + /// + /// You may assume the buffer is *already validated* + /// for `Format`. + unsafe fn from_mut_bytes(x: &mut [u8]) -> &mut Self; +} + +/// Marker type for uninterpreted bytes. +/// +/// Validation will never fail for this format. +#[derive(Copy, Clone, Default, Debug)] +pub struct Bytes; + +unsafe impl Format for Bytes { + #[inline(always)] + fn validate(_: &[u8]) -> bool { + true + } +} + +unsafe impl SliceFormat for Bytes { + type Slice = [u8]; +} + +unsafe impl Slice for [u8] { + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + self + } + + #[inline(always)] + unsafe fn from_bytes(x: &[u8]) -> &[u8] { + x + } + + #[inline(always)] + unsafe fn from_mut_bytes(x: &mut [u8]) -> &mut [u8] { + x + } +} + +/// Marker type for ASCII text. +#[derive(Copy, Clone, Default, Debug)] +pub struct ASCII; + +unsafe impl Format for ASCII { + #[inline] + fn validate(buf: &[u8]) -> bool { + buf.iter().all(|&n| n <= 127) + } + + #[inline(always)] + fn validate_prefix(_: &[u8]) -> bool { + true + } + + #[inline(always)] + fn validate_suffix(_: &[u8]) -> bool { + true + } + + #[inline(always)] + fn validate_subseq(_: &[u8]) -> bool { + true + } +} + +unsafe impl SubsetOf for ASCII {} +unsafe impl SubsetOf for ASCII {} + +unsafe impl<'a> CharFormat<'a> for ASCII { + type Iter = imp::SingleByteCharIndices<'a>; + + #[inline] + unsafe fn char_indices(buf: &'a [u8]) -> imp::SingleByteCharIndices<'a> { + imp::SingleByteCharIndices::new(buf) + } + + #[inline] + fn encode_char(ch: char, cont: F) -> Result<(), ()> + where + F: FnOnce(&[u8]), + { + let n = ch as u32; + if n > 0x7F { + return Err(()); + } + cont(&[n as u8]); + Ok(()) + } +} + +/// Marker type for UTF-8 text. +#[derive(Copy, Clone, Default, Debug)] +pub struct UTF8; + +unsafe impl Format for UTF8 { + #[inline] + fn validate(buf: &[u8]) -> bool { + str::from_utf8(buf).is_ok() + } + + #[inline] + fn validate_prefix(buf: &[u8]) -> bool { + if buf.len() == 0 { + return true; + } + match futf::classify(buf, buf.len() - 1) { + Some(Codepoint { + meaning: Meaning::Whole(_), + .. + }) => true, + _ => false, + } + } + + #[inline] + fn validate_suffix(buf: &[u8]) -> bool { + if buf.len() == 0 { + return true; + } + match futf::classify(buf, 0) { + Some(Codepoint { + meaning: Meaning::Whole(_), + .. 
+ }) => true, + _ => false, + } + } + + #[inline] + fn validate_subseq(buf: &[u8]) -> bool { + ::validate_prefix(buf) && ::validate_suffix(buf) + } +} + +unsafe impl SubsetOf for UTF8 {} + +unsafe impl SliceFormat for UTF8 { + type Slice = str; +} + +unsafe impl Slice for str { + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + str::as_bytes(self) + } + + #[inline(always)] + unsafe fn from_bytes(x: &[u8]) -> &str { + str::from_utf8_unchecked(x) + } + + #[inline(always)] + unsafe fn from_mut_bytes(x: &mut [u8]) -> &mut str { + mem::transmute(x) + } +} + +unsafe impl<'a> CharFormat<'a> for UTF8 { + type Iter = str::CharIndices<'a>; + + #[inline] + unsafe fn char_indices(buf: &'a [u8]) -> str::CharIndices<'a> { + str::from_utf8_unchecked(buf).char_indices() + } + + #[inline] + fn encode_char(ch: char, cont: F) -> Result<(), ()> + where + F: FnOnce(&[u8]), + { + cont(ch.encode_utf8(&mut [0_u8; 4]).as_bytes()); + Ok(()) + } +} + +/// Marker type for WTF-8 text. +/// +/// See the [WTF-8 spec](https://simonsapin.github.io/wtf-8/). +#[derive(Copy, Clone, Default, Debug)] +pub struct WTF8; + +#[inline] +fn wtf8_meaningful(m: Meaning) -> bool { + match m { + Meaning::Whole(_) | Meaning::LeadSurrogate(_) | Meaning::TrailSurrogate(_) => true, + _ => false, + } +} + +unsafe impl Format for WTF8 { + #[inline] + fn validate(buf: &[u8]) -> bool { + let mut i = 0; + let mut prev_lead = false; + while i < buf.len() { + let codept = unwrap_or_return!(futf::classify(buf, i), false); + if !wtf8_meaningful(codept.meaning) { + return false; + } + i += codept.bytes.len(); + prev_lead = match codept.meaning { + Meaning::TrailSurrogate(_) if prev_lead => return false, + Meaning::LeadSurrogate(_) => true, + _ => false, + }; + } + + true + } + + #[inline] + fn validate_prefix(buf: &[u8]) -> bool { + if buf.len() == 0 { + return true; + } + match futf::classify(buf, buf.len() - 1) { + Some(c) => wtf8_meaningful(c.meaning), + _ => false, + } + } + + #[inline] + fn validate_suffix(buf: &[u8]) -> bool { + if buf.len() == 0 { + return true; + } + match futf::classify(buf, 0) { + Some(c) => wtf8_meaningful(c.meaning), + _ => false, + } + } + + #[inline] + fn validate_subseq(buf: &[u8]) -> bool { + ::validate_prefix(buf) && ::validate_suffix(buf) + } + + #[inline] + unsafe fn fixup(lhs: &[u8], rhs: &[u8]) -> imp::Fixup { + const ERR: &'static str = "WTF8: internal error"; + + if lhs.len() >= 3 && rhs.len() >= 3 { + if let ( + Some(Codepoint { + meaning: Meaning::LeadSurrogate(hi), + .. + }), + Some(Codepoint { + meaning: Meaning::TrailSurrogate(lo), + .. + }), + ) = (futf::classify(lhs, lhs.len() - 1), futf::classify(rhs, 0)) + { + let mut fixup = imp::Fixup { + drop_left: 3, + drop_right: 3, + insert_len: 0, + insert_bytes: [0_u8; 4], + }; + + let n = 0x10000 + ((hi as u32) << 10) + (lo as u32); + + let ch = char::from_u32(n).expect(ERR); + fixup.insert_len = ch.encode_utf8(&mut fixup.insert_bytes).len() as u32; + + return fixup; + } + } + + Default::default() + } +} + +/// Marker type for the single-byte encoding of the first 256 Unicode codepoints. +/// +/// This is IANA's "ISO-8859-1". It's ISO's "ISO 8859-1" with the addition of the +/// C0 and C1 control characters from ECMA-48 / ISO 6429. +/// +/// Not to be confused with WHATWG's "latin1" or "iso8859-1" labels (or the +/// many other aliases), which actually stand for Windows-1252. 
+#[derive(Copy, Clone, Default, Debug)] +pub struct Latin1; + +unsafe impl Format for Latin1 { + #[inline(always)] + fn validate(_: &[u8]) -> bool { + true + } + + #[inline(always)] + fn validate_prefix(_: &[u8]) -> bool { + true + } + + #[inline(always)] + fn validate_suffix(_: &[u8]) -> bool { + true + } + + #[inline(always)] + fn validate_subseq(_: &[u8]) -> bool { + true + } +} + +unsafe impl<'a> CharFormat<'a> for Latin1 { + type Iter = imp::SingleByteCharIndices<'a>; + + #[inline] + unsafe fn char_indices(buf: &'a [u8]) -> imp::SingleByteCharIndices<'a> { + imp::SingleByteCharIndices::new(buf) + } + + #[inline] + fn encode_char(ch: char, cont: F) -> Result<(), ()> + where + F: FnOnce(&[u8]), + { + let n = ch as u32; + if n > 0xFF { + return Err(()); + } + cont(&[n as u8]); + Ok(()) + } +} diff --git a/tendril/src/futf.rs b/tendril/src/futf.rs new file mode 100644 index 00000000..93a1c21e --- /dev/null +++ b/tendril/src/futf.rs @@ -0,0 +1,565 @@ +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::{char, slice}; + +/// Meaning of a complete or partial UTF-8 codepoint. +/// +/// Not all checking is performed eagerly. That is, a codepoint `Prefix` or +/// `Suffix` may in reality have no valid completion. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] +pub enum Meaning { + /// We found a whole codepoint. + Whole(char), + + /// We found something that isn't a valid Unicode codepoint, but + /// it *would* correspond to a UTF-16 leading surrogate code unit, + /// i.e. a value in the range `U+D800` - `U+DBFF`. + /// + /// The argument is the code unit's 10-bit index within that range. + /// + /// These are found in UTF-8 variants such as CESU-8 and WTF-8. + LeadSurrogate(u16), + + /// We found something that isn't a valid Unicode codepoint, but + /// it *would* correspond to a UTF-16 trailing surrogate code unit, + /// i.e. a value in the range `U+DC00` - `U+DFFF`. + /// + /// The argument is the code unit's 10-bit index within that range. + /// + /// These are found in UTF-8 variants such as CESU-8 and WTF-8. + TrailSurrogate(u16), + + /// We found only a prefix of a codepoint before the buffer ended. + /// + /// Includes the number of additional bytes needed. + Prefix(usize), + + /// We found only a suffix of a codepoint before running off the + /// start of the buffer. + /// + /// Up to 3 more bytes may be needed. + Suffix, +} + +/// Represents a complete or partial UTF-8 codepoint. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] +pub struct Codepoint<'a> { + /// The bytes that make up the partial or full codepoint. + /// + /// For a `Suffix` this depends on `idx`. We don't scan forward + /// for additional continuation bytes after the reverse scan + /// failed to locate a multibyte sequence start. + pub bytes: &'a [u8], + + /// Start of the codepoint in the buffer, expressed as an offset + /// back from `idx`. + pub rewind: usize, + + /// Meaning of the partial or full codepoint. 
+ pub meaning: Meaning, +} + +#[derive(Debug, PartialEq, Eq)] +enum Byte { + Ascii, + Start(usize), + Cont, +} + +impl Byte { + #[inline(always)] + fn classify(x: u8) -> Option { + match x & 0xC0 { + 0xC0 => match x { + x if x & 0b11111_000 == 0b11110_000 => Some(Byte::Start(4)), + x if x & 0b1111_0000 == 0b1110_0000 => Some(Byte::Start(3)), + x if x & 0b111_00000 == 0b110_00000 => Some(Byte::Start(2)), + _ => None, + }, + 0x80 => Some(Byte::Cont), + _ => Some(Byte::Ascii), + } + } +} + +#[inline(always)] +fn all_cont(buf: &[u8]) -> bool { + buf.iter() + .all(|&b| matches!(Byte::classify(b), Some(Byte::Cont))) +} + +// NOTE: Assumes the buffer is a syntactically valid multi-byte UTF-8 sequence: +// a starting byte followed by the correct number of continuation bytes. +#[inline(always)] +unsafe fn decode(buf: &[u8]) -> Option { + debug_assert!(buf.len() >= 2); + debug_assert!(buf.len() <= 4); + let n; + match buf.len() { + 2 => { + n = ((*buf.get_unchecked(0) & 0b11111) as u32) << 6 + | ((*buf.get_unchecked(1) & 0x3F) as u32); + if n < 0x80 { + return None; + } // Overlong + } + 3 => { + n = ((*buf.get_unchecked(0) & 0b1111) as u32) << 12 + | ((*buf.get_unchecked(1) & 0x3F) as u32) << 6 + | ((*buf.get_unchecked(2) & 0x3F) as u32); + match n { + 0x0000..=0x07FF => return None, // Overlong + 0xD800..=0xDBFF => return Some(Meaning::LeadSurrogate(n as u16 - 0xD800)), + 0xDC00..=0xDFFF => return Some(Meaning::TrailSurrogate(n as u16 - 0xDC00)), + _ => {} + } + } + 4 => { + n = ((*buf.get_unchecked(0) & 0b111) as u32) << 18 + | ((*buf.get_unchecked(1) & 0x3F) as u32) << 12 + | ((*buf.get_unchecked(2) & 0x3F) as u32) << 6 + | ((*buf.get_unchecked(3) & 0x3F) as u32); + if n < 0x1_0000 { + return None; + } // Overlong + } + _ => debug_unreachable!(), + } + + char::from_u32(n).map(Meaning::Whole) +} + +#[inline(always)] +unsafe fn unsafe_slice<'a>(buf: &'a [u8], start: usize, new_len: usize) -> &'a [u8] { + debug_assert!(start <= buf.len()); + debug_assert!(new_len <= (buf.len() - start)); + slice::from_raw_parts(buf.as_ptr().offset(start as isize), new_len) +} + +macro_rules! otry { + ($x:expr) => { + unwrap_or_return!($x, None) + }; +} + +/// Describes the UTF-8 codepoint containing the byte at index `idx` within +/// `buf`. +/// +/// Returns `None` if `idx` is out of range, or if `buf` contains invalid UTF-8 +/// in the vicinity of `idx`. +#[inline] +pub fn classify<'a>(buf: &'a [u8], idx: usize) -> Option> { + if idx >= buf.len() { + return None; + } + + unsafe { + let x = *buf.get_unchecked(idx); + match otry!(Byte::classify(x)) { + Byte::Ascii => Some(Codepoint { + bytes: unsafe_slice(buf, idx, 1), + rewind: 0, + meaning: Meaning::Whole(x as char), + }), + Byte::Start(n) => { + let avail = buf.len() - idx; + if avail >= n { + let bytes = unsafe_slice(buf, idx, n); + if !all_cont(unsafe_slice(bytes, 1, n - 1)) { + return None; + } + let meaning = otry!(decode(bytes)); + Some(Codepoint { + bytes: bytes, + rewind: 0, + meaning: meaning, + }) + } else { + Some(Codepoint { + bytes: unsafe_slice(buf, idx, avail), + rewind: 0, + meaning: Meaning::Prefix(n - avail), + }) + } + } + Byte::Cont => { + let mut start = idx; + let mut checked = 0; + loop { + if start == 0 { + // Whoops, fell off the beginning. 
+ return Some(Codepoint { + bytes: unsafe_slice(buf, 0, idx + 1), + rewind: idx, + meaning: Meaning::Suffix, + }); + } + + start -= 1; + checked += 1; + match otry!(Byte::classify(*buf.get_unchecked(start))) { + Byte::Cont => (), + Byte::Start(n) => { + let avail = buf.len() - start; + if avail >= n { + let bytes = unsafe_slice(buf, start, n); + if checked < n { + if !all_cont(unsafe_slice(bytes, checked, n - checked)) { + return None; + } + } + let meaning = otry!(decode(bytes)); + return Some(Codepoint { + bytes: bytes, + rewind: idx - start, + meaning: meaning, + }); + } else { + return Some(Codepoint { + bytes: unsafe_slice(buf, start, avail), + rewind: idx - start, + meaning: Meaning::Prefix(n - avail), + }); + } + } + _ => return None, + } + + if idx - start >= 3 { + // We looked at 3 bytes before a continuation byte + // and didn't find a start byte. + return None; + } + } + } + } + } +} + +#[cfg(all(test, feature = "bench"))] +mod tests { + use super::{all_cont, classify, decode, Byte, Meaning}; + use std::borrow::ToOwned; + use std::io::Write; + use test::Bencher; + + #[test] + fn classify_all_bytes() { + for n in 0x00..0x80 { + assert_eq!(Byte::classify(n), Some(Byte::Ascii)); + } + for n in 0x80..0xC0 { + assert_eq!(Byte::classify(n), Some(Byte::Cont)); + } + for n in 0xC0..0xE0 { + assert_eq!(Byte::classify(n), Some(Byte::Start(2))); + } + for n in 0xE0..0xF0 { + assert_eq!(Byte::classify(n), Some(Byte::Start(3))); + } + for n in 0xF0..0xF8 { + assert_eq!(Byte::classify(n), Some(Byte::Start(4))); + } + for n in 0xF8..0xFF { + assert_eq!(Byte::classify(n), None); + } + assert_eq!(Byte::classify(0xFF), None); + } + + #[test] + fn test_all_cont() { + assert!(all_cont(b"")); + assert!(all_cont(b"\x80")); + assert!(all_cont(b"\xBF")); + assert!(all_cont(b"\x80\xBF\x80\xBF")); + + assert!(!all_cont(b"z")); + assert!(!all_cont(b"\xC0\xBF")); + assert!(!all_cont(b"\xFF")); + assert!(!all_cont(b"\x80\xBFz\x80\xBF")); + assert!(!all_cont(b"\x80\xBF\xC0\x80\xBF")); + assert!(!all_cont(b"\x80\xBF\xFF\x80\xBF")); + assert!(!all_cont(b"\x80\xBF\x80\xBFz")); + assert!(!all_cont(b"\x80\xBF\x80\xBF\xC0")); + assert!(!all_cont(b"z\x80\xBF\x80\xBF")); + assert!(!all_cont(b"\xC0\x80\xBF\x80\xBF")); + } + + #[test] + fn test_decode() { + unsafe { + assert_eq!(Some(Meaning::Whole('ő')), decode(b"\xC5\x91")); + assert_eq!(Some(Meaning::Whole('\u{a66e}')), decode(b"\xEA\x99\xAE")); + assert_eq!( + Some(Meaning::Whole('\u{1f4a9}')), + decode(b"\xF0\x9F\x92\xA9") + ); + assert_eq!( + Some(Meaning::Whole('\u{10ffff}')), + decode(b"\xF4\x8F\xBF\xBF") + ); + + assert_eq!( + Some(Meaning::LeadSurrogate(0x0000)), + decode(b"\xED\xA0\x80") + ); + assert_eq!( + Some(Meaning::LeadSurrogate(0x0001)), + decode(b"\xED\xA0\x81") + ); + assert_eq!( + Some(Meaning::LeadSurrogate(0x03FE)), + decode(b"\xED\xAF\xBE") + ); + assert_eq!( + Some(Meaning::LeadSurrogate(0x03FF)), + decode(b"\xED\xAF\xBF") + ); + + assert_eq!( + Some(Meaning::TrailSurrogate(0x0000)), + decode(b"\xED\xB0\x80") + ); + assert_eq!( + Some(Meaning::TrailSurrogate(0x0001)), + decode(b"\xED\xB0\x81") + ); + assert_eq!( + Some(Meaning::TrailSurrogate(0x03FE)), + decode(b"\xED\xBF\xBE") + ); + assert_eq!( + Some(Meaning::TrailSurrogate(0x03FF)), + decode(b"\xED\xBF\xBF") + ); + + // The last 4-byte UTF-8 sequence. This would be U+1FFFFF, which is out of + // range. 
+ assert_eq!(None, decode(b"\xF7\xBF\xBF\xBF")); + + // First otherwise-valid sequence (would be U+110000) that is out of range + assert_eq!(None, decode(b"\xF4\x90\x80\x80")); + + // Overlong sequences + assert_eq!(None, decode(b"\xC0\x80")); + assert_eq!(None, decode(b"\xC1\xBF")); + assert_eq!(None, decode(b"\xE0\x80\x80")); + assert_eq!(None, decode(b"\xE0\x9F\xBF")); + assert_eq!(None, decode(b"\xF0\x80\x80\x80")); + assert_eq!(None, decode(b"\xF0\x8F\xBF\xBF")); + + // For not-overlong sequence for each sequence length + assert_eq!(Some(Meaning::Whole('\u{80}')), decode(b"\xC2\x80")); + assert_eq!(Some(Meaning::Whole('\u{800}')), decode(b"\xE0\xA0\x80")); + assert_eq!( + Some(Meaning::Whole('\u{10000}')), + decode(b"\xF0\x90\x80\x80") + ); + } + } + + static JUNK: &'static [u8] = b"\ + \xf8\x0d\x07\x25\xa6\x7b\x95\xeb\x47\x01\x7f\xee\ + \x3b\x00\x60\x57\x1d\x9e\x5d\x0a\x0b\x0a\x7c\x75\ + \x13\xa1\x82\x46\x27\x34\xe9\x52\x61\x0d\xec\x10\ + \x54\x49\x6e\x54\xdf\x7b\xe1\x31\x8c\x06\x21\x83\ + \x0f\xb5\x1f\x4c\x6a\x71\x52\x42\x74\xe7\x7b\x50\ + \x59\x1f\x6a\xd4\xff\x06\x92\x33\xc4\x34\x97\xff\ + \xcc\xb5\xc4\x00\x7b\xc3\x4a\x7f\x7e\x63\x96\x58\ + \x51\x63\x21\x54\x53\x2f\x03\x8a\x7d\x41\x79\x98\ + \x5b\xcb\xb8\x94\x6b\x73\xf3\x0c\x5a\xd7\xc4\x12\ + \x7a\x2b\x9a\x2e\x67\x62\x2a\x00\x45\x2c\xfe\x7d\ + \x8d\xd6\x51\x4e\x59\x36\x72\x1b\xae\xaa\x06\xe8\ + \x71\x1b\x85\xd3\x35\xb5\xbe\x9e\x16\x96\x72\xd8\ + \x1a\x48\xba\x4d\x55\x4f\x1b\xa2\x77\xfa\x8f\x71\ + \x58\x7d\x03\x93\xa2\x3a\x76\x51\xda\x48\xe2\x3f\ + \xeb\x8d\xda\x89\xae\xf7\xbd\x3d\xb6\x37\x97\xca\ + \x99\xcc\x4a\x8d\x62\x89\x97\xe3\xc0\xd1\x8d\xc1\ + \x26\x11\xbb\x8d\x53\x61\x4f\x76\x03\x00\x30\xd3\ + \x5f\x86\x19\x52\x9c\x3e\x99\x8c\xb7\x21\x48\x1c\ + \x85\xae\xad\xd5\x74\x00\x6c\x3e\xd0\x17\xff\x76\ + \x5c\x32\xc3\xfb\x24\x99\xd4\x4c\xa4\x1f\x66\x46\ + \xe7\x2d\x44\x56\x7d\x14\xd9\x76\x91\x37\x2f\xb7\ + \xcc\x1b\xd3\xc2"; + + #[test] + fn classify_whole() { + assert_eq!(JUNK.len(), 256); + + for &c in &[ + '\0', + '\x01', + 'o', + 'z', + 'ő', + '\u{2764}', + '\u{a66e}', + '\u{1f4a9}', + '\u{1f685}', + ] { + for idx in 0..JUNK.len() - 3 { + let mut buf = JUNK.to_owned(); + let ch = format!("{}", c).into_bytes(); + (&mut buf[idx..]).write_all(&ch).unwrap(); + + for j in 0..ch.len() { + let class = classify(&buf, idx + j).unwrap(); + assert_eq!(class.bytes, &*ch); + assert_eq!(class.rewind, j); + assert_eq!(class.meaning, Meaning::Whole(c)); + } + } + } + } + + #[test] + fn classify_surrogates() { + for &(s, b) in &[ + (Meaning::LeadSurrogate(0x0000), b"\xED\xA0\x80"), + (Meaning::LeadSurrogate(0x0001), b"\xED\xA0\x81"), + (Meaning::LeadSurrogate(0x03FE), b"\xED\xAF\xBE"), + (Meaning::LeadSurrogate(0x03FF), b"\xED\xAF\xBF"), + (Meaning::TrailSurrogate(0x0000), b"\xED\xB0\x80"), + (Meaning::TrailSurrogate(0x0001), b"\xED\xB0\x81"), + (Meaning::TrailSurrogate(0x03FE), b"\xED\xBF\xBE"), + (Meaning::TrailSurrogate(0x03FF), b"\xED\xBF\xBF"), + ] { + for idx in 0..JUNK.len() - 2 { + let mut buf = JUNK.to_owned(); + (&mut buf[idx..]).write_all(b).unwrap(); + + let class = classify(&buf, idx).unwrap(); + assert_eq!(class.bytes, b); + assert_eq!(class.rewind, 0); + assert_eq!(class.meaning, s); + } + } + } + + #[test] + fn classify_prefix_suffix() { + for &c in &['ő', '\u{a66e}', '\u{1f4a9}'] { + let ch = format!("{}", c).into_bytes(); + for pfx in 1..ch.len() - 1 { + let mut buf = JUNK.to_owned(); + let buflen = buf.len(); + (&mut buf[buflen - pfx..buflen]) + .write_all(&ch[..pfx]) + .unwrap(); + for j in 0..pfx { + let idx = buflen - 1 - j; 
+ let class = classify(&buf, idx).unwrap(); + assert_eq!(class.bytes, &ch[..pfx]); + assert_eq!(class.rewind, pfx - 1 - j); + assert_eq!(class.meaning, Meaning::Prefix(ch.len() - pfx)); + } + } + for sfx in 1..ch.len() - 1 { + let ch_bytes = &ch[ch.len() - sfx..]; + let mut buf = JUNK.to_owned(); + (&mut *buf).write_all(ch_bytes).unwrap(); + for j in 0..sfx { + let class = classify(&buf, j).unwrap(); + assert!(ch_bytes.starts_with(class.bytes)); + assert_eq!(class.rewind, j); + assert_eq!(class.meaning, Meaning::Suffix); + } + } + } + } + + #[test] + fn out_of_bounds() { + assert!(classify(b"", 0).is_none()); + assert!(classify(b"", 7).is_none()); + assert!(classify(b"aaaaaaa", 7).is_none()); + } + + #[test] + fn malformed() { + assert_eq!(None, classify(b"\xFF", 0)); + assert_eq!(None, classify(b"\xC5\xC5", 0)); + assert_eq!(None, classify(b"x\x91", 1)); + assert_eq!(None, classify(b"\x91\x91\x91\x91", 3)); + assert_eq!(None, classify(b"\x91\x91\x91\x91\x91", 4)); + assert_eq!(None, classify(b"\xEA\x91\xFF", 1)); + assert_eq!(None, classify(b"\xF0\x90\x90\xF0", 0)); + assert_eq!(None, classify(b"\xF0\x90\x90\xF0", 1)); + assert_eq!(None, classify(b"\xF0\x90\x90\xF0", 2)); + + for i in 0..4 { + // out of range: U+110000 + assert_eq!(None, classify(b"\xF4\x90\x80\x80", i)); + + // out of range: U+1FFFFF + assert_eq!(None, classify(b"\xF7\xBF\xBF\xBF", i)); + + // Overlong sequences + assert_eq!(None, classify(b"\xC0\x80", i)); + assert_eq!(None, classify(b"\xC1\xBF", i)); + assert_eq!(None, classify(b"\xE0\x80\x80", i)); + assert_eq!(None, classify(b"\xE0\x9F\xBF", i)); + assert_eq!(None, classify(b"\xF0\x80\x80\x80", i)); + assert_eq!(None, classify(b"\xF0\x8F\xBF\xBF", i)); + } + } + + static TEXT: &'static str = " + All human beings are born free and equal in dignity and rights. + They are endowed with reason and conscience and should act + towards one another in a spirit of brotherhood. + + Minden emberi lény szabadon születik és egyenlő méltósága és + joga van. Az emberek, ésszel és lelkiismerettel bírván, + egymással szemben testvéri szellemben kell hogy viseltessenek. + + เราทุกคนเกิดมาอย่างอิสระ เราทุกคนมีความคิดและความเข้าใจเป็นของเราเอง + เราทุกคนควรได้รับการปฏิบัติในทางเดียวกัน. + + 모든 인간은 태어날 때부터 자유로우며 그 존엄과 권리에 있어 + 동등하다. 인간은 천부적으로 이성과 양심을 부여받았으며 서로 + 형제애의 정신으로 행동하여야 한다. + + ro remna cu se jinzi co zifre je simdu'i be le ry. nilselsi'a + .e lei ry. selcru .i ry. se menli gi'e se sezmarde .i .ei + jeseki'ubo ry. simyzu'e ta'i le tunba + + ᏂᎦᏓ ᎠᏂᏴᏫ ᏂᎨᎫᏓᎸᎾ ᎠᎴ ᎤᏂᏠᏱ ᎤᎾᏕᎿ ᏚᏳᎧᏛ ᎨᏒᎢ. ᎨᏥᏁᎳ ᎤᎾᏓᏅᏖᏗ ᎠᎴ ᎤᏃᏟᏍᏗ + ᎠᎴ ᏌᏊ ᎨᏒ ᏧᏂᎸᏫᏍᏓᏁᏗ ᎠᎾᏟᏅᏢ ᎠᏓᏅᏙ ᎬᏗ."; + + // random + static IXES: &'static [usize] = &[ + 778, 156, 87, 604, 1216, 365, 884, 311, 469, 515, 709, 162, 871, 206, 634, 442, + ]; + + static BOUNDARY: &'static [bool] = &[ + false, true, true, false, false, true, true, true, true, false, false, true, true, true, + false, false, + ]; + + #[bench] + fn std_utf8_check(b: &mut Bencher) { + b.iter(|| { + assert!(IXES + .iter() + .zip(BOUNDARY.iter()) + .all(|(&ix, &expect)| { expect == TEXT.is_char_boundary(ix) })); + }); + } + + // We don't expect to be as fast as is_char_boundary, because we provide more + // information. But we shouldn't be tremendously slower, either. A factor of + // 5-10 is expected on this text. 
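+    // futf_check below answers the same boundary question via classify(): an index
+    // is a char boundary exactly when the returned classification's rewind is 0.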
+ #[bench] + fn futf_check(b: &mut Bencher) { + b.iter(|| { + assert!(IXES.iter().zip(BOUNDARY.iter()).all(|(&ix, &expect)| { + expect == (classify(TEXT.as_bytes(), ix).unwrap().rewind == 0) + })); + }); + } +} diff --git a/tendril/src/lib.rs b/tendril/src/lib.rs new file mode 100644 index 00000000..fadc2cab --- /dev/null +++ b/tendril/src/lib.rs @@ -0,0 +1,37 @@ +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![cfg_attr(all(test, feature = "bench"), feature(test))] +//#![cfg_attr(test, deny(warnings))] + +#[macro_use] +extern crate debug_unreachable; +#[cfg(feature = "encoding")] +pub extern crate encoding; +#[cfg(feature = "encoding_rs")] +pub extern crate encoding_rs; +#[cfg(all(test, feature = "bench"))] +extern crate test; +#[macro_use] +extern crate mac; +extern crate utf8; + +pub use fmt::Format; +pub use stream::TendrilSink; +pub use tendril::{Atomic, Atomicity, NonAtomic, SendTendril}; +pub use tendril::{ByteTendril, ReadExt, SliceExt, StrTendril, SubtendrilError, Tendril}; +pub use utf8_decode::IncompleteUtf8; + +pub mod fmt; +pub mod stream; + +mod buf32; +mod futf; +mod tendril; +mod utf8_decode; +mod util; + +static OFLOW: &'static str = "tendril: overflow in buffer arithmetic"; diff --git a/tendril/src/stream.rs b/tendril/src/stream.rs new file mode 100644 index 00000000..469d58c9 --- /dev/null +++ b/tendril/src/stream.rs @@ -0,0 +1,752 @@ +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Streams of tendrils. + +use fmt; +use tendril::{Atomicity, NonAtomic, Tendril}; + +use std::borrow::Cow; +use std::fs::File; +use std::io; +use std::marker::PhantomData; +use std::path::Path; + +#[cfg(feature = "encoding")] +use encoding; +#[cfg(feature = "encoding_rs")] +use encoding_rs::{self, DecoderResult}; +use utf8; + +/// Trait for types that can process a tendril. +/// +/// This is a "push" interface, unlike the "pull" interface of +/// `Iterator>`. The push interface matches +/// [html5ever][] and other incremental parsers with a similar +/// architecture. +/// +/// [html5ever]: https://github.com/servo/html5ever +pub trait TendrilSink +where + F: fmt::Format, + A: Atomicity, +{ + /// Process this tendril. + fn process(&mut self, t: Tendril); + + /// Indicates that an error has occurred. + fn error(&mut self, desc: Cow<'static, str>); + + /// What the overall result of processing is. + type Output; + + /// Indicates the end of the stream. + fn finish(self) -> Self::Output; + + /// Process one tendril and finish. + fn one(mut self, t: T) -> Self::Output + where + Self: Sized, + T: Into>, + { + self.process(t.into()); + self.finish() + } + + /// Consume an iterator of tendrils, processing each item, then finish. + fn from_iter(mut self, i: I) -> Self::Output + where + Self: Sized, + I: IntoIterator, + I::Item: Into>, + { + for t in i { + self.process(t.into()) + } + self.finish() + } + + /// Read from the given stream of bytes until exhaustion and process incrementally, + /// then finish. Return `Err` at the first I/O error. 
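+    ///
+    /// A minimal usage sketch (illustrative only; `sink` is any byte-oriented
+    /// `TendrilSink` and the file name is made up):
+    ///
+    /// ```ignore
+    /// let mut file = std::fs::File::open("input.bin")?;
+    /// let output = sink.read_from(&mut file)?; // `process` per chunk, then `finish`
+    /// ```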
+ fn read_from(mut self, r: &mut R) -> io::Result + where + Self: Sized, + R: io::Read, + F: fmt::SliceFormat, + { + const BUFFER_SIZE: u32 = 4 * 1024; + loop { + let mut tendril = Tendril::::new(); + // FIXME: this exposes uninitialized bytes to a generic R type + // this is fine for R=File which never reads these bytes, + // but user-defined types might. + // The standard library pushes zeros to `Vec` for that reason. + unsafe { + tendril.push_uninitialized(BUFFER_SIZE); + } + loop { + match r.read(&mut tendril) { + Ok(0) => return Ok(self.finish()), + Ok(n) => { + tendril.pop_back(BUFFER_SIZE - n as u32); + self.process(tendril); + break; + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => return Err(e), + } + } + } + } + + /// Read from the file at the given path and process incrementally, + /// then finish. Return `Err` at the first I/O error. + fn from_file
<P>
(self, path: P) -> io::Result + where + Self: Sized, + P: AsRef, + F: fmt::SliceFormat, + { + self.read_from(&mut File::open(path)?) + } +} + +/// A `TendrilSink` adaptor that takes bytes, decodes them as UTF-8, +/// lossily replace ill-formed byte sequences with U+FFFD replacement characters, +/// and emits Unicode (`StrTendril`). +/// +/// This does not allocate memory: the output is either subtendrils on the input, +/// on inline tendrils for a single code point. +pub struct Utf8LossyDecoder +where + Sink: TendrilSink, + A: Atomicity, +{ + pub inner_sink: Sink, + incomplete: Option, + marker: PhantomData, +} + +impl Utf8LossyDecoder +where + Sink: TendrilSink, + A: Atomicity, +{ + /// Create a new incremental UTF-8 decoder. + #[inline] + pub fn new(inner_sink: Sink) -> Self { + Utf8LossyDecoder { + inner_sink: inner_sink, + incomplete: None, + marker: PhantomData, + } + } +} + +impl TendrilSink for Utf8LossyDecoder +where + Sink: TendrilSink, + A: Atomicity, +{ + #[inline] + fn process(&mut self, mut t: Tendril) { + // FIXME: remove take() and map() when non-lexical borrows are stable. + if let Some(mut incomplete) = self.incomplete.take() { + let resume_at = incomplete.try_complete(&t).map(|(result, rest)| { + match result { + Ok(s) => self.inner_sink.process(Tendril::from_slice(s)), + Err(_) => { + self.inner_sink.error("invalid byte sequence".into()); + self.inner_sink + .process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); + } + } + t.len() - rest.len() + }); + match resume_at { + None => { + self.incomplete = Some(incomplete); + return; + } + Some(resume_at) => t.pop_front(resume_at as u32), + } + } + while !t.is_empty() { + let unborrowed_result = match utf8::decode(&t) { + Ok(s) => { + debug_assert!(s.as_ptr() == t.as_ptr()); + debug_assert!(s.len() == t.len()); + Ok(()) + } + Err(utf8::DecodeError::Invalid { + valid_prefix, + invalid_sequence, + .. 
+ }) => { + debug_assert!(valid_prefix.as_ptr() == t.as_ptr()); + debug_assert!(valid_prefix.len() <= t.len()); + Err(( + valid_prefix.len(), + Err(valid_prefix.len() + invalid_sequence.len()), + )) + } + Err(utf8::DecodeError::Incomplete { + valid_prefix, + incomplete_suffix, + }) => { + debug_assert!(valid_prefix.as_ptr() == t.as_ptr()); + debug_assert!(valid_prefix.len() <= t.len()); + Err((valid_prefix.len(), Ok(incomplete_suffix))) + } + }; + match unborrowed_result { + Ok(()) => { + unsafe { self.inner_sink.process(t.reinterpret_without_validating()) } + return; + } + Err((valid_len, and_then)) => { + if valid_len > 0 { + let subtendril = t.subtendril(0, valid_len as u32); + unsafe { + self.inner_sink + .process(subtendril.reinterpret_without_validating()) + } + } + match and_then { + Ok(incomplete) => { + self.incomplete = Some(incomplete); + return; + } + Err(offset) => { + self.inner_sink.error("invalid byte sequence".into()); + self.inner_sink + .process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); + t.pop_front(offset as u32); + } + } + } + } + } + } + + #[inline] + fn error(&mut self, desc: Cow<'static, str>) { + self.inner_sink.error(desc); + } + + type Output = Sink::Output; + + #[inline] + fn finish(mut self) -> Sink::Output { + if self.incomplete.is_some() { + self.inner_sink + .error("incomplete byte sequence at end of stream".into()); + self.inner_sink + .process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); + } + self.inner_sink.finish() + } +} + +/// A `TendrilSink` adaptor that takes bytes, decodes them as the given character encoding, +/// lossily replace ill-formed byte sequences with U+FFFD replacement characters, +/// and emits Unicode (`StrTendril`). +/// +/// This allocates new tendrils for encodings other than UTF-8. +#[cfg(any(feature = "encoding", feature = "encoding_rs"))] +pub struct LossyDecoder +where + Sink: TendrilSink, + A: Atomicity, +{ + inner: LossyDecoderInner, +} + +#[cfg(any(feature = "encoding", feature = "encoding_rs"))] +enum LossyDecoderInner +where + Sink: TendrilSink, + A: Atomicity, +{ + Utf8(Utf8LossyDecoder), + #[cfg(feature = "encoding")] + Encoding(Box, Sink), + #[cfg(feature = "encoding_rs")] + EncodingRs(encoding_rs::Decoder, Sink), +} + +#[cfg(any(feature = "encoding", feature = "encoding_rs"))] +impl LossyDecoder +where + Sink: TendrilSink, + A: Atomicity, +{ + /// Create a new incremental decoder using the encoding crate. + #[cfg(feature = "encoding")] + #[inline] + pub fn new(encoding: encoding::EncodingRef, sink: Sink) -> Self { + if encoding.name() == "utf-8" { + LossyDecoder::utf8(sink) + } else { + LossyDecoder { + inner: LossyDecoderInner::Encoding(encoding.raw_decoder(), sink), + } + } + } + + /// Create a new incremental decoder using the encoding_rs crate. + #[cfg(feature = "encoding_rs")] + #[inline] + pub fn new_encoding_rs(encoding: &'static encoding_rs::Encoding, sink: Sink) -> Self { + if encoding == encoding_rs::UTF_8 { + return Self::utf8(sink); + } + Self { + inner: LossyDecoderInner::EncodingRs(encoding.new_decoder(), sink), + } + } + + /// Create a new incremental decoder for the UTF-8 encoding. + /// + /// This is useful for content that is known at run-time to be UTF-8 + /// (whereas `Utf8LossyDecoder` requires knowning at compile-time.) + #[inline] + pub fn utf8(sink: Sink) -> LossyDecoder { + LossyDecoder { + inner: LossyDecoderInner::Utf8(Utf8LossyDecoder::new(sink)), + } + } + + /// Give a reference to the inner sink. 
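+    ///
+    /// (The decoder still owns the sink; calling `finish` consumes the decoder and
+    /// returns the inner sink's own output.)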
+ pub fn inner_sink(&self) -> &Sink { + match self.inner { + LossyDecoderInner::Utf8(ref utf8) => &utf8.inner_sink, + #[cfg(feature = "encoding")] + LossyDecoderInner::Encoding(_, ref inner_sink) => inner_sink, + #[cfg(feature = "encoding_rs")] + LossyDecoderInner::EncodingRs(_, ref inner_sink) => inner_sink, + } + } + + /// Give a mutable reference to the inner sink. + pub fn inner_sink_mut(&mut self) -> &mut Sink { + match self.inner { + LossyDecoderInner::Utf8(ref mut utf8) => &mut utf8.inner_sink, + #[cfg(feature = "encoding")] + LossyDecoderInner::Encoding(_, ref mut inner_sink) => inner_sink, + #[cfg(feature = "encoding_rs")] + LossyDecoderInner::EncodingRs(_, ref mut inner_sink) => inner_sink, + } + } +} + +#[cfg(any(feature = "encoding", feature = "encoding_rs"))] +impl TendrilSink for LossyDecoder +where + Sink: TendrilSink, + A: Atomicity, +{ + #[inline] + fn process(&mut self, t: Tendril) { + match self.inner { + LossyDecoderInner::Utf8(ref mut utf8) => return utf8.process(t), + #[cfg(feature = "encoding")] + LossyDecoderInner::Encoding(ref mut decoder, ref mut sink) => { + let mut out = Tendril::new(); + let mut t = t; + loop { + match decoder.raw_feed(&*t, &mut out) { + (_, Some(err)) => { + out.push_char('\u{fffd}'); + sink.error(err.cause); + debug_assert!(err.upto >= 0); + t.pop_front(err.upto as u32); + // continue loop and process remainder of t + } + (_, None) => break, + } + } + if out.len() > 0 { + sink.process(out); + } + } + #[cfg(feature = "encoding_rs")] + LossyDecoderInner::EncodingRs(ref mut decoder, ref mut sink) => { + if t.is_empty() { + return; + } + decode_to_sink(t, decoder, sink, false); + } + } + } + + #[inline] + fn error(&mut self, desc: Cow<'static, str>) { + match self.inner { + LossyDecoderInner::Utf8(ref mut utf8) => utf8.error(desc), + #[cfg(feature = "encoding")] + LossyDecoderInner::Encoding(_, ref mut sink) => sink.error(desc), + #[cfg(feature = "encoding_rs")] + LossyDecoderInner::EncodingRs(_, ref mut sink) => sink.error(desc), + } + } + + type Output = Sink::Output; + + #[inline] + fn finish(self) -> Sink::Output { + match self.inner { + LossyDecoderInner::Utf8(utf8) => return utf8.finish(), + #[cfg(feature = "encoding")] + LossyDecoderInner::Encoding(mut decoder, mut sink) => { + let mut out = Tendril::new(); + if let Some(err) = decoder.raw_finish(&mut out) { + out.push_char('\u{fffd}'); + sink.error(err.cause); + } + if out.len() > 0 { + sink.process(out); + } + sink.finish() + } + #[cfg(feature = "encoding_rs")] + LossyDecoderInner::EncodingRs(mut decoder, mut sink) => { + decode_to_sink(Tendril::new(), &mut decoder, &mut sink, true); + sink.finish() + } + } + } +} + +#[cfg(feature = "encoding_rs")] +fn decode_to_sink( + mut t: Tendril, + decoder: &mut encoding_rs::Decoder, + sink: &mut Sink, + last: bool, +) where + Sink: TendrilSink, + A: Atomicity, +{ + loop { + let mut out = >::new(); + let max_len = decoder + .max_utf8_buffer_length_without_replacement(t.len()) + .unwrap_or(8192); + unsafe { + out.push_uninitialized(std::cmp::min(max_len as u32, 8192)); + } + let (result, bytes_read, bytes_written) = + decoder.decode_to_utf8_without_replacement(&t, &mut out, last); + if bytes_written > 0 { + sink.process(unsafe { + out.subtendril(0, bytes_written as u32) + .reinterpret_without_validating() + }); + } + match result { + DecoderResult::InputEmpty => return, + DecoderResult::OutputFull => {} + DecoderResult::Malformed(_, _) => { + sink.error(Cow::Borrowed("invalid sequence")); + sink.process("\u{FFFD}".into()); + } + } + 
t.pop_front(bytes_read as u32); + if t.is_empty() { + return; + } + } +} + +#[cfg(test)] +mod test { + use super::{TendrilSink, Utf8LossyDecoder}; + use fmt; + use std::borrow::Cow; + use tendril::{Atomicity, NonAtomic, Tendril}; + + #[cfg(any(feature = "encoding", feature = "encoding_rs"))] + use super::LossyDecoder; + #[cfg(any(feature = "encoding", feature = "encoding_rs"))] + use tendril::SliceExt; + + #[cfg(feature = "encoding")] + use encoding::all as enc; + #[cfg(feature = "encoding_rs")] + use encoding_rs as enc_rs; + + struct Accumulate + where + A: Atomicity, + { + tendrils: Vec>, + errors: Vec, + } + + impl Accumulate + where + A: Atomicity, + { + fn new() -> Accumulate { + Accumulate { + tendrils: vec![], + errors: vec![], + } + } + } + + impl TendrilSink for Accumulate + where + A: Atomicity, + { + fn process(&mut self, t: Tendril) { + self.tendrils.push(t); + } + + fn error(&mut self, desc: Cow<'static, str>) { + self.errors.push(desc.into_owned()); + } + + type Output = (Vec>, Vec); + + fn finish(self) -> Self::Output { + (self.tendrils, self.errors) + } + } + + fn check_utf8(input: &[&[u8]], expected: &[&str], errs: usize) { + let decoder = Utf8LossyDecoder::new(Accumulate::::new()); + let (tendrils, errors) = decoder.from_iter(input.iter().cloned()); + assert_eq!( + expected, + &*tendrils.iter().map(|t| &**t).collect::>() + ); + assert_eq!(errs, errors.len()); + } + + #[test] + fn utf8() { + check_utf8(&[], &[], 0); + check_utf8(&[b""], &[], 0); + check_utf8(&[b"xyz"], &["xyz"], 0); + check_utf8(&[b"x", b"y", b"z"], &["x", "y", "z"], 0); + + check_utf8(&[b"xy\xEA\x99\xAEzw"], &["xy\u{a66e}zw"], 0); + check_utf8(&[b"xy\xEA", b"\x99\xAEzw"], &["xy", "\u{a66e}z", "w"], 0); + check_utf8(&[b"xy\xEA\x99", b"\xAEzw"], &["xy", "\u{a66e}z", "w"], 0); + check_utf8( + &[b"xy\xEA", b"\x99", b"\xAEzw"], + &["xy", "\u{a66e}z", "w"], + 0, + ); + check_utf8(&[b"\xEA", b"", b"\x99", b"", b"\xAE"], &["\u{a66e}"], 0); + check_utf8( + &[b"", b"\xEA", b"", b"\x99", b"", b"\xAE", b""], + &["\u{a66e}"], + 0, + ); + + check_utf8( + &[b"xy\xEA", b"\xFF", b"\x99\xAEz"], + &["xy", "\u{fffd}", "\u{fffd}", "\u{fffd}", "\u{fffd}", "z"], + 4, + ); + check_utf8( + &[b"xy\xEA\x99", b"\xFFz"], + &["xy", "\u{fffd}", "\u{fffd}", "z"], + 2, + ); + + check_utf8(&[b"\xC5\x91\xC5\x91\xC5\x91"], &["őőő"], 0); + check_utf8( + &[b"\xC5\x91", b"\xC5\x91", b"\xC5\x91"], + &["ő", "ő", "ő"], + 0, + ); + check_utf8( + &[b"\xC5", b"\x91\xC5", b"\x91\xC5", b"\x91"], + &["ő", "ő", "ő"], + 0, + ); + check_utf8( + &[b"\xC5", b"\x91\xff", b"\x91\xC5", b"\x91"], + &["ő", "\u{fffd}", "\u{fffd}", "ő"], + 2, + ); + + // incomplete char at end of input + check_utf8(&[b"\xC0"], &["\u{fffd}"], 1); + check_utf8(&[b"\xEA\x99"], &["\u{fffd}"], 1); + } + + #[cfg(any(feature = "encoding", feature = "encoding_rs"))] + fn check_decode( + mut decoder: LossyDecoder>, + input: &[&[u8]], + expected: &str, + errs: usize, + ) { + for x in input { + decoder.process(x.to_tendril()); + } + let (tendrils, errors) = decoder.finish(); + let mut tendril: Tendril = Tendril::new(); + for t in tendrils { + tendril.push_tendril(&t); + } + assert_eq!(expected, &*tendril); + assert_eq!(errs, errors.len()); + } + + #[cfg(any(feature = "encoding", feature = "encoding_rs"))] + pub type Tests = &'static [(&'static [&'static [u8]], &'static str, usize)]; + + #[cfg(any(feature = "encoding"))] + const ASCII: Tests = &[ + (&[], "", 0), + (&[b""], "", 0), + (&[b"xyz"], "xyz", 0), + (&[b"xy", b"", b"", b"z"], "xyz", 0), + (&[b"x", b"y", b"z"], "xyz", 0), + 
(&[b"\xFF"], "\u{fffd}", 1), + (&[b"x\xC0yz"], "x\u{fffd}yz", 1), + (&[b"x", b"\xC0y", b"z"], "x\u{fffd}yz", 1), + (&[b"x\xC0yz\xFF\xFFw"], "x\u{fffd}yz\u{fffd}\u{fffd}w", 3), + ]; + + #[cfg(feature = "encoding")] + #[test] + fn decode_ascii() { + for &(input, expected, errs) in ASCII { + let decoder = LossyDecoder::new(enc::ASCII, Accumulate::new()); + check_decode(decoder, input, expected, errs); + } + } + + #[cfg(any(feature = "encoding", feature = "encoding_rs"))] + const UTF_8: Tests = &[ + (&[], "", 0), + (&[b""], "", 0), + (&[b"xyz"], "xyz", 0), + (&[b"x", b"y", b"z"], "xyz", 0), + (&[b"\xEA\x99\xAE"], "\u{a66e}", 0), + (&[b"\xEA", b"\x99\xAE"], "\u{a66e}", 0), + (&[b"\xEA\x99", b"\xAE"], "\u{a66e}", 0), + (&[b"\xEA", b"\x99", b"\xAE"], "\u{a66e}", 0), + (&[b"\xEA", b"", b"\x99", b"", b"\xAE"], "\u{a66e}", 0), + ( + &[b"", b"\xEA", b"", b"\x99", b"", b"\xAE", b""], + "\u{a66e}", + 0, + ), + (&[b"xy\xEA", b"\x99\xAEz"], "xy\u{a66e}z", 0), + ( + &[b"xy\xEA", b"\xFF", b"\x99\xAEz"], + "xy\u{fffd}\u{fffd}\u{fffd}\u{fffd}z", + 4, + ), + (&[b"xy\xEA\x99", b"\xFFz"], "xy\u{fffd}\u{fffd}z", 2), + // incomplete char at end of input + (&[b"\xC0"], "\u{fffd}", 1), + (&[b"\xEA\x99"], "\u{fffd}", 1), + ]; + + #[cfg(feature = "encoding")] + #[test] + fn decode_utf8() { + for &(input, expected, errs) in UTF_8 { + let decoder = LossyDecoder::new(enc::UTF_8, Accumulate::new()); + check_decode(decoder, input, expected, errs); + } + } + + #[cfg(feature = "encoding_rs")] + #[test] + fn decode_utf8_encoding_rs() { + for &(input, expected, errs) in UTF_8 { + let decoder = LossyDecoder::new_encoding_rs(enc_rs::UTF_8, Accumulate::new()); + check_decode(decoder, input, expected, errs); + } + } + + #[cfg(any(feature = "encoding", feature = "encoding_rs"))] + const KOI8_U: Tests = &[ + (&[b"\xfc\xce\xc5\xd2\xc7\xc9\xd1"], "Энергия", 0), + (&[b"\xfc\xce", b"\xc5\xd2\xc7\xc9\xd1"], "Энергия", 0), + (&[b"\xfc\xce", b"\xc5\xd2\xc7", b"\xc9\xd1"], "Энергия", 0), + ( + &[b"\xfc\xce", b"", b"\xc5\xd2\xc7", b"\xc9\xd1", b""], + "Энергия", + 0, + ), + ]; + + #[cfg(feature = "encoding")] + #[test] + fn decode_koi8_u() { + for &(input, expected, errs) in KOI8_U { + let decoder = LossyDecoder::new(enc::KOI8_U, Accumulate::new()); + check_decode(decoder, input, expected, errs); + } + } + + #[cfg(feature = "encoding_rs")] + #[test] + fn decode_koi8_u_encoding_rs() { + for &(input, expected, errs) in KOI8_U { + let decoder = LossyDecoder::new_encoding_rs(enc_rs::KOI8_U, Accumulate::new()); + check_decode(decoder, input, expected, errs); + } + } + + #[cfg(any(feature = "encoding", feature = "encoding_rs"))] + const WINDOWS_949: Tests = &[ + (&[], "", 0), + (&[b""], "", 0), + (&[b"\xbe\xc8\xb3\xe7"], "안녕", 0), + (&[b"\xbe", b"\xc8\xb3\xe7"], "안녕", 0), + (&[b"\xbe", b"", b"\xc8\xb3\xe7"], "안녕", 0), + ( + &[b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4"], + "안녕하세요", + 0, + ), + (&[b"\xbe\xc8\xb3\xe7\xc7"], "안녕\u{fffd}", 1), + (&[b"\xbe", b"", b"\xc8\xb3"], "안\u{fffd}", 1), + (&[b"\xbe\x28\xb3\xe7"], "\u{fffd}(녕", 1), + ]; + + #[cfg(feature = "encoding")] + #[test] + fn decode_windows_949() { + for &(input, expected, errs) in WINDOWS_949 { + let decoder = LossyDecoder::new(enc::WINDOWS_949, Accumulate::new()); + check_decode(decoder, input, expected, errs); + } + } + + #[cfg(feature = "encoding_rs")] + #[test] + fn decode_windows_949_encoding_rs() { + for &(input, expected, errs) in WINDOWS_949 { + let decoder = LossyDecoder::new_encoding_rs(enc_rs::EUC_KR, Accumulate::new()); + check_decode(decoder, input, expected, errs); + } + } 
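+    // Note: the Encoding Standard folds the windows-949 repertoire into its EUC-KR
+    // definition, which is why the encoding_rs test above decodes the WINDOWS_949
+    // table through `enc_rs::EUC_KR`.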
+ + #[test] + fn read_from() { + let decoder = Utf8LossyDecoder::new(Accumulate::::new()); + let mut bytes: &[u8] = b"foo\xffbar"; + let (tendrils, errors) = decoder.read_from(&mut bytes).unwrap(); + assert_eq!( + &*tendrils.iter().map(|t| &**t).collect::>(), + &["foo", "\u{FFFD}", "bar"] + ); + assert_eq!(errors, &["invalid byte sequence"]); + } +} diff --git a/tendril/src/tendril.rs b/tendril/src/tendril.rs new file mode 100644 index 00000000..0a33d827 --- /dev/null +++ b/tendril/src/tendril.rs @@ -0,0 +1,2473 @@ +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::borrow::Borrow; +use std::cell::{Cell, UnsafeCell}; +use std::cmp::Ordering; +use std::default::Default; +use std::fmt as strfmt; +use std::iter::FromIterator; +use std::marker::PhantomData; +use std::num::NonZeroUsize; +use std::ops::{Deref, DerefMut}; +use std::sync::atomic::Ordering as AtomicOrdering; +use std::sync::atomic::{self, AtomicUsize}; +use std::{hash, io, mem, ptr, str, u32}; + +#[cfg(feature = "encoding")] +use encoding::{self, DecoderTrap, EncoderTrap, EncodingRef}; + +use buf32::{self, Buf32}; +use fmt::imp::Fixup; +use fmt::{self, Slice}; +use util::{copy_and_advance, copy_lifetime, copy_lifetime_mut, unsafe_slice, unsafe_slice_mut}; +use OFLOW; + +const MAX_INLINE_LEN: usize = 8; +const MAX_INLINE_TAG: usize = 0xF; +const EMPTY_TAG: usize = 0xF; + +#[inline(always)] +fn inline_tag(len: u32) -> NonZeroUsize { + debug_assert!(len <= MAX_INLINE_LEN as u32); + unsafe { NonZeroUsize::new_unchecked(if len == 0 { EMPTY_TAG } else { len as usize }) } +} + +/// The multithreadedness of a tendril. +/// +/// Exactly two types implement this trait: +/// +/// - `Atomic`: use this in your tendril and you will have a `Send` tendril which works +/// across threads; this is akin to `Arc`. +/// +/// - `NonAtomic`: use this in your tendril and you will have a tendril which is neither +/// `Send` nor `Sync` but should be a tad faster; this is akin to `Rc`. +/// +/// The layout of this trait is also mandated to be that of a `usize`, +/// for it is used for reference counting. +pub unsafe trait Atomicity: 'static { + #[doc(hidden)] + fn new() -> Self; + + #[doc(hidden)] + fn increment(&self) -> usize; + + #[doc(hidden)] + fn decrement(&self) -> usize; + + #[doc(hidden)] + fn fence_acquire(); +} + +/// A marker of a non-atomic tendril. +/// +/// This is the default for the second type parameter of a `Tendril` +/// and so doesn't typically need to be written. +/// +/// This is akin to using `Rc` for reference counting. +#[repr(C)] +pub struct NonAtomic(Cell); + +unsafe impl Atomicity for NonAtomic { + #[inline] + fn new() -> Self { + NonAtomic(Cell::new(1)) + } + + #[inline] + fn increment(&self) -> usize { + let value = self.0.get(); + self.0.set(value.checked_add(1).expect(OFLOW)); + value + } + + #[inline] + fn decrement(&self) -> usize { + let value = self.0.get(); + self.0.set(value - 1); + value + } + + #[inline] + fn fence_acquire() {} +} + +/// A marker of an atomic (and hence concurrent) tendril. +/// +/// This is used as the second, optional type parameter of a `Tendril`; +/// `Tendril` thus implements`Send`. +/// +/// This is akin to using `Arc` for reference counting. 
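+///
+/// A sketch of the intended use (illustrative only):
+///
+/// ```ignore
+/// let t: Tendril<fmt::UTF8, Atomic> = Tendril::from_slice("shared across threads");
+/// std::thread::spawn(move || println!("{}", t));
+/// ```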
+pub struct Atomic(AtomicUsize); + +unsafe impl Atomicity for Atomic { + #[inline] + fn new() -> Self { + Atomic(AtomicUsize::new(1)) + } + + #[inline] + fn increment(&self) -> usize { + // Relaxed is OK because we have a reference already. + self.0.fetch_add(1, AtomicOrdering::Relaxed) + } + + #[inline] + fn decrement(&self) -> usize { + self.0.fetch_sub(1, AtomicOrdering::Release) + } + + #[inline] + fn fence_acquire() { + atomic::fence(AtomicOrdering::Acquire); + } +} + +#[repr(C)] // Preserve field order for cross-atomicity transmutes +struct Header { + refcount: A, + cap: u32, +} + +impl Header +where + A: Atomicity, +{ + #[inline(always)] + unsafe fn new() -> Header { + Header { + refcount: A::new(), + cap: 0, + } + } +} + +/// Errors that can occur when slicing a `Tendril`. +#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq)] +pub enum SubtendrilError { + OutOfBounds, + ValidationFailed, +} + +/// Compact string type for zero-copy parsing. +/// +/// `Tendril`s have the semantics of owned strings, but are sometimes views +/// into shared buffers. When you mutate a `Tendril`, an owned copy is made +/// if necessary. Further mutations occur in-place until the string becomes +/// shared, e.g. with `clone()` or `subtendril()`. +/// +/// Buffer sharing is accomplished through thread-local (non-atomic) reference +/// counting, which has very low overhead. The Rust type system will prevent +/// you at compile time from sending a `Tendril` between threads. We plan to +/// relax this restriction in the future; see `README.md`. +/// +/// Whereas `String` allocates in the heap for any non-empty string, `Tendril` +/// can store small strings (up to 8 bytes) in-line, without a heap allocation. +/// `Tendril` is also smaller than `String` on 64-bit platforms — 16 bytes +/// versus 24. +/// +/// The type parameter `F` specifies the format of the tendril, for example +/// UTF-8 text or uninterpreted bytes. The parameter will be instantiated +/// with one of the marker types from `tendril::fmt`. See the `StrTendril` +/// and `ByteTendril` type aliases for two examples. +/// +/// The type parameter `A` indicates the atomicity of the tendril; it is by +/// default `NonAtomic`, but can be specified as `Atomic` to get a tendril +/// which implements `Send` (viz. a thread-safe tendril). +/// +/// The maximum length of a `Tendril` is 4 GB. The library will panic if +/// you attempt to go over the limit. +#[repr(C)] +pub struct Tendril +where + F: fmt::Format, + A: Atomicity, +{ + ptr: Cell, + buf: UnsafeCell, + marker: PhantomData<*mut F>, + refcount_marker: PhantomData, +} + +#[repr(C)] +union Buffer { + heap: Heap, + inline: [u8; 8], +} + +#[derive(Copy, Clone)] +#[repr(C)] +struct Heap { + len: u32, + aux: u32, +} + +unsafe impl Send for Tendril +where + F: fmt::Format, + A: Atomicity + Sync, +{ +} + +/// `Tendril` for storing native Rust strings. +pub type StrTendril = Tendril; + +/// `Tendril` for storing binary data. 
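+/// (i.e. a `Tendril` in the `fmt::Bytes` format; `StrTendril` above uses `fmt::UTF8`.)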
+pub type ByteTendril = Tendril; + +impl Clone for Tendril +where + F: fmt::Format, + A: Atomicity, +{ + #[inline] + fn clone(&self) -> Tendril { + unsafe { + if self.ptr.get().get() > MAX_INLINE_TAG { + self.make_buf_shared(); + self.incref(); + } + + ptr::read(self) + } + } +} + +impl Drop for Tendril +where + F: fmt::Format, + A: Atomicity, +{ + #[inline] + fn drop(&mut self) { + unsafe { + let p = self.ptr.get().get(); + if p <= MAX_INLINE_TAG { + return; + } + + let (buf, shared, _) = self.assume_buf(); + if shared { + let header = self.header(); + if (*header).refcount.decrement() == 1 { + A::fence_acquire(); + buf.destroy(); + } + } else { + buf.destroy(); + } + } + } +} + +macro_rules! from_iter_method { + ($ty:ty) => { + #[inline] + fn from_iter(iterable: I) -> Self + where + I: IntoIterator, + { + let mut output = Self::new(); + output.extend(iterable); + output + } + }; +} + +impl Extend for Tendril +where + A: Atomicity, +{ + #[inline] + fn extend(&mut self, iterable: I) + where + I: IntoIterator, + { + let iterator = iterable.into_iter(); + self.force_reserve(iterator.size_hint().0 as u32); + for c in iterator { + self.push_char(c); + } + } +} + +impl FromIterator for Tendril +where + A: Atomicity, +{ + from_iter_method!(char); +} + +impl Extend for Tendril +where + A: Atomicity, +{ + #[inline] + fn extend(&mut self, iterable: I) + where + I: IntoIterator, + { + let iterator = iterable.into_iter(); + self.force_reserve(iterator.size_hint().0 as u32); + for b in iterator { + self.push_slice(&[b]); + } + } +} + +impl FromIterator for Tendril +where + A: Atomicity, +{ + from_iter_method!(u8); +} + +impl<'a, A> Extend<&'a u8> for Tendril +where + A: Atomicity, +{ + #[inline] + fn extend(&mut self, iterable: I) + where + I: IntoIterator, + { + let iterator = iterable.into_iter(); + self.force_reserve(iterator.size_hint().0 as u32); + for &b in iterator { + self.push_slice(&[b]); + } + } +} + +impl<'a, A> FromIterator<&'a u8> for Tendril +where + A: Atomicity, +{ + from_iter_method!(&'a u8); +} + +impl<'a, A> Extend<&'a str> for Tendril +where + A: Atomicity, +{ + #[inline] + fn extend(&mut self, iterable: I) + where + I: IntoIterator, + { + for s in iterable { + self.push_slice(s); + } + } +} + +impl<'a, A> FromIterator<&'a str> for Tendril +where + A: Atomicity, +{ + from_iter_method!(&'a str); +} + +impl<'a, A> Extend<&'a [u8]> for Tendril +where + A: Atomicity, +{ + #[inline] + fn extend(&mut self, iterable: I) + where + I: IntoIterator, + { + for s in iterable { + self.push_slice(s); + } + } +} + +impl<'a, A> FromIterator<&'a [u8]> for Tendril +where + A: Atomicity, +{ + from_iter_method!(&'a [u8]); +} + +impl<'a, F, A> Extend<&'a Tendril> for Tendril +where + F: fmt::Format + 'a, + A: Atomicity, +{ + #[inline] + fn extend(&mut self, iterable: I) + where + I: IntoIterator>, + { + for t in iterable { + self.push_tendril(t); + } + } +} + +impl<'a, F, A> FromIterator<&'a Tendril> for Tendril +where + F: fmt::Format + 'a, + A: Atomicity, +{ + from_iter_method!(&'a Tendril); +} + +impl Deref for Tendril +where + F: fmt::SliceFormat, + A: Atomicity, +{ + type Target = F::Slice; + + #[inline] + fn deref(&self) -> &F::Slice { + unsafe { F::Slice::from_bytes(self.as_byte_slice()) } + } +} + +impl DerefMut for Tendril +where + F: fmt::SliceFormat, + A: Atomicity, +{ + #[inline] + fn deref_mut(&mut self) -> &mut F::Slice { + unsafe { F::Slice::from_mut_bytes(self.as_mut_byte_slice()) } + } +} + +impl Borrow<[u8]> for Tendril +where + F: fmt::SliceFormat, + A: Atomicity, +{ + fn 
borrow(&self) -> &[u8] { + self.as_byte_slice() + } +} + +// Why not impl Borrow for Tendril? str and [u8] hash differently, +// and so a HashMap would silently break if we indexed by str. Ick. +// https://github.com/rust-lang/rust/issues/27108 + +impl PartialEq for Tendril +where + F: fmt::Format, + A: Atomicity, +{ + #[inline] + fn eq(&self, other: &Self) -> bool { + self.as_byte_slice() == other.as_byte_slice() + } + + #[inline] + fn ne(&self, other: &Self) -> bool { + self.as_byte_slice() != other.as_byte_slice() + } +} + +impl Eq for Tendril +where + F: fmt::Format, + A: Atomicity, +{ +} + +impl PartialOrd for Tendril +where + F: fmt::SliceFormat, + ::Slice: PartialOrd, + A: Atomicity, +{ + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + PartialOrd::partial_cmp(&**self, &**other) + } +} + +impl Ord for Tendril +where + F: fmt::SliceFormat, + ::Slice: Ord, + A: Atomicity, +{ + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&**self, &**other) + } +} + +impl Default for Tendril +where + F: fmt::Format, + A: Atomicity, +{ + #[inline(always)] + fn default() -> Tendril { + Tendril::new() + } +} + +impl strfmt::Debug for Tendril +where + F: fmt::SliceFormat + Default + strfmt::Debug, + ::Slice: strfmt::Debug, + A: Atomicity, +{ + #[inline] + fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result { + let kind = match self.ptr.get().get() { + p if p <= MAX_INLINE_TAG => "inline", + p if p & 1 == 1 => "shared", + _ => "owned", + }; + + write!(f, "Tendril<{:?}>({}: ", ::default(), kind)?; + <::Slice as strfmt::Debug>::fmt(&**self, f)?; + write!(f, ")") + } +} + +impl hash::Hash for Tendril +where + F: fmt::Format, + A: Atomicity, +{ + #[inline] + fn hash(&self, hasher: &mut H) { + self.as_byte_slice().hash(hasher) + } +} + +impl Tendril +where + F: fmt::Format, + A: Atomicity, +{ + /// Create a new, empty `Tendril` in any format. + #[inline(always)] + pub fn new() -> Tendril { + unsafe { Tendril::inline(&[]) } + } + + /// Create a new, empty `Tendril` with a specified capacity. + #[inline] + pub fn with_capacity(capacity: u32) -> Tendril { + let mut t: Tendril = Tendril::new(); + if capacity > MAX_INLINE_LEN as u32 { + unsafe { + t.make_owned_with_capacity(capacity); + } + } + t + } + + /// Reserve space for additional bytes. + /// + /// This is only a suggestion. There are cases where `Tendril` will + /// decline to allocate until the buffer is actually modified. + #[inline] + pub fn reserve(&mut self, additional: u32) { + if !self.is_shared() { + // Don't grow a shared tendril because we'd have to copy + // right away. + self.force_reserve(additional); + } + } + + /// Reserve space for additional bytes, even for shared buffers. + #[inline] + fn force_reserve(&mut self, additional: u32) { + let new_len = self.len32().checked_add(additional).expect(OFLOW); + if new_len > MAX_INLINE_LEN as u32 { + unsafe { + self.make_owned_with_capacity(new_len); + } + } + } + + /// Get the length of the `Tendril`. + /// + /// This is named not to conflict with `len()` on the underlying + /// slice, if any. + #[inline(always)] + pub fn len32(&self) -> u32 { + match self.ptr.get().get() { + EMPTY_TAG => 0, + n if n <= MAX_INLINE_LEN => n as u32, + _ => unsafe { self.raw_len() }, + } + } + + /// Is the backing buffer shared? + #[inline] + pub fn is_shared(&self) -> bool { + let n = self.ptr.get().get(); + + (n > MAX_INLINE_TAG) && ((n & 1) == 1) + } + + /// Is the backing buffer shared with this other `Tendril`? 
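+    ///
+    /// (A sufficiently long `subtendril` of a heap-allocated tendril shares its
+    /// buffer, so both tendrils would report `true` here; inline tendrils never share.)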
+ #[inline] + pub fn is_shared_with(&self, other: &Tendril) -> bool { + let n = self.ptr.get().get(); + + (n > MAX_INLINE_TAG) && (n == other.ptr.get().get()) + } + + /// Truncate to length 0 without discarding any owned storage. + #[inline] + pub fn clear(&mut self) { + if self.ptr.get().get() <= MAX_INLINE_TAG { + self.ptr + .set(unsafe { NonZeroUsize::new_unchecked(EMPTY_TAG) }); + } else { + let (_, shared, _) = unsafe { self.assume_buf() }; + if shared { + // No need to keep a reference alive for a 0-size slice. + *self = Tendril::new(); + } else { + unsafe { self.set_len(0) }; + } + } + } + + /// Build a `Tendril` by copying a byte slice, if it conforms to the format. + #[inline] + pub fn try_from_byte_slice(x: &[u8]) -> Result, ()> { + match F::validate(x) { + true => Ok(unsafe { Tendril::from_byte_slice_without_validating(x) }), + false => Err(()), + } + } + + /// View as uninterpreted bytes. + #[inline(always)] + pub fn as_bytes(&self) -> &Tendril { + unsafe { mem::transmute(self) } + } + + /// Convert into uninterpreted bytes. + #[inline(always)] + pub fn into_bytes(self) -> Tendril { + unsafe { mem::transmute(self) } + } + + /// Convert `self` into a type which is `Send`. + /// + /// If the tendril is owned or inline, this is free, + /// but if it's shared this will entail a copy of the contents. + #[inline] + pub fn into_send(mut self) -> SendTendril { + self.make_owned(); + SendTendril { + // This changes the header.refcount from A to NonAtomic, but that's + // OK because we have defined the format of A as a usize. + tendril: unsafe { mem::transmute(self) }, + } + } + + /// View as a superset format, for free. + #[inline(always)] + pub fn as_superset(&self) -> &Tendril + where + F: fmt::SubsetOf, + Super: fmt::Format, + { + unsafe { mem::transmute(self) } + } + + /// Convert into a superset format, for free. + #[inline(always)] + pub fn into_superset(self) -> Tendril + where + F: fmt::SubsetOf, + Super: fmt::Format, + { + unsafe { mem::transmute(self) } + } + + /// View as a subset format, if the `Tendril` conforms to that subset. + #[inline] + pub fn try_as_subset(&self) -> Result<&Tendril, ()> + where + Sub: fmt::SubsetOf, + { + match Sub::revalidate_subset(self.as_byte_slice()) { + true => Ok(unsafe { mem::transmute(self) }), + false => Err(()), + } + } + + /// Convert into a subset format, if the `Tendril` conforms to that subset. + #[inline] + pub fn try_into_subset(self) -> Result, Self> + where + Sub: fmt::SubsetOf, + { + match Sub::revalidate_subset(self.as_byte_slice()) { + true => Ok(unsafe { mem::transmute(self) }), + false => Err(self), + } + } + + /// View as another format, if the bytes of the `Tendril` are valid for + /// that format. + #[inline] + pub fn try_reinterpret_view(&self) -> Result<&Tendril, ()> + where + Other: fmt::Format, + { + match Other::validate(self.as_byte_slice()) { + true => Ok(unsafe { mem::transmute(self) }), + false => Err(()), + } + } + + /// Convert into another format, if the `Tendril` conforms to that format. + /// + /// This only re-validates the existing bytes under the new format. It + /// will *not* change the byte content of the tendril! + /// + /// See the `encode` and `decode` methods for character encoding conversion. + #[inline] + pub fn try_reinterpret(self) -> Result, Self> + where + Other: fmt::Format, + { + match Other::validate(self.as_byte_slice()) { + true => Ok(unsafe { mem::transmute(self) }), + false => Err(self), + } + } + + /// Push some bytes onto the end of the `Tendril`, if they conform to the + /// format. 
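+    ///
+    /// On failure the tendril is left unchanged; for example, pushing the lone
+    /// continuation byte `0x80` onto a UTF-8 tendril would return `Err(())`.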
+ #[inline] + pub fn try_push_bytes(&mut self, buf: &[u8]) -> Result<(), ()> { + match F::validate(buf) { + true => unsafe { + self.push_bytes_without_validating(buf); + Ok(()) + }, + false => Err(()), + } + } + + /// Push another `Tendril` onto the end of this one. + #[inline] + pub fn push_tendril(&mut self, other: &Tendril) { + let new_len = self.len32().checked_add(other.len32()).expect(OFLOW); + + unsafe { + if (self.ptr.get().get() > MAX_INLINE_TAG) && (other.ptr.get().get() > MAX_INLINE_TAG) { + let (self_buf, self_shared, _) = self.assume_buf(); + let (other_buf, other_shared, _) = other.assume_buf(); + + if self_shared + && other_shared + && (self_buf.data_ptr() == other_buf.data_ptr()) + && other.aux() == self.aux() + self.raw_len() + { + self.set_len(new_len); + return; + } + } + + self.push_bytes_without_validating(other.as_byte_slice()) + } + } + + /// Attempt to slice this `Tendril` as a new `Tendril`. + /// + /// This will share the buffer when possible. Mutating a shared buffer + /// will copy the contents. + /// + /// The offset and length are in bytes. The function will return + /// `Err` if these are out of bounds, or if the resulting slice + /// does not conform to the format. + #[inline] + pub fn try_subtendril( + &self, + offset: u32, + length: u32, + ) -> Result, SubtendrilError> { + let self_len = self.len32(); + if offset > self_len || length > (self_len - offset) { + return Err(SubtendrilError::OutOfBounds); + } + + unsafe { + let byte_slice = unsafe_slice(self.as_byte_slice(), offset as usize, length as usize); + if !F::validate_subseq(byte_slice) { + return Err(SubtendrilError::ValidationFailed); + } + + Ok(self.unsafe_subtendril(offset, length)) + } + } + + /// Slice this `Tendril` as a new `Tendril`. + /// + /// Panics on bounds or validity check failure. + #[inline] + pub fn subtendril(&self, offset: u32, length: u32) -> Tendril { + self.try_subtendril(offset, length).unwrap() + } + + /// Try to drop `n` bytes from the front. + /// + /// Returns `Err` if the bytes are not available, or the suffix fails + /// validation. + #[inline] + pub fn try_pop_front(&mut self, n: u32) -> Result<(), SubtendrilError> { + if n == 0 { + return Ok(()); + } + let old_len = self.len32(); + if n > old_len { + return Err(SubtendrilError::OutOfBounds); + } + let new_len = old_len - n; + + unsafe { + if !F::validate_suffix(unsafe_slice( + self.as_byte_slice(), + n as usize, + new_len as usize, + )) { + return Err(SubtendrilError::ValidationFailed); + } + + self.unsafe_pop_front(n); + Ok(()) + } + } + + /// Drop `n` bytes from the front. + /// + /// Panics if the bytes are not available, or the suffix fails + /// validation. + #[inline] + pub fn pop_front(&mut self, n: u32) { + self.try_pop_front(n).unwrap() + } + + /// Drop `n` bytes from the back. + /// + /// Returns `Err` if the bytes are not available, or the prefix fails + /// validation. + #[inline] + pub fn try_pop_back(&mut self, n: u32) -> Result<(), SubtendrilError> { + if n == 0 { + return Ok(()); + } + let old_len = self.len32(); + if n > old_len { + return Err(SubtendrilError::OutOfBounds); + } + let new_len = old_len - n; + + unsafe { + if !F::validate_prefix(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)) { + return Err(SubtendrilError::ValidationFailed); + } + + self.unsafe_pop_back(n); + Ok(()) + } + } + + /// Drop `n` bytes from the back. + /// + /// Panics if the bytes are not available, or the prefix fails + /// validation. 
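+    ///
+    /// For example, popping a single byte off a `StrTendril` that ends in a
+    /// multi-byte character would split that character, so it panics.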
+ #[inline] + pub fn pop_back(&mut self, n: u32) { + self.try_pop_back(n).unwrap() + } + + /// View as another format, without validating. + #[inline(always)] + pub unsafe fn reinterpret_view_without_validating(&self) -> &Tendril + where + Other: fmt::Format, + { + mem::transmute(self) + } + + /// Convert into another format, without validating. + #[inline(always)] + pub unsafe fn reinterpret_without_validating(self) -> Tendril + where + Other: fmt::Format, + { + mem::transmute(self) + } + + /// Build a `Tendril` by copying a byte slice, without validating. + #[inline] + pub unsafe fn from_byte_slice_without_validating(x: &[u8]) -> Tendril { + assert!(x.len() <= buf32::MAX_LEN); + if x.len() <= MAX_INLINE_LEN { + Tendril::inline(x) + } else { + Tendril::owned_copy(x) + } + } + + /// Push some bytes onto the end of the `Tendril`, without validating. + #[inline] + pub unsafe fn push_bytes_without_validating(&mut self, buf: &[u8]) { + assert!(buf.len() <= buf32::MAX_LEN); + + let Fixup { + drop_left, + drop_right, + insert_len, + insert_bytes, + } = F::fixup(self.as_byte_slice(), buf); + + // FIXME: think more about overflow + let adj_len = self.len32() + insert_len - drop_left; + + let new_len = adj_len.checked_add(buf.len() as u32).expect(OFLOW) - drop_right; + + let drop_left = drop_left as usize; + let drop_right = drop_right as usize; + + if new_len <= MAX_INLINE_LEN as u32 { + let mut tmp = [0_u8; MAX_INLINE_LEN]; + { + let old = self.as_byte_slice(); + let mut dest = tmp.as_mut_ptr(); + copy_and_advance(&mut dest, unsafe_slice(old, 0, old.len() - drop_left)); + copy_and_advance( + &mut dest, + unsafe_slice(&insert_bytes, 0, insert_len as usize), + ); + copy_and_advance( + &mut dest, + unsafe_slice(buf, drop_right, buf.len() - drop_right), + ); + } + *self = Tendril::inline(&tmp[..new_len as usize]); + } else { + self.make_owned_with_capacity(new_len); + let (owned, _, _) = self.assume_buf(); + let mut dest = owned + .data_ptr() + .offset((owned.len as usize - drop_left) as isize); + copy_and_advance( + &mut dest, + unsafe_slice(&insert_bytes, 0, insert_len as usize), + ); + copy_and_advance( + &mut dest, + unsafe_slice(buf, drop_right, buf.len() - drop_right), + ); + self.set_len(new_len); + } + } + + /// Slice this `Tendril` as a new `Tendril`. + /// + /// Does not check validity or bounds! + #[inline] + pub unsafe fn unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril { + if length <= MAX_INLINE_LEN as u32 { + Tendril::inline(unsafe_slice( + self.as_byte_slice(), + offset as usize, + length as usize, + )) + } else { + self.make_buf_shared(); + self.incref(); + let (buf, _, _) = self.assume_buf(); + Tendril::shared(buf, self.aux() + offset, length) + } + } + + /// Drop `n` bytes from the front. + /// + /// Does not check validity or bounds! + #[inline] + pub unsafe fn unsafe_pop_front(&mut self, n: u32) { + let new_len = self.len32() - n; + if new_len <= MAX_INLINE_LEN as u32 { + *self = Tendril::inline(unsafe_slice( + self.as_byte_slice(), + n as usize, + new_len as usize, + )); + } else { + self.make_buf_shared(); + self.set_aux(self.aux() + n); + let len = self.raw_len(); + self.set_len(len - n); + } + } + + /// Drop `n` bytes from the back. + /// + /// Does not check validity or bounds! 
+ #[inline] + pub unsafe fn unsafe_pop_back(&mut self, n: u32) { + let new_len = self.len32() - n; + if new_len <= MAX_INLINE_LEN as u32 { + *self = Tendril::inline(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)); + } else { + self.make_buf_shared(); + let len = self.raw_len(); + self.set_len(len - n); + } + } + + #[inline] + unsafe fn incref(&self) { + (*self.header()).refcount.increment(); + } + + #[inline] + unsafe fn make_buf_shared(&self) { + let p = self.ptr.get().get(); + if p & 1 == 0 { + let header = p as *mut Header; + (*header).cap = self.aux(); + + self.ptr.set(NonZeroUsize::new_unchecked(p | 1)); + self.set_aux(0); + } + } + + // This is not public as it is of no practical value to users. + // By and large they shouldn't need to worry about the distinction at all, + // and going out of your way to make it owned is pointless. + #[inline] + fn make_owned(&mut self) { + unsafe { + let ptr = self.ptr.get().get(); + if ptr <= MAX_INLINE_TAG || (ptr & 1) == 1 { + *self = Tendril::owned_copy(self.as_byte_slice()); + } + } + } + + #[inline] + unsafe fn make_owned_with_capacity(&mut self, cap: u32) { + self.make_owned(); + let mut buf = self.assume_buf().0; + buf.grow(cap); + self.ptr.set(NonZeroUsize::new_unchecked(buf.ptr as usize)); + self.set_aux(buf.cap); + } + + #[inline(always)] + unsafe fn header(&self) -> *mut Header { + (self.ptr.get().get() & !1) as *mut Header + } + + #[inline] + unsafe fn assume_buf(&self) -> (Buf32>, bool, u32) { + let ptr = self.ptr.get().get(); + let header = self.header(); + let shared = (ptr & 1) == 1; + let (cap, offset) = match shared { + true => ((*header).cap, self.aux()), + false => (self.aux(), 0), + }; + + ( + Buf32 { + ptr: header, + len: offset + self.len32(), + cap: cap, + }, + shared, + offset, + ) + } + + #[inline] + unsafe fn inline(x: &[u8]) -> Tendril { + let len = x.len(); + let t = Tendril { + ptr: Cell::new(inline_tag(len as u32)), + buf: UnsafeCell::new(Buffer { inline: [0; 8] }), + marker: PhantomData, + refcount_marker: PhantomData, + }; + ptr::copy_nonoverlapping(x.as_ptr(), (*t.buf.get()).inline.as_mut_ptr(), len); + t + } + + #[inline] + unsafe fn owned(x: Buf32>) -> Tendril { + Tendril { + ptr: Cell::new(NonZeroUsize::new_unchecked(x.ptr as usize)), + buf: UnsafeCell::new(Buffer { + heap: Heap { + len: x.len, + aux: x.cap, + }, + }), + marker: PhantomData, + refcount_marker: PhantomData, + } + } + + #[inline] + unsafe fn owned_copy(x: &[u8]) -> Tendril { + let len32 = x.len() as u32; + let mut b = Buf32::with_capacity(len32, Header::new()); + ptr::copy_nonoverlapping(x.as_ptr(), b.data_ptr(), x.len()); + b.len = len32; + Tendril::owned(b) + } + + #[inline] + unsafe fn shared(buf: Buf32>, off: u32, len: u32) -> Tendril { + Tendril { + ptr: Cell::new(NonZeroUsize::new_unchecked((buf.ptr as usize) | 1)), + buf: UnsafeCell::new(Buffer { + heap: Heap { len, aux: off }, + }), + marker: PhantomData, + refcount_marker: PhantomData, + } + } + + #[inline] + fn as_byte_slice<'a>(&'a self) -> &'a [u8] { + unsafe { + match self.ptr.get().get() { + EMPTY_TAG => &[], + n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked(..n), + _ => { + let (buf, _, offset) = self.assume_buf(); + copy_lifetime( + self, + unsafe_slice(buf.data(), offset as usize, self.len32() as usize), + ) + } + } + } + } + + // There's no need to worry about locking on an atomic Tendril, because it makes it unique as + // soon as you do that. 
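+    // (Concretely: the heap branch below calls `make_owned` before handing out a
+    // mutable slice, so a shared buffer is copied rather than mutated in place.)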
+ #[inline] + fn as_mut_byte_slice<'a>(&'a mut self) -> &'a mut [u8] { + unsafe { + match self.ptr.get().get() { + EMPTY_TAG => &mut [], + n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked_mut(..n), + _ => { + self.make_owned(); + let (mut buf, _, offset) = self.assume_buf(); + let len = self.len32() as usize; + copy_lifetime_mut(self, unsafe_slice_mut(buf.data_mut(), offset as usize, len)) + } + } + } + } + + unsafe fn raw_len(&self) -> u32 { + (*self.buf.get()).heap.len + } + + unsafe fn set_len(&mut self, len: u32) { + (*self.buf.get()).heap.len = len; + } + + unsafe fn aux(&self) -> u32 { + (*self.buf.get()).heap.aux + } + + unsafe fn set_aux(&self, aux: u32) { + (*self.buf.get()).heap.aux = aux; + } +} + +impl Tendril +where + F: fmt::SliceFormat, + A: Atomicity, +{ + /// Build a `Tendril` by copying a slice. + #[inline] + pub fn from_slice(x: &F::Slice) -> Tendril { + unsafe { Tendril::from_byte_slice_without_validating(x.as_bytes()) } + } + + /// Push a slice onto the end of the `Tendril`. + #[inline] + pub fn push_slice(&mut self, x: &F::Slice) { + unsafe { self.push_bytes_without_validating(x.as_bytes()) } + } +} + +/// A simple wrapper to make `Tendril` `Send`. +/// +/// Although there is a certain subset of the operations on a `Tendril` that a `SendTendril` could +/// reasonably implement, in order to clearly separate concerns this type is deliberately +/// minimalist, acting as a safe encapsulation around the invariants which permit `Send`ness and +/// behaving as an opaque object. +/// +/// A `SendTendril` may be produced by `Tendril.into_send()` or `SendTendril::from(tendril)`, +/// and may be returned to a `Tendril` by `Tendril::from(self)`. +#[derive(Clone)] +pub struct SendTendril +where + F: fmt::Format, +{ + tendril: Tendril, +} + +unsafe impl Send for SendTendril where F: fmt::Format {} + +impl From> for SendTendril +where + F: fmt::Format, + A: Atomicity, +{ + #[inline] + fn from(tendril: Tendril) -> SendTendril { + tendril.into_send() + } +} + +impl From> for Tendril +where + F: fmt::Format, + A: Atomicity, +{ + #[inline] + fn from(send: SendTendril) -> Tendril { + unsafe { mem::transmute(send.tendril) } + // header.refcount may have been initialised as an Atomic or a NonAtomic, but the value + // will be the same (1) regardless, because the layout is defined. + // Thus we don't need to fiddle about resetting it or anything like that. + } +} + +/// `Tendril`-related methods for Rust slices. +pub trait SliceExt: fmt::Slice +where + F: fmt::SliceFormat, +{ + /// Make a `Tendril` from this slice. + #[inline] + fn to_tendril(&self) -> Tendril { + // It should be done thusly, but at the time of writing the defaults don't help inference: + //fn to_tendril(&self) -> Tendril + // where A: Atomicity, + //{ + Tendril::from_slice(self) + } +} + +impl SliceExt for str {} +impl SliceExt for [u8] {} + +impl Tendril +where + F: for<'a> fmt::CharFormat<'a>, + A: Atomicity, +{ + /// Remove and return the first character, if any. + #[inline] + pub fn pop_front_char<'a>(&'a mut self) -> Option { + unsafe { + let next_char; // first char in iterator + let mut skip = 0; // number of bytes to skip, or 0 to clear + + { + // <--+ + // | Creating an iterator borrows self, so introduce a + // +- scope to contain the borrow (that way we can mutate + // self below, after this scope exits). 
+ + let mut iter = F::char_indices(self.as_byte_slice()); + match iter.next() { + Some((_, c)) => { + next_char = Some(c); + if let Some((n, _)) = iter.next() { + skip = n as u32; + } + } + None => { + next_char = None; + } + } + } + + if skip != 0 { + self.unsafe_pop_front(skip); + } else { + self.clear(); + } + + next_char + } + } + + /// Remove and return a run of characters at the front of the `Tendril` + /// which are classified the same according to the function `classify`. + /// + /// Returns `None` on an empty string. + #[inline] + pub fn pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) -> Option<(Tendril, R)> + where + C: FnMut(char) -> R, + R: PartialEq, + { + let (class, first_mismatch); + { + let mut chars = unsafe { F::char_indices(self.as_byte_slice()) }; + let (_, first) = unwrap_or_return!(chars.next(), None); + class = classify(first); + first_mismatch = chars.find(|&(_, ch)| &classify(ch) != &class); + } + + match first_mismatch { + Some((idx, _)) => unsafe { + let t = self.unsafe_subtendril(0, idx as u32); + self.unsafe_pop_front(idx as u32); + Some((t, class)) + }, + None => { + let t = self.clone(); + self.clear(); + Some((t, class)) + } + } + } + + /// Push a character, if it can be represented in this format. + #[inline] + pub fn try_push_char(&mut self, c: char) -> Result<(), ()> { + F::encode_char(c, |b| unsafe { + self.push_bytes_without_validating(b); + }) + } +} + +/// Extension trait for `io::Read`. +pub trait ReadExt: io::Read { + fn read_to_tendril(&mut self, buf: &mut Tendril) -> io::Result + where + A: Atomicity; +} + +impl ReadExt for T +where + T: io::Read, +{ + /// Read all bytes until EOF. + fn read_to_tendril(&mut self, buf: &mut Tendril) -> io::Result + where + A: Atomicity, + { + // Adapted from libstd/io/mod.rs. + const DEFAULT_BUF_SIZE: u32 = 64 * 1024; + + let start_len = buf.len(); + let mut len = start_len; + let mut new_write_size = 16; + let ret; + loop { + if len == buf.len() { + if new_write_size < DEFAULT_BUF_SIZE { + new_write_size *= 2; + } + // FIXME: this exposes uninitialized bytes to a generic R type + // this is fine for R=File which never reads these bytes, + // but user-defined types might. + // The standard library pushes zeros to `Vec` for that reason. + unsafe { + buf.push_uninitialized(new_write_size); + } + } + + match self.read(&mut buf[len..]) { + Ok(0) => { + ret = Ok(len - start_len); + break; + } + Ok(n) => len += n, + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => { + ret = Err(e); + break; + } + } + } + + let buf_len = buf.len32(); + buf.pop_back(buf_len - (len as u32)); + ret + } +} + +impl io::Write for Tendril +where + A: Atomicity, +{ + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + self.push_slice(buf); + Ok(buf.len()) + } + + #[inline] + fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { + self.push_slice(buf); + Ok(()) + } + + #[inline(always)] + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +#[cfg(feature = "encoding")] +impl encoding::ByteWriter for Tendril +where + A: Atomicity, +{ + #[inline] + fn write_byte(&mut self, b: u8) { + self.push_slice(&[b]); + } + + #[inline] + fn write_bytes(&mut self, v: &[u8]) { + self.push_slice(v); + } + + #[inline] + fn writer_hint(&mut self, additional: usize) { + self.reserve(::std::cmp::min(u32::MAX as usize, additional) as u32); + } +} + +impl Tendril +where + A: Atomicity, + F: fmt::SliceFormat, +{ + /// Decode from some character encoding into UTF-8. 
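+    ///
+    /// A sketch (requires the `encoding` feature; `bytes` is an existing `ByteTendril`):
+    /// `bytes.decode(encoding::all::WINDOWS_949, DecoderTrap::Replace)` returns
+    /// `Ok(StrTendril)` on success and the decoder's error message otherwise.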
+ /// + /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/) + /// for more information. + #[inline] + #[cfg(feature = "encoding")] + pub fn decode( + &self, + encoding: EncodingRef, + trap: DecoderTrap, + ) -> Result, ::std::borrow::Cow<'static, str>> { + let mut ret = Tendril::new(); + encoding.decode_to(&*self, trap, &mut ret).map(|_| ret) + } + + /// Push "uninitialized bytes" onto the end. + /// + /// Really, this grows the tendril without writing anything to the new area. + /// It's only defined for byte tendrils because it's only useful if you + /// plan to then mutate the buffer. + #[inline] + pub unsafe fn push_uninitialized(&mut self, n: u32) { + let new_len = self.len32().checked_add(n).expect(OFLOW); + if new_len <= MAX_INLINE_LEN as u32 && self.ptr.get().get() <= MAX_INLINE_TAG { + self.ptr.set(inline_tag(new_len)) + } else { + self.make_owned_with_capacity(new_len); + self.set_len(new_len); + } + } +} + +impl strfmt::Display for Tendril +where + A: Atomicity, +{ + #[inline] + fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result { + ::fmt(&**self, f) + } +} + +impl str::FromStr for Tendril +where + A: Atomicity, +{ + type Err = (); + + #[inline] + fn from_str(s: &str) -> Result { + Ok(Tendril::from_slice(s)) + } +} + +impl strfmt::Write for Tendril +where + A: Atomicity, +{ + #[inline] + fn write_str(&mut self, s: &str) -> strfmt::Result { + self.push_slice(s); + Ok(()) + } +} + +#[cfg(feature = "encoding")] +impl encoding::StringWriter for Tendril +where + A: Atomicity, +{ + #[inline] + fn write_char(&mut self, c: char) { + self.push_char(c); + } + + #[inline] + fn write_str(&mut self, s: &str) { + self.push_slice(s); + } + + #[inline] + fn writer_hint(&mut self, additional: usize) { + self.reserve(::std::cmp::min(u32::MAX as usize, additional) as u32); + } +} + +impl Tendril +where + A: Atomicity, +{ + /// Encode from UTF-8 into some other character encoding. + /// + /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/) + /// for more information. + #[inline] + #[cfg(feature = "encoding")] + pub fn encode( + &self, + encoding: EncodingRef, + trap: EncoderTrap, + ) -> Result, ::std::borrow::Cow<'static, str>> { + let mut ret = Tendril::new(); + encoding.encode_to(&*self, trap, &mut ret).map(|_| ret) + } + + /// Push a character onto the end. + #[inline] + pub fn push_char(&mut self, c: char) { + unsafe { + self.push_bytes_without_validating(c.encode_utf8(&mut [0_u8; 4]).as_bytes()); + } + } + + /// Create a `Tendril` from a single character. + #[inline] + pub fn from_char(c: char) -> Tendril { + let mut t: Tendril = Tendril::new(); + t.push_char(c); + t + } + + /// Helper for the `format_tendril!` macro. + #[inline] + pub fn format(args: strfmt::Arguments) -> Tendril { + use std::fmt::Write; + let mut output: Tendril = Tendril::new(); + let _ = write!(&mut output, "{}", args); + output + } +} + +/// Create a `StrTendril` through string formatting. +/// +/// Works just like the standard `format!` macro. +#[macro_export] +macro_rules! 
format_tendril { + ($($arg:tt)*) => ($crate::StrTendril::format(format_args!($($arg)*))) +} + +impl<'a, F, A> From<&'a F::Slice> for Tendril +where + F: fmt::SliceFormat, + A: Atomicity, +{ + #[inline] + fn from(input: &F::Slice) -> Tendril { + Tendril::from_slice(input) + } +} + +impl From for Tendril +where + A: Atomicity, +{ + #[inline] + fn from(input: String) -> Tendril { + Tendril::from_slice(&*input) + } +} + +impl AsRef for Tendril +where + F: fmt::SliceFormat, + A: Atomicity, +{ + #[inline] + fn as_ref(&self) -> &F::Slice { + &**self + } +} + +impl From> for String +where + A: Atomicity, +{ + #[inline] + fn from(input: Tendril) -> String { + String::from(&*input) + } +} + +impl<'a, A> From<&'a Tendril> for String +where + A: Atomicity, +{ + #[inline] + fn from(input: &'a Tendril) -> String { + String::from(&**input) + } +} + +#[cfg(all(test, feature = "bench"))] +#[path = "bench.rs"] +mod bench; + +#[cfg(test)] +mod test { + use super::{ + Atomic, ByteTendril, Header, NonAtomic, ReadExt, SendTendril, SliceExt, StrTendril, Tendril, + }; + use fmt; + use std::iter; + use std::thread; + + fn assert_send() {} + + #[test] + fn smoke_test() { + assert_eq!("", &*"".to_tendril()); + assert_eq!("abc", &*"abc".to_tendril()); + assert_eq!("Hello, world!", &*"Hello, world!".to_tendril()); + + assert_eq!(b"", &*b"".to_tendril()); + assert_eq!(b"abc", &*b"abc".to_tendril()); + assert_eq!(b"Hello, world!", &*b"Hello, world!".to_tendril()); + } + + #[test] + fn assert_sizes() { + use std::mem; + struct EmptyWithDrop; + impl Drop for EmptyWithDrop { + fn drop(&mut self) {} + } + let compiler_uses_inline_drop_flags = mem::size_of::() > 0; + + let correct = mem::size_of::<*const ()>() + + 8 + + if compiler_uses_inline_drop_flags { + 1 + } else { + 0 + }; + + assert_eq!(correct, mem::size_of::()); + assert_eq!(correct, mem::size_of::()); + + // This is no longer true. 
See https://github.com/servo/tendril/issues/66 + // assert_eq!(correct, mem::size_of::>()); + // assert_eq!(correct, mem::size_of::>()); + + assert_eq!( + mem::size_of::<*const ()>() * 2, + mem::size_of::>(), + ); + assert_eq!( + mem::size_of::>(), + mem::size_of::>(), + ); + } + + #[test] + fn validate_utf8() { + assert!(ByteTendril::try_from_byte_slice(b"\xFF").is_ok()); + assert!(StrTendril::try_from_byte_slice(b"\xFF").is_err()); + assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xFF").is_err()); + assert!(StrTendril::try_from_byte_slice(b"\xEA\x99").is_err()); + assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xAE\xEA").is_err()); + assert_eq!( + "\u{a66e}", + &*StrTendril::try_from_byte_slice(b"\xEA\x99\xAE").unwrap() + ); + + let mut t = StrTendril::new(); + assert!(t.try_push_bytes(b"\xEA\x99").is_err()); + assert!(t.try_push_bytes(b"\xAE").is_err()); + assert!(t.try_push_bytes(b"\xEA\x99\xAE").is_ok()); + assert_eq!("\u{a66e}", &*t); + } + + #[test] + fn share_and_unshare() { + let s = b"foobarbaz".to_tendril(); + assert_eq!(b"foobarbaz", &*s); + assert!(!s.is_shared()); + + let mut t = s.clone(); + assert_eq!(s.as_ptr(), t.as_ptr()); + assert!(s.is_shared()); + assert!(t.is_shared()); + + t.push_slice(b"quux"); + assert_eq!(b"foobarbaz", &*s); + assert_eq!(b"foobarbazquux", &*t); + assert!(s.as_ptr() != t.as_ptr()); + assert!(!t.is_shared()); + } + + #[test] + fn format_display() { + assert_eq!("foobar", &*format!("{}", "foobar".to_tendril())); + + let mut s = "foo".to_tendril(); + assert_eq!("foo", &*format!("{}", s)); + + let t = s.clone(); + assert_eq!("foo", &*format!("{}", s)); + assert_eq!("foo", &*format!("{}", t)); + + s.push_slice("barbaz!"); + assert_eq!("foobarbaz!", &*format!("{}", s)); + assert_eq!("foo", &*format!("{}", t)); + } + + #[test] + fn format_debug() { + assert_eq!( + r#"Tendril(inline: "foobar")"#, + &*format!("{:?}", "foobar".to_tendril()) + ); + assert_eq!( + r#"Tendril(inline: [102, 111, 111, 98, 97, 114])"#, + &*format!("{:?}", b"foobar".to_tendril()) + ); + + let t = "anextralongstring".to_tendril(); + assert_eq!( + r#"Tendril(owned: "anextralongstring")"#, + &*format!("{:?}", t) + ); + let _ = t.clone(); + assert_eq!( + r#"Tendril(shared: "anextralongstring")"#, + &*format!("{:?}", t) + ); + } + + #[test] + fn subtendril() { + assert_eq!("foo".to_tendril(), "foo-bar".to_tendril().subtendril(0, 3)); + assert_eq!("bar".to_tendril(), "foo-bar".to_tendril().subtendril(4, 3)); + + let mut t = "foo-bar".to_tendril(); + t.pop_front(2); + assert_eq!("o-bar".to_tendril(), t); + t.pop_back(1); + assert_eq!("o-ba".to_tendril(), t); + + assert_eq!( + "foo".to_tendril(), + "foo-a-longer-string-bar-baz".to_tendril().subtendril(0, 3) + ); + assert_eq!( + "oo-a-".to_tendril(), + "foo-a-longer-string-bar-baz".to_tendril().subtendril(1, 5) + ); + assert_eq!( + "bar".to_tendril(), + "foo-a-longer-string-bar-baz".to_tendril().subtendril(20, 3) + ); + + let mut t = "another rather long string".to_tendril(); + t.pop_front(2); + assert!(t.starts_with("other rather")); + t.pop_back(1); + assert_eq!("other rather long strin".to_tendril(), t); + assert!(t.is_shared()); + } + + #[test] + fn subtendril_invalid() { + assert!("\u{a66e}".to_tendril().try_subtendril(0, 2).is_err()); + assert!("\u{a66e}".to_tendril().try_subtendril(1, 2).is_err()); + + assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 3).is_err()); + assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 2).is_err()); + assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 1).is_err()); + 
assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 3).is_err()); + assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 2).is_err()); + assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 1).is_err()); + assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 2).is_err()); + assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 1).is_err()); + assert!("\u{1f4a9}".to_tendril().try_subtendril(3, 1).is_err()); + + let mut t = "\u{1f4a9}zzzzzz".to_tendril(); + assert!(t.try_pop_front(1).is_err()); + assert!(t.try_pop_front(2).is_err()); + assert!(t.try_pop_front(3).is_err()); + assert!(t.try_pop_front(4).is_ok()); + assert_eq!("zzzzzz", &*t); + + let mut t = "zzzzzz\u{1f4a9}".to_tendril(); + assert!(t.try_pop_back(1).is_err()); + assert!(t.try_pop_back(2).is_err()); + assert!(t.try_pop_back(3).is_err()); + assert!(t.try_pop_back(4).is_ok()); + assert_eq!("zzzzzz", &*t); + } + + #[test] + fn conversion() { + assert_eq!( + &[0x66, 0x6F, 0x6F].to_tendril(), + "foo".to_tendril().as_bytes() + ); + assert_eq!( + [0x66, 0x6F, 0x6F].to_tendril(), + "foo".to_tendril().into_bytes() + ); + + let ascii: Tendril = b"hello".to_tendril().try_reinterpret().unwrap(); + assert_eq!(&"hello".to_tendril(), ascii.as_superset()); + assert_eq!("hello".to_tendril(), ascii.clone().into_superset()); + + assert!(b"\xFF" + .to_tendril() + .try_reinterpret::() + .is_err()); + + let t = "hello".to_tendril(); + let ascii: &Tendril = t.try_as_subset().unwrap(); + assert_eq!(b"hello", &**ascii.as_bytes()); + + assert!("ő" + .to_tendril() + .try_reinterpret_view::() + .is_err()); + assert!("ő".to_tendril().try_as_subset::().is_err()); + + let ascii: Tendril = "hello".to_tendril().try_into_subset().unwrap(); + assert_eq!(b"hello", &**ascii.as_bytes()); + + assert!("ő".to_tendril().try_reinterpret::().is_err()); + assert!("ő".to_tendril().try_into_subset::().is_err()); + } + + #[test] + fn clear() { + let mut t = "foo-".to_tendril(); + t.clear(); + assert_eq!(t.len(), 0); + assert_eq!(t.len32(), 0); + assert_eq!(&*t, ""); + + let mut t = "much longer".to_tendril(); + let s = t.clone(); + t.clear(); + assert_eq!(t.len(), 0); + assert_eq!(t.len32(), 0); + assert_eq!(&*t, ""); + assert_eq!(&*s, "much longer"); + } + + #[test] + fn push_tendril() { + let mut t = "abc".to_tendril(); + t.push_tendril(&"xyz".to_tendril()); + assert_eq!("abcxyz", &*t); + } + + #[test] + fn wtf8() { + assert!(Tendril::::try_from_byte_slice(b"\xED\xA0\xBD").is_ok()); + assert!(Tendril::::try_from_byte_slice(b"\xED\xB2\xA9").is_ok()); + assert!(Tendril::::try_from_byte_slice(b"\xED\xA0\xBD\xED\xB2\xA9").is_err()); + + let t: Tendril = + Tendril::try_from_byte_slice(b"\xED\xA0\xBD\xEA\x99\xAE").unwrap(); + assert!(b"\xED\xA0\xBD".to_tendril().try_reinterpret().unwrap() == t.subtendril(0, 3)); + assert!(b"\xEA\x99\xAE".to_tendril().try_reinterpret().unwrap() == t.subtendril(3, 3)); + assert!(t.try_reinterpret_view::().is_err()); + + assert!(t.try_subtendril(0, 1).is_err()); + assert!(t.try_subtendril(0, 2).is_err()); + assert!(t.try_subtendril(1, 1).is_err()); + + assert!(t.try_subtendril(3, 1).is_err()); + assert!(t.try_subtendril(3, 2).is_err()); + assert!(t.try_subtendril(4, 1).is_err()); + + // paired surrogates + let mut t: Tendril = Tendril::try_from_byte_slice(b"\xED\xA0\xBD").unwrap(); + assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok()); + assert_eq!(b"\xF0\x9F\x92\xA9", t.as_byte_slice()); + assert!(t.try_reinterpret_view::().is_ok()); + + // unpaired surrogates + let mut t: Tendril = Tendril::try_from_byte_slice(b"\xED\xA0\xBB").unwrap(); + 
assert!(t.try_push_bytes(b"\xED\xA0").is_err()); + assert!(t.try_push_bytes(b"\xED").is_err()); + assert!(t.try_push_bytes(b"\xA0").is_err()); + assert!(t.try_push_bytes(b"\xED\xA0\xBD").is_ok()); + assert_eq!(b"\xED\xA0\xBB\xED\xA0\xBD", t.as_byte_slice()); + assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok()); + assert_eq!(b"\xED\xA0\xBB\xF0\x9F\x92\xA9", t.as_byte_slice()); + assert!(t.try_reinterpret_view::().is_err()); + } + + #[test] + fn front_char() { + let mut t = "".to_tendril(); + assert_eq!(None, t.pop_front_char()); + assert_eq!(None, t.pop_front_char()); + + let mut t = "abc".to_tendril(); + assert_eq!(Some('a'), t.pop_front_char()); + assert_eq!(Some('b'), t.pop_front_char()); + assert_eq!(Some('c'), t.pop_front_char()); + assert_eq!(None, t.pop_front_char()); + assert_eq!(None, t.pop_front_char()); + + let mut t = "főo-a-longer-string-bar-baz".to_tendril(); + assert_eq!(28, t.len()); + assert_eq!(Some('f'), t.pop_front_char()); + assert_eq!(Some('ő'), t.pop_front_char()); + assert_eq!(Some('o'), t.pop_front_char()); + assert_eq!(Some('-'), t.pop_front_char()); + assert_eq!(23, t.len()); + } + + #[test] + fn char_run() { + for &(s, exp) in &[ + ("", None), + (" ", Some((" ", true))), + ("x", Some(("x", false))), + (" \t \n", Some((" \t \n", true))), + ("xyzzy", Some(("xyzzy", false))), + (" xyzzy", Some((" ", true))), + ("xyzzy ", Some(("xyzzy", false))), + (" xyzzy ", Some((" ", true))), + ("xyzzy hi", Some(("xyzzy", false))), + ("中 ", Some(("中", false))), + (" 中 ", Some((" ", true))), + (" 中 ", Some((" ", true))), + (" 中 ", Some((" ", true))), + ] { + let mut t = s.to_tendril(); + let res = t.pop_front_char_run(char::is_whitespace); + match exp { + None => assert!(res.is_none()), + Some((es, ec)) => { + let (rt, rc) = res.unwrap(); + assert_eq!(es, &*rt); + assert_eq!(ec, rc); + } + } + } + } + + #[test] + fn deref_mut_inline() { + let mut t = "xyő".to_tendril().into_bytes(); + t[3] = 0xff; + assert_eq!(b"xy\xC5\xFF", &*t); + assert!(t.try_reinterpret_view::().is_err()); + t[3] = 0x8b; + assert_eq!("xyŋ", &**t.try_reinterpret_view::().unwrap()); + + unsafe { + t.push_uninitialized(3); + t[4] = 0xEA; + t[5] = 0x99; + t[6] = 0xAE; + assert_eq!( + "xyŋ\u{a66e}", + &**t.try_reinterpret_view::().unwrap() + ); + t.push_uninitialized(20); + t.pop_back(20); + assert_eq!( + "xyŋ\u{a66e}", + &**t.try_reinterpret_view::().unwrap() + ); + } + } + + #[test] + fn deref_mut() { + let mut t = b"0123456789".to_tendril(); + let u = t.clone(); + assert!(t.is_shared()); + t[9] = 0xff; + assert!(!t.is_shared()); + assert_eq!(b"0123456789", &*u); + assert_eq!(b"012345678\xff", &*t); + } + + #[test] + fn push_char() { + let mut t = "xyz".to_tendril(); + t.push_char('o'); + assert_eq!("xyzo", &*t); + t.push_char('ő'); + assert_eq!("xyzoő", &*t); + t.push_char('\u{a66e}'); + assert_eq!("xyzoő\u{a66e}", &*t); + t.push_char('\u{1f4a9}'); + assert_eq!("xyzoő\u{a66e}\u{1f4a9}", &*t); + assert_eq!(t.len(), 13); + } + + #[test] + #[cfg(feature = "encoding")] + fn encode() { + use encoding::{all, EncoderTrap}; + + let t = "안녕하세요 러스트".to_tendril(); + assert_eq!( + b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae", + &*t.encode(all::WINDOWS_949, EncoderTrap::Strict).unwrap() + ); + + let t = "Энергия пробуждения ия-я-я! 
\u{a66e}".to_tendril(); + assert_eq!( + b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\ + \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21 ?", + &*t.encode(all::KOI8_U, EncoderTrap::Replace).unwrap() + ); + + let t = "\u{1f4a9}".to_tendril(); + assert!(t.encode(all::WINDOWS_1252, EncoderTrap::Strict).is_err()); + } + + #[test] + #[cfg(feature = "encoding")] + fn decode() { + use encoding::{all, DecoderTrap}; + + let t = b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\ + \xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae" + .to_tendril(); + assert_eq!( + "안녕하세요 러스트", + &*t.decode(all::WINDOWS_949, DecoderTrap::Strict).unwrap() + ); + + let t = b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\ + \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21" + .to_tendril(); + assert_eq!( + "Энергия пробуждения ия-я-я!", + &*t.decode(all::KOI8_U, DecoderTrap::Replace).unwrap() + ); + + let t = b"x \xff y".to_tendril(); + assert!(t.decode(all::UTF_8, DecoderTrap::Strict).is_err()); + + let t = b"x \xff y".to_tendril(); + assert_eq!( + "x \u{fffd} y", + &*t.decode(all::UTF_8, DecoderTrap::Replace).unwrap() + ); + } + + #[test] + fn ascii() { + fn mk(x: &[u8]) -> Tendril { + x.to_tendril().try_reinterpret().unwrap() + } + + let mut t = mk(b"xyz"); + assert_eq!(Some('x'), t.pop_front_char()); + assert_eq!(Some('y'), t.pop_front_char()); + assert_eq!(Some('z'), t.pop_front_char()); + assert_eq!(None, t.pop_front_char()); + + let mut t = mk(b" \t xyz"); + assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace)); + assert!(Some((mk(b"xyz"), false)) == t.pop_front_char_run(char::is_whitespace)); + assert!(t.pop_front_char_run(char::is_whitespace).is_none()); + + let mut t = Tendril::::new(); + assert!(t.try_push_char('x').is_ok()); + assert!(t.try_push_char('\0').is_ok()); + assert!(t.try_push_char('\u{a0}').is_err()); + assert_eq!(b"x\0", t.as_byte_slice()); + } + + #[test] + fn latin1() { + fn mk(x: &[u8]) -> Tendril { + x.to_tendril().try_reinterpret().unwrap() + } + + let mut t = mk(b"\xd8_\xd8"); + assert_eq!(Some('Ø'), t.pop_front_char()); + assert_eq!(Some('_'), t.pop_front_char()); + assert_eq!(Some('Ø'), t.pop_front_char()); + assert_eq!(None, t.pop_front_char()); + + let mut t = mk(b" \t \xfe\xa7z"); + assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace)); + assert!(Some((mk(b"\xfe\xa7z"), false)) == t.pop_front_char_run(char::is_whitespace)); + assert!(t.pop_front_char_run(char::is_whitespace).is_none()); + + let mut t = Tendril::::new(); + assert!(t.try_push_char('x').is_ok()); + assert!(t.try_push_char('\0').is_ok()); + assert!(t.try_push_char('\u{a0}').is_ok()); + assert!(t.try_push_char('ő').is_err()); + assert!(t.try_push_char('я').is_err()); + assert!(t.try_push_char('\u{a66e}').is_err()); + assert!(t.try_push_char('\u{1f4a9}').is_err()); + assert_eq!(b"x\0\xa0", t.as_byte_slice()); + } + + #[test] + fn format() { + assert_eq!("", &*format_tendril!("")); + assert_eq!( + "two and two make 4", + &*format_tendril!("two and two make {}", 2 + 2) + ); + } + + #[test] + fn merge_shared() { + let t = "012345678901234567890123456789".to_tendril(); + let a = t.subtendril(10, 20); + assert!(a.is_shared()); + assert_eq!("01234567890123456789", &*a); + let mut b = t.subtendril(0, 10); + assert!(b.is_shared()); + assert_eq!("0123456789", &*b); + + b.push_tendril(&a); + assert!(b.is_shared()); + assert!(a.is_shared()); + assert!(a.is_shared_with(&b)); + assert!(b.is_shared_with(&a)); + assert_eq!("012345678901234567890123456789", &*b); + + assert!(t.is_shared()); + 
assert!(t.is_shared_with(&a)); + assert!(t.is_shared_with(&b)); + } + + #[test] + fn merge_cant_share() { + let t = "012345678901234567890123456789".to_tendril(); + let mut b = t.subtendril(0, 10); + assert!(b.is_shared()); + assert_eq!("0123456789", &*b); + + b.push_tendril(&"abcd".to_tendril()); + assert!(!b.is_shared()); + assert_eq!("0123456789abcd", &*b); + } + + #[test] + fn shared_doesnt_reserve() { + let mut t = "012345678901234567890123456789".to_tendril(); + let a = t.subtendril(1, 10); + + assert!(t.is_shared()); + t.reserve(10); + assert!(t.is_shared()); + + let _ = a; + } + + #[test] + fn out_of_bounds() { + assert!("".to_tendril().try_subtendril(0, 1).is_err()); + assert!("abc".to_tendril().try_subtendril(0, 4).is_err()); + assert!("abc".to_tendril().try_subtendril(3, 1).is_err()); + assert!("abc".to_tendril().try_subtendril(7, 1).is_err()); + + let mut t = "".to_tendril(); + assert!(t.try_pop_front(1).is_err()); + assert!(t.try_pop_front(5).is_err()); + assert!(t.try_pop_front(500).is_err()); + assert!(t.try_pop_back(1).is_err()); + assert!(t.try_pop_back(5).is_err()); + assert!(t.try_pop_back(500).is_err()); + + let mut t = "abcd".to_tendril(); + assert!(t.try_pop_front(1).is_ok()); + assert!(t.try_pop_front(4).is_err()); + assert!(t.try_pop_front(500).is_err()); + assert!(t.try_pop_back(1).is_ok()); + assert!(t.try_pop_back(3).is_err()); + assert!(t.try_pop_back(500).is_err()); + } + + #[test] + fn compare() { + for &a in &[ + "indiscretions", + "validity", + "hallucinogenics", + "timelessness", + "original", + "microcosms", + "boilers", + "mammoth", + ] { + for &b in &[ + "intrepidly", + "frigid", + "spa", + "cardigans", + "guileful", + "evaporated", + "unenthusiastic", + "legitimate", + ] { + let ta = a.to_tendril(); + let tb = b.to_tendril(); + + assert_eq!(a.eq(b), ta.eq(&tb)); + assert_eq!(a.ne(b), ta.ne(&tb)); + assert_eq!(a.lt(b), ta.lt(&tb)); + assert_eq!(a.le(b), ta.le(&tb)); + assert_eq!(a.gt(b), ta.gt(&tb)); + assert_eq!(a.ge(b), ta.ge(&tb)); + assert_eq!(a.partial_cmp(b), ta.partial_cmp(&tb)); + assert_eq!(a.cmp(b), ta.cmp(&tb)); + } + } + } + + #[test] + fn extend_and_from_iterator() { + // Testing Extend and FromIterator for the various Ts. 
+ + // Tendril + let mut t = "Hello".to_tendril(); + t.extend(None::<&Tendril<_>>.into_iter()); + assert_eq!("Hello", &*t); + t.extend(&[", ".to_tendril(), "world".to_tendril(), "!".to_tendril()]); + assert_eq!("Hello, world!", &*t); + assert_eq!( + "Hello, world!", + &*[ + "Hello".to_tendril(), + ", ".to_tendril(), + "world".to_tendril(), + "!".to_tendril() + ] + .iter() + .collect::() + ); + + // &str + let mut t = "Hello".to_tendril(); + t.extend(None::<&str>.into_iter()); + assert_eq!("Hello", &*t); + t.extend([", ", "world", "!"].iter().map(|&s| s)); + assert_eq!("Hello, world!", &*t); + assert_eq!( + "Hello, world!", + &*["Hello", ", ", "world", "!"] + .iter() + .map(|&s| s) + .collect::() + ); + + // &[u8] + let mut t = b"Hello".to_tendril(); + t.extend(None::<&[u8]>.into_iter()); + assert_eq!(b"Hello", &*t); + t.extend( + [b", ".as_ref(), b"world".as_ref(), b"!".as_ref()] + .iter() + .map(|&s| s), + ); + assert_eq!(b"Hello, world!", &*t); + assert_eq!( + b"Hello, world!", + &*[ + b"Hello".as_ref(), + b", ".as_ref(), + b"world".as_ref(), + b"!".as_ref() + ] + .iter() + .map(|&s| s) + .collect::() + ); + + let string = "the quick brown fox jumps over the lazy dog"; + let string_expected = string.to_tendril(); + let bytes = string.as_bytes(); + let bytes_expected = bytes.to_tendril(); + + // char + assert_eq!(string_expected, string.chars().collect()); + let mut tendril = StrTendril::new(); + tendril.extend(string.chars()); + assert_eq!(string_expected, tendril); + + // &u8 + assert_eq!(bytes_expected, bytes.iter().collect()); + let mut tendril = ByteTendril::new(); + tendril.extend(bytes); + assert_eq!(bytes_expected, tendril); + + // u8 + assert_eq!(bytes_expected, bytes.iter().map(|&b| b).collect()); + let mut tendril = ByteTendril::new(); + tendril.extend(bytes.iter().map(|&b| b)); + assert_eq!(bytes_expected, tendril); + } + + #[test] + fn from_str() { + use std::str::FromStr; + let t: Tendril<_> = FromStr::from_str("foo bar baz").unwrap(); + assert_eq!("foo bar baz", &*t); + } + + #[test] + fn from_char() { + assert_eq!("o", &*StrTendril::from_char('o')); + assert_eq!("ő", &*StrTendril::from_char('ő')); + assert_eq!("\u{a66e}", &*StrTendril::from_char('\u{a66e}')); + assert_eq!("\u{1f4a9}", &*StrTendril::from_char('\u{1f4a9}')); + } + + #[test] + #[cfg_attr(miri, ignore)] // slow + fn read() { + fn check(x: &[u8]) { + use std::io::Cursor; + let mut t = ByteTendril::new(); + assert_eq!(x.len(), Cursor::new(x).read_to_tendril(&mut t).unwrap()); + assert_eq!(x, &*t); + } + + check(b""); + check(b"abcd"); + + let long: Vec = iter::repeat(b'x').take(1_000_000).collect(); + check(&long); + } + + #[test] + fn hash_map_key() { + use std::collections::HashMap; + + // As noted with Borrow, indexing on HashMap is byte-based because of + // https://github.com/rust-lang/rust/issues/27108. 
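+        // Lookups go through `&[u8]` keys (e.g. `b"foo".as_ref()`), even when
+        // the map is keyed by `StrTendril`.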
+ let mut map = HashMap::new(); + map.insert("foo".to_tendril(), 1); + assert_eq!(map.get(b"foo".as_ref()), Some(&1)); + assert_eq!(map.get(b"bar".as_ref()), None); + + let mut map = HashMap::new(); + map.insert(b"foo".to_tendril(), 1); + assert_eq!(map.get(b"foo".as_ref()), Some(&1)); + assert_eq!(map.get(b"bar".as_ref()), None); + } + + #[test] + fn atomic() { + assert_send::>(); + let s: Tendril = Tendril::from_slice("this is a string"); + assert!(!s.is_shared()); + let mut t = s.clone(); + assert!(s.is_shared()); + let sp = s.as_ptr() as usize; + thread::spawn(move || { + assert!(t.is_shared()); + t.push_slice(" extended"); + assert_eq!("this is a string extended", &*t); + assert!(t.as_ptr() as usize != sp); + assert!(!t.is_shared()); + }) + .join() + .unwrap(); + assert!(s.is_shared()); + assert_eq!("this is a string", &*s); + } + + #[test] + fn send() { + assert_send::>(); + let s = "this is a string".to_tendril(); + let t = s.clone(); + let s2 = s.into_send(); + thread::spawn(move || { + let s = StrTendril::from(s2); + assert!(!s.is_shared()); + assert_eq!("this is a string", &*s); + }) + .join() + .unwrap(); + assert_eq!("this is a string", &*t); + } + + /// https://github.com/servo/tendril/issues/58 + #[test] + fn issue_58() { + let data = "

Hello!

, World!"; + let s: Tendril = data.into(); + assert_eq!(&*s, data); + let s: Tendril = s.into_send().into(); + assert_eq!(&*s, data); + } + + #[test] + fn inline_send() { + let s = "x".to_tendril(); + let t = s.clone(); + let s2 = s.into_send(); + thread::spawn(move || { + let s = StrTendril::from(s2); + assert!(!s.is_shared()); + assert_eq!("x", &*s); + }) + .join() + .unwrap(); + assert_eq!("x", &*t); + } +} diff --git a/tendril/src/utf8_decode.rs b/tendril/src/utf8_decode.rs new file mode 100644 index 00000000..b682d57a --- /dev/null +++ b/tendril/src/utf8_decode.rs @@ -0,0 +1,98 @@ +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use fmt; +use tendril::{Atomicity, Tendril}; +use utf8; + +pub struct IncompleteUtf8(utf8::Incomplete); + +impl Tendril +where + A: Atomicity, +{ + pub fn decode_utf8_lossy(mut self, mut push_utf8: F) -> Option + where + F: FnMut(Tendril), + { + loop { + if self.is_empty() { + return None; + } + let unborrowed_result = match utf8::decode(&self) { + Ok(s) => { + debug_assert!(s.as_ptr() == self.as_ptr()); + debug_assert!(s.len() == self.len()); + Ok(()) + } + Err(utf8::DecodeError::Invalid { + valid_prefix, + invalid_sequence, + .. + }) => { + debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); + debug_assert!(valid_prefix.len() <= self.len()); + Err(( + valid_prefix.len(), + Err(valid_prefix.len() + invalid_sequence.len()), + )) + } + Err(utf8::DecodeError::Incomplete { + valid_prefix, + incomplete_suffix, + }) => { + debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); + debug_assert!(valid_prefix.len() <= self.len()); + Err((valid_prefix.len(), Ok(incomplete_suffix))) + } + }; + match unborrowed_result { + Ok(()) => { + unsafe { push_utf8(self.reinterpret_without_validating()) } + return None; + } + Err((valid_len, and_then)) => { + if valid_len > 0 { + let subtendril = self.subtendril(0, valid_len as u32); + unsafe { push_utf8(subtendril.reinterpret_without_validating()) } + } + match and_then { + Ok(incomplete) => return Some(IncompleteUtf8(incomplete)), + Err(offset) => { + push_utf8(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); + self.pop_front(offset as u32) + } + } + } + } + } + } +} + +impl IncompleteUtf8 { + pub fn try_complete( + &mut self, + mut input: Tendril, + mut push_utf8: F, + ) -> Result, ()> + where + A: Atomicity, + F: FnMut(Tendril), + { + let resume_at; + match self.0.try_complete(&input) { + None => return Err(()), + Some((result, rest)) => { + push_utf8(Tendril::from_slice( + result.unwrap_or(utf8::REPLACEMENT_CHARACTER), + )); + resume_at = input.len() - rest.len(); + } + } + input.pop_front(resume_at as u32); + Ok(input) + } +} diff --git a/tendril/src/util.rs b/tendril/src/util.rs new file mode 100644 index 00000000..28c55c12 --- /dev/null +++ b/tendril/src/util.rs @@ -0,0 +1,45 @@ +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use std::mem; +use std::{ptr, slice}; + +#[inline(always)] +pub unsafe fn unsafe_slice<'a>(buf: &'a [u8], start: usize, new_len: usize) -> &'a [u8] { + debug_assert!(start <= buf.len()); + debug_assert!(new_len <= (buf.len() - start)); + slice::from_raw_parts(buf.as_ptr().offset(start as isize), new_len) +} + +#[inline(always)] +pub unsafe fn unsafe_slice_mut<'a>( + buf: &'a mut [u8], + start: usize, + new_len: usize, +) -> &'a mut [u8] { + debug_assert!(start <= buf.len()); + debug_assert!(new_len <= (buf.len() - start)); + slice::from_raw_parts_mut(buf.as_mut_ptr().offset(start as isize), new_len) +} + +#[inline(always)] +pub unsafe fn copy_and_advance(dest: &mut *mut u8, src: &[u8]) { + ptr::copy_nonoverlapping(src.as_ptr(), *dest, src.len()); + *dest = dest.offset(src.len() as isize) +} + +#[inline(always)] +pub unsafe fn copy_lifetime_mut<'a, S: ?Sized, T: ?Sized + 'a>( + _ptr: &'a mut S, + ptr: &mut T, +) -> &'a mut T { + mem::transmute(ptr) +} + +#[inline(always)] +pub unsafe fn copy_lifetime<'a, S: ?Sized, T: ?Sized + 'a>(_ptr: &'a S, ptr: &T) -> &'a T { + mem::transmute(ptr) +} From 204251a54a3105bf5792f2a2ebb9b8dba69567b7 Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Mon, 8 Sep 2025 17:00:27 +0100 Subject: [PATCH 2/5] cargo fmt --- tendril/examples/fuzz.rs | 18 +++++++++--------- tendril/src/bench.rs | 8 ++++---- tendril/src/futf.rs | 14 +++++++------- tendril/src/stream.rs | 36 ++++++++++++++++++------------------ tendril/src/tendril.rs | 18 +++++++++--------- tendril/src/utf8_decode.rs | 14 +++++++------- 6 files changed, 54 insertions(+), 54 deletions(-) diff --git a/tendril/examples/fuzz.rs b/tendril/examples/fuzz.rs index 37daf560..13a44e01 100644 --- a/tendril/examples/fuzz.rs +++ b/tendril/examples/fuzz.rs @@ -39,7 +39,7 @@ fn fuzz() { buf_string.push_str(snip); buf_tendril.push_slice(snip); assert_eq!(&*buf_string, &*buf_tendril); - } + }, 16..=31 => { let (start, end) = random_slice(&mut rng, &buf_string); @@ -47,21 +47,21 @@ fn fuzz() { buf_string.push_str(&snip); buf_tendril.push_slice(&snip); assert_eq!(&*buf_string, &*buf_tendril); - } + }, 32..=47 => { let lenstr = format!("[length = {}]", buf_tendril.len()); buf_string.push_str(&lenstr); buf_tendril.push_slice(&lenstr); assert_eq!(&*buf_string, &*buf_tendril); - } + }, 48..=63 => { let n = random_boundary(&mut rng, &buf_string); buf_tendril.pop_front(n as u32); buf_string = buf_string[n..].to_owned(); assert_eq!(&*buf_string, &*buf_tendril); - } + }, 64..=79 => { let new_len = random_boundary(&mut rng, &buf_string); @@ -69,27 +69,27 @@ fn fuzz() { buf_string.truncate(new_len); buf_tendril.pop_back(n as u32); assert_eq!(&*buf_string, &*buf_tendril); - } + }, 80..=90 => { let (start, end) = random_slice(&mut rng, &buf_string); buf_string = buf_string[start..end].to_owned(); buf_tendril = buf_tendril.subtendril(start as u32, (end - start) as u32); assert_eq!(&*buf_string, &*buf_tendril); - } + }, 91..=96 => { let c = rng.gen(); buf_string.push(c); assert!(buf_tendril.try_push_char(c).is_ok()); assert_eq!(&*buf_string, &*buf_tendril); - } + }, 97 => { buf_string.truncate(0); buf_tendril.clear(); assert_eq!(&*buf_string, &*buf_tendril); - } + }, _ => { let (start, end) = random_slice(&mut rng, &buf_string); @@ -100,7 +100,7 @@ fn fuzz() { .iter() .zip(tendril_slices.iter()) .all(|(s, t)| **s == **t)); - } + }, } } } diff --git a/tendril/src/bench.rs b/tendril/src/bench.rs index a9d2c30a..ca2341ab 100644 --- a/tendril/src/bench.rs +++ b/tendril/src/bench.rs @@ -20,10 +20,10 @@ fn index_words_string(input: &String) 
-> HashMap> { Entry::Occupied(mut e) => { let x: &mut Vec = e.get_mut(); x.push(word); - } + }, Entry::Vacant(e) => { e.insert(vec![word]); - } + }, } } index @@ -39,10 +39,10 @@ fn index_words_tendril(input: &StrTendril) -> HashMap> { Some((word, true)) => match index.entry(word.chars().next().unwrap()) { Entry::Occupied(mut e) => { e.get_mut().push(word); - } + }, Entry::Vacant(e) => { e.insert(vec![word]); - } + }, }, } } diff --git a/tendril/src/futf.rs b/tendril/src/futf.rs index 93a1c21e..013e7ca6 100644 --- a/tendril/src/futf.rs +++ b/tendril/src/futf.rs @@ -106,7 +106,7 @@ unsafe fn decode(buf: &[u8]) -> Option { if n < 0x80 { return None; } // Overlong - } + }, 3 => { n = ((*buf.get_unchecked(0) & 0b1111) as u32) << 12 | ((*buf.get_unchecked(1) & 0x3F) as u32) << 6 @@ -115,9 +115,9 @@ unsafe fn decode(buf: &[u8]) -> Option { 0x0000..=0x07FF => return None, // Overlong 0xD800..=0xDBFF => return Some(Meaning::LeadSurrogate(n as u16 - 0xD800)), 0xDC00..=0xDFFF => return Some(Meaning::TrailSurrogate(n as u16 - 0xDC00)), - _ => {} + _ => {}, } - } + }, 4 => { n = ((*buf.get_unchecked(0) & 0b111) as u32) << 18 | ((*buf.get_unchecked(1) & 0x3F) as u32) << 12 @@ -126,7 +126,7 @@ unsafe fn decode(buf: &[u8]) -> Option { if n < 0x1_0000 { return None; } // Overlong - } + }, _ => debug_unreachable!(), } @@ -185,7 +185,7 @@ pub fn classify<'a>(buf: &'a [u8], idx: usize) -> Option> { meaning: Meaning::Prefix(n - avail), }) } - } + }, Byte::Cont => { let mut start = idx; let mut checked = 0; @@ -225,7 +225,7 @@ pub fn classify<'a>(buf: &'a [u8], idx: usize) -> Option> { meaning: Meaning::Prefix(n - avail), }); } - } + }, _ => return None, } @@ -235,7 +235,7 @@ pub fn classify<'a>(buf: &'a [u8], idx: usize) -> Option> { return None; } } - } + }, } } } diff --git a/tendril/src/stream.rs b/tendril/src/stream.rs index 469d58c9..afac7bc9 100644 --- a/tendril/src/stream.rs +++ b/tendril/src/stream.rs @@ -94,8 +94,8 @@ where tendril.pop_back(BUFFER_SIZE - n as u32); self.process(tendril); break; - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + }, + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}, Err(e) => return Err(e), } } @@ -162,7 +162,7 @@ where self.inner_sink.error("invalid byte sequence".into()); self.inner_sink .process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); - } + }, } t.len() - rest.len() }); @@ -170,7 +170,7 @@ where None => { self.incomplete = Some(incomplete); return; - } + }, Some(resume_at) => t.pop_front(resume_at as u32), } } @@ -180,7 +180,7 @@ where debug_assert!(s.as_ptr() == t.as_ptr()); debug_assert!(s.len() == t.len()); Ok(()) - } + }, Err(utf8::DecodeError::Invalid { valid_prefix, invalid_sequence, @@ -192,7 +192,7 @@ where valid_prefix.len(), Err(valid_prefix.len() + invalid_sequence.len()), )) - } + }, Err(utf8::DecodeError::Incomplete { valid_prefix, incomplete_suffix, @@ -200,13 +200,13 @@ where debug_assert!(valid_prefix.as_ptr() == t.as_ptr()); debug_assert!(valid_prefix.len() <= t.len()); Err((valid_prefix.len(), Ok(incomplete_suffix))) - } + }, }; match unborrowed_result { Ok(()) => { unsafe { self.inner_sink.process(t.reinterpret_without_validating()) } return; - } + }, Err((valid_len, and_then)) => { if valid_len > 0 { let subtendril = t.subtendril(0, valid_len as u32); @@ -219,15 +219,15 @@ where Ok(incomplete) => { self.incomplete = Some(incomplete); return; - } + }, Err(offset) => { self.inner_sink.error("invalid byte sequence".into()); self.inner_sink .process(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); 
t.pop_front(offset as u32); - } + }, } - } + }, } } } @@ -365,21 +365,21 @@ where debug_assert!(err.upto >= 0); t.pop_front(err.upto as u32); // continue loop and process remainder of t - } + }, (_, None) => break, } } if out.len() > 0 { sink.process(out); } - } + }, #[cfg(feature = "encoding_rs")] LossyDecoderInner::EncodingRs(ref mut decoder, ref mut sink) => { if t.is_empty() { return; } decode_to_sink(t, decoder, sink, false); - } + }, } } @@ -411,12 +411,12 @@ where sink.process(out); } sink.finish() - } + }, #[cfg(feature = "encoding_rs")] LossyDecoderInner::EncodingRs(mut decoder, mut sink) => { decode_to_sink(Tendril::new(), &mut decoder, &mut sink, true); sink.finish() - } + }, } } } @@ -449,11 +449,11 @@ fn decode_to_sink( } match result { DecoderResult::InputEmpty => return, - DecoderResult::OutputFull => {} + DecoderResult::OutputFull => {}, DecoderResult::Malformed(_, _) => { sink.error(Cow::Borrowed("invalid sequence")); sink.process("\u{FFFD}".into()); - } + }, } t.pop_front(bytes_read as u32); if t.is_empty() { diff --git a/tendril/src/tendril.rs b/tendril/src/tendril.rs index 0a33d827..d7561996 100644 --- a/tendril/src/tendril.rs +++ b/tendril/src/tendril.rs @@ -1155,7 +1155,7 @@ where self, unsafe_slice(buf.data(), offset as usize, self.len32() as usize), ) - } + }, } } } @@ -1173,7 +1173,7 @@ where let (mut buf, _, offset) = self.assume_buf(); let len = self.len32() as usize; copy_lifetime_mut(self, unsafe_slice_mut(buf.data_mut(), offset as usize, len)) - } + }, } } } @@ -1301,10 +1301,10 @@ where if let Some((n, _)) = iter.next() { skip = n as u32; } - } + }, None => { next_char = None; - } + }, } } @@ -1346,7 +1346,7 @@ where let t = self.clone(); self.clear(); Some((t, class)) - } + }, } } @@ -1400,13 +1400,13 @@ where Ok(0) => { ret = Ok(len - start_len); break; - } + }, Ok(n) => len += n, - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}, Err(e) => { ret = Err(e); break; - } + }, } } @@ -1999,7 +1999,7 @@ mod test { let (rt, rc) = res.unwrap(); assert_eq!(es, &*rt); assert_eq!(ec, rc); - } + }, } } } diff --git a/tendril/src/utf8_decode.rs b/tendril/src/utf8_decode.rs index b682d57a..16d98802 100644 --- a/tendril/src/utf8_decode.rs +++ b/tendril/src/utf8_decode.rs @@ -27,7 +27,7 @@ where debug_assert!(s.as_ptr() == self.as_ptr()); debug_assert!(s.len() == self.len()); Ok(()) - } + }, Err(utf8::DecodeError::Invalid { valid_prefix, invalid_sequence, @@ -39,7 +39,7 @@ where valid_prefix.len(), Err(valid_prefix.len() + invalid_sequence.len()), )) - } + }, Err(utf8::DecodeError::Incomplete { valid_prefix, incomplete_suffix, @@ -47,13 +47,13 @@ where debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); debug_assert!(valid_prefix.len() <= self.len()); Err((valid_prefix.len(), Ok(incomplete_suffix))) - } + }, }; match unborrowed_result { Ok(()) => { unsafe { push_utf8(self.reinterpret_without_validating()) } return None; - } + }, Err((valid_len, and_then)) => { if valid_len > 0 { let subtendril = self.subtendril(0, valid_len as u32); @@ -64,9 +64,9 @@ where Err(offset) => { push_utf8(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); self.pop_front(offset as u32) - } + }, } - } + }, } } } @@ -90,7 +90,7 @@ impl IncompleteUtf8 { result.unwrap_or(utf8::REPLACEMENT_CHARACTER), )); resume_at = input.len() - rest.len(); - } + }, } input.pop_front(resume_at as u32); Ok(input) From 9acddfd5e500cd71463a611afd5d9069d535473d Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Mon, 8 Sep 2025 17:05:24 +0100 
Subject: [PATCH 3/5] Ignore warnings and clippy lints Signed-off-by: Nico Burns --- tendril/examples/fuzz.rs | 3 +++ tendril/src/lib.rs | 27 +++++++++++++++++++++++++++ tendril/src/stream.rs | 2 +- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/tendril/examples/fuzz.rs b/tendril/examples/fuzz.rs index 13a44e01..df14c453 100644 --- a/tendril/examples/fuzz.rs +++ b/tendril/examples/fuzz.rs @@ -7,6 +7,9 @@ //! A simple fuzz tester for the library. #![deny(warnings)] +#![allow(clippy::redundant_static_lifetimes)] +#![allow(clippy::needless_borrow)] +#![allow(clippy::borrow_deref_ref)] extern crate rand; extern crate tendril; diff --git a/tendril/src/lib.rs b/tendril/src/lib.rs index fadc2cab..a5c7a39b 100644 --- a/tendril/src/lib.rs +++ b/tendril/src/lib.rs @@ -6,6 +6,33 @@ #![cfg_attr(all(test, feature = "bench"), feature(test))] //#![cfg_attr(test, deny(warnings))] +#![allow(unnecessary_transmutes)] +#![allow(bare_trait_objects)] +#![allow(clippy::ptr_offset_with_cast)] +#![allow(clippy::needless_lifetimes)] +#![allow(clippy::needless_late_init)] +#![allow(clippy::explicit_auto_deref)] +#![allow(clippy::result_unit_err)] +#![allow(clippy::op_ref)] +#![allow(clippy::missing_safety_doc)] +#![allow(clippy::missing_transmute_annotations)] +#![allow(clippy::partialeq_ne_impl)] +#![allow(clippy::legacy_numeric_constants)] +#![allow(clippy::collapsible_if)] +#![allow(clippy::wrong_self_convention)] +#![allow(clippy::len_zero)] +#![allow(clippy::transmute_bytes_to_str)] +#![allow(clippy::match_like_matches_macro)] +#![allow(clippy::redundant_static_lifetimes)] +#![allow(clippy::redundant_field_names)] +#![allow(clippy::unusual_byte_groupings)] +#![allow(clippy::borrow_deref_ref)] +#![allow(clippy::needless_return)] +#![allow(clippy::while_let_loop)] +#![allow(clippy::mutable_key_type)] +#![allow(clippy::manual_repeat_n)] +#![allow(clippy::map_clone)] +#![allow(clippy::useless_conversion)] #[macro_use] extern crate debug_unreachable; diff --git a/tendril/src/stream.rs b/tendril/src/stream.rs index afac7bc9..45183c5d 100644 --- a/tendril/src/stream.rs +++ b/tendril/src/stream.rs @@ -605,7 +605,7 @@ mod test { #[cfg(any(feature = "encoding", feature = "encoding_rs"))] pub type Tests = &'static [(&'static [&'static [u8]], &'static str, usize)]; - #[cfg(any(feature = "encoding"))] + #[cfg(feature = "encoding")] const ASCII: Tests = &[ (&[], "", 0), (&[b""], "", 0), From e783cb181ead5a92861537800f60d6f7f85af811 Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Mon, 8 Sep 2025 19:12:48 +0100 Subject: [PATCH 4/5] Port tendril benchmarks to criterion Signed-off-by: Nico Burns --- tendril/Cargo.toml | 13 ++- tendril/benches/futf.rs | 66 +++++++++++++++ tendril/benches/tendril.rs | 163 +++++++++++++++++++++++++++++++++++++ tendril/src/bench.rs | 159 ------------------------------------ tendril/src/futf.rs | 58 +------------ tendril/src/lib.rs | 8 +- tendril/src/tendril.rs | 4 - 7 files changed, 245 insertions(+), 226 deletions(-) create mode 100644 tendril/benches/futf.rs create mode 100644 tendril/benches/tendril.rs delete mode 100644 tendril/src/bench.rs diff --git a/tendril/Cargo.toml b/tendril/Cargo.toml index c424ff56..14dae0d1 100644 --- a/tendril/Cargo.toml +++ b/tendril/Cargo.toml @@ -22,6 +22,15 @@ utf-8 = { workspace = true } [dev-dependencies] rand = { workspace = true } +criterion = { workspace = true } +tendril = { workspace = true } + +[[bench]] +name = "futf" +harness = false + +[[bench]] +name = "tendril" +harness = false + -[features] -bench = [] diff --git 
a/tendril/benches/futf.rs b/tendril/benches/futf.rs new file mode 100644 index 00000000..312fee52 --- /dev/null +++ b/tendril/benches/futf.rs @@ -0,0 +1,66 @@ +extern crate criterion; +extern crate tendril; + +use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use tendril::futf::classify; + +static TEXT: &str = " + All human beings are born free and equal in dignity and rights. + They are endowed with reason and conscience and should act + towards one another in a spirit of brotherhood. + + Minden emberi lény szabadon születik és egyenlő méltósága és + joga van. Az emberek, ésszel és lelkiismerettel bírván, + egymással szemben testvéri szellemben kell hogy viseltessenek. + + เราทุกคนเกิดมาอย่างอิสระ เราทุกคนมีความคิดและความเข้าใจเป็นของเราเอง + เราทุกคนควรได้รับการปฏิบัติในทางเดียวกัน. + + 모든 인간은 태어날 때부터 자유로우며 그 존엄과 권리에 있어 + 동등하다. 인간은 천부적으로 이성과 양심을 부여받았으며 서로 + 형제애의 정신으로 행동하여야 한다. + + ro remna cu se jinzi co zifre je simdu'i be le ry. nilselsi'a + .e lei ry. selcru .i ry. se menli gi'e se sezmarde .i .ei + jeseki'ubo ry. simyzu'e ta'i le tunba + + ᏂᎦᏓ ᎠᏂᏴᏫ ᏂᎨᎫᏓᎸᎾ ᎠᎴ ᎤᏂᏠᏱ ᎤᎾᏕᎿ ᏚᏳᎧᏛ ᎨᏒᎢ. ᎨᏥᏁᎳ ᎤᎾᏓᏅᏖᏗ ᎠᎴ ᎤᏃᏟᏍᏗ + ᎠᎴ ᏌᏊ ᎨᏒ ᏧᏂᎸᏫᏍᏓᏁᏗ ᎠᎾᏟᏅᏢ ᎠᏓᏅᏙ ᎬᏗ."; + +// random +static IXES: &[usize] = &[ + 778, 156, 87, 604, 1216, 365, 884, 311, 469, 515, 709, 162, 871, 206, 634, 442, +]; + +static BOUNDARY: &[bool] = &[ + false, true, true, false, false, true, true, true, true, false, false, true, true, true, false, + false, +]; + +fn std_utf8_check(b: &mut Bencher) { + b.iter(|| { + assert!(IXES + .iter() + .zip(BOUNDARY.iter()) + .all(|(&ix, &expect)| { expect == TEXT.is_char_boundary(ix) })); + }); +} + +// We don't expect to be as fast as is_char_boundary, because we provide more +// information. But we shouldn't be tremendously slower, either. A factor of +// 5-10 is expected on this text. +fn futf_check(b: &mut Bencher) { + b.iter(|| { + assert!(IXES.iter().zip(BOUNDARY.iter()).all(|(&ix, &expect)| { + expect == (classify(TEXT.as_bytes(), ix).unwrap().rewind == 0) + })); + }); +} + +fn tendril_benchmarks(c: &mut Criterion) { + c.bench_function("std_utf8_check", std_utf8_check); + c.bench_function("futf_check", futf_check); +} + +criterion_group!(benches, tendril_benchmarks); +criterion_main!(benches); diff --git a/tendril/benches/tendril.rs b/tendril/benches/tendril.rs new file mode 100644 index 00000000..749ba5c6 --- /dev/null +++ b/tendril/benches/tendril.rs @@ -0,0 +1,163 @@ +// // Licensed under the Apache License, Version 2.0 or the MIT license +// // , at your +// // option. This file may not be copied, modified, or distributed +// // except according to those terms. + +// use std::borrow::ToOwned; +// use std::collections::hash_map::{Entry, HashMap}; + +#![allow(clippy::manual_pattern_char_comparison)] + +extern crate criterion; +extern crate tendril; +use std::collections::{hash_map::Entry, HashMap}; + +use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use tendril::StrTendril; + +static EN_1: &str = "Days turn to nights turn to paper into rocks into plastic"; + +static EN_2: &str = "Here the notes in my laboratory journal cease. I was able to write the last \ + words only with great effort. By now it was already clear to me that LSD had \ + been the cause of the remarkable experience of the previous Friday, for the \ + altered perceptions were of the same type as before, only much more intense. I \ + had to struggle to speak intelligibly. I asked my laboratory assistant, who was \ + informed of the self-experiment, to escort me home. 
We went by bicycle, no \ + automobile being available because of wartime restrictions on their use. On the \ + way home, my condition began to assume threatening forms. Everything in my \ + field of vision wavered and was distorted as if seen in a curved mirror. I also \ + had the sensation of being unable to move from the spot. Nevertheless, my \ + assistant later told me that we had traveled very rapidly. Finally, we arrived \ + at home safe and sound, and I was just barely capable of asking my companion to \ + summon our family doctor and request milk from the neighbors.\n\n\ + In spite of my delirious, bewildered condition, I had brief periods of clear \ + and effective thinking—and chose milk as a nonspecific antidote for poisoning."; + +static KR_1: &str = "러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, 메모리-안전하고 병렬 \ + 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. 아직 \ + 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다."; + +static HTML_KR_1: &str = "

러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, \ + 메모리-안전하고 병렬 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. \ + 아직 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.

"; + +const SMALL_SIZE: usize = 65536; +const LARGE_SIZE: usize = 1 << 20; + +fn index_words_string(input: &str) -> HashMap> { + let mut index = HashMap::new(); + for word in input.split(|c| c == ' ') { + if word.is_empty() { + continue; + } + let word = word.to_owned(); + match index.entry(word.chars().next().unwrap()) { + Entry::Occupied(mut e) => { + let x: &mut Vec = e.get_mut(); + x.push(word); + }, + Entry::Vacant(e) => { + e.insert(vec![word]); + }, + } + } + index +} + +fn index_words_tendril(input: &StrTendril) -> HashMap> { + let mut index = HashMap::new(); + let mut t = input.clone(); + loop { + match t.pop_front_char_run(|c| c != ' ') { + None => return index, + Some((_, false)) => (), + Some((word, true)) => match index.entry(word.chars().next().unwrap()) { + Entry::Occupied(mut e) => { + e.get_mut().push(word); + }, + Entry::Vacant(e) => { + e.insert(vec![word]); + }, + }, + } + } +} + +fn test_correctness(txt: &str) { + use std::borrow::ToOwned; + use tendril::SliceExt; + + let input_string = txt.to_owned(); + let count_s = index_words_string(&input_string); + let mut keys: Vec = count_s.keys().cloned().collect(); + keys.sort(); + + let input_tendril = txt.to_tendril(); + let count_t = index_words_tendril(&input_tendril); + let mut keys_t: Vec = count_t.keys().cloned().collect(); + keys_t.sort(); + + assert_eq!(keys, keys_t); + + for k in &keys { + let vs = &count_s[k]; + let vt = &count_t[k]; + assert_eq!(vs.len(), vt.len()); + assert!(vs.iter().zip(vt.iter()).all(|(s, t)| **s == **t)); + } +} + +fn index_words_small_string(b: &mut Bencher, txt: &str) { + let mut s = String::new(); + while s.len() < SMALL_SIZE { + s.push_str(txt); + } + b.iter(|| index_words_string(&s)); +} + +fn index_words_small_tendril(b: &mut Bencher, txt: &str) { + let mut t = StrTendril::new(); + while t.len() < SMALL_SIZE { + t.push_slice(txt); + } + b.iter(|| index_words_tendril(&t)); +} + +fn index_words_big_string(b: &mut Bencher, txt: &str) { + let mut s = String::new(); + while s.len() < LARGE_SIZE { + s.push_str(txt); + } + b.iter(|| index_words_string(&s)); +} + +fn index_words_big_tendril(b: &mut Bencher, txt: &str) { + let mut t = StrTendril::new(); + while t.len() < LARGE_SIZE { + t.push_slice(txt); + } + b.iter(|| index_words_tendril(&t)); +} + +fn run_bench_group(c: &mut Criterion, group_name: &str, txt: &str) { + let mut group = c.benchmark_group(group_name); + + test_correctness(txt); + + group.bench_with_input("index_words_small_string", txt, index_words_small_string); + group.bench_with_input("index_words_small_tendril", txt, index_words_small_tendril); + group.bench_with_input("index_words_big_string", txt, index_words_big_string); + group.bench_with_input("index_words_big_tendril", txt, index_words_big_tendril); +} + +fn tendril_benchmarks(c: &mut Criterion) { + run_bench_group(c, "en_1", EN_1); + run_bench_group(c, "en_2", EN_2); + run_bench_group(c, "kr_1", KR_1); + run_bench_group(c, "html_kr_1", HTML_KR_1); +} + +criterion_group!(benches, tendril_benchmarks); +criterion_main!(benches); diff --git a/tendril/src/bench.rs b/tendril/src/bench.rs deleted file mode 100644 index ca2341ab..00000000 --- a/tendril/src/bench.rs +++ /dev/null @@ -1,159 +0,0 @@ -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -use std::borrow::ToOwned; -use std::collections::hash_map::{Entry, HashMap}; - -use tendril::StrTendril; - -fn index_words_string(input: &String) -> HashMap> { - let mut index = HashMap::new(); - for word in input.split(|c| c == ' ') { - if word.len() == 0 { - continue; - } - let word = word.to_owned(); - match index.entry(word.chars().next().unwrap()) { - Entry::Occupied(mut e) => { - let x: &mut Vec = e.get_mut(); - x.push(word); - }, - Entry::Vacant(e) => { - e.insert(vec![word]); - }, - } - } - index -} - -fn index_words_tendril(input: &StrTendril) -> HashMap> { - let mut index = HashMap::new(); - let mut t = input.clone(); - loop { - match t.pop_front_char_run(|c| c != ' ') { - None => return index, - Some((_, false)) => (), - Some((word, true)) => match index.entry(word.chars().next().unwrap()) { - Entry::Occupied(mut e) => { - e.get_mut().push(word); - }, - Entry::Vacant(e) => { - e.insert(vec![word]); - }, - }, - } - } -} - -static EN_1: &'static str = "Days turn to nights turn to paper into rocks into plastic"; - -static EN_2: &'static str = - "Here the notes in my laboratory journal cease. I was able to write the last \ - words only with great effort. By now it was already clear to me that LSD had \ - been the cause of the remarkable experience of the previous Friday, for the \ - altered perceptions were of the same type as before, only much more intense. I \ - had to struggle to speak intelligibly. I asked my laboratory assistant, who was \ - informed of the self-experiment, to escort me home. We went by bicycle, no \ - automobile being available because of wartime restrictions on their use. On the \ - way home, my condition began to assume threatening forms. Everything in my \ - field of vision wavered and was distorted as if seen in a curved mirror. I also \ - had the sensation of being unable to move from the spot. Nevertheless, my \ - assistant later told me that we had traveled very rapidly. Finally, we arrived \ - at home safe and sound, and I was just barely capable of asking my companion to \ - summon our family doctor and request milk from the neighbors.\n\n\ - In spite of my delirious, bewildered condition, I had brief periods of clear \ - and effective thinking—and chose milk as a nonspecific antidote for poisoning."; - -static KR_1: &'static str = - "러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, 메모리-안전하고 병렬 \ - 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. 아직 \ - 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다."; - -static HTML_KR_1: &'static str = - "

러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, \ - 메모리-안전하고 병렬 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. \ - 아직 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.

"; - -mod index_words { - macro_rules! bench { - ($txt:ident) => { - #[allow(non_snake_case)] - mod $txt { - const SMALL_SIZE: usize = 65536; - const LARGE_SIZE: usize = (1 << 20); - - #[bench] - fn index_words_string(b: &mut ::test::Bencher) { - let mut s = String::new(); - while s.len() < SMALL_SIZE { - s.push_str(::tendril::bench::$txt); - } - b.iter(|| ::tendril::bench::index_words_string(&s)); - } - - #[bench] - fn index_words_tendril(b: &mut ::test::Bencher) { - let mut t = ::tendril::StrTendril::new(); - while t.len() < SMALL_SIZE { - t.push_slice(::tendril::bench::$txt); - } - b.iter(|| ::tendril::bench::index_words_tendril(&t)); - } - - #[bench] - fn index_words_big_string(b: &mut ::test::Bencher) { - let mut s = String::new(); - while s.len() < LARGE_SIZE { - s.push_str(::tendril::bench::$txt); - } - b.iter(|| ::tendril::bench::index_words_string(&s)); - } - - #[bench] - fn index_words_big_tendril(b: &mut ::test::Bencher) { - let mut t = ::tendril::StrTendril::new(); - while t.len() < LARGE_SIZE { - t.push_slice(::tendril::bench::$txt); - } - b.iter(|| ::tendril::bench::index_words_tendril(&t)); - } - - #[test] - fn correctness() { - use std::borrow::ToOwned; - use tendril::bench::{index_words_string, index_words_tendril}; - use tendril::SliceExt; - - let txt = ::tendril::bench::$txt; - let input_string = txt.to_owned(); - let count_s = index_words_string(&input_string); - let mut keys: Vec = count_s.keys().cloned().collect(); - keys.sort(); - - let input_tendril = txt.to_tendril(); - let count_t = index_words_tendril(&input_tendril); - let mut keys_t: Vec = count_t.keys().cloned().collect(); - keys_t.sort(); - - assert_eq!(keys, keys_t); - - for k in &keys { - let vs = &count_s[k]; - let vt = &count_t[k]; - assert_eq!(vs.len(), vt.len()); - assert!(vs.iter().zip(vt.iter()).all(|(s, t)| **s == **t)); - } - } - } - }; - } - - bench!(EN_1); - bench!(EN_2); - bench!(KR_1); - bench!(HTML_KR_1); -} diff --git a/tendril/src/futf.rs b/tendril/src/futf.rs index 013e7ca6..5fac52d5 100644 --- a/tendril/src/futf.rs +++ b/tendril/src/futf.rs @@ -240,12 +240,11 @@ pub fn classify<'a>(buf: &'a [u8], idx: usize) -> Option> { } } -#[cfg(all(test, feature = "bench"))] +#[cfg(test)] mod tests { use super::{all_cont, classify, decode, Byte, Meaning}; use std::borrow::ToOwned; use std::io::Write; - use test::Bencher; #[test] fn classify_all_bytes() { @@ -507,59 +506,4 @@ mod tests { assert_eq!(None, classify(b"\xF0\x8F\xBF\xBF", i)); } } - - static TEXT: &'static str = " - All human beings are born free and equal in dignity and rights. - They are endowed with reason and conscience and should act - towards one another in a spirit of brotherhood. - - Minden emberi lény szabadon születik és egyenlő méltósága és - joga van. Az emberek, ésszel és lelkiismerettel bírván, - egymással szemben testvéri szellemben kell hogy viseltessenek. - - เราทุกคนเกิดมาอย่างอิสระ เราทุกคนมีความคิดและความเข้าใจเป็นของเราเอง - เราทุกคนควรได้รับการปฏิบัติในทางเดียวกัน. - - 모든 인간은 태어날 때부터 자유로우며 그 존엄과 권리에 있어 - 동등하다. 인간은 천부적으로 이성과 양심을 부여받았으며 서로 - 형제애의 정신으로 행동하여야 한다. - - ro remna cu se jinzi co zifre je simdu'i be le ry. nilselsi'a - .e lei ry. selcru .i ry. se menli gi'e se sezmarde .i .ei - jeseki'ubo ry. simyzu'e ta'i le tunba - - ᏂᎦᏓ ᎠᏂᏴᏫ ᏂᎨᎫᏓᎸᎾ ᎠᎴ ᎤᏂᏠᏱ ᎤᎾᏕᎿ ᏚᏳᎧᏛ ᎨᏒᎢ. 
ᎨᏥᏁᎳ ᎤᎾᏓᏅᏖᏗ ᎠᎴ ᎤᏃᏟᏍᏗ - ᎠᎴ ᏌᏊ ᎨᏒ ᏧᏂᎸᏫᏍᏓᏁᏗ ᎠᎾᏟᏅᏢ ᎠᏓᏅᏙ ᎬᏗ."; - - // random - static IXES: &'static [usize] = &[ - 778, 156, 87, 604, 1216, 365, 884, 311, 469, 515, 709, 162, 871, 206, 634, 442, - ]; - - static BOUNDARY: &'static [bool] = &[ - false, true, true, false, false, true, true, true, true, false, false, true, true, true, - false, false, - ]; - - #[bench] - fn std_utf8_check(b: &mut Bencher) { - b.iter(|| { - assert!(IXES - .iter() - .zip(BOUNDARY.iter()) - .all(|(&ix, &expect)| { expect == TEXT.is_char_boundary(ix) })); - }); - } - - // We don't expect to be as fast as is_char_boundary, because we provide more - // information. But we shouldn't be tremendously slower, either. A factor of - // 5-10 is expected on this text. - #[bench] - fn futf_check(b: &mut Bencher) { - b.iter(|| { - assert!(IXES.iter().zip(BOUNDARY.iter()).all(|(&ix, &expect)| { - expect == (classify(TEXT.as_bytes(), ix).unwrap().rewind == 0) - })); - }); - } } diff --git a/tendril/src/lib.rs b/tendril/src/lib.rs index a5c7a39b..2d9f8d10 100644 --- a/tendril/src/lib.rs +++ b/tendril/src/lib.rs @@ -4,7 +4,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![cfg_attr(all(test, feature = "bench"), feature(test))] //#![cfg_attr(test, deny(warnings))] #![allow(unnecessary_transmutes)] #![allow(bare_trait_objects)] @@ -40,8 +39,6 @@ extern crate debug_unreachable; pub extern crate encoding; #[cfg(feature = "encoding_rs")] pub extern crate encoding_rs; -#[cfg(all(test, feature = "bench"))] -extern crate test; #[macro_use] extern crate mac; extern crate utf8; @@ -56,9 +53,12 @@ pub mod fmt; pub mod stream; mod buf32; -mod futf; mod tendril; mod utf8_decode; mod util; +// Exposed for benchmarking purposes only +#[doc(hidden)] +pub mod futf; + static OFLOW: &'static str = "tendril: overflow in buffer arithmetic"; diff --git a/tendril/src/tendril.rs b/tendril/src/tendril.rs index d7561996..d5fbd7d2 100644 --- a/tendril/src/tendril.rs +++ b/tendril/src/tendril.rs @@ -1655,10 +1655,6 @@ where } } -#[cfg(all(test, feature = "bench"))] -#[path = "bench.rs"] -mod bench; - #[cfg(test)] mod test { use super::{ From 3b6b60c7fe9831b867194b0b341cf7778b60257b Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Mon, 8 Sep 2025 20:08:36 +0100 Subject: [PATCH 5/5] Add tendril to RELEASING.MD Signed-off-by: Nico Burns --- RELEASING.MD | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/RELEASING.MD b/RELEASING.MD index fa5940be..816b1595 100644 --- a/RELEASING.MD +++ b/RELEASING.MD @@ -11,6 +11,8 @@ published to crates.io. The **web_atoms** crate is on a separate cycle as it needs frequent releases but these rarely contain breaking changes. +The **tendril** crate is on a separate cycle as it is a utility crate that is rarely updated. + ## Making a release of **web_atoms**: - Bump the version in `web_atoms/Cargo.toml` @@ -20,6 +22,13 @@ The **web_atoms** crate is on a separate cycle as it needs frequent releases but - Publish the new version of **web_atoms** - Optionally: publish a new version of the other crates to match +## Making a release of **tendril**: + +- Bump the version in `tendril/Cargo.toml` +- Update the version **tendril** in the workspace `Cargo.toml`'s `[workspace.dependencies]` section to match +- Publish the new version of **tendril** +- Optionally: publish a new version of the other crates to match + ## Making a release of all other crates In the workspace `Cargo.toml`: