diff --git a/examples/interactive.rs b/examples/interactive.rs index 873af000..5510b1d5 100644 --- a/examples/interactive.rs +++ b/examples/interactive.rs @@ -101,7 +101,7 @@ mod unix_only { )?; row += 2; - let mut lines = wrap(text, options).collect::>(); + let mut lines = wrap(text, options); if let Some(line) = lines.last() { // If `text` ends with a newline, the final wrapped line // contains this newline. This will in turn leave the diff --git a/examples/layout.rs b/examples/layout.rs index 04654d99..e2316858 100644 --- a/examples/layout.rs +++ b/examples/layout.rs @@ -17,7 +17,7 @@ fn main() { for width in 15..60 { options.width = width; - let lines = wrap(example, &options).collect::>(); + let lines = wrap(example, &options); if lines != prev_lines { let title = format!(" Width: {} ", width); println!(".{:-^1$}.", title, width + 2); diff --git a/src/core.rs b/src/core.rs new file mode 100644 index 00000000..4b15e5b0 --- /dev/null +++ b/src/core.rs @@ -0,0 +1,588 @@ +//! Building blocks for advanced wrapping functionality. +//! +//! The functions and structs in this module can be used to implement +//! advanced wrapping functionality when the `wrap` and `fill` +//! function don't do what you want. + +use unicode_width::UnicodeWidthChar; +use unicode_width::UnicodeWidthStr; + +/// The CSI or "Control Sequence Introducer" introduces an ANSI escape +/// sequence. This is typically used for colored text and will be +/// ignored when computing the text width. +const CSI: (char, char) = ('\x1b', '['); +/// The final bytes of an ANSI escape sequence must be in this range. +const ANSI_FINAL_BYTE: std::ops::RangeInclusive = '\x40'..='\x7e'; + +/// Skip ANSI escape sequences. The `ch` is the current `char`, the +/// `chars` provide the following characters. The `chars` will be +/// modified if `ch` is the start of an ANSI escape sequence. 
+fn skip_ansi_escape_sequence>(ch: char, chars: &mut I) -> bool { + if ch == CSI.0 && chars.next() == Some(CSI.1) { + // We have found the start of an ANSI escape code, typically + // used for colored terminal text. We skip until we find a + // "final byte" in the range 0x40–0x7E. + for ch in chars { + if ANSI_FINAL_BYTE.contains(&ch) { + return true; + } + } + } + return false; +} + +/// A (text) fragment denotes the unit which we wrap into lines. +/// +/// Fragments represent an abstract _word_ plus the _whitespace_ +/// following the word. In case the word falls at the end of the line, +/// the whitespace is dropped and a so-called _penalty_ is inserted +/// instead (typically `"-"` if the word was hyphenated). +/// +/// For wrapping purposes, the precise content of the word, the +/// whitespace, and the penalty is irrelevant. All we need to know is +/// the displayed width of each part, which this trait provides. +pub trait Fragment: std::fmt::Debug { + /// Displayed width of word represented by this fragment. + fn width(&self) -> usize; + + /// Displayed width of the whitespace that must follow the word + /// when the word is not at the end of a line. + fn whitespace_width(&self) -> usize; + + /// Displayed width of the penalty that must be inserted if the + /// word falls at the end of a line. + fn penalty_width(&self) -> usize; +} + +/// A piece of wrappable text, including any trailing whitespace. +/// +/// A `Word` is an example of a `Fragment`, so it has a width, +/// trailing whitespace, and potentially a penalty item. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Word<'a> { + word: &'a str, + width: usize, + pub(crate) whitespace: &'a str, + pub(crate) penalty: &'a str, +} + +impl std::ops::Deref for Word<'_> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.word + } +} + +impl<'a> Word<'a> { + /// Construct a new `Word`. 
+ /// + /// A trailing strech of `' '` is automatically taken to be the + /// whitespace part of the word. + pub fn from(word: &str) -> Word<'_> { + let trimmed = word.trim_end_matches(' '); + let mut chars = trimmed.chars(); + let mut width = 0; + while let Some(ch) = chars.next() { + if skip_ansi_escape_sequence(ch, &mut chars) { + continue; + }; + width += ch.width().unwrap_or(0); + } + + Word { + word: trimmed, + width: width, + whitespace: &word[trimmed.len()..], + penalty: "", + } + } + + /// Break this word into smaller words with a width of at most + /// `line_width`. The whitespace and penalty from this `Word` is + /// added to the last piece. + /// + /// # Examples + /// + /// ``` + /// use textwrap::core::Word; + /// assert_eq!(Word::from("Hello! ").break_apart(3).collect::>(), + /// vec![Word::from("Hel"), Word::from("lo! ")]); + /// ``` + pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator> + 'b { + let mut char_indices = self.word.char_indices(); + let mut offset = 0; + let mut width = 0; + + std::iter::from_fn(move || { + while let Some((idx, ch)) = char_indices.next() { + if skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) { + continue; + } + + let ch_width = ch.width().unwrap_or(0); + if width > 0 && width + ch_width > line_width { + let word = Word { + word: &self.word[offset..idx], + width: width, + whitespace: "", + penalty: "", + }; + offset = idx; + width = ch_width; + return Some(word); + } + + width += ch_width; + } + + if offset < self.word.len() { + let word = Word { + word: &self.word[offset..], + width: width, + whitespace: self.whitespace, + penalty: self.penalty, + }; + offset = self.word.len(); + return Some(word); + } + + None + }) + } +} + +impl Fragment for Word<'_> { + fn width(&self) -> usize { + self.width + } + + // We assume the whitespace consist of ' ' only. This allows us to + // compute the display width in constant time. 
+ fn whitespace_width(&self) -> usize { + self.whitespace.len() + } + + // We assume the penalty is `""` or `"-"`. This allows us to + // compute the display width in constant time. + fn penalty_width(&self) -> usize { + self.penalty.len() + } +} + +/// Split line into words separated by regions of `' '` characters. +/// +/// # Examples +/// +/// ``` +/// use textwrap::core::{Fragment, Word, find_words}; +/// let words = find_words("Hello World!").collect::>(); +/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]); +/// assert_eq!(words[0].width(), 5); +/// assert_eq!(words[0].whitespace_width(), 1); +/// assert_eq!(words[0].penalty_width(), 0); +/// ``` +pub fn find_words(line: &str) -> impl Iterator { + let mut start = 0; + let mut in_whitespace = false; + let mut char_indices = line.char_indices(); + + std::iter::from_fn(move || { + // for (idx, ch) in char_indices does not work, gives this + // error: + // + // > cannot move out of `char_indices`, a captured variable in + // > an `FnMut` closure + #[allow(clippy::while_let_on_iterator)] + while let Some((idx, ch)) = char_indices.next() { + if in_whitespace && ch != ' ' { + let word = Word::from(&line[start..idx]); + start = idx; + in_whitespace = ch == ' '; + return Some(word); + } + + in_whitespace = ch == ' '; + } + + if start < line.len() { + let word = Word::from(&line[start..]); + start = line.len(); + return Some(word); + } + + None + }) +} + +/// Split words into smaller words according to the split points given +/// by `options`. +/// +/// Note that we split all words, regardless of their length. This is +/// to more cleanly separate the business of splitting (including +/// automatic hyphenation) from the business of word wrapping. 
+/// +/// # Examples +/// +/// ``` +/// use textwrap::{Options, NoHyphenation}; +/// use textwrap::core::{Word, split_words}; +/// +/// // The default splitter is HyphenSplitter: +/// let options = Options::new(80); +/// assert_eq!( +/// split_words(vec![Word::from("foo-bar")], &&options).collect::>(), +/// vec![Word::from("foo-"), Word::from("bar")] +/// ); +/// +/// // The NoHyphenation splitter ignores the '-': +/// let options = Options::new(80).splitter(Box::new(NoHyphenation)); +/// assert_eq!( +/// split_words(vec![Word::from("foo-bar")], &&options).collect::>(), +/// vec![Word::from("foo-bar")] +/// ); +/// ``` +pub fn split_words<'a, I, T: crate::WrapOptions>( + words: I, + options: &'a T, +) -> impl Iterator> +where + I: IntoIterator>, +{ + words.into_iter().flat_map(move |word| { + let mut prev = 0; + let mut split_points = options.split_points(&word).into_iter(); + std::iter::from_fn(move || { + if let Some(idx) = split_points.next() { + let need_hyphen = !word[..idx].ends_with('-'); + let w = Word { + word: &word.word[prev..idx], + width: word[prev..idx].width(), + whitespace: "", + penalty: if need_hyphen { "-" } else { "" }, + }; + prev = idx; + return Some(w); + } + + if prev < word.word.len() || prev == 0 { + let w = Word { + word: &word.word[prev..], + width: word[prev..].width(), + whitespace: word.whitespace, + penalty: word.penalty, + }; + prev = word.word.len() + 1; + return Some(w); + } + + None + }) + }) +} + +/// Forcibly break words wider than `line_width` into smaller words. +/// +/// This simply calls `Word::break_apart` on words that are too wide. +/// This means that no extra `'-'` is inserted, the word is simply +/// broken into smaller pieces. 
+pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec> +where + I: IntoIterator>, +{ + let mut shortened_words = Vec::new(); + for word in words { + if word.width() > line_width { + shortened_words.extend(word.break_apart(line_width)); + } else { + shortened_words.push(word); + } + } + shortened_words +} + +/// Wrap abstract fragments into lines of differnet widths. +/// +/// The `line_widths` maps the line number to the desired width. This +/// can be used to implement hanging indentation. +/// +/// The fragments must already have been split into the desired +/// widths, this function will not (and cannot) attempt to split them +/// further when arranging them into lines. +/// +/// # Examples +/// +/// Imagine you're building a house site and you have a number of +/// tasks you need to execute. Things like pour foundation, complete +/// framing, install plumbing, electric cabling, install insolutation. +/// +/// The construction workers can only work during daytime, so they +/// need to pack up everything at night. Because they need to secure +/// their tools and move machines back to the garage, this process +/// takes much more time than the time it would take them to simply +/// switch to another task. +/// +/// You would like to make a list of taks to execute every day based +/// on your estimates. You can model this with a program like this: +/// +/// ``` +/// use textwrap::core::{Fragment, wrap_fragments}; +/// +/// #[derive(Debug)] +/// struct Task<'a> { +/// name: &'a str, +/// hours: usize, // Time needed to complete task. +/// sweep: usize, // Time needed for a quick sweep after task during the day. +/// cleanup: usize, // Time needed to cleanup after task at end of day. 
+/// } +/// +/// impl Fragment for Task<'_> { +/// fn width(&self) -> usize { self.hours } +/// fn whitespace_width(&self) -> usize { self.sweep } +/// fn penalty_width(&self) -> usize { self.cleanup } +/// } +/// +/// // The morning tasks +/// let tasks = vec![ +/// Task { name: "Foundation", hours: 4, sweep: 2, cleanup: 3 }, +/// Task { name: "Framing", hours: 3, sweep: 1, cleanup: 2 }, +/// Task { name: "Plumbing", hours: 2, sweep: 2, cleanup: 2 }, +/// Task { name: "Electrical", hours: 2, sweep: 1, cleanup: 2 }, +/// Task { name: "Insulation", hours: 2, sweep: 1, cleanup: 2 }, +/// Task { name: "Drywall", hours: 3, sweep: 1, cleanup: 2 }, +/// Task { name: "Floors", hours: 3, sweep: 1, cleanup: 2 }, +/// Task { name: "Countertops", hours: 1, sweep: 1, cleanup: 2 }, +/// Task { name: "Bathrooms", hours: 2, sweep: 1, cleanup: 2 }, +/// ]; +/// +/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: usize) -> Vec<(usize, Vec<&'a str>)> { +/// let mut days = Vec::new(); +/// for day in wrap_fragments(&tasks, |i| { day_length }) { +/// let last = day.last().unwrap(); +/// let work_hours: usize = day.iter().map(|t| t.hours + t.sweep).sum(); +/// let names = day.iter().map(|t| t.name).collect::>(); +/// days.push((work_hours - last.sweep + last.cleanup, names)); +/// } +/// days +/// } +/// +/// // With a single crew working 8 hours a day: +/// assert_eq!(assign_days(&tasks, 8), [ +/// (7, vec!["Foundation"]), +/// (8, vec!["Framing", "Plumbing"]), +/// (7, vec!["Electrical", "Insulation"]), +/// (5, vec!["Drywall"]), +/// (7, vec!["Floors", "Countertops"]), +/// (4, vec!["Bathrooms"]), +/// ]); +/// +/// // With two crews working in shifts, 16 hours a day: +/// assert_eq!(assign_days(&tasks, 16), [ +/// (14, vec!["Foundation", "Framing", "Plumbing"]), +/// (15, vec!["Electrical", "Insulation", "Drywall", "Floors"]), +/// (6, vec!["Countertops", "Bathrooms"]), +/// ]); +/// ``` +/// +/// Apologies to anyone who actually knows how to build a house and +/// knows how 
long each step takes :-) +pub fn wrap_fragments usize>( + fragments: &[T], + line_widths: F, +) -> Vec<&[T]> { + let mut lines = Vec::new(); + let mut start = 0; + let mut width = 0; + + for (idx, fragment) in fragments.iter().enumerate() { + let line_width = line_widths(lines.len()); + if width + fragment.width() + fragment.penalty_width() > line_width && idx > start { + lines.push(&fragments[start..idx]); + start = idx; + width = 0; + } + width += fragment.width() + fragment.whitespace_width(); + } + lines.push(&fragments[start..]); + lines +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Options, WordSplitter}; + #[cfg(feature = "hyphenation")] + use hyphenation::{Language, Load, Standard}; + + // Like assert_eq!, but the left expression is an iterator. + macro_rules! assert_iter_eq { + ($left:expr, $right:expr) => { + assert_eq!($left.collect::>(), $right); + }; + } + + #[test] + fn skip_ansi_escape_sequence_works() { + let blue_text = "\u{1b}[34mHello\u{1b}[0m"; + let mut chars = blue_text.chars(); + let ch = chars.next().unwrap(); + assert!(skip_ansi_escape_sequence(ch, &mut chars)); + assert_eq!(chars.next(), Some('H')); + } + + #[test] + fn find_words_empty() { + assert_iter_eq!(find_words(""), vec![]); + } + + #[test] + fn find_words_single_word() { + assert_iter_eq!(find_words("foo"), vec![Word::from("foo")]); + } + + #[test] + fn find_words_two_words() { + assert_iter_eq!( + find_words("foo bar"), + vec![Word::from("foo "), Word::from("bar")] + ); + } + + #[test] + fn find_words_multiple_words() { + assert_iter_eq!( + find_words("foo bar baz"), + vec![Word::from("foo "), Word::from("bar "), Word::from("baz")] + ); + } + + #[test] + fn find_words_whitespace() { + assert_iter_eq!(find_words(" "), vec![Word::from(" ")]); + } + + #[test] + fn find_words_inter_word_whitespace() { + assert_iter_eq!( + find_words("foo bar"), + vec![Word::from("foo "), Word::from("bar")] + ) + } + + #[test] + fn find_words_trailing_whitespace() { + 
assert_iter_eq!(find_words("foo "), vec![Word::from("foo ")]); + } + + #[test] + fn find_words_leading_whitespace() { + assert_iter_eq!( + find_words(" foo"), + vec![Word::from(" "), Word::from("foo")] + ); + } + + #[test] + fn find_words_multi_column_char() { + assert_iter_eq!( + find_words("\u{1f920}"), // cowboy emoji 🤠 + vec![Word::from("\u{1f920}")] + ); + } + + #[test] + fn find_words_hyphens() { + assert_iter_eq!(find_words("foo-bar"), vec![Word::from("foo-bar")]); + assert_iter_eq!( + find_words("foo- bar"), + vec![Word::from("foo- "), Word::from("bar")] + ); + assert_iter_eq!( + find_words("foo - bar"), + vec![Word::from("foo "), Word::from("- "), Word::from("bar")] + ); + assert_iter_eq!( + find_words("foo -bar"), + vec![Word::from("foo "), Word::from("-bar")] + ); + } + + #[test] + fn split_words_no_words() { + assert_iter_eq!(split_words(vec![], &80), vec![]); + } + + #[test] + fn split_words_empty_word() { + assert_iter_eq!( + split_words(vec![Word::from(" ")], &80), + vec![Word::from(" ")] + ); + } + + #[test] + fn split_words_hyphen_splitter() { + assert_iter_eq!( + split_words(vec![Word::from("foo-bar")], &80), + vec![Word::from("foo-"), Word::from("bar")] + ); + } + + #[test] + fn split_words_short_line() { + // Note that `split_words` does not take the line width into + // account, that is the job of `break_words`. 
+ assert_iter_eq!( + split_words(vec![Word::from("foobar")], &3), + vec![Word::from("foobar")] + ); + } + + #[test] + fn split_words_adds_penalty() { + #[derive(Debug)] + struct FixedSplitPoint; + impl WordSplitter for FixedSplitPoint { + fn split_points(&self, _: &str) -> Vec { + vec![3] + } + } + + let options = Options::new(80).splitter(Box::new(FixedSplitPoint)); + assert_iter_eq!( + split_words(vec![Word::from("foobar")].into_iter(), &&options), + vec![ + Word { + word: "foo", + width: 3, + whitespace: "", + penalty: "-" + }, + Word { + word: "bar", + width: 3, + whitespace: "", + penalty: "" + } + ] + ); + + assert_iter_eq!( + split_words(vec![Word::from("fo-bar")].into_iter(), &&options), + vec![ + Word { + word: "fo-", + width: 3, + whitespace: "", + penalty: "" + }, + Word { + word: "bar", + width: 3, + whitespace: "", + penalty: "" + } + ] + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index a59e6d8d..e92594ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -95,21 +95,8 @@ #![allow(clippy::redundant_field_names)] use std::borrow::Cow; -use std::str::CharIndices; - -use unicode_width::UnicodeWidthChar; use unicode_width::UnicodeWidthStr; -/// A non-breaking space. -const NBSP: char = '\u{a0}'; - -/// The CSI or "Control Sequence Introducer" introduces an ANSI escape -/// sequence. This is typically used for colored text and will be -/// ignored when computing the text width. -const CSI: (char, char) = ('\u{1b}', '['); -/// The final bytes of an ANSI escape sequence must be in this range. -const ANSI_FINAL_BYTE: std::ops::RangeInclusive = '\x40'..='\x7e'; - mod indentation; pub use crate::indentation::dedent; pub use crate::indentation::indent; @@ -117,6 +104,8 @@ pub use crate::indentation::indent; mod splitting; pub use crate::splitting::{HyphenSplitter, NoHyphenation, WordSplitter}; +pub mod core; + /// Options for wrapping and filling text. Used with the [`wrap`] and /// [`fill`] functions. 
/// @@ -132,8 +121,8 @@ pub trait WrapOptions { /// Allow long words to be broken if they cannot fit on a line. /// When set to `false`, some lines may be longer than `width`. fn break_words(&self) -> bool; - /// Split word as with `WordSplitter::split`. - fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>; + /// Find indices where `word` can be split. + fn split_points(&self, word: &str) -> Vec; } /// Holds settings for wrapping and filling text. @@ -188,8 +177,8 @@ impl WrapOptions for &Options<'_> { self.break_words } #[inline] - fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { - self.splitter.split(word) + fn split_points(&self, word: &str) -> Vec { + self.splitter.split_points(word) } } @@ -226,8 +215,8 @@ impl WrapOptions for usize { true } #[inline] - fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { - HyphenSplitter.split(word) + fn split_points(&self, word: &str) -> Vec { + HyphenSplitter.split_points(word) } } @@ -358,210 +347,6 @@ impl<'a> Options<'a> { } } -/// Like `char::is_whitespace`, but non-breaking spaces don't count. -#[inline] -fn is_whitespace(ch: char) -> bool { - ch.is_whitespace() && ch != NBSP -} - -#[derive(Debug)] -struct WrapIter<'input, T: WrapOptions> { - options: T, - - // String to wrap. - source: &'input str, - // CharIndices iterator over self.source. - char_indices: CharIndices<'input>, - // Byte index where the current line starts. - start: usize, - // Byte index of the last place where the string can be split. - split: usize, - // Size in bytes of the character at self.source[self.split]. - split_len: usize, - // Width of self.source[self.start..idx]. - line_width: usize, - // Width of self.source[self.start..self.split]. - line_width_at_split: usize, - // Tracking runs of whitespace characters. - in_whitespace: bool, - // Has iterator finished producing elements? 
- finished: bool, -} - -impl WrapIter<'_, T> { - fn new(options: T, s: &str) -> WrapIter<'_, T> { - let initial_indent_width = options.initial_indent().width(); - - WrapIter { - options: options, - source: s, - char_indices: s.char_indices(), - start: 0, - split: 0, - split_len: 0, - line_width: initial_indent_width, - line_width_at_split: initial_indent_width, - in_whitespace: false, - finished: false, - } - } - - fn create_result_line(&self) -> Cow<'static, str> { - let indent = if self.start == 0 { - self.options.initial_indent() - } else { - self.options.subsequent_indent() - }; - if indent.is_empty() { - Cow::Borrowed("") // return Cow<'static, str> - } else { - // This removes the link between the lifetime of the - // indentation and the input string. The non-empty - // indentation will force us to create an owned `String` - // in any case. - Cow::Owned(String::from(indent)) - } - } -} - -impl<'input, T: WrapOptions> Iterator for WrapIter<'input, T> { - type Item = Cow<'input, str>; - - fn next(&mut self) -> Option> { - if self.finished { - return None; - } - - while let Some((idx, ch)) = self.char_indices.next() { - if ch == CSI.0 && self.char_indices.next().map(|(_, ch)| ch) == Some(CSI.1) { - // We have found the start of an ANSI escape code, - // typically used for colored text. We ignore all - // characters until we find a "final byte" in the - // range 0x40–0x7E. - while let Some((_, ch)) = self.char_indices.next() { - if ANSI_FINAL_BYTE.contains(&ch) { - break; - } - } - // Done with the escape sequence, we continue with - // next character in the outer loop. - continue; - } - - let char_width = ch.width().unwrap_or(0); - let char_len = ch.len_utf8(); - if ch == '\n' { - self.split = idx; - self.split_len = char_len; - self.line_width_at_split = self.line_width; - self.in_whitespace = false; - - // If this is not the final line, return the current line. 
Otherwise, - // we will return the line with its line break after exiting the loop - if self.split + self.split_len < self.source.len() { - let mut line = self.create_result_line(); - line += &self.source[self.start..self.split]; - - self.start = self.split + self.split_len; - self.line_width = self.options.subsequent_indent().width(); - - return Some(line); - } - } else if is_whitespace(ch) { - // Extend the previous split or create a new one. - if self.in_whitespace { - self.split_len += char_len; - } else { - self.split = idx; - self.split_len = char_len; - } - self.line_width_at_split = self.line_width + char_width; - self.in_whitespace = true; - } else if self.line_width + char_width > self.options.width() { - // There is no room for this character on the current - // line. Try to split the final word. - self.in_whitespace = false; - let remaining_text = &self.source[self.split + self.split_len..]; - let final_word = match remaining_text.find(is_whitespace) { - Some(i) => &remaining_text[..i], - None => remaining_text, - }; - - let mut hyphen = ""; - let splits = self.options.split(final_word); - for &(head, hyp, _) in splits.iter().rev() { - if self.line_width_at_split + head.width() + hyp.width() <= self.options.width() - { - // We can fit head into the current line. - // Advance the split point by the width of the - // whitespace and the head length. - self.split += self.split_len + head.len(); - // The new `split_len` is equal to the stretch - // of whitespace following the split. - self.split_len = remaining_text[head.len()..] - .char_indices() - .skip_while(|(_, ch)| is_whitespace(*ch)) - .next() - .map_or(0, |(idx, _)| idx); - self.line_width_at_split += head.width() + hyp.width(); - hyphen = hyp; - break; - } - } - - if self.start >= self.split { - // The word is too big to fit on a single line. - if self.options.break_words() { - // Break work at current index. 
- self.split = idx; - self.split_len = 0; - self.line_width_at_split = self.line_width; - } else { - // Add smallest split. - self.split += self.split_len + splits[0].0.len(); - // The new `split_len` is equal to the stretch - // of whitespace following the smallest split. - self.split_len = remaining_text[splits[0].0.len()..] - .char_indices() - .skip_while(|(_, ch)| is_whitespace(*ch)) - .next() - .map_or(0, |(idx, _)| idx); - self.line_width_at_split = self.line_width; - } - } - - if self.start < self.split { - let mut line = self.create_result_line(); - line += &self.source[self.start..self.split]; - line += hyphen; - - self.start = self.split + self.split_len; - self.line_width += self.options.subsequent_indent().width(); - self.line_width -= self.line_width_at_split; - self.line_width += char_width; - self.line_width_at_split = self.options.subsequent_indent().width(); - - return Some(line); - } - } else { - self.in_whitespace = false; - } - self.line_width += char_width; - } - - self.finished = true; - - // Add final line. - if self.start < self.source.len() { - let mut line = self.create_result_line(); - line += &self.source[self.start..]; - return Some(line); - } - - None - } -} - /// Return the current terminal width. If the terminal width cannot be /// determined (typically because the standard output is not connected /// to a terminal), a default width of 80 characters will be used. @@ -622,7 +407,7 @@ pub fn fill(text: &str, options: T) -> String { // indentation, no hyphenation). let mut result = String::with_capacity(text.len()); - for (i, line) in wrap(text, options).enumerate() { + for (i, line) in wrap(text, options).iter().enumerate() { if i > 0 { result.push('\n'); } @@ -634,7 +419,9 @@ pub fn fill(text: &str, options: T) -> String { /// Wrap a line of text at `width` characters. /// -/// The result is an iterator yielding individual lines. 
Use the +/// The result is a vector of lines, each line is of type `Cow<'_, +/// str>`, which means that the line will borrow from the input `&str` +/// if possible. The lines do not have a trailing `'\n'`. Use the /// [`fill`] function if you need a `String` instead. /// /// The easiest way to use this function is to pass an integer for @@ -644,7 +431,7 @@ pub fn fill(text: &str, options: T) -> String { /// use textwrap::wrap; /// /// let lines = wrap("Memory safety without garbage collection.", 15); -/// assert_eq!(lines.collect::>(), &[ +/// assert_eq!(lines, &[ /// "Memory safety", /// "without garbage", /// "collection.", @@ -659,7 +446,7 @@ pub fn fill(text: &str, options: T) -> String { /// /// let options = Options::new(15).initial_indent("- ").subsequent_indent(" "); /// let lines = wrap("Memory safety without garbage collection.", &options); -/// assert_eq!(lines.collect::>(), &[ +/// assert_eq!(lines, &[ /// "- Memory safety", /// " without", /// " garbage", @@ -681,7 +468,7 @@ pub fn fill(text: &str, options: T) -> String { /// /// let options = Options::new(15).subsequent_indent("...."); /// let lines = wrap("Wrapping text all day long.", &options); -/// let annotated = lines.map(|line| match line { +/// let annotated = lines.iter().map(|line| match line { /// Borrowed(text) => format!("[Borrowed] {}", text), /// Owned(text) => format!("[Owned] {}", text), /// }).collect::>(); @@ -693,8 +480,85 @@ pub fn fill(text: &str, options: T) -> String { /// ``` /// /// [`fill`]: fn.fill.html -pub fn wrap(text: &str, options: T) -> impl Iterator> { - WrapIter::new(options, text) +pub fn wrap(text: &str, options: T) -> Vec> { + let initial_width = options + .width() + .saturating_sub(options.initial_indent().width()); + let subsequent_width = options + .width() + .saturating_sub(options.subsequent_indent().width()); + + let mut lines = Vec::new(); + for line in text.split('\n') { + let words = core::find_words(line); + let split_words = core::split_words(words, 
&options); + let broken_words = if options.break_words() { + let mut broken_words = core::break_words(split_words, subsequent_width); + if !options.initial_indent().is_empty() { + // Without this, the first word will always go into + // the first line. However, since we break words based + // on the _second_ line width, it can be wrong to + // unconditionally put the first word onto the first + // line. An empty zero-width word fixed this. + broken_words.insert(0, core::Word::from("")); + } + broken_words + } else { + split_words.collect::>() + }; + + #[rustfmt::skip] + let line_lengths = |i| if i == 0 { initial_width } else { subsequent_width }; + let wrapped_words = core::wrap_fragments(&broken_words, line_lengths); + + let mut idx = 0; + for words in wrapped_words { + let last_word = match words.last() { + None => { + lines.push(Cow::from("")); + continue; + } + Some(word) => word, + }; + + // We assume here that all words are contiguous in `line`. + // That is, the sum of their lengths should add up to the + // lenght of `line`. + let len = words + .iter() + .map(|word| word.len() + word.whitespace.len()) + .sum::() + - last_word.whitespace.len(); + + // The result is owned if we have indentation, otherwise + // we can simply borrow an empty string. + let mut result = if lines.is_empty() && !options.initial_indent().is_empty() { + Cow::Owned(options.initial_indent().to_owned()) + } else if !lines.is_empty() && !options.subsequent_indent().is_empty() { + Cow::Owned(options.subsequent_indent().to_owned()) + } else { + // We can use an empty string here since string + // concatenation for `Cow` preserves a borrowed value + // when either side is empty. 
+ Cow::from("") + }; + + result += &line[idx..idx + len]; + + if !last_word.penalty.is_empty() { + result.to_mut().push_str(&last_word.penalty); + } + + lines.push(result); + + // Advance by the length of `result`, plus the length of + // `last_word.whitespace` -- even if we had a penalty, we + // need to skip over the whitespace. + idx += len + last_word.whitespace.len(); + } + } + + lines } #[cfg(test)] @@ -703,12 +567,6 @@ mod tests { #[cfg(feature = "hyphenation")] use hyphenation::{Language, Load, Standard}; - macro_rules! assert_iter_eq { - ($left:expr, $right:expr) => { - assert_eq!($left.collect::>(), $right); - }; - } - #[test] fn options_agree_with_usize() { let opt_usize: &dyn WrapOptions = &42; @@ -722,70 +580,59 @@ mod tests { ); assert_eq!(opt_usize.break_words(), opt_options.break_words()); assert_eq!( - opt_usize.split("hello-world"), - opt_options.split("hello-world") + opt_usize.split_points("hello-world"), + opt_options.split_points("hello-world") ); } #[test] fn no_wrap() { - assert_iter_eq!(wrap("foo", 10), vec!["foo"]); + assert_eq!(wrap("foo", 10), vec!["foo"]); } #[test] - fn simple() { - assert_iter_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]); + fn wrap_simple() { + assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]); } #[test] - fn multi_word_on_line() { - assert_iter_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]); + fn multiple_words_on_first_line() { + assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]); } #[test] fn long_word() { - assert_iter_eq!(wrap("foo", 0), vec!["f", "o", "o"]); + assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]); } #[test] fn long_words() { - assert_iter_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]); + assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]); } #[test] fn max_width() { - assert_iter_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]); + assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]); } #[test] fn leading_whitespace() 
{ - assert_iter_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]); + assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]); } #[test] fn trailing_whitespace() { - assert_iter_eq!(wrap("foo bar ", 6), vec!["foo", "bar "]); - } - - #[test] - fn interior_whitespace() { - assert_iter_eq!(wrap("foo: bar baz", 10), vec!["foo: bar", "baz"]); - } - - #[test] - fn extra_whitespace_start_of_line() { // Whitespace is only significant inside a line. After a line // gets too long and is broken, the first word starts in - // column zero and is not indented. The line before might end - // up with trailing whitespace. - assert_iter_eq!(wrap("foo bar", 5), vec!["foo", "bar"]); + // column zero and is not indented. + assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]); } #[test] fn issue_99() { // We did not reset the in_whitespace flag correctly and did // not handle single-character words after a line break. - assert_iter_eq!( + assert_eq!( wrap("aaabbbccc x yyyzzzwww", 9), vec!["aaabbbccc", "x", "yyyzzzwww"] ); @@ -795,22 +642,26 @@ mod tests { fn issue_129() { // The dash is an em-dash which takes up four bytes. We used // to panic since we tried to index into the character. - assert_iter_eq!(wrap("x – x", 1), vec!["x", "–", "x"]); + assert_eq!(wrap("x – x", 1), vec!["x", "–", "x"]); } #[test] fn wide_character_handling() { - assert_iter_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]); - assert_iter_eq!( + assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]); + assert_eq!( wrap("Hello, World!", 15), vec!["Hello,", "World!"] ); } #[test] - fn empty_input_not_indented() { + fn empty_line_is_indented() { + // Previously, indentation was not applied to empty lines. + // However, this is somewhat inconsistent and undesirable if + // the indentation is something like a border ("| ") which you + // want to apply to all lines, empty or not. 
let options = Options::new(10).initial_indent("!!!"); - assert_eq!(fill("", &options), ""); + assert_eq!(fill("", &options), "!!!"); } #[test] @@ -819,10 +670,19 @@ mod tests { assert_eq!(fill("foo", &options), ">>>foo"); } + #[test] + fn indent_first() { + let options = Options::new(10).initial_indent("👉👉"); + assert_eq!( + wrap("x x x x x x x x x x x x x", &options), + vec!["👉👉x x x", "x x x x x", "x x x x x"] + ); + } + #[test] fn indent_multiple_lines() { let options = Options::new(6).initial_indent("* ").subsequent_indent(" "); - assert_iter_eq!( + assert_eq!( wrap("foo bar baz", &options), vec!["* foo", " bar", " baz"] ); @@ -831,29 +691,39 @@ mod tests { #[test] fn indent_break_words() { let options = Options::new(5).initial_indent("* ").subsequent_indent(" "); - assert_iter_eq!(wrap("foobarbaz", &options), vec!["* foo", " bar", " baz"]); + assert_eq!(wrap("foobarbaz", &options), vec!["* foo", " bar", " baz"]); + } + + #[test] + fn initial_indent_break_words() { + // This is a corner-case showing how the long word is broken + // according to the width of the subsequent lines. The first + // fragment of the word no longer fits on the first line, + // which ends up being pure indentation. 
+ let options = Options::new(5).initial_indent("-->"); + assert_eq!(wrap("foobarbaz", &options), vec!["-->", "fooba", "rbaz"]); } #[test] fn hyphens() { - assert_iter_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]); + assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]); } #[test] fn trailing_hyphen() { let options = Options::new(5).break_words(false); - assert_iter_eq!(wrap("foobar-", &options), vec!["foobar-"]); + assert_eq!(wrap("foobar-", &options), vec!["foobar-"]); } #[test] fn multiple_hyphens() { - assert_iter_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]); + assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]); } #[test] fn hyphens_flag() { let options = Options::new(5).break_words(false); - assert_iter_eq!( + assert_eq!( wrap("The --foo-bar flag.", &options), vec!["The", "--foo-", "bar", "flag."] ); @@ -862,39 +732,39 @@ mod tests { #[test] fn repeated_hyphens() { let options = Options::new(4).break_words(false); - assert_iter_eq!(wrap("foo--bar", &options), vec!["foo--bar"]); + assert_eq!(wrap("foo--bar", &options), vec!["foo--bar"]); } #[test] fn hyphens_alphanumeric() { - assert_iter_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]); + assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]); } #[test] fn hyphens_non_alphanumeric() { let options = Options::new(5).break_words(false); - assert_iter_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]); + assert_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]); } #[test] fn multiple_splits() { - assert_iter_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]); + assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]); } #[test] fn forced_split() { let options = Options::new(5).break_words(false); - assert_iter_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]); + assert_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]); } #[test] fn multiple_unbroken_words_issue_193() { let options = Options::new(3).break_words(false); - assert_iter_eq!( + assert_eq!( wrap("small large tiny", 
&options), vec!["small", "large", "tiny"] ); - assert_iter_eq!( + assert_eq!( wrap("small large tiny", &options), vec!["small", "large", "tiny"] ); @@ -903,28 +773,28 @@ mod tests { #[test] fn very_narrow_lines_issue_193() { let options = Options::new(1).break_words(false); - assert_iter_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); - assert_iter_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); + assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); + assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); } #[test] fn no_hyphenation() { let options = Options::new(8).splitter(Box::new(NoHyphenation)); - assert_iter_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]); + assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]); } #[test] #[cfg(feature = "hyphenation")] - fn auto_hyphenation() { + fn auto_hyphenation_double_hyphenation() { let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let options = Options::new(10); - assert_iter_eq!( + assert_eq!( wrap("Internationalization", &options), vec!["Internatio", "nalization"] ); let options = Options::new(10).splitter(Box::new(dictionary)); - assert_iter_eq!( + assert_eq!( wrap("Internationalization", &options), vec!["Interna-", "tionaliza-", "tion"] ); @@ -935,15 +805,15 @@ mod tests { fn auto_hyphenation_issue_158() { let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let options = Options::new(10); - assert_iter_eq!( + assert_eq!( wrap("participation is the key to success", &options), vec!["participat", "ion is the", "key to", "success"] ); let options = Options::new(10).splitter(Box::new(dictionary)); - assert_iter_eq!( + assert_eq!( wrap("participation is the key to success", &options), - vec!["participa-", "tion is the", "key to", "success"] + vec!["participa-", "tion is", "the key to", "success"] ); } @@ -954,7 +824,7 @@ mod tests { // into account. 
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let options = Options::new(15).splitter(Box::new(dictionary)); - assert_iter_eq!( + assert_eq!( wrap("garbage collection", &options), vec!["garbage col-", "lection"] ); @@ -968,7 +838,7 @@ mod tests { use std::borrow::Cow::{Borrowed, Owned}; let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let options = Options::new(10).splitter(Box::new(dictionary)); - let lines = wrap("Internationalization", &options).collect::<Vec<_>>(); + let lines = wrap("Internationalization", &options); if let Borrowed(s) = lines[0] { assert!(false, "should not have been borrowed: {:?}", s); } @@ -985,13 +855,13 @@ mod tests { fn auto_hyphenation_with_hyphen() { let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let options = Options::new(8).break_words(false); - assert_iter_eq!( + assert_eq!( wrap("over-caffinated", &options), vec!["over-", "caffinated"] ); let options = options.splitter(Box::new(dictionary)); - assert_iter_eq!( + assert_eq!( wrap("over-caffinated", &options), vec!["over-", "caffi-", "nated"] ); @@ -999,17 +869,22 @@ mod tests { #[test] fn break_words() { - assert_iter_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]); + assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]); } #[test] fn break_words_wide_characters() { - assert_iter_eq!(wrap("Ｈｅｌｌｏ", 5), vec!["Ｈｅ", "ｌｌ", "ｏ"]); + assert_eq!(wrap("Ｈｅｌｌｏ", 5), vec!["Ｈｅ", "ｌｌ", "ｏ"]); } #[test] fn break_words_zero_width() { - assert_iter_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]); + assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]); + } + + #[test] + fn break_long_first_word() { + assert_eq!(wrap("testx y", 4), vec!["test", "x y"]); } #[test] @@ -1019,14 +894,21 @@ mod tests { } #[test] - fn preserve_line_breaks() { - assert_eq!(fill("test\n", 11), "test\n"); - assert_eq!(fill("test\n\na\n\n", 11), "test\n\na\n\n"); - assert_eq!(fill("1 3 5 7\n1 3 5 7", 7), "1 3 5 7\n1 3 5 7"); +
fn break_words_empty_lines() { + assert_eq!( + fill("foo\nbar", &Options::new(2).break_words(false)), + "foo\nbar" + ); } #[test] - fn wrap_preserve_line_breaks() { + fn preserve_line_breaks() { + assert_eq!(fill("", 80), ""); + assert_eq!(fill("\n", 80), "\n"); + assert_eq!(fill("\n\n\n", 80), "\n\n\n"); + assert_eq!(fill("test\n", 80), "test\n"); + assert_eq!(fill("test\n\na\n\n", 80), "test\n\na\n\n"); + assert_eq!(fill("1 3 5 7\n1 3 5 7", 7), "1 3 5 7\n1 3 5 7"); assert_eq!(fill("1 3 5 7\n1 3 5 7", 5), "1 3 5\n7\n1 3 5\n7"); } diff --git a/src/splitting.rs b/src/splitting.rs index c35ae584..619804f9 100644 --- a/src/splitting.rs +++ b/src/splitting.rs @@ -5,13 +5,7 @@ //! functionality. [`HyphenSplitter`] is the default implementation of //! this treat: it will simply split words on existing hyphens. -/// An interface for splitting words. -/// -/// When the [`wrap`] function tries to fit text into a line, it will -/// eventually find a word that it too large the current text width. -/// It will then call the currently configured `WordSplitter` to have -/// it attempt to split the word into smaller parts. This trait -/// describes that functionality via the [`split`] method. +/// The `WordSplitter` trait describes where words can be split. /// /// If the `textwrap` crate has been compiled with the `hyphenation` /// feature enabled, you will find an implementation of `WordSplitter` @@ -20,25 +14,23 @@ /// for details. /// /// [`wrap`]: ../fn.wrap.html -/// [`split`]: #tymethod.split /// [`hyphenation` documentation]: https://docs.rs/hyphenation/ pub trait WordSplitter: std::fmt::Debug { - /// Return all possible splits of word. Each split is a triple - /// with a head, a hyphen, and a tail where `head + &tail == word`. - /// The hyphen can be empty if there is already a hyphen in the - /// head. + /// Return all possible indices where `word` can be split. + /// + /// The indices returned must be in range `0..word.len()`. 
They + /// should point to the index _after_ the split point, i.e., after + /// `-` if splitting on hyphens. This way, `word.split_at(idx)` + /// will break the word into two well-formed pieces. /// - /// The splits should go from smallest to longest and should - /// include no split at all. So the word "technology" could be - /// split into + /// # Examples /// - /// ```no_run - /// vec![("tech", "-", "nology"), - /// ("technol", "-", "ogy"), - /// ("technolo", "-", "gy"), - /// ("technology", "", "")]; /// ``` - fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>; + /// use textwrap::{NoHyphenation, HyphenSplitter, WordSplitter}; + /// assert_eq!(NoHyphenation.split_points("cannot-be-split"), vec![]); + /// assert_eq!(HyphenSplitter.split_points("can-be-split"), vec![4, 7]); + /// ``` + fn split_points(&self, word: &str) -> Vec<usize>; } /// Use this as a [`Options.splitter`] to avoid any kind of @@ -48,7 +40,7 @@ pub trait WordSplitter: std::fmt::Debug { /// use textwrap::{wrap, Options, NoHyphenation}; /// /// let options = Options::new(8).splitter(Box::new(NoHyphenation)); -/// assert_eq!(wrap("foo bar-baz", &options).collect::<Vec<_>>(), +/// assert_eq!(wrap("foo bar-baz", &options), /// vec!["foo", "bar-baz"]); /// ``` /// @@ -59,8 +51,8 @@ pub struct NoHyphenation; /// `NoHyphenation` implements `WordSplitter` by not splitting the /// word at all. impl WordSplitter for NoHyphenation { - fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { - vec![(word, "", "")] + fn split_points(&self, _: &str) -> Vec<usize> { + Vec::new() } } @@ -80,40 +72,24 @@ pub struct HyphenSplitter; /// characters, which prevents a word like "--foo-bar" from being /// split on the first or second hyphen. impl WordSplitter for HyphenSplitter { - fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { - let mut triples = Vec::new(); - // Split on hyphens, smallest split first. We only use hyphens - // that are surrounded by alphanumeric characters.
This is to - // avoid splitting on repeated hyphens, such as those found in - // --foo-bar. - let mut char_indices = word.char_indices(); - // Early return if the word is empty. - let mut prev = match char_indices.next() { - None => return vec![(word, "", "")], - Some((_, ch)) => ch, - }; + fn split_points(&self, word: &str) -> Vec<usize> { + let mut splits = Vec::new(); - // Find current word, or return early if the word only has a - // single character. - let (mut idx, mut cur) = match char_indices.next() { - None => return vec![(word, "", "")], - Some((idx, cur)) => (idx, cur), - }; + for (idx, _) in word.match_indices('-') { + // We only use hyphens that are surrounded by alphanumeric + // characters. This is to avoid splitting on repeated hyphens, + // such as those found in --foo-bar. + let prev = word[..idx].chars().next_back(); + let next = word[idx + 1..].chars().next(); - for (i, next) in char_indices { - if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() { - let (head, tail) = word.split_at(idx + 1); - triples.push((head, "", tail)); + if prev.filter(|ch| ch.is_alphanumeric()).is_some() + && next.filter(|ch| ch.is_alphanumeric()).is_some() + { + splits.push(idx + 1); // +1 due to width of '-'. } - prev = cur; - idx = i; - cur = next; } - // Finally option is no split at all. - triples.push((word, "", "")); - - triples + splits } } @@ -124,18 +100,8 @@ impl WordSplitter for HyphenSplitter { /// enabled. #[cfg(feature = "hyphenation")] impl WordSplitter for hyphenation::Standard { - fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { + fn split_points(&self, word: &str) -> Vec<usize> { use hyphenation::Hyphenator; - // Find splits based on language dictionary. - let mut triples = Vec::new(); - for n in self.hyphenate(word).breaks { - let (head, tail) = word.split_at(n); - let hyphen = if head.ends_with('-') { "" } else { "-" }; - triples.push((head, hyphen, tail)); - } - // Finally option is no split at all.
- triples.push((word, "", "")); - - triples + self.hyphenate(word).breaks } }