Skip to content

Commit

Permalink
Fix handling of trailing bare CR in str::lines
Browse files Browse the repository at this point in the history
Previously "bare\r" was split into ["bare"] even though the
documentation said that only LF and CRLF count as newlines.

This fix is a behavioural change, even though it brings the behaviour
into line with the documentation, and into line with that of
`std::io::BufRead::lines()`.

This is an alternative to #91051, which proposes to document rather
than fix the behaviour.

Fixes #94435.

Co-authored-by: Ian Jackson <ijackson@chiark.greenend.org.uk>
  • Loading branch information
KamilaBorowska and ijackson committed Oct 6, 2022
1 parent 0d8a0c5 commit cef81dc
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 12 deletions.
26 changes: 19 additions & 7 deletions library/alloc/tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1499,13 +1499,25 @@ fn test_split_whitespace() {

#[test]
fn test_lines() {
let data = "\nMäry häd ä little lämb\n\r\nLittle lämb\n";
let lines: Vec<&str> = data.lines().collect();
assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);

let data = "\r\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
let lines: Vec<&str> = data.lines().collect();
assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
fn t(data: &str, expected: &[&str]) {
let lines: Vec<&str> = data.lines().collect();
assert_eq!(lines, expected);
}
t("", &[]);
t("\n", &[""]);
t("\n2nd", &["", "2nd"]);
t("\r\n", &[""]);
t("bare\r", &["bare\r"]);
t("bare\rcr", &["bare\rcr"]);
t("Text\n\r", &["Text", "\r"]);
t(
"\nMäry häd ä little lämb\n\r\nLittle lämb\n",
&["", "Märy häd ä little lämb", "", "Little lämb"],
);
t(
"\r\nMäry häd ä little lämb\n\nLittle lämb",
&["", "Märy häd ä little lämb", "", "Little lämb"],
);
}

#[test]
Expand Down
2 changes: 1 addition & 1 deletion library/core/src/str/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1091,7 +1091,7 @@ generate_pattern_iterators! {
#[stable(feature = "rust1", since = "1.0.0")]
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone, Debug)]
pub struct Lines<'a>(pub(super) Map<SplitTerminator<'a, char>, LinesMap>);
pub struct Lines<'a>(pub(super) Map<SplitInclusive<'a, char>, LinesMap>);

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Lines<'a> {
Expand Down
8 changes: 4 additions & 4 deletions library/core/src/str/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ impl str {
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn lines(&self) -> Lines<'_> {
Lines(self.split_terminator('\n').map(LinesMap))
Lines(self.split_inclusive('\n').map(LinesMap))
}

/// An iterator over the lines of a string.
Expand Down Expand Up @@ -2591,9 +2591,9 @@ impl_fn_for_zst! {
/// A nameable, cloneable fn type
#[derive(Clone)]
struct LinesMap impl<'a> Fn = |line: &'a str| -> &'a str {
let l = line.len();
if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
else { line }
let Some(line) = line.strip_suffix('\n') else { return line };
let Some(line) = line.strip_suffix('\r') else { return line };
line
};

#[derive(Clone)]
Expand Down

0 comments on commit cef81dc

Please sign in to comment.