Skip to content

Commit

Permalink
Write a CountPred for line counting, and make Lines::count use it
Browse files Browse the repository at this point in the history
  • Loading branch information
thomcc committed Apr 7, 2024
1 parent ade7869 commit 1466713
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 10 deletions.
32 changes: 29 additions & 3 deletions library/core/src/str/count.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! Code for efficiently counting the number of `char`s in a UTF-8 encoded
//! string.
//! Code for efficiently counting the number of `char`s or lines in a UTF-8
//! encoded string.
//!
//! ## `char` count details
//!
//! Broadly, UTF-8 encodes `char`s as a "leading" byte which begins the `char`,
//! followed by some number (possibly 0) of continuation bytes.
Expand All @@ -21,12 +23,21 @@ use core::intrinsics::unlikely;

// Size of a `usize` in bytes.
const USIZE_SIZE: usize = core::mem::size_of::<usize>();
// NOTE(review): presumably the unroll factor of the inner counting loop —
// confirm against `do_count`.
const UNROLL_INNER: usize = 4;
// A word with every byte set to `0x01`; used to mask per-byte predicate
// results down to a single 0/1 bit per byte.
const LSB: usize = usize::repeat_u8(0x01);

/// Returns the number of `char`s in `s`.
///
/// Thin wrapper that runs the generic `count` routine with the `CharCount`
/// predicate.
#[inline]
pub(super) fn count_chars(s: &str) -> usize {
count::<CharCount>(s)
}

/// Returns the number of lines in `s`, i.e. the number of items that
/// `s.lines()` would yield.
///
/// An empty string has zero lines. For any other input, a trailing `'\n'`
/// terminates the last line rather than starting a new, empty one.
#[inline]
pub(super) fn count_lines(s: &str) -> usize {
    // An empty input yields no lines at all. Without this guard the formula
    // below would report 1, because an empty string has zero newlines and
    // does not end in one.
    if s.is_empty() {
        return 0;
    }
    // `foo\nbar` is 2 lines, and `foo\nbar\n` is also 2 lines, so the line
    // count is exactly the newline count if the input ends in a newline, and
    // the newline count + 1 otherwise.
    count::<NewlineCount>(s) + usize::from(!s.ends_with('\n'))
}

trait CountPred {
/// Bytes in `u` which match the pred must be `0x01` in the result, bytes
/// which fail the pred must be `0x00`.
Expand All @@ -46,6 +57,22 @@ impl CountPred for CharCount {
contains_non_continuation_byte(u)
}
}
/// `CountPred` implementation whose predicate matches newline (`b'\n'`) bytes.
struct NewlineCount;
impl CountPred for NewlineCount {
/// Counts the `b'\n'` bytes in `s` by inspecting one byte at a time.
#[inline]
fn count_general_case(s: &[u8]) -> usize {
s.iter().filter(|b| **b == b'\n').count()
}
/// Returns a word in which each byte is `0x01` if the corresponding byte of
/// `u` is `b'\n'`, and `0x00` otherwise.
#[inline]
fn test_each_byte_in_word(u: usize) -> usize {
const NEWLINES: usize = usize::repeat_u8(b'\n');
const NOT_MSB: usize = usize::repeat_u8(0x7f);
// A byte of `diff` is zero exactly when the matching byte of `u` is a
// newline, and nonzero otherwise.
let diff = u ^ NEWLINES;
// Adding 0x7f to the low seven bits of a byte sets that byte's top bit iff
// any of those bits was set, and never carries into the next byte
// (0x7f + 0x7f = 0xfe). OR-ing `diff` back in covers bytes whose top bit was
// already set, so the top bit of each byte is now set iff that byte of
// `diff` is nonzero. The shift moves that flag to the bottom bit of its
// byte, and the negation flips it to mean "this byte is a newline".
let res = !(((diff & NOT_MSB).wrapping_add(NOT_MSB) | diff) >> 7);
// Keep only the bottom bit of each byte; the other bits hold leftovers from
// the shift and negation.
res & LSB
}
}

#[inline]
fn count<P: CountPred>(s: &str) -> usize {
Expand Down Expand Up @@ -137,7 +164,6 @@ fn do_count<P: CountPred>(s: &str) -> usize {
// true)
#[inline]
fn contains_non_continuation_byte(w: usize) -> usize {
    const LSB: usize = usize::repeat_u8(0x01);
    // UTF-8 continuation bytes look like `0b10xx_xxxx`, so a byte is *not* a
    // continuation byte when its top bit is clear or its second-highest bit
    // is set.
    //
    // Bottom bit of each byte <- inverse of that byte's top bit.
    let top_bit_clear = !w >> 7;
    // Bottom bit of each byte <- that byte's second-highest bit.
    let second_bit_set = w >> 6;
    // Combine both conditions and drop everything except the bottom bit of
    // each byte, leaving `0x01` per non-continuation byte and `0x00` otherwise.
    (top_bit_clear | second_bit_set) & LSB
}

Expand Down
5 changes: 5 additions & 0 deletions library/core/src/str/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1174,6 +1174,11 @@ impl<'a> Iterator for Lines<'a> {
fn last(mut self) -> Option<&'a str> {
self.next_back()
}

/// Counts the lines left in this iterator without stepping through them one
/// by one: the not-yet-yielded remainder of the string is handed to
/// `count_lines`, and an iterator with no remainder counts as zero.
#[inline]
fn count(self) -> usize {
    match self.remainder() {
        Some(rest) => super::count::count_lines(rest),
        None => 0,
    }
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
10 changes: 3 additions & 7 deletions tests/ui/std/stdio-from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,11 @@ use std::env;
use std::fs::File;
use std::io;
use std::io::{Read, Write};
use std::process::{Command, Stdio};
use std::path::PathBuf;
use std::process::{Command, Stdio};

// Entry point: runs `child()` when the process was started with at least one
// extra command-line argument, and `parent()` otherwise. The role is
// dispatched exactly once per process.
fn main() {
    if env::args().len() > 1 {
        child().unwrap()
    } else {
        parent().unwrap()
    }
}

fn parent() -> io::Result<()> {
Expand Down Expand Up @@ -55,7 +51,7 @@ fn parent() -> io::Result<()> {
for line in data.lines() {
assert_eq!(line, "foo");
}
assert_eq!(data.lines().count(), 8);
assert_eq!(data.lines().count(), 8, "{:?}", data);
Ok(())
}

Expand Down

0 comments on commit 1466713

Please sign in to comment.