Skip to content

Commit

Permalink
Rollup merge of #81136 - Xavientois:io_reader_size_hint, r=cramertj
Browse files Browse the repository at this point in the history
Improved IO Bytes Size Hint

After trying to implement better `size_hint()` return values for `File` in [this PR](#81044) and changing to implementing it for `BufReader` in [this PR](#81052), I have arrived at this implementation that provides tighter bounds for the `Bytes` iterator of various readers including `BufReader`, `Empty`, and `Chain`.

Unfortunately, for `BufReader`, the size hint only improves after calling `fill_buf`, because the hint is derived from the current contents of the buffer. Nevertheless, the tighter bounds should result in better pre-allocation of space to handle the contents of the `Bytes` iterator.

Closes #81052
  • Loading branch information
m-ou-se committed Mar 5, 2021
2 parents ec2619c + 7674ae1 commit 6013811
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 3 deletions.
10 changes: 9 additions & 1 deletion library/std/src/io/buffered/bufreader.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use crate::cmp;
use crate::fmt;
use crate::io::{self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, DEFAULT_BUF_SIZE};
use crate::io::{
self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
};

/// The `BufReader<R>` struct adds buffering to any reader.
///
Expand Down Expand Up @@ -435,3 +437,9 @@ impl<R: Seek> Seek for BufReader<R> {
})
}
}

/// Size-hint support for `BufReader`, based on its buffered data.
///
/// Only the lower bound is overridden here; the upper bound is left to the
/// blanket `SizeHint` implementation.
impl<T> SizeHint for BufReader<T> {
    /// Returns the length of the slice currently exposed by `buffer()`.
    ///
    /// The value only grows once the buffer has been filled, so a freshly
    /// created reader reports whatever `buffer()` holds at that moment.
    fn lower_bound(&self) -> usize {
        self.buffer().len()
    }
}
37 changes: 37 additions & 0 deletions library/std/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2238,6 +2238,19 @@ impl<T: BufRead, U: BufRead> BufRead for Chain<T, U> {
}
}

/// Size-hint support for `Chain`, combining the hints of both chained readers.
impl<T, U> SizeHint for Chain<T, U> {
    /// Returns the sum of the lower bounds of the first and second reader.
    ///
    /// The addition saturates at `usize::MAX` instead of overflowing; a
    /// saturated value is still a valid lower bound.
    fn lower_bound(&self) -> usize {
        SizeHint::lower_bound(&self.first).saturating_add(SizeHint::lower_bound(&self.second))
    }

    /// Returns the sum of the upper bounds of the first and second reader.
    ///
    /// Yields `None` when either reader has no known upper bound, or when the
    /// combined bound does not fit in a `usize` (instead of wrapping to a
    /// bogus, too-small bound).
    fn upper_bound(&self) -> Option<usize> {
        let first = SizeHint::upper_bound(&self.first)?;
        let second = SizeHint::upper_bound(&self.second)?;
        first.checked_add(second)
    }
}

/// Reader adaptor which limits the bytes read from an underlying reader.
///
/// This struct is generally created by calling [`take`] on a reader.
Expand Down Expand Up @@ -2464,6 +2477,30 @@ impl<R: Read> Iterator for Bytes<R> {
};
}
}

/// Forwards the iterator's size hint to the `SizeHint` implementation of
/// the wrapped reader.
fn size_hint(&self) -> (usize, Option<usize>) {
    let reader = &self.inner;
    SizeHint::size_hint(reader)
}
}

/// Helper trait describing how many items a reader-backed iterator can still
/// be expected to produce, mirroring the shape of `Iterator::size_hint`.
trait SizeHint {
    /// Lower bound of the hint.
    fn lower_bound(&self) -> usize;

    /// Upper bound of the hint, or `None` when no bound is known.
    fn upper_bound(&self) -> Option<usize>;

    /// Pairs the two bounds as `(lower, upper)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let lower = self.lower_bound();
        let upper = self.upper_bound();
        (lower, upper)
    }
}

/// Fallback `SizeHint` for every type.
///
/// Both methods are declared `default`, so more specific implementations can
/// override either one individually.
impl<T> SizeHint for T {
    /// With no further knowledge about the type, zero is reported.
    default fn lower_bound(&self) -> usize {
        0
    }

    /// With no further knowledge about the type, no upper bound is reported.
    default fn upper_bound(&self) -> Option<usize> {
        None
    }
}

/// An iterator over the contents of an instance of `BufRead` split on a
Expand Down
49 changes: 48 additions & 1 deletion library/std/src/io/tests.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::{repeat, Cursor, SeekFrom};
use crate::cmp::{self, min};
use crate::io::{self, IoSlice, IoSliceMut};
use crate::io::{BufRead, Read, Seek, Write};
use crate::io::{BufRead, BufReader, Read, Seek, Write};
use crate::ops::Deref;

#[test]
Expand Down Expand Up @@ -198,6 +198,53 @@ fn chain_bufread() {
cmp_bufread(chain1, chain2, &testdata[..]);
}

/// The lower bound of a `BufReader`'s `bytes()` iterator follows the number
/// of bytes held in the buffer.
#[test]
fn bufreader_size_hint() {
    let payload = b"ABCDEFGHIJKL";
    let expected_len = payload.len();

    // Nothing has been buffered before the first fill.
    let mut reader = BufReader::new(&payload[..]);
    assert_eq!(reader.buffer().len(), 0);

    reader.fill_buf().unwrap();

    // After filling, the lower bound is expected to equal the payload length.
    let mut bytes = reader.bytes();
    let (lower, _) = bytes.size_hint();
    assert_eq!(lower, expected_len);

    // Consuming one byte is expected to shrink the lower bound by one.
    bytes.next().unwrap().unwrap();
    let (lower, _) = bytes.size_hint();
    assert_eq!(lower, expected_len - 1);
}

/// The `bytes()` iterator of `io::empty()` reports exactly zero items.
#[test]
fn empty_size_hint() {
    let (lower, upper) = io::empty().bytes().size_hint();
    assert_eq!((lower, upper), (0, Some(0)));
}

/// Chaining two empty readers still reports exactly zero items.
#[test]
fn chain_empty_size_hint() {
    let bytes = io::empty().chain(io::empty()).bytes();
    assert_eq!(bytes.size_hint(), (0, Some(0)));
}

/// Chaining two filled `BufReader`s adds up their lower bounds, while the
/// upper bound stays unknown.
#[test]
fn chain_size_hint() {
    let payload = b"ABCDEFGHIJKL";

    // Split the payload across two readers and fill both buffers up front.
    let mut front = BufReader::new(&payload[..6]);
    let mut back = BufReader::new(&payload[6..]);
    front.fill_buf().unwrap();
    back.fill_buf().unwrap();

    let (lower, upper) = front.chain(back).bytes().size_hint();
    assert_eq!((lower, upper), (payload.len(), None));
}

#[test]
fn chain_zero_length_read_is_not_eof() {
let a = b"A";
Expand Down
10 changes: 9 additions & 1 deletion library/std/src/io/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
mod tests;

use crate::fmt;
use crate::io::{self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write};
use crate::io::{
self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, SizeHint, Write,
};

/// A reader which is always at EOF.
///
Expand Down Expand Up @@ -80,6 +82,12 @@ impl fmt::Debug for Empty {
}
}

/// Size-hint support for `Empty`.
///
/// Only the upper bound is overridden; the lower bound comes from the blanket
/// `SizeHint` implementation.
impl SizeHint for Empty {
    /// Reports an upper bound of zero.
    fn upper_bound(&self) -> Option<usize> {
        Some(0)
    }
}

/// A reader which yields one byte over and over and over and over and over and...
///
/// This struct is generally created by calling [`repeat()`]. Please
Expand Down

0 comments on commit 6013811

Please sign in to comment.