Skip to content

Commit

Permalink
Auto merge of #81156 - DrMeepster:read_buf, r=joshtriplett
Browse files Browse the repository at this point in the history
Implement most of RFC 2930, providing the ReadBuf abstraction

This replaces the `Initializer` abstraction for permitting reading into uninitialized buffers, closing #42788.

This leaves several APIs described in the RFC out of scope for the initial implementation:

* read_buf_vectored
* `ReadBufs`

Closes #42788, by removing the relevant APIs.
  • Loading branch information
bors committed Dec 9, 2021
2 parents 600820d + cd23799 commit 3b263ce
Show file tree
Hide file tree
Showing 25 changed files with 899 additions and 289 deletions.
22 changes: 9 additions & 13 deletions library/std/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ mod tests;

use crate::ffi::OsString;
use crate::fmt;
use crate::io::{self, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write};
use crate::io::{self, IoSlice, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, Write};
use crate::path::{Path, PathBuf};
use crate::sys::fs as fs_imp;
use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
Expand Down Expand Up @@ -623,15 +623,13 @@ impl Read for File {
self.inner.read_vectored(bufs)
}

#[inline]
fn is_read_vectored(&self) -> bool {
self.inner.is_read_vectored()
fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
self.inner.read_buf(buf)
}

#[inline]
unsafe fn initializer(&self) -> Initializer {
// SAFETY: Read is guaranteed to work on uninitialized memory
unsafe { Initializer::nop() }
fn is_read_vectored(&self) -> bool {
self.inner.is_read_vectored()
}

// Reserves space in the buffer based on the file size when available.
Expand Down Expand Up @@ -677,6 +675,10 @@ impl Read for &File {
self.inner.read(buf)
}

fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
self.inner.read_buf(buf)
}

fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
self.inner.read_vectored(bufs)
}
Expand All @@ -686,12 +688,6 @@ impl Read for &File {
self.inner.is_read_vectored()
}

#[inline]
unsafe fn initializer(&self) -> Initializer {
// SAFETY: Read is guaranteed to work on uninitialized memory
unsafe { Initializer::nop() }
}

// Reserves space in the buffer based on the file size when available.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
buf.reserve(buffer_capacity_required(self));
Expand Down
59 changes: 43 additions & 16 deletions library/std/src/io/buffered/bufreader.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::cmp;
use crate::fmt;
use crate::io::{
self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
self, BufRead, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
};
use crate::mem::MaybeUninit;

/// The `BufReader<R>` struct adds buffering to any reader.
///
Expand Down Expand Up @@ -47,9 +48,10 @@ use crate::io::{
#[stable(feature = "rust1", since = "1.0.0")]
pub struct BufReader<R> {
inner: R,
buf: Box<[u8]>,
buf: Box<[MaybeUninit<u8>]>,
pos: usize,
cap: usize,
init: usize,
}

impl<R: Read> BufReader<R> {
Expand Down Expand Up @@ -91,11 +93,8 @@ impl<R: Read> BufReader<R> {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn with_capacity(capacity: usize, inner: R) -> BufReader<R> {
unsafe {
let mut buf = Box::new_uninit_slice(capacity).assume_init();
inner.initializer().initialize(&mut buf);
BufReader { inner, buf, pos: 0, cap: 0 }
}
let buf = Box::new_uninit_slice(capacity);
BufReader { inner, buf, pos: 0, cap: 0, init: 0 }
}
}

Expand Down Expand Up @@ -171,7 +170,8 @@ impl<R> BufReader<R> {
/// ```
#[stable(feature = "bufreader_buffer", since = "1.37.0")]
pub fn buffer(&self) -> &[u8] {
&self.buf[self.pos..self.cap]
// SAFETY: self.cap is always <= self.init, so self.buf[self.pos..self.cap] is always init
unsafe { MaybeUninit::slice_assume_init_ref(&self.buf[self.pos..self.cap]) }
}

/// Returns the number of bytes the internal buffer can hold at once.
Expand Down Expand Up @@ -271,6 +271,25 @@ impl<R: Read> Read for BufReader<R> {
Ok(nread)
}

fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
// If we don't have any buffered data and we're doing a massive read
// (larger than our internal buffer), bypass our internal buffer
// entirely.
if self.pos == self.cap && buf.remaining() >= self.buf.len() {
self.discard_buffer();
return self.inner.read_buf(buf);
}

let prev = buf.filled_len();

let mut rem = self.fill_buf()?;
rem.read_buf(buf)?;

self.consume(buf.filled_len() - prev); //slice impl of read_buf known to never unfill buf

Ok(())
}

// Small read_exacts from a BufReader are extremely common when used with a deserializer.
// The default implementation calls read in a loop, which results in surprisingly poor code
// generation for the common path where the buffer has enough bytes to fill the passed-in
Expand Down Expand Up @@ -303,16 +322,11 @@ impl<R: Read> Read for BufReader<R> {
self.inner.is_read_vectored()
}

// we can't skip unconditionally because of the large buffer case in read.
unsafe fn initializer(&self) -> Initializer {
self.inner.initializer()
}

// The inner reader might have an optimized `read_to_end`. Drain our buffer and then
// delegate to the inner implementation.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
let nread = self.cap - self.pos;
buf.extend_from_slice(&self.buf[self.pos..self.cap]);
buf.extend_from_slice(&self.buffer());
self.discard_buffer();
Ok(nread + self.inner.read_to_end(buf)?)
}
Expand Down Expand Up @@ -363,10 +377,23 @@ impl<R: Read> BufRead for BufReader<R> {
// to tell the compiler that the pos..cap slice is always valid.
if self.pos >= self.cap {
debug_assert!(self.pos == self.cap);
self.cap = self.inner.read(&mut self.buf)?;

let mut readbuf = ReadBuf::uninit(&mut self.buf);

// SAFETY: `self.init` is either 0 or set to `readbuf.initialized_len()`
// from the last time this function was called
unsafe {
readbuf.assume_init(self.init);
}

self.inner.read_buf(&mut readbuf)?;

self.cap = readbuf.filled_len();
self.init = readbuf.initialized_len();

self.pos = 0;
}
Ok(&self.buf[self.pos..self.cap])
Ok(self.buffer())
}

fn consume(&mut self, amt: usize) {
Expand Down
52 changes: 51 additions & 1 deletion library/std/src/io/buffered/tests.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::io::prelude::*;
use crate::io::{self, BufReader, BufWriter, ErrorKind, IoSlice, LineWriter, SeekFrom};
use crate::io::{self, BufReader, BufWriter, ErrorKind, IoSlice, LineWriter, ReadBuf, SeekFrom};
use crate::mem::MaybeUninit;
use crate::panic;
use crate::sync::atomic::{AtomicUsize, Ordering};
use crate::thread;
Expand Down Expand Up @@ -55,6 +56,55 @@ fn test_buffered_reader() {
assert_eq!(reader.read(&mut buf).unwrap(), 0);
}

#[test]
fn test_buffered_reader_read_buf() {
let inner: &[u8] = &[5, 6, 7, 0, 1, 2, 3, 4];
let mut reader = BufReader::with_capacity(2, inner);

let mut buf = [MaybeUninit::uninit(); 3];
let mut buf = ReadBuf::uninit(&mut buf);

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled(), [5, 6, 7]);
assert_eq!(reader.buffer(), []);

let mut buf = [MaybeUninit::uninit(); 2];
let mut buf = ReadBuf::uninit(&mut buf);

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled(), [0, 1]);
assert_eq!(reader.buffer(), []);

let mut buf = [MaybeUninit::uninit(); 1];
let mut buf = ReadBuf::uninit(&mut buf);

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled(), [2]);
assert_eq!(reader.buffer(), [3]);

let mut buf = [MaybeUninit::uninit(); 3];
let mut buf = ReadBuf::uninit(&mut buf);

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled(), [3]);
assert_eq!(reader.buffer(), []);

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled(), [3, 4]);
assert_eq!(reader.buffer(), []);

buf.clear();

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled_len(), 0);
}

#[test]
fn test_buffered_reader_seek() {
let inner: &[u8] = &[5, 6, 7, 0, 1, 2, 3, 4];
Expand Down
81 changes: 38 additions & 43 deletions library/std/src/io/copy.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{BufWriter, ErrorKind, Read, Result, Write, DEFAULT_BUF_SIZE};
use super::{BufWriter, ErrorKind, Read, ReadBuf, Result, Write, DEFAULT_BUF_SIZE};
use crate::mem::MaybeUninit;

/// Copies the entire contents of a reader into a writer.
Expand Down Expand Up @@ -82,33 +82,30 @@ impl<I: Write> BufferedCopySpec for BufWriter<I> {
return stack_buffer_copy(reader, writer);
}

// FIXME: #42788
//
// - This creates a (mut) reference to a slice of
// _uninitialized_ integers, which is **undefined behavior**
//
// - Only the standard library gets to soundly "ignore" this,
// based on its privileged knowledge of unstable rustc
// internals;
unsafe {
let spare_cap = writer.buffer_mut().spare_capacity_mut();
reader.initializer().initialize(MaybeUninit::slice_assume_init_mut(spare_cap));
}

let mut len = 0;
let mut init = 0;

loop {
let buf = writer.buffer_mut();
let spare_cap = buf.spare_capacity_mut();

if spare_cap.len() >= DEFAULT_BUF_SIZE {
match reader.read(unsafe { MaybeUninit::slice_assume_init_mut(spare_cap) }) {
Ok(0) => return Ok(len), // EOF reached
Ok(bytes_read) => {
assert!(bytes_read <= spare_cap.len());
// SAFETY: The initializer contract guarantees that either it or `read`
// will have initialized these bytes. And we just checked that the number
// of bytes is within the buffer capacity.
let mut read_buf = ReadBuf::uninit(buf.spare_capacity_mut());

// SAFETY: init is either 0 or the initialized_len of the previous iteration
unsafe {
read_buf.assume_init(init);
}

if read_buf.capacity() >= DEFAULT_BUF_SIZE {
match reader.read_buf(&mut read_buf) {
Ok(()) => {
let bytes_read = read_buf.filled_len();

if bytes_read == 0 {
return Ok(len);
}

init = read_buf.initialized_len() - bytes_read;

// SAFETY: ReadBuf guarantees all of its filled bytes are init
unsafe { buf.set_len(buf.len() + bytes_read) };
len += bytes_read as u64;
// Read again if the buffer still has enough capacity, as BufWriter itself would do
Expand All @@ -129,28 +126,26 @@ fn stack_buffer_copy<R: Read + ?Sized, W: Write + ?Sized>(
reader: &mut R,
writer: &mut W,
) -> Result<u64> {
let mut buf = MaybeUninit::<[u8; DEFAULT_BUF_SIZE]>::uninit();
// FIXME: #42788
//
// - This creates a (mut) reference to a slice of
// _uninitialized_ integers, which is **undefined behavior**
//
// - Only the standard library gets to soundly "ignore" this,
// based on its privileged knowledge of unstable rustc
// internals;
unsafe {
reader.initializer().initialize(buf.assume_init_mut());
}
let mut buf = [MaybeUninit::uninit(); DEFAULT_BUF_SIZE];
let mut buf = ReadBuf::uninit(&mut buf);

let mut len = 0;

let mut written = 0;
loop {
let len = match reader.read(unsafe { buf.assume_init_mut() }) {
Ok(0) => return Ok(written),
Ok(len) => len,
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
match reader.read_buf(&mut buf) {
Ok(()) => {}
Err(e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
};
writer.write_all(unsafe { &buf.assume_init_ref()[..len] })?;
written += len as u64;

if buf.filled().is_empty() {
break;
}

len += buf.filled().len() as u64;
writer.write_all(buf.filled())?;
buf.clear();
}

Ok(len)
}
17 changes: 11 additions & 6 deletions library/std/src/io/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod tests;
use crate::io::prelude::*;

use crate::cmp;
use crate::io::{self, Error, ErrorKind, Initializer, IoSlice, IoSliceMut, SeekFrom};
use crate::io::{self, Error, ErrorKind, IoSlice, IoSliceMut, ReadBuf, SeekFrom};

use core::convert::TryInto;

Expand Down Expand Up @@ -324,6 +324,16 @@ where
Ok(n)
}

fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
let prev_filled = buf.filled_len();

Read::read_buf(&mut self.fill_buf()?, buf)?;

self.pos += (buf.filled_len() - prev_filled) as u64;

Ok(())
}

fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
let mut nread = 0;
for buf in bufs {
Expand All @@ -346,11 +356,6 @@ where
self.pos += n as u64;
Ok(())
}

#[inline]
unsafe fn initializer(&self) -> Initializer {
Initializer::nop()
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
Loading

0 comments on commit 3b263ce

Please sign in to comment.