Skip to content

Commit

Permalink
Do not expose uninitialized memory to ReadBytes
Browse files Browse the repository at this point in the history
As Youngsuk Kim pointed out, a pathological user-provided ReadBytes
implementation could read from the buffer that it is only supposed to
write to, which means it would be reading uninitialized memory.

My first attempt at sidestepping this was to just use a vec![0; len] to
have a zeroed buffer to read into, but this has severe performance
impact. Fortunately, we can fix the problem without introducing extra
zeroing, by reversing the roles: ReadBytes no longer receives the
buffer, it should produce it. This way, the BufferedReader impl for
ReadBytes can keep the uninitialized memory entirely in the function,
and the Cursor<u8> impl simply becomes a call to slice::to_vec, without
any unsafe at all.

I haven't measured the performance impact of this yet.
  • Loading branch information
ruuda committed Jan 11, 2021
1 parent 66bf343 commit 18ae310
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 52 deletions.
8 changes: 4 additions & 4 deletions src/crc.rs
Expand Up @@ -135,8 +135,8 @@ impl<R: ReadBytes> ReadBytes for Crc8Reader<R> {
}
}

fn read_into(&mut self, _buffer: &mut [u8]) -> io::Result<()> {
panic!("CRC reader does not support read_into.");
fn read_vec(&mut self, _len: usize) -> io::Result<Vec<u8>> {
panic!("CRC reader does not support read_vec.");
}

fn skip(&mut self, _amount: u32) -> io::Result<()> {
Expand Down Expand Up @@ -167,8 +167,8 @@ impl<R: ReadBytes> ReadBytes for Crc16Reader<R> {
}
}

fn read_into(&mut self, _buffer: &mut [u8]) -> io::Result<()> {
panic!("CRC reader does not support read_into.");
fn read_vec(&mut self, _len: usize) -> io::Result<Vec<u8>> {
panic!("CRC reader does not support read_vec.");
}

fn skip(&mut self, _amount: u32) -> io::Result<()> {
Expand Down
55 changes: 28 additions & 27 deletions src/input.rs
Expand Up @@ -75,8 +75,8 @@ pub trait ReadBytes {
/// Reads a single byte, not failing on EOF.
fn read_u8_or_eof(&mut self) -> io::Result<Option<u8>>;

/// Reads until the provided buffer is full.
fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()>;
/// Read exactly `len` bytes into a new vector.
fn read_vec(&mut self, len: usize) -> io::Result<Vec<u8>>;

/// Skips over the specified number of bytes.
///
Expand Down Expand Up @@ -164,8 +164,16 @@ impl<R: io::Read> ReadBytes for BufferedReader<R>
Ok(Some(try!(self.read_u8())))
}

fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()> {
let mut bytes_left = buffer.len();
fn read_vec(&mut self, len: usize) -> io::Result<Vec<u8>> {
// Watch out, we allocate a vector of uninitialized memory here. We only
// call `copy_from_slice` on this vector, it is never read from. We only
// return the vector if bytes_left is zero, and bytes_left can only
// reach zero if we fully overwrote it. Therefore, the uninitialized
// memory is not exposed outside of this function.
let mut buffer = Vec::with_capacity(len);
unsafe { buffer.set_len(len); }

let mut bytes_left = len;

while bytes_left > 0 {
let from = buffer.len() - bytes_left;
Expand All @@ -186,7 +194,7 @@ impl<R: io::Read> ReadBytes for BufferedReader<R>
}
}

Ok(())
Ok(buffer)
}

fn skip(&mut self, mut amount: u32) -> io::Result<()> {
Expand Down Expand Up @@ -222,8 +230,8 @@ impl<'r, R: ReadBytes> ReadBytes for &'r mut R {
(*self).read_u8_or_eof()
}

fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()> {
(*self).read_into(buffer)
fn read_vec(&mut self, len: usize) -> io::Result<Vec<u8>> {
(*self).read_vec(len)
}

fn skip(&mut self, amount: u32) -> io::Result<()> {
Expand Down Expand Up @@ -253,14 +261,13 @@ impl<T: AsRef<[u8]>> ReadBytes for io::Cursor<T> {
}
}

fn read_into(&mut self, buffer: &mut [u8]) -> io::Result<()> {
fn read_vec(&mut self, len: usize) -> io::Result<Vec<u8>> {
let pos = self.position();
if pos + buffer.len() as u64 <= self.get_ref().as_ref().len() as u64 {
if pos + len as u64 <= self.get_ref().as_ref().len() as u64 {
let start = pos as usize;
let end = pos as usize + buffer.len();
buffer.copy_from_slice(&self.get_ref().as_ref()[start..end]);
self.set_position(pos + buffer.len() as u64);
Ok(())
let end = pos as usize + len;
self.set_position(pos + len as u64);
Ok(self.get_ref().as_ref()[start..end].to_vec())
} else {
Err(io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected eof"))
}
Expand All @@ -278,27 +285,21 @@ impl<T: AsRef<[u8]>> ReadBytes for io::Cursor<T> {
}

#[test]
fn verify_read_into_buffered_reader() {
fn verify_read_vec_buffered_reader() {
let mut reader = BufferedReader::new(io::Cursor::new(vec![2u8, 3, 5, 7, 11, 13, 17, 19, 23]));
let mut buf1 = [0u8; 3];
let mut buf2 = [0u8; 5];
let mut buf3 = [0u8; 2];
reader.read_into(&mut buf1).ok().unwrap();
reader.read_into(&mut buf2).ok().unwrap();
assert!(reader.read_into(&mut buf3).is_err());
let buf1 = reader.read_vec(3).ok().unwrap();
let buf2 = reader.read_vec(5).ok().unwrap();
assert!(reader.read_vec(2).is_err());
assert_eq!(&buf1[..], &[2u8, 3, 5]);
assert_eq!(&buf2[..], &[7u8, 11, 13, 17, 19]);
}

#[test]
fn verify_read_into_cursor() {
fn verify_read_vec_cursor() {
let mut cursor = io::Cursor::new(vec![2u8, 3, 5, 7, 11, 13, 17, 19, 23]);
let mut buf1 = [0u8; 3];
let mut buf2 = [0u8; 5];
let mut buf3 = [0u8; 2];
cursor.read_into(&mut buf1).ok().unwrap();
cursor.read_into(&mut buf2).ok().unwrap();
assert!(cursor.read_into(&mut buf3).is_err());
let buf1 = cursor.read_vec(3).ok().unwrap();
let buf2 = cursor.read_vec(5).ok().unwrap();
assert!(cursor.read_vec(2).is_err());
assert_eq!(&buf1[..], &[2u8, 3, 5]);
assert_eq!(&buf2[..], &[7u8, 11, 13, 17, 19]);
}
Expand Down
30 changes: 9 additions & 21 deletions src/metadata.rs
Expand Up @@ -9,8 +9,9 @@

use error::{Error, Result, fmt_err};
use input::ReadBytes;
use std::str;
use std::mem;
use std::slice;
use std::str;

#[derive(Clone, Copy)]
struct MetadataBlockHeader {
Expand Down Expand Up @@ -352,8 +353,11 @@ fn read_streaminfo_block<R: ReadBytes>(input: &mut R) -> Result<StreamInfo> {
let n_samples = (n_samples_msb as u64) << 32 | n_samples_lsb as u64;

// Next are 128 bits (16 bytes) of MD5 signature.
// TODO: Try and see if expanding this into 16 read_u8 calls is any faster.
let mut md5sum = [0u8; 16];
try!(input.read_into(&mut md5sum));
let md5sum_vec = try!(input.read_vec(16));
md5sum.copy_from_slice(&md5sum_vec[..]);
mem::drop(md5sum_vec);

// Lower bounds can never be larger than upper bounds. Note that 0 indicates
// unknown for the frame size. Also, the block size must be at least 16.
Expand Down Expand Up @@ -429,13 +433,7 @@ fn read_vorbis_comment_block<R: ReadBytes>(input: &mut R, length: u32) -> Result
// 32-bit vendor string length, and comment count.
let vendor_len = try!(input.read_le_u32());
if vendor_len > length - 8 { return fmt_err("vendor string too long") }
let mut vendor_bytes = Vec::with_capacity(vendor_len as usize);

// We can safely set the lenght of the vector here; the uninitialized memory
// is not exposed. If `read_into` succeeds, it will have overwritten all
// bytes. If not, an error is returned and the memory is never exposed.
unsafe { vendor_bytes.set_len(vendor_len as usize); }
try!(input.read_into(&mut vendor_bytes));
let vendor_bytes = try!(input.read_vec(vendor_len as usize));
let vendor = try!(String::from_utf8(vendor_bytes));

// Next up is the number of comments. Because every comment is at least 4
Expand Down Expand Up @@ -469,11 +467,7 @@ fn read_vorbis_comment_block<R: ReadBytes>(input: &mut R, length: u32) -> Result
continue;
}

// For the same reason as above, setting the length is safe here.
let mut comment_bytes = Vec::with_capacity(comment_len as usize);
unsafe { comment_bytes.set_len(comment_len as usize); }
try!(input.read_into(&mut comment_bytes));

let comment_bytes = try!(input.read_vec(comment_len as usize));
bytes_left -= comment_len;

if let Some(sep_index) = comment_bytes.iter().position(|&x| x == b'=') {
Expand Down Expand Up @@ -537,13 +531,7 @@ fn read_application_block<R: ReadBytes>(input: &mut R, length: u32) -> Result<(u
let id = try!(input.read_be_u32());

// Four bytes of the block have been used for the ID, the rest is payload.
// Create a vector of uninitialized memory, and read the block into it. The
// uninitialized memory is never exposed: read_into will either fill the
// buffer completely, or return an err, in which case the memory is not
// exposed.
let mut data = Vec::with_capacity(length as usize - 4);
unsafe { data.set_len(length as usize - 4); }
try!(input.read_into(&mut data));
let data = try!(input.read_vec(length as usize - 4));

Ok((id, data))
}
Expand Down

0 comments on commit 18ae310

Please sign in to comment.