Skip to content

Commit

Permalink
Auto merge of #115549 - saethlin:include-bytes-resilient, r=jackh726
Browse files Browse the repository at this point in the history
Fall back to the unoptimized implementation in read_binary_file if File::metadata lies

Fixes #115458

r? `@jackh726` because you approved the previous PR
  • Loading branch information
bors committed Sep 21, 2023
2 parents cbce15c + 5f33647 commit 4fda889
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 2 deletions.
33 changes: 31 additions & 2 deletions compiler/rustc_span/src/source_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,39 @@ impl FileLoader for RealFileLoader {

let mut bytes = Lrc::new_uninit_slice(len as usize);
let mut buf = BorrowedBuf::from(Lrc::get_mut(&mut bytes).unwrap());
file.read_buf_exact(buf.unfilled())?;
match file.read_buf_exact(buf.unfilled()) {
Ok(()) => {}
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
drop(bytes);
return fs::read(path).map(Vec::into);
}
Err(e) => return Err(e),
}
// SAFETY: If the read_buf_exact call returns Ok(()), then we have
// read len bytes and initialized the buffer.
Ok(unsafe { bytes.assume_init() })
let bytes = unsafe { bytes.assume_init() };

// At this point, we've read all the bytes that filesystem metadata reported exist.
// But we are not guaranteed to be at the end of the file, because we did not attempt to do
// a read with a non-zero-sized buffer and get Ok(0).
// So we do a small read to a fixed-size buffer. If the read returns no bytes then we're
// already done, and we just return the Lrc we built above.
// If the read returns bytes however, we just fall back to reading into a Vec then turning
// that into an Lrc, losing our nice peak memory behavior. This fallback code path should
// be rarely exercised.

let mut probe = [0u8; 32];
let n = loop {
match file.read(&mut probe) {
Ok(0) => return Ok(bytes),
Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
Ok(n) => break n,
}
};
let mut bytes: Vec<u8> = bytes.iter().copied().chain(probe[..n].iter().copied()).collect();
file.read_to_end(&mut bytes)?;
Ok(bytes.into())
}
}

Expand Down
27 changes: 27 additions & 0 deletions compiler/rustc_span/src/source_map/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,3 +567,30 @@ fn test_next_point() {
assert_eq!(span.hi().0, 6);
assert!(sm.span_to_snippet(span).is_err());
}

#[cfg(target_os = "linux")]
#[test]
fn read_binary_file_handles_lying_stat() {
    // `read_binary_file` is meant to load a file into an `Lrc<[u8]>` without ever holding
    // two copies of the contents in memory, an optimization aimed at `include_bytes!` on
    // large files. Because Rust allocators are sensitive to alignment, it cannot simply be
    // built on top of `std::fs::read`, so this test checks that both produce the same bytes
    // even for files whose `fs::metadata` length is wrong.

    /// Returns the length that `fs::metadata` reports for `path`, paired with the bytes
    /// that `fs::read` actually obtains from it.
    fn reported_and_real(path: &Path) -> (usize, Vec<u8>) {
        let reported = std::fs::metadata(path).unwrap().len() as usize;
        let contents = std::fs::read(path).unwrap();
        (reported, contents)
    }

    // Under-reporting case: stat claims /proc/self/cmdline has length 0, but it does not.
    let cmdline = Path::new("/proc/self/cmdline");
    let (len, real) = reported_and_real(cmdline);
    assert!(len < real.len());
    let bin = RealFileLoader.read_binary_file(cmdline).unwrap();
    assert_eq!(&real[..], &bin[..]);

    // Over-reporting case: stat claims /sys/devices/system/cpu/kernel_max is the size of
    // a block, which is more than a read actually returns.
    let kernel_max = Path::new("/sys/devices/system/cpu/kernel_max");
    let (len, real) = reported_and_real(kernel_max);
    assert!(len > real.len());
    let bin = RealFileLoader.read_binary_file(kernel_max).unwrap();
    assert_eq!(&real[..], &bin[..]);
}

0 comments on commit 4fda889

Please sign in to comment.