Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mmap the incremental data instead of reading it. #83214

Merged
merged 4 commits into from
Sep 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 71 additions & 7 deletions compiler/rustc_incremental/src/persist/file_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
use std::env;
use std::fs;
use std::io::{self, Read};
use std::path::Path;
use std::path::{Path, PathBuf};

use rustc_data_structures::memmap::Mmap;
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
use rustc_serialize::Encoder;
use rustc_session::Session;

/// The first few bytes of files generated by incremental compilation.
const FILE_MAGIC: &[u8] = b"RSIC";
Expand All @@ -28,7 +30,7 @@ const HEADER_FORMAT_VERSION: u16 = 0;
/// the Git commit hash.
const RUSTC_VERSION: Option<&str> = option_env!("CFG_VERSION");

pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult {
pub(crate) fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult {
stream.emit_raw_bytes(FILE_MAGIC)?;
stream.emit_raw_bytes(&[
(HEADER_FORMAT_VERSION >> 0) as u8,
Expand All @@ -41,6 +43,61 @@ pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileE
stream.emit_raw_bytes(rustc_version.as_bytes())
}

pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F)
where
F: FnOnce(&mut FileEncoder) -> FileEncodeResult,
{
debug!("save: storing data in {}", path_buf.display());

// Delete the old file, if any.
// Note: It's important that we actually delete the old file and not just
// truncate and overwrite it, since it might be a shared hard-link, the
// underlying data of which we don't want to modify.
//
// We have to ensure we have dropped the memory maps to this file
// before performing this removal.
match fs::remove_file(&path_buf) {
Ok(()) => {
debug!("save: remove old file");
}
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
Err(err) => {
sess.err(&format!(
"unable to delete old {} at `{}`: {}",
name,
path_buf.display(),
err
));
return;
}
}

let mut encoder = match FileEncoder::new(&path_buf) {
Ok(encoder) => encoder,
Err(err) => {
sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err));
return;
}
};

if let Err(err) = write_file_header(&mut encoder, sess.is_nightly_build()) {
sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err));
return;
}

if let Err(err) = encode(&mut encoder) {
sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err));
return;
}

if let Err(err) = encoder.flush() {
sess.err(&format!("failed to flush {} to `{}`: {}", name, path_buf.display(), err));
return;
}

debug!("save: data written to disk successfully");
}

/// Reads the contents of a file with a file header as defined in this module.
///
/// - Returns `Ok(Some(data, pos))` if the file existed and was generated by a
Expand All @@ -54,14 +111,21 @@ pub fn read_file(
report_incremental_info: bool,
path: &Path,
nightly_build: bool,
) -> io::Result<Option<(Vec<u8>, usize)>> {
let data = match fs::read(path) {
Ok(data) => data,
) -> io::Result<Option<(Mmap, usize)>> {
let file = match fs::File::open(path) {
Ok(file) => file,
Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
Err(err) => return Err(err),
};
// SAFETY: This process must not modify nor remove the backing file while the memory map lives.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes it sound like it is okay when other processes modify or remove the file. Shouldn't the comment be more like

// SAFETY: No process must modify or remove the backing file while the memory map lives.
// We ensure this to be the case for rustc itself. There is no way to prevent another
// process from modifying this file, so UB can arise if they do.

// For the dep-graph and the work product index, it is as soon as the decoding is done.
// For the query result cache, the memory map is dropped in save_dep_graph before calling
// save_in and trying to remove the backing file.
//
// There is no way to prevent another process from modifying this file.
let mmap = unsafe { Mmap::map(file) }?;

let mut file = io::Cursor::new(data);
let mut file = io::Cursor::new(&*mmap);

// Check FILE_MAGIC
{
Expand Down Expand Up @@ -103,7 +167,7 @@ pub fn read_file(
}

let post_header_start_pos = file.position() as usize;
Ok(Some((file.into_inner(), post_header_start_pos)))
Ok(Some((mmap, post_header_start_pos)))
}

fn report_format_mismatch(report_incremental_info: bool, file: &Path, message: &str) {
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_incremental/src/persist/load.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Code to save/load the dep-graph from files.

use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::memmap::Mmap;
use rustc_middle::dep_graph::{SerializedDepGraph, WorkProduct, WorkProductId};
use rustc_middle::ty::OnDiskCache;
use rustc_serialize::opaque::Decoder;
Expand Down Expand Up @@ -48,7 +49,7 @@ fn load_data(
report_incremental_info: bool,
path: &Path,
nightly_build: bool,
) -> LoadResult<(Vec<u8>, usize)> {
) -> LoadResult<(Mmap, usize)> {
match file_format::read_file(report_incremental_info, path, nightly_build) {
Ok(Some(data_and_pos)) => LoadResult::Ok { data: data_and_pos },
Ok(None) => {
Expand Down
67 changes: 11 additions & 56 deletions compiler/rustc_incremental/src/persist/save.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
use rustc_serialize::Encodable as RustcEncodable;
use rustc_session::Session;
use std::fs;
use std::io;
use std::path::PathBuf;

use super::data::*;
use super::dirty_clean;
Expand Down Expand Up @@ -44,7 +42,14 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
join(
move || {
sess.time("incr_comp_persist_result_cache", || {
save_in(sess, query_cache_path, "query cache", |e| encode_query_cache(tcx, e));
// Drop the memory map so that we can remove the file and write to it.
if let Some(odc) = &tcx.on_disk_cache {
odc.drop_serialized_data(tcx);
}

file_format::save_in(sess, query_cache_path, "query cache", |e| {
encode_query_cache(tcx, e)
});
});
},
move || {
Expand Down Expand Up @@ -86,7 +91,9 @@ pub fn save_work_product_index(
debug!("save_work_product_index()");
dep_graph.assert_ignored();
let path = work_products_path(sess);
save_in(sess, path, "work product index", |e| encode_work_product_index(&new_work_products, e));
file_format::save_in(sess, path, "work product index", |e| {
encode_work_product_index(&new_work_products, e)
});

// We also need to clean out old work-products, as not all of them are
// deleted during invalidation. Some object files don't change their
Expand All @@ -113,58 +120,6 @@ pub fn save_work_product_index(
});
}

pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F)
where
F: FnOnce(&mut FileEncoder) -> FileEncodeResult,
{
debug!("save: storing data in {}", path_buf.display());

// Delete the old file, if any.
// Note: It's important that we actually delete the old file and not just
// truncate and overwrite it, since it might be a shared hard-link, the
// underlying data of which we don't want to modify
match fs::remove_file(&path_buf) {
Ok(()) => {
debug!("save: remove old file");
}
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
Err(err) => {
sess.err(&format!(
"unable to delete old {} at `{}`: {}",
name,
path_buf.display(),
err
));
return;
}
}

let mut encoder = match FileEncoder::new(&path_buf) {
Ok(encoder) => encoder,
Err(err) => {
sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err));
return;
}
};

if let Err(err) = file_format::write_file_header(&mut encoder, sess.is_nightly_build()) {
sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err));
return;
}

if let Err(err) = encode(&mut encoder) {
sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err));
return;
}

if let Err(err) = encoder.flush() {
sess.err(&format!("failed to flush {} to `{}`: {}", name, path_buf.display(), err));
return;
}

debug!("save: data written to disk successfully");
}

fn encode_work_product_index(
work_products: &FxHashMap<WorkProductId, WorkProduct>,
encoder: &mut FileEncoder,
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_middle/src/ty/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use crate::ty::{
use rustc_ast as ast;
use rustc_attr as attr;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::memmap::Mmap;
use rustc_data_structures::profiling::SelfProfilerRef;
use rustc_data_structures::sharded::{IntoPointer, ShardedHashMap};
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
Expand Down Expand Up @@ -71,7 +72,7 @@ use std::sync::Arc;

pub trait OnDiskCache<'tcx>: rustc_data_structures::sync::Sync {
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
fn new(sess: &'tcx Session, data: Vec<u8>, start_pos: usize) -> Self
fn new(sess: &'tcx Session, data: Mmap, start_pos: usize) -> Self
where
Self: Sized;

Expand Down Expand Up @@ -100,6 +101,8 @@ pub trait OnDiskCache<'tcx>: rustc_data_structures::sync::Sync {
fn register_reused_dep_node(&self, tcx: TyCtxt<'tcx>, dep_node: &DepNode);
fn store_foreign_def_id_hash(&self, def_id: DefId, hash: DefPathHash);

fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>);

fn serialize(&self, tcx: TyCtxt<'tcx>, encoder: &mut FileEncoder) -> FileEncodeResult;
}

Expand Down
58 changes: 35 additions & 23 deletions compiler/rustc_query_impl/src/on_disk_cache.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::QueryCtxt;
use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet};
use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell};
use rustc_data_structures::memmap::Mmap;
use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell, RwLock};
use rustc_data_structures::unhash::UnhashMap;
use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, StableCrateId, LOCAL_CRATE};
use rustc_hir::definitions::DefPathHash;
Expand Down Expand Up @@ -42,7 +43,7 @@ const TAG_EXPN_DATA: u8 = 1;
/// any side effects that have been emitted during a query.
pub struct OnDiskCache<'sess> {
// The complete cache data in serialized form.
serialized_data: Vec<u8>,
serialized_data: RwLock<Option<Mmap>>,

// Collects all `QuerySideEffects` created during the current compilation
// session.
Expand Down Expand Up @@ -182,7 +183,8 @@ impl EncodedSourceFileId {
}

impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
fn new(sess: &'sess Session, data: Vec<u8>, start_pos: usize) -> Self {
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self {
debug_assert!(sess.opts.incremental.is_some());

// Wrap in a scope so we can borrow `data`.
Expand All @@ -204,7 +206,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
};

Self {
serialized_data: data,
serialized_data: RwLock::new(Some(data)),
file_index_to_stable_id: footer.file_index_to_stable_id,
file_index_to_file: Default::default(),
cnum_map: OnceCell::new(),
Expand All @@ -225,7 +227,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {

fn new_empty(source_map: &'sess SourceMap) -> Self {
Self {
serialized_data: Vec::new(),
serialized_data: RwLock::new(None),
file_index_to_stable_id: Default::default(),
file_index_to_file: Default::default(),
cnum_map: OnceCell::new(),
Expand All @@ -244,7 +246,31 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
}
}

fn serialize(&self, tcx: TyCtxt<'sess>, encoder: &mut FileEncoder) -> FileEncodeResult {
/// Execute all cache promotions and release the serialized backing Mmap.
///
/// Cache promotions require invoking queries, which needs to read the serialized data.
/// In order to serialize the new on-disk cache, the former on-disk cache file needs to be
/// deleted, hence we won't be able to refer to its memmapped data.
fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why this is in its own method instead of being part of serialize?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function invokes queries, so needs the on-disk cache to be available. Meanwhile, references to the mmapped file need to be dropped before calling save_in which removes the backing file. I will add a comment.

// Register any dep nodes that we reused from the previous session,
// but didn't `DepNode::construct` in this session. This ensures
// that their `DefPathHash` to `RawDefId` mappings are registered
// in 'latest_foreign_def_path_hashes' if necessary, since that
// normally happens in `DepNode::construct`.
tcx.dep_graph.register_reused_dep_nodes(tcx);

// Load everything into memory so we can write it out to the on-disk
// cache. The vast majority of cacheable query results should already
// be in memory, so this should be a cheap operation.
// Do this *before* we clone 'latest_foreign_def_path_hashes', since
// loading existing queries may cause us to create new DepNodes, which
// may in turn end up invoking `store_foreign_def_id_hash`
tcx.dep_graph.exec_cache_promotions(QueryCtxt::from_tcx(tcx));

*self.serialized_data.write() = None;
}

fn serialize<'tcx>(&self, tcx: TyCtxt<'tcx>, encoder: &mut FileEncoder) -> FileEncodeResult {
// Serializing the `DepGraph` should not modify it.
tcx.dep_graph.with_ignore(|| {
// Allocate `SourceFileIndex`es.
Expand All @@ -266,21 +292,6 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
(file_to_file_index, file_index_to_stable_id)
};

// Register any dep nodes that we reused from the previous session,
// but didn't `DepNode::construct` in this session. This ensures
// that their `DefPathHash` to `RawDefId` mappings are registered
// in 'latest_foreign_def_path_hashes' if necessary, since that
// normally happens in `DepNode::construct`.
tcx.dep_graph.register_reused_dep_nodes(tcx);

// Load everything into memory so we can write it out to the on-disk
// cache. The vast majority of cacheable query results should already
// be in memory, so this should be a cheap operation.
// Do this *before* we clone 'latest_foreign_def_path_hashes', since
// loading existing queries may cause us to create new DepNodes, which
// may in turn end up invoking `store_foreign_def_id_hash`
tcx.dep_graph.exec_cache_promotions(QueryCtxt::from_tcx(tcx));

let latest_foreign_def_path_hashes = self.latest_foreign_def_path_hashes.lock().clone();
let hygiene_encode_context = HygieneEncodeContext::default();

Expand Down Expand Up @@ -564,7 +575,7 @@ impl<'sess> OnDiskCache<'sess> {
})
}

fn with_decoder<'a, 'tcx, T, F: FnOnce(&mut CacheDecoder<'sess, 'tcx>) -> T>(
fn with_decoder<'a, 'tcx, T, F: for<'s> FnOnce(&mut CacheDecoder<'s, 'tcx>) -> T>(
&'sess self,
tcx: TyCtxt<'tcx>,
pos: AbsoluteBytePos,
Expand All @@ -575,9 +586,10 @@ impl<'sess> OnDiskCache<'sess> {
{
let cnum_map = self.cnum_map.get_or_init(|| Self::compute_cnum_map(tcx));

let serialized_data = self.serialized_data.read();
let mut decoder = CacheDecoder {
tcx,
opaque: opaque::Decoder::new(&self.serialized_data[..], pos.to_usize()),
opaque: opaque::Decoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()),
source_map: self.source_map,
cnum_map,
file_index_to_file: &self.file_index_to_file,
Expand Down