Skip to content

Commit

Permalink
Auto merge of #52266 - michaelwoerister:incr-thinlto-preliminaries, r…
Browse files Browse the repository at this point in the history
…=alexcrichton

Preliminary work for incremental ThinLTO.

Since implementing incremental ThinLTO is a bit more involved than I initially thought, I'm splitting out some of the things that already work. This PR (1) adds a way of accessing some ThinLTO information in `rustc` and (2) does some cleanup around CGU/object file naming (which makes things quite a bit nicer).

This is probably best reviewed one commit at a time.
  • Loading branch information
bors committed Jul 13, 2018
2 parents 254f879 + e045a6c commit a14a361
Show file tree
Hide file tree
Showing 25 changed files with 457 additions and 171 deletions.
79 changes: 77 additions & 2 deletions src/librustc/mir/mono.rs
Expand Up @@ -8,15 +8,16 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use hir::def_id::DefId;
use hir::def_id::{DefId, CrateNum};
use syntax::ast::NodeId;
use syntax::symbol::InternedString;
use syntax::symbol::{Symbol, InternedString};
use ty::{Instance, TyCtxt};
use util::nodemap::FxHashMap;
use rustc_data_structures::base_n;
use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
StableHasher};
use ich::{Fingerprint, StableHashingContext, NodeIdHashingMode};
use std::fmt;
use std::hash::Hash;

#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)]
Expand Down Expand Up @@ -173,6 +174,80 @@ impl<'tcx> CodegenUnit<'tcx> {
self.size_estimate = Some(size_estimate + delta);
}
}

/// Builds the name of a codegen unit (CGU).
///
/// CGU names have to satisfy two requirements:
/// - They must be usable as a file name on any kind of file system.
/// - They must not collide with other CGU names, not even with those of a
///   different version of the same crate.
///
/// For that reason the only special characters used are '.' and '-', and
/// every name starts with the crate name and the crate disambiguator.
///
/// The generated names have the form:
///
/// ```text
/// <crate-name>.<crate-disambiguator>(-<component>)*[.<special-suffix>]
/// ```
///
/// Because of the '.' in front of `<special-suffix>`, a name carrying a
/// special suffix can never collide with a name that is built purely out of
/// regular Rust identifiers (e.g. module paths).
///
/// Unless the `human_readable_cgu_names` debugging option is set, the
/// readable name is additionally passed through `CodegenUnit::mangle_name()`
/// before it is interned.
pub fn build_cgu_name<I, C, S>(tcx: TyCtxt,
                               cnum: CrateNum,
                               components: I,
                               special_suffix: Option<S>)
                               -> InternedString
    where I: IntoIterator<Item=C>,
          C: fmt::Display,
          S: fmt::Display,
{
    // The readable form is always built first; mangling works on top of it.
    let readable_name = CodegenUnit::build_cgu_name_no_mangle(tcx,
                                                              cnum,
                                                              components,
                                                              special_suffix);

    if tcx.sess.opts.debugging_opts.human_readable_cgu_names {
        return readable_name;
    }

    let mangled_name = CodegenUnit::mangle_name(&readable_name.as_str()[..]);
    Symbol::intern(&mangled_name[..]).as_interned_str()
}

/// Builds a CGU name exactly like `CodegenUnit::build_cgu_name()` does, but
/// always returns the human-readable form, i.e. the name is never mangled.
///
/// The result is `<crate-name>.<crate-disambiguator>`, followed by one
/// `-<component>` per entry of `components`, followed by `.<special-suffix>`
/// if `special_suffix` is `Some`.
pub fn build_cgu_name_no_mangle<I, C, S>(tcx: TyCtxt,
                                         cnum: CrateNum,
                                         components: I,
                                         special_suffix: Option<S>)
                                         -> InternedString
    where I: IntoIterator<Item=C>,
          C: fmt::Display,
          S: fmt::Display,
{
    use std::fmt::Write;

    let mut name = String::with_capacity(64);

    // Every name begins with "<crate-name>.<crate-disambiguator>".
    let crate_name = tcx.crate_name(cnum);
    let disambiguator = tcx.crate_disambiguator(cnum);
    write!(name, "{}.{}", crate_name, disambiguator).unwrap();

    // Each component is appended with a leading '-'.
    components.into_iter().for_each(|component| {
        write!(name, "-{}", component).unwrap();
    });

    // The special suffix is separated by a '.', a character that cannot
    // occur in a regular Rust identifier, so it cannot clash with one.
    if let Some(suffix) = special_suffix {
        write!(name, ".{}", suffix).unwrap();
    }

    Symbol::intern(&name[..]).as_interned_str()
}
}

impl<'a, 'tcx> HashStable<StableHashingContext<'a>> for CodegenUnit<'tcx> {
Expand Down
9 changes: 9 additions & 0 deletions src/librustc/session/mod.rs
Expand Up @@ -26,6 +26,7 @@ use util::nodemap::{FxHashMap, FxHashSet};
use util::common::{duration_to_secs_str, ErrorReported};
use util::common::ProfileQueriesMsg;

use rustc_data_structures::base_n;
use rustc_data_structures::sync::{self, Lrc, Lock, LockCell, OneThread, Once, RwLock};

use syntax::ast::NodeId;
Expand Down Expand Up @@ -1185,6 +1186,14 @@ impl CrateDisambiguator {
}
}

/// Formats the disambiguator as a single base-n encoded number, using the
/// `base_n::CASE_INSENSITIVE` base.
impl fmt::Display for CrateDisambiguator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The fingerprint is exposed as two values; the first one becomes the
        // low 64 bits and the second one the high 64 bits of a `u128`.
        let (low, high) = self.0.as_value();
        let combined = ((high as u128) << 64) | (low as u128);
        let encoded = base_n::encode(combined, base_n::CASE_INSENSITIVE);
        f.write_str(&encoded)
    }
}

impl From<Fingerprint> for CrateDisambiguator {
fn from(fingerprint: Fingerprint) -> CrateDisambiguator {
CrateDisambiguator(fingerprint)
Expand Down
7 changes: 0 additions & 7 deletions src/librustc_codegen_llvm/back/link.rs
Expand Up @@ -45,13 +45,6 @@ use std::process::{Output, Stdio};
use std::str;
use syntax::attr;

/// Name of the LLVM module that contains the crate metadata. The `.` in the
/// name is deliberate: it keeps the name from clashing with the name of a
/// user-defined module.
pub const METADATA_MODULE_NAME: &str = "crate.metadata";

/// Name of the LLVM module for the allocator shim. It contains a `.` for the
/// same reason as the metadata module name does.
pub const ALLOCATOR_MODULE_NAME: &str = "crate.allocator";

pub use rustc_codegen_utils::link::{find_crate_name, filename_for_input, default_output_for_target,
invalid_output_for_target, build_link_meta, out_filename,
check_file_is_writeable};
Expand Down
151 changes: 142 additions & 9 deletions src/librustc_codegen_llvm/back/lto.rs
Expand Up @@ -20,16 +20,23 @@ use rustc::hir::def_id::LOCAL_CRATE;
use rustc::middle::exported_symbols::SymbolExportLevel;
use rustc::session::config::{self, Lto};
use rustc::util::common::time_ext;
use rustc_data_structures::fx::FxHashMap;
use time_graph::Timeline;
use {ModuleCodegen, ModuleLlvm, ModuleKind, ModuleSource};

use libc;

use std::ffi::CString;
use std::ffi::{CString, CStr};
use std::fs::File;
use std::io;
use std::mem;
use std::path::Path;
use std::ptr;
use std::slice;
use std::sync::Arc;

/// File name, relative to the incremental compilation session directory,
/// under which the ThinLTO import information is saved.
pub const THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME: &str = "thin-lto-imports.bin";

pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
match crate_type {
config::CrateTypeExecutable |
Expand Down Expand Up @@ -193,7 +200,7 @@ pub(crate) fn run(cgcx: &CodegenContext,
}
Lto::Thin |
Lto::ThinLocal => {
thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline)
thin_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline)
}
Lto::No => unreachable!(),
}
Expand Down Expand Up @@ -231,7 +238,7 @@ fn fat_lto(cgcx: &CodegenContext,
.expect("must be codegen'ing at least one module");
let module = modules.remove(costliest_module);
let llmod = module.llvm().expect("can't lto pre-codegened modules").llmod;
info!("using {:?} as a base module", module.llmod_id);
info!("using {:?} as a base module", module.name);

// For all other modules we codegened we'll need to link them into our own
// bitcode. All modules were codegened in their own LLVM context, however,
Expand All @@ -241,7 +248,7 @@ fn fat_lto(cgcx: &CodegenContext,
for module in modules {
let llvm = module.llvm().expect("can't lto pre-codegened modules");
let buffer = ModuleBuffer::new(llvm.llmod);
let llmod_id = CString::new(&module.llmod_id[..]).unwrap();
let llmod_id = CString::new(&module.name[..]).unwrap();
serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
}

Expand Down Expand Up @@ -346,7 +353,8 @@ impl Drop for Linker {
/// calculating the *index* for ThinLTO. This index will then be shared amongst
/// all of the `LtoModuleCodegen` units returned below and destroyed once
/// they all go out of scope.
fn thin_lto(diag_handler: &Handler,
fn thin_lto(cgcx: &CodegenContext,
diag_handler: &Handler,
modules: Vec<ModuleCodegen>,
serialized_modules: Vec<(SerializedModule, CString)>,
symbol_white_list: &[*const libc::c_char],
Expand All @@ -368,9 +376,9 @@ fn thin_lto(diag_handler: &Handler,
// the most expensive portion of this small bit of global
// analysis!
for (i, module) in modules.iter().enumerate() {
info!("local module: {} - {}", i, module.llmod_id);
info!("local module: {} - {}", i, module.name);
let llvm = module.llvm().expect("can't lto precodegened module");
let name = CString::new(module.llmod_id.clone()).unwrap();
let name = CString::new(module.name.clone()).unwrap();
let buffer = ThinBuffer::new(llvm.llmod);
thin_modules.push(llvm::ThinLTOModule {
identifier: name.as_ptr(),
Expand All @@ -379,7 +387,7 @@ fn thin_lto(diag_handler: &Handler,
});
thin_buffers.push(buffer);
module_names.push(name);
timeline.record(&module.llmod_id);
timeline.record(&module.name);
}

// FIXME: All upstream crates are deserialized internally in the
Expand Down Expand Up @@ -424,6 +432,18 @@ fn thin_lto(diag_handler: &Handler,
let msg = format!("failed to prepare thin LTO context");
return Err(write::llvm_err(&diag_handler, msg))
}

// Save the ThinLTO import information for incremental compilation.
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
let path = incr_comp_session_dir.join(THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME);
let imports = ThinLTOImports::from_thin_lto_data(data);
if let Err(err) = imports.save_to_file(&path) {
let msg = format!("Error while writing ThinLTO import data: {}",
err);
return Err(write::llvm_err(&diag_handler, msg));
}
}

let data = ThinData(data);
info!("thin LTO data created");
timeline.record("data");
Expand Down Expand Up @@ -656,7 +676,6 @@ impl ThinModule {
llcx,
tm,
}),
llmod_id: self.name().to_string(),
name: self.name().to_string(),
kind: ModuleKind::Regular,
};
Expand Down Expand Up @@ -776,3 +795,117 @@ impl ThinModule {
Ok(module)
}
}


/// Records which modules each module imports from during ThinLTO. The map
/// can be written to and read back from a plain-text file (see
/// `save_to_file()` and `load_from_file()`).
#[derive(Debug)]
pub struct ThinLTOImports {
/// Maps the LLVM name of an importing module to the list of modules it
/// imports from.
imports: FxHashMap<String, Vec<String>>,
}

impl ThinLTOImports {

pub fn new() -> ThinLTOImports {
ThinLTOImports {
imports: FxHashMap(),
}
}

/// Load the ThinLTO import map from ThinLTOData.
unsafe fn from_thin_lto_data(data: *const llvm::ThinLTOData) -> ThinLTOImports {

fn module_name_to_str(c_str: &CStr) -> &str {
match c_str.to_str() {
Ok(s) => s,
Err(e) => {
bug!("Encountered non-utf8 LLVM module name `{}`: {}",
c_str.to_string_lossy(),
e)
}
}
}

unsafe extern "C" fn imported_module_callback(payload: *mut libc::c_void,
importing_module_name: *const libc::c_char,
imported_module_name: *const libc::c_char) {
let map = &mut* (payload as *mut ThinLTOImports);

let importing_module_name = CStr::from_ptr(importing_module_name);
let importing_module_name = module_name_to_str(&importing_module_name);
let imported_module_name = CStr::from_ptr(imported_module_name);
let imported_module_name = module_name_to_str(&imported_module_name);

if !map.imports.contains_key(importing_module_name) {
map.imports.insert(importing_module_name.to_owned(), vec![]);
}

map.imports
.get_mut(importing_module_name)
.unwrap()
.push(imported_module_name.to_owned());
}

let mut map = ThinLTOImports {
imports: FxHashMap(),
};

llvm::LLVMRustGetThinLTOModuleImports(data,
imported_module_callback,
&mut map as *mut _ as *mut libc::c_void);
map
}

pub fn save_to_file(&self, path: &Path) -> io::Result<()> {
use std::io::Write;

let file = File::create(path)?;
let mut writer = io::BufWriter::new(file);

for (importing_module_name, imported_modules) in &self.imports {
writeln!(writer, "{}", importing_module_name)?;

for imported_module in imported_modules {
writeln!(writer, " {}", imported_module)?;
}

writeln!(writer)?;
}

Ok(())
}

pub fn load_from_file(path: &Path) -> io::Result<ThinLTOImports> {
use std::io::BufRead;

let mut imports = FxHashMap();
let mut current_module = None;
let mut current_imports = vec![];

let file = File::open(path)?;

for line in io::BufReader::new(file).lines() {
let line = line?;

if line.is_empty() {
let importing_module = current_module
.take()
.expect("Importing module not set");

imports.insert(importing_module,
mem::replace(&mut current_imports, vec![]));
} else if line.starts_with(" ") {
// This is an imported module
assert_ne!(current_module, None);
current_imports.push(line.trim().to_string());
} else {
// This is the beginning of a new module
assert_eq!(current_module, None);
current_module = Some(line.trim().to_string());
}
}

Ok(ThinLTOImports {
imports
})
}
}
3 changes: 1 addition & 2 deletions src/librustc_codegen_llvm/back/write.rs
Expand Up @@ -696,7 +696,7 @@ unsafe fn codegen(cgcx: &CodegenContext,

if config.emit_bc_compressed {
let dst = bc_out.with_extension(RLIB_BYTECODE_EXTENSION);
let data = bytecode::encode(&module.llmod_id, data);
let data = bytecode::encode(&module.name, data);
if let Err(e) = fs::write(&dst, data) {
diag_handler.err(&format!("failed to write bytecode: {}", e));
}
Expand Down Expand Up @@ -1308,7 +1308,6 @@ fn execute_work_item(cgcx: &CodegenContext,
assert_eq!(bytecode_compressed.is_some(), config.emit_bc_compressed);

Ok(WorkItemResult::Compiled(CompiledModule {
llmod_id: module.llmod_id.clone(),
name: module_name,
kind: ModuleKind::Regular,
pre_existing: true,
Expand Down

0 comments on commit a14a361

Please sign in to comment.