Improve performance of almost fresh builds #8837

Merged 5 commits on Nov 11, 2020
15 changes: 8 additions & 7 deletions src/cargo/core/compiler/custom_build.rs
@@ -128,7 +128,7 @@ fn emit_build_output(
output: &BuildOutput,
out_dir: &Path,
package_id: PackageId,
) {
) -> CargoResult<()> {
let library_paths = output
.library_paths
.iter()
@@ -144,7 +144,8 @@ fn emit_build_output(
out_dir,
}
.to_json_string();
state.stdout(msg);
state.stdout(msg)?;
Ok(())
}

fn build_work(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Job> {
@@ -353,13 +354,13 @@ fn build_work(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Job> {
warnings_in_case_of_panic.push(warning.to_owned());
}
if extra_verbose {
state.stdout(format!("{}{}", prefix, stdout));
state.stdout(format!("{}{}", prefix, stdout))?;
}
Ok(())
},
&mut |stderr| {
if extra_verbose {
state.stderr(format!("{}{}", prefix, stderr));
state.stderr(format!("{}{}", prefix, stderr))?;
}
Ok(())
},
@@ -396,7 +397,7 @@ fn build_work(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Job> {
BuildOutput::parse(&output.stdout, &pkg_name, &script_out_dir, &script_out_dir)?;

if json_messages {
emit_build_output(state, &parsed_output, script_out_dir.as_path(), id);
emit_build_output(state, &parsed_output, script_out_dir.as_path(), id)?;
}
build_script_outputs
.lock()
@@ -421,7 +422,7 @@ fn build_work(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Job> {
};

if json_messages {
emit_build_output(state, &output, script_out_dir.as_path(), id);
emit_build_output(state, &output, script_out_dir.as_path(), id)?;
}

build_script_outputs
@@ -432,7 +433,7 @@ fn build_work(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Job> {
});

let mut job = if cx.bcx.build_config.build_plan {
Job::new(Work::noop(), Freshness::Dirty)
Job::new_dirty(Work::noop())
} else {
fingerprint::prepare_target(cx, unit, false)?
};
9 changes: 3 additions & 6 deletions src/cargo/core/compiler/fingerprint.rs
@@ -337,10 +337,7 @@ use crate::util::paths;
use crate::util::{internal, profile, ProcessBuilder};

use super::custom_build::BuildDeps;
use super::job::{
Freshness::{Dirty, Fresh},
Job, Work,
};
use super::job::{Job, Work};
use super::{BuildContext, Context, FileFlavor, Unit};

/// Determines if a `unit` is up-to-date, and if not prepares necessary work to
@@ -396,7 +393,7 @@ pub fn prepare_target(cx: &mut Context<'_, '_>, unit: &Unit, force: bool) -> Car
}

if compare.is_ok() && !force {
return Ok(Job::new(Work::noop(), Fresh));
return Ok(Job::new_fresh());
}

// Clear out the old fingerprint file if it exists. This protects when
@@ -469,7 +466,7 @@ pub fn prepare_target(cx: &mut Context<'_, '_>, unit: &Unit, force: bool) -> Car
Work::new(move |_| write_fingerprint(&loc, &fingerprint))
};

Ok(Job::new(write_fingerprint, Dirty))
Ok(Job::new_dirty(write_fingerprint))
}

/// Dependency edge information for fingerprints. This is generated for each
15 changes: 13 additions & 2 deletions src/cargo/core/compiler/job.rs
@@ -40,9 +40,20 @@ impl Work {
}

impl Job {
/// Creates a new job that does nothing.
pub fn new_fresh() -> Job {
Job {
work: Work::noop(),
fresh: Freshness::Fresh,
}
}

/// Creates a new job representing a unit of work.
pub fn new(work: Work, fresh: Freshness) -> Job {
Job { work, fresh }
pub fn new_dirty(work: Work) -> Job {
Job {
work,
fresh: Freshness::Dirty,
}
}

/// Consumes this job by running it, returning the result of the
82 changes: 65 additions & 17 deletions src/cargo/core/compiler/job_queue.rs
@@ -162,10 +162,30 @@ impl std::fmt::Display for JobId {
}
}

/// A `JobState` is constructed by `JobQueue::run` and passed to `Job::run`. It includes everything
/// necessary to communicate between the main thread and the execution of the job.
///
/// The job may execute on either a dedicated thread or the main thread. If the job executes on the
/// main thread, the `output` field must be set to prevent a deadlock.
pub struct JobState<'a> {
/// Channel back to the main thread to coordinate messages and such.
///
/// When the `output` field is `Some`, care must be taken to avoid calling `push_bounded` on
/// the message queue to prevent a deadlock.
messages: Arc<Queue<Message>>,

/// Normally output is sent to the job queue with backpressure. When the job is fresh
/// however we need to immediately display the output to prevent a deadlock as the
/// output messages are processed on the same thread as they are sent from. `output`
/// defines where to output in this case.
///
/// Currently the `Shell` inside `Config` is wrapped in a `RefCell` and thus can't be passed
/// between threads. This means that it isn't possible for multiple output messages to be
/// interleaved. In the future, it may be wrapped in a `Mutex` instead. In this case
/// interleaving is still prevented as the lock would be held for the whole printing of an
/// output message.
output: Option<&'a Config>,
Contributor:

Can you include some notes that it is crucial that this is only set on the main thread? There are several concerns:

  • The RefMut from shell can cause panics if used between threads.
  • The event loop is carefully crafted to avoid flickering of the progress bar. Splitting the output into different places makes it harder to ensure that batches of messages are grouped together. I think the change is OK because it is all on the main thread, but it is pretty subtle.
  • Keeping one thread responsible for output helps prevent interleaving of messages. In particular, some messages are not printed atomically (like the "status" messages).

Member Author:

The Config is not Sync, so it isn't possible to assign it when not running on the main thread. Also, because Shell is wrapped in a RefCell, it can't be borrowed twice at the same time, so output can't be interleaved.

Contributor:

I see. Still, please add a comment discussing the concerns about the coordination of ownership of the output. My concern about interleaving is about the future, and things to watch out for if this is ever changed (like wrapping output in a mutex). It might also help to explain how JobState works with respect to how it is constructed and passed into the job threads.

Member Author:

Interleaving would still not be possible if the RefCell were replaced with a Mutex, as the mutex would be held for the duration of printing a single message. It would be hard to accidentally unlock the mutex in the middle of printing.

I can add some more docs to JobState.

Member Author:

Done
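
To illustrate the point made in this thread, here is a minimal sketch, not Cargo's actual types: a `Config`-like struct holding a `RefCell` is not `Sync`, so a shared reference to it cannot be handed to a spawned thread at all; the compiler enforces the "main thread only" rule, and a double borrow on the main thread panics instead of interleaving output. The `Shell` and `Config` stand-ins below are simplified assumptions, not the real definitions.

```rust
use std::cell::RefCell;

// Hypothetical stand-ins for Cargo's `Shell` and `Config`, reduced to the
// one property under discussion: `Config` owns a `RefCell<Shell>`.
struct Shell;
struct Config {
    shell: RefCell<Shell>,
}

fn emit(config: &Config) {
    // `RefCell<Shell>` is !Sync, so `Config` is !Sync and `&Config` is !Send.
    // Handing `config` to another thread therefore fails to compile:
    //
    //     std::thread::scope(|s| {
    //         s.spawn(|| {
    //             let _shell = config.shell.borrow_mut();
    //         });
    //     });
    //
    // On the main thread a borrow is fine, and a second simultaneous
    // `borrow_mut` would panic rather than silently interleave output.
    let _shell = config.shell.borrow_mut();
}

fn main() {
    emit(&Config {
        shell: RefCell::new(Shell),
    });
}
```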


/// The job id that this state is associated with, used when sending
/// messages back to the main thread.
id: JobId,
@@ -231,12 +251,24 @@ impl<'a> JobState<'a> {
.push(Message::BuildPlanMsg(module_name, cmd, filenames));
}

pub fn stdout(&self, stdout: String) {
self.messages.push_bounded(Message::Stdout(stdout));
pub fn stdout(&self, stdout: String) -> CargoResult<()> {
if let Some(config) = self.output {
writeln!(config.shell().out(), "{}", stdout)?;
} else {
self.messages.push_bounded(Message::Stdout(stdout));
}
Ok(())
}

pub fn stderr(&self, stderr: String) {
self.messages.push_bounded(Message::Stderr(stderr));
pub fn stderr(&self, stderr: String) -> CargoResult<()> {
if let Some(config) = self.output {
let mut shell = config.shell();
shell.print_ansi(stderr.as_bytes())?;
shell.err().write_all(b"\n")?;
} else {
self.messages.push_bounded(Message::Stderr(stderr));
}
Ok(())
}

/// A method used to signal to the coordinator thread that the rmeta file
@@ -826,16 +858,9 @@ impl<'cfg> DrainState<'cfg> {
self.note_working_on(cx.bcx.config, unit, fresh)?;
}

let doit = move || {
let state = JobState {
id,
messages: messages.clone(),
rmeta_required: Cell::new(rmeta_required),
_marker: marker::PhantomData,
};

let doit = move |state: JobState<'_>| {
let mut sender = FinishOnDrop {
messages: &messages,
messages: &state.messages,
id,
result: None,
};
@@ -854,7 +879,9 @@ impl<'cfg> DrainState<'cfg> {
// we need to make sure that the metadata is flagged as produced so
// send a synthetic message here.
if state.rmeta_required.get() && sender.result.as_ref().unwrap().is_ok() {
messages.push(Message::Finish(id, Artifact::Metadata, Ok(())));
state
.messages
.push(Message::Finish(state.id, Artifact::Metadata, Ok(())));
}

// Use a helper struct with a `Drop` implementation to guarantee
@@ -880,10 +907,31 @@ impl<'cfg> DrainState<'cfg> {
};

match fresh {
Freshness::Fresh => self.timings.add_fresh(),
Freshness::Dirty => self.timings.add_dirty(),
Freshness::Fresh => {
self.timings.add_fresh();
// Running a fresh job on the same thread is often much faster than spawning a new
// thread to run the job.
doit(JobState {
id,
messages: messages.clone(),
output: Some(cx.bcx.config),
rmeta_required: Cell::new(rmeta_required),
_marker: marker::PhantomData,
});
}
Freshness::Dirty => {
self.timings.add_dirty();
scope.spawn(move |_| {
doit(JobState {
id,
messages: messages.clone(),
output: None,
rmeta_required: Cell::new(rmeta_required),
_marker: marker::PhantomData,
})
});
}
}
scope.spawn(move |_| doit());
Member:

There was a fair amount of back and forth about this a while ago. Originally Cargo spawned threads for everything, but I eventually optimized it to do what you have here. In #7838 we reverted back to what we have today, but IIRC @ehuss did some measurements and found the cost to be negligible. I can't seem to find a record of that conversation though; @ehuss, do you remember it?

Member Author:

From the PR description of #7838:

On Linux and Windows, it was generally about 0 to 5% slower.

This roughly matches my results.

This PR still avoids buffering the rustc output, but for fresh jobs it sends the output directly to the terminal without the indirection through the message queue, which slightly improves performance.

Member:

One issue I believe remains with this PR is that it can still deadlock: if the message queue is full and a fresh job tries to push onto it, the deadlock happens. That won't happen for stdout, but other messages go through the queue as well (like build plans and such).

Another issue, whose details I'm having trouble remembering, is that we want the main "event loop" to complete each iteration quickly, and if we're copying a lot of rustc output onto the console that isn't happening. I forget the consequences of a slow loop iteration, though.

Member Author:

The other messages don't use the bounded push_bounded; they use the unbounded push.

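As a side note for readers, the distinction matters because only the bounded push can block. The sketch below is a simplified stand-in, not Cargo's actual Queue implementation; the method names push and push_bounded match the ones discussed here, but the body of each is an assumption for illustration. It shows why a bounded push from the same thread that drains the queue can deadlock, while an unbounded push never blocks.

```rust
use std::collections::VecDeque;
use std::sync::{Condvar, Mutex};

// Hypothetical, simplified queue; Cargo's real `Queue` differs in detail.
struct Queue<T> {
    items: Mutex<VecDeque<T>>,
    bound: usize,
    space: Condvar,
}

impl<T> Queue<T> {
    /// Unbounded push: never blocks, so it is safe to call from any thread,
    /// including the main thread that drains the queue.
    fn push(&self, item: T) {
        self.items.lock().unwrap().push_back(item);
    }

    /// Bounded push: blocks until the consumer makes room. If the caller *is*
    /// the consumer (the main thread draining messages), nobody can ever make
    /// room and the call blocks forever, which is the deadlock described above.
    fn push_bounded(&self, item: T) {
        let mut items = self.items.lock().unwrap();
        while items.len() >= self.bound {
            items = self.space.wait(items).unwrap();
        }
        items.push_back(item);
    }
}

fn main() {
    let queue = Queue {
        items: Mutex::new(VecDeque::new()),
        bound: 100,
        space: Condvar::new(),
    };
    queue.push("build plan message"); // unbounded: always fine
    queue.push_bounded("stdout line"); // bounded: fine only while there is room
}
```

This is why only the stdout and stderr paths, which use push_bounded, needed the direct-output fallback for fresh jobs.
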
Contributor:

did some measurements and found the cost to be negligible. I can't seem to find a record of that conversation though, @ehuss do you remember that?

Just the note in that PR that it was expected to cost about 5%. That number will vary significantly based on the project and the system. I ran some tests with this PR, and it is pretty much the same (no difference on macOS, about 5% on Linux).

My main concern from back then with this approach is that it introduces some subtle complexity that didn't really seem to be worth the 5% improvement, but I can't think of any specific problems with this PR other than being harder to understand.

Member:

Ah sorry I forgot about push_bounded vs push, makes sense to me!

@ehuss I'm curious, how are you measuring? I'd expect thread creation on Windows and macOS to be a good deal more expensive than on Linux.

Contributor:

Just using hyperfine, like this:

hyperfine -w1  "~/Proj/cargo/target/release/cargo check" "~/Proj/cargo2/target/release/cargo check"

In various projects of various sizes. Larger projects should show a bigger difference.

I just re-ran the test on a larger project (libra, 600+ packages), and the numbers look comparable to Linux, so I was probably just comparing a smaller project where the difference was less noticeable.

master:
  Time (mean ± σ):     851.4 ms ±  23.5 ms    [User: 574.5 ms, System: 247.4 ms]
  Range (min … max):   820.7 ms … 897.5 ms    10 runs

improve_perf:
  Time (mean ± σ):     815.7 ms ±  34.4 ms    [User: 567.6 ms, System: 289.2 ms]
  Range (min … max):   757.6 ms … 857.3 ms    10 runs

Summary
  '~/Proj/rust/cargo3/target/release/cargo check' ran
    1.04 ± 0.05 times faster than '~/Proj/rust/cargo2/target/release/cargo check'


Ok(())
}
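
Stepping back from the diff, the dispatch pattern this hunk introduces can be reduced to a standalone sketch. The types and names below are illustrative stand-ins rather than Cargo's API: fresh jobs run inline on the coordinator thread because they are cheap, while dirty jobs (which actually invoke rustc) are spawned onto their own threads.

```rust
use std::thread;

// Illustrative stand-ins, not Cargo's actual types.
enum Freshness {
    Fresh,
    Dirty,
}

struct Job {
    fresh: Freshness,
    work: Box<dyn FnOnce() + Send>,
}

// Fresh jobs run inline on the coordinator thread; dirty jobs each get a
// worker thread. This avoids paying thread-spawn overhead for work that is
// mostly bookkeeping.
fn drain(jobs: Vec<Job>) {
    thread::scope(|scope| {
        for job in jobs {
            match job.fresh {
                Freshness::Fresh => (job.work)(),
                Freshness::Dirty => {
                    scope.spawn(move || (job.work)());
                }
            }
        }
    });
}

fn main() {
    drain(vec![
        Job {
            fresh: Freshness::Fresh,
            work: Box::new(|| println!("fresh: nothing to rebuild")),
        },
        Job {
            fresh: Freshness::Dirty,
            work: Box::new(|| println!("dirty: run rustc")),
        },
    ]);
}
```

The wrinkle the real code has to handle, as discussed above, is output: an inline job must write directly to the shell instead of pushing onto the bounded message queue, since that queue is drained by the very thread the job is running on.
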
16 changes: 8 additions & 8 deletions src/cargo/core/compiler/mod.rs
@@ -130,9 +130,9 @@ fn compile<'cfg>(
custom_build::prepare(cx, unit)?
} else if unit.mode.is_doc_test() {
// We run these targets later, so this is just a no-op for now.
Job::new(Work::noop(), Freshness::Fresh)
Job::new_fresh()
} else if build_plan {
Job::new(rustc(cx, unit, &exec.clone())?, Freshness::Dirty)
Job::new_dirty(rustc(cx, unit, &exec.clone())?)
} else {
let force = exec.force_rebuild(unit) || force_rebuild;
let mut job = fingerprint::prepare_target(cx, unit, force)?;
@@ -448,7 +448,7 @@ fn link_targets(cx: &mut Context<'_, '_>, unit: &Unit, fresh: bool) -> CargoResu
fresh,
}
.to_json_string();
state.stdout(msg);
state.stdout(msg)?;
}
Ok(())
}))
@@ -1139,7 +1139,7 @@ fn on_stdout_line(
_package_id: PackageId,
_target: &Target,
) -> CargoResult<()> {
state.stdout(line.to_string());
state.stdout(line.to_string())?;
Ok(())
}

@@ -1177,7 +1177,7 @@ fn on_stderr_line_inner(
// something like that), so skip over everything that doesn't look like a
// JSON message.
if !line.starts_with('{') {
state.stderr(line.to_string());
state.stderr(line.to_string())?;
return Ok(true);
}

@@ -1189,7 +1189,7 @@ fn on_stderr_line_inner(
// to stderr.
Err(e) => {
debug!("failed to parse json: {:?}", e);
state.stderr(line.to_string());
state.stderr(line.to_string())?;
return Ok(true);
}
};
@@ -1225,7 +1225,7 @@ fn on_stderr_line_inner(
.map(|v| String::from_utf8(v).expect("utf8"))
.expect("strip should never fail")
};
state.stderr(rendered);
state.stderr(rendered)?;
return Ok(true);
}
}
@@ -1316,7 +1316,7 @@ fn on_stderr_line_inner(
// Switch json lines from rustc/rustdoc that appear on stderr to stdout
// instead. We want the stdout of Cargo to always be machine parseable as
// stderr has our colorized human-readable messages.
state.stdout(msg);
state.stdout(msg)?;
Ok(true)
}
