Skip to content

Commit

Permalink
Auto merge of rust-lang#77257 - ecstatic-morse:optimize-int-range-fro…
Browse files Browse the repository at this point in the history
…m-pat, r=Mark-Simulacrum

Optimize `IntRange::from_pat`, then shrink `ParamEnv`

Resolves rust-lang#77058.

r? `@Mark-Simulacrum`
cc `@vandenheuvel`

Looking at the output of `perf report` for rust-lang#76244, the hot instructions seemed to be around the call to `pat_constructor` in `IntRange::from_pat`. I carried out an obvious optimization, but it actually made the instruction count higher (see rust-lang#77075). However, it seems to have mitigated whatever was causing the pipeline stalls, so when combined with rust-lang#76244, it's a net win.

As you can see below, the regression in rust-lang#76244 seems to have originated from something measured by `stalled-cycles-backend`. I'll try to collect some finer-grained stats to see if I can isolate it. I wish I had a better idea of what was going on here. I'd like to prevent the regression from reappearing in the future due to small changes in unrelated code.

<details>
<summary>Current `master`:</summary>

```
 Performance counter stats for 'cargo +baseline-stage1 check':

          2,275.67 msec task-clock:u              #    0.998 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
            49,826      page-faults:u             #    0.022 M/sec
     5,117,221,678      cycles:u                  #    2.249 GHz
       299,655,943      stalled-cycles-frontend:u #    5.86% frontend cycles idle
     2,284,213,395      stalled-cycles-backend:u  #   44.64% backend cycles idle
     8,051,871,959      instructions:u            #    1.57  insn per cycle
                                                  #    0.28  stalled cycles per insn
     1,359,589,402      branches:u                #  597.447 M/sec
         7,359,347      branch-misses:u           #    0.54% of all branches

       2.281030026 seconds time elapsed

       2.108197000 seconds user
       0.164183000 seconds sys
```
</details>

<details>
<summary>Shrink `ParamEnv` without changing `IntRange::from_pat`:</summary>

```
 Performance counter stats for 'cargo +perf-stage1 check':

          2,751.79 msec task-clock:u              #    0.996 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
            50,103      page-faults:u             #    0.018 M/sec
     6,260,590,019      cycles:u                  #    2.275 GHz
       317,355,920      stalled-cycles-frontend:u #    5.07% frontend cycles idle
     3,397,743,582      stalled-cycles-backend:u  #   54.27% backend cycles idle
     8,276,224,367      instructions:u            #    1.32  insn per cycle
                                                  #    0.41  stalled cycles per insn
     1,370,453,386      branches:u                #  498.023 M/sec
         7,281,031      branch-misses:u           #    0.53% of all branches

       2.763265838 seconds time elapsed

       2.544578000 seconds user
       0.204548000 seconds sys
```
</details>

<details>
<summary>Shrink `ParamEnv` and change `IntRange::from_pat`: </summary>

```
 Performance counter stats for 'cargo +perf-stage1 check':

          2,295.57 msec task-clock:u              #    0.996 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
            49,959      page-faults:u             #    0.022 M/sec
     5,151,407,066      cycles:u                  #    2.244 GHz
       324,517,829      stalled-cycles-frontend:u #    6.30% frontend cycles idle
     2,301,671,001      stalled-cycles-backend:u  #   44.68% backend cycles idle
     8,130,868,329      instructions:u            #    1.58  insn per cycle
                                                  #    0.28  stalled cycles per insn
     1,356,618,512      branches:u                #  590.972 M/sec
         7,323,800      branch-misses:u           #    0.54% of all branches

       2.304509653 seconds time elapsed

       2.128090000 seconds user
       0.163909000 seconds sys
```
</details>
  • Loading branch information
bors committed Sep 29, 2020
2 parents fc2daaa + c4d8089 commit 48cab67
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 9 deletions.
2 changes: 1 addition & 1 deletion compiler/rustc_infer/src/traits/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ pub type TraitObligation<'tcx> = Obligation<'tcx, ty::PolyTraitPredicate<'tcx>>;

// `PredicateObligation` is used a lot. Make sure it doesn't unintentionally get bigger.
#[cfg(target_arch = "x86_64")]
static_assert_size!(PredicateObligation<'_>, 40);
static_assert_size!(PredicateObligation<'_>, 32);

pub type Obligations<'tcx, O> = Vec<Obligation<'tcx, O>>;
pub type PredicateObligations<'tcx> = Vec<PredicateObligation<'tcx>>;
Expand Down
5 changes: 1 addition & 4 deletions compiler/rustc_middle/src/ty/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1751,9 +1751,6 @@ pub struct ParamEnv<'tcx> {
///
/// Note: This is packed, use the reveal() method to access it.
packed: CopyTaggedPtr<&'tcx List<Predicate<'tcx>>, traits::Reveal, true>,

/// FIXME: This field is not used, but removing it causes a performance degradation. See #76913.
unused_field: Option<DefId>,
}

unsafe impl rustc_data_structures::tagged_ptr::Tag for traits::Reveal {
Expand Down Expand Up @@ -1834,7 +1831,7 @@ impl<'tcx> ParamEnv<'tcx> {
/// Construct a trait environment with the given set of predicates.
#[inline]
pub fn new(caller_bounds: &'tcx List<Predicate<'tcx>>, reveal: Reveal) -> Self {
ty::ParamEnv { packed: CopyTaggedPtr::new(caller_bounds, reveal), unused_field: None }
ty::ParamEnv { packed: CopyTaggedPtr::new(caller_bounds, reveal) }
}

pub fn with_user_facing(mut self) -> Self {
Expand Down
30 changes: 27 additions & 3 deletions compiler/rustc_mir_build/src/thir/pattern/_match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1787,9 +1787,32 @@ impl<'tcx> IntRange<'tcx> {
param_env: ty::ParamEnv<'tcx>,
pat: &Pat<'tcx>,
) -> Option<IntRange<'tcx>> {
match pat_constructor(tcx, param_env, pat)? {
IntRange(range) => Some(range),
_ => None,
// This MUST be kept in sync with `pat_constructor`.
match *pat.kind {
PatKind::AscribeUserType { .. } => bug!(), // Handled by `expand_pattern`
PatKind::Or { .. } => bug!("Or-pattern should have been expanded earlier on."),

PatKind::Binding { .. }
| PatKind::Wild
| PatKind::Leaf { .. }
| PatKind::Deref { .. }
| PatKind::Variant { .. }
| PatKind::Array { .. }
| PatKind::Slice { .. } => None,

PatKind::Constant { value } => Self::from_const(tcx, param_env, value, pat.span),

PatKind::Range(PatRange { lo, hi, end }) => {
let ty = lo.ty;
Self::from_range(
tcx,
lo.eval_bits(tcx, param_env, lo.ty),
hi.eval_bits(tcx, param_env, hi.ty),
ty,
&end,
pat.span,
)
}
}
}

Expand Down Expand Up @@ -2196,6 +2219,7 @@ fn pat_constructor<'tcx>(
param_env: ty::ParamEnv<'tcx>,
pat: &Pat<'tcx>,
) -> Option<Constructor<'tcx>> {
// This MUST be kept in sync with `IntRange::from_pat`.
match *pat.kind {
PatKind::AscribeUserType { .. } => bug!(), // Handled by `expand_pattern`
PatKind::Binding { .. } | PatKind::Wild => None,
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_trait_selection/src/traits/fulfill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pub struct PendingPredicateObligation<'tcx> {

// `PendingPredicateObligation` is used a lot. Make sure it doesn't unintentionally get bigger.
#[cfg(target_arch = "x86_64")]
static_assert_size!(PendingPredicateObligation<'_>, 64);
static_assert_size!(PendingPredicateObligation<'_>, 56);

impl<'a, 'tcx> FulfillmentContext<'tcx> {
/// Creates a new fulfillment context.
Expand Down

0 comments on commit 48cab67

Please sign in to comment.