Skip to content

Commit

Permalink
Merging r367417:
Browse files Browse the repository at this point in the history
------------------------------------------------------------------------
r367417 | lebedevri | 2019-07-31 14:06:38 +0200 (Wed, 31 Jul 2019) | 38 lines

[DivRemPairs] Avoid RAUW pitfalls (PR42823)

Summary:
`DivRemPairs` internally creates two maps:
* {sign, divident, divisor} -> div instruction
* {sign, divident, divisor} -> rem instruction
Then it iterates over rem map, and looks if there is an entry
in div map with the same key. Then depending on some internal logic
it may RAUW rem instruction with something else.

But if that rem instruction is an input to other div/rem,
then it was used as a key in these maps, so the old value (used in key)
is now dandling, because RAUW didn't update those maps.
And we can't even RAUW map keys in general, there's `ValueMap`,
but we don't have a single `Value` as key...

The bug was discovered via D65298, and the test there exists.
Now, i'm not sure how to expose this issue in trunk.
The bug is clearly there if i change the map keys to be `AssertingVH`/`PoisoningVH`,
but i guess this didn't miscompiled anything thus far?
I really don't think this is benin without that patch.

The fix is actually rather straight-forward - instead of trying to somehow
shoe-horn `ValueMap` here (doesn't fit, key isn't just `Value`), or writing a new
`ValueMap` with key being a struct of `Value`s, we can just have an intermediate
data structure - a vector, each entry containing matching `Div, Rem` pair,
and pre-filling it before doing any modifications.
This way we won't need to query map after doing RAUW, so no bug is possible.

Reviewers: spatel, bogner, RKSimon, craig.topper

Reviewed By: spatel

Subscribers: hiraditya, hans, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65451
------------------------------------------------------------------------

llvm-svn: 367531
  • Loading branch information
zmodem committed Aug 1, 2019
1 parent ddea81c commit f08bb47
Show file tree
Hide file tree
Showing 6 changed files with 639 additions and 142 deletions.
11 changes: 7 additions & 4 deletions llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h
Expand Up @@ -19,6 +19,7 @@

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/IR/ValueHandle.h"
#include <cstdint>

namespace llvm {
Expand All @@ -28,8 +29,8 @@ class Value;

struct DivRemMapKey {
bool SignedOp;
Value *Dividend;
Value *Divisor;
AssertingVH<Value> Dividend;
AssertingVH<Value> Divisor;

DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor)
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
Expand All @@ -50,8 +51,10 @@ template <> struct DenseMapInfo<DivRemMapKey> {
}

static unsigned getHashValue(const DivRemMapKey &Val) {
return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
reinterpret_cast<uintptr_t>(Val.Divisor)) ^
return (unsigned)(reinterpret_cast<uintptr_t>(
static_cast<Value *>(Val.Dividend)) ^
reinterpret_cast<uintptr_t>(
static_cast<Value *>(Val.Divisor))) ^
(unsigned)Val.SignedOp;
}
};
Expand Down
114 changes: 88 additions & 26 deletions llvm/lib/Transforms/Scalar/DivRemPairs.cpp
Expand Up @@ -23,6 +23,7 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"

using namespace llvm;

#define DEBUG_TYPE "div-rem-pairs"
Expand All @@ -32,24 +33,44 @@ STATISTIC(NumDecomposed, "Number of instructions decomposed");
DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
"Controls transformations in div-rem-pairs pass");

/// Find matching pairs of integer div/rem ops (they have the same numerator,
/// denominator, and signedness). If they exist in different basic blocks, bring
/// them together by hoisting or replace the common division operation that is
/// implicit in the remainder:
/// X % Y <--> X - ((X / Y) * Y).
///
/// We can largely ignore the normal safety and cost constraints on speculation
/// of these ops when we find a matching pair. This is because we are already
/// guaranteed that any exceptions and most cost are already incurred by the
/// first member of the pair.
///
/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
/// SimplifyCFG, but it's split off on its own because it's different enough
/// that it doesn't quite match the stated objectives of those passes.
static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
const DominatorTree &DT) {
bool Changed = false;
/// A thin wrapper to store two values that we matched as div-rem pair.
/// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
struct DivRemPairWorklistEntry {
/// The actual udiv/sdiv instruction. Source of truth.
AssertingVH<Instruction> DivInst;

/// The instruction that we have matched as a remainder instruction.
/// Should only be used as Value, don't introspect it.
AssertingVH<Instruction> RemInst;

DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
: DivInst(DivInst_), RemInst(RemInst_) {
assert((DivInst->getOpcode() == Instruction::UDiv ||
DivInst->getOpcode() == Instruction::SDiv) &&
"Not a division.");
assert(DivInst->getType() == RemInst->getType() && "Types should match.");
// We can't check anything else about remainder instruction,
// it's not strictly required to be a urem/srem.
}

/// The type for this pair, identical for both the div and rem.
Type *getType() const { return DivInst->getType(); }

/// Is this pair signed or unsigned?
bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }

/// In this pair, what are the divident and divisor?
Value *getDividend() const { return DivInst->getOperand(0); }
Value *getDivisor() const { return DivInst->getOperand(1); }
};
using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;

/// Find matching pairs of integer div/rem ops (they have the same numerator,
/// denominator, and signedness). Place those pairs into a worklist for further
/// processing. This indirection is needed because we have to use TrackingVH<>
/// because we will be doing RAUW, and if one of the rem instructions we change
/// happens to be an input to another div/rem in the maps, we'd have problems.
static DivRemWorklistTy getWorklist(Function &F) {
// Insert all divide and remainder instructions into maps keyed by their
// operands and opcode (signed or unsigned).
DenseMap<DivRemMapKey, Instruction *> DivMap;
Expand All @@ -69,6 +90,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
}
}

// We'll accumulate the matching pairs of div-rem instructions here.
DivRemWorklistTy Worklist;

// We can iterate over either map because we are only looking for matched
// pairs. Choose remainders for efficiency because they are usually even more
// rare than division.
Expand All @@ -78,12 +102,45 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
if (!DivInst)
continue;

// We have a matching pair of div/rem instructions. If one dominates the
// other, hoist and/or replace one.
// We have a matching pair of div/rem instructions.
NumPairs++;
Instruction *RemInst = RemPair.second;
bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);

// Place it in the worklist.
Worklist.emplace_back(DivInst, RemInst);
}

return Worklist;
}

/// Find matching pairs of integer div/rem ops (they have the same numerator,
/// denominator, and signedness). If they exist in different basic blocks, bring
/// them together by hoisting or replace the common division operation that is
/// implicit in the remainder:
/// X % Y <--> X - ((X / Y) * Y).
///
/// We can largely ignore the normal safety and cost constraints on speculation
/// of these ops when we find a matching pair. This is because we are already
/// guaranteed that any exceptions and most cost are already incurred by the
/// first member of the pair.
///
/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
/// SimplifyCFG, but it's split off on its own because it's different enough
/// that it doesn't quite match the stated objectives of those passes.
static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
const DominatorTree &DT) {
bool Changed = false;

// Get the matching pairs of div-rem instructions. We want this extra
// indirection to avoid dealing with having to RAUW the keys of the maps.
DivRemWorklistTy Worklist = getWorklist(F);

// Process each entry in the worklist.
for (DivRemPairWorklistEntry &E : Worklist) {
bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());

auto &DivInst = E.DivInst;
auto &RemInst = E.RemInst;

// If the target supports div+rem and the instructions are in the same block
// already, there's nothing to do. The backend should handle this. If the
Expand All @@ -110,8 +167,8 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// The target does not have a single div/rem operation. Decompose the
// remainder calculation as:
// X % Y --> X - ((X / Y) * Y).
Value *X = RemInst->getOperand(0);
Value *Y = RemInst->getOperand(1);
Value *X = E.getDividend();
Value *Y = E.getDivisor();
Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
Instruction *Sub = BinaryOperator::CreateSub(X, Mul);

Expand Down Expand Up @@ -152,8 +209,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,

// Now kill the explicit remainder. We have replaced it with:
// (sub X, (mul (div X, Y), Y)
RemInst->replaceAllUsesWith(Sub);
RemInst->eraseFromParent();
Sub->setName(RemInst->getName() + ".decomposed");
Instruction *OrigRemInst = RemInst;
// Update AssertingVH<> with new instruction so it doesn't assert.
RemInst = Sub;
// And replace the original instruction with the new one.
OrigRemInst->replaceAllUsesWith(Sub);
OrigRemInst->eraseFromParent();
NumDecomposed++;
}
Changed = true;
Expand Down Expand Up @@ -188,7 +250,7 @@ struct DivRemPairsLegacyPass : public FunctionPass {
return optimizeDivRem(F, TTI, DT);
}
};
}
} // namespace

char DivRemPairsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
Expand Down
172 changes: 172 additions & 0 deletions llvm/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll
@@ -0,0 +1,172 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -div-rem-pairs -S -mtriple=powerpc64-unknown-unknown | FileCheck %s

declare void @foo(i32, i32)

define void @decompose_illegal_srem_same_block(i32 %a, i32 %b) {
; CHECK-LABEL: @decompose_illegal_srem_same_block(
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[T0:%.*]] = mul i32 [[DIV]], [[B]]
; CHECK-NEXT: [[REM:%.*]] = sub i32 [[A]], [[T0]]
; CHECK-NEXT: call void @foo(i32 [[REM]], i32 [[DIV]])
; CHECK-NEXT: ret void
;
%div = sdiv i32 %a, %b
%t0 = mul i32 %div, %b
%rem = sub i32 %a, %t0
call void @foo(i32 %rem, i32 %div)
ret void
}

define void @decompose_illegal_urem_same_block(i32 %a, i32 %b) {
; CHECK-LABEL: @decompose_illegal_urem_same_block(
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[T0:%.*]] = mul i32 [[DIV]], [[B]]
; CHECK-NEXT: [[REM:%.*]] = sub i32 [[A]], [[T0]]
; CHECK-NEXT: call void @foo(i32 [[REM]], i32 [[DIV]])
; CHECK-NEXT: ret void
;
%div = udiv i32 %a, %b
%t0 = mul i32 %div, %b
%rem = sub i32 %a, %t0
call void @foo(i32 %rem, i32 %div)
ret void
}

; Recompose and hoist the srem if it's safe and free, otherwise keep as-is..

define i16 @hoist_srem(i16 %a, i16 %b) {
; CHECK-LABEL: @hoist_srem(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = sdiv i16 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[DIV]], 42
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
; CHECK: if:
; CHECK-NEXT: [[T0:%.*]] = mul i16 [[DIV]], [[B]]
; CHECK-NEXT: [[REM:%.*]] = sub i16 [[A]], [[T0]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[RET:%.*]] = phi i16 [ [[REM]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i16 [[RET]]
;
entry:
%div = sdiv i16 %a, %b
%cmp = icmp eq i16 %div, 42
br i1 %cmp, label %if, label %end

if:
%t0 = mul i16 %div, %b
%rem = sub i16 %a, %t0
br label %end

end:
%ret = phi i16 [ %rem, %if ], [ 3, %entry ]
ret i16 %ret
}

; Recompose and hoist the urem if it's safe and free, otherwise keep as-is..

define i8 @hoist_urem(i8 %a, i8 %b) {
; CHECK-LABEL: @hoist_urem(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[DIV]], 42
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
; CHECK: if:
; CHECK-NEXT: [[T0:%.*]] = mul i8 [[DIV]], [[B]]
; CHECK-NEXT: [[REM:%.*]] = sub i8 [[A]], [[T0]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[RET:%.*]] = phi i8 [ [[REM]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i8 [[RET]]
;
entry:
%div = udiv i8 %a, %b
%cmp = icmp eq i8 %div, 42
br i1 %cmp, label %if, label %end

if:
%t0 = mul i8 %div, %b
%rem = sub i8 %a, %t0
br label %end

end:
%ret = phi i8 [ %rem, %if ], [ 3, %entry ]
ret i8 %ret
}

; Be careful with RAUW/invalidation if this is a srem-of-srem.

define i32 @srem_of_srem_unexpanded(i32 %X, i32 %Y, i32 %Z) {
; CHECK-LABEL: @srem_of_srem_unexpanded(
; CHECK-NEXT: [[T0:%.*]] = mul nsw i32 [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = mul nsw i32 [[T0]], [[T1]]
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[T1]], [[T0]]
; CHECK-NEXT: [[T3_DECOMPOSED:%.*]] = sub i32 [[X]], [[TMP1]]
; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3_DECOMPOSED]], [[Y]]
; CHECK-NEXT: [[T5:%.*]] = mul nsw i32 [[T4]], [[Y]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[T4]], [[Y]]
; CHECK-NEXT: [[T6_DECOMPOSED:%.*]] = sub i32 [[T3_DECOMPOSED]], [[TMP2]]
; CHECK-NEXT: ret i32 [[T6_DECOMPOSED]]
;
%t0 = mul nsw i32 %Z, %Y
%t1 = sdiv i32 %X, %t0
%t2 = mul nsw i32 %t0, %t1
%t3 = srem i32 %X, %t0
%t4 = sdiv i32 %t3, %Y
%t5 = mul nsw i32 %t4, %Y
%t6 = srem i32 %t3, %Y
ret i32 %t6
}
define i32 @srem_of_srem_expanded(i32 %X, i32 %Y, i32 %Z) {
; CHECK-LABEL: @srem_of_srem_expanded(
; CHECK-NEXT: [[T0:%.*]] = mul nsw i32 [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = mul nsw i32 [[T0]], [[T1]]
; CHECK-NEXT: [[T3:%.*]] = sub nsw i32 [[X]], [[T2]]
; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3]], [[Y]]
; CHECK-NEXT: [[T5:%.*]] = mul nsw i32 [[T4]], [[Y]]
; CHECK-NEXT: [[T6:%.*]] = sub nsw i32 [[T3]], [[T5]]
; CHECK-NEXT: ret i32 [[T6]]
;
%t0 = mul nsw i32 %Z, %Y
%t1 = sdiv i32 %X, %t0
%t2 = mul nsw i32 %t0, %t1
%t3 = sub nsw i32 %X, %t2
%t4 = sdiv i32 %t3, %Y
%t5 = mul nsw i32 %t4, %Y
%t6 = sub nsw i32 %t3, %t5
ret i32 %t6
}

; If the target doesn't have a unified div/rem op for the type, keep decomposed rem

define i128 @dont_hoist_urem(i128 %a, i128 %b) {
; CHECK-LABEL: @dont_hoist_urem(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = udiv i128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[DIV]], 42
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
; CHECK: if:
; CHECK-NEXT: [[T0:%.*]] = mul i128 [[DIV]], [[B]]
; CHECK-NEXT: [[REM:%.*]] = sub i128 [[A]], [[T0]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[RET:%.*]] = phi i128 [ [[REM]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i128 [[RET]]
;
entry:
%div = udiv i128 %a, %b
%cmp = icmp eq i128 %div, 42
br i1 %cmp, label %if, label %end

if:
%t0 = mul i128 %div, %b
%rem = sub i128 %a, %t0
br label %end

end:
%ret = phi i128 [ %rem, %if ], [ 3, %entry ]
ret i128 %ret
}

0 comments on commit f08bb47

Please sign in to comment.