Skip to content

Commit

Permalink
[LICM] Infer proper alignment from loads during scalar promotion
Browse files Browse the repository at this point in the history
This patch fixes an issue where we would compute an unnecessarily small alignment during scalar promotion when no store is guaranteed to execute, but we've proven load speculation safety. Since speculating a load requires proving the existing alignment is valid at the new location (see Loads.cpp), we can use the alignment fact from the load.

For non-atomics, this is a performance problem. For atomics, this is a correctness issue, though an *incredibly* rare one to see in practice. For atomics, we might not be able to lower an improperly aligned load or store (i.e. i32 align 1). If such an instruction makes it all the way to codegen, we *may* fail to codegen the operation, or we may simply generate a slow call to a library function. The part that makes this super hard to see in practice is that the memory location actually *is* well aligned, and instcombine knows that. So, to see a failure, you have to have a) hit the bug in LICM, b) somehow hit a depth limit in InstCombine/ValueTracking to avoid fixing the alignment, and c) then have generated an instruction which fails codegen rather than simply emitting a slow libcall. All around, pretty hard to hit.

Differential Revision: https://reviews.llvm.org/D58809

llvm-svn: 355217
  • Loading branch information
preames committed Mar 1, 2019
1 parent 06ed385 commit 2226e9a
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 15 deletions.
26 changes: 23 additions & 3 deletions llvm/lib/Transforms/Scalar/LICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1924,9 +1924,21 @@ bool llvm::promoteLoopAccessesToScalars(
SawUnorderedAtomic |= Load->isAtomic();
SawNotAtomic |= !Load->isAtomic();

if (!DereferenceableInPH)
DereferenceableInPH = isSafeToExecuteUnconditionally(
*Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
unsigned InstAlignment = Load->getAlignment();
if (!InstAlignment)
InstAlignment =
MDL.getABITypeAlignment(Load->getType());

// Note that proving a load safe to speculate requires proving
// sufficient alignment at the target location. Proving it guaranteed
// to execute does as well. Thus we can increase our guaranteed
// alignment as well.
if (!DereferenceableInPH || (InstAlignment > Alignment))
if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
ORE, Preheader->getTerminator())) {
DereferenceableInPH = true;
Alignment = std::max(Alignment, InstAlignment);
}
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
Expand Down Expand Up @@ -1997,6 +2009,14 @@ bool llvm::promoteLoopAccessesToScalars(
if (SawUnorderedAtomic && SawNotAtomic)
return false;

// If we're inserting an atomic load in the preheader, we must be able to
// lower it. We're only guaranteed to be able to lower naturally aligned
// atomics.
auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
if (SawUnorderedAtomic &&
Alignment < MDL.getTypeStoreSize(SomePtrElemType))
return false;

// If we couldn't prove we can hoist the load, bail.
if (!DereferenceableInPH)
return false;
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Transforms/LICM/promote-tls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ entry:

for.body.lr.ph: ; preds = %entry
; CHECK-LABEL: for.body.lr.ph:
; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 1
; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 4
br label %for.header

for.header:
Expand All @@ -35,7 +35,7 @@ for.header:

early-exit:
; CHECK-LABEL: early-exit:
; CHECK: store i32 %new1.lcssa, i32* %addr, align 1
; CHECK: store i32 %new1.lcssa, i32* %addr, align 4
ret i32* null

for.body:
Expand All @@ -47,7 +47,7 @@ for.body:

for.cond.for.end_crit_edge: ; preds = %for.body
; CHECK-LABEL: for.cond.for.end_crit_edge:
; CHECK: store i32 %new.lcssa, i32* %addr, align 1
; CHECK: store i32 %new.lcssa, i32* %addr, align 4
%split = phi i32* [ %addr, %for.body ]
ret i32* null
}
Expand All @@ -62,7 +62,7 @@ entry:

for.body.lr.ph: ; preds = %entry
; CHECK-LABEL: for.body.lr.ph:
; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 1
; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 4
br label %for.header

for.header:
Expand All @@ -75,7 +75,7 @@ for.header:

early-exit:
; CHECK-LABEL: early-exit:
; CHECK: store i32 %new1.lcssa, i32* %addr, align 1
; CHECK: store i32 %new1.lcssa, i32* %addr, align 4
ret i32* null

for.body:
Expand All @@ -87,7 +87,7 @@ for.body:

for.cond.for.end_crit_edge: ; preds = %for.body
; CHECK-LABEL: for.cond.for.end_crit_edge:
; CHECK: store i32 %new.lcssa, i32* %addr, align 1
; CHECK: store i32 %new.lcssa, i32* %addr, align 4
%split = phi i32* [ %addr, %for.body ]
ret i32* null
}
Expand Down
10 changes: 4 additions & 6 deletions llvm/test/Transforms/LICM/scalar-promote-unwind.ll
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ define void @test3(i1 zeroext %y) uwtable {
entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: %a = alloca i32
; CHECK-NEXT: %a.promoted = load i32, i32* %a, align 1
; CHECK-NEXT: %a.promoted = load i32, i32* %a, align 4
%a = alloca i32
br label %for.body

Expand All @@ -90,20 +90,18 @@ for.body:

for.cond.cleanup:
; CHECK-LABEL: for.cond.cleanup:
; CHECK: store i32 %add.lcssa, i32* %a, align 1
; CHECK: store i32 %add.lcssa, i32* %a, align 4
; CHECK-NEXT: ret void
ret void
}

;; Same as test3, but with unordered atomics
;; FIXME: doing the transform w/o alignment here is wrong since we're
;; creating an unaligned atomic which we may not be able to lower.
define void @test3b(i1 zeroext %y) uwtable {
; CHECK-LABEL: @test3
entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: %a = alloca i32
; CHECK-NEXT: %a.promoted = load atomic i32, i32* %a unordered, align 1
; CHECK-NEXT: %a.promoted = load atomic i32, i32* %a unordered, align 4
%a = alloca i32
br label %for.body

Expand All @@ -119,7 +117,7 @@ for.body:

for.cond.cleanup:
; CHECK-LABEL: for.cond.cleanup:
; CHECK: store atomic i32 %add.lcssa, i32* %a unordered, align 1
; CHECK: store atomic i32 %add.lcssa, i32* %a unordered, align 4
; CHECK-NEXT: ret void
ret void
}
Expand Down

0 comments on commit 2226e9a

Please sign in to comment.