Skip to content

Commit

Permalink
[MergeICmps] Ignore clobbering instructions before the loads
Browse files Browse the repository at this point in the history
This is another followup to D106591. Even if there is an
instruction that clobbers one of the loads, this doesn't matter if
it happens before the loads. Those instructions aren't affected by
the transform at all.

The gep-references-bb.ll is modified to preserve the spirit of the
test, as the store to @g no longer impacts the transform.

Differential Revision: https://reviews.llvm.org/D108782
  • Loading branch information
nikic committed Aug 27, 2021
1 parent 696e790 commit 757409d
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 25 deletions.
12 changes: 7 additions & 5 deletions llvm/lib/Transforms/Scalar/MergeICmps.cpp
Expand Up @@ -239,11 +239,13 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
// If this instruction may clobber the loads and is in middle of the BCE cmp
// block instructions, then bail for now.
if (Inst->mayWriteToMemory()) {
// Disallow instructions that might modify the BCE operands
MemoryLocation LLoc = MemoryLocation::get(Cmp.Lhs.LoadI);
MemoryLocation RLoc = MemoryLocation::get(Cmp.Rhs.LoadI);
if (isModSet(AA.getModRefInfo(Inst, LLoc)) ||
isModSet(AA.getModRefInfo(Inst, RLoc)))
auto MayClobber = [&](LoadInst *LI) {
// If a potentially clobbering instruction comes before the load,
// we can still safely sink the load.
return !Inst->comesBefore(LI) &&
isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI)));
};
if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI))
return false;
}
// Make sure this instruction does not use any of the BCE cmp block
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll
Expand Up @@ -11,12 +11,12 @@ target triple = "x86_64"
define i1 @bug(%Triple* nonnull dereferenceable(16) %lhs, %Triple* nonnull dereferenceable(16) %rhs) nofree nosync {
; CHECK-LABEL: @bug(
; CHECK-NEXT: bb0:
; CHECK-NEXT: store i32 1, i32* @g, align 4
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [[TRIPLE:%.*]], %Triple* [[RHS:%.*]], i64 0, i32 0
; CHECK-NEXT: [[L0_ADDR:%.*]] = getelementptr inbounds [[TRIPLE]], %Triple* [[LHS:%.*]], i64 0, i32 0
; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[L0_ADDR]], align 4
; CHECK-NEXT: [[R0_ADDR:%.*]] = getelementptr inbounds [[TRIPLE]], %Triple* [[RHS]], i64 0, i32 0
; CHECK-NEXT: [[R0:%.*]] = load i32, i32* [[R0_ADDR]], align 4
; CHECK-NEXT: store i32 1, i32* @g, align 4
; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i32 [[L0]], [[R0]]
; CHECK-NEXT: br i1 [[CMP0]], label %"bb1+bb2", label [[FINAL:%.*]]
; CHECK: "bb1+bb2":
Expand All @@ -32,12 +32,12 @@ define i1 @bug(%Triple* nonnull dereferenceable(16) %lhs, %Triple* nonnull deref
; CHECK-NEXT: ret i1 [[RET]]
;
bb0:
store i32 1, i32* @g
%gep = getelementptr %Triple, %Triple* %rhs, i64 0, i32 0
%l0_addr = getelementptr inbounds %Triple, %Triple* %lhs, i64 0, i32 0
%l0 = load i32, i32* %l0_addr, align 4
%r0_addr = getelementptr inbounds %Triple, %Triple* %rhs, i64 0, i32 0
%r0 = load i32, i32* %r0_addr, align 4
store i32 1, i32* @g
%cmp0 = icmp eq i32 %l0, %r0
br i1 %cmp0, label %bb1, label %final

Expand Down
28 changes: 10 additions & 18 deletions llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
Expand Up @@ -141,28 +141,20 @@ opeq1.exit:
ret i1 %8
}

; TODO: The call happen before the loads, so it cannot clobber them.
; The call happens before the loads, so it cannot clobber them.
define zeroext i1 @opeq1_call_before_loads(
; X86-LABEL: @opeq1_call_before_loads(
; X86-NEXT: entry:
; X86-NEXT: "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
; X86-NEXT: call void (...) @foo()
; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
; X86-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
; X86-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
; X86-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
; X86-NEXT: br i1 [[CMP_I]], label %"land.rhs.i+land.rhs.i.2+land.rhs.i.3", label [[OPEQ1_EXIT:%.*]]
; X86: "land.rhs.i+land.rhs.i.2+land.rhs.i.3":
; X86-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1
; X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP2]] to i8*
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP3]] to i8*
; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 12)
; X86-NEXT: [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0
; X86-NEXT: br label [[OPEQ1_EXIT]]
; X86-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
; X86: opeq1.exit:
; X86-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TMP4]], %"land.rhs.i+land.rhs.i.2+land.rhs.i.3" ]
; X86-NEXT: ret i1 [[TMP5]]
; X86-NEXT: ret i1 [[TMP2]]
;
; Make sure this call is moved to the beginning of the entry block.
%S* nocapture readonly dereferenceable(16) %a,
Expand Down

0 comments on commit 757409d

Please sign in to comment.