Skip to content

Commit

Permalink
[InstCombine][x86] fix insertion point bug in vector demanded elts fo…
Browse files Browse the repository at this point in the history
…ld (PR48476)

This transform was added at:
c63799f

From what I see, it's the first demanded elements transform that adds
a new instruction using the IRBuilder. There are similar folds in
the generic demanded bits chunk of instcombine that also use the
InsertPointGuard code pattern.

The tests here would assert/crash because the new instruction was
being added at the start of the demanded elements analysis rather
than at the instruction that is being replaced.
  • Loading branch information
rotateright committed Dec 11, 2020
1 parent 0d48d26 commit 204bdc5
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 5 deletions.
17 changes: 12 additions & 5 deletions llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1916,13 +1916,20 @@ Optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
case Intrinsic::x86_sse3_addsub_ps:
case Intrinsic::x86_avx_addsub_pd_256:
case Intrinsic::x86_avx_addsub_ps_256: {
// If none of the even or none of the odd lanes are required, turn this
// into a generic FP math instruction.
APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
if (DemandedElts.isSubsetOf(SubMask))
return IC.Builder.CreateFSub(II.getArgOperand(0), II.getArgOperand(1));

APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
if (DemandedElts.isSubsetOf(AddMask))
return IC.Builder.CreateFAdd(II.getArgOperand(0), II.getArgOperand(1));
bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
if (IsSubOnly || IsAddOnly) {
assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
IRBuilderBase::InsertPointGuard Guard(IC.Builder);
IC.Builder.SetInsertPoint(&II);
Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
return IC.Builder.CreateBinOp(
IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
}

simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
Expand Down
27 changes: 27 additions & 0 deletions llvm/test/Transforms/InstCombine/X86/x86-addsub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>)
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>)
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>)
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>)
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg) #0

;
; Demanded Elts
Expand Down Expand Up @@ -164,4 +165,30 @@ define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, flo
ret void
}

define double @PR48476_fsub(<2 x double> %x) {
; CHECK-LABEL: @PR48476_fsub(
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> <double 0.000000e+00, double undef>, [[X:%.*]]
; CHECK-NEXT: [[T2:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[X]], i8 6)
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x double> [[T2]], i32 0
; CHECK-NEXT: ret double [[VECEXT]]
;
%t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> zeroinitializer, <2 x double> %x)
%t2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %t1, <2 x double> %x, i8 6)
%vecext = extractelement <2 x double> %t2, i32 0
ret double %vecext
}

define double @PR48476_fadd_fsub(<2 x double> %x) {
; CHECK-LABEL: @PR48476_fadd_fsub(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], <double undef, double 0.000000e+00>
; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[S]], [[X]]
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[VECEXT]]
;
%t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> zeroinitializer, <2 x double> %x)
%s = shufflevector <2 x double> %t1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
%t2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %s, <2 x double> %x)
%vecext = extractelement <2 x double> %t2, i32 0
ret double %vecext
}

0 comments on commit 204bdc5

Please sign in to comment.