Skip to content

Commit 08b3c0d

Browse files
committed
[GlobalISel] Combine (G_UMULH x, (1 << c)) -> x >> (bitwidth - c)
In order to not generate an unnecessary G_CTLZ, I extended the constant folder in the CSEMIRBuilder to handle G_CTLZ. I also added some extra handling of vector constants. It seems we don't have any support for doing constant folding of vector constants, so the tests show some other useless G_SUB instructions too. Differential Revision: https://reviews.llvm.org/D111036
1 parent 23800b0 commit 08b3c0d

File tree

12 files changed

+342
-153
lines changed

12 files changed

+342
-153
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,10 @@ class CombinerHelper {
610610
bool matchUDivByConst(MachineInstr &MI);
611611
void applyUDivByConst(MachineInstr &MI);
612612

613+
// (G_UMULH x, (1 << c)) -> x >> (bitwidth - c)
614+
bool matchUMulHToLShr(MachineInstr &MI);
615+
void applyUMulHToLShr(MachineInstr &MI);
616+
613617
/// Try to transform \p MI by using all of the above
614618
/// combine functions. Returns true if changed.
615619
bool tryCombine(MachineInstr &MI);

llvm/include/llvm/CodeGen/GlobalISel/Utils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,11 @@ Optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
272272
Register Src,
273273
const MachineRegisterInfo &MRI);
274274

275+
/// Tries to constant fold a G_CTLZ operation on \p Src. If \p Src is a vector
276+
/// then it tries to do an element-wise constant fold.
277+
Optional<SmallVector<unsigned>>
278+
ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI);
279+
275280
/// Test if the given value is known to have exactly one bit set. This differs
276281
/// from computeKnownBits in that it doesn't necessarily determine which bit is
277282
/// set.

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,14 @@ def mulo_by_2: GICombineRule<
724724
[{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]),
725725
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
726726

727+
def mulh_to_lshr : GICombineRule<
728+
(defs root:$root),
729+
(match (wip_match_opcode G_UMULH):$root,
730+
[{ return Helper.matchUMulHToLShr(*${root}); }]),
731+
(apply [{ Helper.applyUMulHToLShr(*${root}); }])>;
732+
733+
def mulh_combines : GICombineGroup<[mulh_to_lshr]>;
734+
727735
// FIXME: These should use the custom predicate feature once it lands.
728736
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
729737
undef_to_negative_one,
@@ -771,7 +779,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
771779
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
772780
truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
773781
form_bitfield_extract, constant_fold, fabs_fneg_fold,
774-
intdiv_combines]>;
782+
intdiv_combines, mulh_combines]>;
775783

776784
// A combine group used to for prelegalizer combiners at -O0. The combines in
777785
// this group have been selected based on experiments to balance code size and

llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
1515
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
16+
#include "llvm/CodeGen/GlobalISel/Utils.h"
1617
#include "llvm/IR/DebugInfoMetadata.h"
1718

1819
using namespace llvm;
@@ -213,6 +214,22 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
213214
return buildFConstant(DstOps[0], *Cst);
214215
break;
215216
}
217+
case TargetOpcode::G_CTLZ: {
218+
assert(SrcOps.size() == 1 && "Expected one source");
219+
assert(DstOps.size() == 1 && "Expected one dest");
220+
auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI());
221+
if (!MaybeCsts)
222+
break;
223+
if (MaybeCsts->size() == 1)
224+
return buildConstant(DstOps[0], (*MaybeCsts)[0]);
225+
// This was a vector constant. Build a G_BUILD_VECTOR for them.
226+
SmallVector<Register> ConstantRegs;
227+
LLT VecTy = DstOps[0].getLLTTy(*getMRI());
228+
for (unsigned Cst : *MaybeCsts)
229+
ConstantRegs.emplace_back(
230+
buildConstant(VecTy.getScalarType(), Cst).getReg(0));
231+
return buildBuildVector(DstOps[0], ConstantRegs);
232+
}
216233
}
217234
bool CanCopy = checkCopyToDefsPossible(DstOps);
218235
if (!canPerformCSEForOpc(Opc))

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,16 @@ static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
6969
return I;
7070
}
7171

72+
/// Determines the LogBase2 value for a non-null input value using the
73+
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
74+
static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
75+
auto &MRI = *MIB.getMRI();
76+
LLT Ty = MRI.getType(V);
77+
auto Ctlz = MIB.buildCTLZ(Ty, V);
78+
auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
79+
return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
80+
}
81+
7282
/// \returns The big endian in-memory byte position of byte \p I in a
7383
/// \p ByteWidth bytes wide type.
7484
///
@@ -4579,6 +4589,36 @@ void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
45794589
replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
45804590
}
45814591

4592+
bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
4593+
assert(MI.getOpcode() == TargetOpcode::G_UMULH);
4594+
Register RHS = MI.getOperand(2).getReg();
4595+
Register Dst = MI.getOperand(0).getReg();
4596+
LLT Ty = MRI.getType(Dst);
4597+
LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4598+
auto CstVal = isConstantOrConstantSplatVector(*MRI.getVRegDef(RHS), MRI);
4599+
if (!CstVal || CstVal->isOne() || !isPowerOf2_64(CstVal->getZExtValue()))
4600+
return false;
4601+
return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
4602+
}
4603+
4604+
void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
4605+
Register LHS = MI.getOperand(1).getReg();
4606+
Register RHS = MI.getOperand(2).getReg();
4607+
Register Dst = MI.getOperand(0).getReg();
4608+
LLT Ty = MRI.getType(Dst);
4609+
LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4610+
unsigned NumEltBits = Ty.getScalarSizeInBits();
4611+
4612+
Builder.setInstrAndDebugLoc(MI);
4613+
auto LogBase2 = buildLogBase2(RHS, Builder);
4614+
auto ShiftAmt =
4615+
Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
4616+
auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
4617+
Builder.buildLShr(Dst, LHS, Trunc);
4618+
MI.eraseFromParent();
4619+
}
4620+
4621+
45824622
bool CombinerHelper::tryCombine(MachineInstr &MI) {
45834623
if (tryCombineCopy(MI))
45844624
return true;

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,37 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
726726
return None;
727727
}
728728

729+
Optional<SmallVector<unsigned>>
730+
llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
731+
LLT Ty = MRI.getType(Src);
732+
SmallVector<unsigned> FoldedCTLZs;
733+
auto tryFoldScalar = [&](Register R) -> Optional<unsigned> {
734+
auto MaybeCst = getIConstantVRegVal(R, MRI);
735+
if (!MaybeCst)
736+
return None;
737+
return MaybeCst->countLeadingZeros();
738+
};
739+
if (Ty.isVector()) {
740+
// Try to constant fold each element.
741+
auto *BV = getOpcodeDef<GBuildVector>(Src, MRI);
742+
if (!BV)
743+
return None;
744+
for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
745+
if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) {
746+
FoldedCTLZs.emplace_back(*MaybeFold);
747+
continue;
748+
}
749+
return None;
750+
}
751+
return FoldedCTLZs;
752+
}
753+
if (auto MaybeCst = tryFoldScalar(Src)) {
754+
FoldedCTLZs.emplace_back(*MaybeCst);
755+
return FoldedCTLZs;
756+
}
757+
return None;
758+
}
759+
729760
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
730761
GISelKnownBits *KB) {
731762
Optional<DefinitionAndSourceRegister> DefSrcReg =

llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,16 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
2222
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_1]
2323
; GISEL-NEXT: adrp x8, .LCPI0_0
2424
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI0_0]
25-
; GISEL-NEXT: umull2 v3.4s, v0.8h, v1.8h
26-
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
27-
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
28-
; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
29-
; GISEL-NEXT: umull2 v3.4s, v0.8h, v2.8h
30-
; GISEL-NEXT: umull v0.4s, v0.4h, v2.4h
31-
; GISEL-NEXT: uzp2 v0.8h, v0.8h, v3.8h
32-
; GISEL-NEXT: add v0.8h, v0.8h, v1.8h
25+
; GISEL-NEXT: adrp x8, .LCPI0_2
26+
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI0_2]
27+
; GISEL-NEXT: sub v1.8h, v2.8h, v1.8h
28+
; GISEL-NEXT: neg v1.8h, v1.8h
29+
; GISEL-NEXT: umull2 v2.4s, v0.8h, v3.8h
30+
; GISEL-NEXT: umull v3.4s, v0.4h, v3.4h
31+
; GISEL-NEXT: uzp2 v2.8h, v3.8h, v2.8h
32+
; GISEL-NEXT: sub v0.8h, v0.8h, v2.8h
33+
; GISEL-NEXT: ushl v0.8h, v0.8h, v1.8h
34+
; GISEL-NEXT: add v0.8h, v0.8h, v2.8h
3335
; GISEL-NEXT: ushr v0.8h, v0.8h, #4
3436
; GISEL-NEXT: ret
3537
%1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
@@ -155,28 +157,30 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
155157
;
156158
; GISEL-LABEL: combine_vec_udiv_nonuniform3:
157159
; GISEL: // %bb.0:
160+
; GISEL-NEXT: adrp x8, .LCPI3_5
161+
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_5]
158162
; GISEL-NEXT: adrp x8, .LCPI3_4
159-
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_4]
160-
; GISEL-NEXT: adrp x8, .LCPI3_3
161-
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
163+
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_4]
162164
; GISEL-NEXT: adrp x8, .LCPI3_2
163165
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_2]
164166
; GISEL-NEXT: adrp x8, .LCPI3_1
165167
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI3_1]
168+
; GISEL-NEXT: adrp x8, .LCPI3_3
169+
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI3_3]
166170
; GISEL-NEXT: adrp x8, .LCPI3_0
167-
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI3_0]
168-
; GISEL-NEXT: umull2 v6.4s, v0.8h, v2.8h
171+
; GISEL-NEXT: ldr q6, [x8, :lo12:.LCPI3_0]
172+
; GISEL-NEXT: sub v3.8h, v4.8h, v3.8h
173+
; GISEL-NEXT: umull2 v4.4s, v0.8h, v2.8h
169174
; GISEL-NEXT: umull v2.4s, v0.4h, v2.4h
170-
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v6.8h
171-
; GISEL-NEXT: cmeq v1.8h, v1.8h, v5.8h
172-
; GISEL-NEXT: sub v5.8h, v0.8h, v2.8h
173-
; GISEL-NEXT: umull2 v6.4s, v5.8h, v3.8h
174-
; GISEL-NEXT: umull v3.4s, v5.4h, v3.4h
175-
; GISEL-NEXT: uzp2 v3.8h, v3.8h, v6.8h
176-
; GISEL-NEXT: neg v4.8h, v4.8h
175+
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
176+
; GISEL-NEXT: neg v3.8h, v3.8h
177+
; GISEL-NEXT: sub v4.8h, v0.8h, v2.8h
178+
; GISEL-NEXT: cmeq v1.8h, v1.8h, v6.8h
179+
; GISEL-NEXT: ushl v3.8h, v4.8h, v3.8h
180+
; GISEL-NEXT: neg v5.8h, v5.8h
177181
; GISEL-NEXT: shl v1.8h, v1.8h, #15
178182
; GISEL-NEXT: add v2.8h, v3.8h, v2.8h
179-
; GISEL-NEXT: ushl v2.8h, v2.8h, v4.8h
183+
; GISEL-NEXT: ushl v2.8h, v2.8h, v5.8h
180184
; GISEL-NEXT: sshr v1.8h, v1.8h, #15
181185
; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
182186
; GISEL-NEXT: ret

llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,20 @@ body: |
3434
; CHECK-NEXT: {{ $}}
3535
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
3636
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
37-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
38-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
37+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
3938
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
4039
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
41-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
4240
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR]]
4341
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
44-
; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR1]]
45-
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
46-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR2]](<8 x s16>)
47-
; CHECK-NEXT: $q0 = COPY [[LSHR]](<8 x s16>)
42+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
43+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
44+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 16
45+
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16)
46+
; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(<8 x s16>) = G_SUB [[BUILD_VECTOR3]], [[BUILD_VECTOR2]]
47+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[SUB]], [[SUB1]](<8 x s16>)
48+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[LSHR]], [[UMULH]]
49+
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR1]](<8 x s16>)
50+
; CHECK-NEXT: $q0 = COPY [[LSHR1]](<8 x s16>)
4851
; CHECK-NEXT: RET_ReallyLR implicit $q0
4952
%0:_(<8 x s16>) = COPY $q0
5053
%2:_(s16) = G_CONSTANT i16 23
@@ -208,30 +211,33 @@ body: |
208211
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
209212
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
210213
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 9363
211-
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
212-
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
213-
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
214-
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
215-
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 18351
216-
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 12137
217-
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
218-
; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 23705
219-
; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
220-
; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 1041
221-
; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 517
222-
; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
223-
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C11]](s16), [[C13]](s16), [[C14]](s16), [[C15]](s16), [[C16]](s16), [[C18]](s16), [[C19]](s16)
224-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16)
225-
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C10]](s16), [[C12]](s16), [[C12]](s16), [[C12]](s16), [[C12]](s16), [[C17]](s16), [[C17]](s16), [[C20]](s16)
214+
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
215+
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
216+
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
217+
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 18351
218+
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 12137
219+
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
220+
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 23705
221+
; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
222+
; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 1041
223+
; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 517
224+
; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
225+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C10]](s16), [[C12]](s16), [[C13]](s16), [[C14]](s16), [[C15]](s16), [[C17]](s16), [[C18]](s16)
226+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C11]](s16), [[C11]](s16), [[C11]](s16), [[C11]](s16), [[C16]](s16), [[C16]](s16), [[C19]](s16)
226227
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
227228
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
228-
; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR2]]
229-
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
230-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR3]](<8 x s16>)
231-
; CHECK-NEXT: [[C21:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
229+
; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
230+
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16)
231+
; CHECK-NEXT: [[C21:%[0-9]+]]:_(s16) = G_CONSTANT i16 16
232232
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16)
233-
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
234-
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR]]
233+
; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(<8 x s16>) = G_SUB [[BUILD_VECTOR4]], [[BUILD_VECTOR3]]
234+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[SUB]], [[SUB1]](<8 x s16>)
235+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[LSHR]], [[UMULH]]
236+
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR2]](<8 x s16>)
237+
; CHECK-NEXT: [[C22:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
238+
; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16)
239+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR5]]
240+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR1]]
235241
; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
236242
; CHECK-NEXT: RET_ReallyLR implicit $q0
237243
%0:_(<8 x s16>) = COPY $q0

0 commit comments

Comments
 (0)