112 changes: 56 additions & 56 deletions llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -69,12 +69,12 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
// Custom print the vector compare instructions to get the immediate
// translated into the mnemonic.
switch (MI->getOpcode()) {
case X86::CMPPDrmi: case X86::CMPPDrri:
case X86::CMPPSrmi: case X86::CMPPSrri:
case X86::CMPSDrm: case X86::CMPSDrr:
case X86::CMPSDrm_Int: case X86::CMPSDrr_Int:
case X86::CMPSSrm: case X86::CMPSSrr:
case X86::CMPSSrm_Int: case X86::CMPSSrr_Int:
case X86::CMPPDrmi: case X86::CMPPDrri:
case X86::CMPPSrmi: case X86::CMPPSrri:
case X86::CMPSDrmi: case X86::CMPSDrri:
case X86::CMPSDrmi_Int: case X86::CMPSDrri_Int:
case X86::CMPSSrmi: case X86::CMPSSrri:
case X86::CMPSSrmi_Int: case X86::CMPSSrri_Int:
if (Imm >= 0 && Imm <= 7) {
OS << '\t';
printCMPMnemonic(MI, /*IsVCMP*/false, OS);
@@ -96,56 +96,56 @@
}
break;

case X86::VCMPPDrmi: case X86::VCMPPDrri:
case X86::VCMPPDYrmi: case X86::VCMPPDYrri:
case X86::VCMPPDZ128rmi: case X86::VCMPPDZ128rri:
case X86::VCMPPDZ256rmi: case X86::VCMPPDZ256rri:
case X86::VCMPPDZrmi: case X86::VCMPPDZrri:
case X86::VCMPPSrmi: case X86::VCMPPSrri:
case X86::VCMPPSYrmi: case X86::VCMPPSYrri:
case X86::VCMPPSZ128rmi: case X86::VCMPPSZ128rri:
case X86::VCMPPSZ256rmi: case X86::VCMPPSZ256rri:
case X86::VCMPPSZrmi: case X86::VCMPPSZrri:
case X86::VCMPSDrm: case X86::VCMPSDrr:
case X86::VCMPSDZrm: case X86::VCMPSDZrr:
case X86::VCMPSDrm_Int: case X86::VCMPSDrr_Int:
case X86::VCMPSDZrm_Int: case X86::VCMPSDZrr_Int:
case X86::VCMPSSrm: case X86::VCMPSSrr:
case X86::VCMPSSZrm: case X86::VCMPSSZrr:
case X86::VCMPSSrm_Int: case X86::VCMPSSrr_Int:
case X86::VCMPSSZrm_Int: case X86::VCMPSSZrr_Int:
case X86::VCMPPDZ128rmik: case X86::VCMPPDZ128rrik:
case X86::VCMPPDZ256rmik: case X86::VCMPPDZ256rrik:
case X86::VCMPPDZrmik: case X86::VCMPPDZrrik:
case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik:
case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik:
case X86::VCMPPSZrmik: case X86::VCMPPSZrrik:
case X86::VCMPSDZrm_Intk: case X86::VCMPSDZrr_Intk:
case X86::VCMPSSZrm_Intk: case X86::VCMPSSZrr_Intk:
case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik:
case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik:
case X86::VCMPPDZrmbi: case X86::VCMPPDZrmbik:
case X86::VCMPPSZ128rmbi: case X86::VCMPPSZ128rmbik:
case X86::VCMPPSZ256rmbi: case X86::VCMPPSZ256rmbik:
case X86::VCMPPSZrmbi: case X86::VCMPPSZrmbik:
case X86::VCMPPDZrrib: case X86::VCMPPDZrribk:
case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri:
case X86::VCMPPHZrmi: case X86::VCMPPHZrri:
case X86::VCMPSHZrm: case X86::VCMPSHZrr:
case X86::VCMPSHZrm_Int: case X86::VCMPSHZrr_Int:
case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik:
case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik:
case X86::VCMPPHZrmik: case X86::VCMPPHZrrik:
case X86::VCMPSHZrm_Intk: case X86::VCMPSHZrr_Intk:
case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik:
case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik:
case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
case X86::VCMPSHZrrb_Int: case X86::VCMPSHZrrb_Intk:
case X86::VCMPPDrmi: case X86::VCMPPDrri:
case X86::VCMPPDYrmi: case X86::VCMPPDYrri:
case X86::VCMPPDZ128rmi: case X86::VCMPPDZ128rri:
case X86::VCMPPDZ256rmi: case X86::VCMPPDZ256rri:
case X86::VCMPPDZrmi: case X86::VCMPPDZrri:
case X86::VCMPPSrmi: case X86::VCMPPSrri:
case X86::VCMPPSYrmi: case X86::VCMPPSYrri:
case X86::VCMPPSZ128rmi: case X86::VCMPPSZ128rri:
case X86::VCMPPSZ256rmi: case X86::VCMPPSZ256rri:
case X86::VCMPPSZrmi: case X86::VCMPPSZrri:
case X86::VCMPSDrmi: case X86::VCMPSDrri:
case X86::VCMPSDZrmi: case X86::VCMPSDZrri:
case X86::VCMPSDrmi_Int: case X86::VCMPSDrri_Int:
case X86::VCMPSDZrmi_Int: case X86::VCMPSDZrri_Int:
case X86::VCMPSSrmi: case X86::VCMPSSrri:
case X86::VCMPSSZrmi: case X86::VCMPSSZrri:
case X86::VCMPSSrmi_Int: case X86::VCMPSSrri_Int:
case X86::VCMPSSZrmi_Int: case X86::VCMPSSZrri_Int:
case X86::VCMPPDZ128rmik: case X86::VCMPPDZ128rrik:
case X86::VCMPPDZ256rmik: case X86::VCMPPDZ256rrik:
case X86::VCMPPDZrmik: case X86::VCMPPDZrrik:
case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik:
case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik:
case X86::VCMPPSZrmik: case X86::VCMPPSZrrik:
case X86::VCMPSDZrmi_Intk: case X86::VCMPSDZrri_Intk:
case X86::VCMPSSZrmi_Intk: case X86::VCMPSSZrri_Intk:
case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik:
case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik:
case X86::VCMPPDZrmbi: case X86::VCMPPDZrmbik:
case X86::VCMPPSZ128rmbi: case X86::VCMPPSZ128rmbik:
case X86::VCMPPSZ256rmbi: case X86::VCMPPSZ256rmbik:
case X86::VCMPPSZrmbi: case X86::VCMPPSZrmbik:
case X86::VCMPPDZrrib: case X86::VCMPPDZrribk:
case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrrib_Intk:
case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrrib_Intk:
case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri:
case X86::VCMPPHZrmi: case X86::VCMPPHZrri:
case X86::VCMPSHZrmi: case X86::VCMPSHZrri:
case X86::VCMPSHZrmi_Int: case X86::VCMPSHZrri_Int:
case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik:
case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik:
case X86::VCMPPHZrmik: case X86::VCMPPHZrrik:
case X86::VCMPSHZrmi_Intk: case X86::VCMPSHZrri_Intk:
case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik:
case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik:
case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk:
if (Imm >= 0 && Imm <= 31) {
OS << '\t';
printCMPMnemonic(MI, /*IsVCMP*/true, OS);
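For context, the printed mnemonic is derived from the immediate using the architectural SSE/AVX compare-predicate encoding (0=eq, 1=lt, 2=le, 3=unord, 4=neq, 5=nlt, 6=nle, 7=ord; the VEX/EVEX forms extend this to 32 predicates). Below is a minimal standalone sketch of that mapping, illustrative only and not the LLVM implementation:

```cpp
#include <array>
#include <cstdio>

// Sketch: how a compare immediate selects a printed mnemonic. The 8-entry
// table is the architectural SSE predicate encoding; the output format is
// illustrative only.
int main() {
  static const std::array<const char *, 8> Preds = {
      "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord"};
  unsigned Imm = 1; // e.g. the immediate operand of a CMPSDrri
  if (Imm < Preds.size())
    std::printf("cmp%ssd\n", Preds[Imm]); // prints "cmpltsd"
  return 0;
}
```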
8 changes: 4 additions & 4 deletions llvm/lib/Target/X86/X86FastISel.cpp
@@ -2198,7 +2198,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
const TargetRegisterClass *VK1 = &X86::VK1RegClass;

unsigned CmpOpcode =
(RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
(RetVT == MVT::f32) ? X86::VCMPSSZrri : X86::VCMPSDZrri;
Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,
CC);

@@ -2228,7 +2228,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
// instructions as the AND/ANDN/OR sequence due to register moves, so
// don't bother.
unsigned CmpOpcode =
(RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
(RetVT == MVT::f32) ? X86::VCMPSSrri : X86::VCMPSDrri;
unsigned BlendOpcode =
(RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;

@@ -2242,8 +2242,8 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
} else {
// Choose the SSE instruction sequence based on data type (float or double).
static const uint16_t OpcTable[2][4] = {
{ X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
{ X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
{ X86::CMPSSrri, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
{ X86::CMPSDrri, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
};

const uint16_t *Opc = nullptr;
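The OpcTable rows above pair a compare with the classic AND/ANDN/OR blend: the compare produces an all-ones or all-zero mask, which then selects bitwise between the two values. A scalar C++ analogue of the sequence, a sketch only (the real code operates on XMM registers):

```cpp
#include <cstdint>
#include <cstring>

// Sketch of the CMPSS/ANDPS/ANDNPS/ORPS select:
// mask = (a < b) ? ~0 : 0; result = (mask & t) | (~mask & f).
float sse_like_select(float a, float b, float t, float f) {
  uint32_t mask = (a < b) ? 0xFFFFFFFFu : 0u; // CMPSSrri yields all-ones/zero
  uint32_t ti, fi;
  std::memcpy(&ti, &t, sizeof ti);
  std::memcpy(&fi, &f, sizeof fi);
  uint32_t ri = (mask & ti) | (~mask & fi);   // ANDPSrr / ANDNPSrr / ORPSrr
  float r;
  std::memcpy(&r, &ri, sizeof r);
  return r;
}
```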
98 changes: 49 additions & 49 deletions llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1937,58 +1937,58 @@ defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
PatFrag OpNode_su, PatFrag OpNodeSAE_su,
X86FoldableSchedWrite sched> {
defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc)>, EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
defm rri_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc)>,
EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
defm rmi_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

let Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
(OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc),
(OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc)>,
EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
defm rrib_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
(OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc),
(OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc)>,
EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;

let isCodeGenOnly = 1 in {
let isCommutable = 1 in
def rr : AVX512Ii8<0xC2, MRMSrcReg,
(outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
!strconcat("vcmp", _.Suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2,
timm:$cc))]>,
EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
def rm : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
!strconcat("vcmp", _.Suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2),
timm:$cc))]>,
EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
def rri : AVX512Ii8<0xC2, MRMSrcReg,
(outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
!strconcat("vcmp", _.Suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2,
timm:$cc))]>,
EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
def rmi : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
!strconcat("vcmp", _.Suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2),
timm:$cc))]>,
EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}

@@ -2437,15 +2437,15 @@ defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
(VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
(VCMPSDZrmi FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
(VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
(VCMPSSZrmi FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

let Predicates = [HasFP16] in {
def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
(VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
(VCMPSHZrmi FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
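A note on the naming scheme these hunks normalize (my reading of the backend convention, not an official table): the trailing letters of an opcode spell out its operand kinds, so renaming `rr`/`rm` to `rri`/`rmi` simply makes the trailing immediate explicit. An informal legend as a self-contained sketch:

```cpp
#include <cstdio>

// Informal legend for the opcode suffixes seen in this patch (assumed
// convention, not authoritative).
int main() {
  struct Entry { const char *Suffix, *Meaning; } Legend[] = {
      {"rri",  "reg dst, reg srcs, immediate (e.g. VCMPSDZrri)"},
      {"rmi",  "reg dst, mem src, immediate (e.g. VCMPSDZrmi)"},
      {"rmbi", "broadcast-from-memory form with immediate"},
      {"rrib", "register form with the {sae} bit set"},
      {"_Int", "intrinsic (whole-vector) variant"},
      {"k/kz", "masked / zero-masked variants"},
  };
  for (const Entry &E : Legend)
    std::printf("%-5s %s\n", E.Suffix, E.Meaning);
  return 0;
}
```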
40 changes: 20 additions & 20 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2573,11 +2573,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
WorkingMI->getOperand(3).setImm(
X86::getSwappedVPCOMImm(MI.getOperand(3).getImm() & 0x7));
break;
case X86::VCMPSDZrr:
case X86::VCMPSSZrr:
case X86::VCMPSDZrri:
case X86::VCMPSSZrri:
case X86::VCMPPDZrri:
case X86::VCMPPSZrri:
case X86::VCMPSHZrr:
case X86::VCMPSHZrri:
case X86::VCMPPHZrri:
case X86::VCMPPHZ128rri:
case X86::VCMPPHZ256rri:
@@ -2820,21 +2820,21 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return false;

switch (MI.getOpcode()) {
case X86::CMPSDrr:
case X86::CMPSSrr:
case X86::CMPSDrri:
case X86::CMPSSrri:
case X86::CMPPDrri:
case X86::CMPPSrri:
case X86::VCMPSDrr:
case X86::VCMPSSrr:
case X86::VCMPSDrri:
case X86::VCMPSSrri:
case X86::VCMPPDrri:
case X86::VCMPPSrri:
case X86::VCMPPDYrri:
case X86::VCMPPSYrri:
case X86::VCMPSDZrr:
case X86::VCMPSSZrr:
case X86::VCMPSDZrri:
case X86::VCMPSSZrri:
case X86::VCMPPDZrri:
case X86::VCMPPSZrri:
case X86::VCMPSHZrr:
case X86::VCMPSHZrri:
case X86::VCMPPHZrri:
case X86::VCMPPHZ128rri:
case X86::VCMPPHZ256rri:
@@ -7510,9 +7510,9 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::ADDSSrr_Int:
case X86::VADDSSrr_Int:
case X86::VADDSSZrr_Int:
case X86::CMPSSrr_Int:
case X86::VCMPSSrr_Int:
case X86::VCMPSSZrr_Int:
case X86::CMPSSrri_Int:
case X86::VCMPSSrri_Int:
case X86::VCMPSSZrri_Int:
case X86::DIVSSrr_Int:
case X86::VDIVSSrr_Int:
case X86::VDIVSSZrr_Int:
@@ -7533,7 +7533,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VSUBSSZrr_Int:
case X86::VADDSSZrr_Intk:
case X86::VADDSSZrr_Intkz:
case X86::VCMPSSZrr_Intk:
case X86::VCMPSSZrri_Intk:
case X86::VDIVSSZrr_Intk:
case X86::VDIVSSZrr_Intkz:
case X86::VMAXSSZrr_Intk:
@@ -7679,9 +7679,9 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::ADDSDrr_Int:
case X86::VADDSDrr_Int:
case X86::VADDSDZrr_Int:
case X86::CMPSDrr_Int:
case X86::VCMPSDrr_Int:
case X86::VCMPSDZrr_Int:
case X86::CMPSDrri_Int:
case X86::VCMPSDrri_Int:
case X86::VCMPSDZrri_Int:
case X86::DIVSDrr_Int:
case X86::VDIVSDrr_Int:
case X86::VDIVSDZrr_Int:
@@ -7702,7 +7702,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VSUBSDZrr_Int:
case X86::VADDSDZrr_Intk:
case X86::VADDSDZrr_Intkz:
case X86::VCMPSDZrr_Intk:
case X86::VCMPSDZrri_Intk:
case X86::VDIVSDZrr_Intk:
case X86::VDIVSDZrr_Intkz:
case X86::VMAXSDZrr_Intk:
@@ -7814,15 +7814,15 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
// instruction isn't scalar (SH).
switch (UserOpc) {
case X86::VADDSHZrr_Int:
case X86::VCMPSHZrr_Int:
case X86::VCMPSHZrri_Int:
case X86::VDIVSHZrr_Int:
case X86::VMAXSHZrr_Int:
case X86::VMINSHZrr_Int:
case X86::VMULSHZrr_Int:
case X86::VSUBSHZrr_Int:
case X86::VADDSHZrr_Intk:
case X86::VADDSHZrr_Intkz:
case X86::VCMPSHZrr_Intk:
case X86::VCMPSHZrri_Intk:
case X86::VDIVSHZrr_Intk:
case X86::VDIVSHZrr_Intkz:
case X86::VMAXSHZrr_Intk:
46 changes: 23 additions & 23 deletions llvm/lib/Target/X86/X86InstrSSE.td
@@ -1830,29 +1830,29 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
PatFrag ld_frag, string asm,
X86FoldableSchedWrite sched,
PatFrags mem_frags> {
def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$cc), asm,
[(set VR128:$dst, (OpNode (VT VR128:$src1),
VR128:$src2, timm:$cc))]>,
Sched<[sched]>, SIMD_EXC;
def rri_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$cc), asm,
[(set VR128:$dst, (OpNode (VT VR128:$src1),
VR128:$src2, timm:$cc))]>,
Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, u8imm:$cc), asm,
[(set VR128:$dst, (OpNode (VT VR128:$src1),
(mem_frags addr:$src2), timm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
def rmi_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, u8imm:$cc), asm,
[(set VR128:$dst, (OpNode (VT VR128:$src1),
(mem_frags addr:$src2), timm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

let isCodeGenOnly = 1 in {
let isCommutable = 1 in
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>,
Sched<[sched]>, SIMD_EXC;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode RC:$src1,
(ld_frag addr:$src2), timm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
def rri : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>,
Sched<[sched]>, SIMD_EXC;
def rmi : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode RC:$src1,
(ld_frag addr:$src2), timm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}

@@ -2023,11 +2023,11 @@ let Predicates = [HasAVX] in {

def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
CommutableCMPCC:$cc)),
(VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
(VCMPSDrmi FR64:$src1, addr:$src2, timm:$cc)>;

def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
CommutableCMPCC:$cc)),
(VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
(VCMPSSrmi FR32:$src1, addr:$src2, timm:$cc)>;
}

let Predicates = [UseSSE2] in {
@@ -2037,7 +2037,7 @@ let Predicates = [UseSSE2] in {

def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
CommutableCMPCC:$cc)),
(CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
(CMPSDrmi FR64:$src1, addr:$src2, timm:$cc)>;
}

let Predicates = [UseSSE1] in {
@@ -2047,7 +2047,7 @@ let Predicates = [UseSSE1] in {

def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
CommutableCMPCC:$cc)),
(CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
(CMPSSrmi FR32:$src1, addr:$src2, timm:$cc)>;
}

//===----------------------------------------------------------------------===//
8 changes: 4 additions & 4 deletions llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -663,8 +663,8 @@ def : InstRW<[SPRWriteResGroup12], (instregex "^ADD_F(P?)rST0$",
"^SUB(R?)_FST0r$",
"^VALIGN(D|Q)Z256rri((k|kz)?)$",
"^VCMPP(D|H|S)Z(128|256)rri(k?)$",
"^VCMPS(D|H|S)Zrr$",
"^VCMPS(D|H|S)Zrr(b?)_Int(k?)$",
"^VCMPS(D|H|S)Zrri$",
"^VCMPS(D|H|S)Zrr(b?)i_Int(k?)$",
"^VFPCLASSP(D|H|S)Z(128|256)rr(k?)$",
"^VFPCLASSS(D|H|S)Zrr(k?)$",
"^VPACK(S|U)S(DW|WB)Yrr$",
@@ -2739,8 +2739,8 @@ def : InstRW<[SPRWriteResGroup263, ReadAfterVecYLd], (instregex "^VCMPP(D|H|S)Z(
"^VPCMPUDZ((256)?)rmib(k?)$",
"^VPTEST(N?)M(B|D|Q|W)Z((256)?)rm(k?)$",
"^VPTEST(N?)M(D|Q)Z((256)?)rmb(k?)$")>;
def : InstRW<[SPRWriteResGroup263, ReadAfterVecLd], (instregex "^VCMPS(D|H|S)Zrm$",
"^VCMPS(D|H|S)Zrm_Int(k?)$",
def : InstRW<[SPRWriteResGroup263, ReadAfterVecLd], (instregex "^VCMPS(D|H|S)Zrmi$",
"^VCMPS(D|H|S)Zrmi_Int(k?)$",
"^VFPCLASSS(D|H|S)Zrmk$")>;

def SPRWriteResGroup264 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -999,6 +999,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::WriteOnly:
case Attribute::Writable:
case Attribute::DeadOnUnwind:
case Attribute::Range:
// These are not really attributes.
case Attribute::None:
case Attribute::EndAttrKinds:
676 changes: 195 additions & 481 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions llvm/test/Assembler/range-attribute-invalid-range.ll
@@ -0,0 +1,6 @@
; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s

; CHECK: the range should not represent the full or empty set!
define void @range_empty(i8 range(i8 0, 0) %a) {
ret void
}
6 changes: 6 additions & 0 deletions llvm/test/Assembler/range-attribute-invalid-type.ll
@@ -0,0 +1,6 @@
; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s

; CHECK: the range must have integer type!
define void @range_vector_type(i8 range(<4 x i32> 0, 0) %a) {
ret void
}
10 changes: 10 additions & 0 deletions llvm/test/Bitcode/attributes.ll
@@ -526,6 +526,16 @@ define void @f91(ptr dead_on_unwind %p) {
ret void
}

; CHECK: define range(i32 -1, 42) i32 @range_attribute(<4 x i32> range(i32 -1, 42) %a)
define range(i32 -1, 42) i32 @range_attribute(<4 x i32> range(i32 -1, 42) %a) {
ret i32 0
}

; CHECK: define void @wide_range_attribute(i128 range(i128 618970019642690137449562111, 618970019642690137449562114) %a)
define void @wide_range_attribute(i128 range(i128 618970019642690137449562111, 618970019642690137449562114) %a) {
ret void
}

; CHECK: attributes #0 = { noreturn }
; CHECK: attributes #1 = { nounwind }
; CHECK: attributes #2 = { memory(none) }
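The `range` attribute exercised above can also be built programmatically. A sketch using the C++ API (the `Attribute::get` overload taking a `ConstantRange` is assumed from recent LLVM; treat the snippet as illustrative, not authoritative):

```cpp
#include "llvm/IR/Attributes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  auto *I32 = Type::getInt32Ty(Ctx);
  Function *F = Function::Create(FunctionType::get(I32, {I32}, false),
                                 Function::ExternalLinkage, "f", M);
  // range(i32 -1, 42): a half-open interval, matching the test above.
  ConstantRange CR(APInt(32, -1, /*isSigned=*/true), APInt(32, 42));
  F->addParamAttr(0, Attribute::get(Ctx, Attribute::Range, CR));
  M.print(outs(), nullptr); // expect: define i32 @f(i32 range(i32 -1, 42) %0)
  return 0;
}
```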
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir
@@ -11,7 +11,7 @@ body: |
; CHECK: [[C:%[0-9]+]]:_(p64) = G_CONSTANT i64 44
; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[C]](p64)
; CHECK: $x0 = COPY [[PTRTOINT]](s64)
%1:_(s32) = G_CONSTANT i32 42
%1:_(s64) = G_CONSTANT i64 42
%2:_(s32) = G_CONSTANT i32 2
%3:_(p64) = G_INTTOPTR %2
%4:_(p64) = G_PTR_ADD %3, %1
@@ -26,7 +26,7 @@ body: |
; CHECK-LABEL: name: agc.test_combine_ptradd_constants_ptrres
; CHECK: [[C:%[0-9]+]]:_(p64) = G_CONSTANT i64 44
; CHECK: $x0 = COPY [[C]](p64)
%1:_(s32) = G_CONSTANT i32 42
%1:_(s64) = G_CONSTANT i64 42
%2:_(s32) = G_CONSTANT i32 2
%3:_(p64) = G_INTTOPTR %2
%4:_(p64) = G_PTR_ADD %3, %1
@@ -39,12 +39,12 @@ body: |
liveins: $x0, $x1
; Ensure non-constant G_PTR_ADDs are not folded.
; CHECK-LABEL: name: agc.test_not_combine_variable_ptradd
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
; CHECK: [[COPY:%[0-9]+]]:_(p64) = COPY $x1
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p64) = G_PTR_ADD [[COPY]], [[C]](s32)
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p64) = G_PTR_ADD [[COPY]], [[C]](s64)
; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p64)
; CHECK: $x0 = COPY [[PTRTOINT]](s64)
%1:_(s32) = G_CONSTANT i32 42
%1:_(s64) = G_CONSTANT i64 42
%2:_(p64) = COPY $x1
%3:_(p64) = G_PTR_ADD %2, %1
%4:_(s64) = G_PTRTOINT %3
17 changes: 0 additions & 17 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir
@@ -1,23 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64 -run-pass=legalizer %s -o - | FileCheck %s
---
name: test_ptr_add_small
body: |
bb.0.entry:
; CHECK-LABEL: name: test_ptr_add_small
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 8
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s64)
; CHECK: $x0 = COPY [[PTR_ADD]](p0)
%0:_(p0) = COPY $x0
%1:_(s64) = COPY $x1
%2:_(s8) = G_TRUNC %1(s64)
%3:_(p0) = G_PTR_ADD %0, %2(s8)
$x0 = COPY %3(p0)
...
---
name: test_ptr_add_vec_p0
body: |
bb.0.entry:

Large diffs are not rendered by default.

@@ -8,8 +8,9 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_ptradd_crash__offset_smaller
; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1)
; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%1:_(p1) = G_CONSTANT i64 0
@@ -27,8 +28,12 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_ptradd_crash__offset_wider
; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
; CHECK: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 3
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[C]](s128)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[TRUNC]], [[C1]](s64)
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[SHL]](s64)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1)
; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%1:_(p1) = G_CONSTANT i64 0
21 changes: 6 additions & 15 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
@@ -11,9 +11,8 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 63, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -28,10 +27,8 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX12-NEXT: v_and_b32_e32 v2, 63, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX12-NEXT: global_load_b32 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
@@ -46,9 +43,8 @@ define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -63,10 +59,8 @@ define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX12-NEXT: v_and_b32_e32 v2, 0x7f, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v2, 1, v2
; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX12-NEXT: global_load_u16 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
@@ -81,9 +75,8 @@ define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 31, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -98,10 +91,8 @@ define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX12-NEXT: v_and_b32_e32 v2, 31, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v2, 3, v2
; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX12-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
110 changes: 47 additions & 63 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
@@ -6,37 +6,44 @@
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s

define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GCN: ; %bb.0:
; GCN-NEXT: s_and_b32 s0, s4, 3
; GCN-NEXT: s_lshl_b32 s0, s0, 4
; GCN-NEXT: s_ashr_i32 s1, s0, 31
; GCN-NEXT: s_add_u32 s0, s2, s0
; GCN-NEXT: s_addc_u32 s1, s3, s1
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ; return to shader part epilog
; GFX9-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s4, 3
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s4, 3
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s4, 3
; GFX7-NEXT: s_lshl_b32 s0, s0, 4
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s4, 3
; GFX10-NEXT: s_lshl_b32 s0, s0, 4
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: s_add_u32 s0, s2, s0
; GFX10-NEXT: s_addc_u32 s1, s3, s1
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s4, 3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_lshl_b32 s0, s0, 4
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: s_add_u32 s0, s2, s0
; GFX11-NEXT: s_addc_u32 s1, s3, s1
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], s0 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%vector = load <4 x i128>, ptr addrspace(4) %ptr
@@ -48,8 +55,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
; GFX9-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s2, 3
; GFX9-NEXT: s_mov_b32 s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
@@ -65,8 +72,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
; GFX8-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s2, 3
; GFX8-NEXT: s_mov_b32 s1, 0
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
@@ -82,10 +89,10 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
; GFX7-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s2, 3
; GFX7-NEXT: s_mov_b32 s1, 0
; GFX7-NEXT: s_lshl_b32 s0, s0, 4
; GFX7-NEXT: s_ashr_i32 s1, s0, 31
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s1
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
@@ -97,8 +104,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
; GFX10-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s2, 3
; GFX10-NEXT: s_mov_b32 s1, 0
; GFX10-NEXT: s_lshl_b32 s0, s0, 4
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: v_mov_b32_e32 v3, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
@@ -114,9 +121,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
; GFX11-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s2, 3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b32 s1, 0
; GFX11-NEXT: s_lshl_b32 s0, s0, 4
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
@@ -140,9 +146,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 3, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -152,9 +157,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -164,9 +168,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 3, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
@@ -179,9 +182,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -192,10 +194,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -208,13 +208,8 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
; GFX9-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v0, 3, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
@@ -227,10 +222,9 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
; GFX8-NEXT: v_and_b32_e32 v0, 3, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
@@ -242,10 +236,10 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
; GFX7-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_and_b32_e32 v0, 3, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64
@@ -259,13 +253,8 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
; GFX10-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v0
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: v_readfirstlane_b32 s1, v1
@@ -276,14 +265,9 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
; GFX11-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 4, v0
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
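The recurring change in these checks is that the 64-bit address math no longer sign-extends the scaled index (the `s_ashr_i32 ..., 31` lines disappear): the index is masked to a small range before scaling, so the offset is provably non-negative and its high half is simply zero. In scalar terms, a sketch of the reasoning rather than the backend code:

```cpp
#include <cstdint>

// idx is masked to 0..3 and scaled by 16, so the offset is at most 48:
// it fits in 32 bits and is non-negative, so zero-extension suffices.
uint64_t element_offset(uint32_t idx) {
  uint32_t lo = (idx & 3u) << 4;
  return static_cast<uint64_t>(lo); // high 32 bits are zero, no sign bits
}
```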
190 changes: 62 additions & 128 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll

Large diffs are not rendered by default.

207 changes: 0 additions & 207 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir
@@ -205,210 +205,3 @@ body: |
%2:_(<2 x p3>) = G_PTR_ADD %0, %1
$vgpr0_vgpr1 = COPY %2
...

---
name: test_gep_global_s16_idx
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
; CHECK-LABEL: name: test_gep_global_s16_idx
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s64)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s16) = G_TRUNC %1
%3:_(p1) = G_PTR_ADD %0, %2
$vgpr0_vgpr1 = COPY %3
...

---
name: test_gep_global_s32_idx
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
; CHECK-LABEL: name: test_gep_global_s32_idx
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[SEXT]](s64)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(p1) = G_PTR_ADD %0, %1
$vgpr0_vgpr1 = COPY %2
...

---
name: test_gep_global_s96_idx
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CHECK-LABEL: name: test_gep_global_s96_idx
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[TRUNC]](s64)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s96) = COPY $vgpr2_vgpr3_vgpr4
%2:_(p1) = G_PTR_ADD %0, %1
$vgpr0_vgpr1 = COPY %2
...

---
name: test_gep_local_i16_idx
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; CHECK-LABEL: name: test_gep_local_i16_idx
; CHECK: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3)
%0:_(p3) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %1
%3:_(p3) = G_PTR_ADD %0, %2
$vgpr0 = COPY %3
...

---
name: test_gep_local_i64_idx
body: |
bb.0:
liveins: $vgpr0, $vgpr1_vgpr2
; CHECK-LABEL: name: test_gep_local_i64_idx
; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[TRUNC]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3)
%0:_(p3) = COPY $vgpr0
%1:_(s64) = COPY $vgpr1_vgpr2
%2:_(p3) = G_PTR_ADD %0, %1
$vgpr0 = COPY %2
...

---
name: test_gep_v2p1_v2i32
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-LABEL: name: test_gep_v2p1_v2i32
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[SEXT]](s64)
; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[SEXT1]](s64)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
%0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s32>) = COPY $vgpr4_vgpr5
%2:_(<2 x p1>) = G_PTR_ADD %0, %1
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
...

---
name: test_gep_v2p1_v2i96
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6, $vgpr7_vgpr8_vgpr9
; CHECK-LABEL: name: test_gep_v2p1_v2i96
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6, $vgpr7_vgpr8_vgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY $vgpr7_vgpr8_vgpr9
; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[TRUNC]](s64)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[COPY2]](s96)
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[TRUNC1]](s64)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
%0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
%2:_(s96) = COPY $vgpr7_vgpr8_vgpr9
%3:_(<2 x s96>) = G_BUILD_VECTOR %1, %2
%4:_(<2 x p1>) = G_PTR_ADD %0, %3
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
...

---
name: test_gep_v2p3_v2s16
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
; CHECK-LABEL: name: test_gep_v2p3_v2s16
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[SEXT_INREG]](s32)
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[SEXT_INREG1]](s32)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
%0:_(<2 x p3>) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<2 x p3>) = G_PTR_ADD %0, %1
$vgpr0_vgpr1 = COPY %2
...

---
name: test_gep_v2p3_v2s64
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CHECK-LABEL: name: test_gep_v2p3_v2s64
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[TRUNC]](s32)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[TRUNC1]](s32)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
%0:_(<2 x p3>) = COPY $vgpr0_vgpr1
%1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<2 x p3>) = G_PTR_ADD %0, %1
$vgpr0_vgpr1 = COPY %2
...
28 changes: 0 additions & 28 deletions llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir
@@ -9,7 +9,6 @@
define void @test_load_store_64_novfp() #1 { ret void }

define void @test_gep_s32() { ret void }
define void @test_gep_s16() { ret void }

attributes #0 = { "target-features"="+vfp2" }
attributes #1 = { "target-features"="-vfp2sp" }
@@ -211,30 +210,3 @@ body: |
$r0 = COPY %2(p0)
BX_RET 14, $noreg, implicit $r0
...
---
name: test_gep_s16
# CHECK-LABEL: name: test_gep_s16
legalized: false
# CHECK: legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
body: |
bb.0:
liveins: $r0
%0(p0) = COPY $r0
%1(s16) = G_LOAD %0(p0) :: (load (s16))
; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16)
; CHECK: {{%[0-9]+}}:_(p0) = G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s32)
; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16)
%2(p0) = G_PTR_ADD %0, %1(s16)
$r0 = COPY %2(p0)
BX_RET 14, $noreg, implicit $r0
...
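The removed tests above (and the deleted X86 `legalize-ptr-add.mir` below) covered G_PTR_ADD offsets narrower or wider than the pointer; after this change the offset type must match the pointer's index width up front, so any extension happens before the add instead of inside the legalizer. What the old legalization did for a narrow offset, expressed as plain C++ (a sketch of the semantics only):

```cpp
#include <cstdint>

// What G_SEXT_INREG + G_PTR_ADD computed for an s16 offset on a 64-bit
// pointer: sign-extend the narrow index to pointer width, then add.
uint64_t ptr_add_s16(uint64_t base, uint16_t off16) {
  int64_t off = static_cast<int16_t>(off16); // G_SEXT_INREG ..., 16
  return base + static_cast<uint64_t>(off);  // G_PTR_ADD
}
```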
55 changes: 55 additions & 0 deletions llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir
@@ -0,0 +1,55 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK

--- |
define void @test_gep_i32c(ptr %addr) {
%arrayidx = getelementptr i32, ptr undef, i32 5
ret void
}
define void @test_gep_i32(ptr %addr, i32 %ofs) {
%arrayidx = getelementptr i32, ptr undef, i32 %ofs
ret void
}
...
---
name: test_gep_i32c
legalized: false
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
body: |
bb.1 (%ir-block.0):
; CHECK-LABEL: name: test_gep_i32c
; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
; CHECK-NEXT: RET 0
%0(p0) = IMPLICIT_DEF
%1(s32) = G_CONSTANT i32 20
%2(p0) = G_PTR_ADD %0, %1(s32)
G_STORE %2, %0 :: (store (p0) into %ir.addr)
RET 0
...
---
name: test_gep_i32
legalized: false
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
body: |
bb.1 (%ir-block.0):
; CHECK-LABEL: name: test_gep_i32
; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = IMPLICIT_DEF
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s32)
; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
; CHECK-NEXT: RET 0
%0(p0) = IMPLICIT_DEF
%1(s32) = IMPLICIT_DEF
%2(p0) = G_PTR_ADD %0, %1(s32)
G_STORE %2, %0 :: (store (p0) into %ir.addr)
RET 0
...
55 changes: 55 additions & 0 deletions llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir
@@ -0,0 +1,55 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=X64

--- |
define void @test_gep_i64c(ptr %addr) {
%arrayidx = getelementptr i32, ptr undef, i64 5
ret void
}
define void @test_gep_i64(ptr %addr, i64 %ofs) {
%arrayidx = getelementptr i32, ptr undef, i64 %ofs
ret void
}
...
---
name: test_gep_i64c
legalized: false
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
body: |
bb.1 (%ir-block.0):
; X64-LABEL: name: test_gep_i64c
; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
; X64-NEXT: RET 0
%0(p0) = IMPLICIT_DEF
%1(s64) = G_CONSTANT i64 20
%2(p0) = G_PTR_ADD %0, %1(s64)
G_STORE %2, %0 :: (store (p0) into %ir.addr)
RET 0
...
---
name: test_gep_i64
legalized: false
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
body: |
bb.1 (%ir-block.0):
; X64-LABEL: name: test_gep_i64
; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s64)
; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
; X64-NEXT: RET 0
%0(p0) = IMPLICIT_DEF
%1(s64) = IMPLICIT_DEF
%2(p0) = G_PTR_ADD %0, %1(s64)
G_STORE %2, %0 :: (store (p0) into %ir.addr)
RET 0
...
224 changes: 0 additions & 224 deletions llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir

This file was deleted.

19 changes: 7 additions & 12 deletions llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
@@ -1380,23 +1380,18 @@ body: |
bb.0 (%ir-block.0):
; FAST-LABEL: name: test_gep
; FAST: [[DEF:%[0-9]+]]:gpr(p0) = G_IMPLICIT_DEF
; FAST: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 20
; FAST: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
; FAST: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
; FAST: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C1]](s64)
; FAST: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
; FAST: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
; FAST: RET 0
;
; GREEDY-LABEL: name: test_gep
; GREEDY: [[DEF:%[0-9]+]]:gpr(p0) = G_IMPLICIT_DEF
; GREEDY: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 20
; GREEDY: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
; GREEDY: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
; GREEDY: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C1]](s64)
; GREEDY: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
; GREEDY: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
; GREEDY: RET 0
%0(p0) = G_IMPLICIT_DEF
%1(s32) = G_CONSTANT i32 20
%2(p0) = G_PTR_ADD %0, %1(s32)
%3(s64) = G_CONSTANT i64 20
%4(p0) = G_PTR_ADD %0, %3(s64)
%1(s64) = G_CONSTANT i64 20
%2(p0) = G_PTR_ADD %0, %1(s64)
RET 0
...
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll
@@ -21,17 +21,17 @@ define void @test_fcmp_storei1(i1 %cond, ptr %fptr, ptr %iptr, float %f1, float
; CHECK-NEXT: bb.1.if:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[VCMPSSZrr:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrr [[COPY3]], [[COPY2]], 0, implicit $mxcsr
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vk16 = COPY [[VCMPSSZrr]]
; CHECK-NEXT: [[VCMPSSZrri:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrri [[COPY3]], [[COPY2]], 0, implicit $mxcsr
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vk16 = COPY [[VCMPSSZrri]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vk32 = COPY [[COPY7]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vk8 = COPY [[COPY8]]
; CHECK-NEXT: JMP_1 %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.else:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[VCMPSSZrr1:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrr [[COPY1]], [[COPY]], 0, implicit $mxcsr
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vk16 = COPY [[VCMPSSZrr1]]
; CHECK-NEXT: [[VCMPSSZrri1:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrri [[COPY1]], [[COPY]], 0, implicit $mxcsr
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vk16 = COPY [[VCMPSSZrri1]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:vk32 = COPY [[COPY10]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:vk8 = COPY [[COPY11]]
; CHECK-NEXT: {{ $}}
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/domain-reassignment.mir
@@ -133,14 +133,14 @@ body: |
; CHECK: JMP_1 %bb.1
; CHECK: bb.1.if:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[VCMPSSZrr:%[0-9]+]]:vk1 = VCMPSSZrr [[COPY3]], [[COPY2]], 0
; CHECK: [[COPY9:%[0-9]+]]:vk32 = COPY [[VCMPSSZrr]]
; CHECK: [[VCMPSSZrri:%[0-9]+]]:vk1 = VCMPSSZrri [[COPY3]], [[COPY2]], 0
; CHECK: [[COPY9:%[0-9]+]]:vk32 = COPY [[VCMPSSZrri]]
; CHECK: [[COPY10:%[0-9]+]]:vk8 = COPY [[COPY9]]
; CHECK: JMP_1 %bb.3
; CHECK: bb.2.else:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[VCMPSSZrr1:%[0-9]+]]:vk1 = VCMPSSZrr [[COPY1]], [[COPY]], 0
; CHECK: [[COPY11:%[0-9]+]]:vk32 = COPY [[VCMPSSZrr1]]
; CHECK: [[VCMPSSZrri1:%[0-9]+]]:vk1 = VCMPSSZrri [[COPY1]], [[COPY]], 0
; CHECK: [[COPY11:%[0-9]+]]:vk32 = COPY [[VCMPSSZrri1]]
; CHECK: [[COPY12:%[0-9]+]]:vk8 = COPY [[COPY11]]
; CHECK: bb.3.exit:
; CHECK: [[PHI:%[0-9]+]]:vk8 = PHI [[COPY12]], %bb.2, [[COPY10]], %bb.1
@@ -173,7 +173,7 @@ body: |
bb.1.if:
successors: %bb.3(0x80000000)
%14 = VCMPSSZrr %7, %8, 0, implicit $mxcsr
%14 = VCMPSSZrri %7, %8, 0, implicit $mxcsr
; check that cross domain copies are replaced with same domain copies.
@@ -183,7 +183,7 @@ body: |
bb.2.else:
successors: %bb.3(0x80000000)
%12 = VCMPSSZrr %9, %10, 0, implicit $mxcsr
%12 = VCMPSSZrri %9, %10, 0, implicit $mxcsr
; check that cross domain copies are replaced with same domain copies.
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -40,8 +40,8 @@ define float @sqrt_ieee_ninf(float %f) #0 {
; CHECK-NEXT: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load (s32) from constant-pool)
; CHECK-NEXT: [[VPANDrr:%[0-9]+]]:vr128 = VPANDrr killed [[COPY2]], killed [[VPBROADCASTDrm]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDrr]]
; CHECK-NEXT: [[VCMPSSrm:%[0-9]+]]:fr32 = nofpexcept VCMPSSrm killed [[COPY3]], $rip, 1, $noreg, %const.3, $noreg, 1, implicit $mxcsr :: (load (s32) from constant-pool)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vr128 = COPY [[VCMPSSrm]]
; CHECK-NEXT: [[VCMPSSrmi:%[0-9]+]]:fr32 = nofpexcept VCMPSSrmi killed [[COPY3]], $rip, 1, $noreg, %const.3, $noreg, 1, implicit $mxcsr :: (load (s32) from constant-pool)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vr128 = COPY [[VCMPSSrmi]]
; CHECK-NEXT: [[VPANDNrr:%[0-9]+]]:vr128 = VPANDNrr killed [[COPY4]], killed [[COPY1]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
; CHECK-NEXT: $xmm0 = COPY [[COPY5]]
@@ -84,8 +84,8 @@ define float @sqrt_daz_ninf(float %f) #1 {
; CHECK-NEXT: [[VMULSSrr5:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr killed [[VMULSSrr4]], killed [[VFMADD213SSr1]], implicit $mxcsr
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128 = COPY [[VMULSSrr5]]
; CHECK-NEXT: [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS
; CHECK-NEXT: [[VCMPSSrr:%[0-9]+]]:fr32 = nofpexcept VCMPSSrr [[COPY]], killed [[FsFLD0SS]], 0, implicit $mxcsr
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr128 = COPY [[VCMPSSrr]]
; CHECK-NEXT: [[VCMPSSrri:%[0-9]+]]:fr32 = nofpexcept VCMPSSrri [[COPY]], killed [[FsFLD0SS]], 0, implicit $mxcsr
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr128 = COPY [[VCMPSSrri]]
; CHECK-NEXT: [[VPANDNrr:%[0-9]+]]:vr128 = VPANDNrr killed [[COPY2]], killed [[COPY1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
; CHECK-NEXT: $xmm0 = COPY [[COPY3]]
6 changes: 5 additions & 1 deletion llvm/test/MachineVerifier/test_g_ptr_add.mir
@@ -1,4 +1,4 @@
#RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
# RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
# REQUIRES: aarch64-registered-target

---
@@ -29,4 +29,8 @@ body: |
; CHECK: Bad machine code: gep first operand must be a pointer
%6:_(s64) = G_PTR_ADD %1, %1
%7:_(s32) = G_IMPLICIT_DEF
; CHECK: Bad machine code: gep offset operand must match index size for address space
%8:_(p0) = G_PTR_ADD %0, %7
...
34 changes: 17 additions & 17 deletions llvm/test/TableGen/x86-fold-tables.inc
@@ -1941,10 +1941,10 @@ static const X86FoldTableEntry Table2[] = {
{X86::CMOV64rr, X86::CMOV64rm, 0},
{X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16},
{X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16},
{X86::CMPSDrr, X86::CMPSDrm, 0},
{X86::CMPSDrr_Int, X86::CMPSDrm_Int, TB_NO_REVERSE},
{X86::CMPSSrr, X86::CMPSSrm, 0},
{X86::CMPSSrr_Int, X86::CMPSSrm_Int, TB_NO_REVERSE},
{X86::CMPSDrri, X86::CMPSDrmi, 0},
{X86::CMPSDrri_Int, X86::CMPSDrmi_Int, TB_NO_REVERSE},
{X86::CMPSSrri, X86::CMPSSrmi, 0},
{X86::CMPSSrri_Int, X86::CMPSSrmi_Int, TB_NO_REVERSE},
{X86::CRC32r32r16, X86::CRC32r32m16, 0},
{X86::CRC32r32r16_EVEX, X86::CRC32r32m16_EVEX, 0},
{X86::CRC32r32r32, X86::CRC32r32m32, 0},
@@ -2390,16 +2390,16 @@ static const X86FoldTableEntry Table2[] = {
{X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0},
{X86::VCMPPSZrri, X86::VCMPPSZrmi, 0},
{X86::VCMPPSrri, X86::VCMPPSrmi, 0},
{X86::VCMPSDZrr, X86::VCMPSDZrm, 0},
{X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE},
{X86::VCMPSDrr, X86::VCMPSDrm, 0},
{X86::VCMPSDrr_Int, X86::VCMPSDrm_Int, TB_NO_REVERSE},
{X86::VCMPSHZrr, X86::VCMPSHZrm, 0},
{X86::VCMPSHZrr_Int, X86::VCMPSHZrm_Int, TB_NO_REVERSE},
{X86::VCMPSSZrr, X86::VCMPSSZrm, 0},
{X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE},
{X86::VCMPSSrr, X86::VCMPSSrm, 0},
{X86::VCMPSSrr_Int, X86::VCMPSSrm_Int, TB_NO_REVERSE},
{X86::VCMPSDZrri, X86::VCMPSDZrmi, 0},
{X86::VCMPSDZrri_Int, X86::VCMPSDZrmi_Int, TB_NO_REVERSE},
{X86::VCMPSDrri, X86::VCMPSDrmi, 0},
{X86::VCMPSDrri_Int, X86::VCMPSDrmi_Int, TB_NO_REVERSE},
{X86::VCMPSHZrri, X86::VCMPSHZrmi, 0},
{X86::VCMPSHZrri_Int, X86::VCMPSHZrmi_Int, TB_NO_REVERSE},
{X86::VCMPSSZrri, X86::VCMPSSZrmi, 0},
{X86::VCMPSSZrri_Int, X86::VCMPSSZrmi_Int, TB_NO_REVERSE},
{X86::VCMPSSrri, X86::VCMPSSrmi, 0},
{X86::VCMPSSrri_Int, X86::VCMPSSrmi_Int, TB_NO_REVERSE},
{X86::VCVTDQ2PDZ128rrkz, X86::VCVTDQ2PDZ128rmkz, TB_NO_REVERSE},
{X86::VCVTDQ2PDZ256rrkz, X86::VCVTDQ2PDZ256rmkz, 0},
{X86::VCVTDQ2PDZrrkz, X86::VCVTDQ2PDZrmkz, 0},
@@ -3973,9 +3973,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmik, 0},
{X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmik, 0},
{X86::VCMPPSZrrik, X86::VCMPPSZrmik, 0},
{X86::VCMPSDZrr_Intk, X86::VCMPSDZrm_Intk, TB_NO_REVERSE},
{X86::VCMPSHZrr_Intk, X86::VCMPSHZrm_Intk, TB_NO_REVERSE},
{X86::VCMPSSZrr_Intk, X86::VCMPSSZrm_Intk, TB_NO_REVERSE},
{X86::VCMPSDZrri_Intk, X86::VCMPSDZrmi_Intk, TB_NO_REVERSE},
{X86::VCMPSHZrri_Intk, X86::VCMPSHZrmi_Intk, TB_NO_REVERSE},
{X86::VCMPSSZrri_Intk, X86::VCMPSSZrmi_Intk, TB_NO_REVERSE},
{X86::VCVTDQ2PDZ128rrk, X86::VCVTDQ2PDZ128rmk, TB_NO_REVERSE},
{X86::VCVTDQ2PDZ256rrk, X86::VCVTDQ2PDZ256rmk, 0},
{X86::VCVTDQ2PDZrrk, X86::VCVTDQ2PDZrmk, 0},
9 changes: 4 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
@@ -17,13 +17,12 @@ define void @test1(<4 x i16> %a, <4 x i16> %b, ptr %p) {
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[S0]]
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[SUB0]], <4 x i32> poison, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP3]]
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64
4 changes: 2 additions & 2 deletions llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;test_i16_extend NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce,instcombine -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer,dce,instcombine -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-5 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: opt -S -mtriple=aarch64--linux-gnu -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-7 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s


20 changes: 15 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll
@@ -28,11 +28,21 @@ entry:
define i64 @red_zext_ld_4xi64(ptr %ptr) {
; CHECK-LABEL: @red_zext_ld_4xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i64
; CHECK-NEXT: ret i64 [[TMP3]]
; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT: [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1
; CHECK-NEXT: [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64
; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]]
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1
; CHECK-NEXT: [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64
; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]]
; CHECK-NEXT: ret i64 [[ADD_3]]
;
entry:
%ld0 = load i8, ptr %ptr
7 changes: 3 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
@@ -802,10 +802,9 @@ define i64 @red_zext_ld_4xi64(ptr %ptr) {
; CHECK-LABEL: @red_zext_ld_4xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i64
; CHECK-NEXT: ret i64 [[TMP3]]
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP1]])
; CHECK-NEXT: ret i64 [[TMP2]]
;
entry:
%ld0 = load i8, ptr %ptr
Expand Down
9 changes: 4 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@ define { i64, i64 } @patatino(double %arg) {
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 4), align 16
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP12]], 1
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP11]], 1
; CHECK-NEXT: ret { i64, i64 } [[T17]]
;
bb:
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-6 < %s | FileCheck %s
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-3 < %s | FileCheck %s

define void @t(i64 %v) {
; CHECK-LABEL: define void @t(
Original file line number Diff line number Diff line change
@@ -6,17 +6,18 @@ define void @test(i8 %0) {
; CHECK-SAME: i8 [[TMP0:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> <i8 0, i8 poison>, i8 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
; CHECK-NEXT: [[ADD:%.*]] = or i32 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32
; CHECK-NEXT: [[ADD:%.*]] = or i32 [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 1
; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SHR]] to i8
; CHECK-NEXT: store i8 [[CONV9]], ptr null, align 1
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: ret void
;
entry:
Original file line number Diff line number Diff line change
@@ -6,20 +6,15 @@ define void @test(i64 %d.promoted.i) {
; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND_1_I:%.*]] = and i64 0, [[D_PROMOTED_I]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i1> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[AND_1_I_1:%.*]] = and i64 0, 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I_1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[TMP3]] to <8 x i1>
; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i1> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32
; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP7]], [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[OP_RDX]], 0
; CHECK-NEXT: store i32 [[TMP10]], ptr null, align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i64> <i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[AND_1_I_1]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i64> [[TMP0]], i64 [[AND_1_I]], i32 9
; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i1>
; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i1> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0
; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4
; CHECK-NEXT: ret void
;
entry:
43 changes: 19 additions & 24 deletions llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
@@ -17,15 +17,12 @@ target triple = "x86_64-unknown-linux-gnu"
define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
; SSE-LABEL: @PR31243_zext(
; SSE-NEXT: entry:
; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
; SSE-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
; SSE-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1
; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1
; SSE-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]]
; SSE-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]]
; SSE-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1
; SSE-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1
; SSE-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]]
@@ -76,15 +73,12 @@ entry:
define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
; SSE-LABEL: @PR31243_sext(
; SSE-NEXT: entry:
; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
; SSE-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
; SSE-NEXT: [[TMP6:%.*]] = sext i8 [[TMP5]] to i64
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1
; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1
; SSE-NEXT: [[TMP2:%.*]] = sext i8 [[TMP0]] to i64
; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]]
; SSE-NEXT: [[TMP3:%.*]] = sext i8 [[TMP1]] to i64
; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]]
; SSE-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1
; SSE-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1
; SSE-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]]
@@ -95,12 +89,13 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
; AVX-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64
; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
; AVX-NEXT: [[TMP6:%.*]] = sext i8 [[TMP5]] to i64
; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i16>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP3]], i64 0
; AVX-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i64
; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP5]]
; AVX-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP3]], i64 1
; AVX-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i64
; AVX-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP7]]
; AVX-NEXT: [[T6:%.*]] = load i8, ptr [[T4]], align 1
; AVX-NEXT: [[T7:%.*]] = load i8, ptr [[T5]], align 1
; AVX-NEXT: [[T8:%.*]] = add i8 [[T6]], [[T7]]
24 changes: 12 additions & 12 deletions llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll
@@ -15,8 +15,8 @@ define i32 @phi3UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) {
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 undef, i8 undef, i8 undef>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
;
entry:
@@ -52,8 +52,8 @@ define i32 @phi2UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) {
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 undef, i8 undef>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
;
entry:
@@ -89,8 +89,8 @@ define i32 @phi1UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) {
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 0, i8 undef>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
;
entry:
@@ -127,8 +127,8 @@ define i32 @phi1Undef1PoisonInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %ar
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
;
entry:
@@ -165,8 +165,8 @@ define i32 @phi1Undef2PoisonInputs(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %a
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 poison, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
;
entry:
@@ -202,8 +202,8 @@ define i32 @phi1Undef1PoisonGapInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[TMP6]]
;
entry:
32 changes: 16 additions & 16 deletions llvm/test/Transforms/SLPVectorizer/X86/resched.ll
@@ -11,26 +11,26 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
; CHECK: if.then22.i:
; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1
; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[SHUFFLE1]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT: [[SHR_4_I_I:%.*]] = lshr i32 [[CONV31_I]], 5
; CHECK-NEXT: [[SHR_5_I_I:%.*]] = lshr i32 [[CONV31_I]], 6
; CHECK-NEXT: [[SHR_6_I_I:%.*]] = lshr i32 [[CONV31_I]], 7
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[TMP4]], <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP7]], <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_4_I_I]], i32 5
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_5_I_I]], i32 6
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SHR_6_I_I]], i32 7
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT: [[TMP14:%.*]] = trunc <16 x i32> [[TMP13]] to <16 x i8>
; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i8> [[TMP14]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT: store <16 x i8> [[TMP15]], ptr undef, align 1
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = lshr <8 x i32> [[SHUFFLE]], <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SHR_4_I_I]], i32 5
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_5_I_I]], i32 6
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_6_I_I]], i32 7
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT: [[TMP13:%.*]] = trunc <16 x i32> [[TMP12]] to <16 x i8>
; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i8> [[TMP13]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT: store <16 x i8> [[TMP14]], ptr undef, align 1
; CHECK-NEXT: unreachable
; CHECK: if.end50.i:
; CHECK-NEXT: ret void
Original file line number Diff line number Diff line change
@@ -7,10 +7,12 @@ define i1 @test(i1 %cmp5.not.31) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> <i1 poison, i1 false, i1 false, i1 false>, i1 [[CMP5_NOT_31]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 2, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 0
; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i1>
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP3]], <i32 2, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0
; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i32 [[TMP6]], 0
; CHECK-NEXT: ret i1 [[CMP_NOT_I_I]]
;
entry: