151 changes: 151 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1775,13 +1775,96 @@ static bool getFPPatterns(MachineInstr &Root,
return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
}

/// Utility routine that checks if \p MO is defined by a \p CombineOpc
/// instruction in the basic block \p MBB.
static const MachineInstr *canCombine(const MachineBasicBlock &MBB,
const MachineOperand &MO,
unsigned CombineOpc) {
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const MachineInstr *MI = nullptr;

if (MO.isReg() && MO.getReg().isVirtual())
MI = MRI.getUniqueVRegDef(MO.getReg());
// And it needs to be in the trace (otherwise, it won't have a depth).
if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
return nullptr;
// Must only be used by the user we combine with.
if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
return nullptr;

return MI;
}

/// Utility routine that checks if \p MO is defined by an SLLI in \p MBB
/// that can be combined by splitting across two SHXADD instructions. The
/// first SHXADD shift amount is given by \p OuterShiftAmt.
static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB,
const MachineOperand &MO,
unsigned OuterShiftAmt) {
const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI);
if (!ShiftMI)
return false;

unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
return false;

return true;
}

// Returns the shift amount from a SHXADD instruction. Returns 0 if the
// instruction is not a SHXADD.
static unsigned getSHXADDShiftAmount(unsigned Opc) {
switch (Opc) {
default:
return 0;
case RISCV::SH1ADD:
return 1;
case RISCV::SH2ADD:
return 2;
case RISCV::SH3ADD:
return 3;
}
}

// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
// (sh3add (sh2add Y, Z), X).
static bool
getSHXADDPatterns(const MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) {
unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
if (!ShiftAmt)
return false;

const MachineBasicBlock &MBB = *Root.getParent();

const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD);
if (!AddMI)
return false;

bool Found = false;
if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) {
Patterns.push_back(MachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
Found = true;
}
if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) {
Patterns.push_back(MachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
Found = true;
}

return Found;
}

bool RISCVInstrInfo::getMachineCombinerPatterns(
MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
bool DoRegPressureReduce) const {

if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
return true;

if (getSHXADDPatterns(Root, Patterns))
return true;

return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
DoRegPressureReduce);
}
@@ -1864,6 +1947,68 @@ static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
DelInstrs.push_back(&Root);
}

// Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
// (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
// shXadd instructions. The outer shXadd keeps its original opcode.
static void
genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
MachineFunction *MF = Root.getMF();
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

unsigned OuterShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
assert(OuterShiftAmt != 0 && "Unexpected opcode");

MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
MachineInstr *ShiftMI =
MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg());

unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");

unsigned InnerOpc;
switch (InnerShiftAmt - OuterShiftAmt) {
default:
llvm_unreachable("Unexpected shift amount");
case 0:
InnerOpc = RISCV::ADD;
break;
case 1:
InnerOpc = RISCV::SH1ADD;
break;
case 2:
InnerOpc = RISCV::SH2ADD;
break;
case 3:
InnerOpc = RISCV::SH3ADD;
break;
}

const MachineOperand &X = AddMI->getOperand(3 - AddOpIdx);
const MachineOperand &Y = ShiftMI->getOperand(1);
const MachineOperand &Z = Root.getOperand(1);

Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass);

auto MIB1 = BuildMI(*MF, MIMetadata(Root), TII->get(InnerOpc), NewVR)
.addReg(Y.getReg(), getKillRegState(Y.isKill()))
.addReg(Z.getReg(), getKillRegState(Z.isKill()));
auto MIB2 = BuildMI(*MF, MIMetadata(Root), TII->get(Root.getOpcode()),
Root.getOperand(0).getReg())
.addReg(NewVR, RegState::Kill)
.addReg(X.getReg(), getKillRegState(X.isKill()));

InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
InsInstrs.push_back(MIB1);
InsInstrs.push_back(MIB2);
DelInstrs.push_back(ShiftMI);
DelInstrs.push_back(AddMI);
DelInstrs.push_back(&Root);
}
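
The rewrite is an algebraic identity: with an outer shift of 3 and an inner
shift of 5, (Z << 3) + (X + (Y << 5)) equals (((Y << 2) + Z) << 3) + X, which
is exactly what the generated sh2add/sh3add pair computes. A minimal
standalone sketch of that identity (illustrative values only; shNadd is a
stand-in for the shXadd semantics, not an LLVM API):

#include <cassert>
#include <cstdint>

// Models RISC-V shNadd: (rs1 << N) + rs2.
static uint64_t shNadd(unsigned N, uint64_t Rs1, uint64_t Rs2) {
  return (Rs1 << N) + Rs2;
}

int main() {
  const uint64_t X = 0x1234, Y = 0x56, Z = 0x78;
  // Before: sh3add Z, (add X, (slli Y, 5))
  uint64_t Before = shNadd(3, Z, X + (Y << 5));
  // After: sh3add (sh2add Y, Z), X, where 2 = InnerShiftAmt - OuterShiftAmt.
  uint64_t After = shNadd(3, shNadd(2, Y, Z), X);
  assert(Before == After);
  return 0;
}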

void RISCVInstrInfo::genAlternativeCodeSequence(
MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -1887,6 +2032,12 @@ void RISCVInstrInfo::genAlternativeCodeSequence(
combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
return;
}
case MachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
return;
case MachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
return;
}
}

3 changes: 1 addition & 2 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -210,8 +210,7 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD :
(NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
.addReg(ScratchReg, RegState::Kill)
.addReg(SrcReg, getKillRegState(KillSrcReg))
.addReg(ScratchReg, RegState::Kill).addReg(SrcReg)
.setMIFlag(Flag);
} else {
TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
218 changes: 191 additions & 27 deletions llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -20,7 +20,12 @@
#include "SPIRVSubtarget.h"
#include "SPIRVTargetMachine.h"
#include "SPIRVUtils.h"
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/TypedPointerType.h"
#include "llvm/Support/Casting.h"
#include <cassert>

using namespace llvm;
SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize)
@@ -35,6 +40,15 @@ SPIRVType *SPIRVGlobalRegistry::assignIntTypeToVReg(unsigned BitWidth,
return SpirvType;
}

SPIRVType *
SPIRVGlobalRegistry::assignFloatTypeToVReg(unsigned BitWidth, Register VReg,
MachineInstr &I,
const SPIRVInstrInfo &TII) {
SPIRVType *SpirvType = getOrCreateSPIRVFloatType(BitWidth, I, TII);
assignSPIRVTypeToVReg(SpirvType, VReg, *CurMF);
return SpirvType;
}

SPIRVType *SPIRVGlobalRegistry::assignVectTypeToVReg(
SPIRVType *BaseType, unsigned NumElements, Register VReg, MachineInstr &I,
const SPIRVInstrInfo &TII) {
@@ -151,6 +165,8 @@ SPIRVGlobalRegistry::getOrCreateConstIntReg(uint64_t Val, SPIRVType *SpvType,
Register Res = DT.find(CI, CurMF);
if (!Res.isValid()) {
unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
// TODO: handle cases where the type is not 32bit wide
// TODO: https://github.com/llvm/llvm-project/issues/88129
LLT LLTy = LLT::scalar(32);
Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
CurMF->getRegInfo().setRegClass(Res, &SPIRV::IDRegClass);
@@ -164,9 +180,83 @@
return std::make_tuple(Res, CI, NewInstr);
}

std::tuple<Register, ConstantFP *, bool, unsigned>
SPIRVGlobalRegistry::getOrCreateConstFloatReg(APFloat Val, SPIRVType *SpvType,
MachineIRBuilder *MIRBuilder,
MachineInstr *I,
const SPIRVInstrInfo *TII) {
const Type *LLVMFloatTy;
LLVMContext &Ctx = CurMF->getFunction().getContext();
unsigned BitWidth = 32;
if (SpvType)
LLVMFloatTy = getTypeForSPIRVType(SpvType);
else {
LLVMFloatTy = Type::getFloatTy(Ctx);
if (MIRBuilder)
SpvType = getOrCreateSPIRVType(LLVMFloatTy, *MIRBuilder);
}
bool NewInstr = false;
// Find a constant in DT or build a new one.
auto *const CI = ConstantFP::get(Ctx, Val);
Register Res = DT.find(CI, CurMF);
if (!Res.isValid()) {
if (SpvType)
BitWidth = getScalarOrVectorBitWidth(SpvType);
// TODO: handle cases where the type is not 32bit wide
// TODO: https://github.com/llvm/llvm-project/issues/88129
LLT LLTy = LLT::scalar(32);
Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
CurMF->getRegInfo().setRegClass(Res, &SPIRV::IDRegClass);
if (MIRBuilder)
assignTypeToVReg(LLVMFloatTy, Res, *MIRBuilder);
else
assignFloatTypeToVReg(BitWidth, Res, *I, *TII);
DT.add(CI, CurMF, Res);
NewInstr = true;
}
return std::make_tuple(Res, CI, NewInstr, BitWidth);
}

Register SPIRVGlobalRegistry::getOrCreateConstFP(APFloat Val, MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII,
bool ZeroAsNull) {
assert(SpvType);
ConstantFP *CI;
Register Res;
bool New;
unsigned BitWidth;
std::tie(Res, CI, New, BitWidth) =
getOrCreateConstFloatReg(Val, SpvType, nullptr, &I, &TII);
// If the constant is new, or the register found in DT is the one defined by
// the passed G_CONSTANT machine instruction itself, a new constant
// instruction must be created; otherwise reuse the register found in DT.
if (!New && (!I.getOperand(0).isReg() || Res != I.getOperand(0).getReg()))
return Res;
MachineInstrBuilder MIB;
MachineBasicBlock &BB = *I.getParent();
// In OpenCL, OpConstantNull is used for scalar floating-point zero:
// +0.0 (all bits 0).
if (Val.isPosZero() && ZeroAsNull) {
MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
.addDef(Res)
.addUse(getSPIRVTypeID(SpvType));
} else {
MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantF))
.addDef(Res)
.addUse(getSPIRVTypeID(SpvType));
addNumImm(
APInt(BitWidth, CI->getValueAPF().bitcastToAPInt().getZExtValue()),
MIB);
}
const auto &ST = CurMF->getSubtarget();
constrainSelectedInstRegOperands(*MIB, *ST.getInstrInfo(),
*ST.getRegisterInfo(), *ST.getRegBankInfo());
return Res;
}
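
A note on the literal emitted above: OpConstantF takes the raw IEEE-754 bit
pattern of the value, which is why the APFloat is bitcast to an APInt of the
scalar bit width before being passed to addNumImm. A minimal sketch of that
conversion for a 32-bit float (uses LLVM's APFloat; illustrative only):

#include <cassert>
#include "llvm/ADT/APFloat.h"

int main() {
  llvm::APFloat One(1.0f);
  // The 32-bit IEEE-754 encoding of 1.0f is 0x3F800000, so that is the
  // literal an OpConstantF for float 1.0 would carry.
  assert(One.bitcastToAPInt().getZExtValue() == 0x3F800000u);
  return 0;
}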

Register SPIRVGlobalRegistry::getOrCreateConstInt(uint64_t Val, MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII) {
const SPIRVInstrInfo &TII,
bool ZeroAsNull) {
assert(SpvType);
ConstantInt *CI;
Register Res;
Expand All @@ -179,7 +269,7 @@ Register SPIRVGlobalRegistry::getOrCreateConstInt(uint64_t Val, MachineInstr &I,
return Res;
MachineInstrBuilder MIB;
MachineBasicBlock &BB = *I.getParent();
if (Val) {
if (Val || !ZeroAsNull) {
MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI))
.addDef(Res)
.addUse(getSPIRVTypeID(SpvType));
@@ -270,21 +360,46 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val,
return Res;
}

Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull(
uint64_t Val, MachineInstr &I, SPIRVType *SpvType,
Register SPIRVGlobalRegistry::getOrCreateBaseRegister(Constant *Val,
MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII,
unsigned BitWidth) {
SPIRVType *Type = SpvType;
if (SpvType->getOpcode() == SPIRV::OpTypeVector ||
SpvType->getOpcode() == SPIRV::OpTypeArray) {
auto EleTypeReg = SpvType->getOperand(1).getReg();
Type = getSPIRVTypeForVReg(EleTypeReg);
}
if (Type->getOpcode() == SPIRV::OpTypeFloat) {
SPIRVType *SpvBaseType = getOrCreateSPIRVFloatType(BitWidth, I, TII);
return getOrCreateConstFP(dyn_cast<ConstantFP>(Val)->getValue(), I,
SpvBaseType, TII);
}
assert(Type->getOpcode() == SPIRV::OpTypeInt);
SPIRVType *SpvBaseType = getOrCreateSPIRVIntegerType(BitWidth, I, TII);
return getOrCreateConstInt(Val->getUniqueInteger().getSExtValue(), I,
SpvBaseType, TII);
}

Register SPIRVGlobalRegistry::getOrCreateCompositeOrNull(
Constant *Val, MachineInstr &I, SPIRVType *SpvType,
const SPIRVInstrInfo &TII, Constant *CA, unsigned BitWidth,
unsigned ElemCnt) {
unsigned ElemCnt, bool ZeroAsNull) {
// Find a constant vector in DT or build a new one.
Register Res = DT.find(CA, CurMF);
// If no values are attached, the composite is null constant.
bool IsNull = Val->isNullValue() && ZeroAsNull;
if (!Res.isValid()) {
SPIRVType *SpvBaseType = getOrCreateSPIRVIntegerType(BitWidth, I, TII);
// SpvScalConst should be created before SpvVecConst to avoid undefined ID
// error on validation.
// TODO: can be moved below once sorting of types/consts/defs is implemented.
Register SpvScalConst;
if (Val)
SpvScalConst = getOrCreateConstInt(Val, I, SpvBaseType, TII);
// TODO: maybe use bitwidth of base type.
if (!IsNull)
SpvScalConst = getOrCreateBaseRegister(Val, I, SpvType, TII, BitWidth);

// TODO: handle cases where the type is not 32bit wide
// TODO: https://github.com/llvm/llvm-project/issues/88129
LLT LLTy = LLT::scalar(32);
Register SpvVecConst =
CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
@@ -293,7 +408,7 @@ Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull(
DT.add(CA, CurMF, SpvVecConst);
MachineInstrBuilder MIB;
MachineBasicBlock &BB = *I.getParent();
if (Val) {
if (!IsNull) {
MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantComposite))
.addDef(SpvVecConst)
.addUse(getSPIRVTypeID(SpvType));
@@ -313,20 +428,42 @@
return Res;
}

Register
SPIRVGlobalRegistry::getOrCreateConsIntVector(uint64_t Val, MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII) {
Register SPIRVGlobalRegistry::getOrCreateConstVector(uint64_t Val,
MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII,
bool ZeroAsNull) {
const Type *LLVMTy = getTypeForSPIRVType(SpvType);
assert(LLVMTy->isVectorTy());
const FixedVectorType *LLVMVecTy = cast<FixedVectorType>(LLVMTy);
Type *LLVMBaseTy = LLVMVecTy->getElementType();
const auto ConstInt = ConstantInt::get(LLVMBaseTy, Val);
auto ConstVec =
ConstantVector::getSplat(LLVMVecTy->getElementCount(), ConstInt);
assert(LLVMBaseTy->isIntegerTy());
auto *ConstVal = ConstantInt::get(LLVMBaseTy, Val);
auto *ConstVec =
ConstantVector::getSplat(LLVMVecTy->getElementCount(), ConstVal);
unsigned BW = getScalarOrVectorBitWidth(SpvType);
return getOrCreateIntCompositeOrNull(Val, I, SpvType, TII, ConstVec, BW,
SpvType->getOperand(2).getImm());
return getOrCreateCompositeOrNull(ConstVal, I, SpvType, TII, ConstVec, BW,
SpvType->getOperand(2).getImm(),
ZeroAsNull);
}

Register SPIRVGlobalRegistry::getOrCreateConstVector(APFloat Val,
MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII,
bool ZeroAsNull) {
const Type *LLVMTy = getTypeForSPIRVType(SpvType);
assert(LLVMTy->isVectorTy());
const FixedVectorType *LLVMVecTy = cast<FixedVectorType>(LLVMTy);
Type *LLVMBaseTy = LLVMVecTy->getElementType();
assert(LLVMBaseTy->isFloatingPointTy());
auto *ConstVal = ConstantFP::get(LLVMBaseTy, Val);
auto *ConstVec =
ConstantVector::getSplat(LLVMVecTy->getElementCount(), ConstVal);
unsigned BW = getScalarOrVectorBitWidth(SpvType);
return getOrCreateCompositeOrNull(ConstVal, I, SpvType, TII, ConstVec, BW,
SpvType->getOperand(2).getImm(),
ZeroAsNull);
}

Register
@@ -337,13 +474,13 @@ SPIRVGlobalRegistry::getOrCreateConsIntArray(uint64_t Val, MachineInstr &I,
assert(LLVMTy->isArrayTy());
const ArrayType *LLVMArrTy = cast<ArrayType>(LLVMTy);
Type *LLVMBaseTy = LLVMArrTy->getElementType();
const auto ConstInt = ConstantInt::get(LLVMBaseTy, Val);
auto ConstArr =
auto *ConstInt = ConstantInt::get(LLVMBaseTy, Val);
auto *ConstArr =
ConstantArray::get(const_cast<ArrayType *>(LLVMArrTy), {ConstInt});
SPIRVType *SpvBaseTy = getSPIRVTypeForVReg(SpvType->getOperand(1).getReg());
unsigned BW = getScalarOrVectorBitWidth(SpvBaseTy);
return getOrCreateIntCompositeOrNull(Val, I, SpvType, TII, ConstArr, BW,
LLVMArrTy->getNumElements());
return getOrCreateCompositeOrNull(ConstInt, I, SpvType, TII, ConstArr, BW,
LLVMArrTy->getNumElements());
}

Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull(
@@ -1093,21 +1230,48 @@ SPIRVType *SPIRVGlobalRegistry::finishCreatingSPIRVType(const Type *LLVMTy,
return SpirvType;
}

SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(
unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII) {
Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), BitWidth);
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(unsigned BitWidth,
MachineInstr &I,
const SPIRVInstrInfo &TII,
unsigned SPIRVOPcode,
Type *LLVMTy) {
Register Reg = DT.find(LLVMTy, CurMF);
if (Reg.isValid())
return getSPIRVTypeForVReg(Reg);
MachineBasicBlock &BB = *I.getParent();
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeInt))
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRVOPcode))
.addDef(createTypeVReg(CurMF->getRegInfo()))
.addImm(BitWidth)
.addImm(0);
DT.add(LLVMTy, CurMF, getSPIRVTypeID(MIB));
return finishCreatingSPIRVType(LLVMTy, MIB);
}

SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(
unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII) {
Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), BitWidth);
return getOrCreateSPIRVType(BitWidth, I, TII, SPIRV::OpTypeInt, LLVMTy);
}
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVFloatType(
unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII) {
LLVMContext &Ctx = CurMF->getFunction().getContext();
Type *LLVMTy;
switch (BitWidth) {
case 16:
LLVMTy = Type::getHalfTy(Ctx);
break;
case 32:
LLVMTy = Type::getFloatTy(Ctx);
break;
case 64:
LLVMTy = Type::getDoubleTy(Ctx);
break;
default:
llvm_unreachable("Bit width is of unexpected size.");
}
return getOrCreateSPIRVType(BitWidth, I, TII, SPIRV::OpTypeFloat, LLVMTy);
}

SPIRVType *
SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder) {
return getOrCreateSPIRVType(
42 changes: 33 additions & 9 deletions llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -20,6 +20,7 @@
#include "SPIRVDuplicatesTracker.h"
#include "SPIRVInstrInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/Constant.h"

namespace llvm {
using SPIRVType = const MachineInstr;
@@ -234,6 +235,8 @@ class SPIRVGlobalRegistry {
bool EmitIR = true);
SPIRVType *assignIntTypeToVReg(unsigned BitWidth, Register VReg,
MachineInstr &I, const SPIRVInstrInfo &TII);
SPIRVType *assignFloatTypeToVReg(unsigned BitWidth, Register VReg,
MachineInstr &I, const SPIRVInstrInfo &TII);
SPIRVType *assignVectTypeToVReg(SPIRVType *BaseType, unsigned NumElements,
Register VReg, MachineInstr &I,
const SPIRVInstrInfo &TII);
@@ -372,12 +375,20 @@ class SPIRVGlobalRegistry {
std::tuple<Register, ConstantInt *, bool> getOrCreateConstIntReg(
uint64_t Val, SPIRVType *SpvType, MachineIRBuilder *MIRBuilder,
MachineInstr *I = nullptr, const SPIRVInstrInfo *TII = nullptr);
std::tuple<Register, ConstantFP *, bool, unsigned> getOrCreateConstFloatReg(
APFloat Val, SPIRVType *SpvType, MachineIRBuilder *MIRBuilder,
MachineInstr *I = nullptr, const SPIRVInstrInfo *TII = nullptr);
SPIRVType *finishCreatingSPIRVType(const Type *LLVMTy, SPIRVType *SpirvType);
Register getOrCreateIntCompositeOrNull(uint64_t Val, MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII,
Constant *CA, unsigned BitWidth,
unsigned ElemCnt);
Register getOrCreateBaseRegister(Constant *Val, MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII,
unsigned BitWidth);
Register getOrCreateCompositeOrNull(Constant *Val, MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII, Constant *CA,
unsigned BitWidth, unsigned ElemCnt,
bool ZeroAsNull = true);

Register getOrCreateIntCompositeOrNull(uint64_t Val,
MachineIRBuilder &MIRBuilder,
SPIRVType *SpvType, bool EmitIR,
@@ -388,12 +399,20 @@ class SPIRVGlobalRegistry {
Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder,
SPIRVType *SpvType = nullptr, bool EmitIR = true);
Register getOrCreateConstInt(uint64_t Val, MachineInstr &I,
SPIRVType *SpvType, const SPIRVInstrInfo &TII);
SPIRVType *SpvType, const SPIRVInstrInfo &TII,
bool ZeroAsNull = true);
Register getOrCreateConstFP(APFloat Val, MachineInstr &I, SPIRVType *SpvType,
const SPIRVInstrInfo &TII,
bool ZeroAsNull = true);
Register buildConstantFP(APFloat Val, MachineIRBuilder &MIRBuilder,
SPIRVType *SpvType = nullptr);
Register getOrCreateConsIntVector(uint64_t Val, MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII);

Register getOrCreateConstVector(uint64_t Val, MachineInstr &I,
SPIRVType *SpvType, const SPIRVInstrInfo &TII,
bool ZeroAsNull = true);
Register getOrCreateConstVector(APFloat Val, MachineInstr &I,
SPIRVType *SpvType, const SPIRVInstrInfo &TII,
bool ZeroAsNull = true);
Register getOrCreateConsIntArray(uint64_t Val, MachineInstr &I,
SPIRVType *SpvType,
const SPIRVInstrInfo &TII);
@@ -423,6 +442,11 @@ class SPIRVGlobalRegistry {
MachineIRBuilder &MIRBuilder);
SPIRVType *getOrCreateSPIRVIntegerType(unsigned BitWidth, MachineInstr &I,
const SPIRVInstrInfo &TII);
SPIRVType *getOrCreateSPIRVType(unsigned BitWidth, MachineInstr &I,
const SPIRVInstrInfo &TII,
unsigned SPIRVOPcode, Type *LLVMTy);
SPIRVType *getOrCreateSPIRVFloatType(unsigned BitWidth, MachineInstr &I,
const SPIRVInstrInfo &TII);
SPIRVType *getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder);
SPIRVType *getOrCreateSPIRVBoolType(MachineInstr &I,
const SPIRVInstrInfo &TII);
98 changes: 95 additions & 3 deletions llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/Support/Debug.h"

@@ -144,6 +145,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectAddrSpaceCast(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;

bool selectAll(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;

bool selectBitreverse(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;

@@ -229,6 +233,7 @@ class SPIRVInstructionSelector : public InstructionSelector {
const SPIRVType *ResType = nullptr) const;

Register buildZerosVal(const SPIRVType *ResType, MachineInstr &I) const;
Register buildZerosValF(const SPIRVType *ResType, MachineInstr &I) const;
Register buildOnesVal(bool AllOnes, const SPIRVType *ResType,
MachineInstr &I) const;

@@ -1155,6 +1160,65 @@ static unsigned getBoolCmpOpcode(unsigned PredNum) {
}
}

bool SPIRVInstructionSelector::selectAll(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
assert(I.getNumOperands() == 3);
assert(I.getOperand(2).isReg());
MachineBasicBlock &BB = *I.getParent();
Register InputRegister = I.getOperand(2).getReg();
SPIRVType *InputType = GR.getSPIRVTypeForVReg(InputRegister);

if (!InputType)
report_fatal_error("Input Type could not be determined.");

bool IsBoolTy = GR.isScalarOrVectorOfType(InputRegister, SPIRV::OpTypeBool);
bool IsVectorTy = InputType->getOpcode() == SPIRV::OpTypeVector;
if (IsBoolTy && !IsVectorTy) {
assert(ResVReg == I.getOperand(0).getReg());
return BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(TargetOpcode::COPY))
.addDef(ResVReg)
.addUse(InputRegister)
.constrainAllUses(TII, TRI, RBI);
}

bool IsFloatTy = GR.isScalarOrVectorOfType(InputRegister, SPIRV::OpTypeFloat);
unsigned SpirvNotEqualId =
IsFloatTy ? SPIRV::OpFOrdNotEqual : SPIRV::OpINotEqual;
SPIRVType *SpvBoolScalarTy = GR.getOrCreateSPIRVBoolType(I, TII);
SPIRVType *SpvBoolTy = SpvBoolScalarTy;
Register NotEqualReg = ResVReg;

if (IsVectorTy) {
NotEqualReg = IsBoolTy ? InputRegister
: MRI->createVirtualRegister(&SPIRV::IDRegClass);
const unsigned NumElts = InputType->getOperand(2).getImm();
SpvBoolTy = GR.getOrCreateSPIRVVectorType(SpvBoolTy, NumElts, I, TII);
}

if (!IsBoolTy) {
Register ConstZeroReg =
IsFloatTy ? buildZerosValF(InputType, I) : buildZerosVal(InputType, I);

BuildMI(BB, I, I.getDebugLoc(), TII.get(SpirvNotEqualId))
.addDef(NotEqualReg)
.addUse(GR.getSPIRVTypeID(SpvBoolTy))
.addUse(InputRegister)
.addUse(ConstZeroReg)
.constrainAllUses(TII, TRI, RBI);
}

if (!IsVectorTy)
return true;

return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpAll))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(SpvBoolScalarTy))
.addUse(NotEqualReg)
.constrainAllUses(TII, TRI, RBI);
}

bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
@@ -1391,9 +1455,35 @@ bool SPIRVInstructionSelector::selectFCmp(Register ResVReg,

Register SPIRVInstructionSelector::buildZerosVal(const SPIRVType *ResType,
MachineInstr &I) const {
// OpenCL uses nulls for Zero. In HLSL we don't use null constants.
bool ZeroAsNull = STI.isOpenCLEnv();
if (ResType->getOpcode() == SPIRV::OpTypeVector)
return GR.getOrCreateConstVector(0UL, I, ResType, TII, ZeroAsNull);
return GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull);
}

static APFloat getZeroFP(const Type *LLVMFloatTy) {
if (!LLVMFloatTy)
return APFloat::getZero(APFloat::IEEEsingle());
switch (LLVMFloatTy->getScalarType()->getTypeID()) {
case Type::HalfTyID:
return APFloat::getZero(APFloat::IEEEhalf());
default:
case Type::FloatTyID:
return APFloat::getZero(APFloat::IEEEsingle());
case Type::DoubleTyID:
return APFloat::getZero(APFloat::IEEEdouble());
}
}

Register SPIRVInstructionSelector::buildZerosValF(const SPIRVType *ResType,
MachineInstr &I) const {
// OpenCL uses nulls for Zero. In HLSL we don't use null constants.
bool ZeroAsNull = STI.isOpenCLEnv();
APFloat VZero = getZeroFP(GR.getTypeForSPIRVType(ResType));
if (ResType->getOpcode() == SPIRV::OpTypeVector)
return GR.getOrCreateConsIntVector(0, I, ResType, TII);
return GR.getOrCreateConstInt(0, I, ResType, TII);
return GR.getOrCreateConstVector(VZero, I, ResType, TII, ZeroAsNull);
return GR.getOrCreateConstFP(VZero, I, ResType, TII, ZeroAsNull);
}

Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes,
@@ -1403,7 +1493,7 @@ Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes,
APInt One =
AllOnes ? APInt::getAllOnes(BitWidth) : APInt::getOneBitSet(BitWidth, 0);
if (ResType->getOpcode() == SPIRV::OpTypeVector)
return GR.getOrCreateConsIntVector(One.getZExtValue(), I, ResType, TII);
return GR.getOrCreateConstVector(One.getZExtValue(), I, ResType, TII);
return GR.getOrCreateConstInt(One.getZExtValue(), I, ResType, TII);
}

@@ -1785,6 +1875,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
break;
case Intrinsic::spv_thread_id:
return selectSpvThreadId(ResVReg, ResType, I);
case Intrinsic::spv_all:
return selectAll(ResVReg, ResType, I);
case Intrinsic::spv_lifetime_start:
case Intrinsic::spv_lifetime_end: {
unsigned Op = IID == Intrinsic::spv_lifetime_start ? SPIRV::OpLifetimeStart
16 changes: 12 additions & 4 deletions llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -374,13 +374,21 @@ Type *parseBasicTypeName(StringRef TypeName, LLVMContext &Ctx) {
return Type::getVoidTy(Ctx);
else if (TypeName.consume_front("bool"))
return Type::getIntNTy(Ctx, 1);
else if (TypeName.consume_front("char") || TypeName.consume_front("uchar"))
else if (TypeName.consume_front("char") ||
TypeName.consume_front("unsigned char") ||
TypeName.consume_front("uchar"))
return Type::getInt8Ty(Ctx);
else if (TypeName.consume_front("short") || TypeName.consume_front("ushort"))
else if (TypeName.consume_front("short") ||
TypeName.consume_front("unsigned short") ||
TypeName.consume_front("ushort"))
return Type::getInt16Ty(Ctx);
else if (TypeName.consume_front("int") || TypeName.consume_front("uint"))
else if (TypeName.consume_front("int") ||
TypeName.consume_front("unsigned int") ||
TypeName.consume_front("uint"))
return Type::getInt32Ty(Ctx);
else if (TypeName.consume_front("long") || TypeName.consume_front("ulong"))
else if (TypeName.consume_front("long") ||
TypeName.consume_front("unsigned long") ||
TypeName.consume_front("ulong"))
return Type::getInt64Ty(Ctx);
else if (TypeName.consume_front("half"))
return Type::getHalfTy(Ctx);
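
With the added spellings, demangled OpenCL names written with an explicit
"unsigned" prefix resolve to the same integer types as their "u"-prefixed
forms. A rough usage sketch (assumes parseBasicTypeName is visible via the
target's SPIRVUtils.h and keeps the StringRef parameter shown in the hunk
header; the helper below is hypothetical):

#include <cassert>
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static void checkUnsignedSpellings(LLVMContext &Ctx) {
  StringRef A = "unsigned int";
  StringRef B = "uint";
  // Both spellings are expected to map to the same 32-bit integer type.
  assert(parseBasicTypeName(A, Ctx) == Type::getInt32Ty(Ctx));
  assert(parseBasicTypeName(B, Ctx) == Type::getInt32Ty(Ctx));
}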
22 changes: 22 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1496,6 +1496,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom);

if (Subtarget.hasGFNI()) {
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
}

for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
@@ -31332,6 +31337,23 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitVectorIntUnary(Op, DAG, DL);

// Lower i32/i64 to GFNI as vXi8 BITREVERSE + BSWAP
if (!VT.isVector()) {

assert((VT.getScalarType() == MVT::i32) ||
(VT.getScalarType() == MVT::i64));

MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
DAG.getBitcast(MVT::v16i8, Res));
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::BSWAP, DL, VT, Res);
}

assert(VT.isVector() && VT.getSizeInBits() >= 128);

// Lower vXi16/vXi32/vXi64 as BSWAP + vXi8 BITREVERSE.
if (VT.getScalarType() != MVT::i8) {
MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
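
The scalar GFNI path added above relies on the identity that reversing all
bits of an i32/i64 equals reversing the bits within each byte (the vXi8
BITREVERSE) and then swapping the bytes (the BSWAP). A small self-contained
check of that identity for the 32-bit case (plain C++, not DAG code):

#include <cassert>
#include <cstdint>

static uint32_t reverseBits32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return R;
}

static uint32_t reverseBits8(uint32_t Byte) {
  uint32_t R = 0;
  for (int I = 0; I < 8; ++I)
    R |= ((Byte >> I) & 1u) << (7 - I);
  return R;
}

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) | (V << 24);
}

int main() {
  uint32_t X = 0x12345678u;
  // Reverse the bits inside each byte (what the v16i8 BITREVERSE does
  // lane-wise) ...
  uint32_t PerByte = 0;
  for (int B = 0; B < 4; ++B)
    PerByte |= reverseBits8((X >> (8 * B)) & 0xFFu) << (8 * B);
  // ... then byte-swap; the result is the full 32-bit bit reversal.
  assert(bswap32(PerByte) == reverseBits32(X));
  return 0;
}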
12 changes: 7 additions & 5 deletions llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -712,11 +712,13 @@ static bool canCreateThunkFor(Function *F) {
return true;
}

/// Copy metadata from one function to another.
static void copyMetadataIfPresent(Function *From, Function *To, StringRef Key) {
if (MDNode *MD = From->getMetadata(Key)) {
To->setMetadata(Key, MD);
}
/// Copy all metadata of a specific kind from one function to another.
static void copyMetadataIfPresent(Function *From, Function *To,
StringRef Kind) {
SmallVector<MDNode *, 4> MDs;
From->getMetadata(Kind, MDs);
for (MDNode *MD : MDs)
To->addMetadata(Kind, *MD);
}
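
The switch from setMetadata to addMetadata matters when a function carries
several attachments of the same kind (for example multiple !type attachments):
the SmallVector overload of getMetadata collects all of them, and addMetadata
appends each one rather than overwriting the previous attachment. A rough
usage sketch (the helper name and the "type" kind are illustrative):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Copies every "type" attachment from F to G, preserving duplicates of the
// same kind instead of keeping only the last one.
static void copyAllTypeMetadata(Function *F, Function *G) {
  SmallVector<MDNode *, 4> MDs;
  F->getMetadata("type", MDs);
  for (MDNode *MD : MDs)
    G->addMetadata("type", *MD);
}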

// Replace G with a simple tail call to bitcast(F). Also (unless
24 changes: 24 additions & 0 deletions llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3715,8 +3715,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getOrigin(&I, 0));
}

void handleArithmeticWithOverflow(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
Value *Shadow0 = getShadow(&I, 0);
Value *Shadow1 = getShadow(&I, 1);
Value *ShadowElt0 = IRB.CreateOr(Shadow0, Shadow1);
Value *ShadowElt1 =
IRB.CreateICmpNE(ShadowElt0, getCleanShadow(ShadowElt0));

Value *Shadow = PoisonValue::get(getShadowTy(&I));
Shadow = IRB.CreateInsertValue(Shadow, ShadowElt0, 0);
Shadow = IRB.CreateInsertValue(Shadow, ShadowElt1, 1);

setShadow(&I, Shadow);
setOriginForNaryOp(I);
}
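
The propagation rule above reads: the value element of the {result, overflow}
pair gets the OR of the operand shadows, and the i1 overflow flag is treated
as fully uninitialized whenever any input bit is uninitialized. A tiny model
of that rule on plain integer shadows (illustrative, not MSan code):

#include <cassert>
#include <cstdint>

struct OverflowShadow {
  uint32_t Value; // bitwise shadow of the arithmetic result
  bool Overflow;  // shadow of the i1 overflow flag
};

// Mirrors handleArithmeticWithOverflow: Sv = Sa | Sb, So = (Sv != 0).
static OverflowShadow propagate(uint32_t Sa, uint32_t Sb) {
  uint32_t Sv = Sa | Sb;
  return {Sv, Sv != 0};
}

int main() {
  // Fully initialized inputs give a clean result and a clean flag.
  assert(!propagate(0, 0).Overflow);
  // A single poisoned bit in either input poisons the whole overflow flag.
  assert(propagate(0x4, 0).Overflow);
  return 0;
}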

void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
handleArithmeticWithOverflow(I);
break;
case Intrinsic::abs:
handleAbsIntrinsic(I);
break;
138 changes: 135 additions & 3 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2995,6 +2995,15 @@ class BoUpSLP {
return ScalarToTreeEntry.lookup(V);
}

/// Checks that the operand node of an alternate node does not generate a
/// buildvector sequence. If it does, it is likely not worth building an
/// alternate shuffle, since the number of buildvector operands plus the
/// alternate instruction would exceed the number of buildvector instructions.
/// \param S the instructions state of the analyzed values.
/// \param VL list of the instructions with alternate opcodes.
bool areAltOperandsProfitable(const InstructionsState &S,
ArrayRef<Value *> VL) const;

/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
TreeEntry::EntryState getScalarsVectorizationState(
@@ -5777,6 +5786,117 @@ static bool isAlternateInstruction(const Instruction *I,
const Instruction *AltOp,
const TargetLibraryInfo &TLI);

bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
ArrayRef<Value *> VL) const {
unsigned Opcode0 = S.getOpcode();
unsigned Opcode1 = S.getAltOpcode();
// The opcode mask selects between the two opcodes.
SmallBitVector OpcodeMask(VL.size(), false);
for (unsigned Lane : seq<unsigned>(0, VL.size()))
if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
OpcodeMask.set(Lane);
// If this pattern is supported by the target then consider it profitable.
if (TTI->isLegalAltInstr(FixedVectorType::get(S.MainOp->getType(), VL.size()),
Opcode0, Opcode1, OpcodeMask))
return true;
SmallVector<ValueList> Operands;
for (unsigned I : seq<unsigned>(0, S.MainOp->getNumOperands())) {
Operands.emplace_back();
// Prepare the operand vector.
for (Value *V : VL)
Operands.back().push_back(cast<Instruction>(V)->getOperand(I));
}
if (Operands.size() == 2) {
// Try to find the best operand candidates.
for (unsigned I : seq<unsigned>(0, VL.size() - 1)) {
SmallVector<std::pair<Value *, Value *>> Candidates(3);
Candidates[0] = std::make_pair(Operands[0][I], Operands[0][I + 1]);
Candidates[1] = std::make_pair(Operands[0][I], Operands[1][I + 1]);
Candidates[2] = std::make_pair(Operands[1][I], Operands[0][I + 1]);
std::optional<int> Res = findBestRootPair(Candidates);
switch (Res.value_or(0)) {
case 0:
break;
case 1:
std::swap(Operands[0][I + 1], Operands[1][I + 1]);
break;
case 2:
std::swap(Operands[0][I], Operands[1][I]);
break;
default:
llvm_unreachable("Unexpected index.");
}
}
}
DenseSet<unsigned> UniqueOpcodes;
constexpr unsigned NumAltInsts = 3; // main + alt + shuffle.
unsigned NonInstCnt = 0;
// Estimate the number of instructions required for the vectorized node and
// for the buildvector node.
unsigned UndefCnt = 0;
// Count the number of extra shuffles required for vector nodes.
unsigned ExtraShuffleInsts = 0;
// Check that the operands do not contain the same values, creating either a
// perfect diamond match or a shuffled match.
if (Operands.size() == 2) {
// Do not count same operands twice.
if (Operands.front() == Operands.back()) {
Operands.erase(Operands.begin());
} else if (!allConstant(Operands.front()) &&
all_of(Operands.front(), [&](Value *V) {
return is_contained(Operands.back(), V);
})) {
Operands.erase(Operands.begin());
++ExtraShuffleInsts;
}
}
const Loop *L = LI->getLoopFor(S.MainOp->getParent());
// Vectorize the node if:
// 1. At least one operand is constant or splat.
// 2. The operands have many loop invariants (while the instructions
// themselves are not loop invariant).
// 3. At least one unique operand is expected to be vectorized.
return none_of(Operands,
[&](ArrayRef<Value *> Op) {
if (allConstant(Op) ||
(!isSplat(Op) && allSameBlock(Op) && allSameType(Op) &&
getSameOpcode(Op, *TLI).MainOp))
return false;
DenseMap<Value *, unsigned> Uniques;
for (Value *V : Op) {
if (isa<Constant, ExtractElementInst>(V) ||
getTreeEntry(V) || (L && L->isLoopInvariant(V))) {
if (isa<UndefValue>(V))
++UndefCnt;
continue;
}
auto Res = Uniques.try_emplace(V, 0);
// Found first duplicate - need to add shuffle.
if (!Res.second && Res.first->second == 1)
++ExtraShuffleInsts;
++Res.first->getSecond();
if (auto *I = dyn_cast<Instruction>(V))
UniqueOpcodes.insert(I->getOpcode());
else if (Res.second)
++NonInstCnt;
}
return none_of(Uniques, [&](const auto &P) {
return P.first->hasNUsesOrMore(P.second + 1) &&
none_of(P.first->users(), [&](User *U) {
return getTreeEntry(U) || Uniques.contains(U);
});
});
}) ||
// Do not vectorize the node if the estimated number of vector instructions
// is greater than the estimated number of buildvector instructions. The
// vector cost is the number of vector instructions plus the number of
// vector instructions needed for the operands (buildvectors). The
// buildvector cost is simply number_of_operands * number_of_scalars.
(UndefCnt < (VL.size() - 1) * S.MainOp->getNumOperands() &&
(UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts +
NumAltInsts) < S.MainOp->getNumOperands() * VL.size());
}
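
As a concrete instance of the final comparison: for a 4-lane node whose main
opcode has two operands, the buildvector estimate is 2 * 4 = 8 insertions, so
an alternate node with one unique operand opcode, no non-instruction operands,
one extra shuffle and the fixed main + alt + shuffle cost of 3 totals 5 and is
kept (assuming the undef guard also passes). A tiny sketch of that arithmetic
(illustrative numbers only):

#include <cassert>

int main() {
  const unsigned NumLanes = 4, NumOperands = 2;
  const unsigned UniqueOpcodes = 1, NonInstCnt = 0, ExtraShuffleInsts = 1;
  const unsigned NumAltInsts = 3; // main + alt + shuffle
  unsigned VectorCost =
      UniqueOpcodes + NonInstCnt + ExtraShuffleInsts + NumAltInsts;
  unsigned BuildVectorCost = NumOperands * NumLanes;
  assert(VectorCost < BuildVectorCost); // 5 < 8: keep the alternate node.
  return 0;
}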

BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const {
@@ -6074,6 +6194,14 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return TreeEntry::NeedToGather;
}
if (!areAltOperandsProfitable(S, VL)) {
LLVM_DEBUG(
dbgs()
<< "SLP: ShuffleVector not vectorized, operands are buildvector and "
"the whole alt sequence is not profitable.\n");
return TreeEntry::NeedToGather;
}

return TreeEntry::Vectorize;
}
default:
@@ -10736,9 +10864,13 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
[](Value *V) {
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
})) ||
all_of(E->Scalars, [](Value *V) {
return !isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V);
}))
all_of(E->Scalars,
[](Value *V) {
return !isVectorLikeInstWithConstOps(V) &&
isUsedOutsideBlock(V);
}) ||
(E->State == TreeEntry::NeedToGather && E->Idx == 0 &&
all_of(E->Scalars, IsaPred<ExtractElementInst, UndefValue>)))
Res.second = FindLastInst();
else
Res.second = FindFirstInst();
66 changes: 40 additions & 26 deletions llvm/test/CodeGen/AArch64/lrint-conv.ll
@@ -1,64 +1,78 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=aarch64 | FileCheck %s --check-prefixes=FALLBACK,CHECK
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for testmswl
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmsll

; CHECK-LABEL: testmsws:
; CHECK: frintx [[REG:s[0-9]]], s0
; CHECK-NEXT: fcvtzs x0, [[REG]]
; CHECK: ret
; FALLBACK-NOT: remark{{.*}}testmsws
define i32 @testmsws(float %x) {
; CHECK-LABEL: testmsws:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: frintx s0, s0
; CHECK-NEXT: fcvtzs x0, s0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.lrint.i64.f32(float %x)
%conv = trunc i64 %0 to i32
ret i32 %conv
}

; CHECK-LABEL: testmsxs:
; CHECK: frintx [[REG:s[0-9]]], s0
; CHECK-NEXT: fcvtzs x0, [[REG]]
; CHECK-NEXT: ret
; FALLBACK-NOT: remark{{.*}}testmsxs
define i64 @testmsxs(float %x) {
; CHECK-LABEL: testmsxs:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: frintx s0, s0
; CHECK-NEXT: fcvtzs x0, s0
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.lrint.i64.f32(float %x)
ret i64 %0
}

; CHECK-LABEL: testmswd:
; CHECK: frintx [[REG:d[0-9]]], d0
; CHECK-NEXT: fcvtzs x0, [[REG]]
; CHECK: ret
; FALLBACK-NOT: remark{{.*}}testmswd
define i32 @testmswd(double %x) {
; CHECK-LABEL: testmswd:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: frintx d0, d0
; CHECK-NEXT: fcvtzs x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.lrint.i64.f64(double %x)
%conv = trunc i64 %0 to i32
ret i32 %conv
}

; CHECK-LABEL: testmsxd:
; CHECK: frintx [[REG:d[0-9]]], d0
; CHECK-NEXT: fcvtzs x0, [[REG]]
; CHECK-NEXT: ret
; FALLBACK-NOT: remark{{.*}}testmsxd
define i64 @testmsxd(double %x) {
; CHECK-LABEL: testmsxd:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: frintx d0, d0
; CHECK-NEXT: fcvtzs x0, d0
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.lrint.i64.f64(double %x)
ret i64 %0
}

; CHECK-LABEL: testmswl:
; CHECK: bl lrintl
define dso_local i32 @testmswl(fp128 %x) {
; CHECK-LABEL: testmswl:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl lrintl
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
%conv = trunc i64 %0 to i32
ret i32 %conv
}

; CHECK-LABEL: testmsll:
; CHECK: b lrintl
define dso_local i64 @testmsll(fp128 %x) {
; CHECK-LABEL: testmsll:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: b lrintl
entry:
%0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
ret i64 %0
39 changes: 30 additions & 9 deletions llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -1,6 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+neon | FileCheck %s
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+neon | FileCheck %s
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for lrint_v1f16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f64

define <1 x i64> @lrint_v1f16(<1 x half> %x) {
; CHECK-LABEL: lrint_v1f16:
@@ -372,13 +386,20 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>)

define <1 x i64> @lrint_v1f32(<1 x float> %x) {
; CHECK-LABEL: lrint_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: frintx s0, s0
; CHECK-NEXT: fcvtzs x8, s0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
; CHECK-SD-LABEL: lrint_v1f32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: frintx s0, s0
; CHECK-SD-NEXT: fcvtzs x8, s0
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: lrint_v1f32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: frintx s0, s0
; CHECK-GI-NEXT: fcvtzs x8, s0
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
%a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x)
ret <1 x i64> %a
}
1,658 changes: 1,658 additions & 0 deletions llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll

Large diffs are not rendered by default.

318 changes: 135 additions & 183 deletions llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll

Large diffs are not rendered by default.

1,272 changes: 540 additions & 732 deletions llvm/test/CodeGen/RISCV/interrupt-attr.ll

Large diffs are not rendered by default.

40 changes: 16 additions & 24 deletions llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1404,9 +1404,8 @@ define i64 @sh6_sh3_add2(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
;
; RV64ZBA-LABEL: sh6_sh3_add2:
; RV64ZBA: # %bb.0: # %entry
; RV64ZBA-NEXT: slli a1, a1, 6
; RV64ZBA-NEXT: add a0, a1, a0
; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: sh3add a1, a1, a2
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
entry:
%shl = shl i64 %z, 3
@@ -2111,9 +2110,8 @@ define i64 @array_index_sh1_sh3(ptr %p, i64 %idx1, i64 %idx2) {
;
; RV64ZBA-LABEL: array_index_sh1_sh3:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a1, 4
; RV64ZBA-NEXT: add a0, a0, a1
; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: sh1add a1, a1, a2
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
%a = getelementptr inbounds [2 x i64], ptr %p, i64 %idx1, i64 %idx2
@@ -2174,9 +2172,8 @@ define i32 @array_index_sh2_sh2(ptr %p, i64 %idx1, i64 %idx2) {
;
; RV64ZBA-LABEL: array_index_sh2_sh2:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a1, 4
; RV64ZBA-NEXT: add a0, a0, a1
; RV64ZBA-NEXT: sh2add a0, a2, a0
; RV64ZBA-NEXT: sh2add a1, a1, a2
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
%a = getelementptr inbounds [4 x i32], ptr %p, i64 %idx1, i64 %idx2
@@ -2196,9 +2193,8 @@ define i64 @array_index_sh2_sh3(ptr %p, i64 %idx1, i64 %idx2) {
;
; RV64ZBA-LABEL: array_index_sh2_sh3:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a1, 5
; RV64ZBA-NEXT: add a0, a0, a1
; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: sh2add a1, a1, a2
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
%a = getelementptr inbounds [4 x i64], ptr %p, i64 %idx1, i64 %idx2
@@ -2238,9 +2234,8 @@ define i16 @array_index_sh3_sh1(ptr %p, i64 %idx1, i64 %idx2) {
;
; RV64ZBA-LABEL: array_index_sh3_sh1:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a1, 4
; RV64ZBA-NEXT: add a0, a0, a1
; RV64ZBA-NEXT: sh1add a0, a2, a0
; RV64ZBA-NEXT: sh3add a1, a1, a2
; RV64ZBA-NEXT: sh1add a0, a1, a0
; RV64ZBA-NEXT: lh a0, 0(a0)
; RV64ZBA-NEXT: ret
%a = getelementptr inbounds [8 x i16], ptr %p, i64 %idx1, i64 %idx2
@@ -2260,9 +2255,8 @@ define i32 @array_index_sh3_sh2(ptr %p, i64 %idx1, i64 %idx2) {
;
; RV64ZBA-LABEL: array_index_sh3_sh2:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a1, 5
; RV64ZBA-NEXT: add a0, a0, a1
; RV64ZBA-NEXT: sh2add a0, a2, a0
; RV64ZBA-NEXT: sh3add a1, a1, a2
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
%a = getelementptr inbounds [8 x i32], ptr %p, i64 %idx1, i64 %idx2
@@ -2282,9 +2276,8 @@ define i64 @array_index_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) {
;
; RV64ZBA-LABEL: array_index_sh3_sh3:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a1, 6
; RV64ZBA-NEXT: add a0, a0, a1
; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: sh3add a1, a1, a2
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
%a = getelementptr inbounds [8 x i64], ptr %p, i64 %idx1, i64 %idx2
@@ -2308,9 +2301,8 @@ define i64 @array_index_lshr_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-LABEL: array_index_lshr_sh3_sh3:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srli a1, a1, 58
; RV64ZBA-NEXT: slli a1, a1, 6
; RV64ZBA-NEXT: add a0, a0, a1
; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: sh3add a1, a1, a2
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
%shr = lshr i64 %idx1, 58
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -674,6 +674,26 @@ define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison(<vscale x 4
ret <vscale x 8 x i32> %res
}

define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison2(<vscale x 4 x i32> %a) {
; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32_poison2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf2 v12, v8
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsll.vx v8, v12, a0
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison2:
; ZVBB: # %bb.0:
; ZVBB-NEXT: li a0, 32
; ZVBB-NEXT: vsetvli a1, zero, e32, m2, ta, ma
; ZVBB-NEXT: vwsll.vx v12, v8, a0
; ZVBB-NEXT: vmv4r.v v8, v12
; ZVBB-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a)
ret <vscale x 8 x i32> %res
}

declare <vscale x 64 x half> @llvm.experimental.vector.interleave2.nxv64f16(<vscale x 32 x half>, <vscale x 32 x half>)
declare <vscale x 32 x float> @llvm.experimental.vector.interleave2.nxv32f32(<vscale x 16 x float>, <vscale x 16 x float>)
declare <vscale x 16 x double> @llvm.experimental.vector.interleave2.nxv16f64(<vscale x 8 x double>, <vscale x 8 x double>)
9 changes: 9 additions & 0 deletions llvm/test/CodeGen/SPIRV/SampledImageRetType.ll
@@ -8,6 +8,8 @@ declare dso_local spir_func ptr addrspace(4) @_Z20__spirv_SampledImageI14ocl_ima

declare dso_local spir_func <4 x float> @_Z30__spirv_ImageSampleExplicitLodIPvDv4_fiET0_T_T1_if(ptr addrspace(4) %0, i32 %1, i32 %2, float %3) local_unnamed_addr

declare dso_local spir_func <4 x i32> @_Z30__spirv_ImageSampleExplicitLodI32__spirv_SampledImage__image1d_roDv4_jfET0_T_T1_if(target("spirv.SampledImage", void, 0, 0, 0, 0, 0, 0, 0) %0, float %1, i32 %2, float %3) local_unnamed_addr

@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(2) constant <3 x i64>, align 32

define weak_odr dso_local spir_kernel void @_ZTS17image_kernel_readILi1EE(target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 0), target("spirv.Sampler")) {
@@ -25,3 +27,10 @@ define weak_odr dso_local spir_kernel void @_ZTS17image_kernel_readILi1EE(target

ret void
}

define weak_odr dso_local spir_kernel void @foo_lod(target("spirv.SampledImage", void, 0, 0, 0, 0, 0, 0, 0) %_arg) {
%lod = call spir_func <4 x i32> @_Z30__spirv_ImageSampleExplicitLodI32__spirv_SampledImage__image1d_roDv4_jfET0_T_T1_if(target("spirv.SampledImage", void, 0, 0, 0, 0, 0, 0, 0) %_arg, float 0x3FE7FFEB00000000, i32 2, float 0.000000e+00)
; CHECK: %[[#sampled_image_lod:]] = OpFunctionParameter %[[#sampled_image_t]]
; CHECK: %[[#]] = OpImageSampleExplicitLod %[[#]] %[[#sampled_image_lod]] %[[#]] {{.*}} %[[#]]
ret void
}
187 changes: 187 additions & 0 deletions llvm/test/CodeGen/SPIRV/hlsl-intrinsics/all.ll
@@ -0,0 +1,187 @@
; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HLSL
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OCL
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
; Make sure the SPIR-V operations for the "all" intrinsic are generated.

; CHECK-HLSL-DAG: OpMemoryModel Logical GLSL450
; CHECK-OCL-DAG: OpMemoryModel Physical32 OpenCL
; CHECK-DAG: OpName %[[#all_bool_arg:]] "a"
; CHECK-DAG: %[[#int_64:]] = OpTypeInt 64 0
; CHECK-DAG: %[[#bool:]] = OpTypeBool
; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0
; CHECK-DAG: %[[#int_16:]] = OpTypeInt 16 0
; CHECK-DAG: %[[#float_64:]] = OpTypeFloat 64
; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
; CHECK-DAG: %[[#vec4_bool:]] = OpTypeVector %[[#bool]] 4
; CHECK-DAG: %[[#vec4_16:]] = OpTypeVector %[[#int_16]] 4
; CHECK-DAG: %[[#vec4_32:]] = OpTypeVector %[[#int_32]] 4
; CHECK-DAG: %[[#vec4_64:]] = OpTypeVector %[[#int_64]] 4
; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
; CHECK-DAG: %[[#vec4_float_64:]] = OpTypeVector %[[#float_64]] 4

; CHECK-HLSL-DAG: %[[#const_i64_0:]] = OpConstant %[[#int_64]] 0
; CHECK-HLSL-DAG: %[[#const_i32_0:]] = OpConstant %[[#int_32]] 0
; CHECK-HLSL-DAG: %[[#const_i16_0:]] = OpConstant %[[#int_16]] 0
; CHECK-HLSL-DAG: %[[#const_f64_0:]] = OpConstant %[[#float_64]] 0
; CHECK-HLSL-DAG: %[[#const_f32_0:]] = OpConstant %[[#float_32:]] 0
; CHECK-HLSL-DAG: %[[#const_f16_0:]] = OpConstant %[[#float_16:]] 0
; CHECK-HLSL-DAG: %[[#vec4_const_zeros_i16:]] = OpConstantComposite %[[#vec4_16:]] %[[#const_i16_0:]] %[[#const_i16_0:]] %[[#const_i16_0:]] %[[#const_i16_0:]]
; CHECK-HLSL-DAG: %[[#vec4_const_zeros_i32:]] = OpConstantComposite %[[#vec4_32:]] %[[#const_i32_0:]] %[[#const_i32_0:]] %[[#const_i32_0:]] %[[#const_i32_0:]]
; CHECK-HLSL-DAG: %[[#vec4_const_zeros_i64:]] = OpConstantComposite %[[#vec4_64:]] %[[#const_i64_0:]] %[[#const_i64_0:]] %[[#const_i64_0:]] %[[#const_i64_0:]]
; CHECK-HLSL-DAG: %[[#vec4_const_zeros_f16:]] = OpConstantComposite %[[#vec4_float_16:]] %[[#const_f16_0:]] %[[#const_f16_0:]] %[[#const_f16_0:]] %[[#const_f16_0:]]
; CHECK-HLSL-DAG: %[[#vec4_const_zeros_f32:]] = OpConstantComposite %[[#vec4_float_32:]] %[[#const_f32_0:]] %[[#const_f32_0:]] %[[#const_f32_0:]] %[[#const_f32_0:]]
; CHECK-HLSL-DAG: %[[#vec4_const_zeros_f64:]] = OpConstantComposite %[[#vec4_float_64:]] %[[#const_f64_0:]] %[[#const_f64_0:]] %[[#const_f64_0:]] %[[#const_f64_0:]]

; CHECK-OCL-DAG: %[[#const_i64_0:]] = OpConstantNull %[[#int_64]]
; CHECK-OCL-DAG: %[[#const_i32_0:]] = OpConstantNull %[[#int_32]]
; CHECK-OCL-DAG: %[[#const_i16_0:]] = OpConstantNull %[[#int_16]]
; CHECK-OCL-DAG: %[[#const_f64_0:]] = OpConstantNull %[[#float_64]]
; CHECK-OCL-DAG: %[[#const_f32_0:]] = OpConstantNull %[[#float_32:]]
; CHECK-OCL-DAG: %[[#const_f16_0:]] = OpConstantNull %[[#float_16:]]
; CHECK-OCL-DAG: %[[#vec4_const_zeros_i16:]] = OpConstantNull %[[#vec4_16:]]
; CHECK-OCL-DAG: %[[#vec4_const_zeros_i32:]] = OpConstantNull %[[#vec4_32:]]
; CHECK-OCL-DAG: %[[#vec4_const_zeros_i64:]] = OpConstantNull %[[#vec4_64:]]
; CHECK-OCL-DAG: %[[#vec4_const_zeros_f16:]] = OpConstantNull %[[#vec4_float_16:]]
; CHECK-OCL-DAG: %[[#vec4_const_zeros_f32:]] = OpConstantNull %[[#vec4_float_32:]]
; CHECK-OCL-DAG: %[[#vec4_const_zeros_f64:]] = OpConstantNull %[[#vec4_float_64:]]

define noundef i1 @all_int64_t(i64 noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#]] = OpINotEqual %[[#bool:]] %[[#arg0:]] %[[#const_i64_0:]]
%hlsl.all = call i1 @llvm.spv.all.i64(i64 %p0)
ret i1 %hlsl.all
}


define noundef i1 @all_int(i32 noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#]] = OpINotEqual %[[#bool:]] %[[#arg0:]] %[[#const_i32_0:]]
%hlsl.all = call i1 @llvm.spv.all.i32(i32 %p0)
ret i1 %hlsl.all
}


define noundef i1 @all_int16_t(i16 noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#]] = OpINotEqual %[[#bool:]] %[[#arg0:]] %[[#const_i16_0:]]
%hlsl.all = call i1 @llvm.spv.all.i16(i16 %p0)
ret i1 %hlsl.all
}

define noundef i1 @all_double(double noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#]] = OpFOrdNotEqual %[[#bool:]] %[[#arg0:]] %[[#const_f64_0:]]
%hlsl.all = call i1 @llvm.spv.all.f64(double %p0)
ret i1 %hlsl.all
}


define noundef i1 @all_float(float noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#]] = OpFOrdNotEqual %[[#bool:]] %[[#arg0:]] %[[#const_f32_0:]]
%hlsl.all = call i1 @llvm.spv.all.f32(float %p0)
ret i1 %hlsl.all
}


define noundef i1 @all_half(half noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#]] = OpFOrdNotEqual %[[#bool:]] %[[#arg0:]] %[[#const_f16_0:]]
%hlsl.all = call i1 @llvm.spv.all.f16(half %p0)
ret i1 %hlsl.all
}


define noundef i1 @all_bool4(<4 x i1> noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#]] = OpAll %[[#vec4_bool:]] %[[#arg0:]]
%hlsl.all = call i1 @llvm.spv.all.v4i1(<4 x i1> %p0)
ret i1 %hlsl.all
}

define noundef i1 @all_short4(<4 x i16> noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#shortVecNotEq:]] = OpINotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_i16:]]
; CHECK: %[[#]] = OpAll %[[#bool:]] %[[#shortVecNotEq:]]
%hlsl.all = call i1 @llvm.spv.all.v4i16(<4 x i16> %p0)
ret i1 %hlsl.all
}

define noundef i1 @all_int4(<4 x i32> noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#i32VecNotEq:]] = OpINotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_i32:]]
; CHECK: %[[#]] = OpAll %[[#bool:]] %[[#i32VecNotEq:]]
%hlsl.all = call i1 @llvm.spv.all.v4i32(<4 x i32> %p0)
ret i1 %hlsl.all
}

define noundef i1 @all_int64_t4(<4 x i64> noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#i64VecNotEq:]] = OpINotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_i64:]]
; CHECK: %[[#]] = OpAll %[[#bool:]] %[[#i64VecNotEq]]
%hlsl.all = call i1 @llvm.spv.all.v4i64(<4 x i64> %p0)
ret i1 %hlsl.all
}

define noundef i1 @all_half4(<4 x half> noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#f16VecNotEq:]] = OpFOrdNotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_f16:]]
; CHECK: %[[#]] = OpAll %[[#bool]] %[[#f16VecNotEq:]]
%hlsl.all = call i1 @llvm.spv.all.v4f16(<4 x half> %p0)
ret i1 %hlsl.all
}

define noundef i1 @all_float4(<4 x float> noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#f32VecNotEq:]] = OpFOrdNotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_f32:]]
; CHECK: %[[#]] = OpAll %[[#bool:]] %[[#f32VecNotEq:]]
%hlsl.all = call i1 @llvm.spv.all.v4f32(<4 x float> %p0)
ret i1 %hlsl.all
}

define noundef i1 @all_double4(<4 x double> noundef %p0) {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#f64VecNotEq:]] = OpFOrdNotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_f64:]]
; CHECK: %[[#]] = OpAll %[[#bool:]] %[[#f64VecNotEq:]]
%hlsl.all = call i1 @llvm.spv.all.v4f64(<4 x double> %p0)
ret i1 %hlsl.all
}

define noundef i1 @all_bool(i1 noundef %a) {
entry:
; CHECK: %[[#all_bool_arg:]] = OpFunctionParameter %[[#bool:]]
; CHECK: OpReturnValue %[[#all_bool_arg:]]
%hlsl.all = call i1 @llvm.spv.all.i1(i1 %a)
ret i1 %hlsl.all
}

declare i1 @llvm.spv.all.v4f16(<4 x half>)
declare i1 @llvm.spv.all.v4f32(<4 x float>)
declare i1 @llvm.spv.all.v4f64(<4 x double>)
declare i1 @llvm.spv.all.v4i1(<4 x i1>)
declare i1 @llvm.spv.all.v4i16(<4 x i16>)
declare i1 @llvm.spv.all.v4i32(<4 x i32>)
declare i1 @llvm.spv.all.v4i64(<4 x i64>)
declare i1 @llvm.spv.all.i1(i1)
declare i1 @llvm.spv.all.i16(i16)
declare i1 @llvm.spv.all.i32(i32)
declare i1 @llvm.spv.all.i64(i64)
declare i1 @llvm.spv.all.f16(half)
declare i1 @llvm.spv.all.f32(float)
declare i1 @llvm.spv.all.f64(double)
281 changes: 54 additions & 227 deletions llvm/test/CodeGen/X86/bitreverse.ll
@@ -172,26 +172,10 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
;
; GFNI-LABEL: test_bitreverse_i64:
; GFNI: # %bb.0:
; GFNI-NEXT: bswapq %rdi
; GFNI-NEXT: movq %rdi, %rax
; GFNI-NEXT: shrq $4, %rax
; GFNI-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
; GFNI-NEXT: andq %rcx, %rax
; GFNI-NEXT: andq %rcx, %rdi
; GFNI-NEXT: shlq $4, %rdi
; GFNI-NEXT: orq %rax, %rdi
; GFNI-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; GFNI-NEXT: movq %rdi, %rcx
; GFNI-NEXT: andq %rax, %rcx
; GFNI-NEXT: shrq $2, %rdi
; GFNI-NEXT: andq %rax, %rdi
; GFNI-NEXT: leaq (%rdi,%rcx,4), %rax
; GFNI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; GFNI-NEXT: movq %rax, %rdx
; GFNI-NEXT: andq %rcx, %rdx
; GFNI-NEXT: shrq %rax
; GFNI-NEXT: andq %rcx, %rax
; GFNI-NEXT: leaq (%rax,%rdx,2), %rax
; GFNI-NEXT: vmovq %rdi, %xmm0
; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; GFNI-NEXT: vmovq %xmm0, %rax
; GFNI-NEXT: bswapq %rax
; GFNI-NEXT: retq
%b = call i64 @llvm.bitreverse.i64(i64 %a)
ret i64 %b
@@ -253,24 +237,10 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
;
; GFNI-LABEL: test_bitreverse_i32:
; GFNI: # %bb.0:
; GFNI-NEXT: # kill: def $edi killed $edi def $rdi
; GFNI-NEXT: bswapl %edi
; GFNI-NEXT: movl %edi, %eax
; GFNI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; GFNI-NEXT: shll $4, %eax
; GFNI-NEXT: shrl $4, %edi
; GFNI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; GFNI-NEXT: orl %eax, %edi
; GFNI-NEXT: movl %edi, %eax
; GFNI-NEXT: andl $858993459, %eax # imm = 0x33333333
; GFNI-NEXT: shrl $2, %edi
; GFNI-NEXT: andl $858993459, %edi # imm = 0x33333333
; GFNI-NEXT: leal (%rdi,%rax,4), %eax
; GFNI-NEXT: movl %eax, %ecx
; GFNI-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; GFNI-NEXT: shrl %eax
; GFNI-NEXT: andl $1431655765, %eax # imm = 0x55555555
; GFNI-NEXT: leal (%rax,%rcx,2), %eax
; GFNI-NEXT: vmovd %edi, %xmm0
; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; GFNI-NEXT: vmovd %xmm0, %eax
; GFNI-NEXT: bswapl %eax
; GFNI-NEXT: retq
%b = call i32 @llvm.bitreverse.i32(i32 %a)
ret i32 %b
@@ -335,24 +305,10 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
;
; GFNI-LABEL: test_bitreverse_i24:
; GFNI: # %bb.0:
; GFNI-NEXT: # kill: def $edi killed $edi def $rdi
; GFNI-NEXT: bswapl %edi
; GFNI-NEXT: movl %edi, %eax
; GFNI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; GFNI-NEXT: shll $4, %eax
; GFNI-NEXT: shrl $4, %edi
; GFNI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; GFNI-NEXT: orl %eax, %edi
; GFNI-NEXT: movl %edi, %eax
; GFNI-NEXT: andl $858993459, %eax # imm = 0x33333333
; GFNI-NEXT: shrl $2, %edi
; GFNI-NEXT: andl $858993459, %edi # imm = 0x33333333
; GFNI-NEXT: leal (%rdi,%rax,4), %eax
; GFNI-NEXT: movl %eax, %ecx
; GFNI-NEXT: andl $1431655680, %ecx # imm = 0x55555500
; GFNI-NEXT: shrl %eax
; GFNI-NEXT: andl $1431655680, %eax # imm = 0x55555500
; GFNI-NEXT: leal (%rax,%rcx,2), %eax
; GFNI-NEXT: vmovd %edi, %xmm0
; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; GFNI-NEXT: vmovd %xmm0, %eax
; GFNI-NEXT: bswapl %eax
; GFNI-NEXT: shrl $8, %eax
; GFNI-NEXT: retq
%b = call i24 @llvm.bitreverse.i24(i24 %a)
@@ -1412,196 +1368,67 @@ define i528 @large_promotion(i528 %A) nounwind {
;
; GFNI-LABEL: large_promotion:
; GFNI: # %bb.0:
; GFNI-NEXT: pushq %r15
; GFNI-NEXT: pushq %r14
; GFNI-NEXT: pushq %r13
; GFNI-NEXT: pushq %r12
; GFNI-NEXT: pushq %rbx
; GFNI-NEXT: movq %rdi, %rax
; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %r12
; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %r15
; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; GFNI-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9241421688590303745,9241421688590303745]
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
; GFNI-NEXT: vmovq %xmm1, %r10
; GFNI-NEXT: bswapq %r10
; GFNI-NEXT: vmovq %r9, %xmm1
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
; GFNI-NEXT: vmovq %xmm1, %rdi
; GFNI-NEXT: bswapq %rdi
; GFNI-NEXT: movq %rdi, %r10
; GFNI-NEXT: shrq $4, %r10
; GFNI-NEXT: movabsq $1085102592571150095, %r11 # imm = 0xF0F0F0F0F0F0F0F
; GFNI-NEXT: andq %r11, %r10
; GFNI-NEXT: andq %r11, %rdi
; GFNI-NEXT: shlq $4, %rdi
; GFNI-NEXT: orq %r10, %rdi
; GFNI-NEXT: movabsq $3689348814741910323, %r10 # imm = 0x3333333333333333
; GFNI-NEXT: movq %rdi, %r14
; GFNI-NEXT: andq %r10, %r14
; GFNI-NEXT: shrq $2, %rdi
; GFNI-NEXT: andq %r10, %rdi
; GFNI-NEXT: leaq (%rdi,%r14,4), %rdi
; GFNI-NEXT: movabsq $6148820866244280320, %r14 # imm = 0x5555000000000000
; GFNI-NEXT: movq %rdi, %r13
; GFNI-NEXT: andq %r14, %r13
; GFNI-NEXT: shrq %rdi
; GFNI-NEXT: andq %r14, %rdi
; GFNI-NEXT: leaq (%rdi,%r13,2), %rdi
; GFNI-NEXT: bswapq %rbx
; GFNI-NEXT: movq %rbx, %r14
; GFNI-NEXT: shrq $4, %r14
; GFNI-NEXT: andq %r11, %r14
; GFNI-NEXT: andq %r11, %rbx
; GFNI-NEXT: shlq $4, %rbx
; GFNI-NEXT: orq %r14, %rbx
; GFNI-NEXT: movq %rbx, %r14
; GFNI-NEXT: andq %r10, %r14
; GFNI-NEXT: shrq $2, %rbx
; GFNI-NEXT: andq %r10, %rbx
; GFNI-NEXT: leaq (%rbx,%r14,4), %rbx
; GFNI-NEXT: movabsq $6148914691236517205, %r14 # imm = 0x5555555555555555
; GFNI-NEXT: movq %rbx, %r13
; GFNI-NEXT: andq %r14, %r13
; GFNI-NEXT: shrq %rbx
; GFNI-NEXT: andq %r14, %rbx
; GFNI-NEXT: leaq (%rbx,%r13,2), %rbx
; GFNI-NEXT: shrdq $48, %rbx, %rdi
; GFNI-NEXT: bswapq %r15
; GFNI-NEXT: movq %r15, %r13
; GFNI-NEXT: shrq $4, %r13
; GFNI-NEXT: andq %r11, %r13
; GFNI-NEXT: andq %r11, %r15
; GFNI-NEXT: shlq $4, %r15
; GFNI-NEXT: orq %r13, %r15
; GFNI-NEXT: movq %r15, %r13
; GFNI-NEXT: andq %r10, %r13
; GFNI-NEXT: shrq $2, %r15
; GFNI-NEXT: andq %r10, %r15
; GFNI-NEXT: leaq (%r15,%r13,4), %r15
; GFNI-NEXT: movq %r15, %r13
; GFNI-NEXT: andq %r14, %r13
; GFNI-NEXT: shrq %r15
; GFNI-NEXT: andq %r14, %r15
; GFNI-NEXT: leaq (%r15,%r13,2), %r15
; GFNI-NEXT: shrdq $48, %r15, %rbx
; GFNI-NEXT: bswapq %r12
; GFNI-NEXT: movq %r12, %r13
; GFNI-NEXT: shrq $4, %r13
; GFNI-NEXT: andq %r11, %r13
; GFNI-NEXT: andq %r11, %r12
; GFNI-NEXT: shlq $4, %r12
; GFNI-NEXT: orq %r13, %r12
; GFNI-NEXT: movq %r12, %r13
; GFNI-NEXT: andq %r10, %r13
; GFNI-NEXT: shrq $2, %r12
; GFNI-NEXT: andq %r10, %r12
; GFNI-NEXT: leaq (%r12,%r13,4), %r12
; GFNI-NEXT: movq %r12, %r13
; GFNI-NEXT: andq %r14, %r13
; GFNI-NEXT: shrq %r12
; GFNI-NEXT: andq %r14, %r12
; GFNI-NEXT: leaq (%r12,%r13,2), %r12
; GFNI-NEXT: shrdq $48, %r12, %r15
; GFNI-NEXT: bswapq %r9
; GFNI-NEXT: movq %r9, %r13
; GFNI-NEXT: shrq $4, %r13
; GFNI-NEXT: andq %r11, %r13
; GFNI-NEXT: andq %r11, %r9
; GFNI-NEXT: shlq $4, %r9
; GFNI-NEXT: orq %r13, %r9
; GFNI-NEXT: movq %r9, %r13
; GFNI-NEXT: andq %r10, %r13
; GFNI-NEXT: shrq $2, %r9
; GFNI-NEXT: andq %r10, %r9
; GFNI-NEXT: leaq (%r9,%r13,4), %r9
; GFNI-NEXT: movq %r9, %r13
; GFNI-NEXT: andq %r14, %r13
; GFNI-NEXT: shrq %r9
; GFNI-NEXT: andq %r14, %r9
; GFNI-NEXT: leaq (%r9,%r13,2), %r9
; GFNI-NEXT: shrdq $48, %r9, %r12
; GFNI-NEXT: vmovq %r8, %xmm1
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
; GFNI-NEXT: vmovq %xmm1, %r8
; GFNI-NEXT: bswapq %r8
; GFNI-NEXT: movq %r8, %r13
; GFNI-NEXT: shrq $4, %r13
; GFNI-NEXT: andq %r11, %r13
; GFNI-NEXT: andq %r11, %r8
; GFNI-NEXT: shlq $4, %r8
; GFNI-NEXT: orq %r13, %r8
; GFNI-NEXT: movq %r8, %r13
; GFNI-NEXT: andq %r10, %r13
; GFNI-NEXT: shrq $2, %r8
; GFNI-NEXT: andq %r10, %r8
; GFNI-NEXT: leaq (%r8,%r13,4), %r8
; GFNI-NEXT: movq %r8, %r13
; GFNI-NEXT: andq %r14, %r13
; GFNI-NEXT: shrq %r8
; GFNI-NEXT: andq %r14, %r8
; GFNI-NEXT: leaq (%r8,%r13,2), %r8
; GFNI-NEXT: shrdq $48, %r8, %r9
; GFNI-NEXT: movq %r8, %r9
; GFNI-NEXT: shldq $16, %rdi, %r9
; GFNI-NEXT: shldq $16, %r10, %rdi
; GFNI-NEXT: vmovq %rcx, %xmm1
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
; GFNI-NEXT: vmovq %xmm1, %rcx
; GFNI-NEXT: bswapq %rcx
; GFNI-NEXT: movq %rcx, %r13
; GFNI-NEXT: shrq $4, %r13
; GFNI-NEXT: andq %r11, %r13
; GFNI-NEXT: andq %r11, %rcx
; GFNI-NEXT: shlq $4, %rcx
; GFNI-NEXT: orq %r13, %rcx
; GFNI-NEXT: movq %rcx, %r13
; GFNI-NEXT: andq %r10, %r13
; GFNI-NEXT: shrq $2, %rcx
; GFNI-NEXT: andq %r10, %rcx
; GFNI-NEXT: leaq (%rcx,%r13,4), %rcx
; GFNI-NEXT: movq %rcx, %r13
; GFNI-NEXT: andq %r14, %r13
; GFNI-NEXT: shrq %rcx
; GFNI-NEXT: andq %r14, %rcx
; GFNI-NEXT: leaq (%rcx,%r13,2), %rcx
; GFNI-NEXT: shrdq $48, %rcx, %r8
; GFNI-NEXT: vmovq %rdx, %xmm1
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
; GFNI-NEXT: vmovq %xmm1, %rdx
; GFNI-NEXT: bswapq %rdx
; GFNI-NEXT: movq %rdx, %r13
; GFNI-NEXT: shrq $4, %r13
; GFNI-NEXT: andq %r11, %r13
; GFNI-NEXT: andq %r11, %rdx
; GFNI-NEXT: shlq $4, %rdx
; GFNI-NEXT: orq %r13, %rdx
; GFNI-NEXT: movq %rdx, %r13
; GFNI-NEXT: andq %r10, %r13
; GFNI-NEXT: shrq $2, %rdx
; GFNI-NEXT: andq %r10, %rdx
; GFNI-NEXT: leaq (%rdx,%r13,4), %rdx
; GFNI-NEXT: movq %rdx, %r13
; GFNI-NEXT: andq %r14, %r13
; GFNI-NEXT: shrq %rdx
; GFNI-NEXT: andq %r14, %rdx
; GFNI-NEXT: leaq (%rdx,%r13,2), %rdx
; GFNI-NEXT: shrdq $48, %rdx, %rcx
; GFNI-NEXT: vmovq %rsi, %xmm1
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
; GFNI-NEXT: vmovq %xmm1, %rsi
; GFNI-NEXT: bswapq %rsi
; GFNI-NEXT: movq %rsi, %r13
; GFNI-NEXT: shrq $4, %r13
; GFNI-NEXT: andq %r11, %r13
; GFNI-NEXT: andq %r11, %rsi
; GFNI-NEXT: shlq $4, %rsi
; GFNI-NEXT: orq %r13, %rsi
; GFNI-NEXT: movq %rsi, %r11
; GFNI-NEXT: andq %r10, %r11
; GFNI-NEXT: shrq $2, %rsi
; GFNI-NEXT: andq %r10, %rsi
; GFNI-NEXT: leaq (%rsi,%r11,4), %rsi
; GFNI-NEXT: movq %rsi, %r10
; GFNI-NEXT: andq %r14, %r10
; GFNI-NEXT: shrq %rsi
; GFNI-NEXT: andq %r14, %rsi
; GFNI-NEXT: leaq (%rsi,%r10,2), %rsi
; GFNI-NEXT: shrdq $48, %rsi, %rdx
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
; GFNI-NEXT: vmovq %xmm1, %r11
; GFNI-NEXT: bswapq %r11
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
; GFNI-NEXT: vmovq %xmm1, %rbx
; GFNI-NEXT: bswapq %rbx
; GFNI-NEXT: shrdq $48, %rbx, %r11
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0
; GFNI-NEXT: vmovq %xmm0, %r14
; GFNI-NEXT: bswapq %r14
; GFNI-NEXT: shrdq $48, %r14, %rbx
; GFNI-NEXT: shrdq $48, %r10, %r14
; GFNI-NEXT: shrq $48, %rsi
; GFNI-NEXT: movq %r14, 16(%rax)
; GFNI-NEXT: movq %rbx, 8(%rax)
; GFNI-NEXT: movq %r11, (%rax)
; GFNI-NEXT: movq %rdx, 56(%rax)
; GFNI-NEXT: movq %rcx, 48(%rax)
; GFNI-NEXT: movq %r8, 40(%rax)
; GFNI-NEXT: movq %r9, 32(%rax)
; GFNI-NEXT: movq %r12, 24(%rax)
; GFNI-NEXT: movq %r15, 16(%rax)
; GFNI-NEXT: movq %rbx, 8(%rax)
; GFNI-NEXT: movq %rdi, (%rax)
; GFNI-NEXT: movq %rdi, 24(%rax)
; GFNI-NEXT: movw %si, 64(%rax)
; GFNI-NEXT: popq %rbx
; GFNI-NEXT: popq %r12
; GFNI-NEXT: popq %r13
; GFNI-NEXT: popq %r14
; GFNI-NEXT: popq %r15
; GFNI-NEXT: retq
%Z = call i528 @llvm.bitreverse.i528(i528 %A)
ret i528 %Z
46 changes: 8 additions & 38 deletions llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -276,24 +276,10 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
;
; GFNIAVX-LABEL: test_bitreverse_i32:
; GFNIAVX: # %bb.0:
; GFNIAVX-NEXT: # kill: def $edi killed $edi def $rdi
; GFNIAVX-NEXT: bswapl %edi
; GFNIAVX-NEXT: movl %edi, %eax
; GFNIAVX-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; GFNIAVX-NEXT: shll $4, %eax
; GFNIAVX-NEXT: shrl $4, %edi
; GFNIAVX-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; GFNIAVX-NEXT: orl %eax, %edi
; GFNIAVX-NEXT: movl %edi, %eax
; GFNIAVX-NEXT: andl $858993459, %eax # imm = 0x33333333
; GFNIAVX-NEXT: shrl $2, %edi
; GFNIAVX-NEXT: andl $858993459, %edi # imm = 0x33333333
; GFNIAVX-NEXT: leal (%rdi,%rax,4), %eax
; GFNIAVX-NEXT: movl %eax, %ecx
; GFNIAVX-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; GFNIAVX-NEXT: shrl %eax
; GFNIAVX-NEXT: andl $1431655765, %eax # imm = 0x55555555
; GFNIAVX-NEXT: leal (%rax,%rcx,2), %eax
; GFNIAVX-NEXT: vmovd %edi, %xmm0
; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; GFNIAVX-NEXT: vmovd %xmm0, %eax
; GFNIAVX-NEXT: bswapl %eax
; GFNIAVX-NEXT: retq
%b = call i32 @llvm.bitreverse.i32(i32 %a)
ret i32 %b
@@ -381,26 +367,10 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
;
; GFNIAVX-LABEL: test_bitreverse_i64:
; GFNIAVX: # %bb.0:
; GFNIAVX-NEXT: bswapq %rdi
; GFNIAVX-NEXT: movq %rdi, %rax
; GFNIAVX-NEXT: shrq $4, %rax
; GFNIAVX-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
; GFNIAVX-NEXT: andq %rcx, %rax
; GFNIAVX-NEXT: andq %rcx, %rdi
; GFNIAVX-NEXT: shlq $4, %rdi
; GFNIAVX-NEXT: orq %rax, %rdi
; GFNIAVX-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; GFNIAVX-NEXT: movq %rdi, %rcx
; GFNIAVX-NEXT: andq %rax, %rcx
; GFNIAVX-NEXT: shrq $2, %rdi
; GFNIAVX-NEXT: andq %rax, %rdi
; GFNIAVX-NEXT: leaq (%rdi,%rcx,4), %rax
; GFNIAVX-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; GFNIAVX-NEXT: movq %rax, %rdx
; GFNIAVX-NEXT: andq %rcx, %rdx
; GFNIAVX-NEXT: shrq %rax
; GFNIAVX-NEXT: andq %rcx, %rax
; GFNIAVX-NEXT: leaq (%rax,%rdx,2), %rax
; GFNIAVX-NEXT: vmovq %rdi, %xmm0
; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; GFNIAVX-NEXT: vmovq %xmm0, %rax
; GFNIAVX-NEXT: bswapq %rax
; GFNIAVX-NEXT: retq
%b = call i64 @llvm.bitreverse.i64(i64 %a)
ret i64 %b
103 changes: 35 additions & 68 deletions llvm/test/Instrumentation/MemorySanitizer/overflow.ll
@@ -10,16 +10,12 @@ define {i64, i1} @test_sadd_with_overflow(i64 %a, i64 %b) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1
; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 [[A]], i64 [[B]])
; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { i64, i1 } [[RES]]
;
%res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
@@ -32,16 +28,12 @@ define {i64, i1} @test_uadd_with_overflow(i64 %a, i64 %b) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1
; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A]], i64 [[B]])
; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { i64, i1 } [[RES]]
;
%res = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
@@ -54,16 +46,12 @@ define {i64, i1} @test_smul_with_overflow(i64 %a, i64 %b) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1
; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 [[A]], i64 [[B]])
; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { i64, i1 } [[RES]]
;
%res = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
@@ -75,16 +63,12 @@ define {i64, i1} @test_umul_with_overflow(i64 %a, i64 %b) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1
; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A]], i64 [[B]])
; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { i64, i1 } [[RES]]
;
%res = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
@@ -96,16 +80,12 @@ define {i64, i1} @test_ssub_with_overflow(i64 %a, i64 %b) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1
; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 [[A]], i64 [[B]])
; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { i64, i1 } [[RES]]
;
%res = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
@@ -117,16 +97,12 @@ define {i64, i1} @test_usub_with_overflow(i64 %a, i64 %b) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1
; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[A]], i64 [[B]])
; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { i64, i1 } [[RES]]
;
%res = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
@@ -139,25 +115,16 @@ define {<4 x i32>, <4 x i1>} @test_sadd_with_overflow_vec(<4 x i32> %a, <4 x i32
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
; CHECK: 5:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: 6:
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { <4 x i32>, <4 x i1> } poison, <4 x i32> [[TMP3]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <4 x i32>, <4 x i1> } [[TMP5]], <4 x i1> [[TMP4]], 1
; CHECK-NEXT: [[RES:%.*]] = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
; CHECK-NEXT: store { <4 x i32>, <4 x i1> } zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store { <4 x i32>, <4 x i1> } [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { <4 x i32>, <4 x i1> } [[RES]]
;
%res = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> %a, <4 x i32> %b)
ret { <4 x i32>, <4 x i1> } %res
}

attributes #0 = { sanitize_memory }
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000}
;.