9 changes: 6 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -289,9 +289,11 @@ static const LLT F64 = LLT::float64()
static const LLT S96 = LLT::scalar(96);
static const LLT S128 = LLT::scalar(128);
static const LLT S160 = LLT::scalar(160);
static const LLT S192 = LLT::scalar(192);
static const LLT S224 = LLT::scalar(224);
static const LLT S256 = LLT::scalar(256);
static const LLT S512 = LLT::scalar(512);
static const LLT S1024 = LLT::scalar(1024);
static const LLT MaxScalar = LLT::scalar(MaxRegisterSize);

static const LLT V2S8 = LLT::fixed_vector(2, 8);
@@ -332,8 +334,8 @@ static const LLT V16S64 = LLT::fixed_vector(16, 64);
static const LLT V2S128 = LLT::fixed_vector(2, 128);
static const LLT V4S128 = LLT::fixed_vector(4, 128);

static std::initializer_list<LLT> AllScalarTypes = {S32, S64, S96, S128,
S160, S224, S256, S512};
static std::initializer_list<LLT> AllScalarTypes = {
S32, S64, S96, S128, S160, S192, S224, S256, S512, S1024};

static std::initializer_list<LLT> AllS16Vectors{
V2S16, V4S16, V6S16, V8S16, V10S16, V12S16, V16S16, V2S128, V4S128};
@@ -889,10 +891,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(0, S16, S64);

getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
.legalIf(isRegisterType(0))
.legalIf(isRegisterClassType(0))
// s1 and s16 are special cases because they have legal operations on
// them, but don't really occupy registers in the normal way.
.legalFor({S1, S16})
.clampNumElements(0, V16S32, V32S32)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.clampScalarOrElt(0, S32, MaxScalar)
.widenScalarToNextPow2(0, 32)
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6797,8 +6797,7 @@ SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op,
LHS = DAG.getNode(ExtOp, DL, ExtTy, {LHS});

// Special case: for shifts, the RHS always needs a zext.
if (Op.getOpcode() == ISD::SRA || Op.getOpcode() == ISD::SRL ||
Op.getOpcode() == ISD::SRA)
if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS});
else
RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS});
23 changes: 23 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -761,6 +761,29 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
LoadOpc = RISCV::LBU;
break;
}
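// Fold vmv.x.s of a reloaded vector into a scalar load from the spill slot:
// element 0 sits at the start of the slot, so an LB/LH/LW/LD of that address
// yields the same value, as long as SEW fits in XLEN.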
if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VMV_X_S) {
unsigned Log2SEW =
MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
if (STI.getXLen() < (1 << Log2SEW))
return nullptr;
switch (Log2SEW) {
case 3:
LoadOpc = RISCV::LB;
break;
case 4:
LoadOpc = RISCV::LH;
break;
case 5:
LoadOpc = RISCV::LW;
break;
case 6:
LoadOpc = RISCV::LD;
break;
default:
llvm_unreachable("Unexpected SEW");
}
break;
}
return nullptr;
case RISCV::SEXT_H:
LoadOpc = RISCV::LH;
35 changes: 28 additions & 7 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1908,6 +1908,29 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Args, CxtI);

// f16 with zvfhmin and bf16 will be promoted to f32.
// FIXME: nxv32[b]f16 will be custom lowered and split.
unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
InstructionCost CastCost = 0;
if ((LT.second.getVectorElementType() == MVT::f16 ||
LT.second.getVectorElementType() == MVT::bf16) &&
TLI->getOperationAction(ISDOpcode, LT.second) ==
TargetLoweringBase::LegalizeAction::Promote) {
MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
Type *PromotedTy = EVT(PromotedVT).getTypeForEVT(Ty->getContext());
Type *LegalTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
// Add cost of extending arguments
CastCost += LT.first * Args.size() *
getCastInstrCost(Instruction::FPExt, PromotedTy, LegalTy,
TTI::CastContextHint::None, CostKind);
// Add cost of truncating result
CastCost +=
LT.first * getCastInstrCost(Instruction::FPTrunc, LegalTy, PromotedTy,
TTI::CastContextHint::None, CostKind);
// Compute cost of op in promoted type
LT.second = PromotedVT;
}
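// For example, with +zvfhmin an fadd on <vscale x 4 x half> is costed as one
// FPExt per operand, the fadd on <vscale x 4 x float>, and one FPTrunc of the
// result.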

auto getConstantMatCost =
[&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand))
@@ -1929,7 +1952,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
ConstantMatCost += getConstantMatCost(1, Op2Info);

unsigned Op;
switch (TLI->InstructionOpcodeToISD(Opcode)) {
switch (ISDOpcode) {
case ISD::ADD:
case ISD::SUB:
Op = RISCV::VADD_VV;
@@ -1959,11 +1982,9 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
break;
case ISD::FADD:
case ISD::FSUB:
// TODO: Address FP16 with VFHMIN
Op = RISCV::VFADD_VV;
break;
case ISD::FMUL:
// TODO: Address FP16 with VFHMIN
Op = RISCV::VFMUL_VV;
break;
case ISD::FDIV:
@@ -1975,9 +1996,9 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
default:
// Assuming all other instructions have the same cost until a need arises to
// differentiate them.
return ConstantMatCost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
Op1Info, Op2Info,
Args, CxtI);
return CastCost + ConstantMatCost +
BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Args, CxtI);
}

InstructionCost InstrCost = getRISCVInstructionCost(Op, LT.second, CostKind);
@@ -1986,7 +2007,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
// scalar floating point ops aren't cheaper than their vector equivalents.
if (Ty->isFPOrFPVectorTy())
InstrCost *= 2;
return ConstantMatCost + LT.first * InstrCost;
return CastCost + ConstantMatCost + LT.first * InstrCost;
}

// TODO: Deduplicate from TargetTransformInfoImplCRTPBase.
52 changes: 52 additions & 0 deletions llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//

#include "XtensaAsmPrinter.h"
#include "MCTargetDesc/XtensaInstPrinter.h"
#include "MCTargetDesc/XtensaMCExpr.h"
#include "MCTargetDesc/XtensaTargetStreamer.h"
#include "TargetInfo/XtensaTargetInfo.h"
@@ -157,6 +158,57 @@ void XtensaAsmPrinter::emitConstantPool() {
OutStreamer->popSection();
}

void XtensaAsmPrinter::printOperand(const MachineInstr *MI, int OpNo,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);

switch (MO.getType()) {
case MachineOperand::MO_Register:
case MachineOperand::MO_Immediate: {
MCOperand MC = lowerOperand(MI->getOperand(OpNo));
XtensaInstPrinter::printOperand(MC, O);
break;
}
default:
llvm_unreachable("unknown operand type");
}
}

bool XtensaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &O) {
// Print the operand if there is no operand modifier.
if (!ExtraCode || !ExtraCode[0]) {
printOperand(MI, OpNo, O);
return false;
}

// Fallback to the default implementation.
return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
}

bool XtensaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNo,
const char *ExtraCode,
raw_ostream &OS) {
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.

assert(OpNo + 1 < MI->getNumOperands() && "Insufficient operands");

const MachineOperand &Base = MI->getOperand(OpNo);
const MachineOperand &Offset = MI->getOperand(OpNo + 1);

assert(Base.isReg() &&
"Unexpected base pointer for inline asm memory operand.");
assert(Offset.isImm() && "Unexpected offset for inline asm memory operand.");

OS << XtensaInstPrinter::getRegisterName(Base.getReg());
OS << ", ";
OS << Offset.getImm();

return false;
}
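// The "<base-reg>, <offset>" text printed above matches the memory-operand
// syntax of Xtensa load/store instructions, e.g. "l32i a2, a2, 0".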

MCSymbol *
XtensaAsmPrinter::GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
// Create a symbol for the name.
8 changes: 8 additions & 0 deletions llvm/lib/Target/Xtensa/XtensaAsmPrinter.h
@@ -42,6 +42,14 @@ class LLVM_LIBRARY_VISIBILITY XtensaAsmPrinter : public AsmPrinter {

void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;

void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);

bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &O) override;

bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &OS) override;

MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;

MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
23 changes: 23 additions & 0 deletions llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp
@@ -33,6 +33,10 @@ class XtensaDAGToDAGISel : public SelectionDAGISel {

void Select(SDNode *Node) override;

bool SelectInlineAsmMemoryOperand(const SDValue &Op,
InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;

// For load/store instructions, generate a (base + offset) pair from the
// memory address. The offset must be a multiple of the scale argument.
bool selectMemRegAddr(SDValue Addr, SDValue &Base, SDValue &Offset,
@@ -212,3 +216,22 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) {

SelectCode(Node);
}

bool XtensaDAGToDAGISel::SelectInlineAsmMemoryOperand(
const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) {
switch (ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
case InlineAsm::ConstraintCode::m: {
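// Decompose the address into a base register plus an immediate offset, the
// form Xtensa load/store instructions expect; the scale of 4 assumes a
// 32-bit-wide access for the generic "m" constraint.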
SDValue Base, Offset;

selectMemRegAddr(Op, Base, Offset, 4);
OutOps.push_back(Base);
OutOps.push_back(Offset);

return false;
}
}
return false;
}
68 changes: 68 additions & 0 deletions llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -142,6 +142,74 @@ bool XtensaTargetLowering::isOffsetFoldingLegal(
return false;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//
TargetLowering::ConstraintType
XtensaTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
return C_RegisterClass;
default:
break;
}
}
return TargetLowering::getConstraintType(Constraint);
}

TargetLowering::ConstraintWeight
XtensaTargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &Info, const char *Constraint) const {
ConstraintWeight Weight = CW_Invalid;
Value *CallOperandVal = Info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;

Type *Ty = CallOperandVal->getType();

// Look at the constraint type.
switch (*Constraint) {
default:
Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
break;
case 'r':
if (Ty->isIntegerTy())
Weight = CW_Register;
break;
}
return Weight;
}

std::pair<unsigned, const TargetRegisterClass *>
XtensaTargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
default:
break;
case 'r': // General-purpose register
return std::make_pair(0U, &Xtensa::ARRegClass);
}
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void XtensaTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDLoc DL(Op);

// Only support length 1 constraints for now.
if (Constraint.size() > 1)
return;

TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//
15 changes: 15 additions & 0 deletions llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -76,6 +76,21 @@ class XtensaTargetLowering : public TargetLowering {

const char *getTargetNodeName(unsigned Opcode) const override;

std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;

TargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const override;

TargetLowering::ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &Info,
const char *Constraint) const override;

void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;

SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
83 changes: 65 additions & 18 deletions llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
@@ -85,7 +86,43 @@ using PhiMap = MapVector<PHINode *, BBValueVector>;
using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;

using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
using BBPredicates = DenseMap<BasicBlock *, Value *>;

using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;

class CondBranchWeights {
uint32_t TrueWeight;
uint32_t FalseWeight;

CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}

public:
static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
assert(Br.isConditional());

uint64_t T, F;
if (!extractBranchWeights(Br, T, F))
return std::nullopt;

return CondBranchWeights(T, F);
}

static void setMetadata(BranchInst &Br,
const MaybeCondBranchWeights &Weights) {
assert(Br.isConditional());
if (!Weights)
return;
uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
setBranchWeights(Br, Arr, false);
}

CondBranchWeights invert() const {
return CondBranchWeights{FalseWeight, TrueWeight};
}
};
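// For example, a conditional branch whose !prof metadata carries weights
// {70, 30} parses to TrueWeight = 70, FalseWeight = 30; when StructurizeCFG
// inverts the branch condition, invert() swaps the weights so the rebuilt
// branch preserves the original profile data.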

using ValueWeightPair = std::pair<Value *, MaybeCondBranchWeights>;

using BBPredicates = DenseMap<BasicBlock *, ValueWeightPair>;
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;

@@ -271,7 +308,7 @@ class StructurizeCFG {

void analyzeLoops(RegionNode *N);

Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
ValueWeightPair buildCondition(BranchInst *Term, unsigned Idx, bool Invert);

void gatherPredicates(RegionNode *N);

@@ -449,16 +486,22 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
}

/// Build the condition for one edge
Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
bool Invert) {
ValueWeightPair StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
bool Invert) {
Value *Cond = Invert ? BoolFalse : BoolTrue;
MaybeCondBranchWeights Weights;

if (Term->isConditional()) {
Cond = Term->getCondition();
Weights = CondBranchWeights::tryParse(*Term);

if (Idx != (unsigned)Invert)
if (Idx != (unsigned)Invert) {
Cond = invertCondition(Cond);
if (Weights)
Weights = Weights->invert();
}
}
return Cond;
return {Cond, Weights};
}

/// Analyze the predecessors of each block and build up predicates
@@ -490,8 +533,8 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
if (Visited.count(Other) && !Loops.count(Other) &&
!Pred.count(Other) && !Pred.count(P)) {

Pred[Other] = BoolFalse;
Pred[P] = BoolTrue;
Pred[Other] = {BoolFalse, std::nullopt};
Pred[P] = {BoolTrue, std::nullopt};
continue;
}
}
@@ -512,9 +555,9 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {

BasicBlock *Entry = R->getEntry();
if (Visited.count(Entry))
Pred[Entry] = BoolTrue;
Pred[Entry] = {BoolTrue, std::nullopt};
else
LPred[Entry] = BoolFalse;
LPred[Entry] = {BoolFalse, std::nullopt};
}
}
}
@@ -578,12 +621,14 @@ void StructurizeCFG::insertConditions(bool Loops) {
Dominator.addBlock(Parent);

Value *ParentValue = nullptr;
for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
MaybeCondBranchWeights ParentWeights = std::nullopt;
for (std::pair<BasicBlock *, ValueWeightPair> BBAndPred : Preds) {
BasicBlock *BB = BBAndPred.first;
Value *Pred = BBAndPred.second;
auto [Pred, Weight] = BBAndPred.second;

if (BB == Parent) {
ParentValue = Pred;
ParentWeights = Weight;
break;
}
PhiInserter.AddAvailableValue(BB, Pred);
@@ -592,6 +637,7 @@

if (ParentValue) {
Term->setCondition(ParentValue);
CondBranchWeights::setMetadata(*Term, ParentWeights);
} else {
if (!Dominator.resultIsRememberedBlock())
PhiInserter.AddAvailableValue(Dominator.result(), Default);
@@ -607,7 +653,7 @@ void StructurizeCFG::simplifyConditions() {
for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
auto &Preds = I.second;
for (auto &J : Preds) {
auto &Cond = J.second;
Value *Cond = J.second.first;
Instruction *Inverted;
if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
!Cond->use_empty()) {
@@ -904,9 +950,10 @@ void StructurizeCFG::setPrevNode(BasicBlock *BB) {
/// Does BB dominate all the predicates of Node?
bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
BBPredicates &Preds = Predicates[Node->getEntry()];
return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
return DT->dominates(BB, Pred.first);
});
return llvm::all_of(Preds,
[&](std::pair<BasicBlock *, ValueWeightPair> Pred) {
return DT->dominates(BB, Pred.first);
});
}

/// Can we predict that this node will always be called?
@@ -918,9 +965,9 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
if (!PrevNode)
return true;

for (std::pair<BasicBlock*, Value*> Pred : Preds) {
for (std::pair<BasicBlock *, ValueWeightPair> Pred : Preds) {
BasicBlock *BB = Pred.first;
Value *V = Pred.second;
Value *V = Pred.second.first;

if (V != BoolTrue)
return false;
35 changes: 34 additions & 1 deletion llvm/lib/Transforms/Utils/Local.cpp
@@ -112,6 +112,12 @@ static cl::opt<unsigned> PHICSENumPHISmallSize(
"When the basic block contains not more than this number of PHI nodes, "
"perform a (faster!) exhaustive search instead of set-driven one."));

static cl::opt<unsigned> MaxPhiEntriesIncreaseAfterRemovingEmptyBlock(
"max-phi-entries-increase-after-removing-empty-block", cl::init(1000),
cl::Hidden,
cl::desc("Stop removing an empty block if removing it will introduce more "
"than this number of phi entries in its successor"));

// Max recursion depth for collectBitParts used when detecting bswap and
// bitreverse idioms.
static const unsigned BitPartRecursionMaxDepth = 48;
@@ -1047,6 +1053,33 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
return true;
}

/// Check whether removing \p BB will make the phis in its \p Succ have too
/// many incoming entries. This function does not check whether \p BB is
/// foldable or not.
static bool introduceTooManyPhiEntries(BasicBlock *BB, BasicBlock *Succ) {
// If BB only has one predecessor, then removing it will not introduce more
// incoming edges for phis.
if (BB->hasNPredecessors(1))
return false;
unsigned NumPreds = pred_size(BB);
unsigned NumChangedPhi = 0;
for (auto &Phi : Succ->phis()) {
// If the incoming value is a phi and that phi is defined in BB, removing BB
// will not increase the total number of phi entries in the IR.
if (auto *IncomingPhi = dyn_cast<PHINode>(Phi.getIncomingValueForBlock(BB)))
if (IncomingPhi->getParent() == BB)
continue;
// Otherwise, we need to add entries to the phi
NumChangedPhi++;
}
// For every phi that needs to be changed, (NumPreds - 1) new entries will be
// added. If the total increase in phi entries exceeds
// MaxPhiEntriesIncreaseAfterRemovingEmptyBlock, it will be considered as
// introducing too many new phi entries.
return (NumPreds - 1) * NumChangedPhi >
MaxPhiEntriesIncreaseAfterRemovingEmptyBlock;
}
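// For example, if BB has 4 predecessors and Succ has 5 phis whose incoming
// values are defined outside BB, folding BB away adds (4 - 1) * 5 = 15 new
// phi entries, which is checked against the threshold above.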

/// Replace a value flowing from a block to a phi with
/// potentially multiple instances of that value flowing from the
/// block's predecessors to the phi.
@@ -1146,7 +1179,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
BBKillable ||
CanRedirectPredsOfEmptyBBToSucc(BB, Succ, BBPreds, SuccPreds, CommonPred);

if (!BBKillable && !BBPhisMergeable)
if ((!BBKillable && !BBPhisMergeable) || introduceTooManyPhiEntries(BB, Succ))
return false;

// Check to see if merging these blocks/phis would cause conflicts for any of
224 changes: 144 additions & 80 deletions llvm/test/Analysis/CostModel/RISCV/arith-fp.ll

Large diffs are not rendered by default.

18 changes: 8 additions & 10 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
@@ -171,11 +171,9 @@ body: |
; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s64_s_s192
; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3
; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]]
; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr(s192) = G_IMPLICIT_DEF
; GCN-NEXT: [[UV:%[0-9]+]]:sgpr(s64), [[UV1:%[0-9]+]]:sgpr(s64), [[UV2:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[DEF]](s192)
; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64), implicit [[UV2]](s64)
%0:sgpr(s192) = G_IMPLICIT_DEF
%1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0
S_ENDPGM 0, implicit %1, implicit %2, implicit %3
@@ -294,11 +292,11 @@ body: |
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<3 x s32>), [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x s32>)
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x s32>)
; GCN-NEXT: [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV2:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV3:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[UV]](<3 x s32>)
; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[UV1]](<3 x s32>)
; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV2]](<3 x s32>)
; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV3]](<3 x s32>)
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>), [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]](<3 x s32>)
; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[COPY5]](<3 x s32>)
; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV]](<3 x s32>)
; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV1]](<3 x s32>)
%0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
%1:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6
%2:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10
30 changes: 11 additions & 19 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
@@ -171,12 +171,8 @@ body: |

; CHECK-LABEL: name: test_freeze_s448
; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[COPY]](s512)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s448) = G_FREEZE [[TRUNC]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FREEZE]](s448)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[DEF]](s64)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s512) = G_FREEZE [[COPY]]
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](s512)
%0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s448) = G_TRUNC %0
%2:_(s448) = G_FREEZE %1
@@ -399,14 +395,12 @@ body: |
bb.0:

; CHECK-LABEL: name: test_freeze_v33s32
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<16 x s32>)
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE1]](<16 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE2]](s32)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<32 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE1]](s32)
; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<33 x s32>)
%0:_(<33 x s32>) = G_IMPLICIT_DEF
%1:_(<33 x s32>) = G_FREEZE %0
@@ -419,12 +413,10 @@ body: |
bb.0:

; CHECK-LABEL: name: test_freeze_v64s32
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[FREEZE3:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[FREEZE3]](<16 x s32>)
; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<32 x s32>), [[FREEZE1]](<32 x s32>)
; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>)
%0:_(<64 x s32>) = G_IMPLICIT_DEF
%1:_(<64 x s32>) = G_FREEZE %0
28 changes: 8 additions & 20 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
@@ -135,8 +135,9 @@ body: |
bb.0:

; CHECK-LABEL: name: test_implicit_def_s448
; CHECK: [[DEF:%[0-9]+]]:_(s448) = G_IMPLICIT_DEF
; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s448), 0
; CHECK: [[DEF:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[DEF]](s512)
; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[TRUNC]](s448), 0
; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
%0:_(s448) = G_IMPLICIT_DEF
%1:_(s32) = G_EXTRACT %0, 0
@@ -295,18 +296,6 @@ body: |
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %0
...

---
name: test_implicit_def_v17s32
body: |
bb.0:

; CHECK-LABEL: name: test_implicit_def_v17s32
; CHECK: [[DEF:%[0-9]+]]:_(<17 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<17 x s32>)
%0:_(<17 x s32>) = G_IMPLICIT_DEF
S_NOP 0, implicit %0
...

---
name: test_implicit_def_v32s32
body: |
@@ -328,9 +317,9 @@ body: |
; CHECK-LABEL: name: test_implicit_def_v33s32
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1)
; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1)
@@ -348,10 +337,9 @@ body: |
bb.0:

; CHECK-LABEL: name: test_implicit_def_v64s32
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>)
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>)
; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[CONCAT_VECTORS1]](<32 x s32>)
; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<32 x s32>), [[DEF]](<32 x s32>)
; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[DEF]](<32 x s32>)
%0:_(<64 x s32>) = G_IMPLICIT_DEF
%1:_(<32 x s32>), %2:_(<32 x s32>) = G_UNMERGE_VALUES %0
S_NOP 0, implicit %0, implicit %1
@@ -190,13 +190,11 @@ body: |
; CHECK-LABEL: name: insert_vector_elt_64_65_v64s32
; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>), [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
@@ -243,10 +241,8 @@ body: |
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C14]](s64)
; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>), [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>), [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
; CHECK-NEXT: G_STORE [[UV16]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
; CHECK-NEXT: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
152 changes: 75 additions & 77 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir

Large diffs are not rendered by default.

19 changes: 0 additions & 19 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
@@ -42,8 +42,6 @@
ret void
}

define void @non_power_of_2() { ret void }

define amdgpu_kernel void @load_constant_v4i16_from_8_align8(ptr addrspace(4) %ptr0) {
ret void
}
@@ -186,23 +184,6 @@ body: |
%1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.tmp1)
...

---
name: non_power_of_2
legalized: true

body: |
bb.0:
; CHECK-LABEL: name: non_power_of_2
; CHECK: [[DEF:%[0-9]+]]:sgpr(s448) = G_IMPLICIT_DEF
; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[DEF]](s448), 0
; CHECK-NEXT: $sgpr0 = COPY [[EXTRACT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0
%0:_(s448) = G_IMPLICIT_DEF
%1:_(s32) = G_EXTRACT %0:_(s448), 0
$sgpr0 = COPY %1:_(s32)
SI_RETURN_TO_EPILOG $sgpr0
...

---
name: load_constant_v4i16_from_8_align8
legalized: true
1,856 changes: 1,856 additions & 0 deletions llvm/test/CodeGen/AMDGPU/freeze.ll

Large diffs are not rendered by default.

162 changes: 162 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
@@ -0,0 +1,162 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64 %s

define i64 @i64(<vscale x 1 x i64> %v, i1 %c) {
; RV32-LABEL: i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: andi a0, a0, 1
; RV32-NEXT: #APP
; RV32-NEXT: #NO_APP
; RV32-NEXT: beqz a0, .LBB0_2
; RV32-NEXT: # %bb.1: # %truebb
; RV32-NEXT: li a0, 32
; RV32-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a0
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: j .LBB0_3
; RV32-NEXT: .LBB0_2: # %falsebb
; RV32-NEXT: li a1, 0
; RV32-NEXT: .LBB0_3: # %falsebb
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 1
; RV32-NEXT: add sp, sp, a2
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: i64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 1
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: andi a0, a0, 1
; RV64-NEXT: #APP
; RV64-NEXT: #NO_APP
; RV64-NEXT: beqz a0, .LBB0_2
; RV64-NEXT: # %bb.1: # %truebb
; RV64-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: .LBB0_2: # %falsebb
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 1
; RV64-NEXT: add sp, sp, a1
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
br i1 %c, label %truebb, label %falsebb
truebb:
%x = extractelement <vscale x 1 x i64> %v, i32 0
ret i64 %x
falsebb:
ret i64 0
}
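; On RV32 an i64 element cannot be reloaded with a single scalar load, so the
; vector is reloaded from the spill slot and the two 32-bit halves are
; extracted with vsrl.vx/vmv.x.s; on RV64 the fold to a plain ld applies.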

define i32 @i32(<vscale x 2 x i32> %v, i1 %c) {
; CHECK-LABEL: i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: beqz a0, .LBB1_2
; CHECK-NEXT: # %bb.1: # %truebb
; CHECK-NEXT: lw a0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: .LBB1_2: # %falsebb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add sp, sp, a1
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
br i1 %c, label %truebb, label %falsebb
truebb:
%x = extractelement <vscale x 2 x i32> %v, i32 0
ret i32 %x
falsebb:
ret i32 0
}

define i16 @i16(<vscale x 4 x i16> %v, i1 %c) {
; CHECK-LABEL: i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: beqz a0, .LBB2_2
; CHECK-NEXT: # %bb.1: # %truebb
; CHECK-NEXT: lh a0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: .LBB2_2: # %falsebb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add sp, sp, a1
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
br i1 %c, label %truebb, label %falsebb
truebb:
%x = extractelement <vscale x 4 x i16> %v, i32 0
ret i16 %x
falsebb:
ret i16 0
}

define i8 @i8(<vscale x 8 x i8> %v, i1 %c) {
; CHECK-LABEL: i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: beqz a0, .LBB3_2
; CHECK-NEXT: # %bb.1: # %truebb
; CHECK-NEXT: lb a0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: .LBB3_2: # %falsebb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add sp, sp, a1
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
br i1 %c, label %truebb, label %falsebb
truebb:
%x = extractelement <vscale x 8 x i8> %v, i32 0
ret i8 %x
falsebb:
ret i8 0
}
91 changes: 61 additions & 30 deletions llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
@@ -1,20 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfh,+v,+optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,OPTIMIZED
; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfh,+v,+optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,OPTIMIZED
; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfh,+v -target-abi ilp32d -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,NOT-OPTIMIZED
; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfh,+v -target-abi lp64d -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,NOT-OPTIMIZED
; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfhmin,+v,+optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,OPTIMIZED
; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfhmin,+v,+optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,OPTIMIZED
; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfhmin,+v -target-abi ilp32d -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,NOT-OPTIMIZED
; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfhmin,+v -target-abi lp64d -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,NOT-OPTIMIZED
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFHMIN
; RUN: llc -mtriple=riscv32 -mattr=+v,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZFMIN
; RUN: llc -mtriple=riscv64 -mattr=+v,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZFMIN

define <vscale x 8 x bfloat> @vsplat_nxv8bf16(bfloat %f) {
; NOZFMIN-LABEL: vsplat_nxv8bf16:
; NOZFMIN: # %bb.0:
; NOZFMIN-NEXT: fmv.x.w a0, fa0
; NOZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; NOZFMIN-NEXT: vmv.v.x v8, a0
; NOZFMIN-NEXT: ret
;
; ZFMIN-LABEL: vsplat_nxv8bf16:
; ZFMIN: # %bb.0:
; ZFMIN-NEXT: fmv.x.h a0, fa0
; ZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZFMIN-NEXT: vmv.v.x v8, a0
; ZFMIN-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %f, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x bfloat> %splat
}
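; Without zfhmin/zfbfmin there is no fmv.x.h, so the (NaN-boxed) scalar is
; moved with fmv.x.w and only the low 16 bits are splatted by vmv.v.x.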

define <vscale x 8 x bfloat> @vsplat_zero_nxv8bf16() {
; CHECK-LABEL: vsplat_zero_nxv8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: ret
ret <vscale x 8 x bfloat> splat (bfloat zeroinitializer)
}

define <vscale x 8 x half> @vsplat_nxv8f16(half %f) {
; ZVFH-LABEL: vsplat_nxv8f16:
@@ -25,10 +43,17 @@ define <vscale x 8 x half> @vsplat_nxv8f16(half %f) {
;
; ZVFHMIN-LABEL: vsplat_nxv8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: fmv.x.w a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v8, a0
; ZVFHMIN-NEXT: ret
;
; ZFMIN-LABEL: vsplat_nxv8f16:
; ZFMIN: # %bb.0:
; ZFMIN-NEXT: fmv.x.h a0, fa0
; ZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZFMIN-NEXT: vmv.v.x v8, a0
; ZFMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %f, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x half> %splat
@@ -83,20 +108,26 @@ define <vscale x 8 x double> @vsplat_zero_nxv8f64() {
ret <vscale x 8 x double> splat (double zeroinitializer)
}

; Test that we fold this to a vlse with 0 stride.
define <vscale x 8 x float> @vsplat_load_nxv8f32(ptr %ptr) {
; OPTIMIZED-LABEL: vsplat_load_nxv8f32:
; OPTIMIZED: # %bb.0:
; OPTIMIZED-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; OPTIMIZED-NEXT: vlse32.v v8, (a0), zero
; OPTIMIZED-NEXT: ret
;
; NOT-OPTIMIZED-LABEL: vsplat_load_nxv8f32:
; NOT-OPTIMIZED: # %bb.0:
; NOT-OPTIMIZED-NEXT: flw fa5, 0(a0)
; NOT-OPTIMIZED-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; NOT-OPTIMIZED-NEXT: vfmv.v.f v8, fa5
; NOT-OPTIMIZED-NEXT: ret
; CHECK-LABEL: vsplat_load_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: flw fa5, 0(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa5
; CHECK-NEXT: ret
%f = load float, ptr %ptr
%head = insertelement <vscale x 8 x float> poison, float %f, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x float> %splat
}

; Test that we fold this to a vlse with 0 stride.
define <vscale x 8 x float> @vsplat_load_nxv8f32_optimized(ptr %ptr) "target-features"="+optimized-zero-stride-load" {
; CHECK-LABEL: vsplat_load_nxv8f32_optimized:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), zero
; CHECK-NEXT: ret
%f = load float, ptr %ptr
%head = insertelement <vscale x 8 x float> poison, float %f, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
14 changes: 14 additions & 0 deletions llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll
@@ -0,0 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: not llc --mtriple=xtensa < %s 2>&1 | FileCheck %s

define void @constraint_f() nounwind {
; CHECK: error: unknown asm constraint 'f'
tail call void asm "addi a1, a1, $0", "f"(i32 1)
ret void
}

define i32 @register_a100(i32 %a) nounwind {
; CHECK: error: couldn't allocate input reg for constraint '{$a100}'
%1 = tail call i32 asm "addi $0, $1, 1", "=r,{$a100}"(i32 %a)
ret i32 %1
}
46 changes: 46 additions & 0 deletions llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll
@@ -0,0 +1,46 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=xtensa < %s | FileCheck %s --check-prefix=XTENSA

define i32 @m_offset_0(ptr %p) nounwind {
; XTENSA-LABEL: m_offset_0:
; XTENSA: #APP
; XTENSA-NEXT: l32i a2, a2, 0
; XTENSA-NEXT: #NO_APP
; XTENSA-NEXT: ret
%1 = call i32 asm "l32i $0, $1", "=r,*m"(ptr elementtype(i32) %p)
ret i32 %1
}

define i32 @m_offset_1020(ptr %p) nounwind {
; XTENSA-LABEL: m_offset_1020:
; XTENSA: #APP
; XTENSA-NEXT: l32i a2, a2, 1020
; XTENSA-NEXT: #NO_APP
; XTENSA-NEXT: ret
%1 = getelementptr inbounds i8, ptr %p, i32 1020
%2 = call i32 asm "l32i $0, $1", "=r,*m"(ptr elementtype(i32) %1)
ret i32 %2
}
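; 1020 is the largest offset directly encodable in l32i (8-bit unsigned
; immediate scaled by 4), so the access folds into the instruction's offset
; field.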

define i8 @m_i8_offset_7(ptr %p) nounwind {
; XTENSA-LABEL: m_i8_offset_7:
; XTENSA: addi a8, a2, 7
; XTENSA-NEXT: #APP
; XTENSA-NEXT: l8ui a2, a8, 0
; XTENSA-NEXT: #NO_APP
; XTENSA-NEXT: ret
%1 = getelementptr inbounds i8, ptr %p, i32 7
%2 = call i8 asm "l8ui $0, $1", "=r,*m"(ptr elementtype(i8) %1)
ret i8 %2
}

define i16 @m_i16_offset_10(ptr %p) nounwind {
; XTENSA-LABEL: m_i16_offset_10:
; XTENSA: #APP
; XTENSA-NEXT: l16si a2, a2, 20
; XTENSA-NEXT: #NO_APP
; XTENSA-NEXT: ret
%1 = getelementptr inbounds i16, ptr %p, i32 10
%2 = call i16 asm "l16si $0, $1", "=r,*m"(ptr elementtype(i16) %1)
ret i16 %2
}
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/Xtensa/inline-asm.ll
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=xtensa < %s \
; RUN: | FileCheck -check-prefix=XTENSA %s

@gi = external global i32

define i32 @constraint_r(i32 %a) {
; XTENSA-LABEL: constraint_r:
; XTENSA: l32r a8, .LCPI0_0
; XTENSA-NEXT: l32i a8, a8, 0
; XTENSA-NEXT: #APP
; XTENSA-NEXT: add a2, a2, a8
; XTENSA-NEXT: #NO_APP
; XTENSA-NEXT: ret
%1 = load i32, ptr @gi
%2 = tail call i32 asm "add $0, $1, $2", "=r,r,r"(i32 %a, i32 %1)
ret i32 %2
}

define i32 @constraint_i(i32 %a) {
; XTENSA-LABEL: constraint_i:
; XTENSA: #APP
; XTENSA-NEXT: addi a2, a2, 113
; XTENSA-NEXT: #NO_APP
; XTENSA-NEXT: ret
%1 = load i32, ptr @gi
%2 = tail call i32 asm "addi $0, $1, $2", "=r,r,i"(i32 %a, i32 113)
ret i32 %2
}

define i32 @explicit_register_a3(i32 %a) nounwind {
; XTENSA-LABEL: explicit_register_a3:
; XTENSA: or a3, a2, a2
; XTENSA-NEXT: #APP
; XTENSA-NEXT: addi a2, a3, 1
; XTENSA-NEXT: #NO_APP
; XTENSA-NEXT: ret
%1 = tail call i32 asm "addi $0, $1, 1", "=r,{a3}"(i32 %a)
ret i32 %1
}
164 changes: 164 additions & 0 deletions llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll
@@ -0,0 +1,164 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=12 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-12 %s
; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=11 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-11 %s
; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=4 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-4 %s
;
; This test has the following CFG:
; 1. entry has a switch to 4 blocks: B1 - B4
; 2. B1 and B2 each conditionally branch to B5 and B6
; 3. B3 and B4 each conditionally branch to B5 and B7
; 4. In B5, %val is defined as a phi taking values from B1 - B4
; 5. B5, B6, and B7 branch to block Merge unconditionally
; 6. Block Merge has 5 phis (%x1 - %x4 and %val_merge).
;
; If we remove B5, %x1 - %x4 will increase the number of phi entries by (4 - 1) * 4 = 12. For %val_merge, the value taken from B5
; is defined in B5 itself, so it does not increase the number of phi entries (removing B5 just moves the entries from %val to
; %val_merge). Therefore, removing B5 increases the number of phi entries by 12 (not (4 - 1) * 5 = 15).
;
; If we remove B6 / B7, it will increase the number of phi entries by (2 - 1) * 5 = 5.
;
; In the first test, max-phi-entries-increase-after-removing-empty-block is set to 12, so B5 is removed.
; In the second test, max-phi-entries-increase-after-removing-empty-block is set to 11, so B5 is not removed,
; but B6 and B7 can be removed.
; In the third test, max-phi-entries-increase-after-removing-empty-block is set to 4, so no block can be removed.
;
define void @foo(i32 %a, i32 %val1, i32 %val2, i32 %val3, i32 %val4) {
; CHECK-12-LABEL: define void @foo(
; CHECK-12-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) {
; CHECK-12-NEXT: [[ENTRY:.*:]]
; CHECK-12-NEXT: switch i32 [[A]], label %[[B1:.*]] [
; CHECK-12-NEXT: i32 4, label %[[B4:.*]]
; CHECK-12-NEXT: i32 2, label %[[B2:.*]]
; CHECK-12-NEXT: i32 3, label %[[B3:.*]]
; CHECK-12-NEXT: ]
; CHECK-12: [[B1]]:
; CHECK-12-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1
; CHECK-12-NEXT: br i1 [[CMP1]], label %[[B6:.*]], label %[[MERGE:.*]]
; CHECK-12: [[B2]]:
; CHECK-12-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2
; CHECK-12-NEXT: br i1 [[CMP2]], label %[[B6]], label %[[MERGE]]
; CHECK-12: [[B3]]:
; CHECK-12-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3
; CHECK-12-NEXT: br i1 [[CMP3]], label %[[B7:.*]], label %[[MERGE]]
; CHECK-12: [[B4]]:
; CHECK-12-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4
; CHECK-12-NEXT: br i1 [[CMP4]], label %[[B7]], label %[[MERGE]]
; CHECK-12: [[B6]]:
; CHECK-12-NEXT: br label %[[MERGE]]
; CHECK-12: [[B7]]:
; CHECK-12-NEXT: br label %[[MERGE]]
; CHECK-12: [[MERGE]]:
; CHECK-12-NEXT: [[X1:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 1, %[[B4]] ], [ 1, %[[B3]] ], [ 1, %[[B2]] ], [ 1, %[[B1]] ]
; CHECK-12-NEXT: [[X2:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ], [ 2, %[[B2]] ], [ 2, %[[B1]] ]
; CHECK-12-NEXT: [[X3:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 3, %[[B4]] ], [ 3, %[[B3]] ], [ 3, %[[B2]] ], [ 3, %[[B1]] ]
; CHECK-12-NEXT: [[X4:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 4, %[[B4]] ], [ 4, %[[B3]] ], [ 4, %[[B2]] ], [ 4, %[[B1]] ]
; CHECK-12-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ]
; CHECK-12-NEXT: ret void
;
; CHECK-11-LABEL: define void @foo(
; CHECK-11-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) {
; CHECK-11-NEXT: [[ENTRY:.*:]]
; CHECK-11-NEXT: switch i32 [[A]], label %[[B1:.*]] [
; CHECK-11-NEXT: i32 4, label %[[B4:.*]]
; CHECK-11-NEXT: i32 2, label %[[B2:.*]]
; CHECK-11-NEXT: i32 3, label %[[B3:.*]]
; CHECK-11-NEXT: ]
; CHECK-11: [[B1]]:
; CHECK-11-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1
; CHECK-11-NEXT: br i1 [[CMP1]], label %[[MERGE:.*]], label %[[B5:.*]]
; CHECK-11: [[B2]]:
; CHECK-11-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2
; CHECK-11-NEXT: br i1 [[CMP2]], label %[[MERGE]], label %[[B5]]
; CHECK-11: [[B3]]:
; CHECK-11-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3
; CHECK-11-NEXT: br i1 [[CMP3]], label %[[MERGE]], label %[[B5]]
; CHECK-11: [[B4]]:
; CHECK-11-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4
; CHECK-11-NEXT: br i1 [[CMP4]], label %[[MERGE]], label %[[B5]]
; CHECK-11: [[B5]]:
; CHECK-11-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ]
; CHECK-11-NEXT: br label %[[MERGE]]
; CHECK-11: [[MERGE]]:
; CHECK-11-NEXT: [[X1:%.*]] = phi i16 [ 1, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ]
; CHECK-11-NEXT: [[X2:%.*]] = phi i16 [ 2, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ]
; CHECK-11-NEXT: [[X3:%.*]] = phi i16 [ 3, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ]
; CHECK-11-NEXT: [[X4:%.*]] = phi i16 [ 4, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ]
; CHECK-11-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ [[VAL]], %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ]
; CHECK-11-NEXT: ret void
;
; CHECK-4-LABEL: define void @foo(
; CHECK-4-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) {
; CHECK-4-NEXT: [[ENTRY:.*:]]
; CHECK-4-NEXT: switch i32 [[A]], label %[[B1:.*]] [
; CHECK-4-NEXT: i32 4, label %[[B4:.*]]
; CHECK-4-NEXT: i32 2, label %[[B2:.*]]
; CHECK-4-NEXT: i32 3, label %[[B3:.*]]
; CHECK-4-NEXT: ]
; CHECK-4: [[B1]]:
; CHECK-4-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1
; CHECK-4-NEXT: br i1 [[CMP1]], label %[[B6:.*]], label %[[B5:.*]]
; CHECK-4: [[B2]]:
; CHECK-4-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2
; CHECK-4-NEXT: br i1 [[CMP2]], label %[[B6]], label %[[B5]]
; CHECK-4: [[B3]]:
; CHECK-4-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3
; CHECK-4-NEXT: br i1 [[CMP3]], label %[[B7:.*]], label %[[B5]]
; CHECK-4: [[B4]]:
; CHECK-4-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4
; CHECK-4-NEXT: br i1 [[CMP4]], label %[[B7]], label %[[B5]]
; CHECK-4: [[B5]]:
; CHECK-4-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ]
; CHECK-4-NEXT: br label %[[MERGE:.*]]
; CHECK-4: [[B6]]:
; CHECK-4-NEXT: br label %[[MERGE]]
; CHECK-4: [[B7]]:
; CHECK-4-NEXT: br label %[[MERGE]]
; CHECK-4: [[MERGE]]:
; CHECK-4-NEXT: [[X1:%.*]] = phi i16 [ 1, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ]
; CHECK-4-NEXT: [[X2:%.*]] = phi i16 [ 2, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ]
; CHECK-4-NEXT: [[X3:%.*]] = phi i16 [ 3, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ]
; CHECK-4-NEXT: [[X4:%.*]] = phi i16 [ 4, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ]
; CHECK-4-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ [[VAL]], %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ]
; CHECK-4-NEXT: ret void
;
entry:
switch i32 %a, label %B1 [
i32 4, label %B4
i32 2, label %B2
i32 3, label %B3
]

B1: ; preds = %entry
%cmp1 = icmp eq i32 %val1, 1
br i1 %cmp1, label %B6, label %B5

B2: ; preds = %entry
%cmp2 = icmp eq i32 %val2, 2
br i1 %cmp2, label %B6, label %B5

B3: ; preds = %entry
%cmp3 = icmp eq i32 %val3, 3
br i1 %cmp3, label %B7, label %B5

B4: ; preds = %entry
%cmp4 = icmp eq i32 %val4, 4
br i1 %cmp4, label %B7, label %B5

B5: ; preds = %B4, %B3, %B2, %B1
%val = phi i32 [ %val1, %B1 ], [ %val2, %B2 ], [ %val3, %B3 ], [ %val4, %B4 ]
br label %Merge

B6: ; preds = %B2, %B1
br label %Merge

B7: ; preds = %B4, %B3
br label %Merge

Merge: ; preds = %B7, %B6, %B5
%x1 = phi i16 [ 1, %B5 ], [ 0, %B6 ], [ 2, %B7 ]
%x2 = phi i16 [ 2, %B5 ], [ 0, %B6 ], [ 2, %B7 ]
%x3 = phi i16 [ 3, %B5 ], [ 0, %B6 ], [ 2, %B7 ]
%x4 = phi i16 [ 4, %B5 ], [ 0, %B6 ], [ 2, %B7 ]
%val_merge = phi i32 [ %val, %B5 ], [ 0, %B6 ], [ 2, %B7 ]
ret void
}
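
The phi-entry accounting exercised above can be summarized in a short C++ sketch. This is illustrative only: it assumes an empty block with a single successor, and phiEntryIncreaseIfRemoved is an invented helper, not SimplifyCFG's actual code.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

// How many phi entries the successor gains if the empty block BB is folded
// away and BB's predecessors branch to the successor directly.
static unsigned phiEntryIncreaseIfRemoved(llvm::BasicBlock *BB) {
  unsigned NumPreds = llvm::pred_size(BB);
  llvm::BasicBlock *Succ = BB->getSingleSuccessor();
  unsigned Increase = 0;
  for (llvm::PHINode &PN : Succ->phis()) {
    llvm::Value *V = PN.getIncomingValueForBlock(BB);
    // A value defined in BB itself (%val in B5) merely moves its entries
    // into the successor's phi and adds nothing.
    if (auto *I = llvm::dyn_cast<llvm::Instruction>(V))
      if (I->getParent() == BB)
        continue;
    // Otherwise the single entry for BB is replaced by one entry per
    // predecessor of BB.
    Increase += NumPreds - 1;
  }
  return Increase;
}

For B5 this yields (4 - 1) * 4 = 12 (%val is skipped); for B6 and B7 it yields (2 - 1) * 5 = 5, matching the thresholds in the RUN lines.
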
@@ -5,7 +5,7 @@ define amdgpu_ps i32 @if_else(i32 %0) {
; OPT-LABEL: define amdgpu_ps i32 @if_else(
; OPT-SAME: i32 [[TMP0:%.*]]) {
; OPT-NEXT: [[C:%.*]] = icmp ne i32 [[TMP0]], 0
; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]]
; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]], !prof [[PROF0:![0-9]+]]
; OPT: [[FLOW]]:
; OPT-NEXT: [[TMP2:%.*]] = phi i32 [ 33, %[[FALSE]] ], [ undef, [[TMP1:%.*]] ]
; OPT-NEXT: [[TMP3:%.*]] = phi i1 [ false, %[[FALSE]] ], [ true, [[TMP1]] ]
@@ -40,7 +40,7 @@ define amdgpu_ps void @loop_if_break(i32 %n) {
; OPT: [[LOOP]]:
; OPT-NEXT: [[I:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[FLOW:.*]] ]
; OPT-NEXT: [[C:%.*]] = icmp ugt i32 [[I]], 0
; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]]
; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]], !prof [[PROF1:![0-9]+]]
; OPT: [[LOOP_BODY]]:
; OPT-NEXT: [[I_NEXT:%.*]] = sub i32 [[I]], 1
; OPT-NEXT: br label %[[FLOW]]
@@ -70,3 +70,7 @@ exit: ; preds = %loop
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!0 = !{!"branch_weights", i32 1000, i32 1}
;.
; OPT: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000}
; OPT: [[PROF1]] = !{!"branch_weights", i32 1000, i32 1}
;.
86 changes: 86 additions & 0 deletions llvm/unittests/ADT/SmallSetTest.cpp
@@ -12,11 +12,63 @@

#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <string>

using namespace llvm;

TEST(SmallSetTest, ConstructorIteratorPair) {
std::initializer_list<int> L = {1, 2, 3, 4, 5};
SmallSet<int, 4> S(std::begin(L), std::end(L));
EXPECT_THAT(S, testing::UnorderedElementsAreArray(L));
}

TEST(SmallSetTest, ConstructorRange) {
std::initializer_list<int> L = {1, 2, 3, 4, 5};

SmallSet<int, 4> S(llvm::make_range(std::begin(L), std::end(L)));
EXPECT_THAT(S, testing::UnorderedElementsAreArray(L));
}

TEST(SmallSetTest, ConstructorInitializerList) {
std::initializer_list<int> L = {1, 2, 3, 4, 5};
SmallSet<int, 4> S = {1, 2, 3, 4, 5};
EXPECT_THAT(S, testing::UnorderedElementsAreArray(L));
}

TEST(SmallSetTest, CopyConstructor) {
SmallSet<int, 4> S = {1, 2, 3};
SmallSet<int, 4> T = S;

EXPECT_THAT(S, testing::ContainerEq(T));
}

TEST(SmallSetTest, MoveConstructor) {
std::initializer_list<int> L = {1, 2, 3};
SmallSet<int, 4> S = L;
SmallSet<int, 4> T = std::move(S);

EXPECT_THAT(T, testing::UnorderedElementsAreArray(L));
}

TEST(SmallSetTest, CopyAssignment) {
SmallSet<int, 4> S = {1, 2, 3};
SmallSet<int, 4> T;
T = S;

EXPECT_THAT(S, testing::ContainerEq(T));
}

TEST(SmallSetTest, MoveAssignment) {
std::initializer_list<int> L = {1, 2, 3};
SmallSet<int, 4> S = L;
SmallSet<int, 4> T;
T = std::move(S);

EXPECT_THAT(T, testing::UnorderedElementsAreArray(L));
}

TEST(SmallSetTest, Insert) {

SmallSet<int, 4> s1;
@@ -41,6 +93,40 @@ TEST(SmallSetTest, Insert) {
EXPECT_EQ(0u, s1.count(4));
}

TEST(SmallSetTest, InsertPerfectFwd) {
struct Value {
int Key;
bool Moved;

Value(int Key) : Key(Key), Moved(false) {}
Value(const Value &) = default;
Value(Value &&Other) : Key(Other.Key), Moved(false) { Other.Moved = true; }
bool operator==(const Value &Other) const { return Key == Other.Key; }
bool operator<(const Value &Other) const { return Key < Other.Key; }
};

{
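    // With N == 4, both inserts stay in the small (inline vector)
    // representation.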
SmallSet<Value, 4> S;
Value V1(1), V2(2);

S.insert(V1);
EXPECT_EQ(V1.Moved, false);

S.insert(std::move(V2));
EXPECT_EQ(V2.Moved, true);
}
{
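    // With N == 1, the second insert overflows to the large (std::set)
    // representation, so both insert paths are exercised.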
SmallSet<Value, 1> S;
Value V1(1), V2(2);

S.insert(V1);
EXPECT_EQ(V1.Moved, false);

S.insert(std::move(V2));
EXPECT_EQ(V2.Moved, true);
}
}

TEST(SmallSetTest, Grow) {
SmallSet<int, 4> s1;

8 changes: 3 additions & 5 deletions llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -276,12 +276,10 @@ using TypeSigTy = SmallVector<unsigned char>;
static TypeSigTy ComputeTypeSignature(const CodeGenIntrinsic &Int) {
TypeSigTy TypeSig;
const Record *TypeInfo = Int.TheDef->getValueAsDef("TypeInfo");
const ListInit *OuterList = TypeInfo->getValueAsListInit("TypeSig");
const ListInit *TypeList = TypeInfo->getValueAsListInit("TypeSig");

for (const auto *Outer : OuterList->getValues()) {
for (const auto *Inner : cast<ListInit>(Outer)->getValues())
TypeSig.emplace_back(cast<IntInit>(Inner)->getValue());
}
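  // TypeSig is a flat list of integers; copy each entry directly.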
for (const auto *TypeListEntry : TypeList->getValues())
TypeSig.emplace_back(cast<IntInit>(TypeListEntry)->getValue());
return TypeSig;
}

Expand Down
3 changes: 0 additions & 3 deletions mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt
@@ -35,12 +35,9 @@ add_mlir_conversion_library(MLIRVectorToLLVMPass
MLIRVectorToLLVM

MLIRArmNeonDialect
MLIRArmNeonTransforms
MLIRArmSMEDialect
MLIRArmSMETransforms
MLIRArmSVEDialect
MLIRArmSVETransforms
MLIRVectorToArmSME
MLIRAMXDialect
MLIRAMXTransforms
MLIRX86VectorDialect