112 changes: 47 additions & 65 deletions llvm/lib/IR/IntrinsicInst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,13 +479,16 @@ std::optional<unsigned>
VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) {
switch (VPID) {
default:
break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_MEMOP(POINTERPOS, ...) return POINTERPOS;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
return std::nullopt;
case Intrinsic::vp_store:
case Intrinsic::vp_scatter:
case Intrinsic::experimental_vp_strided_store:
return 1;
case Intrinsic::vp_load:
case Intrinsic::vp_gather:
case Intrinsic::experimental_vp_strided_load:
return 0;
}
return std::nullopt;
}

/// \return The data (payload) operand of this store or scatter.
Expand All @@ -499,13 +502,12 @@ Value *VPIntrinsic::getMemoryDataParam() const {
std::optional<unsigned> VPIntrinsic::getMemoryDataParamPos(Intrinsic::ID VPID) {
switch (VPID) {
default:
break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_MEMOP(POINTERPOS, DATAPOS) return DATAPOS;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
return std::nullopt;
case Intrinsic::vp_store:
case Intrinsic::vp_scatter:
case Intrinsic::experimental_vp_strided_store:
return 0;
}
return std::nullopt;
}

constexpr bool isVPIntrinsic(Intrinsic::ID ID) {
Expand Down Expand Up @@ -589,7 +591,7 @@ VPIntrinsic::getConstrainedIntrinsicIDForVP(Intrinsic::ID ID) {
default:
break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_CONSTRAINEDFP(HASRND, HASEXCEPT, CID) return Intrinsic::CID;
#define VP_PROPERTY_CONSTRAINEDFP(CID) return Intrinsic::CID;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
Expand Down Expand Up @@ -738,38 +740,42 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,

bool VPReductionIntrinsic::isVPReduction(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::vp_reduce_add:
case Intrinsic::vp_reduce_mul:
case Intrinsic::vp_reduce_and:
case Intrinsic::vp_reduce_or:
case Intrinsic::vp_reduce_xor:
case Intrinsic::vp_reduce_smax:
case Intrinsic::vp_reduce_smin:
case Intrinsic::vp_reduce_umax:
case Intrinsic::vp_reduce_umin:
case Intrinsic::vp_reduce_fmax:
case Intrinsic::vp_reduce_fmin:
case Intrinsic::vp_reduce_fmaximum:
case Intrinsic::vp_reduce_fminimum:
case Intrinsic::vp_reduce_fadd:
case Intrinsic::vp_reduce_fmul:
return true;
default:
break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_REDUCTION(STARTPOS, ...) return true;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
return false;
}
return false;
}

bool VPCastIntrinsic::isVPCast(Intrinsic::ID ID) {
switch (ID) {
default:
break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_CASTOP return true;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
// All of the vp.casts correspond to instructions
if (std::optional<unsigned> Opc = getFunctionalOpcodeForVP(ID))
return Instruction::isCast(*Opc);
return false;
}

bool VPCmpIntrinsic::isVPCmp(Intrinsic::ID ID) {
switch (ID) {
default:
break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_CMP(CCPOS, ...) return true;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
return false;
case Intrinsic::vp_fcmp:
case Intrinsic::vp_icmp:
return true;
}
return false;
}

bool VPBinOpIntrinsic::isVPBinOp(Intrinsic::ID ID) {
Expand Down Expand Up @@ -803,22 +809,10 @@ static ICmpInst::Predicate getIntPredicateFromMD(const Value *Op) {
}

CmpInst::Predicate VPCmpIntrinsic::getPredicate() const {
bool IsFP = true;
std::optional<unsigned> CCArgIdx;
switch (getIntrinsicID()) {
default:
break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_CMP(CCPOS, ISFP) \
CCArgIdx = CCPOS; \
IsFP = ISFP; \
break;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
assert(CCArgIdx && "Unexpected vector-predicated comparison");
return IsFP ? getFPPredicateFromMD(getArgOperand(*CCArgIdx))
: getIntPredicateFromMD(getArgOperand(*CCArgIdx));
assert(isVPCmp(getIntrinsicID()));
return getIntrinsicID() == Intrinsic::vp_fcmp
? getFPPredicateFromMD(getArgOperand(2))
: getIntPredicateFromMD(getArgOperand(2));
}

unsigned VPReductionIntrinsic::getVectorParamPos() const {
Expand All @@ -831,27 +825,15 @@ unsigned VPReductionIntrinsic::getStartParamPos() const {

std::optional<unsigned>
VPReductionIntrinsic::getVectorParamPos(Intrinsic::ID ID) {
switch (ID) {
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS) return VECTORPOS;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
default:
break;
}
if (isVPReduction(ID))
return 1;
return std::nullopt;
}

std::optional<unsigned>
VPReductionIntrinsic::getStartParamPos(Intrinsic::ID ID) {
switch (ID) {
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS) return STARTPOS;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
default:
break;
}
if (isVPReduction(ID))
return 0;
return std::nullopt;
}

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5128,6 +5128,7 @@ void Verifier::visitInstruction(Instruction &I) {
F->getIntrinsicID() ==
Intrinsic::experimental_patchpoint_void ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint ||
F->getIntrinsicID() == Intrinsic::fake_use ||
F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
F->getIntrinsicID() == Intrinsic::wasm_rethrow ||
IsAttachedCallOperand(F, CBI, i),
Expand Down
85 changes: 72 additions & 13 deletions llvm/lib/Support/FormatVariadic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ static std::optional<AlignStyle> translateLocChar(char C) {
LLVM_BUILTIN_UNREACHABLE;
}

bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
size_t &Align, char &Pad) {
static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
size_t &Align, char &Pad) {
Where = AlignStyle::Right;
Align = 0;
Pad = ' ';
Expand All @@ -35,8 +35,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,

if (Spec.size() > 1) {
// A maximum of 2 characters at the beginning can be used for something
// other
// than the width.
// other than the width.
// If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...]
// contains the width.
// Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width.
Expand All @@ -55,8 +54,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
return !Failed;
}

std::optional<ReplacementItem>
formatv_object_base::parseReplacementItem(StringRef Spec) {
static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) {
StringRef RepString = Spec.trim("{}");

// If the replacement sequence does not start with a non-negative integer,
Expand All @@ -82,15 +80,14 @@ formatv_object_base::parseReplacementItem(StringRef Spec) {
RepString = StringRef();
}
RepString = RepString.trim();
if (!RepString.empty()) {
assert(false && "Unexpected characters found in replacement string!");
}
assert(RepString.empty() &&
"Unexpected characters found in replacement string!");

return ReplacementItem{Spec, Index, Align, Where, Pad, Options};
}

std::pair<ReplacementItem, StringRef>
formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
static std::pair<ReplacementItem, StringRef>
splitLiteralAndReplacement(StringRef Fmt) {
while (!Fmt.empty()) {
// Everything up until the first brace is a literal.
if (Fmt.front() != '{') {
Expand Down Expand Up @@ -143,15 +140,77 @@ formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
return std::make_pair(ReplacementItem{Fmt}, StringRef());
}

#ifndef NDEBUG
#define ENABLE_VALIDATION 1
#else
#define ENABLE_VALIDATION 0 // Conveniently enable validation in release mode.
#endif

/// Parse \p Fmt into a sequence of literal and replacement items.
/// When \p Validate is set (and validation is compiled in), check that the
/// replacement indices are dense in [0, NumArgs) and match \p NumArgs.
SmallVector<ReplacementItem, 2>
formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs,
                                       bool Validate) {
  SmallVector<ReplacementItem, 2> Replacements;

#if ENABLE_VALIDATION
  const StringRef SavedFmtStr = Fmt;
  size_t NumExpectedArgs = 0;
#endif

  while (!Fmt.empty()) {
    ReplacementItem I;
    std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt);
    if (I.Type != ReplacementType::Empty)
      Replacements.push_back(I);
#if ENABLE_VALIDATION
    if (I.Type == ReplacementType::Format)
      NumExpectedArgs = std::max(NumExpectedArgs, I.Index + 1);
#endif
  }

#if ENABLE_VALIDATION
  if (!Validate)
    return Replacements;

  // Perform additional validation. Verify that the number of arguments matches
  // the number of replacement indices and that there are no holes in the
  // replacement indices.

  // When validation fails, return an array of replacement items that
  // will print an error message as the output of this formatv() (used when
  // validation is enabled in release mode).
  auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) {
    return SmallVector<ReplacementItem, 2>{
        ReplacementItem("Invalid formatv() call: "), ReplacementItem(ErrorMsg),
        ReplacementItem(" for format string: "), ReplacementItem(SavedFmtStr)};
  };

  if (NumExpectedArgs != NumArgs) {
    errs() << formatv(
        "Expected {0} Args, but got {1} for format string '{2}'\n",
        NumExpectedArgs, NumArgs, SavedFmtStr);
    assert(0 && "Invalid formatv() call");
    return getErrorReplacements("Unexpected number of arguments");
  }

  // Find the number of unique indices seen. All replacement indices
  // are < NumExpectedArgs.
  SmallVector<bool> Indices(NumExpectedArgs);
  size_t Count = 0;
  for (const ReplacementItem &I : Replacements) {
    if (I.Type != ReplacementType::Format || Indices[I.Index])
      continue;
    Indices[I.Index] = true;
    ++Count;
  }

  if (Count != NumExpectedArgs) {
    errs() << formatv(
        "Replacement field indices cannot have holes for format string '{0}'\n",
        SavedFmtStr);
    assert(0 && "Invalid format string");
    return getErrorReplacements("Replacement indices have holes");
  }
#endif // ENABLE_VALIDATION
  return Replacements;
}

Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Support/ModRef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/Support/ModRef.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

Expand All @@ -33,7 +34,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, ModRefInfo MR) {
}

raw_ostream &llvm::operator<<(raw_ostream &OS, MemoryEffects ME) {
for (IRMemLocation Loc : MemoryEffects::locations()) {
interleaveComma(MemoryEffects::locations(), OS, [&](IRMemLocation Loc) {
switch (Loc) {
case IRMemLocation::ArgMem:
OS << "ArgMem: ";
Expand All @@ -45,7 +46,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, MemoryEffects ME) {
OS << "Other: ";
break;
}
OS << ME.getModRef(Loc) << ", ";
}
OS << ME.getModRef(Loc);
});
return OS;
}
59 changes: 42 additions & 17 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27096,21 +27096,37 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
: AtomicExpansionKind::LLSC;
}

// Return true if the atomic operation expansion will lower to use a library
// call, and is thus ineligible to use an LL/SC expansion (a libcall may
// clobber the exclusive monitor and abort the LL/SC sequence).
static bool rmwOpMayLowerToLibcall(const AArch64Subtarget &Subtarget,
                                   const AtomicRMWInst *RMW) {
  // Integer RMW operations never degrade into library calls here.
  if (!RMW->isFloatingPointOperation())
    return false;
  switch (RMW->getType()->getScalarType()->getTypeID()) {
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::HalfTyID:
  case Type::BFloatTyID:
    // Without FPARMv8 these types will use soft-float libcalls.
    return !Subtarget.hasFPARMv8();
  default:
    // fp128 (and any other FP type) will emit library calls.
    return true;
  }
  // Note: every switch path returns, so no trailing unreachable is needed.
}

// The "default" for integer RMW operations is to expand to an LL/SC loop.
// However, with the LSE instructions (or outline-atomics mode, which provides
// library routines in place of the LSE-instructions), we can directly emit many
// operations instead.
//
// Floating-point operations are always emitted to a cmpxchg loop, because they
// may trigger a trap which aborts an LLSC sequence.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
Type *Ty = AI->getType();
unsigned Size = Ty->getPrimitiveSizeInBits();
assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");

if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;

bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
(AI->getOperation() == AtomicRMWInst::Xchg ||
AI->getOperation() == AtomicRMWInst::Or ||
Expand All @@ -27120,7 +27136,8 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {

// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
!AI->isFloatingPointOperation()) {
if (Subtarget->hasLSE())
return AtomicExpansionKind::None;
if (Subtarget->outlineAtomics()) {
Expand All @@ -27146,7 +27163,7 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
// we have a single CAS instruction that can replace the loop.
if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None ||
Subtarget->hasLSE())
Subtarget->hasLSE() || rmwOpMayLowerToLibcall(*Subtarget, AI))
return AtomicExpansionKind::CmpXChg;

return AtomicExpansionKind::LLSC;
Expand Down Expand Up @@ -27193,10 +27210,14 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,

Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");

auto *Int128Ty = Type::getInt128Ty(Builder.getContext());
Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64");
Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64");

Value *Or = Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64");
return Builder.CreateBitCast(Or, ValueTy);
}

Type *Tys[] = { Addr->getType() };
Expand All @@ -27207,8 +27228,8 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
const DataLayout &DL = M->getDataLayout();
IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
CallInst *CI = Builder.CreateCall(Ldxr, Addr);
CI->addParamAttr(
0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy));
CI->addParamAttr(0, Attribute::get(Builder.getContext(),
Attribute::ElementType, IntEltTy));
Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);

return Builder.CreateBitCast(Trunc, ValueTy);
Expand All @@ -27234,9 +27255,13 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
Function *Stxr = Intrinsic::getDeclaration(M, Int);
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Type *Int128Ty = Type::getInt128Ty(M->getContext());

Value *CastVal = Builder.CreateBitCast(Val, Int128Ty);

Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo");
Value *Hi =
Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi");
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}

Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Target/AArch64/AArch64Processors.td
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,12 @@ def ProcessorFeatures {
FeatureLSE, FeaturePAuth, FeatureFPAC,
FeatureRAS, FeatureRCPC, FeatureRDM,
FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS];
list<SubtargetFeature> AppleM4 = [HasV9_2aOps, FeatureSHA2, FeatureFPARMv8,
// Technically apple-m4 is v9.2a, but we can't use that here.
// Historically, llvm defined v9.0a as requiring SVE, but it's optional
// according to the Arm ARM, and not supported by the core. We decoupled the
// two in the clang driver and in the backend subtarget features, but it's
// still an issue in the clang frontend. v8.7a is the next closest choice.
list<SubtargetFeature> AppleM4 = [HasV8_7aOps, FeatureSHA2, FeatureFPARMv8,
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
FeatureAES, FeatureBF16,
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ void NVPTXPassConfig::addIRPasses() {
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
disablePass(&ShrinkWrapID);
disablePass(&RemoveLoadsIntoFakeUsesID);

addPass(createNVPTXAAWrapperPass());
addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
Expand Down
9 changes: 4 additions & 5 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FMA,
ISD::FDIV, ISD::FSQRT,
ISD::FABS, ISD::FNEG,
ISD::STRICT_FMA, ISD::STRICT_FADD,
ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
Expand All @@ -416,8 +415,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
setOperationAction(ISD::FREM, MVT::bf16, Promote);
// FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
// DAGCombiner::visitFP_ROUND probably needs improvements first.
setOperationAction(ISD::FABS, MVT::bf16, Expand);
setOperationAction(ISD::FNEG, MVT::bf16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
}

Expand All @@ -433,8 +432,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
MVT::f16, Legal);
// FIXME: Need to promote f16 FCOPYSIGN to f32, but the
// DAGCombiner::visitFP_ROUND probably needs improvements first.
setOperationAction(ISD::FABS, MVT::f16, Expand);
setOperationAction(ISD::FNEG, MVT::f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
}

Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1626,9 +1626,6 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (LT.second.isScalableVector() && !LT.first.isValid())
return LT.first;

if (!isTypeLegal(Val))
return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);

// Mask vector extract/insert is expanded via e8.
if (Val->getScalarSizeInBits() == 1) {
VectorType *WideTy =
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ void SPIRVPassConfig::addPostRegAlloc() {
disablePass(&ShrinkWrapID);
disablePass(&LiveDebugValuesID);
disablePass(&MachineLateInstrsCleanupID);
disablePass(&RemoveLoadsIntoFakeUsesID);

// Do not work with OpPhi.
disablePass(&BranchFolderPassID);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/Sparc/SparcInstrAliases.td
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,10 @@ def : InstAlias<"flush", (FLUSH), 0>;
// unimp -> unimp 0
def : InstAlias<"unimp", (UNIMP 0), 0>;

// Not in spec, but we follow Solaris behavior of having `illtrap`
// interchangeable with `unimp` all the time.
def : MnemonicAlias<"illtrap", "unimp">;

def : MnemonicAlias<"iflush", "flush">;

def : MnemonicAlias<"stub", "stb">;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
disablePass(&StackMapLivenessID);
disablePass(&PatchableFunctionID);
disablePass(&ShrinkWrapID);
disablePass(&RemoveLoadsIntoFakeUsesID);

// This pass hurts code size for wasm because it can generate irreducible
// control flow.
Expand Down
32 changes: 32 additions & 0 deletions llvm/lib/Target/X86/X86FloatingPoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,24 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
if (MI.isCall())
FPInstClass = X86II::SpecialFP;

// A fake_use with a floating point pseudo register argument that is
// killed must behave like any other floating point operation and pop
// the floating point stack (this is done in handleSpecialFP()).
// Fake_use is, however, unusual, in that sometimes its operand is not
// killed because a later instruction (probably a return) will use it.
// It is this instruction that will pop the stack.
// In this scenario we can safely remove the fake_use's operand
// (it is live anyway).
if (MI.isFakeUse()) {
const MachineOperand &MO = MI.getOperand(0);
if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) {
if (MO.isKill())
FPInstClass = X86II::SpecialFP;
else
MI.removeOperand(0);
}
}

if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!

Expand Down Expand Up @@ -1737,6 +1755,20 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
// Don't delete the inline asm!
return;
}

// FAKE_USE must pop its register operand off the stack if it is killed,
// because this constitutes the register's last use. If the operand
// is not killed, it will have its last use later, so we leave it alone.
// In either case we remove the operand so later passes don't see it.
case TargetOpcode::FAKE_USE: {
assert(MI.getNumExplicitOperands() == 1 &&
"FAKE_USE must have exactly one operand");
if (MI.getOperand(0).isKill()) {
freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0)));
}
MI.removeOperand(0);
return;
}
}

Inst = MBB->erase(Inst); // Remove the pseudo instruction
Expand Down
19 changes: 17 additions & 2 deletions llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2963,14 +2963,29 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {

case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
case Intrinsic::x86_avx512_vpermilvar_ps_512:
case Intrinsic::x86_avx512_vpermilvar_ps_512: {
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}

KnownBits KnownMask(32);
if (IC.SimplifyDemandedBits(&II, 1, APInt(32, 0b00011), KnownMask))
return &II;
break;
}

case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
case Intrinsic::x86_avx512_vpermilvar_pd_512:
case Intrinsic::x86_avx512_vpermilvar_pd_512: {
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}

KnownBits KnownMask(64);
if (IC.SimplifyDemandedBits(&II, 1, APInt(64, 0b00010), KnownMask))
return &II;
break;
}

case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
Expand Down
30 changes: 30 additions & 0 deletions llvm/lib/Transforms/Scalar/SROA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3802,6 +3802,12 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {

struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
AAMDNodes AATags;
// A vector to hold the split components that we want to emit
// separate fake uses for.
SmallVector<Value *, 4> Components;
// A vector to hold all the fake uses of the struct that we are splitting.
// Usually there should only be one, but we are handling the general case.
SmallVector<Instruction *, 1> FakeUses;

LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
Expand All @@ -3826,10 +3832,32 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
Load->setAAMetadata(
AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
// Record the load so we can generate a fake use for this aggregate
// component.
Components.push_back(Load);

Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
}

// Stash the fake uses that use the value generated by this instruction.
void recordFakeUses(LoadInst &LI) {
for (Use &U : LI.uses())
if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
if (II->getIntrinsicID() == Intrinsic::fake_use)
FakeUses.push_back(II);
}

// Replace all fake uses of the aggregate with a series of fake uses, one
// for each split component.
void emitFakeUses() {
for (Instruction *I : FakeUses) {
IRB.SetInsertPoint(I);
for (auto *V : Components)
IRB.CreateIntrinsic(Intrinsic::fake_use, {}, {V});
I->eraseFromParent();
}
}
};

bool visitLoadInst(LoadInst &LI) {
Expand All @@ -3841,8 +3869,10 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
getAdjustedAlignment(&LI, 0), DL, IRB);
Splitter.recordFakeUses(LI);
Value *V = PoisonValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
Splitter.emitFakeUses();
Visited.erase(&LI);
LI.replaceAllUsesWith(V);
LI.eraseFromParent();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Utils/CloneFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,12 @@ void PruningFunctionCloner::CloneBlock(
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
++II) {

// Don't clone fake_use as it may suppress many optimizations
// due to inlining, especially SROA.
if (auto *IntrInst = dyn_cast<IntrinsicInst>(II))
if (IntrInst->getIntrinsicID() == Intrinsic::fake_use)
continue;

Instruction *NewInst = cloneInstruction(II);
NewInst->insertInto(NewBB, NewBB->end());

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Utils/Local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3491,6 +3491,9 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,

unsigned Count = 0;
for (Use &U : llvm::make_early_inc_range(From->uses())) {
auto *II = dyn_cast<IntrinsicInst>(U.getUser());
if (II && II->getIntrinsicID() == Intrinsic::fake_use)
continue;
if (!ShouldReplace(Root, U))
continue;
LLVM_DEBUG(dbgs() << "Replace dominated use of '";
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
if (SI->isVolatile())
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
if (!II->isLifetimeStartOrEnd() && !II->isDroppable() &&
II->getIntrinsicID() != Intrinsic::fake_use)
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5296,6 +5296,9 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
!TE.isAltShuffle())
return TE.ReorderIndices;
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
if (!TE.ReorderIndices.empty())
return TE.ReorderIndices;

auto PHICompare = [&](unsigned I1, unsigned I2) {
Value *V1 = TE.Scalars[I1];
Value *V2 = TE.Scalars[I2];
Expand Down Expand Up @@ -5329,8 +5332,6 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
return false;
return true;
};
if (!TE.ReorderIndices.empty())
return TE.ReorderIndices;
DenseMap<unsigned, unsigned> PhiToId;
SmallVector<unsigned> Phis(TE.Scalars.size());
std::iota(Phis.begin(), Phis.end(), 0);
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,34 +42,34 @@ define i32 @masked_gather() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I8 = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64.u = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64.u = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F64.u = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64.u = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64.u = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64.u = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32.u = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32.u = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32.u = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32.u = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32.u = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32.u = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32.u = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32.u = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32F16.u = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F16.u = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F16.u = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F16.u = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32F16.u = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16F16.u = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F16.u = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F16.u = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F16.u = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16.u = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64.u = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64.u = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64.u = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64.u = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64.u = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64.u = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 4, <1 x i1> undef, <1 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32.u = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32.u = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32.u = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I32.u = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I32.u = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32.u = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32.u = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32.u = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16.u = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16.u = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16.u = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I16.u = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16.u = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16.u = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16.u = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I16.u = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16.u = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I16.u = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,34 +42,34 @@ define i32 @masked_scatter() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i8.v1p0(<1 x i8> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 2, <1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> undef, <1 x ptr> undef, i32 2, <1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 140 for instruction: call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 140 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
Expand Down
266 changes: 133 additions & 133 deletions llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll

Large diffs are not rendered by default.

250 changes: 125 additions & 125 deletions llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.inc, %for.body.lr.ph
%i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
%add = add nsw i32 %i.02, 50, !dbg !16
call void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !18, metadata !19), !dbg !20
tail call void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !18, metadata !19), !dbg !20
%idxprom = sext i32 %add to i64, !dbg !21

; CHECK: %idxprom = sext i32 %add to i64
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: Workaround A53 erratum 835769 pass
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Sanitizer Binary Metadata
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: Workaround A53 erratum 835769 pass
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Sanitizer Binary Metadata
Expand Down
115 changes: 0 additions & 115 deletions llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll

This file was deleted.

1,033 changes: 1,033 additions & 0 deletions llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll

Large diffs are not rendered by default.

957 changes: 957 additions & 0 deletions llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll

Large diffs are not rendered by default.

957 changes: 957 additions & 0 deletions llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll

Large diffs are not rendered by default.

1,033 changes: 1,033 additions & 0 deletions llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll

Large diffs are not rendered by default.

1,465 changes: 1,155 additions & 310 deletions llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll

Large diffs are not rendered by default.

668 changes: 549 additions & 119 deletions llvm/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
; GCN-O0-NEXT: Post RA hazard recognizer
; GCN-O0-NEXT: Branch relaxation pass
; GCN-O0-NEXT: Register Usage Information Collector Pass
; GCN-O0-NEXT: Remove Loads Into Fake Uses
; GCN-O0-NEXT: Live DEBUG_VALUE analysis
; GCN-O0-NEXT: Machine Sanitizer Binary Metadata
; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis
Expand Down Expand Up @@ -420,6 +421,7 @@
; GCN-O1-NEXT: AMDGPU Insert Delay ALU
; GCN-O1-NEXT: Branch relaxation pass
; GCN-O1-NEXT: Register Usage Information Collector Pass
; GCN-O1-NEXT: Remove Loads Into Fake Uses
; GCN-O1-NEXT: Live DEBUG_VALUE analysis
; GCN-O1-NEXT: Machine Sanitizer Binary Metadata
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
Expand Down Expand Up @@ -725,6 +727,7 @@
; GCN-O1-OPTS-NEXT: AMDGPU Insert Delay ALU
; GCN-O1-OPTS-NEXT: Branch relaxation pass
; GCN-O1-OPTS-NEXT: Register Usage Information Collector Pass
; GCN-O1-OPTS-NEXT: Remove Loads Into Fake Uses
; GCN-O1-OPTS-NEXT: Live DEBUG_VALUE analysis
; GCN-O1-OPTS-NEXT: Machine Sanitizer Binary Metadata
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
Expand Down Expand Up @@ -1036,6 +1039,7 @@
; GCN-O2-NEXT: AMDGPU Insert Delay ALU
; GCN-O2-NEXT: Branch relaxation pass
; GCN-O2-NEXT: Register Usage Information Collector Pass
; GCN-O2-NEXT: Remove Loads Into Fake Uses
; GCN-O2-NEXT: Live DEBUG_VALUE analysis
; GCN-O2-NEXT: Machine Sanitizer Binary Metadata
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
Expand Down Expand Up @@ -1359,6 +1363,7 @@
; GCN-O3-NEXT: AMDGPU Insert Delay ALU
; GCN-O3-NEXT: Branch relaxation pass
; GCN-O3-NEXT: Register Usage Information Collector Pass
; GCN-O3-NEXT: Remove Loads Into Fake Uses
; GCN-O3-NEXT: Live DEBUG_VALUE analysis
; GCN-O3-NEXT: Machine Sanitizer Binary Metadata
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
Expand Down
1,029 changes: 942 additions & 87 deletions llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions llvm/test/CodeGen/ARM/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@
; CHECK-NEXT: ARM block placement
; CHECK-NEXT: optimise barriers pass
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Sanitizer Binary Metadata
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/LoongArch/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Sanitizer Binary Metadata
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/LoongArch/opt-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@
; LAXX-NEXT: Implement the 'patchable-function' attribute
; LAXX-NEXT: Branch relaxation pass
; LAXX-NEXT: Contiguously Lay Out Funclets
; LAXX-NEXT: Remove Loads Into Fake Uses
; LAXX-NEXT: StackMap Liveness Analysis
; LAXX-NEXT: Live DEBUG_VALUE analysis
; LAXX-NEXT: Machine Sanitizer Binary Metadata
Expand Down
99 changes: 99 additions & 0 deletions llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# In certain cases CodeGenPrepare folds a return instruction into
# the return block's predecessor blocks and subsequently deletes the return block.
# The purpose of this is to enable tail call optimization in the predecessor blocks.
# Removal of the return block also removes fake use instructions that were present
# in the return block, potentially causing debug information to be lost.
#
# The fix is to clone any fake use instructions that are not dominated by definitions
# in the return block itself into the predecessor blocks. This test ensures that we do so.
#
# Generated from the following source with
# clang -fextend-lifetimes -S -emit-llvm -O2 -mllvm -stop-before=codegenprepare -o test.mir test.c
#
# extern int f0();
# extern int f1();
#
# int foo(int i) {
# int temp = i;
# if (temp == 0)
# temp = f0();
# else
# temp = f1();
# return temp;
# }
#
# RUN: llc -run-pass=codegenprepare -mtriple=x86_64-unknown-linux -o - %s | FileCheck %s
#
# CHECK: define{{.*}}foo
# CHECK: if.then:
# CHECK-NEXT: call{{.*}}fake.use(i32 %i)
# CHECK-NEXT: tail call i32{{.*}}@f0
# CHECK-NEXT: ret
# CHECK: if.else:
# CHECK-NEXT: call{{.*}}fake.use(i32 %i)
# CHECK-NEXT: tail call i32{{.*}}@f1
# CHECK-NEXT: ret

--- |
define hidden i32 @foo(i32 %i) local_unnamed_addr optdebug {
entry:
%cmp = icmp eq i32 %i, 0
br i1 %cmp, label %if.then, label %if.else

if.then:
%call = tail call i32 (...) @f0()
br label %if.end

if.else:
%call1 = tail call i32 (...) @f1()
br label %if.end

if.end:
%temp.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
notail call void (...) @llvm.fake.use(i32 %temp.0)
notail call void (...) @llvm.fake.use(i32 %i)
ret i32 %temp.0
}
declare i32 @f0(...) local_unnamed_addr
declare i32 @f1(...) local_unnamed_addr

...
---
name: foo
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
constants: []
machineFunctionInfo: {}
body: |

...
1 change: 1 addition & 0 deletions llvm/test/CodeGen/PowerPC/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: PowerPC Pre-Emit Peephole
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Sanitizer Binary Metadata
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/PowerPC/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@
; CHECK-NEXT: PowerPC Pre-Emit Peephole
; CHECK-NEXT: PowerPC Early-Return Creation
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Sanitizer Binary Metadata
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Sanitizer Binary Metadata
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Sanitizer Binary Metadata
Expand Down
603 changes: 433 additions & 170 deletions llvm/test/CodeGen/RISCV/bfloat-arith.ll

Large diffs are not rendered by default.

631 changes: 453 additions & 178 deletions llvm/test/CodeGen/RISCV/half-arith-strict.ll

Large diffs are not rendered by default.

724 changes: 425 additions & 299 deletions llvm/test/CodeGen/RISCV/half-arith.ll

Large diffs are not rendered by default.

70 changes: 39 additions & 31 deletions llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,15 @@ define half @fcopysign_fneg(half %a, half %b) nounwind {
; RV32IZFHMIN-LABEL: fcopysign_fneg:
; RV32IZFHMIN: # %bb.0:
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: fmv.h.x fa5, a0
; RV32IZFHMIN-NEXT: fmv.h.x fa4, a1
; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa4
; RV32IZFHMIN-NEXT: fneg.s fa4, fa4
; RV32IZFHMIN-NEXT: fcvt.h.s fa4, fa4
; RV32IZFHMIN-NEXT: fsh fa5, 8(sp)
; RV32IZFHMIN-NEXT: fsh fa4, 12(sp)
; RV32IZFHMIN-NEXT: fmv.h.x fa5, a1
; RV32IZFHMIN-NEXT: fsh fa5, 4(sp)
; RV32IZFHMIN-NEXT: lbu a1, 5(sp)
; RV32IZFHMIN-NEXT: xori a1, a1, 128
; RV32IZFHMIN-NEXT: sb a1, 5(sp)
; RV32IZFHMIN-NEXT: flh fa5, 4(sp)
; RV32IZFHMIN-NEXT: fmv.h.x fa4, a0
; RV32IZFHMIN-NEXT: fsh fa4, 8(sp)
; RV32IZFHMIN-NEXT: fsh fa5, 12(sp)
; RV32IZFHMIN-NEXT: lbu a0, 9(sp)
; RV32IZFHMIN-NEXT: lbu a1, 13(sp)
; RV32IZFHMIN-NEXT: andi a0, a0, 127
Expand All @@ -228,31 +230,35 @@ define half @fcopysign_fneg(half %a, half %b) nounwind {
;
; RV64IZFHMIN-LABEL: fcopysign_fneg:
; RV64IZFHMIN: # %bb.0:
; RV64IZFHMIN-NEXT: addi sp, sp, -16
; RV64IZFHMIN-NEXT: fmv.h.x fa5, a0
; RV64IZFHMIN-NEXT: fmv.h.x fa4, a1
; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa4
; RV64IZFHMIN-NEXT: fneg.s fa4, fa4
; RV64IZFHMIN-NEXT: fcvt.h.s fa4, fa4
; RV64IZFHMIN-NEXT: fsh fa5, 0(sp)
; RV64IZFHMIN-NEXT: fsh fa4, 8(sp)
; RV64IZFHMIN-NEXT: lbu a0, 1(sp)
; RV64IZFHMIN-NEXT: addi sp, sp, -32
; RV64IZFHMIN-NEXT: fmv.h.x fa5, a1
; RV64IZFHMIN-NEXT: fsh fa5, 8(sp)
; RV64IZFHMIN-NEXT: lbu a1, 9(sp)
; RV64IZFHMIN-NEXT: xori a1, a1, 128
; RV64IZFHMIN-NEXT: sb a1, 9(sp)
; RV64IZFHMIN-NEXT: flh fa5, 8(sp)
; RV64IZFHMIN-NEXT: fmv.h.x fa4, a0
; RV64IZFHMIN-NEXT: fsh fa4, 16(sp)
; RV64IZFHMIN-NEXT: fsh fa5, 24(sp)
; RV64IZFHMIN-NEXT: lbu a0, 17(sp)
; RV64IZFHMIN-NEXT: lbu a1, 25(sp)
; RV64IZFHMIN-NEXT: andi a0, a0, 127
; RV64IZFHMIN-NEXT: andi a1, a1, 128
; RV64IZFHMIN-NEXT: or a0, a0, a1
; RV64IZFHMIN-NEXT: sb a0, 1(sp)
; RV64IZFHMIN-NEXT: flh fa5, 0(sp)
; RV64IZFHMIN-NEXT: sb a0, 17(sp)
; RV64IZFHMIN-NEXT: flh fa5, 16(sp)
; RV64IZFHMIN-NEXT: fmv.x.h a0, fa5
; RV64IZFHMIN-NEXT: addi sp, sp, 16
; RV64IZFHMIN-NEXT: addi sp, sp, 32
; RV64IZFHMIN-NEXT: ret
;
; RV32IZHINXMIN-LABEL: fcopysign_fneg:
; RV32IZHINXMIN: # %bb.0:
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: fcvt.s.h a1, a1
; RV32IZHINXMIN-NEXT: fneg.s a1, a1
; RV32IZHINXMIN-NEXT: fcvt.h.s a1, a1
; RV32IZHINXMIN-NEXT: sh a1, 4(sp)
; RV32IZHINXMIN-NEXT: lbu a1, 5(sp)
; RV32IZHINXMIN-NEXT: xori a1, a1, 128
; RV32IZHINXMIN-NEXT: sb a1, 5(sp)
; RV32IZHINXMIN-NEXT: lh a1, 4(sp)
; RV32IZHINXMIN-NEXT: sh a0, 8(sp)
; RV32IZHINXMIN-NEXT: sh a1, 12(sp)
; RV32IZHINXMIN-NEXT: lbu a0, 9(sp)
Expand All @@ -267,20 +273,22 @@ define half @fcopysign_fneg(half %a, half %b) nounwind {
;
; RV64IZHINXMIN-LABEL: fcopysign_fneg:
; RV64IZHINXMIN: # %bb.0:
; RV64IZHINXMIN-NEXT: addi sp, sp, -16
; RV64IZHINXMIN-NEXT: fcvt.s.h a1, a1
; RV64IZHINXMIN-NEXT: fneg.s a1, a1
; RV64IZHINXMIN-NEXT: fcvt.h.s a1, a1
; RV64IZHINXMIN-NEXT: sh a0, 0(sp)
; RV64IZHINXMIN-NEXT: addi sp, sp, -32
; RV64IZHINXMIN-NEXT: sh a1, 8(sp)
; RV64IZHINXMIN-NEXT: lbu a0, 1(sp)
; RV64IZHINXMIN-NEXT: lbu a1, 9(sp)
; RV64IZHINXMIN-NEXT: xori a1, a1, 128
; RV64IZHINXMIN-NEXT: sb a1, 9(sp)
; RV64IZHINXMIN-NEXT: lh a1, 8(sp)
; RV64IZHINXMIN-NEXT: sh a0, 16(sp)
; RV64IZHINXMIN-NEXT: sh a1, 24(sp)
; RV64IZHINXMIN-NEXT: lbu a0, 17(sp)
; RV64IZHINXMIN-NEXT: lbu a1, 25(sp)
; RV64IZHINXMIN-NEXT: andi a0, a0, 127
; RV64IZHINXMIN-NEXT: andi a1, a1, 128
; RV64IZHINXMIN-NEXT: or a0, a0, a1
; RV64IZHINXMIN-NEXT: sb a0, 1(sp)
; RV64IZHINXMIN-NEXT: lh a0, 0(sp)
; RV64IZHINXMIN-NEXT: addi sp, sp, 16
; RV64IZHINXMIN-NEXT: sb a0, 17(sp)
; RV64IZHINXMIN-NEXT: lh a0, 16(sp)
; RV64IZHINXMIN-NEXT: addi sp, sp, 32
; RV64IZHINXMIN-NEXT: ret
%1 = fneg half %b
%2 = call half @llvm.copysign.f16(half %a, half %1)
Expand Down
27 changes: 21 additions & 6 deletions llvm/test/CodeGen/RISCV/half-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1821,12 +1821,27 @@ define half @fabs_f16(half %a) nounwind {
; RV64I-NEXT: srli a0, a0, 49
; RV64I-NEXT: ret
;
; CHECKIZFHMIN-LABEL: fabs_f16:
; CHECKIZFHMIN: # %bb.0:
; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa0
; CHECKIZFHMIN-NEXT: fabs.s fa5, fa5
; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
; CHECKIZFHMIN-NEXT: ret
; RV32IZFHMIN-LABEL: fabs_f16:
; RV32IZFHMIN: # %bb.0:
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: fsh fa0, 12(sp)
; RV32IZFHMIN-NEXT: lbu a0, 13(sp)
; RV32IZFHMIN-NEXT: andi a0, a0, 127
; RV32IZFHMIN-NEXT: sb a0, 13(sp)
; RV32IZFHMIN-NEXT: flh fa0, 12(sp)
; RV32IZFHMIN-NEXT: addi sp, sp, 16
; RV32IZFHMIN-NEXT: ret
;
; RV64IZFHMIN-LABEL: fabs_f16:
; RV64IZFHMIN: # %bb.0:
; RV64IZFHMIN-NEXT: addi sp, sp, -16
; RV64IZFHMIN-NEXT: fsh fa0, 8(sp)
; RV64IZFHMIN-NEXT: lbu a0, 9(sp)
; RV64IZFHMIN-NEXT: andi a0, a0, 127
; RV64IZFHMIN-NEXT: sb a0, 9(sp)
; RV64IZFHMIN-NEXT: flh fa0, 8(sp)
; RV64IZFHMIN-NEXT: addi sp, sp, 16
; RV64IZFHMIN-NEXT: ret
;
; RV32IZHINXMIN-LABEL: fabs_f16:
; RV32IZHINXMIN: # %bb.0:
Expand Down
189 changes: 158 additions & 31 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -650,38 +650,165 @@ define void @fabs_v6f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-RV32-LABEL: fabs_v6f16:
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT: vfabs.v v8, v9
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT: ret
; ZVFHMIN-ZFH-RV32-LABEL: fabs_v6f16:
; ZVFHMIN-ZFH-RV32: # %bb.0:
; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vfabs.v v8, v9
; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-ZFH-RV32-NEXT: addi a1, a0, 8
; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a1)
; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: ret
;
; ZVFHMIN-RV64-LABEL: fabs_v6f16:
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT: vfabs.v v8, v9
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT: ret
; ZVFHMIN-ZFH-RV64-LABEL: fabs_v6f16:
; ZVFHMIN-ZFH-RV64: # %bb.0:
; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vfabs.v v8, v9
; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: ret
;
; ZVFHMIN-ZFHIN-RV32-LABEL: fabs_v6f16:
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -64
; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 64
; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 58(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 54(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 24(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 52(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 20(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 50(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 16(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 48(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 48
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 46(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 44(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 42(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 40(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 40
; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 64
; ZVFHMIN-ZFHIN-RV32-NEXT: ret
;
; ZVFHMIN-ZFHIN-RV64-LABEL: fabs_v6f16:
; ZVFHMIN-ZFHIN-RV64: # %bb.0:
; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -80
; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 80
; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 56(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 48(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 40(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 32(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 24(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 16(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 57(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 57(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 49(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 49(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 41(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 41(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 33(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 33(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 25(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 25(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: lbu a1, 17(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: andi a1, a1, 127
; ZVFHMIN-ZFHIN-RV64-NEXT: sb a1, 17(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 56(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 74(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 48(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 72(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 40(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 70(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 32(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 68(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 66(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 64(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 64
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v8, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 80
; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/Thumb2/pr52817.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ define i32 @test(ptr %arg, ptr %arg1, ptr %arg2) #0 !dbg !6 {
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: LBB0_1: @ %bb3
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: .loc 1 0 0 is_stmt 0 @ :0:0
; CHECK-NEXT: adds r5, r3, #1
; CHECK-NEXT: str.w lr, [r2]
; CHECK-NEXT: cmp.w lr, #0
Expand All @@ -36,7 +37,7 @@ define i32 @test(ptr %arg, ptr %arg1, ptr %arg2) #0 !dbg !6 {
; CHECK-NEXT: movne r6, #0
; CHECK-NEXT: Ltmp0:
; CHECK-NEXT: @DEBUG_VALUE: test:this <- [DW_OP_LLVM_arg 0, DW_OP_plus_uconst 135168, DW_OP_LLVM_arg 1, DW_OP_constu 4, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst 4, DW_OP_stack_value] $r0, $r5
; CHECK-NEXT: .loc 1 28 24 prologue_end @ test.cpp:28:24
; CHECK-NEXT: .loc 1 28 24 prologue_end is_stmt 1 @ test.cpp:28:24
; CHECK-NEXT: strne.w r6, [r8]
; CHECK-NEXT: moveq r6, #1
; CHECK-NEXT: ldr r4, [r4, #4]
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
; CHECK-NEXT: X86 Insert Cache Prefetches
; CHECK-NEXT: X86 insert wait instruction
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Sanitizer Binary Metadata
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/X86/fake-use-hpfloat.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
; assert in DAGlegalizer with fake use of half precision float.
; Changes to half float promotion.
; RUN: llc -stop-after=finalize-isel -o - %s | FileCheck %s
;
; CHECK: bb.0.entry:
; CHECK-NEXT: %0:fr16 = FsFLD0SH
; CHECK-NEXT: FAKE_USE killed %0
;
target triple = "x86_64-unknown-unknown"

define void @_Z6doTestv() local_unnamed_addr optdebug {
entry:
tail call void (...) @llvm.fake.use(half 0xH0000)
ret void
}
43 changes: 43 additions & 0 deletions llvm/test/CodeGen/X86/fake-use-ld.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
; RUN: llc -O0 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s

; Checks that fake uses of the FP stack do not cause a crash.
;
; /*******************************************************************/
; extern long double foo(long double, long double, long double);
;
; long double actual(long double p1, long double p2, long double p3) {
; return fmal(p1, p2, p3);
; }
; /*******************************************************************/

define x86_fp80 @actual(x86_fp80 %p1, x86_fp80 %p2, x86_fp80 %p3) optdebug {
;
; CHECK: actual
;
entry:
%p1.addr = alloca x86_fp80, align 16
%p2.addr = alloca x86_fp80, align 16
%p3.addr = alloca x86_fp80, align 16
store x86_fp80 %p1, ptr %p1.addr, align 16
store x86_fp80 %p2, ptr %p2.addr, align 16
store x86_fp80 %p3, ptr %p3.addr, align 16
%0 = load x86_fp80, ptr %p1.addr, align 16
%1 = load x86_fp80, ptr %p2.addr, align 16
%2 = load x86_fp80, ptr %p3.addr, align 16
;
; CHECK: callq{{.*}}foo
;
%3 = call x86_fp80 @foo(x86_fp80 %0, x86_fp80 %1, x86_fp80 %2)
%4 = load x86_fp80, ptr %p1.addr, align 16
call void (...) @llvm.fake.use(x86_fp80 %4)
%5 = load x86_fp80, ptr %p2.addr, align 16
call void (...) @llvm.fake.use(x86_fp80 %5)
%6 = load x86_fp80, ptr %p3.addr, align 16
call void (...) @llvm.fake.use(x86_fp80 %6)
;
; CHECK: ret
;
ret x86_fp80 %3
}

declare x86_fp80 @foo(x86_fp80, x86_fp80, x86_fp80)
123 changes: 123 additions & 0 deletions llvm/test/CodeGen/X86/fake-use-scheduler.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Prevent the machine scheduler from moving instructions past FAKE_USE.
# RUN: llc -run-pass machine-scheduler -mtriple=x86_64-unknown-linux -debug-only=machine-scheduler 2>&1 -o - %s | FileCheck %s
# REQUIRES: asserts
#
# We make sure that, beginning with the first FAKE_USE instruction,
# no changes to the sequence of instructions are undertaken by the
# scheduler. We don't bother to check that the order of the FAKE_USEs
# remains the same. They should, but it is irrelevant.
#
# CHECK: ********** MI Scheduling **********
# CHECK-NEXT: foo:%bb.0 entry
# CHECK-NEXT: From: %0:gr64 = COPY $rdi
# CHECK-NEXT: To: FAKE_USE %5:gr64
# CHECK-NEXT: RegionInstrs: 7
#
# CHECK: ********** MI Scheduling **********
# CHECK-NEXT: bar:%bb.0 entry
# CHECK-NEXT: From: %0:gr64 = COPY $rdi
# CHECK-NEXT: To: RET 0, killed $rax
# CHECK-NEXT: RegionInstrs: 7
#
--- |
; ModuleID = 'test.ll'
source_filename = "test.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"

@glb = common dso_local local_unnamed_addr global [100 x i32] zeroinitializer, align 16

define dso_local i64 @foo(ptr %p) local_unnamed_addr optdebug {
entry:
%0 = load i32, ptr @glb, align 16
store i32 %0, ptr %p, align 4
%conv = sext i32 %0 to i64
%1 = load i32, ptr getelementptr inbounds ([100 x i32], ptr @glb, i64 0, i64 1), align 4
%arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
store i32 %1, ptr %arrayidx1, align 4
%conv2 = sext i32 %1 to i64
%add3 = add nsw i64 %conv2, %conv
notail call void (...) @llvm.fake.use(i64 %add3)
notail call void (...) @llvm.fake.use(i32 %1)
notail call void (...) @llvm.fake.use(i32 %0)
notail call void (...) @llvm.fake.use(ptr %p)
ret i64 %add3
}

define dso_local i64 @bar(ptr %p) local_unnamed_addr optdebug {
entry:
%0 = load i32, ptr @glb, align 16
store i32 %0, ptr %p, align 4
%conv = sext i32 %0 to i64
%1 = load i32, ptr getelementptr inbounds ([100 x i32], ptr @glb, i64 0, i64 1), align 4
%arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
store i32 %1, ptr %arrayidx1, align 4
%conv2 = sext i32 %1 to i64
%add3 = add nsw i64 %conv2, %conv
ret i64 %add3
}

; Function Attrs: nocallback nofree nosync nounwind willreturn
declare void @llvm.stackprotector(ptr, ptr)

...
---
name: foo
alignment: 16
tracksRegLiveness: true
debugInstrRef: true
registers:
- { id: 0, class: gr64, preferred-register: '' }
- { id: 1, class: gr64_with_sub_8bit, preferred-register: '' }
- { id: 2, class: gr32, preferred-register: '' }
- { id: 3, class: gr64_with_sub_8bit, preferred-register: '' }
- { id: 4, class: gr32, preferred-register: '' }
- { id: 5, class: gr64, preferred-register: '' }
liveins:
- { reg: '$rdi', virtual-reg: '%0' }
body: |
bb.0.entry:
liveins: $rdi

%0:gr64 = COPY $rdi
%1:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb, $noreg
MOV32mr %0, 1, $noreg, 0, $noreg, %1.sub_32bit
%3:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb + 4, $noreg
MOV32mr %0, 1, $noreg, 4, $noreg, %3.sub_32bit
%5:gr64 = COPY %3
%5:gr64 = nsw ADD64rr %5, %1, implicit-def dead $eflags
FAKE_USE %5
FAKE_USE %3.sub_32bit
FAKE_USE %1.sub_32bit
FAKE_USE %0
$rax = COPY %5
RET 0, killed $rax

...
---
name: bar
alignment: 16
tracksRegLiveness: true
debugInstrRef: true
registers:
- { id: 0, class: gr64, preferred-register: '' }
- { id: 1, class: gr64_with_sub_8bit, preferred-register: '' }
- { id: 2, class: gr32, preferred-register: '' }
- { id: 3, class: gr64_with_sub_8bit, preferred-register: '' }
- { id: 4, class: gr32, preferred-register: '' }
- { id: 5, class: gr64_with_sub_8bit, preferred-register: '' }
liveins:
- { reg: '$rdi', virtual-reg: '%0' }
body: |
bb.0.entry:
liveins: $rdi

%0:gr64 = COPY $rdi
%1:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb, $noreg
MOV32mr %0, 1, $noreg, 0, $noreg, %1.sub_32bit
%5:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb + 4, $noreg
MOV32mr %0, 1, $noreg, 4, $noreg, %5.sub_32bit
%5:gr64_with_sub_8bit = nsw ADD64rr %5, %1, implicit-def dead $eflags
$rax = COPY %5
RET 0, killed $rax

...
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O2 -o - \
; RUN: | FileCheck %s --implicit-check-not=TAILCALL
; Generated with: clang -emit-llvm -O2 -S -fextend-lifetimes test.cpp -o -
; =========== test.cpp ===============
; extern int bar(int);
; int foo1(int i)
; {
; return bar(i);
; }
; =========== test.cpp ===============

; CHECK: TAILCALL

; ModuleID = 'test.cpp'
source_filename = "test.cpp"

define i32 @_Z4foo1i(i32 %i) local_unnamed_addr optdebug {
entry:
%call = tail call i32 @_Z3bari(i32 %i)
tail call void (...) @llvm.fake.use(i32 %i)
ret i32 %call
}

declare i32 @_Z3bari(i32) local_unnamed_addr
14 changes: 14 additions & 0 deletions llvm/test/CodeGen/X86/fake-use-suppress-load.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
; Suppress redundant loads feeding into fake uses.
; RUN: llc -filetype=asm -o - %s --mtriple=x86_64-unknown-unknown | FileCheck %s
; Windows ABI works differently, there's no offset.
;
; Look for the spill
; CHECK: movq %r{{[a-z]+,}} -{{[0-9]+\(%rsp\)}}
; CHECK-NOT: movq -{{[0-9]+\(%rsp\)}}, %r{{[a-z]+}}

define dso_local i32 @f(ptr %p) local_unnamed_addr optdebug {
entry:
call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"() #1
notail call void (...) @llvm.fake.use(ptr %p)
ret i32 4
}
37 changes: 37 additions & 0 deletions llvm/test/CodeGen/X86/fake-use-tailcall.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
; RUN: llc < %s -stop-after=finalize-isel -mtriple=x86_64-unknown-linux - | FileCheck %s --implicit-check-not FAKE_USE
; Fake uses following tail calls should be pulled in front
; of the TCRETURN instruction. Fake uses using something defined by
; the tail call or after it should be suppressed.

; CHECK: name:{{ +}}bar
; CHECK: body:
; CHECK: bb.0.{{.*}}:
; CHECK: %0:{{.*}}= COPY
; CHECK: FAKE_USE %0
; CHECK: TCRETURN

; CHECK: name:{{ +}}baz
; CHECK: body:
; CHECK: bb.0.{{.*}}:
; CHECK: %0:{{.*}}= COPY
; CHECK: FAKE_USE %0
; CHECK: TCRETURN

define void @bar(i32 %v) optdebug {
entry:
%call = tail call i32 @_Z3fooi(i32 %v)
%mul = mul nsw i32 %call, 3
notail call void (...) @llvm.fake.use(i32 %mul)
notail call void (...) @llvm.fake.use(i32 %call)
notail call void (...) @llvm.fake.use(i32 %v)
ret void
}

define i32 @baz(i32 %v) optdebug {
entry:
%call = tail call i32 @_Z3fooi(i32 %v)
notail call void (...) @llvm.fake.use(i32 %v)
ret i32 %call
}

declare i32 @_Z3fooi(i32) local_unnamed_addr
39 changes: 39 additions & 0 deletions llvm/test/CodeGen/X86/fake-use-vector.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
; assert in DAGlegalizer with fake use of 1-element vectors.
; RUN: llc -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s
;
; ModuleID = 't2.cpp'
; source_filename = "t2.cpp"
; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
;
; Check that we get past ISel and generate FAKE_USE machine instructions for
; one-element vectors.
;
; CHECK: bb.0.entry:
; CHECK-DAG: %1:gr64 = COPY $rdi
; CHECK-DAG: %0:vr128 = COPY $xmm0
; CHECK: %2:vr64 =
; CHECK-DAG: FAKE_USE %1
; CHECK-DAG: FAKE_USE %0
; CHECK: RET


target triple = "x86_64-unknown-unknown"

; Function Attrs: nounwind sspstrong uwtable
define <4 x float> @_Z3runDv4_fDv1_x(<4 x float> %r, i64 %b.coerce) local_unnamed_addr #0 {
entry:
%0 = insertelement <1 x i64> undef, i64 %b.coerce, i32 0
%1 = bitcast i64 %b.coerce to <1 x i64>
%2 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %r, <1 x i64> %1)
tail call void (...) @llvm.fake.use(<1 x i64> %0)
tail call void (...) @llvm.fake.use(<4 x float> %r)
ret <4 x float> %2
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>)

; Function Attrs: nounwind
declare void @llvm.fake.use(...)

attributes #0 = { "target-cpu"="btver2" optdebug }
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/X86/fake-use-vector2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
; RUN: llc -stop-after=finalize-isel -mtriple=x86_64-unknown-linux -filetype=asm -o - %s | FileCheck %s
;
; Make sure we can split vectors that are used as operands of FAKE_USE.

; Generated from:
;
; typedef long __attribute__((ext_vector_type(8))) long8;
; void test0() { long8 id208 {0, 1, 2, 3, 4, 5, 6, 7}; }

; ModuleID = 't5.cpp'
source_filename = "t5.cpp"


; CHECK: %0:vr256 = VMOV
; CHECK: %1:vr256 = VMOV
; CHECK-DAG: FAKE_USE killed %1
; CHECK-DAG: FAKE_USE killed %0
; CHECK: RET
define void @_Z5test0v() local_unnamed_addr #0 {
entry:
tail call void (...) @llvm.fake.use(<8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>) #1
ret void
}

declare void @llvm.fake.use(...)

attributes #0 = { "target-cpu"="btver2" optdebug }
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/X86/fake-use-zero-length.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; RUN: llc < %s -stop-after=finalize-isel -mtriple=x86_64-unknown-linux | FileCheck %s --implicit-check-not=FAKE_USE
;
; Make sure SelectionDAG does not crash handling fake uses of zero-length arrays
; and structs. Check also that they are not propagated.
;
; Generated from the following source with
; clang -fextend-lifetimes -S -emit-llvm -O2 -mllvm -stop-after=safe-stack -o test.mir test.cpp
;
; int main ()
; { int array[0]; }
;
;
; CHECK: liveins: $[[IN_REG:[a-zA-Z0-9]+]]
; CHECK: %[[IN_VREG:[a-zA-Z0-9]+]]:gr32 = COPY $[[IN_REG]]
; CHECK: FAKE_USE %[[IN_VREG]]

source_filename = "test.ll"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define hidden i32 @main([0 x i32] %zero, [1 x i32] %one) local_unnamed_addr optdebug {
entry:
notail call void (...) @bar([0 x i32] %zero)
notail call void (...) @baz([1 x i32] %one)
notail call void (...) @llvm.fake.use([0 x i32] %zero)
notail call void (...) @llvm.fake.use([1 x i32] %one)
ret i32 0
}

declare void @bar([0 x i32] %a)
declare void @baz([1 x i32] %a)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/fsafdo_test1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
; Check that fs-afdo discriminators are generated.
; V01: .loc 1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3
; V01: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5
; V0: .loc 1 9 5 discriminator 11266 # foo.c:9:5
; V0: .loc 1 9 5 is_stmt 1 discriminator 11266 # foo.c:9:5
; V0: .loc 1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3
; V1: .loc 1 9 5 discriminator 514 # foo.c:9:5
; V1: .loc 1 9 5 is_stmt 1 discriminator 514 # foo.c:9:5
; V1: .loc 1 7 3 is_stmt 1 discriminator 258 # foo.c:7:3
; Check that variable __llvm_fs_discriminator__ is generated.
; V01: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/fsafdo_test4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
; CHECK: .loc 1 0 3 # foo.c:0:3
; CHECK: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5
; CHECK: .loc 1 0 5 is_stmt 0 # :0:5
; CHECK: .loc 1 9 5 discriminator 2 # foo.c:9:5
; CHECK: .loc 1 0 5 # :0:5
; CHECK: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5
; CHECK: .loc 1 0 5 is_stmt 0 # :0:5
; CHECK: .loc 1 7 3 is_stmt 1 discriminator 2 # foo.c:7:3
; CHECK: .loc 1 14 3 # foo.c:14:3
; Check that variable __llvm_fs_discriminator__ is NOT generated.
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/opt-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@
; CHECK-NEXT: X86 Insert Cache Prefetches
; CHECK-NEXT: X86 insert wait instruction
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Sanitizer Binary Metadata
Expand Down
98 changes: 98 additions & 0 deletions llvm/test/DebugInfo/AArch64/fake-use-global-isel.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
; REQUIRES: object-emission

; Make sure the fake use of 'b' at the end of 'foo' causes location information for 'b'
; to extend all the way to the end of the function.
; Duplicates `DebugInfo/X86/fake-use.ll` for global-isel.

; RUN: %llc_dwarf -O2 --global-isel=1 -mtriple=aarch64--linux-gnu -filetype=obj -dwarf-linkage-names=Abstract < %s | llvm-dwarfdump --debug-info --debug-line -v - -o %t
; RUN: %python %p/../Inputs/check-fake-use.py %t
; RUN: sed -e 's,call void (...) @llvm.fake.use,;,' %s \
; RUN: | %llc_dwarf - -O2 --global-isel=1 -mtriple=aarch64--linux-gnu -filetype=obj -dwarf-linkage-names=Abstract \
; RUN: | llvm-dwarfdump --debug-info --debug-line -v - -o %t
; RUN: not %python %p/../Inputs/check-fake-use.py %t

; Generated with:
; clang -O2 -g -S -emit-llvm -fextend-this-ptr fake-use.c
;
; int glob[10];
; extern void bar();
;
; int foo(int b, int i)
; {
; int loc = glob[i] * 2;
; if (b) {
; glob[2] = loc;
; bar();
; }
; return loc;
; }
;
; ModuleID = 't2.c'
source_filename = "t2.c"

@glob = common local_unnamed_addr global [10 x i32] zeroinitializer, align 16, !dbg !0

; Function Attrs: nounwind sspstrong uwtable
define i32 @foo(i32 %b, i32 %i) local_unnamed_addr optdebug !dbg !13 {
entry:
#dbg_value(i32 %b, !17, !20, !21)
%c = add i32 %b, 42
%tobool = icmp sgt i32 %c, 2, !dbg !27
tail call void (...) @bar() #2, !dbg !32
%idxprom = sext i32 %i to i64, !dbg !22
%arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @glob, i64 0, i64 %idxprom, !dbg !22
%0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !23
%mul = shl nsw i32 %0, 1, !dbg !22
br i1 %tobool, label %if.end, label %if.then, !dbg !29

if.then: ; preds = %entry
store i32 %mul, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @glob, i64 0, i64 2), align 8, !dbg !30, !tbaa !23
tail call void (...) @bar() #2, !dbg !32
br label %if.end, !dbg !33

if.end: ; preds = %entry, %if.then
call void (...) @llvm.fake.use(i32 %b), !dbg !34
ret i32 %mul, !dbg !35
}

declare void @bar(...) local_unnamed_addr

!llvm.dbg.cu = !{!1}
!llvm.module.flags = !{!9, !10, !11}
!llvm.ident = !{!12}

!0 = distinct !DIGlobalVariableExpression(var: !DIGlobalVariable(name: "glob", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true), expr: !DIExpression())
!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 4.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
!2 = !DIFile(filename: "t2.c", directory: "/")
!3 = !{}
!4 = !{!0}
!5 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 320, align: 32, elements: !7)
!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!7 = !{!8}
!8 = !DISubrange(count: 10)
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{i32 1, !"PIC Level", i32 2}
!12 = !{!"clang version 4.0.0"}
!13 = distinct !DISubprogram(name: "foo", scope: !2, file: !2, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !16)
!14 = !DISubroutineType(types: !15)
!15 = !{!6, !6, !6}
!16 = !{!17, !19}
!17 = !DILocalVariable(name: "b", arg: 1, scope: !13, file: !2, line: 4, type: !6)
!19 = !DILocalVariable(name: "loc", scope: !13, file: !2, line: 6, type: !6)
!20 = !DIExpression()
!21 = !DILocation(line: 4, scope: !13)
!22 = !DILocation(line: 6, scope: !13)
!23 = !{!24, !24, i64 0}
!24 = !{!"int", !25, i64 0}
!25 = !{!"omnipotent char", !26, i64 0}
!26 = !{!"Simple C/C++ TBAA"}
!27 = !DILocation(line: 7, scope: !28)
!28 = distinct !DILexicalBlock(scope: !13, file: !2, line: 7)
!29 = !DILocation(line: 7, scope: !13)
!30 = !DILocation(line: 8, scope: !31)
!31 = distinct !DILexicalBlock(scope: !28, file: !2, line: 7)
!32 = !DILocation(line: 9, scope: !31)
!33 = !DILocation(line: 10, scope: !31)
!34 = !DILocation(line: 12, scope: !13)
!35 = !DILocation(line: 11, scope: !13)
107 changes: 107 additions & 0 deletions llvm/test/DebugInfo/Inputs/check-fake-use.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/usr/bin/python3

# Parsing dwarfdump's output to determine whether the location list for the
# parameter "b" covers all of the function. The script searches for information
# in the input file to determine the [prologue, epilogue) range for the
# function, the location list range for "b", and checks that the latter covers
# the entirety of the former.
import re
import sys

DebugInfoPattern = r"\.debug_info contents:"
DebugLinePattern = r"\.debug_line contents:"
ProloguePattern = r"^\s*0x([0-9a-f]+)\s.+prologue_end"
EpiloguePattern = r"^\s*0x([0-9a-f]+)\s.+epilogue_begin"
FormalPattern = r"^0x[0-9a-f]+:\s+DW_TAG_formal_parameter"
LocationPattern = r"DW_AT_location\s+\[DW_FORM_([a-z_]+)\](?:.*0x([a-f0-9]+))"
DebugLocPattern = r'\[0x([a-f0-9]+),\s+0x([a-f0-9]+)\) ".text": (.+)$'

SeenDebugInfo = False
SeenDebugLine = False
# None until found; () for a whole-scope (exprloc) location; otherwise a
# (start, end) pair covering the merged contiguous range list.
LocationRanges = None
PrologueEnd = None
EpilogueBegin = None

# The dwarfdump output should contain the DW_AT_location for "b" first, then the
# line table which should contain prologue_end and epilogue_begin entries.
with open(sys.argv[1], "r") as dwarf_dump_file:
    dwarf_iter = iter(dwarf_dump_file)
    for line in dwarf_iter:
        if not SeenDebugInfo and re.match(DebugInfoPattern, line):
            SeenDebugInfo = True
        if not SeenDebugLine and re.match(DebugLinePattern, line):
            SeenDebugLine = True
        # Get the range of DW_AT_location for "b".
        if LocationRanges is None:
            if match := re.match(FormalPattern, line):
                # Go until we either find DW_AT_location or reach the end of this entry.
                location_match = None
                while location_match is None:
                    # A blank line terminates the DIE without a location.
                    if (line := next(dwarf_iter, "")) == "\n":
                        raise RuntimeError(
                            ".debug_info output is missing DW_AT_location for 'b'"
                        )
                    location_match = re.search(LocationPattern, line)
                # Variable has whole-scope location, represented by an empty tuple.
                if location_match.group(1) == "exprloc":
                    LocationRanges = ()
                    continue
                if location_match.group(1) != "sec_offset":
                    raise RuntimeError(
                        f"Unhandled form for DW_AT_location: DW_FORM_{location_match.group(1)}"
                    )
                # Variable has location range list.
                if (
                    debug_loc_match := re.search(DebugLocPattern, next(dwarf_iter, ""))
                ) is None:
                    raise RuntimeError("Invalid location range list for 'b'")
                LocationRanges = (
                    int(debug_loc_match.group(1), 16),
                    int(debug_loc_match.group(2), 16),
                )
                # Fold any further contiguous entries into a single range;
                # gaps or stack_value entries mean "b" is not fully available.
                while (
                    debug_loc_match := re.search(DebugLocPattern, next(dwarf_iter, ""))
                ) is not None:
                    match_loc_start = int(debug_loc_match.group(1), 16)
                    match_loc_end = int(debug_loc_match.group(2), 16)
                    match_expr = debug_loc_match.group(3)
                    if match_loc_start != LocationRanges[1]:
                        raise RuntimeError(
                            f"Location list for 'b' is discontinuous from [0x{LocationRanges[1]:x}, 0x{match_loc_start:x})"
                        )
                    if "stack_value" in match_expr:
                        raise RuntimeError(
                            f"Location list for 'b' contains a stack_value expression: {match_expr}"
                        )
                    LocationRanges = (LocationRanges[0], match_loc_end)
        # Get the prologue_end address.
        elif PrologueEnd is None:
            if match := re.match(ProloguePattern, line):
                PrologueEnd = int(match.group(1), 16)
        # Get the epilogue_begin address.
        elif EpilogueBegin is None:
            if match := re.match(EpiloguePattern, line):
                EpilogueBegin = int(match.group(1), 16)
                break

if not SeenDebugInfo:
    raise RuntimeError(".debug_info section not found.")
if not SeenDebugLine:
    raise RuntimeError(".debug_line section not found.")

if LocationRanges is None:
    raise RuntimeError(".debug_info output is missing parameter 'b'")
if PrologueEnd is None:
    raise RuntimeError(".debug_line output is missing prologue_end")
if EpilogueBegin is None:
    raise RuntimeError(".debug_line output is missing epilogue_begin")

# A whole-scope location (empty tuple) trivially covers the function; a range
# list must start at or before prologue_end and end at or after epilogue_begin.
if len(LocationRanges) == 2 and (
    LocationRanges[0] > PrologueEnd or LocationRanges[1] < EpilogueBegin
):
    raise RuntimeError(
        f"""Location list for 'b' does not cover the whole function:
    Prologue to Epilogue = [0x{PrologueEnd:x}, 0x{EpilogueBegin:x})
    Location range = [0x{LocationRanges[0]:x}, 0x{LocationRanges[1]:x})
"""
    )
2 changes: 2 additions & 0 deletions llvm/test/DebugInfo/MIR/X86/empty-inline.mir
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
# CHECK: Address Line Column File ISA Discriminator OpIndex Flags
# CHECK-NEXT: ---
# CHECK-NEXT: 25 0 1 0 0 0 is_stmt
# CHECK-NEXT: 0 0 1 0 0 0
# CHECK-NEXT: 29 28 1 0 0 0 is_stmt prologue_end
# CHECK-NEXT: 29 28 1 0 0 0 is_stmt
# CHECK-NEXT: 29 28 1 0 0 0 is_stmt end_sequence
--- |
source_filename = "t.ll"
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/DebugInfo/X86/discriminator.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"=

; CHECK: Address Line Column File ISA Discriminator OpIndex Flags
; CHECK: ------------------ ------ ------ ------ --- ------------- ------- -------------
; CHECK: 0x000000000000000a 2 0 1 0 42 0 {{$}}
; CHECK: 0x000000000000000a 2 0 1 0 42 0 is_stmt{{$}}
Loading