3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,9 @@ LegalityPredicate isPointer(unsigned TypeIdx);
/// True iff the specified type index is a pointer with the specified address
/// space.
LegalityPredicate isPointer(unsigned TypeIdx, unsigned AddrSpace);
/// True iff the specified type index is a vector of pointers (with any address
/// space).
LegalityPredicate isPointerVector(unsigned TypeIdx);

/// True if the type index is a vector with element type \p EltTy
LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT EltTy);
Expand Down
12 changes: 0 additions & 12 deletions llvm/include/llvm/ProfileData/MemProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1147,18 +1147,6 @@ template <typename FrameIdTy> class CallStackRadixTreeBuilder {
return std::move(CallStackPos);
}
};

// Verify that each CallStackId is computed with hashCallStack. This function
// is intended to help transition from CallStack to CSId in
// IndexedAllocationInfo.
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record);

// Verify that each CallStackId is computed with hashCallStack. This function
// is intended to help transition from CallStack to CSId in
// IndexedAllocationInfo.
void verifyFunctionProfileData(
const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
&FunctionProfileData);
} // namespace memprof
} // namespace llvm

Expand Down
15 changes: 8 additions & 7 deletions llvm/lib/Analysis/MemoryBuiltins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,10 +565,7 @@ static APInt getSizeWithOverflow(const SizeOffsetAPInt &Data) {
APInt Size = Data.Size;
APInt Offset = Data.Offset;

assert(!Offset.isNegative() &&
"size for a pointer before the allocated object is ambiguous");

if (Size.ult(Offset))
if (Offset.isNegative() || Size.ult(Offset))
return APInt::getZero(Size.getBitWidth());

return Size - Offset;
Expand Down Expand Up @@ -756,10 +753,14 @@ OffsetSpan ObjectSizeOffsetVisitor::computeImpl(Value *V) {
}

// We end up pointing to a location that's outside of the original object.
// This is UB, and we'd rather return an empty location then.
if (ORT.knownBefore() && ORT.Before.isNegative()) {
ORT.Before = APInt::getZero(ORT.Before.getBitWidth());
ORT.After = APInt::getZero(ORT.Before.getBitWidth());
// This is UB, and we'd rather return an empty location then.
if (Options.EvalMode == ObjectSizeOpts::Mode::Min ||
Options.EvalMode == ObjectSizeOpts::Mode::Max) {
ORT.Before = APInt::getZero(ORT.Before.getBitWidth());
ORT.After = APInt::getZero(ORT.Before.getBitWidth());
}
// Otherwise it's fine, caller can handle negative offset.
}
return ORT;
}
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx,
};
}

LegalityPredicate LegalityPredicates::isPointerVector(unsigned TypeIdx) {
  // Build a predicate that looks at the type stored at TypeIdx in each query
  // and accepts it only when that type reports itself as a pointer vector.
  return [TypeIdx](const LegalityQuery &Query) {
    const auto &QueriedTy = Query.Types[TypeIdx];
    return QueriedTy.isPointerVector();
  };
}

LegalityPredicate LegalityPredicates::elementTypeIs(unsigned TypeIdx,
LLT EltTy) {
return [=](const LegalityQuery &Query) {
Expand Down
37 changes: 37 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3697,6 +3697,41 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}

// Legalize a G_SHUFFLE_VECTOR by performing the shuffle in CastTy instead of
// the original result type. CastTy must currently have the same element size
// and element count as the result. Mostly used to legalize vectors of
// pointers, where ptrtoint/inttoptr is used for the surrounding casts.
//
//  <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
// ===>
//  <4 x s64> = G_PTRTOINT <4 x p0>
//  <4 x s64> = G_PTRTOINT <4 x p0>
//  <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
//  <16 x p0> = G_INTTOPTR <16 x s64>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
                                      LLT CastTy) {
  auto *Shuffle = cast<GShuffleVector>(&MI);
  const LLT ResultTy = MRI.getType(Shuffle->getReg(0));
  const LLT InputTy = MRI.getType(Shuffle->getReg(1));

  // Only the result type (type index 0) can be cast.
  if (TypeIdx != 0)
    return UnableToLegalize;

  // We currently only handle a cast type that keeps both the element width
  // and the element count of the result.
  const bool SameEltSize =
      CastTy.getScalarSizeInBits() == ResultTy.getScalarSizeInBits();
  if (!SameEltSize || CastTy.getElementCount() != ResultTy.getElementCount())
    return UnableToLegalize;

  // The inputs keep their own shape but take the element type of CastTy.
  const LLT CastInputTy = InputTy.changeElementType(CastTy.getScalarType());

  auto CastLHS = MIRBuilder.buildCast(CastInputTy, Shuffle->getReg(1));
  auto CastRHS = MIRBuilder.buildCast(CastInputTy, Shuffle->getReg(2));
  auto CastShuffle = MIRBuilder.buildShuffleVector(CastTy, CastLHS, CastRHS,
                                                   Shuffle->getMask());
  // Cast the shuffled value back into the original destination register.
  MIRBuilder.buildCast(Shuffle->getReg(0), CastShuffle);

  MI.eraseFromParent();
  return Legalized;
}

/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
///
/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
Expand Down Expand Up @@ -4133,6 +4168,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
case TargetOpcode::G_CONCAT_VECTORS:
return bitcastConcatVector(MI, TypeIdx, CastTy);
case TargetOpcode::G_SHUFFLE_VECTOR:
return bitcastShuffleVector(MI, TypeIdx, CastTy);
case TargetOpcode::G_EXTRACT_SUBVECTOR:
return bitcastExtractSubvector(MI, TypeIdx, CastTy);
case TargetOpcode::G_INSERT_SUBVECTOR:
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -600,12 +600,13 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
return buildCopy(Dst, Src);

unsigned Opcode;
if (SrcTy.isPointer() && DstTy.isScalar())
if (SrcTy.isPointerOrPointerVector())
Opcode = TargetOpcode::G_PTRTOINT;
else if (DstTy.isPointer() && SrcTy.isScalar())
else if (DstTy.isPointerOrPointerVector())
Opcode = TargetOpcode::G_INTTOPTR;
else {
assert(!SrcTy.isPointer() && !DstTy.isPointer() && "n G_ADDRCAST yet");
assert(!SrcTy.isPointerOrPointerVector() &&
!DstTy.isPointerOrPointerVector() && "no G_ADDRCAST yet");
Opcode = TargetOpcode::G_BITCAST;
}

Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/CodeGen/MachineFunctionSplitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "llvm/Analysis/EHUtils.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
Expand Down Expand Up @@ -128,6 +129,9 @@ static bool isColdBlock(const MachineBasicBlock &MBB,
}

bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// Do not split functions when -basic-block-sections=all is specified.
if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All)
return false;
// We target functions with profile data. Static information in the form
// of exception handling code may be split to cold if user passes the
// mfs-split-ehcode flag.
Expand All @@ -139,6 +143,14 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
if (!TII.isFunctionSafeToSplit(MF))
return false;

// Do not split functions with BasicBlockSections profiles as they will
// be split by the BasicBlockSections pass.
auto BBSectionsProfile =
getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
if (BBSectionsProfile != nullptr &&
BBSectionsProfile->getBBSPR().isFunctionHot(MF.getName()))
return false;

// Renumbering blocks here preserves the order of the blocks as
// sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
// blocks. Preserving the order of blocks is essential to retaining decisions
Expand Down Expand Up @@ -201,6 +213,7 @@ void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
}

char MachineFunctionSplitter::ID = 0;
Expand Down
17 changes: 9 additions & 8 deletions llvm/lib/CodeGen/TargetPassConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1235,13 +1235,13 @@ void TargetPassConfig::addMachinePasses() {
addPass(createMIRAddFSDiscriminatorsPass(
sampleprof::FSDiscriminatorPass::PassLast));

bool NeedsBBSections =
TM->getBBSectionsType() != llvm::BasicBlockSection::None;
// Machine function splitter uses the basic block sections feature. Both
// cannot be enabled at the same time. We do not apply machine function
// splitter if -basic-block-sections is requested.
if (!NeedsBBSections && (TM->Options.EnableMachineFunctionSplitter ||
EnableMachineFunctionSplitter)) {
// Machine function splitter uses the basic block sections feature.
// When used along with `-basic-block-sections=`, the basic-block-sections
// feature takes precedence. This means functions eligible for
// basic-block-sections optimizations (`=all`, or `=list=` with function
// included in the list profile) will get that optimization instead.
if (TM->Options.EnableMachineFunctionSplitter ||
EnableMachineFunctionSplitter) {
const std::string ProfileFile = getFSProfileFile(TM);
if (!ProfileFile.empty()) {
if (EnableFSDiscriminator) {
Expand All @@ -1260,7 +1260,8 @@ void TargetPassConfig::addMachinePasses() {
}
// We run the BasicBlockSections pass if either we need BB sections or BB
// address map (or both).
if (NeedsBBSections || TM->Options.BBAddrMap) {
if (TM->getBBSectionsType() != llvm::BasicBlockSection::None ||
TM->Options.BBAddrMap) {
if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
TM->getBBSectionsFuncListBuf()));
Expand Down
17 changes: 0 additions & 17 deletions llvm/lib/ProfileData/MemProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,22 +537,5 @@ template llvm::DenseMap<LinearFrameId, FrameStat>
computeFrameHistogram<LinearFrameId>(
llvm::MapVector<CallStackId, llvm::SmallVector<LinearFrameId>>
&MemProfCallStackData);

// Assert, for every allocation site in Record, that the stored CSId equals
// the hash of that site's call stack.
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
  // The loop variable is only read inside assert(), hence [[maybe_unused]].
  for ([[maybe_unused]] const auto &AllocSite : Record.AllocSites) {
    assert(AllocSite.CSId == hashCallStack(AllocSite.CallStack));
  }
}

// Run verifyIndexedMemProfRecord on every record in FunctionProfileData.
void verifyFunctionProfileData(
    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
        &FunctionProfileData) {
  // Only the mapped record is checked; the GUID key is not used.
  for (const auto &Entry : FunctionProfileData)
    verifyIndexedMemProfRecord(Entry.second);
}

} // namespace memprof
} // namespace llvm
2 changes: 0 additions & 2 deletions llvm/lib/ProfileData/MemProfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,8 +549,6 @@ Error RawMemProfReader::mapRawProfileToRecords() {
}
}

verifyFunctionProfileData(FunctionProfileData);

return Error::success();
}

Expand Down
15 changes: 11 additions & 4 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,13 +840,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_PTRTOINT)
.legalFor({{s64, p0}, {v2s64, v2p0}})
.widenScalarToNextPow2(0, 64)
.clampScalar(0, s64, s64);
.clampScalar(0, s64, s64)
.clampMaxNumElements(0, s64, 2);

getActionDefinitionsBuilder(G_INTTOPTR)
.unsupportedIf([&](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
})
.legalFor({{p0, s64}, {v2p0, v2s64}});
.legalFor({{p0, s64}, {v2p0, v2s64}})
.clampMaxNumElements(1, s64, 2);

// Casts for 32 and 64-bit width type are just copies.
// Same for 128-bit width type, except they are on the FPR bank.
Expand Down Expand Up @@ -1053,7 +1055,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
if (DstTy != SrcTy)
return false;
return llvm::is_contained(
{v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
{v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
})
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
// just want those lowered into G_BUILD_VECTOR
Expand All @@ -1079,7 +1081,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v8s8, v16s8)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64);
.clampNumElements(0, v2s64, v2s64)
.bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
// Bitcast pointers vector to i64.
const LLT DstTy = Query.Types[0];
return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
});

getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
Expand Down
36 changes: 10 additions & 26 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,26 +45,10 @@ using namespace llvm;

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
MCContext &Ctx) {
if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
!STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
// If there is no default wave size it must be a generation before gfx10,
// these have FeatureWavefrontSize64 in their definition already. For gfx10+
// set wave32 as a default.
STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
return STICopy;
}

return STI;
}

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
MCContext &Ctx, MCInstrInfo const *MCII)
: MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
: MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
// ToDo: AMDGPUDisassembler supports only VI ISA.
if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
Expand Down Expand Up @@ -1842,28 +1826,28 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
STI.hasFeature(AMDGPU::FeatureGFX10)) &&
"SDWAVopcDst should be present only on GFX9+");

bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

int TTmpIdx = getTTmpIdx(Val);
if (TTmpIdx >= 0) {
auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
auto TTmpClsId = getTtmpClassId(IsWave32 ? OPW32 : OPW64);
return createSRegOperand(TTmpClsId, TTmpIdx);
}
if (Val > SGPR_MAX) {
return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val);
return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
}
return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
return createSRegOperand(getSgprClassId(IsWave32 ? OPW32 : OPW64), Val);
}
return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
}

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
? decodeSrcOp(OPW64, Val)
: decodeSrcOp(OPW32, Val);
return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
? decodeSrcOp(OPW32, Val)
: decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/GCNProcessors.td
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
// The code produced for "generic" is only useful for tests and cannot
// reasonably be expected to execute on any particular target.
def : ProcessorModel<"generic", NoSchedModel,
[FeatureWavefrontSize64, FeatureGDS, FeatureGWS]
[FeatureGDS, FeatureGWS]
>;

def : ProcessorModel<"generic-hsa", NoSchedModel,
[FeatureWavefrontSize64, FeatureGDS, FeatureGWS, FeatureFlatAddressSpace]
[FeatureGDS, FeatureGWS, FeatureFlatAddressSpace]
>;

//===------------------------------------------------------------===//
Expand Down
16 changes: 7 additions & 9 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,16 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
if (Gen == AMDGPUSubtarget::INVALID) {
Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS
: AMDGPUSubtarget::SOUTHERN_ISLANDS;
}

if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
!hasFeature(AMDGPU::FeatureWavefrontSize64)) {
// Assume wave64 for the unknown target, if not explicitly set.
if (getWavefrontSizeLog2() == 0)
WavefrontSizeLog2 = 6;
} else if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
!hasFeature(AMDGPU::FeatureWavefrontSize64)) {
// If there is no default wave size it must be a generation before gfx10,
// these have FeatureWavefrontSize64 in their definition already. For gfx10+
// set wave32 as a default.
ToggleFeature(AMDGPU::FeatureWavefrontSize32);
WavefrontSizeLog2 = getGeneration() >= AMDGPUSubtarget::GFX10 ? 5 : 6;
}

// We don't support FP64 for EG/NI atm.
Expand Down Expand Up @@ -147,10 +149,6 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
!getFeatureBits().test(AMDGPU::FeatureCuMode))
LocalMemorySize *= 2;

// Don't crash on invalid devices.
if (WavefrontSizeLog2 == 0)
WavefrontSizeLog2 = 5;

HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
HasSMulHi = getGeneration() >= AMDGPUSubtarget::GFX9;

Expand All @@ -166,7 +164,7 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,

void GCNSubtarget::checkSubtargetFeatures(const Function &F) const {
LLVMContext &Ctx = F.getContext();
if (hasFeature(AMDGPU::FeatureWavefrontSize32) ==
if (hasFeature(AMDGPU::FeatureWavefrontSize32) &&
hasFeature(AMDGPU::FeatureWavefrontSize64)) {
Ctx.diagnose(DiagnosticInfoUnsupported(
F, "must specify exactly one of wavefrontsize32 and wavefrontsize64"));
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1569,6 +1569,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getWavefrontSize() == 64;
}

/// Returns true if the wave size of this subtarget is reliably known, i.e.
/// one of the wavefrontsize32 / wavefrontsize64 features is set. This is
/// false only for a default target-cpu that does not have an explicit
/// +wavefrontsize target feature.
bool isWaveSizeKnown() const {
  if (hasFeature(AMDGPU::FeatureWavefrontSize32))
    return true;
  return hasFeature(AMDGPU::FeatureWavefrontSize64);
}

const TargetRegisterClass *getBoolRC() const {
return getRegisterInfo()->getBoolRC();
}
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -649,9 +649,9 @@ void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
raw_ostream &O) {
if (!FirstOperand)
O << ", ";
printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
? AMDGPU::VCC
: AMDGPU::VCC_LO,
printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
? AMDGPU::VCC_LO
: AMDGPU::VCC,
O, MRI);
if (FirstOperand)
O << ", ";
Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,22 @@ static MCSubtargetInfo *
createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
if (TT.getArch() == Triple::r600)
return createR600MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
return createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);

MCSubtargetInfo *STI =
createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);

// FIXME: We should error for the default target.
if (!STI->hasFeature(AMDGPU::FeatureWavefrontSize64) &&
!STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) {
// If there is no default wave size it must be a generation before gfx10,
// these have FeatureWavefrontSize64 in their definition already. For gfx10+
// set wave32 as a default.
STI->ToggleFeature(AMDGPU::isGFX10Plus(*STI)
? AMDGPU::FeatureWavefrontSize32
: AMDGPU::FeatureWavefrontSize64);
}

return STI;
}

static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
Expand Down
23 changes: 21 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14300,9 +14300,17 @@ static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// sub-512-bit shuffles are padded to 512-bits for the shuffle and then
// the active subvector is extracted.
static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
ArrayRef<int> OriginalMask, SDValue V1,
SDValue V2, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// Commute binary inputs so V2 is a load to simplify VPERMI2/T2 folds.
SmallVector<int, 32> Mask(OriginalMask);
if (!V2.isUndef() && isShuffleFoldableLoad(V1) &&
!isShuffleFoldableLoad(V2)) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(V1, V2);
}

MVT MaskVT = VT.changeTypeToInteger();
SDValue MaskNode;
MVT ShuffleVT = VT;
Expand Down Expand Up @@ -42244,6 +42252,17 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
DAG.getIntPtrConstant(0, DL));
}
}
SmallVector<SDValue, 2> Ops;
SmallVector<int, 32> Mask;
if (isShuffleFoldableLoad(N.getOperand(0)) &&
!isShuffleFoldableLoad(N.getOperand(2)) &&
getTargetShuffleMask(N, /*AllowSentinelZero=*/false, Ops, Mask)) {
ShuffleVectorSDNode::commuteMask(Mask);
SDValue NewMask = getConstVector(
Mask, N.getOperand(1).getSimpleValueType(), DAG, DL, /*IsMask=*/true);
return DAG.getNode(X86ISD::VPERMV3, DL, VT, N.getOperand(2), NewMask,
N.getOperand(0));
}
return SDValue();
}
default:
Expand Down
83 changes: 37 additions & 46 deletions llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,52 +134,43 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
}
return true;
};
for (const VPUser *U : EVL.users()) {
if (!TypeSwitch<const VPUser *, bool>(U)
.Case<VPWidenIntrinsicRecipe>(
[&](const VPWidenIntrinsicRecipe *S) {
return VerifyEVLUse(*S, S->getNumOperands() - 1);
})
.Case<VPWidenStoreEVLRecipe>([&](const VPWidenStoreEVLRecipe *S) {
return VerifyEVLUse(*S, 2);
})
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
.Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
return VerifyEVLUse(
*W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
})
.Case<VPReductionEVLRecipe>([&](const VPReductionEVLRecipe *R) {
return VerifyEVLUse(*R, 2);
})
.Case<VPScalarCastRecipe>(
[&](const VPScalarCastRecipe *S) { return true; })
.Case<VPInstruction>([&](const VPInstruction *I) {
if (I->getOpcode() != Instruction::Add) {
errs()
<< "EVL is used as an operand in non-VPInstruction::Add\n";
return false;
}
if (I->getNumUsers() != 1) {
errs() << "EVL is used in VPInstruction:Add with multiple "
"users\n";
return false;
}
if (!isa<VPEVLBasedIVPHIRecipe>(*I->users().begin())) {
errs() << "Result of VPInstruction::Add with EVL operand is "
"not used by VPEVLBasedIVPHIRecipe\n";
return false;
}
return true;
})
.Default([&](const VPUser *U) {
errs() << "EVL has unexpected user\n";
return false;
})) {
return false;
}
}
return true;
return all_of(EVL.users(), [&VerifyEVLUse](VPUser *U) {
return TypeSwitch<const VPUser *, bool>(U)
.Case<VPWidenIntrinsicRecipe>([&](const VPWidenIntrinsicRecipe *S) {
return VerifyEVLUse(*S, S->getNumOperands() - 1);
})
.Case<VPWidenStoreEVLRecipe, VPReductionEVLRecipe>(
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
.Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
return VerifyEVLUse(*W,
Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
})
.Case<VPScalarCastRecipe>(
[&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
.Case<VPInstruction>([&](const VPInstruction *I) {
if (I->getOpcode() != Instruction::Add) {
errs() << "EVL is used as an operand in non-VPInstruction::Add\n";
return false;
}
if (I->getNumUsers() != 1) {
errs() << "EVL is used in VPInstruction:Add with multiple "
"users\n";
return false;
}
if (!isa<VPEVLBasedIVPHIRecipe>(*I->users().begin())) {
errs() << "Result of VPInstruction::Add with EVL operand is "
"not used by VPEVLBasedIVPHIRecipe\n";
return false;
}
return true;
})
.Default([&](const VPUser *U) {
errs() << "EVL has unexpected user\n";
return false;
});
});
}

bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,11 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0)
; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x p0>)
; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY]](<2 x p0>)
; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[PTRTOINT]](<2 x s64>), [[PTRTOINT1]], shufflemask(0, 0)
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[SHUF]](<2 x s64>)
; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<2 x p0>) = COPY $q0
%1:_(<2 x p0>) = COPY $q1
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/AArch64/arm64-ext.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for test_v2p0
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define <8 x i8> @test_vextd(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: test_vextd:
Expand Down
9 changes: 1 addition & 8 deletions llvm/test/CodeGen/AArch64/neon-perm.ll
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for test_vuzp1q_p0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vuzp2q_p0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip1q_p0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip2q_p0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn1q_p0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn2q_p0
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

%struct.int8x8x2_t = type { [2 x <8 x i8>] }
%struct.int16x4x2_t = type { [2 x <4 x i16>] }
Expand Down
18 changes: 11 additions & 7 deletions llvm/test/CodeGen/AArch64/neon-vector-splat.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for shuffle8
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define <2 x i32> @shuffle(ptr %P) {
; CHECK-SD-LABEL: shuffle:
Expand Down Expand Up @@ -116,10 +114,16 @@ define <2 x i64> @shuffle7(ptr %P) {
}

define <2 x ptr> @shuffle8(ptr %P) {
; CHECK-LABEL: shuffle8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1r { v0.2d }, [x0]
; CHECK-NEXT: ret
; CHECK-SD-LABEL: shuffle8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ld1r { v0.2d }, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffle8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
; CHECK-GI-NEXT: ret
%lv2ptr = load <2 x ptr>, ptr %P
%sv2ptr = shufflevector <2 x ptr> %lv2ptr, <2 x ptr> undef, <2 x i32> zeroinitializer
ret <2 x ptr> %sv2ptr
Expand Down
83 changes: 57 additions & 26 deletions llvm/test/CodeGen/AArch64/shufflevector.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2p0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2p0_zeroes
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4p0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4p0_zeroes
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; ===== Legal Vector Types =====

Expand Down Expand Up @@ -392,13 +387,49 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
ret <4 x i64> %c
}

; Shuffle of two 3-element pointer vectors. The mask <1, 3, 5> indexes the
; concatenation of %a (lanes 0-2) and %b (lanes 3-5), so the result is
; { %a[1], %b[0], %b[2] }. The two instruction selectors produce different
; sequences for this non-power-of-two vector, so each has its own expectations.
define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) {
; CHECK-SD-LABEL: shufflevector_v3p0:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov d2, d5
; CHECK-SD-NEXT: fmov d0, d1
; CHECK-SD-NEXT: fmov d1, d3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v3p0:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d3
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v2.d[0], x9
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: fmov x9, d4
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: mov v2.d[1], x9
; CHECK-GI-NEXT: fmov x8, d5
; CHECK-GI-NEXT: mov v1.d[0], x8
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v2.16b, #8
; CHECK-GI-NEXT: fmov x10, d1
; CHECK-GI-NEXT: mov d2, v0.d[1]
; CHECK-GI-NEXT: fmov d1, d2
; CHECK-GI-NEXT: fmov d2, x10
; CHECK-GI-NEXT: ret
%c = shufflevector <3 x ptr> %a, <3 x ptr> %b, <3 x i32> <i32 1, i32 3, i32 5>
ret <3 x ptr> %c
}

; Shuffle of two 4-element pointer vectors. The mask <1, 3, 5, 7> indexes the
; concatenation of %a (lanes 0-3) and %b (lanes 4-7), so the result is
; { %a[1], %a[3], %b[1], %b[3] }.
; NOTE(review): two generations of expectations appear below -- one group under
; the shared prefix and one group split per selector (-SD / -GI). This looks
; like unmarked diff residue; confirm which group is meant to be live.
define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) {
; CHECK-LABEL: shufflevector_v4p0:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d
; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
; CHECK-NEXT: mov v1.16b, v2.16b
; CHECK-NEXT: ret
; CHECK-SD-LABEL: shufflevector_v4p0:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d
; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v4p0:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d
; CHECK-GI-NEXT: ret
%c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x ptr> %c
}
Expand Down Expand Up @@ -549,13 +580,13 @@ define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov s0, w0
; CHECK-GI-NEXT: fmov s1, w3
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
; CHECK-GI-NEXT: mov v0.b[1], w1
; CHECK-GI-NEXT: mov v1.b[1], w4
; CHECK-GI-NEXT: mov v0.b[2], w2
; CHECK-GI-NEXT: mov v1.b[2], w5
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: umov w0, v0.b[0]
; CHECK-GI-NEXT: umov w1, v0.b[1]
Expand All @@ -570,19 +601,19 @@ define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) {
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: adrp x8, .LCPI35_0
; CHECK-SD-NEXT: adrp x8, .LCPI36_0
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI35_0]
; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI36_0]
; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v7i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI36_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
Expand All @@ -601,9 +632,9 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
; CHECK-GI-NEXT: adrp x8, .LCPI37_0
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI36_0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI37_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
Expand All @@ -614,18 +645,18 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) {
; CHECK-SD-LABEL: shufflevector_v7i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI37_0
; CHECK-SD-NEXT: adrp x8, .LCPI38_0
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI38_0]
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v7i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI37_0
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
Expand All @@ -642,9 +673,9 @@ define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) {
;
; CHECK-GI-LABEL: shufflevector_v3i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
; CHECK-GI-NEXT: adrp x8, .LCPI39_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI39_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
Expand Down
66 changes: 65 additions & 1 deletion llvm/test/CodeGen/Generic/machine-function-splitter.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,21 @@
; REQUIRES: x86-registered-target

; COM: Machine function splitting with FDO profiles
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86,MFS-NOBBSECTIONS
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-X86
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-X86
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86
; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86

; COM: Machine function splitting along with -basic-block-sections profile
; RUN: echo 'v1' > %t
; RUN: echo 'ffoo21' >> %t
; RUN: echo 'c0' >> %t
; RUN: echo 'ffoo22' >> %t
; RUN: echo 'c0 1' >> %t
; RUN: echo 'c2' >> %t
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -basic-block-sections=%t -split-machine-functions | FileCheck %s --check-prefixes=MFS-BBSECTIONS

; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64
Expand Down Expand Up @@ -610,6 +619,61 @@ cold_asm_target:
ret void
}

define void @foo21(i1 zeroext %0) {
;; Check that a function with basic-block-sections profile (but no pgo profile)
;; is properly split when the profile is used along with mfs.
;;
;; The profile file assembled by the echo commands near the top of this test
;; has a single `c0` line after `ffoo21`; presumably that places only block 0
;; in the hot cluster -- confirm against the basic-block-sections profile
;; format. The expectations below require a hot section and a split section
;; with a foo21.cold label for the profile-driven run, while the negative
;; expectations (the NOBBSECTIONS-prefixed ones, enabled on the default x86
;; run) require that neither the hot section nor the cold label appears there.
; MFS-BBSECTIONS: .section .text.hot.foo21
; MFS-NOBBSECTIONS-NOT: .section .text.hot.foo21
; MFS-BBSECTIONS-LABEL: foo21:
; MFS-NOBBSECTIONS-NOT: foo21.cold:
; MFS-BBSECTIONS: .section .text.split.foo21
; MFS-BBSECTIONS: foo21.cold
;; The flag is widened to i8, spilled to a stack slot, reloaded and truncated
;; back to i1 before it is used as the branch condition.
%2 = alloca i8, align 1
%3 = zext i1 %0 to i8
store i8 %3, ptr %2, align 1
%4 = load i8, ptr %2, align 1
%5 = trunc i8 %4 to i1
br i1 %5, label %6, label %8

;; True successor: calls @bar, then joins at the return block.
6: ; preds = %1
%7 = call i32 @bar()
br label %10

;; False successor: calls @baz, then joins at the return block.
8: ; preds = %1
%9 = call i32 @baz()
br label %10

10: ; preds = %8, %6
ret void
}

define void @foo22(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
;; Check that when a function has both basic-block-section and pgo profiles
;; only the basic-block-section profile is used for splitting.
;;
;; The function carries !prof and !section_prefix attachments and its branch
;; carries !prof weights, so profile metadata is present in the IR itself.
;; In the profile file assembled near the top of this test, `ffoo22` is
;; followed by the lines `c0 1` and `c2`; presumably these are the two
;; clusters that become the two hot sections expected below -- confirm
;; against the basic-block-sections profile format.

;; Check that we create two hot sections with -basic-block-sections.
; MFS-BBSECTIONS: .section .text.hot.foo22
; MFS-BBSECTIONS-LABEL: foo22:
; MFS-BBSECTIONS: callq bar
; MFS-BBSECTIONS: .section .text.hot.foo22
; MFS-BBSECTIONS-NEXT: foo22.__part.1:
; MFS-BBSECTIONS: callq baz
; MFS-BBSECTIONS-NOT: .section .text.split.foo22
br i1 %0, label %2, label %4, !prof !17

;; True successor: calls @bar, then joins at the common tail block.
2: ; preds = %1
%3 = call i32 @bar()
br label %6

;; False successor: calls @baz, then joins at the common tail block.
4: ; preds = %1
%5 = call i32 @baz()
br label %6

;; Common tail: calls @qux (result unused) and returns.
6: ; preds = %4, %2
%7 = tail call i32 @qux()
ret void
}

declare i32 @bar()
declare i32 @baz()
declare i32 @bam()
Expand Down
40 changes: 19 additions & 21 deletions llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2540,9 +2540,9 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask3(ptr %vp, <4 x i64>
define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask4(ptr %vp, <4 x i64> %vec2, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mem_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm2
; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm3 = [0,4,6,1]
; CHECK-NEXT: vpermi2q 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm2
; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm3 = [4,0,2,5]
; CHECK-NEXT: vpermi2q (%rdi), %ymm2, %ymm3
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vmovdqa64 %ymm3, %ymm0 {%k1}
; CHECK-NEXT: retq
Expand All @@ -2556,10 +2556,10 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask4(ptr %vp, <4 x i64> %
define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask4(ptr %vp, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mem_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm2
; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,4,6,1]
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm2
; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm1 = [4,0,2,5]
; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT: vpermi2q 32(%rdi), %ymm2, %ymm1 {%k1} {z}
; CHECK-NEXT: vpermi2q (%rdi), %ymm2, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%vec = load <8 x i64>, ptr %vp
Expand Down Expand Up @@ -3514,12 +3514,11 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mem_mask2(ptr %vp, <8 x
; CHECK-FAST-PERLANE-LABEL: test_masked_16xfloat_to_8xfloat_perm_mem_mask2:
; CHECK-FAST-PERLANE: # %bb.0:
; CHECK-FAST-PERLANE-NEXT: vmovaps (%rdi), %xmm2
; CHECK-FAST-PERLANE-NEXT: vmovaps 32(%rdi), %ymm3
; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm4 = [9,5,2,3,2,8,8,1]
; CHECK-FAST-PERLANE-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4
; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm3 = [1,13,10,11,10,0,0,9]
; CHECK-FAST-PERLANE-NEXT: vpermi2ps 32(%rdi), %ymm2, %ymm3
; CHECK-FAST-PERLANE-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-FAST-PERLANE-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-FAST-PERLANE-NEXT: vmovaps %ymm4, %ymm0 {%k1}
; CHECK-FAST-PERLANE-NEXT: vmovaps %ymm3, %ymm0 {%k1}
; CHECK-FAST-PERLANE-NEXT: retq
%vec = load <16 x float>, ptr %vp
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <8 x i32> <i32 1, i32 13, i32 10, i32 11, i32 10, i32 0, i32 0, i32 9>
Expand All @@ -3542,11 +3541,10 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mem_mask2(ptr %vp, <8
; CHECK-FAST-PERLANE-LABEL: test_masked_z_16xfloat_to_8xfloat_perm_mem_mask2:
; CHECK-FAST-PERLANE: # %bb.0:
; CHECK-FAST-PERLANE-NEXT: vmovaps (%rdi), %xmm2
; CHECK-FAST-PERLANE-NEXT: vmovaps 32(%rdi), %ymm3
; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm1 = [9,5,2,3,2,8,8,1]
; CHECK-FAST-PERLANE-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-FAST-PERLANE-NEXT: vcmpeqps %ymm4, %ymm0, %k1
; CHECK-FAST-PERLANE-NEXT: vpermi2ps %ymm2, %ymm3, %ymm1 {%k1} {z}
; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm1 = [1,13,10,11,10,0,0,9]
; CHECK-FAST-PERLANE-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-FAST-PERLANE-NEXT: vcmpeqps %ymm3, %ymm0, %k1
; CHECK-FAST-PERLANE-NEXT: vpermi2ps 32(%rdi), %ymm2, %ymm1 {%k1} {z}
; CHECK-FAST-PERLANE-NEXT: vmovaps %ymm1, %ymm0
; CHECK-FAST-PERLANE-NEXT: retq
%vec = load <16 x float>, ptr %vp
Expand Down Expand Up @@ -4398,9 +4396,9 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask0(ptr %vp,
define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask1(ptr %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-FAST-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask1:
; CHECK-FAST: # %bb.0:
; CHECK-FAST-NEXT: vmovapd (%rdi), %ymm2
; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm3 = [3,4,2,6]
; CHECK-FAST-NEXT: vpermi2pd 32(%rdi){1to4}, %ymm2, %ymm3
; CHECK-FAST-NEXT: vbroadcastsd 32(%rdi), %ymm2
; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm3 = [7,0,6,2]
; CHECK-FAST-NEXT: vpermi2pd (%rdi), %ymm2, %ymm3
; CHECK-FAST-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-FAST-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-FAST-NEXT: vmovapd %ymm3, %ymm0 {%k1}
Expand All @@ -4423,11 +4421,11 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask1(ptr %vp, <4
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1(ptr %vp, <4 x double> %mask) {
; CHECK-FAST-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1:
; CHECK-FAST: # %bb.0:
; CHECK-FAST-NEXT: vmovapd (%rdi), %ymm2
; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm1 = [3,4,2,6]
; CHECK-FAST-NEXT: vbroadcastsd 32(%rdi), %ymm2
; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm1 = [7,0,6,2]
; CHECK-FAST-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-FAST-NEXT: vcmpeqpd %ymm3, %ymm0, %k1
; CHECK-FAST-NEXT: vpermi2pd 32(%rdi){1to4}, %ymm2, %ymm1 {%k1} {z}
; CHECK-FAST-NEXT: vpermi2pd (%rdi), %ymm2, %ymm1 {%k1} {z}
; CHECK-FAST-NEXT: vmovapd %ymm1, %ymm0
; CHECK-FAST-NEXT: retq
;
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/X86/insert-into-constant-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -447,9 +447,8 @@ define <8 x i64> @elt5_v8i64(i64 %x) {
; X64-AVX512F-LABEL: elt5_v8i64:
; X64-AVX512F: # %bb.0:
; X64-AVX512F-NEXT: vmovq %rdi, %xmm1
; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,2,3,4,8,6,7]
; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [42,1,2,3,4,0,6,7]
; X64-AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,10,11,12,0,14,15]
; X64-AVX512F-NEXT: vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; X64-AVX512F-NEXT: retq
%ins = insertelement <8 x i64> <i64 42, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, i64 %x, i32 5
ret <8 x i64> %ins
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2516,10 +2516,8 @@ define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, ptr %a1) {
;
; AVX512VL-LABEL: shuffle_mem_v4f32_0624:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovaps (%rdi), %xmm2
; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,6,2,4]
; AVX512VL-NEXT: vpermi2ps %xmm0, %xmm2, %xmm1
; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [4,2,6,0]
; AVX512VL-NEXT: vpermt2ps (%rdi), %xmm1, %xmm0
; AVX512VL-NEXT: retq
%1 = load <4 x float>, ptr %a1
%2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
Expand Down
63 changes: 23 additions & 40 deletions llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 = zmm0 ^ (zmm2 & (zmm0 ^ zmm1))
; AVX512F-NEXT: ret{{[l|q]}}
entry:
%0 = shufflevector <64 x i8> %A, <64 x i8> %W, <64 x i32> <i32 64, i32 1, i32 66, i32 3, i32 68, i32 5, i32 70, i32 7, i32 72, i32 9, i32 74, i32 11, i32 76, i32 13, i32 78, i32 15, i32 80, i32 17, i32 82, i32 19, i32 84, i32 21, i32 86, i32 23, i32 88, i32 25, i32 90, i32 27, i32 92, i32 29, i32 94, i32 31, i32 96, i32 33, i32 98, i32 35, i32 100, i32 37, i32 102, i32 39, i32 104, i32 41, i32 106, i32 43, i32 108, i32 45, i32 110, i32 47, i32 112, i32 49, i32 114, i32 51, i32 116, i32 53, i32 118, i32 55, i32 120, i32 57, i32 122, i32 59, i32 124, i32 61, i32 126, i32 63>
Expand All @@ -354,15 +354,10 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
; AVX512-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; AVX512-NEXT: ret{{[l|q]}}
;
; X86-AVX512F-LABEL: test_mm512_mask_blend_epi16:
; X86-AVX512F: # %bb.0: # %entry
; X86-AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm1, %zmm0
; X86-AVX512F-NEXT: retl
;
; X64-AVX512F-LABEL: test_mm512_mask_blend_epi16:
; X64-AVX512F: # %bb.0: # %entry
; X64-AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; X64-AVX512F-NEXT: retq
; AVX512F-LABEL: test_mm512_mask_blend_epi16:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
; AVX512F-NEXT: ret{{[l|q]}}
entry:
%0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
ret <32 x i16> %0
Expand Down Expand Up @@ -486,37 +481,33 @@ define <8 x float> @test_masked_permps_v8f32(ptr %vp, <8 x float> %vec2) {
; X86-AVX512-LABEL: test_masked_permps_v8f32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vmovaps (%eax), %ymm2
; X86-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15]
; X86-AVX512-NEXT: vpermi2ps %ymm0, %ymm2, %ymm1
; X86-AVX512-NEXT: vmovaps %ymm1, %ymm0
; X86-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [15,14,11,3,15,14,6,7]
; X86-AVX512-NEXT: vpermt2ps (%eax), %ymm1, %ymm0
; X86-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: test_masked_permps_v8f32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %ymm2
; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15]
; X64-AVX512-NEXT: vpermi2ps %ymm0, %ymm2, %ymm1
; X64-AVX512-NEXT: vmovaps %ymm1, %ymm0
; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [15,14,11,3,15,14,6,7]
; X64-AVX512-NEXT: vpermt2ps (%rdi), %ymm1, %ymm0
; X64-AVX512-NEXT: retq
;
; X86-AVX512F-LABEL: test_masked_permps_v8f32:
; X86-AVX512F: # %bb.0:
; X86-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512F-NEXT: vmovaps (%eax), %ymm1
; X86-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23]
; X86-AVX512F-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1
; X86-AVX512F-NEXT: vmovaps %ymm1, %ymm0
; X86-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [23,22,19,3,23,22,6,7]
; X86-AVX512F-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
; X86-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; X86-AVX512F-NEXT: retl
;
; X64-AVX512F-LABEL: test_masked_permps_v8f32:
; X64-AVX512F: # %bb.0:
; X64-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; X64-AVX512F-NEXT: vmovaps (%rdi), %ymm1
; X64-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23]
; X64-AVX512F-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1
; X64-AVX512F-NEXT: vmovaps %ymm1, %ymm0
; X64-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [23,22,19,3,23,22,6,7]
; X64-AVX512F-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
; X64-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; X64-AVX512F-NEXT: retq
%vec = load <8 x float>, ptr %vp
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 3, i32 0, i32 7, i32 6, i32 3, i32 0>
Expand All @@ -528,35 +519,27 @@ define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) {
; X86-AVX512-LABEL: test_masked_permps_v16f32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vmovaps (%eax), %zmm2
; X86-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; X86-AVX512-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1
; X86-AVX512-NEXT: vmovaps %zmm1, %zmm0
; X86-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15]
; X86-AVX512-NEXT: vpermt2ps (%eax), %zmm1, %zmm0
; X86-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: test_masked_permps_v16f32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %zmm2
; X64-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; X64-AVX512-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1
; X64-AVX512-NEXT: vmovaps %zmm1, %zmm0
; X64-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15]
; X64-AVX512-NEXT: vpermt2ps (%rdi), %zmm1, %zmm0
; X64-AVX512-NEXT: retq
;
; X86-AVX512F-LABEL: test_masked_permps_v16f32:
; X86-AVX512F: # %bb.0:
; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512F-NEXT: vmovaps (%eax), %zmm2
; X86-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; X86-AVX512F-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1
; X86-AVX512F-NEXT: vmovaps %zmm1, %zmm0
; X86-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15]
; X86-AVX512F-NEXT: vpermt2ps (%eax), %zmm1, %zmm0
; X86-AVX512F-NEXT: retl
;
; X64-AVX512F-LABEL: test_masked_permps_v16f32:
; X64-AVX512F: # %bb.0:
; X64-AVX512F-NEXT: vmovaps (%rdi), %zmm2
; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; X64-AVX512F-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1
; X64-AVX512F-NEXT: vmovaps %zmm1, %zmm0
; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15]
; X64-AVX512F-NEXT: vpermt2ps (%rdi), %zmm1, %zmm0
; X64-AVX512F-NEXT: retq
%vec = load <16 x float>, ptr %vp
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 13, i32 11, i32 9, i32 14, i32 12, i32 10, i32 8, i32 7, i32 6, i32 3, i32 0, i32 7, i32 6, i32 3, i32 0>
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/X86/vector-shuffle-v1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -719,10 +719,9 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [9,6,1,0,3,7,7,1]
; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [18446744073709551615,18446744073709551615,0,0,0,0,0,0]
; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [1,14,9,8,11,15,15,9]
; AVX512F-NEXT: vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/X86/vector-shuffle-v48.ll
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,9 @@ define <32 x i8> @foo(ptr %x0) {
;
; AVX512VBMI-LABEL: foo:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1
; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %xmm2
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [0,1,3,4,6,7,9,10,12,13,15,16,18,19,21,22,24,25,27,28,30,31,33,34,36,37,39,40,42,43,45,46]
; AVX512VBMI-NEXT: vpermi2b %ymm2, %ymm1, %ymm0
; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %xmm1
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [32,33,35,36,38,39,41,42,44,45,47,48,50,51,53,54,56,57,59,60,62,63,1,2,4,5,7,8,10,11,13,14]
; AVX512VBMI-NEXT: vpermi2b (%rdi), %ymm1, %ymm0
; AVX512VBMI-NEXT: retq
%1 = load <48 x i8>, ptr %x0, align 1
%2 = shufflevector <48 x i8> %1, <48 x i8> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 4, i32 6, i32 7, i32 9, i32 10, i32 12, i32 13, i32 15, i32 16, i32 18, i32 19, i32 21, i32 22, i32 24, i32 25, i32 27, i32 28, i32 30, i32 31, i32 33, i32 34, i32 36, i32 37, i32 39, i32 40, i32 42, i32 43, i32 45, i32 46>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4895,11 +4895,10 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i
;
; AVX512BW-LABEL: vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,0,2,0,8,0,6,0]
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,0,10,0,0,0,14,0]
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
; AVX512BW-NEXT: vzeroupper
Expand Down Expand Up @@ -4997,11 +4996,10 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i
;
; AVX512BW-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm0 = [32,1,32,3,32,5,32,7,32,9,32,11,32,13,32,15,32,17,32,19,32,21,32,23,32,25,32,27,32,29,32,31]
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm2 = [0,33,0,35,0,37,0,39,0,41,0,43,0,45,0,47,0,49,0,51,0,53,0,55,0,57,0,59,0,61,0,63]
; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
; AVX512BW-NEXT: vpermt2w (%rdi), %zmm0, %zmm1
; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
Expand Down Expand Up @@ -5411,39 +5409,36 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
;
; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15]
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
; AVX512F-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512F-NEXT: vpermt2d (%rdi), %zmm0, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15]
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
; AVX512DQ-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512DQ-NEXT: vpermt2d (%rdi), %zmm0, %zmm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15]
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
; AVX512BW-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
; AVX512BW-NEXT: vpermt2d (%rdi), %zmm0, %zmm1
; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
Expand Down Expand Up @@ -5679,39 +5674,36 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i
;
; AVX512F-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7]
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15]
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512F-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7]
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15]
; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512DQ-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7]
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15]
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
Expand Down Expand Up @@ -5938,39 +5930,36 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr %
;
; AVX512F-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7]
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15]
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512F-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7]
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15]
; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512DQ-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7]
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15]
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
Expand Down
45 changes: 45 additions & 0 deletions llvm/test/Instrumentation/BoundsChecking/negative.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Check that a GEP with a negative out-of-bounds base offset does not generate an invalid check.
; RUN: opt < %s -passes=bounds-checking -S | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-p2:64:64:64:48-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"


@str = global [100 x i8] zeroinitializer, align 1

; The loop below runs %i.0 from 65 up to (but not including) 76 and stores
; %i.0 through a pointer formed as (@str - 65) + %i.0. The constant base
; pointer therefore lies before the start of @str, while every address that is
; actually stored to falls at offset 0..10 of the 100-byte array.
;
; The expected output shows the instrumented for.inc block computing the
; offset (-65 + sext(%i.0)) and the remaining size (100 - offset) followed
; directly by the store; no extra conditional branch is expected in that block.
define i16 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I_0:%.*]] = phi i8 [ 65, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[TMP4:%.*]] ]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i8 [[I_0]], 76
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[TMP4]]
; CHECK: for.inc:
; CHECK-NEXT: [[I_0_C:%.*]] = sext i8 [[I_0]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add i64 -65, [[I_0_C]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr getelementptr (i8, ptr @str, i8 -65), i8 [[I_0]]
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 100, [[TMP0]]
; CHECK-NEXT: store i8 [[I_0]], ptr [[GEP]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i8 [[I_0]], 1
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end:
; CHECK-NEXT: ret i16 0
;
entry:
br label %for.cond

for.cond:
; Induction variable starts at 65; the loop exits once it reaches 76.
%i.0 = phi i8 [ 65, %entry ], [ %inc, %for.inc ]
%exitcond.not = icmp eq i8 %i.0, 76
br i1 %exitcond.not, label %for.end, label %for.inc

for.inc: ; preds = %for.cond
; Base is the constant expression (@str - 65); adding %i.0 (65..75) gives
; byte offsets 0..10 into @str.
%gep = getelementptr i8, ptr getelementptr (i8, ptr @str, i8 -65), i8 %i.0
store i8 %i.0, ptr %gep, align 1
%inc = add nuw nsw i8 %i.0, 1
br label %for.cond

for.end:
ret i16 0
}

2 changes: 0 additions & 2 deletions llvm/test/MC/AMDGPU/ds.s
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=CI,SICI
// RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# RUN: not llvm-mc -triple amdgcn < %s 2>&1 | FileCheck --strict-whitespace %s
# RUN: not llvm-mc -triple amdgcn -mcpu=tahiti < %s 2>&1 | FileCheck --strict-whitespace %s

# This tests the mnemonic spell checker.

Expand Down
2 changes: 0 additions & 2 deletions llvm/test/MC/AMDGPU/literals.s
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=SICI,CI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GFX89
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX89,GFX9

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCIVI,NOSICIVI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICIVI,NOVI,NOGFX89 --implicit-check-not=error:
Expand Down
1 change: 0 additions & 1 deletion llvm/test/MC/AMDGPU/mimg-err.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error:
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/MC/AMDGPU/mimg.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI
// RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_0
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_1
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=GFX89

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_0 --implicit-check-not=error:
Expand Down
1 change: 0 additions & 1 deletion llvm/test/MC/AMDGPU/regression/bug28165.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI
Expand Down
1 change: 0 additions & 1 deletion llvm/test/MC/AMDGPU/regression/bug28413.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/MC/AMDGPU/smrd.s
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=GCN %s
// RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck --check-prefixes=GCN,CI %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=VI %s

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error:

Expand Down
2 changes: 0 additions & 2 deletions llvm/test/MC/AMDGPU/sopk.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck --check-prefixes=GCN,SICI %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefixes=GCN,SICI %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefixes=GCN,VI9,VI %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefixes=GCN,VI9,GFX9 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GCN,GFX10 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX11 %s

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --implicit-check-not=error: %s
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/MC/AMDGPU/unknown-target-cpu.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// RUN: not llvm-mc -triple=amdgcn -show-encoding < %s | FileCheck %s
// RUN: not llvm-mc -triple=amdgcn -show-encoding -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR %s
// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding < %s | FileCheck %s

// Assembles this file for the amdgcn triple twice without -mcpu and once with
// -mcpu=tahiti. The encoding checks below are shared by the no-mcpu and the
// tahiti invocations, so both must print the same bytes for these
// instructions. The no-mcpu invocations are expected to exit with an error
// (they are wrapped in "not"); the tahiti invocation is expected to succeed.

// CHECK: v_cmp_lt_f32_e32 vcc, s2, v4 ; encoding: [0x02,0x08,0x02,0x7c]
v_cmp_lt_f32 vcc, s2, v4

// CHECK: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00]
v_cndmask_b32 v1, v2, v3, vcc

// The diagnostic below is matched only against the second (no -mcpu,
// -filetype=null) invocation; the line reference is relative, so nothing may
// be inserted between the directive and the instruction it describes.
// ERR: [[@LINE+1]]:1: error: instruction not supported on this GPU
v_mac_legacy_f32 v1, v3, s5

// CHECK: v_lshr_b32_e32 v0, v1, v2 ; encoding: [0x01,0x05,0x00,0x2a]
v_lshr_b32 v0, v1, v2
2 changes: 1 addition & 1 deletion llvm/test/MC/AMDGPU/vintrp.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck -check-prefix=SI %s
// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=SI %s
// RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI %s

v_interp_p1_f32 v1, v0, attr0.x
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/MC/AMDGPU/vop1.s
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,CI,SICI,CIVI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,CIVI,VI

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error:
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/MC/AMDGPU/vop2.s
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=VI

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error:
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/MC/AMDGPU/vop3-convert.s
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error:
Expand Down
1 change: 0 additions & 1 deletion llvm/test/MC/AMDGPU/vop3-errs.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/vop3.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI

// Make sure interp instructions disassemble regardless of lds bank count
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=VI

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck %s -check-prefix=NOCI --check-prefix=NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s
Expand Down
1 change: 0 additions & 1 deletion llvm/test/MC/AMDGPU/vop_dpp.s
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=VI,VI9
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX9,VI9

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOSICI,NOCI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error:
Expand Down
1 change: 0 additions & 1 deletion llvm/test/MC/AMDGPU/vop_sdwa.s
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=VI,GFX89
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX9,GFX89

// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOCI,NOSICI --implicit-check-not=error:
// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOVI,NOGFX89 --implicit-check-not=error:
Expand Down
1 change: 0 additions & 1 deletion llvm/test/MC/AMDGPU/vopc.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI
// RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI

Expand Down
48 changes: 24 additions & 24 deletions llvm/test/MC/AMDGPU/wave_any.s
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s

v_cmp_ge_i32_e32 s0, v0
// GFX10: v_cmp_ge_i32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]

v_cmp_ge_i32_e32 vcc_lo, s0, v1
// GFX10: v_cmp_ge_i32_e32 vcc, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d]
// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d]

v_cmp_ge_i32_e32 vcc, s0, v2
// GFX10: v_cmp_ge_i32_e32 vcc, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d]
// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d]

v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
// GFX10: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
Expand All @@ -16,10 +16,10 @@ v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
// GFX10: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]

v_cmp_class_f32_e32 vcc_lo, s0, v0
// GFX10: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]

v_cmp_class_f32_e32 vcc, s0, v0
// GFX10: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]

v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
// GFX10: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
Expand All @@ -34,40 +34,40 @@ v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
// GFX10: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]

v_cndmask_b32_e32 v1, v2, v3,
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02]
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]

v_cndmask_b32_e32 v1, v2, v3, vcc_lo
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02]
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]

v_cndmask_b32_e32 v1, v2, v3, vcc
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02]
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]

v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]

v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc
// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]

v_add_co_ci_u32_e32 v3, v3, v4
// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]

v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]

v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc
// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]

v_sub_co_ci_u32_e32 v3, v3, v4
// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]

v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]

v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc
// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]

v_subrev_co_ci_u32_e32 v1, 0, v1
// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]

v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
Expand All @@ -76,7 +76,7 @@ v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD sr
// GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]

v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]

v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
Expand All @@ -85,7 +85,7 @@ v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD sr
// GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]

v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
// GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]

v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
Expand All @@ -94,10 +94,10 @@ v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD
// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]

v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]

v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]

v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
Expand All @@ -106,7 +106,7 @@ v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:U
// GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]

v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX10: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
// GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]

v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
Expand Down Expand Up @@ -189,8 +189,8 @@ v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2
// GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]

v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3]
// GFX10: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3]
// GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]

v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo
// GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
Expand Down
2 changes: 1 addition & 1 deletion mlir/cmake/modules/MLIRDetectPythonEnv.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ macro(mlir_configure_python_dev_packages)
message(STATUS "Found python libraries: ${Python3_LIBRARIES}")
message(STATUS "Found numpy v${Python3_NumPy_VERSION}: ${Python3_NumPy_INCLUDE_DIRS}")
mlir_detect_pybind11_install()
find_package(pybind11 2.9 CONFIG REQUIRED)
find_package(pybind11 2.10 CONFIG REQUIRED)
message(STATUS "Found pybind11 v${pybind11_VERSION}: ${pybind11_INCLUDE_DIR}")
message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', "
"suffix = '${PYTHON_MODULE_SUFFIX}', "
Expand Down
1 change: 1 addition & 0 deletions mlir/include/mlir/IR/Builders.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class AffineExpr;
class IRMapping;
class UnknownLoc;
class FileLineColLoc;
class FileLineColRange;
class Type;
class PrimitiveType;
class IntegerType;
Expand Down
24 changes: 20 additions & 4 deletions mlir/include/mlir/IR/BuiltinDialectBytecode.td
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,26 @@ def CallSiteLoc : DialectAttribute<(attr
LocationAttr:$caller
)>;

let cType = "FileLineColRange" in {
def FileLineColRange : DialectAttribute<(attr
StringAttr:$filename,
WithBuilder<"$_args",
WithType<"SmallVector<uint64_t>",
WithParser<"succeeded(readFileLineColRangeLocs($_reader, $_var))",
WithPrinter<"writeFileLineColRangeLocs($_writer, $_name)">>>>:$rawLocData
)> {
let cBuilder = "getFileLineColRange(context, filename, rawLocData)";
let printerPredicate = "!::llvm::isa<FileLineColLoc>($_val)";
}

def FileLineColLoc : DialectAttribute<(attr
StringAttr:$filename,
VarInt:$line,
VarInt:$column
)>;
VarInt:$start_line,
VarInt:$start_column
)> {
let printerPredicate = "::llvm::isa<FileLineColLoc>($_val)";
}
}

let cType = "FusedLoc",
cBuilder = "cast<FusedLoc>(get<FusedLoc>(context, $_args))" in {
Expand Down Expand Up @@ -321,7 +336,8 @@ def BuiltinDialectAttributes : DialectAttributes<"Builtin"> {
DenseIntOrFPElementsAttr,
DenseStringElementsAttr,
SparseElementsAttr,
DistinctAttr
DistinctAttr,
FileLineColRange,
];
}

Expand Down
84 changes: 68 additions & 16 deletions mlir/include/mlir/IR/BuiltinLocationAttributes.td
Original file line number Diff line number Diff line change
Expand Up @@ -60,46 +60,98 @@ def CallSiteLoc : Builtin_LocationAttr<"CallSiteLoc"> {
}

//===----------------------------------------------------------------------===//
// FileLineColLoc
// FileLineColRange
//===----------------------------------------------------------------------===//

// Location attribute describing a position or a range inside a file.
// NOTE(review): this span appears to carry two revisions of the same record
// line by line (the `FileLineColLoc` def/summary/parameters/attrName next to
// the `FileLineColRange` ones) -- confirm which lines are live before editing.
def FileLineColLoc : Builtin_LocationAttr<"FileLineColLoc"> {
let summary = "A file:line:column source location";
def FileLineColRange : Builtin_LocationAttr<"FileLineColRange"> {
let summary = "A file:line:column source location range";
let description = [{
Syntax:

```
filelinecol-location ::= string-literal `:` integer-literal `:`
integer-literal
(`to` (integer-literal ?) `:` integer-literal ?)
```

An instance of this location represents a tuple of file, line number, and
column number. This is similar to the type of location that you get from
most source languages.
An instance of this location represents a tuple of file, start and end line
number, and start and end column number. It allows for the following
configurations:

* A single file line location: `file:line`;
* A single file line col location: `file:line:column`;
* A single line range: `file:line:column to :column`;
* A single file range: `file:line:column to line:column`;

Example:

```mlir
loc("mysource.cc":10:8)
loc("mysource.cc":10:8 to 12:18)
```
}];
let parameters = (ins "StringAttr":$filename, "unsigned":$line,
"unsigned":$column);

let parameters = (ins "StringAttr":$filename,
"unsigned":$start_line, "unsigned":$start_column,
"unsigned":$end_line, "unsigned":$end_column);
// Custom builders. The range builders forward the file name plus an
// ArrayRef<unsigned> holding zero to four position values to `$_get`; builders
// "with inferred context" take the MLIRContext from `filename.getContext()`.
let builders = [
// File name only: no position values are passed.
AttrBuilderWithInferredContext<(ins "StringAttr":$filename), [{
return $_get(filename.getContext(), filename, ArrayRef<unsigned>{});
}]>,
// File name and a single line: one position value, {line}.
AttrBuilderWithInferredContext<(ins "StringAttr":$filename,
"unsigned":$line), [{
return $_get(filename.getContext(), filename,
ArrayRef<unsigned>{line});
}]>,
// File name, line and column: two position values, {line, column}.
AttrBuilderWithInferredContext<(ins "StringAttr":$filename,
"unsigned":$line,
"unsigned":$column), [{
return $_get(filename.getContext(), filename, line, column);
return $_get(filename.getContext(), filename,
ArrayRef<unsigned>{line, column});
}]>,
// Explicit-context builder taking the file name as a string reference. An
// empty file name is replaced by "-" before the StringAttr is created.
AttrBuilder<(ins "StringRef":$filename, "unsigned":$line,
"unsigned":$column), [{
AttrBuilder<(ins "::llvm::StringRef":$filename,
"unsigned":$start_line,
"unsigned":$start_column), [{
return $_get($_ctxt,
StringAttr::get($_ctxt, filename.empty() ? "-" : filename),
line, column);
}]>
StringAttr::get($_ctxt, filename.empty() ? "-" : filename),
ArrayRef<unsigned>{start_line, start_column});
}]>,
// Range within one line: three position values,
// {line, start_column, end_column}.
AttrBuilderWithInferredContext<(ins "::mlir::StringAttr":$filename,
"unsigned":$line,
"unsigned":$start_column,
"unsigned":$end_column), [{
return $_get(filename.getContext(), filename,
ArrayRef<unsigned>{line, start_column, end_column});
}]>,
// Full range: four position values. The packed order is
// {start_line, start_column, end_column, end_line}, i.e. end_column comes
// BEFORE end_line, unlike the parameter order. This matches the getter order
// declared in extraClassDeclaration and keeps the three-value form above a
// prefix of this one.
// NOTE(review): presumably intentional -- confirm against the storage
// implementation before "fixing" the order.
AttrBuilderWithInferredContext<(ins "::mlir::StringAttr":$filename,
"unsigned":$start_line,
"unsigned":$start_column,
"unsigned":$end_line,
"unsigned":$end_column), [{
return $_get(filename.getContext(), filename,
ArrayRef<unsigned>{start_line, start_column, end_column, end_line});
}]>,
// Same as the previous builder, but with an explicit context and a string
// file name; an empty file name is replaced by "-". The same packed order
// (end_column before end_line) applies.
AttrBuilder<(ins "::llvm::StringRef":$filename,
"unsigned":$start_line,
"unsigned":$start_column,
"unsigned":$end_line,
"unsigned":$end_column), [{
return $_get($_ctxt,
StringAttr::get($_ctxt, filename.empty() ? "-" : filename),
ArrayRef<unsigned>{start_line, start_column, end_column, end_line});
}]>,
];

// Accessors are declared by hand here because accessor generation is disabled
// below (genAccessors = 0); only the declarations are visible in this record.
let extraClassDeclaration = [{
::mlir::StringAttr getFilename() const;
unsigned getStartLine() const;
unsigned getStartColumn() const;
unsigned getEndColumn() const;
unsigned getEndLine() const;
}];
// Do not emit the default builders derived from `parameters`; only the
// builders listed above are available.
let skipDefaultBuilders = 1;
let attrName = "builtin.file_line_loc";
// Do not generate parameter accessors (see extraClassDeclaration above).
let genAccessors = 0;
// Do not generate a storage class for this attribute.
// NOTE(review): presumably the storage is written by hand elsewhere -- confirm.
let genStorageClass = 0;
let attrName = "builtin.file_line_range";
}

//===----------------------------------------------------------------------===//
Expand Down
31 changes: 31 additions & 0 deletions mlir/include/mlir/IR/Location.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ inline ::llvm::hash_code hash_value(Location arg) {
// Tablegen Attribute Declarations
//===----------------------------------------------------------------------===//

// Forward declaration of the attribute storage struct that, per its name,
// backs FileLineColRange. Only the name is introduced here; the definition is
// provided elsewhere.
// NOTE(review): presumably required because the generated attribute
// declarations included after this point refer to the storage type -- confirm.
namespace mlir::detail {
struct FileLineColRangeAttrStorage;
} // namespace mlir::detail

#define GET_ATTRDEF_CLASSES
#include "mlir/IR/BuiltinLocationAttributes.h.inc"

Expand Down Expand Up @@ -164,6 +169,32 @@ class FusedLocWith : public FusedLoc {
}
};

//===----------------------------------------------------------------------===//
// FileLineColLoc
//===----------------------------------------------------------------------===//

/// A source location that names a single point: a file together with one line
/// number and one column number, which is the kind of location most source
/// languages report.
///
/// FileLineColLoc is the FileLineColRange that has exactly one line and one
/// column.
class FileLineColLoc : public FileLineColRange {
public:
  /// Reuse the constructors of the base range attribute.
  using FileLineColRange::FileLineColRange;

  /// Type inquiry hook used by isa, cast, and dyn_cast.
  static bool classof(Attribute attr);

  /// Factory taking the file name as a StringAttr.
  static FileLineColLoc get(StringAttr filename, unsigned line,
                            unsigned column);

  /// Factory taking an explicit context and the file name as a StringRef.
  static FileLineColLoc get(MLIRContext *context, StringRef fileName,
                            unsigned line, unsigned column);

  /// Accessors for the file name, the line, and the column.
  StringAttr getFilename() const;
  unsigned getLine() const;
  unsigned getColumn() const;
};

//===----------------------------------------------------------------------===//
// OpaqueLoc
//===----------------------------------------------------------------------===//
Expand Down
Loading