182 changes: 161 additions & 21 deletions lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MipsABIFlags.h"
#include "lldb/Target/Process.h"

#define CASE_AND_STREAM(s, def, width) \
case def: \
Expand Down Expand Up @@ -3007,9 +3008,10 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
// section, no matter if .symtab was already parsed or not. This is because
// minidebuginfo normally removes the .symtab symbols which have their
// matching .dynsym counterparts.
Section *dynsym = nullptr;
if (!symtab ||
GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) {
Section *dynsym =
dynsym =
section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
.get();
if (dynsym) {
Expand All @@ -3019,6 +3021,20 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
m_address_class_map.merge(address_class_map);
}
}
if (!dynsym) {
// Try and read the dynamic symbol table from the .dynamic section.
uint32_t num_symbols = 0;
std::optional<DataExtractor> symtab_data =
GetDynsymDataFromDynamic(num_symbols);
std::optional<DataExtractor> strtab_data = GetDynstrData();
if (symtab_data && strtab_data) {
auto [num_symbols_parsed, address_class_map] =
ParseSymbols(&lldb_symtab, symbol_id, section_list, num_symbols,
symtab_data.value(), strtab_data.value());
symbol_id += num_symbols_parsed;
m_address_class_map.merge(address_class_map);
}
}

// DT_JMPREL
// If present, this entry's d_ptr member holds the address of
Expand Down Expand Up @@ -3828,6 +3844,33 @@ ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size,
Offset);
}

std::optional<DataExtractor>
ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length,
uint64_t offset) {
// ELFDynamic values contain a "d_ptr" member that will be a load address if
// we have an ELF file read from memory, or it will be a file address if it
// was read from a ELF file. This function will correctly fetch data pointed
// to by the ELFDynamic::d_ptr, or return std::nullopt if the data isn't
// available.
const lldb::addr_t d_ptr_addr = dyn->d_ptr + offset;
if (ProcessSP process_sp = m_process_wp.lock()) {
if (DataBufferSP data_sp = ReadMemory(process_sp, d_ptr_addr, length))
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
} else {
// We have an ELF file with no section headers or we didn't find the
// .dynamic section. Try and find the .dynstr section.
Address addr;
if (!addr.ResolveAddressUsingFileSections(d_ptr_addr, GetSectionList()))
return std::nullopt;
DataExtractor data;
addr.GetSection()->GetSectionData(data);
return DataExtractor(data,
d_ptr_addr - addr.GetSection()->GetFileAddress(),
length);
}
return std::nullopt;
}

std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
if (SectionList *section_list = GetSectionList()) {
// Find the SHT_DYNAMIC section.
Expand Down Expand Up @@ -3855,31 +3898,15 @@ std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
// and represent the dynamic symbol tables's string table. These are needed
// by the dynamic loader and we can read them from a process' address space.
//
// When loading and ELF file from memory, only the program headers end up
// being mapped into memory, and we can find these values in the PT_DYNAMIC
// segment.
// When loading an ELF file from memory, only the program headers are
// guaranteed to end up being mapped into memory, and we can find these values
// in the PT_DYNAMIC segment.
const ELFDynamic *strtab = FindDynamicSymbol(DT_STRTAB);
const ELFDynamic *strsz = FindDynamicSymbol(DT_STRSZ);
if (strtab == nullptr || strsz == nullptr)
return std::nullopt;

if (ProcessSP process_sp = m_process_wp.lock()) {
if (DataBufferSP data_sp =
ReadMemory(process_sp, strtab->d_ptr, strsz->d_val))
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
} else {
// We have an ELF file with no section headers or we didn't find the
// .dynamic section. Try and find the .dynstr section.
Address addr;
if (addr.ResolveAddressUsingFileSections(strtab->d_ptr, GetSectionList())) {
DataExtractor data;
addr.GetSection()->GetSectionData(data);
return DataExtractor(data,
strtab->d_ptr - addr.GetSection()->GetFileAddress(),
strsz->d_val);
}
}
return std::nullopt;
return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0);
}

std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
Expand Down Expand Up @@ -3912,3 +3939,116 @@ std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
}
return std::nullopt;
}

std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicHash() {
const ELFDynamic *hash = FindDynamicSymbol(DT_HASH);
if (hash == nullptr)
return std::nullopt;

// The DT_HASH header looks like this:
struct DtHashHeader {
uint32_t nbucket;
uint32_t nchain;
};
if (auto data = ReadDataFromDynamic(hash, 8)) {
// We don't need the number of buckets value "nbucket", we just need the
// "nchain" value which contains the number of symbols.
offset_t offset = offsetof(DtHashHeader, nchain);
return data->GetU32(&offset);
}

return std::nullopt;
}

std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicGnuHash() {
const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH);
if (gnu_hash == nullptr)
return std::nullopt;

// Create a DT_GNU_HASH header
// https://flapenguin.me/elf-dt-gnu-hash
struct DtGnuHashHeader {
uint32_t nbuckets = 0;
uint32_t symoffset = 0;
uint32_t bloom_size = 0;
uint32_t bloom_shift = 0;
};
uint32_t num_symbols = 0;
// Read enogh data for the DT_GNU_HASH header so we can extract the values.
if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DtGnuHashHeader))) {
offset_t offset = 0;
DtGnuHashHeader header;
header.nbuckets = data->GetU32(&offset);
header.symoffset = data->GetU32(&offset);
header.bloom_size = data->GetU32(&offset);
header.bloom_shift = data->GetU32(&offset);
const size_t addr_size = GetAddressByteSize();
const addr_t buckets_offset =
sizeof(DtGnuHashHeader) + addr_size * header.bloom_size;
std::vector<uint32_t> buckets;
if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4, buckets_offset)) {
offset = 0;
for (uint32_t i = 0; i < header.nbuckets; ++i)
buckets.push_back(bucket_data->GetU32(&offset));
// Locate the chain that handles the largest index bucket.
uint32_t last_symbol = 0;
for (uint32_t bucket_value : buckets)
last_symbol = std::max(bucket_value, last_symbol);
if (last_symbol < header.symoffset) {
num_symbols = header.symoffset;
} else {
// Walk the bucket's chain to add the chain length to the total.
const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4;
for (;;) {
if (auto chain_entry_data = ReadDataFromDynamic(gnu_hash, 4, chains_base_offset + (last_symbol - header.symoffset) * 4)) {
offset = 0;
uint32_t chain_entry = chain_entry_data->GetU32(&offset);
++last_symbol;
// If the low bit is set, this entry is the end of the chain.
if (chain_entry & 1)
break;
} else {
break;
}
}
num_symbols = last_symbol;
}
}
}
if (num_symbols > 0)
return num_symbols;

return std::nullopt;
}

std::optional<DataExtractor>
ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) {
  // Every ELF file which represents an executable or shared library has
  // mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the
  // symbol table, and DT_SYMENT contains the size of a symbol table entry.
  // We then can use either the DT_HASH or DT_GNU_HASH to find the number of
  // symbols in the symbol table as the symbol count is not stored in the
  // .dynamic section as a key/value pair.
  //
  // When loading an ELF file from memory, only the program headers end up
  // being mapped into memory, and we can find these values in the PT_DYNAMIC
  // segment.
  //
  // \a num_symbols is reset to zero on entry and then set to the symbol count
  // once one has been found. std::nullopt is returned if a mandatory entry is
  // missing, no non-zero count can be determined, or the table can't be read.
  num_symbols = 0;
  const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB);
  const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT);
  // DT_SYMTAB and DT_SYMENT are mandatory.
  if (symtab == nullptr || syment == nullptr)
    return std::nullopt;

  // Prefer DT_HASH, and fall back to DT_GNU_HASH for the symbol count.
  if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicHash())
    num_symbols = *syms;
  else if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicGnuHash())
    num_symbols = *syms;
  else
    return std::nullopt;
  if (num_symbols == 0)
    return std::nullopt;
  // The table occupies one DT_SYMENT-sized entry per symbol.
  return ReadDataFromDynamic(symtab, syment->d_val * num_symbols);
}
41 changes: 41 additions & 0 deletions lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,47 @@ class ObjectFileELF : public lldb_private::ObjectFile {
/// \return The bytes that represent the string table data or \c std::nullopt
/// if an error occurred.
std::optional<lldb_private::DataExtractor> GetDynstrData();

/// Read the bytes pointed to by the \a dyn dynamic entry.
///
/// ELFDynamic::d_ptr values contain file addresses if we load the ELF file
/// from a file on disk, or they contain load addresses if they were read
/// from memory. This function will correctly extract the data in both cases
/// if it is available.
///
/// \param[in] dyn The dynamic entry to use to fetch the data from.
///
/// \param[in] length The number of bytes to read.
///
/// \param[in] offset The number of bytes to skip after the d_ptr value
/// before reading data.
///
/// \return The bytes that represent the dynamic entry's data or
/// \c std::nullopt if an error occurred or the data is not available.
std::optional<lldb_private::DataExtractor>
ReadDataFromDynamic(const elf::ELFDynamic *dyn, uint64_t length,
uint64_t offset = 0);

/// Get the bytes that represent the dynamic symbol table from the .dynamic
/// section from process memory.
///
/// This function uses the DT_SYMTAB value from the .dynamic section to read
/// the symbol table data from process memory. The number of symbols in the
/// symbol table is calculated by looking at the DT_HASH or DT_GNU_HASH
/// values as the symbol count isn't stored in the .dynamic section.
///
/// \param[out] num_symbols Reset to zero on entry, then set to the symbol
/// count derived from DT_HASH or DT_GNU_HASH when one is found.
///
/// \return The bytes that represent the symbol table data from the .dynamic
/// section or section headers or \c std::nullopt if an error
/// occurred or if there is no dynamic symbol data available.
std::optional<lldb_private::DataExtractor>
GetDynsymDataFromDynamic(uint32_t &num_symbols);

/// Get the number of symbols from the DT_HASH dynamic entry.
std::optional<uint32_t> GetNumSymbolsFromDynamicHash();

/// Get the number of symbols from the DT_GNU_HASH dynamic entry.
std::optional<uint32_t> GetNumSymbolsFromDynamicGnuHash();

};

#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H
42 changes: 42 additions & 0 deletions lldb/test/Shell/ObjectFile/ELF/elf-dynsym.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// This test verifies that loading an ELF file that has no section headers can
// load the dynamic symbol table using the DT_SYMTAB, DT_SYMENT, DT_HASH or
// the DT_GNU_HASH .dynamic key/value pairs that are loaded via the PT_DYNAMIC
// segment.

// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \
// RUN: -o - - <<<".globl defined, undefined; defined:" | \
// RUN: ld.lld /dev/stdin -o - --hash-style=gnu -export-dynamic -shared \
// RUN: -z nosectionheader -o %t.gnu
// RUN: %lldb %t.gnu -b \
// RUN: -o "image dump objfile" \
// RUN: | FileCheck %s --dump-input=always --check-prefix=GNU
// GNU: (lldb) image dump objfile
// GNU: Dumping headers for 1 module(s).
// GNU: ObjectFileELF, file =
// GNU: ELF Header
// GNU: e_type = 0x0003 ET_DYN
// Make sure there are no section headers
// GNU: e_shnum = 0x00000000
// Make sure we were able to load the symbols
// GNU: Symtab, file = {{.*}}elf-dynsym.test.tmp.gnu, num_symbols = 2:
// GNU-DAG: undefined
// GNU-DAG: defined

// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \
// RUN: -o - - <<<".globl defined, undefined; defined:" | \
// RUN: ld.lld /dev/stdin -o - --hash-style=sysv -export-dynamic -shared \
// RUN: -z nosectionheader -o %t.sysv
// RUN: %lldb %t.sysv -b \
// RUN: -o "image dump objfile" \
// RUN: | FileCheck %s --dump-input=always --check-prefix=HASH
// HASH: (lldb) image dump objfile
// HASH: Dumping headers for 1 module(s).
// HASH: ObjectFileELF, file =
// HASH: ELF Header
// HASH: e_type = 0x0003 ET_DYN
// Make sure there are no section headers
// HASH: e_shnum = 0x00000000
// Make sure we were able to load the symbols
// HASH: Symtab, file = {{.*}}elf-dynsym.test.tmp.sysv, num_symbols = 2:
// HASH-DAG: undefined
// HASH-DAG: defined
9 changes: 8 additions & 1 deletion llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
work-item
IDs

``gfx950`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
- tgsplit flat
- xnack scratch .. TODO::
- kernarg preload - Packed
work-item Add product
IDs names.

**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
-----------------------------------------------------------------------------------------------------------------------
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
Expand Down Expand Up @@ -2178,7 +2185,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
*reserved* 0x04d Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
*reserved* 0x04f Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX950`` 0x04f ``gfx950``
*reserved* 0x050 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic``
Expand Down
2 changes: 0 additions & 2 deletions llvm/include/llvm/Analysis/InlineAdvisor.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,15 +287,13 @@ class PluginInlineAdvisorAnalysis
: public AnalysisInfoMixin<PluginInlineAdvisorAnalysis> {
public:
static AnalysisKey Key;
static bool HasBeenRegistered;

typedef InlineAdvisor *(*AdvisorFactory)(Module &M,
FunctionAnalysisManager &FAM,
InlineParams Params,
InlineContext IC);

PluginInlineAdvisorAnalysis(AdvisorFactory Factory) : Factory(Factory) {
HasBeenRegistered = true;
assert(Factory != nullptr &&
"The plugin advisor factory should not be a null pointer.");
}
Expand Down
5 changes: 0 additions & 5 deletions llvm/include/llvm/Analysis/InlineOrder.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ class PluginInlineOrderAnalysis
ModuleAnalysisManager &MAM, Module &M);

PluginInlineOrderAnalysis(InlineOrderFactory Factory) : Factory(Factory) {
HasBeenRegistered = true;
assert(Factory != nullptr &&
"The plugin inline order factory should not be a null pointer.");
}
Expand All @@ -71,11 +70,7 @@ class PluginInlineOrderAnalysis
Result run(Module &, ModuleAnalysisManager &) { return {Factory}; }
Result getResult() { return {Factory}; }

static bool isRegistered() { return HasBeenRegistered; }
static void unregister() { HasBeenRegistered = false; }

private:
static bool HasBeenRegistered;
InlineOrderFactory Factory;
};

Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/BinaryFormat/ELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
Expand Down
12 changes: 8 additions & 4 deletions llvm/include/llvm/IR/PassManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,11 @@ template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager {
AnalysisResultLists.clear();
}

/// Query whether an analysis pass of type \p PassT has been registered with
/// this manager.
///
/// \returns true if an entry keyed by \c PassT::ID() is present.
template <typename PassT> bool isPassRegistered() const {
  return AnalysisPasses.find(PassT::ID()) != AnalysisPasses.end();
}

/// Get the result of an analysis pass for a given IR unit.
///
/// Runs the analysis if a cached result is not available.
Expand Down Expand Up @@ -458,10 +463,9 @@ template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager {
/// and this function returns true.
///
/// (Note: Although the return value of this function indicates whether or not
/// an analysis was previously registered, there intentionally isn't a way to
/// query this directly. Instead, you should just register all the analyses
/// you might want and let this class run them lazily. This idiom lets us
/// minimize the number of times we have to look up analyses in our
/// an analysis was previously registered, you should just register all the
/// analyses you might want and let this class run them lazily. This idiom
/// lets us minimize the number of times we have to look up analyses in our
/// hashtable.)
template <typename PassBuilderT>
bool registerPass(PassBuilderT &&PassBuilder) {
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/ProfileData/InstrProfWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ class InstrProfWriter {
const llvm::SmallVector<memprof::FrameId> &CallStack,
function_ref<void(Error)> Warn);

/// Add the entire MemProfData \p Incoming to the writer context.
bool addMemProfData(memprof::IndexedMemProfData Incoming,
function_ref<void(Error)> Warn);

// Add a binary id to the binary ids list.
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);

Expand Down
25 changes: 13 additions & 12 deletions llvm/include/llvm/TargetParser/TargetParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,19 @@ enum GPUKind : uint32_t {
GK_GFX940 = 68,
GK_GFX941 = 69,
GK_GFX942 = 70,

GK_GFX1010 = 71,
GK_GFX1011 = 72,
GK_GFX1012 = 73,
GK_GFX1013 = 74,
GK_GFX1030 = 75,
GK_GFX1031 = 76,
GK_GFX1032 = 77,
GK_GFX1033 = 78,
GK_GFX1034 = 79,
GK_GFX1035 = 80,
GK_GFX1036 = 81,
GK_GFX950 = 71,

GK_GFX1010 = 72,
GK_GFX1011 = 73,
GK_GFX1012 = 74,
GK_GFX1013 = 75,
GK_GFX1030 = 76,
GK_GFX1031 = 77,
GK_GFX1032 = 78,
GK_GFX1033 = 79,
GK_GFX1034 = 80,
GK_GFX1035 = 81,
GK_GFX1036 = 82,

GK_GFX1100 = 90,
GK_GFX1101 = 91,
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Analysis/InlineAdvisor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,12 @@ void InlineAdvice::recordInliningWithCalleeDeleted() {

AnalysisKey InlineAdvisorAnalysis::Key;
AnalysisKey PluginInlineAdvisorAnalysis::Key;
bool PluginInlineAdvisorAnalysis::HasBeenRegistered = false;

bool InlineAdvisorAnalysis::Result::tryCreate(
InlineParams Params, InliningAdvisorMode Mode,
const ReplayInlinerSettings &ReplaySettings, InlineContext IC) {
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
if (PluginInlineAdvisorAnalysis::HasBeenRegistered) {
if (MAM.isPassRegistered<PluginInlineAdvisorAnalysis>()) {
auto &DA = MAM.getResult<PluginInlineAdvisorAnalysis>(M);
Advisor.reset(DA.Factory(M, FAM, Params, IC));
return !!Advisor;
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Analysis/InlineOrder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,6 @@ class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
} // namespace

AnalysisKey llvm::PluginInlineOrderAnalysis::Key;
bool llvm::PluginInlineOrderAnalysis::HasBeenRegistered;

std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
llvm::getDefaultInlineOrder(FunctionAnalysisManager &FAM,
Expand Down Expand Up @@ -313,7 +312,7 @@ llvm::getDefaultInlineOrder(FunctionAnalysisManager &FAM,
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
llvm::getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params,
ModuleAnalysisManager &MAM, Module &M) {
if (llvm::PluginInlineOrderAnalysis::isRegistered()) {
if (MAM.isPassRegistered<PluginInlineOrderAnalysis>()) {
LLVM_DEBUG(dbgs() << " Current used priority: plugin ---- \n");
return MAM.getResult<PluginInlineOrderAnalysis>(M).Factory(FAM, Params, MAM,
M);
Expand Down
12 changes: 7 additions & 5 deletions llvm/lib/CodeGen/MachineBlockPlacement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3558,14 +3558,16 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {

if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), MLI,
/*AfterPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
ComputedEdges.clear();
// Must redo the post-dominator tree if blocks were changed.
if (MPDT)
MPDT->recalculate(MF);
ChainAllocator.DestroyAll();
buildCFGChains();
if (!UseExtTspForSize) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
ComputedEdges.clear();
ChainAllocator.DestroyAll();
buildCFGChains();
}
}
}

Expand Down
36 changes: 36 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
case ISD::VP_LOAD:
Res = PromoteIntRes_VP_LOAD(cast<VPLoadSDNode>(N));
break;
case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
Expand Down Expand Up @@ -957,6 +960,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
return Res;
}

/// Promote the integer result of a VP_LOAD by re-emitting the load with the
/// promoted result type. A non-extending load becomes an EXTLOAD; any other
/// extension kind is kept as is.
SDValue DAGTypeLegalizer::PromoteIntRes_VP_LOAD(VPLoadSDNode *N) {
  assert(!N->isIndexed() && "Indexed vp_load during type legalization!");

  EVT PromotedVT =
      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));

  ISD::LoadExtType ExtType = N->getExtensionType();
  if (ExtType == ISD::NON_EXTLOAD)
    ExtType = ISD::EXTLOAD;

  SDLoc DL(N);
  SDValue NewLoad = DAG.getLoadVP(
      N->getAddressingMode(), ExtType, PromotedVT, DL, N->getChain(),
      N->getBasePtr(), N->getOffset(), N->getMask(), N->getVectorLength(),
      N->getMemoryVT(), N->getMemOperand());

  // The old node's chain result (value #1) must be rewired to the new load's
  // chain so existing chain users follow the replacement.
  ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
  return NewLoad;
}

SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
Expand Down Expand Up @@ -1957,6 +1977,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
case ISD::VP_STORE:
Res = PromoteIntOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo);
break;
case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
OpNo); break;
case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
Expand Down Expand Up @@ -2378,6 +2401,19 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getMemoryVT(), N->getMemOperand());
}

/// Promote the stored-value operand of a VP_STORE by emitting a truncating
/// VP store of the promoted value with the original memory type.
SDValue DAGTypeLegalizer::PromoteIntOp_VP_STORE(VPStoreSDNode *N,
                                                unsigned OpNo) {
  assert(OpNo == 1 && "Unexpected operand for promotion");
  assert(!N->isIndexed() && "expecting unindexed vp_store!");

  SDLoc DL(N);
  SDValue PromotedData = GetPromotedInteger(N->getValue());
  return DAG.getTruncStoreVP(N->getChain(), DL, PromotedData, N->getBasePtr(),
                             N->getMask(), N->getVectorLength(),
                             N->getMemoryVT(), N->getMemOperand(),
                             N->isCompressingStore());
}

SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
SDValue DataOp = N->getValue();
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_VP_LOAD(VPLoadSDNode *N);
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
SDValue PromoteIntRes_VECTOR_COMPRESS(SDNode *N);
Expand Down Expand Up @@ -420,6 +421,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntOp_ExpOp(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Object/ELFObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx941";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
return "gfx942";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:
return "gfx950";

// AMDGCN GFX10.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/ObjectYAML/ELFYAML.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
Expand Down
29 changes: 29 additions & 0 deletions llvm/lib/ProfileData/InstrProfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,35 @@ bool InstrProfWriter::addMemProfCallStack(
return true;
}

/// Merge the whole of \p Incoming (frames, call stacks and records) into the
/// writer's MemProfData.
///
/// Each component that is still empty in the writer takes over the incoming
/// container by move; otherwise the incoming entries are added one at a time
/// through the per-entry add functions.
///
/// \returns false as soon as adding a frame or call stack fails, true
/// otherwise.
bool InstrProfWriter::addMemProfData(memprof::IndexedMemProfData Incoming,
                                     function_ref<void(Error)> Warn) {
  // TODO: Once we remove support for MemProf format Version V1, assert that
  // the three components (frames, call stacks, and records) are either all
  // empty or populated.

  if (MemProfData.Frames.empty()) {
    MemProfData.Frames = std::move(Incoming.Frames);
  } else {
    // addMemProfFrame returns true on success, so bail out only on failure.
    for (const auto &[Id, F] : Incoming.Frames)
      if (!addMemProfFrame(Id, F, Warn))
        return false;
  }

  if (MemProfData.CallStacks.empty()) {
    MemProfData.CallStacks = std::move(Incoming.CallStacks);
  } else {
    // addMemProfCallStack returns true on success, so bail out only on
    // failure.
    for (const auto &[CSId, CS] : Incoming.CallStacks)
      if (!addMemProfCallStack(CSId, CS, Warn))
        return false;
  }

  if (MemProfData.Records.empty()) {
    MemProfData.Records = std::move(Incoming.Records);
  } else {
    for (const auto &[GUID, Record] : Incoming.Records)
      addMemProfRecord(GUID, Record);
  }

  return true;
}

void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
llvm::append_range(BinaryIds, BIs);
}
Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,12 @@ def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
"Additional instructions for GFX940+"
>;

def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
"GFX950Insts",
"true",
"Additional instructions for GFX950+"
>;

def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
"GFX10Insts",
"true",
Expand Down Expand Up @@ -1470,6 +1476,14 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureFlatBufferGlobalAtomicFaddF64Inst
]>;

// Common feature set for ISA version 9.5: every feature listed in
// FeatureISAVersion9_4_Common, plus the FP8 instruction and conversion
// features, the CvtFP8VOP1 bug flag, and the GFX950 instruction feature.
def FeatureISAVersion9_5_Common : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[FeatureFP8Insts,
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeatureGFX950Insts
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[
Expand Down Expand Up @@ -1503,6 +1517,8 @@ def FeatureISAVersion9_4_Generic : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[FeatureRequiresCOV6])>;

def FeatureISAVersion9_5_0 : FeatureSet<FeatureISAVersion9_5_Common.Features>;

def FeatureISAVersion10_Common : FeatureSet<
[FeatureGFX10,
FeatureLDSBankCount32,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNProcessors.td
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;

def : ProcessorModel<"gfx950", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_5_0.Features
>;

// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
FeatureISAVersion9_Generic.Features
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool GFX9Insts = false;
bool GFX90AInsts = false;
bool GFX940Insts = false;
bool GFX950Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
bool GFX12Insts = false;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
Expand Down Expand Up @@ -182,6 +183,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
Expand Down
12 changes: 4 additions & 8 deletions llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -614,11 +614,9 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {

if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
// Extract the run of set bits starting with bit zero from the bitwise
// inverse of ImmValue, and test that the inverse of this is the same
// as the original value.
if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) {

// Check if we have a leading one, then check if the whole value is a
// shifted mask.
if (ImmValue.isNegative() && ImmValue.isShiftedMask()) {
Imm = CurDAG->getTargetConstant(ImmValue.popcount() - 1, SDLoc(N), EltTy);
return true;
}
Expand Down Expand Up @@ -647,9 +645,7 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {

if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
// Extract the run of set bits starting with bit zero, and test that the
// result is the same as the original value
if (ImmValue == (ImmValue & ~(ImmValue + 1))) {
if (ImmValue.isMask()) {
Imm = CurDAG->getTargetConstant(ImmValue.popcount() - 1, SDLoc(N), EltTy);
return true;
}
Expand Down
22 changes: 14 additions & 8 deletions llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,12 @@ struct RISCVOperand final : public MCParsedAsmOperand {
RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum);
}

/// Returns true when this operand is a register and that register is a
/// member of the GPRPair register class.
bool isGPRPair() const {
  // Reg is only meaningful for register operands, so bail out first.
  if (Kind != KindTy::Register)
    return false;

  const auto &PairRC = RISCVMCRegisterClasses[RISCV::GPRPairRegClassID];
  return PairRC.contains(Reg.RegNum);
}

bool isGPRF16() const {
return Kind == KindTy::Register &&
RISCVMCRegisterClasses[RISCV::GPRF16RegClassID].contains(Reg.RegNum);
Expand All @@ -491,17 +497,17 @@ struct RISCVOperand final : public MCParsedAsmOperand {
RISCVMCRegisterClasses[RISCV::GPRF32RegClassID].contains(Reg.RegNum);
}

bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
bool isGPRAsFPR16() const { return isGPRF16() && Reg.IsGPRAsFPR; }
bool isGPRAsFPR32() const { return isGPRF32() && Reg.IsGPRAsFPR; }
bool isGPRPairAsFPR() const { return isGPRPair() && Reg.IsGPRAsFPR; }

bool isGPRPair() const {
bool isGPRF64Pair() const {
return Kind == KindTy::Register &&
RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains(
RISCVMCRegisterClasses[RISCV::GPRF64PairRegClassID].contains(
Reg.RegNum);
}

// "GPR used as FPR" operand predicates. Each one requires the operand to
// satisfy the matching GPR-based register-class predicate and to carry the
// IsGPRAsFPR flag. The class predicate is evaluated first (short-circuit),
// before Reg.IsGPRAsFPR is read.
bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
bool isGPRAsFPR16() const { return isGPRF16() && Reg.IsGPRAsFPR; }
bool isGPRAsFPR32() const { return isGPRF32() && Reg.IsGPRAsFPR; }
// 64-bit variant: the operand must be in the GPRF64Pair class (see
// isGPRF64Pair) rather than a single GPR.
bool isGPRPairAsFPR64() const { return isGPRF64Pair() && Reg.IsGPRAsFPR; }

static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
RISCVMCExpr::VariantKind &VK) {
if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
Expand Down Expand Up @@ -2399,7 +2405,7 @@ ParseStatus RISCVAsmParser::parseGPRPairAsFPR64(OperandVector &Operands) {
const MCRegisterInfo *RI = getContext().getRegisterInfo();
MCRegister Pair = RI->getMatchingSuperReg(
Reg, RISCV::sub_gpr_even,
&RISCVMCRegisterClasses[RISCV::GPRPairRegClassID]);
&RISCVMCRegisterClasses[RISCV::GPRF64PairRegClassID]);
Operands.push_back(RISCVOperand::createReg(Pair, S, E, /*isGPRAsFPR=*/true));
return ParseStatus::Success;
}
Expand Down
24 changes: 18 additions & 6 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -952,27 +952,36 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Res);
return;
}
case RISCVISD::BuildGPRPair:
case RISCVISD::BuildPairF64: {
if (!Subtarget->hasStdExtZdinx())
if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
break;

assert(!Subtarget->is64Bit() && "Unexpected subtarget");
assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
"BuildPairF64 only handled here on rv32i_zdinx");

int RegClassID = (Opcode == RISCVISD::BuildGPRPair)
? RISCV::GPRPairRegClassID
: RISCV::GPRF64PairRegClassID;
MVT OutType = (Opcode == RISCVISD::BuildGPRPair) ? MVT::Untyped : MVT::f64;

SDValue Ops[] = {
CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
CurDAG->getTargetConstant(RegClassID, DL, MVT::i32),
Node->getOperand(0),
CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
Node->getOperand(1),
CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};

SDNode *N =
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, OutType, Ops);
ReplaceNode(Node, N);
return;
}
case RISCVISD::SplitGPRPair:
case RISCVISD::SplitF64: {
if (Subtarget->hasStdExtZdinx()) {
assert(!Subtarget->is64Bit() && "Unexpected subtarget");
if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
"SplitF64 only handled here on rv32i_zdinx");

if (!SDValue(Node, 0).use_empty()) {
SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
Expand All @@ -990,6 +999,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}

assert(Opcode != RISCVISD::SplitGPRPair &&
"SplitGPRPair should already be handled");

if (!Subtarget->hasStdExtZfa())
break;
assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
Expand Down
81 changes: 75 additions & 6 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit())
addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
else
addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
addRegisterClass(MVT::f64, &RISCV::GPRF64PairRegClass);
}

static const MVT::SimpleValueType BoolVecVTs[] = {
Expand Down Expand Up @@ -2233,6 +2233,17 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
return PartVT;
}

/// Report how many registers are needed for a value of type \p VT.
///
/// A double-XLEN integer (i64 on RV32, i128 on RV64) whose requested register
/// type is MVT::Untyped is a pair operand of inline assembly and is reported
/// as a single register. Every other query is forwarded unchanged to
/// TargetLowering::getNumRegisters.
unsigned
RISCVTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT,
                                     std::optional<MVT> RegisterVT) const {
  // The integer type that spans exactly two XLEN-sized registers.
  const MVT DoubleXLenVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;

  const bool IsPairAsmOperand =
      VT == DoubleXLenVT && RegisterVT && *RegisterVT == MVT::Untyped;
  if (!IsPairAsmOperand)
    return TargetLowering::getNumRegisters(Context, VT, RegisterVT);

  return 1;
}

unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
Expand Down Expand Up @@ -20196,6 +20207,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TAIL)
NODE_NAME_CASE(SELECT_CC)
NODE_NAME_CASE(BR_CC)
NODE_NAME_CASE(BuildGPRPair)
NODE_NAME_CASE(SplitGPRPair)
NODE_NAME_CASE(BuildPairF64)
NODE_NAME_CASE(SplitF64)
NODE_NAME_CASE(ADD_LO)
Expand Down Expand Up @@ -20456,6 +20469,7 @@ RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
default:
break;
case 'f':
case 'R':
return C_RegisterClass;
case 'I':
case 'J':
Expand Down Expand Up @@ -20493,7 +20507,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
return std::make_pair(0U, &RISCV::GPRF64PairNoX0RegClass);
return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
case 'f':
if (VT == MVT::f16) {
Expand All @@ -20510,11 +20524,15 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Subtarget.hasStdExtD())
return std::make_pair(0U, &RISCV::FPR64RegClass);
if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
return std::make_pair(0U, &RISCV::GPRF64PairNoX0RegClass);
if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
}
break;
case 'R':
if (VT == MVT::f64 && !Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())
return std::make_pair(0U, &RISCV::GPRF64PairCRegClass);
return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
default:
break;
}
Expand Down Expand Up @@ -20552,7 +20570,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
return std::make_pair(0U, &RISCV::GPRF32CRegClass);
if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
return std::make_pair(0U, &RISCV::GPRPairCRegClass);
return std::make_pair(0U, &RISCV::GPRF64PairCRegClass);
if (!VT.isVector())
return std::make_pair(0U, &RISCV::GPRCRegClass);
} else if (Constraint == "cf") {
Expand All @@ -20570,7 +20588,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Subtarget.hasStdExtD())
return std::make_pair(0U, &RISCV::FPR64CRegClass);
if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
return std::make_pair(0U, &RISCV::GPRPairCRegClass);
return std::make_pair(0U, &RISCV::GPRF64PairCRegClass);
if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
return std::make_pair(0U, &RISCV::GPRCRegClass);
}
Expand Down Expand Up @@ -20734,7 +20752,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Subtarget into account.
if (Res.second == &RISCV::GPRF16RegClass ||
Res.second == &RISCV::GPRF32RegClass ||
Res.second == &RISCV::GPRPairRegClass)
Res.second == &RISCV::GPRF64PairRegClass)
return std::make_pair(Res.first, &RISCV::GPRRegClass);

return Res;
Expand Down Expand Up @@ -21360,6 +21378,16 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts(
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.has_value();
EVT ValueVT = Val.getValueType();

if (ValueVT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) &&
NumParts == 1 && PartVT == MVT::Untyped) {
// Pairs in Inline Assembly
MVT XLenVT = Subtarget.getXLenVT();
auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, MVT::Untyped, Lo, Hi);
return true;
}

if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
PartVT == MVT::f32) {
// Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
Expand Down Expand Up @@ -21436,6 +21464,17 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.has_value();

if (ValueVT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) &&
NumParts == 1 && PartVT == MVT::Untyped) {
// Pairs in Inline Assembly
MVT XLenVT = Subtarget.getXLenVT();
SDValue Res = DAG.getNode(RISCVISD::SplitGPRPair, DL,
DAG.getVTList(XLenVT, XLenVT), Parts[0]);
return DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Res.getValue(0),
Res.getValue(1));
}

if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
PartVT == MVT::f32) {
SDValue Val = Parts[0];
Expand Down Expand Up @@ -22012,6 +22051,36 @@ SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
}

// If an output pattern produces multiple instructions tablegen may pick an
// arbitrary type from an instructions destination register class to use for the
// VT of that MachineSDNode. This VT may be used to look up the representative
// register class. If the type isn't legal, the default implementation will
// not find a register class.
//
// Some integer types smaller than XLen are listed in the GPR register class to
// support isel patterns for GISel, but are not legal in SelectionDAG. The
// arbitrary type tablegen picks may be one of these smaller types.
//
// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
std::pair<const TargetRegisterClass *, uint8_t>
RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
switch (VT.SimpleTy) {
default:
break;
case MVT::i8:
case MVT::i16:
case MVT::i32:
return TargetLowering::findRepresentativeClass(TRI, Subtarget.getXLenVT());
case MVT::bf16:
case MVT::f16:
return TargetLowering::findRepresentativeClass(TRI, MVT::f32);
}

return TargetLowering::findRepresentativeClass(TRI, VT);
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,18 @@ enum NodeType : unsigned {
SELECT_CC,
BR_CC,

/// Turn a pair of `i<xlen>`s into an even-odd register pair (`untyped`).
/// - Output: `untyped` even-odd register pair
/// - Input 0: `i<xlen>` low-order bits, for even register.
/// - Input 1: `i<xlen>` high-order bits, for odd register.
BuildGPRPair,

/// Turn an even-odd register pair (`untyped`) into a pair of `i<xlen>`s.
/// - Output 0: `i<xlen>` low-order bits, from even register.
/// - Output 1: `i<xlen>` high-order bits, from odd register.
/// - Input: `untyped` even-odd register pair
SplitGPRPair,

/// Turns a pair of `i32`s into an `f64`. Needed for rv32d/ilp32.
/// - Output: `f64`.
/// - Input 0: low-order bits (31-0) (as `i32`), for even register.
Expand Down Expand Up @@ -547,6 +559,11 @@ class RISCVTargetLowering : public TargetLowering {
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;

/// Return the number of registers for a given MVT, for inline assembly
unsigned
getNumRegisters(LLVMContext &Context, EVT VT,
std::optional<MVT> RegisterVT = std::nullopt) const override;

/// Return the number of registers for a given MVT, ensuring vectors are
/// treated as a series of gpr sized integers.
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
Expand Down Expand Up @@ -1051,6 +1068,9 @@ class RISCVTargetLowering : public TargetLowering {

SDValue emitFlushICache(SelectionDAG &DAG, SDValue InChain, SDValue Start,
SDValue End, SDValue Flags, SDLoc DL) const;

std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override;
};

namespace RISCVVIntrinsicsTable {
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/RISCV/RISCVInstrInfoD.td
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmRV32Zdinx">;
def GPRPairAsFPR : AsmOperandClass {
let Name = "GPRPairAsFPR";
let ParserMethod = "parseGPRPairAsFPR64";
let PredicateMethod = "isGPRPairAsFPR";
let PredicateMethod = "isGPRPairAsFPR64";
let RenderMethod = "addRegOperands";
}

Expand All @@ -52,7 +52,7 @@ def FPR64INX : RegisterOperand<GPR> {
let DecoderMethod = "DecodeGPRRegisterClass";
}

def FPR64IN32X : RegisterOperand<GPRPair> {
def FPR64IN32X : RegisterOperand<GPRF64Pair> {
let ParserMatchClass = GPRPairAsFPR;
}

Expand Down Expand Up @@ -523,15 +523,15 @@ def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>;

/// Loads
let isCall = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in
def PseudoRV32ZdinxLD : Pseudo<(outs GPRPair:$dst), (ins GPR:$rs1, simm12:$imm12), []>;
def PseudoRV32ZdinxLD : Pseudo<(outs GPRF64Pair:$dst), (ins GPR:$rs1, simm12:$imm12), []>;
def : Pat<(f64 (load (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12))),
(PseudoRV32ZdinxLD GPR:$rs1, simm12:$imm12)>;

/// Stores
let isCall = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in
def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>;
def : Pat<(store (f64 GPRPair:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)),
(PseudoRV32ZdinxSD GPRPair:$rs2, GPR:$rs1, simm12:$imm12)>;
def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRF64Pair:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>;
def : Pat<(store (f64 GPRF64Pair:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)),
(PseudoRV32ZdinxSD GPRF64Pair:$rs2, GPR:$rs1, simm12:$imm12)>;
} // Predicates = [HasStdExtZdinx, IsRV32]

let Predicates = [HasStdExtD, IsRV32] in {
Expand Down
23 changes: 20 additions & 3 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ let RegAltNameIndices = [ABIRegAltName] in {

def XLenVT : ValueTypeByHwMode<[RV32, RV64],
[i32, i64]>;
defvar XLenPairVT = untyped;

// Allow f64 in GPR for ZDINX on RV64.
def XLenFVT : ValueTypeByHwMode<[RV64],
[f64]>;
Expand Down Expand Up @@ -323,7 +325,7 @@ let RegAltNameIndices = [ABIRegAltName] in {

let RegInfos = XLenPairRI,
DecoderMethod = "DecodeGPRPairRegisterClass" in {
def GPRPair : RISCVRegisterClass<[XLenPairFVT], 64, (add
def GPRPair : RISCVRegisterClass<[XLenPairVT], 64, (add
X10_X11, X12_X13, X14_X15, X16_X17,
X6_X7,
X28_X29, X30_X31,
Expand All @@ -332,11 +334,11 @@ def GPRPair : RISCVRegisterClass<[XLenPairFVT], 64, (add
X0_Pair, X2_X3, X4_X5
)>;

def GPRPairNoX0 : RISCVRegisterClass<[XLenPairFVT], 64, (sub GPRPair, X0_Pair)>;
def GPRPairNoX0 : RISCVRegisterClass<[XLenPairVT], 64, (sub GPRPair, X0_Pair)>;
} // let RegInfos = XLenPairRI, DecoderMethod = "DecodeGPRPairRegisterClass"

let RegInfos = XLenPairRI in
def GPRPairC : RISCVRegisterClass<[XLenPairFVT], 64, (add
def GPRPairC : RISCVRegisterClass<[XLenPairVT], 64, (add
X10_X11, X12_X13, X14_X15, X8_X9
)>;

Expand Down Expand Up @@ -462,6 +464,21 @@ def GPRF32C : RISCVRegisterClass<[f32], 32, (add (sequence "X%u_W", 10, 15),
(sequence "X%u_W", 8, 9))>;
def GPRF32NoX0 : RISCVRegisterClass<[f32], 32, (sub GPRF32, X0_W)>;

// Even-odd GPR pair register classes typed with XLenPairFVT (as opposed to
// the XLenPairVT-typed GPRPair* classes). RISCVISelLowering registers
// MVT::f64 with GPRF64Pair on non-64-bit subtargets.
// NOTE(review): the member list below appears to mirror GPRPair's, and no
// RegInfos override is applied here, unlike GPRPair - confirm both are
// intended.
let DecoderMethod = "DecodeGPRPairRegisterClass" in
def GPRF64Pair : RISCVRegisterClass<[XLenPairFVT], 64, (add
X10_X11, X12_X13, X14_X15, X16_X17,
X6_X7,
X28_X29, X30_X31,
X8_X9,
X18_X19, X20_X21, X22_X23, X24_X25, X26_X27,
X0_Pair, X2_X3, X4_X5
)>;

// Restricted subset of GPRF64Pair: only the X8_X9 and X10_X11..X14_X15 pairs.
def GPRF64PairC : RISCVRegisterClass<[XLenPairFVT], 64, (add
X10_X11, X12_X13, X14_X15, X8_X9
)>;

// GPRF64Pair with the X0_Pair register removed.
def GPRF64PairNoX0 : RISCVRegisterClass<[XLenPairFVT], 64, (sub GPRF64Pair, X0_Pair)>;

//===----------------------------------------------------------------------===//
// Vector type mapping to LLVM types.
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/TargetParser/TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
Expand Down Expand Up @@ -262,6 +263,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX940: return {9, 4, 0};
case GK_GFX941: return {9, 4, 1};
case GK_GFX942: return {9, 4, 2};
case GK_GFX950: return {9, 5, 0};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
Expand Down Expand Up @@ -361,7 +363,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["wavefrontsize32"] = true;
Features["wavefrontsize64"] = true;
} else if (T.isAMDGCN()) {
switch (parseArchAMDGCN(GPU)) {
AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
switch (Kind) {
case GK_GFX1201:
case GK_GFX1200:
case GK_GFX12_GENERIC:
Expand Down Expand Up @@ -466,12 +469,16 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["s-memtime-inst"] = true;
Features["gws"] = true;
break;
case GK_GFX950:
Features["gfx950-insts"] = true;
[[fallthrough]];
case GK_GFX942:
case GK_GFX941:
case GK_GFX940:
Features["fp8-insts"] = true;
Features["fp8-conversion-insts"] = true;
Features["xf32-insts"] = true;
if (Kind != GK_GFX950)
Features["xf32-insts"] = true;
[[fallthrough]];
case GK_GFX9_4_GENERIC:
Features["gfx940-insts"] = true;
Expand Down
345 changes: 232 additions & 113 deletions llvm/test/CodeGen/AMDGPU/bf16-conversions.ll

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX942-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX942-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck --check-prefixes=GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX950-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX950-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
Expand Down Expand Up @@ -180,6 +183,9 @@
; GFX942: .amdgcn_target "amdgcn-amd-amdhsa--gfx942"
; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack-"
; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack+"
; GFX950: .amdgcn_target "amdgcn-amd-amdhsa--gfx950"
; GFX950-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack-"
; GFX950-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack+"
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX942 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX950 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
Expand Down Expand Up @@ -139,6 +140,7 @@
; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B)
; GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
; GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s

; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s

; NO-SRAM-ECC-GFX906: Flags [
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
Expand Down Expand Up @@ -44,6 +47,11 @@
; SRAM-ECC-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; SRAM-ECC-GFX940: ]

; SRAM-ECC-GFX950: Flags [
; SRAM-ECC-GFX950: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
; SRAM-ECC-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
; SRAM-ECC-GFX950: ]

define amdgpu_kernel void @elf_header() {
ret void
}
594 changes: 437 additions & 157 deletions llvm/test/CodeGen/AMDGPU/fmaximum3.ll

Large diffs are not rendered by default.

594 changes: 437 additions & 157 deletions llvm/test/CodeGen/AMDGPU/fminimum3.ll

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s

Expand Down
1,224 changes: 586 additions & 638 deletions llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll

Large diffs are not rendered by default.

1,113 changes: 527 additions & 586 deletions llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll

Large diffs are not rendered by default.

1,569 changes: 755 additions & 814 deletions llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll

Large diffs are not rendered by default.

1,223 changes: 586 additions & 637 deletions llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll

Large diffs are not rendered by default.

1,113 changes: 527 additions & 586 deletions llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll

Large diffs are not rendered by default.

1,569 changes: 755 additions & 814 deletions llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll

Large diffs are not rendered by default.

73 changes: 73 additions & 0 deletions llvm/test/CodeGen/RISCV/rv32-inline-asm-pairs.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s

; Passes an i64 into inline asm and returns the i64 it produces, using the 'R'
; constraint for both the early-clobber output and the input. The expected
; code prints the operands as a2 and a0 and copies the result out of a2/a3.
; NOTE(review): the "Pr" in the function name does not match the 'R'
; constraint string used below (the rv64 counterpart is named test_R_*) -
; presumably a leftover name; confirm.
define i64 @test_Pr_wide_scalar_simple(i64 noundef %0) nounwind {
; CHECK-LABEL: test_Pr_wide_scalar_simple:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: # a2 <- a0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: ret
entry:
%1 = call i64 asm sideeffect "/* $0 <- $1 */", "=&R,R"(i64 %0)
ret i64 %1
}

; Builds an i64 whose low and high halves are both the i32 argument, passes it
; through inline asm with 'R' operands, then ORs the two halves of the asm
; result. The expected code forms the input pair with a single "mv a1, a0" and
; combines the output pair with "or a0, a2, a3".
define i32 @test_Pr_wide_scalar_with_ops(i32 noundef %0) nounwind {
; CHECK-LABEL: test_Pr_wide_scalar_with_ops:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: #APP
; CHECK-NEXT: # a2 <- a0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: or a0, a2, a3
; CHECK-NEXT: ret
entry:
%1 = zext i32 %0 to i64
%2 = shl i64 %1, 32
%3 = or i64 %1, %2
%4 = call i64 asm sideeffect "/* $0 <- $1 */", "=&R,R"(i64 %3)
%5 = trunc i64 %4 to i32
%6 = lshr i64 %4, 32
%7 = trunc i64 %6 to i32
%8 = or i32 %5, %7
ret i32 %8
}

; Exercises tied (in/out) operands: a pointer in a plain 'r' register and an
; i64 in an 'R' pair, each tied to its own output ("=r,=R,0,1"). Both values
; are spilled to allocas, reloaded, passed through the asm, stored back, and
; the i64 is reloaded and returned. The expected code prints the operands as
; a0 and a2 and keeps the pair in a2/a3.
define i64 @test_Pr_wide_scalar_inout(ptr %0, i64 noundef %1) nounwind {
; CHECK-LABEL: test_Pr_wide_scalar_inout:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: sw a1, 0(sp)
; CHECK-NEXT: sw a3, 4(sp)
; CHECK-NEXT: #APP
; CHECK-NEXT: # a0; a2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: sw a2, 0(sp)
; CHECK-NEXT: sw a3, 4(sp)
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%2 = alloca ptr, align 4
%3 = alloca i64, align 8
store ptr %0, ptr %2, align 4
store i64 %1, ptr %3, align 8
%4 = load ptr, ptr %2, align 4
%5 = load i64, ptr %3, align 8
%6 = call { ptr, i64 } asm sideeffect "/* $0; $1 */", "=r,=R,0,1"(ptr %4, i64 %5)
%7 = extractvalue { ptr, i64} %6, 0
%8 = extractvalue { ptr, i64 } %6, 1
store ptr %7, ptr %2, align 4
store i64 %8, ptr %3, align 8
%9 = load i64, ptr %3, align 8
ret i64 %9
}
73 changes: 73 additions & 0 deletions llvm/test/CodeGen/RISCV/rv64-inline-asm-pairs.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s

; Passes an i128 through an inline asm statement whose output ("=&R",
; early-clobber) and input ("R") both use the `R` constraint. The expected
; assembly prints the input operand as a0 and the output operand as a2, and
; then copies a2/a3 into a0/a1 for the return value.
define i128 @test_R_wide_scalar_simple(i128 noundef %0) nounwind {
; CHECK-LABEL: test_R_wide_scalar_simple:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: # a2 <- a0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: ret
entry:
%1 = call i128 asm sideeffect "/* $0 <- $1 */", "=&R,R"(i128 %0)
ret i128 %1
}

; Builds an i128 that holds the i64 argument in both its low and high halves
; (zext, shl by 64, or), passes it through inline asm with `R` constraints on
; the output and input, and returns the OR of the result's low and high
; 64-bit halves. The expected assembly duplicates a0 into a1 before the asm
; and combines a2 and a3 with a single `or` afterwards.
define i64 @test_R_wide_scalar_with_ops(i64 noundef %0) nounwind {
; CHECK-LABEL: test_R_wide_scalar_with_ops:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: #APP
; CHECK-NEXT: # a2 <- a0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: or a0, a2, a3
; CHECK-NEXT: ret
entry:
%1 = zext i64 %0 to i128
%2 = shl i128 %1, 64
%3 = or i128 %1, %2
%4 = call i128 asm sideeffect "/* $0 <- $1 */", "=&R,R"(i128 %3)
%5 = trunc i128 %4 to i64
%6 = lshr i128 %4, 64
%7 = trunc i128 %6 to i64
%8 = or i64 %5, %7
ret i64 %8
}

; In/out operand test: the inline asm has a pointer output ("=r") and an i128
; output ("=R"), each tied to the corresponding input through the matching
; constraints "0" and "1". Both values are round-tripped through allocas
; before and after the asm, and the reloaded i128 is returned. The expected
; assembly prints the pointer operand as a0 and the i128 operand as a2, with
; the i128 moved into a2/a3 ahead of the asm.
define i128 @test_R_wide_scalar_inout(ptr %0, i128 noundef %1) nounwind {
; CHECK-LABEL: test_R_wide_scalar_inout:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: sd a0, 24(sp)
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: sd a1, 0(sp)
; CHECK-NEXT: sd a3, 8(sp)
; CHECK-NEXT: #APP
; CHECK-NEXT: # a0; a2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: sd a0, 24(sp)
; CHECK-NEXT: sd a2, 0(sp)
; CHECK-NEXT: sd a3, 8(sp)
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
entry:
%2 = alloca ptr, align 8
%3 = alloca i128, align 16
store ptr %0, ptr %2, align 8
store i128 %1, ptr %3, align 16
%4 = load ptr, ptr %2, align 8
%5 = load i128, ptr %3, align 16
%6 = call { ptr, i128 } asm sideeffect "/* $0; $1 */", "=r,=R,0,1"(ptr %4, i128 %5)
%7 = extractvalue { ptr, i128} %6, 0
%8 = extractvalue { ptr, i128 } %6, 1
store ptr %7, ptr %2, align 8
store i128 %8, ptr %3, align 16
%9 = load i128, ptr %3, align 16
ret i128 %9
}
16 changes: 14 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ define void @vpstore_v4i8(<4 x i8> %val, ptr %ptr, <4 x i1> %m, i32 zeroext %evl
ret void
}

declare void @llvm.vp.store.v8i7.v8i7.p0(<8 x i7>, ptr, <8 x i1>, i32)

; Masked vector-predicated store of a fixed <8 x i7> vector (7-bit elements).
; The expected assembly sets the vector length from the EVL argument with an
; 8-bit element width (e8, mf2) and performs the store with a masked vse8.v.
define void @vpstore_v8i7(<8 x i7> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpstore_v8i7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vse8.v v8, (a0), v0.t
; CHECK-NEXT: ret
call void @llvm.vp.store.v8i7.v8i7.p0(<8 x i7> %val, ptr %ptr, <8 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.store.v8i8.p0(<8 x i8>, ptr, <8 x i1>, i32)

define void @vpstore_v8i8(<8 x i8> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
Expand Down Expand Up @@ -285,10 +297,10 @@ define void @vpstore_v32f64(<32 x double> %val, ptr %ptr, <32 x i1> %m, i32 zero
; CHECK: # %bb.0:
; CHECK-NEXT: li a3, 16
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: bltu a1, a3, .LBB23_2
; CHECK-NEXT: bltu a1, a3, .LBB24_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: .LBB23_2:
; CHECK-NEXT: .LBB24_2:
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vse64.v v8, (a0), v0.t
; CHECK-NEXT: addi a2, a1, -16
Expand Down
28 changes: 20 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/vpload.ll
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ define <vscale x 3 x i8> @vpload_nxv3i8(ptr %ptr, <vscale x 3 x i1> %m, i32 zero
ret <vscale x 3 x i8> %load
}

declare <vscale x 4 x i6> @llvm.vp.load.nxv4i6.nxv4i6.p0(<vscale x 4 x i6>*, <vscale x 4 x i1>, i32)

; Masked vector-predicated load of a scalable <vscale x 4 x i6> vector (6-bit
; elements). The expected assembly sets the vector length from the EVL
; argument with an 8-bit element width (e8, mf2) and performs the load with a
; masked vle8.v.
define <vscale x 4 x i6> @vpload_nxv4i6(<vscale x 4 x i6>* %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4i6:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <vscale x 4 x i6> @llvm.vp.load.nxv4i6.nxv4i6.p0(<vscale x 4 x i6>* %ptr, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i6> %load
}

declare <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @vpload_nxv4i8(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
Expand Down Expand Up @@ -523,10 +535,10 @@ define <vscale x 16 x double> @vpload_nxv16f64(ptr %ptr, <vscale x 16 x i1> %m,
; CHECK-NEXT: add a4, a0, a4
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v16, (a4), v0.t
; CHECK-NEXT: bltu a1, a2, .LBB43_2
; CHECK-NEXT: bltu a1, a2, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB43_2:
; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a0), v0.t
Expand All @@ -553,10 +565,10 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a5, a3, 1
; CHECK-NEXT: mv a4, a2
; CHECK-NEXT: bltu a2, a5, .LBB44_2
; CHECK-NEXT: bltu a2, a5, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a5
; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: sub a6, a4, a3
; CHECK-NEXT: slli a7, a3, 3
; CHECK-NEXT: srli t0, a3, 3
Expand All @@ -572,21 +584,21 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
; CHECK-NEXT: sltu a2, a2, a5
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a5
; CHECK-NEXT: bltu a2, a3, .LBB44_4
; CHECK-NEXT: bltu a2, a3, .LBB45_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB44_4:
; CHECK-NEXT: .LBB45_4:
; CHECK-NEXT: slli a5, a3, 4
; CHECK-NEXT: srli a6, a3, 2
; CHECK-NEXT: vsetvli a7, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a6
; CHECK-NEXT: add a5, a0, a5
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a5), v0.t
; CHECK-NEXT: bltu a4, a3, .LBB44_6
; CHECK-NEXT: bltu a4, a3, .LBB45_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a4, a3
; CHECK-NEXT: .LBB44_6:
; CHECK-NEXT: .LBB45_6:
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a0), v0.t
Expand Down
28 changes: 20 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/vpstore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,18 @@ define void @vpstore_nxv4i16(<vscale x 4 x i16> %val, ptr %ptr, <vscale x 4 x i1
ret void
}

declare void @llvm.vp.store.nxv8i12.nxv8i12.p0(<vscale x 8 x i12>, <vscale x 8 x i12>*, <vscale x 8 x i1>, i32)

; Masked vector-predicated store of a scalable <vscale x 8 x i12> vector
; (12-bit elements). The expected assembly sets the vector length from the
; EVL argument with a 16-bit element width (e16, m2) and performs the store
; with a masked vse16.v.
define void @vpstore_nxv8i12(<vscale x 8 x i12> %val, <vscale x 8 x i12>* %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpstore_nxv8i12:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vse16.v v8, (a0), v0.t
; CHECK-NEXT: ret
call void @llvm.vp.store.nxv8i12.nxv8i12.p0(<vscale x 8 x i12> %val, <vscale x 8 x i12>* %ptr, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16>, ptr, <vscale x 8 x i1>, i32)

define void @vpstore_nxv8i16(<vscale x 8 x i16> %val, ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
Expand Down Expand Up @@ -421,10 +433,10 @@ define void @vpstore_nxv16f64(<vscale x 16 x double> %val, ptr %ptr, <vscale x 1
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: bltu a1, a2, .LBB34_2
; CHECK-NEXT: bltu a1, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: .LBB34_2:
; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vse64.v v8, (a0), v0.t
; CHECK-NEXT: srli a3, a2, 3
Expand Down Expand Up @@ -462,15 +474,15 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a4, a3, 1
; CHECK-NEXT: mv a5, a2
; CHECK-NEXT: bltu a2, a4, .LBB35_2
; CHECK-NEXT: bltu a2, a4, .LBB36_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a5, a4
; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: .LBB36_2:
; CHECK-NEXT: mv a6, a5
; CHECK-NEXT: bltu a5, a3, .LBB35_4
; CHECK-NEXT: bltu a5, a3, .LBB36_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a6, a3
; CHECK-NEXT: .LBB35_4:
; CHECK-NEXT: .LBB36_4:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
Expand All @@ -492,10 +504,10 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
; CHECK-NEXT: vse64.v v8, (a6), v0.t
; CHECK-NEXT: bltu a0, a3, .LBB35_6
; CHECK-NEXT: bltu a0, a3, .LBB36_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a0, a3
; CHECK-NEXT: .LBB35_6:
; CHECK-NEXT: .LBB36_6:
; CHECK-NEXT: slli a2, a3, 4
; CHECK-NEXT: srli a3, a3, 2
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
Expand Down
28 changes: 27 additions & 1 deletion llvm/test/CodeGen/RISCV/zdinx-asm-constraint.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; RUN: llc -mtriple=riscv32 -mattr=+zdinx -verify-machineinstrs < %s \
; RUN: -target-abi=ilp32 -mattr=+zhinx | FileCheck %s

;; These tests cover the use of `r` and `cr` constraints for floating point values on rv32.
;; These tests cover the use of `r`, `R`, and `cr` constraints for floating point values on rv32.
;;
;; In particular, there is significant complexity around using paired GPRs for double values on rv32.

Expand All @@ -26,6 +26,32 @@ entry:
ret void
}

;; Exercises the `R` constraint with double operands: the inline asm
;; "fsgnjx.d $0, $1, $2" takes both double arguments as `R` inputs and yields
;; a double `R` output, which is stored to element 1 of %a. In the expected
;; assembly the operands are printed as a2, s0 and a4, and the result is
;; written back as two 32-bit words at offsets 8 and 12 from a0.
define dso_local void @zdinx_asm_R(ptr nocapture noundef writeonly %a, double noundef %b, double noundef %c) nounwind {
; CHECK-LABEL: zdinx_asm_R:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: mv a5, a4
; CHECK-NEXT: mv s1, a2
; CHECK-NEXT: mv a4, a3
; CHECK-NEXT: mv s0, a1
; CHECK-NEXT: #APP
; CHECK-NEXT: fsgnjx.d a2, s0, a4
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: sw a2, 8(a0)
; CHECK-NEXT: sw a3, 12(a0)
; CHECK-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%arrayidx = getelementptr inbounds double, ptr %a, i32 1
%0 = tail call double asm "fsgnjx.d $0, $1, $2", "=R,R,R"(double %b, double %c)
store double %0, ptr %arrayidx, align 8
ret void
}

define dso_local void @zfinx_asm(ptr nocapture noundef writeonly %a, float noundef %b, float noundef %c) nounwind {
; CHECK-LABEL: zfinx_asm:
; CHECK: # %bb.0: # %entry
Expand Down
1 change: 1 addition & 0 deletions llvm/test/MC/AMDGPU/flat-scratch-gfx940.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck -check-prefix=GFX940 %s

scratch_load_dword a2, v4, s6
// GFX940: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
Expand Down
1 change: 1 addition & 0 deletions llvm/test/MC/AMDGPU/gfx940_asm_features.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX90A --implicit-check-not=error: %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX10 --implicit-check-not=error: %s

Expand Down
179 changes: 179 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950-unsupported.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck -check-prefix=ERR %s

//===----------------------------------------------------------------------===//
// v_mfma_f32_32x32x4_xf32
//===----------------------------------------------------------------------===//

v_mfma_f32_32x32x4_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], v[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], a[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], v[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], a[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU


//===----------------------------------------------------------------------===//
// v_mfma_f32_16x16x8_xf32
//===----------------------------------------------------------------------===//

v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], v[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], a[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU


v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], v[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], a[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
13 changes: 13 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# RUN: llvm-mc -disassemble -arch=amdgcn -mcpu=gfx950 -show-encoding %s 2>&1 | FileCheck --implicit-check-not=warning: --check-prefix=GFX950 %s

# GFX950: warning: invalid instruction encoding
0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04

# GFX950: warning: invalid instruction encoding
0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04

# GFX950: warning: invalid instruction encoding
0x00,0x00,0xbf,0xd3,0x02,0x09,0x0a,0x04

# GFX950: warning: invalid instruction encoding
0x00,0x80,0xbf,0xd3,0x02,0x09,0x0a,0x04
1 change: 1 addition & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s

# GFX940: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02]
0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02
Expand Down
7 changes: 7 additions & 0 deletions llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX942 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX942 %s

# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX950/' %s | yaml2obj -o %t.o.AMDGCN_GFX950
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX950 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX950 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX950 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX950 %s

# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1010/' %s | yaml2obj -o %t.o.AMDGCN_GFX1010
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1010 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1010 %s
Expand Down Expand Up @@ -411,6 +415,9 @@
# ELF-AMDGCN-GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
# YAML-AMDGCN-GFX942: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX942 ]

# ELF-AMDGCN-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
# YAML-AMDGCN-GFX950: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX950 ]

# ELF-AMDGCN-GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
# YAML-AMDGCN-GFX1010: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ]

Expand Down
6 changes: 5 additions & 1 deletion llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ define amdgpu_kernel void @test_kernel() {

; ----------------------------------GFX9---------------------------------------
;

; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-4-generic -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-4-generic %t.o > %t-specify.txt
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
Expand All @@ -148,6 +147,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx950 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,15 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C

# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F

# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F

# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F

# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33

Expand Down
12 changes: 3 additions & 9 deletions llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class ObjcopyOptTable : public opt::GenericOptTable {
public:
ObjcopyOptTable() : opt::GenericOptTable(objcopy_opt::ObjcopyInfoTable) {
setGroupedShortOptions(true);
setDashDashParsing(true);
}
};

Expand Down Expand Up @@ -650,17 +651,11 @@ parseChangeSectionAddr(StringRef ArgValue, StringRef OptionName,
// help flag is set then parseObjcopyOptions will print the help message and
// exit.
Expected<DriverConfig>
objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
objcopy::parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
function_ref<Error(Error)> ErrorCallback) {
DriverConfig DC;
ObjcopyOptTable T;

const char *const *DashDash =
llvm::find_if(RawArgsArr, [](StringRef Str) { return Str == "--"; });
ArrayRef<const char *> ArgsArr = ArrayRef(RawArgsArr.begin(), DashDash);
if (DashDash != RawArgsArr.end())
DashDash = std::next(DashDash);

unsigned MissingArgumentIndex, MissingArgumentCount;
llvm::opt::InputArgList InputArgs =
T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
Expand All @@ -671,7 +666,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
"argument to '%s' is missing (expected %d value(s))",
InputArgs.getArgString(MissingArgumentIndex), MissingArgumentCount);

if (InputArgs.size() == 0 && DashDash == RawArgsArr.end()) {
if (InputArgs.size() == 0) {
printHelp(T, errs(), ToolType::Objcopy);
exit(1);
}
Expand All @@ -695,7 +690,6 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,

for (auto *Arg : InputArgs.filtered(OBJCOPY_INPUT))
Positional.push_back(Arg->getValue());
std::copy(DashDash, RawArgsArr.end(), std::back_inserter(Positional));

if (Positional.empty())
return createStringError(errc::invalid_argument, "no input file specified");
Expand Down
1 change: 1 addition & 0 deletions llvm/tools/llvm-readobj/ELFDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1619,6 +1619,7 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
Expand Down
41 changes: 10 additions & 31 deletions llvm/unittests/Analysis/PluginInlineAdvisorAnalysisTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,33 +87,10 @@ struct CompilerInstance {
ThinOrFullLTOPhase::None));
}

~CompilerInstance() {
// Reset the static variable that tracks if the plugin has been registered.
// This is needed to allow the test to run multiple times.
PluginInlineAdvisorAnalysis::HasBeenRegistered = false;
}

std::string output;
std::unique_ptr<Module> outputM;

// run with the default inliner
auto run_default(StringRef IR) {
PluginInlineAdvisorAnalysis::HasBeenRegistered = false;
outputM = parseAssemblyString(IR, Error, Ctx);
MPM.run(*outputM, MAM);
ASSERT_TRUE(outputM);
output.clear();
raw_string_ostream o_stream{output};
outputM->print(o_stream, nullptr);
ASSERT_TRUE(true);
}

// run with the dynamic inliner
auto run_dynamic(StringRef IR) {
// Note: typically the constructor for the DynamicInlineAdvisorAnalysis
// will automatically set this to true; we control it here only to
// alternate between the default and dynamic inliner in our test.
PluginInlineAdvisorAnalysis::HasBeenRegistered = true;
auto run(StringRef IR) {
outputM = parseAssemblyString(IR, Error, Ctx);
MPM.run(*outputM, MAM);
ASSERT_TRUE(outputM);
Expand Down Expand Up @@ -274,14 +251,16 @@ TEST(PluginInlineAdvisorTest, PluginLoad) {
// Skip the test if plugins are disabled.
GTEST_SKIP();
#endif
CompilerInstance CI{};
CI.setupPlugin();
CompilerInstance DefaultCI{};

CompilerInstance PluginCI{};
PluginCI.setupPlugin();

for (StringRef IR : TestIRS) {
CI.run_default(IR);
std::string default_output = CI.output;
CI.run_dynamic(IR);
std::string dynamic_output = CI.output;
DefaultCI.run(IR);
std::string default_output = DefaultCI.output;
PluginCI.run(IR);
std::string dynamic_output = PluginCI.output;
ASSERT_EQ(default_output, dynamic_output);
}
}
Expand All @@ -294,7 +273,7 @@ TEST(PluginInlineAdvisorTest, CustomAdvisor) {
CI.setupFooOnly();

for (StringRef IR : TestIRS) {
CI.run_dynamic(IR);
CI.run(IR);
CallGraph CGraph = CallGraph(*CI.outputM);
for (auto &node : CGraph) {
for (auto &edge : *node.second) {
Expand Down
6 changes: 0 additions & 6 deletions llvm/unittests/Analysis/PluginInlineOrderAnalysisTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,6 @@ struct CompilerInstance {
ThinOrFullLTOPhase::None));
}

~CompilerInstance() {
// Reset the static variable that tracks if the plugin has been registered.
// This is needed to allow the test to run multiple times.
PluginInlineOrderAnalysis::unregister();
}

std::string Output;
std::unique_ptr<Module> OutputM;

Expand Down
112 changes: 51 additions & 61 deletions llvm/unittests/ProfileData/InstrProfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
#include <cstdarg>
#include <initializer_list>
#include <optional>

using namespace llvm;
Expand Down Expand Up @@ -348,10 +349,10 @@ TEST_F(InstrProfTest, test_merge_traces_sampled) {
using ::llvm::memprof::IndexedMemProfRecord;
using ::llvm::memprof::MemInfoBlock;
using FrameIdMapTy =
llvm::DenseMap<::llvm::memprof::FrameId, ::llvm::memprof::Frame>;
llvm::MapVector<::llvm::memprof::FrameId, ::llvm::memprof::Frame>;
using CallStackIdMapTy =
llvm::DenseMap<::llvm::memprof::CallStackId,
::llvm::SmallVector<::llvm::memprof::FrameId>>;
llvm::MapVector<::llvm::memprof::CallStackId,
::llvm::SmallVector<::llvm::memprof::FrameId>>;

static FrameIdMapTy getFrameMapping() {
FrameIdMapTy Mapping;
Expand Down Expand Up @@ -467,11 +468,11 @@ TEST_F(InstrProfTest, test_memprof_v0) {
/*CallSiteFrames=*/{
{4, 5},
});
const FrameIdMapTy IdToFrameMap = getFrameMapping();
for (const auto &I : IdToFrameMap) {
Writer.addMemProfFrame(I.first, I.getSecond(), Err);
}
Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);

memprof::IndexedMemProfData MemProfData;
MemProfData.Frames = getFrameMapping();
MemProfData.Records.try_emplace(0x9999, IndexedMR);
Writer.addMemProfData(MemProfData, Err);

auto Profile = Writer.writeBuffer();
readProfile(std::move(Profile));
Expand All @@ -482,8 +483,8 @@ TEST_F(InstrProfTest, test_memprof_v0) {

std::optional<memprof::FrameId> LastUnmappedFrameId;
auto IdToFrameCallback = [&](const memprof::FrameId Id) {
auto Iter = IdToFrameMap.find(Id);
if (Iter == IdToFrameMap.end()) {
auto Iter = MemProfData.Frames.find(Id);
if (Iter == MemProfData.Frames.end()) {
LastUnmappedFrameId = Id;
return memprof::Frame(0, 0, 0, false);
}
Expand All @@ -508,15 +509,11 @@ TEST_F(InstrProfTest, test_memprof_v2_full_schema) {
const IndexedMemProfRecord IndexedMR = makeRecordV2(
/*AllocFrames=*/{0x111, 0x222},
/*CallSiteFrames=*/{0x333}, MIB, memprof::getFullSchema());
const FrameIdMapTy IdToFrameMap = getFrameMapping();
const auto CSIdToCallStackMap = getCallStackMapping();
for (const auto &I : IdToFrameMap) {
Writer.addMemProfFrame(I.first, I.getSecond(), Err);
}
for (const auto &I : CSIdToCallStackMap) {
Writer.addMemProfCallStack(I.first, I.getSecond(), Err);
}
Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);
memprof::IndexedMemProfData MemProfData;
MemProfData.Frames = getFrameMapping();
MemProfData.CallStacks = getCallStackMapping();
MemProfData.Records.try_emplace(0x9999, IndexedMR);
Writer.addMemProfData(MemProfData, Err);

auto Profile = Writer.writeBuffer();
readProfile(std::move(Profile));
Expand All @@ -525,9 +522,10 @@ TEST_F(InstrProfTest, test_memprof_v2_full_schema) {
ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
const memprof::MemProfRecord &Record = RecordOr.get();

memprof::FrameIdConverter<decltype(IdToFrameMap)> FrameIdConv(IdToFrameMap);
memprof::CallStackIdConverter<decltype(CSIdToCallStackMap)> CSIdConv(
CSIdToCallStackMap, FrameIdConv);
memprof::FrameIdConverter<decltype(MemProfData.Frames)> FrameIdConv(
MemProfData.Frames);
memprof::CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv(
MemProfData.CallStacks, FrameIdConv);

const ::llvm::memprof::MemProfRecord WantRecord =
IndexedMR.toMemProfRecord(CSIdConv);
Expand All @@ -550,15 +548,11 @@ TEST_F(InstrProfTest, test_memprof_v2_partial_schema) {
const IndexedMemProfRecord IndexedMR = makeRecordV2(
/*AllocFrames=*/{0x111, 0x222},
/*CallSiteFrames=*/{0x333}, MIB, memprof::getHotColdSchema());
const FrameIdMapTy IdToFrameMap = getFrameMapping();
const auto CSIdToCallStackMap = getCallStackMapping();
for (const auto &I : IdToFrameMap) {
Writer.addMemProfFrame(I.first, I.getSecond(), Err);
}
for (const auto &I : CSIdToCallStackMap) {
Writer.addMemProfCallStack(I.first, I.getSecond(), Err);
}
Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);
memprof::IndexedMemProfData MemProfData;
MemProfData.Frames = getFrameMapping();
MemProfData.CallStacks = getCallStackMapping();
MemProfData.Records.try_emplace(0x9999, IndexedMR);
Writer.addMemProfData(MemProfData, Err);

auto Profile = Writer.writeBuffer();
readProfile(std::move(Profile));
Expand All @@ -567,9 +561,10 @@ TEST_F(InstrProfTest, test_memprof_v2_partial_schema) {
ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
const memprof::MemProfRecord &Record = RecordOr.get();

memprof::FrameIdConverter<decltype(IdToFrameMap)> FrameIdConv(IdToFrameMap);
memprof::CallStackIdConverter<decltype(CSIdToCallStackMap)> CSIdConv(
CSIdToCallStackMap, FrameIdConv);
memprof::FrameIdConverter<decltype(MemProfData.Frames)> FrameIdConv(
MemProfData.Frames);
memprof::CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv(
MemProfData.CallStacks, FrameIdConv);

const ::llvm::memprof::MemProfRecord WantRecord =
IndexedMR.toMemProfRecord(CSIdConv);
Expand Down Expand Up @@ -601,23 +596,21 @@ TEST_F(InstrProfTest, test_caller_callee_pairs) {
// Line: 7, Column: 8
// new(...)

const std::pair<memprof::FrameId, memprof::Frame> Frames[] = {
{0, {0x123, 1, 2, false}},
{1, {0x234, 3, 4, true}},
{2, {0x123, 5, 6, false}},
{3, {0x345, 7, 8, true}}};
for (const auto &[FrameId, Frame] : Frames)
Writer.addMemProfFrame(FrameId, Frame, Err);

const std::pair<memprof::CallStackId, SmallVector<memprof::FrameId>>
CallStacks[] = {{0x111, {1, 0}}, {0x222, {3, 2}}};
for (const auto &[CSId, CallStack] : CallStacks)
Writer.addMemProfCallStack(CSId, CallStack, Err);

const IndexedMemProfRecord IndexedMR = makeRecordV2(
/*AllocFrames=*/{0x111, 0x222},
/*CallSiteFrames=*/{}, MIB, memprof::getHotColdSchema());
Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);

memprof::IndexedMemProfData MemProfData;
MemProfData.Frames.try_emplace(0, 0x123, 1, 2, false);
MemProfData.Frames.try_emplace(1, 0x234, 3, 4, true);
MemProfData.Frames.try_emplace(2, 0x123, 5, 6, false);
MemProfData.Frames.try_emplace(3, 0x345, 7, 8, true);
MemProfData.CallStacks.try_emplace(
0x111, std::initializer_list<memprof::FrameId>{1, 0});
MemProfData.CallStacks.try_emplace(
0x222, std::initializer_list<memprof::FrameId>{3, 2});
MemProfData.Records.try_emplace(0x9999, IndexedMR);
Writer.addMemProfData(MemProfData, Err);

auto Profile = Writer.writeBuffer();
readProfile(std::move(Profile));
Expand Down Expand Up @@ -681,19 +674,15 @@ TEST_F(InstrProfTest, test_memprof_merge) {
ASSERT_THAT_ERROR(Writer2.mergeProfileKind(InstrProfKind::MemProf),
Succeeded());

const FrameIdMapTy IdToFrameMap = getFrameMapping();
for (const auto &I : IdToFrameMap) {
Writer2.addMemProfFrame(I.first, I.getSecond(), Err);
}

const auto CSIdToCallStackMap = getCallStackMapping();
for (const auto &[CSId, CallStack] : CSIdToCallStackMap)
Writer2.addMemProfCallStack(CSId, CallStack, Err);

const IndexedMemProfRecord IndexedMR = makeRecordV2(
/*AllocFrames=*/{0x111, 0x222},
/*CallSiteFrames=*/{}, makePartialMIB(), memprof::getHotColdSchema());
Writer2.addMemProfRecord(/*Id=*/0x9999, IndexedMR);

memprof::IndexedMemProfData MemProfData;
MemProfData.Frames = getFrameMapping();
MemProfData.CallStacks = getCallStackMapping();
MemProfData.Records.try_emplace(0x9999, IndexedMR);
Writer2.addMemProfData(MemProfData, Err);

ASSERT_THAT_ERROR(Writer.mergeProfileKind(Writer2.getProfileKind()),
Succeeded());
Expand All @@ -714,9 +703,10 @@ TEST_F(InstrProfTest, test_memprof_merge) {

std::optional<memprof::FrameId> LastUnmappedFrameId;

memprof::FrameIdConverter<decltype(IdToFrameMap)> FrameIdConv(IdToFrameMap);
memprof::CallStackIdConverter<decltype(CSIdToCallStackMap)> CSIdConv(
CSIdToCallStackMap, FrameIdConv);
memprof::FrameIdConverter<decltype(MemProfData.Frames)> FrameIdConv(
MemProfData.Frames);
memprof::CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv(
MemProfData.CallStacks, FrameIdConv);

const ::llvm::memprof::MemProfRecord WantRecord =
IndexedMR.toMemProfRecord(CSIdConv);
Expand Down
2 changes: 1 addition & 1 deletion offload/DeviceRTL/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ set(include_directory ${devicertl_base_directory}/include)
set(source_directory ${devicertl_base_directory}/src)

set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx1010"
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
"gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
"gfx1151;gfx1152;gfx1153")
Expand Down