429 changes: 0 additions & 429 deletions llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Large diffs are not rendered by default.

76 changes: 0 additions & 76 deletions llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,82 +159,6 @@ class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 {
~MetadataStreamerMsgPackV5() = default;
};

// TODO: Rename MetadataStreamerV2 -> MetadataStreamerYamlV2.
class MetadataStreamerYamlV2 final : public MetadataStreamer {
private:
Metadata HSAMetadata;

void dump(StringRef HSAMetadataString) const;

void verify(StringRef HSAMetadataString) const;

AccessQualifier getAccessQualifier(StringRef AccQual) const;

AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const;

ValueKind getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const;

std::string getTypeName(Type *Ty, bool Signed) const;

std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;

Kernel::CodeProps::Metadata getHSACodeProps(
const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
Kernel::DebugProps::Metadata getHSADebugProps(
const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;

void emitPrintf(const Module &Mod);

void emitKernelLanguage(const Function &Func);

void emitKernelAttrs(const Function &Func);

void emitKernelArgs(const Function &Func, const GCNSubtarget &ST);

void emitKernelArg(const Argument &Arg);

void emitKernelArg(const DataLayout &DL, Type *Ty, Align Alignment,
ValueKind ValueKind,
MaybeAlign PointeeAlign = std::nullopt,
StringRef Name = "", StringRef TypeName = "",
StringRef BaseTypeName = "", StringRef AccQual = "",
StringRef TypeQual = "");

void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST);

const Metadata &getHSAMetadata() const {
return HSAMetadata;
}

protected:
void emitVersion() override;
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
msgpack::ArrayDocNode Args) override {
llvm_unreachable("Dummy override should not be invoked!");
}
void emitKernelAttrs(const Function &Func,
msgpack::MapDocNode Kern) override {
llvm_unreachable("Dummy override should not be invoked!");
}

public:
MetadataStreamerYamlV2() = default;
~MetadataStreamerYamlV2() = default;

bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;

void begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) override;

void end() override;

void emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) override;
};

} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
Expand Down
48 changes: 17 additions & 31 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
Expand All @@ -1361,7 +1361,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
Expand Down Expand Up @@ -2861,7 +2861,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
return nullptr;
}
if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (isHsaAbi(getSTI())) {
if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
Expand Down Expand Up @@ -4920,7 +4920,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
return TokError("directive only supported for amdgcn architecture");

if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
if (!isHsaAbi(getSTI()))
return TokError("directive only supported for amdhsa OS");

StringRef KernelName;
Expand Down Expand Up @@ -5480,33 +5480,15 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
}

bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
const char *AssemblerDirectiveBegin;
const char *AssemblerDirectiveEnd;
std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
isHsaAbiVersion3AndAbove(&getSTI())
? std::pair(HSAMD::V3::AssemblerDirectiveBegin,
HSAMD::V3::AssemblerDirectiveEnd)
: std::pair(HSAMD::AssemblerDirectiveBegin,
HSAMD::AssemblerDirectiveEnd);

if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
return Error(getLoc(),
(Twine(AssemblerDirectiveBegin) + Twine(" directive is "
"not available on non-amdhsa OSes")).str());
}
assert(isHsaAbi(getSTI()));

std::string HSAMetadataString;
if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
HSAMetadataString))
if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
return true;

if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");
} else {
if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");
}
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");

return false;
}
Expand Down Expand Up @@ -5649,7 +5631,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();

if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (isHsaAbi(getSTI())) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();

Expand All @@ -5672,8 +5654,12 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_amdgpu_isa")
return ParseDirectiveISAVersion();

if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
return ParseDirectiveHSAMetadata();
if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
Twine(" directive is "
"not available on non-amdhsa OSes"))
.str());
}
}

if (IDVal == ".amdgcn_target")
Expand Down Expand Up @@ -7765,7 +7751,7 @@ void AMDGPUAsmParser::onBeginOfFile() {
// TODO: Should try to check code object version from directive???
AMDGPU::getAmdhsaCodeObjectVersion());

if (isHsaAbiVersion3AndAbove(&getSTI()))
if (isHsaAbi(getSTI()))
getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

Expand Down
7 changes: 2 additions & 5 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,8 +424,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
switch (CodeObjectVersion) {
default:
break;
case AMDGPU::AMDHSA_COV2:
break;
case AMDGPU::AMDHSA_COV3:
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
Expand Down Expand Up @@ -545,7 +543,7 @@ void AMDGPUTargetELFStreamer::EmitNote(
unsigned NoteFlags = 0;
// TODO Apparently, this is currently needed for OpenCL as mentioned in
// https://reviews.llvm.org/D74995
if (STI.getTargetTriple().getOS() == Triple::AMDHSA)
if (isHsaAbi(STI))
NoteFlags = ELF::SHF_ALLOC;

S.pushSection();
Expand Down Expand Up @@ -604,11 +602,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
}

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
assert(STI.getTargetTriple().getOS() == Triple::AMDHSA);
assert(isHsaAbi(STI));

if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
switch (*HsaAbiVer) {
case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return getEFlagsV3();
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
Expand Down
70 changes: 5 additions & 65 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,16 @@ namespace llvm {

namespace AMDGPU {

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
return std::nullopt;

switch (AmdhsaCodeObjectVersion) {
case 2:
return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
case 3:
return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
case 4:
Expand All @@ -137,12 +140,6 @@ std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
}
}

bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
return false;
}

bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
Expand All @@ -161,11 +158,6 @@ bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
return false;
}

bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) ||
isHsaAbiVersion5(STI);
}

unsigned getAmdhsaCodeObjectVersion() {
return AmdhsaCodeObjectVersion;
}
Expand All @@ -182,7 +174,6 @@ unsigned getCodeObjectVersion(const Module &M) {

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
case AMDHSA_COV2:
case AMDHSA_COV3:
case AMDHSA_COV4:
return 48;
Expand All @@ -197,7 +188,6 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
case AMDHSA_COV2:
case AMDHSA_COV3:
case AMDHSA_COV4:
return 24;
Expand All @@ -209,7 +199,6 @@ unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
case AMDHSA_COV2:
case AMDHSA_COV3:
case AMDHSA_COV4:
return 32;
Expand All @@ -221,7 +210,6 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
case AMDHSA_COV2:
case AMDHSA_COV3:
case AMDHSA_COV4:
return 40;
Expand Down Expand Up @@ -786,54 +774,6 @@ std::string AMDGPUTargetID::toString() const {
std::string Features;
if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
switch (CodeObjectVersion) {
case AMDGPU::AMDHSA_COV2:
// Code object V2 only supported specific processors and had fixed
// settings for the XNACK.
if (Processor == "gfx600") {
} else if (Processor == "gfx601") {
} else if (Processor == "gfx602") {
} else if (Processor == "gfx700") {
} else if (Processor == "gfx701") {
} else if (Processor == "gfx702") {
} else if (Processor == "gfx703") {
} else if (Processor == "gfx704") {
} else if (Processor == "gfx705") {
} else if (Processor == "gfx801") {
if (!isXnackOnOrAny())
report_fatal_error(
"AMD GPU code object V2 does not support processor " +
Twine(Processor) + " without XNACK");
} else if (Processor == "gfx802") {
} else if (Processor == "gfx803") {
} else if (Processor == "gfx805") {
} else if (Processor == "gfx810") {
if (!isXnackOnOrAny())
report_fatal_error(
"AMD GPU code object V2 does not support processor " +
Twine(Processor) + " without XNACK");
} else if (Processor == "gfx900") {
if (isXnackOnOrAny())
Processor = "gfx901";
} else if (Processor == "gfx902") {
if (isXnackOnOrAny())
Processor = "gfx903";
} else if (Processor == "gfx904") {
if (isXnackOnOrAny())
Processor = "gfx905";
} else if (Processor == "gfx906") {
if (isXnackOnOrAny())
Processor = "gfx907";
} else if (Processor == "gfx90c") {
if (isXnackOnOrAny())
report_fatal_error(
"AMD GPU code object V2 does not support processor " +
Twine(Processor) + " with XNACK being ON or ANY");
} else {
report_fatal_error(
"AMD GPU code object V2 does not support processor " +
Twine(Processor));
}
break;
case AMDGPU::AMDHSA_COV3:
// xnack.
if (isXnackOnOrAny())
Expand Down
9 changes: 2 additions & 7 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,15 @@ namespace AMDGPU {
struct IsaVersion;

enum {
AMDHSA_COV2 = 2,
AMDHSA_COV3 = 3,
AMDHSA_COV4 = 4,
AMDHSA_COV5 = 5
};

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI);
/// \returns HSA OS ABI Version identification.
std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
Expand All @@ -63,9 +61,6 @@ bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 and above,
/// false otherwise.
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);

/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@
declare i64 @llvm.amdgcn.dispatch.id() #1

; GCN-LABEL: {{^}}dispatch_id:
; GCN: .amd_kernel_code_t
; GCN: enable_sgpr_dispatch_id = 1

; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; GCN: .amdhsa_user_sgpr_dispatch_id 1
define amdgpu_kernel void @dispatch_id(ptr addrspace(1) %out) #0 {
%tmp0 = call i64 @llvm.amdgcn.dispatch.id()
store i64 %tmp0, ptr addrspace(1) %out
Expand All @@ -19,4 +17,4 @@ attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
; FIXME: Error on non-HSA target

; GCN-LABEL: {{^}}test:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
%dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
%value = load i32, ptr addrspace(4) %dispatch_ptr
Expand All @@ -17,4 +17,4 @@ declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
attributes #0 = { readnone }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,ALL %s
; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,HSA,ALL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,OS-MESA3D,ALL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-unknown -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,ALL %s

; ALL-LABEL: {{^}}test:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 8
; HSA: kernarg_segment_alignment = 4

; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 1
; CO-V4: s_load_dword s{{[0-9]+}}, s[4:5], 0xa

; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa

; HSA: .amdhsa_kernarg_size 8
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
%kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
Expand All @@ -19,13 +19,14 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
}

; ALL-LABEL: {{^}}test_implicit:
; HSA: kernarg_segment_byte_size = 8
; OS-MESA3D: kernarg_segment_byte_size = 24
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4

; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15

; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x15

; HSA: .amdhsa_kernarg_size 8
define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10
Expand All @@ -35,16 +36,16 @@ define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
}

; ALL-LABEL: {{^}}test_implicit_alignment:
; HSA: kernarg_segment_byte_size = 12
; OS-MESA3D: kernarg_segment_byte_size = 28
; CO-V2: kernarg_segment_alignment = 4

; OS-MESA3D: kernarg_segment_alignment = 4

; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]

; HSA: .amdhsa_kernarg_size 12
define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load i32, ptr addrspace(4) %implicitarg.ptr
Expand All @@ -53,16 +54,16 @@ define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i
}

; ALL-LABEL: {{^}}opencl_test_implicit_alignment
; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 28
; CO-V2: kernarg_segment_alignment = 4

; OS-MESA3D: kernarg_segment_alignment = 4

; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]

; HSA: .amdhsa_kernarg_size 64
define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load i32, ptr addrspace(4) %implicitarg.ptr
Expand All @@ -71,12 +72,15 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out,
}

; ALL-LABEL: {{^}}test_no_kernargs:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 0
; CO-V2: kernarg_segment_byte_size = 0
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 0
; OS-MESA3D: kernarg_segment_byte_size = 0
; OS-MESA3D: kernarg_segment_alignment = 4

; HSA: s_mov_b64 [[OFFSET_NULL:s\[[0-9]+:[0-9]+\]]], 40{{$}}
; HSA: s_load_dword s{{[0-9]+}}, [[OFFSET_NULL]]

; HSA: .amdhsa_kernarg_size 0
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 0
define amdgpu_kernel void @test_no_kernargs() #1 {
%kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
Expand All @@ -86,9 +90,9 @@ define amdgpu_kernel void @test_no_kernargs() #1 {
}

; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs:
; HSA: kernarg_segment_byte_size = 48
; OS-MESA3D: kernarg_segment_byte_size = 16
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 48
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
Expand All @@ -97,9 +101,9 @@ define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs()
}

; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up:
; HSA: kernarg_segment_byte_size = 40
; OS-MESA3D: kernarg_segment_byte_size = 16
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 40
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
Expand All @@ -124,4 +128,4 @@ attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
; FIXME: Error on non-hsa target

; GCN-LABEL: {{^}}test:
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_queue_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
%queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
%value = load i32, ptr addrspace(4) %queue_ptr
Expand All @@ -17,4 +17,4 @@ declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
attributes #0 = { nounwind readnone }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
96 changes: 47 additions & 49 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll
Original file line number Diff line number Diff line change
@@ -1,35 +1,33 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

; ALL-LABEL: {{^}}test_workgroup_id_x:

; CO-V2: .amd_kernel_code_t
; CO-V2: user_sgpr_count = 6
; CO-V2: enable_sgpr_workgroup_id_x = 1
; CO-V2: enable_sgpr_workgroup_id_y = 0
; CO-V2: enable_sgpr_workgroup_id_z = 0
; CO-V2: enable_sgpr_workgroup_info = 0
; CO-V2: enable_vgpr_workitem_id = 0
; CO-V2: enable_sgpr_grid_workgroup_count_x = 0
; CO-V2: enable_sgpr_grid_workgroup_count_y = 0
; CO-V2: enable_sgpr_grid_workgroup_count_z = 0
; CO-V2: .end_amd_kernel_code_t
; MESA3D: .amd_kernel_code_t
; MESA3D: user_sgpr_count = 6
; MESA3D: enable_sgpr_workgroup_id_x = 1
; MESA3D: enable_sgpr_workgroup_id_y = 0
; MESA3D: enable_sgpr_workgroup_id_z = 0
; MESA3D: enable_sgpr_workgroup_info = 0
; MESA3D: enable_vgpr_workitem_id = 0
; MESA3D: enable_sgpr_grid_workgroup_count_x = 0
; MESA3D: enable_sgpr_grid_workgroup_count_y = 0
; MESA3D: enable_sgpr_grid_workgroup_count_z = 0
; MESA3D: .end_amd_kernel_code_t

; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}}
; CO-V2: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}}
; MESA3D: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}}

; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]

; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
Expand All @@ -41,22 +39,22 @@ define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
}

; ALL-LABEL: {{^}}test_workgroup_id_y:
; CO-V2: user_sgpr_count = 6
; CO-V2: enable_sgpr_workgroup_id_x = 1
; CO-V2: enable_sgpr_workgroup_id_y = 1
; CO-V2: enable_sgpr_workgroup_id_z = 0
; CO-V2: enable_sgpr_workgroup_info = 0
; CO-V2: enable_sgpr_grid_workgroup_count_x = 0
; CO-V2: enable_sgpr_grid_workgroup_count_y = 0
; CO-V2: enable_sgpr_grid_workgroup_count_z = 0
; MESA3D: user_sgpr_count = 6
; MESA3D: enable_sgpr_workgroup_id_x = 1
; MESA3D: enable_sgpr_workgroup_id_y = 1
; MESA3D: enable_sgpr_workgroup_id_z = 0
; MESA3D: enable_sgpr_workgroup_info = 0
; MESA3D: enable_sgpr_grid_workgroup_count_x = 0
; MESA3D: enable_sgpr_grid_workgroup_count_y = 0
; MESA3D: enable_sgpr_grid_workgroup_count_z = 0

; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}}

; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]

; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
Expand All @@ -68,30 +66,30 @@ define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
}

; ALL-LABEL: {{^}}test_workgroup_id_z:
; CO-V2: user_sgpr_count = 6
; CO-V2: enable_sgpr_workgroup_id_x = 1
; CO-V2: enable_sgpr_workgroup_id_y = 0
; CO-V2: enable_sgpr_workgroup_id_z = 1
; CO-V2: enable_sgpr_workgroup_info = 0
; CO-V2: enable_vgpr_workitem_id = 0
; CO-V2: enable_sgpr_private_segment_buffer = 1
; CO-V2: enable_sgpr_dispatch_ptr = 0
; CO-V2: enable_sgpr_queue_ptr = 0
; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
; CO-V2: enable_sgpr_dispatch_id = 0
; CO-V2: enable_sgpr_flat_scratch_init = 0
; CO-V2: enable_sgpr_private_segment_size = 0
; CO-V2: enable_sgpr_grid_workgroup_count_x = 0
; CO-V2: enable_sgpr_grid_workgroup_count_y = 0
; CO-V2: enable_sgpr_grid_workgroup_count_z = 0
; MESA3D: user_sgpr_count = 6
; MESA3D: enable_sgpr_workgroup_id_x = 1
; MESA3D: enable_sgpr_workgroup_id_y = 0
; MESA3D: enable_sgpr_workgroup_id_z = 1
; MESA3D: enable_sgpr_workgroup_info = 0
; MESA3D: enable_vgpr_workitem_id = 0
; MESA3D: enable_sgpr_private_segment_buffer = 1
; MESA3D: enable_sgpr_dispatch_ptr = 0
; MESA3D: enable_sgpr_queue_ptr = 0
; MESA3D: enable_sgpr_kernarg_segment_ptr = 1
; MESA3D: enable_sgpr_dispatch_id = 0
; MESA3D: enable_sgpr_flat_scratch_init = 0
; MESA3D: enable_sgpr_private_segment_size = 0
; MESA3D: enable_sgpr_grid_workgroup_count_x = 0
; MESA3D: enable_sgpr_grid_workgroup_count_y = 0
; MESA3D: enable_sgpr_grid_workgroup_count_z = 0

; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}}

; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]

; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
Expand All @@ -106,4 +104,4 @@ attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
38 changes: 19 additions & 19 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
Expand All @@ -16,7 +16,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
; MESA-NEXT: .long 132{{$}}

; ALL-LABEL: {{^}}test_workitem_id_x:
; CO-V2: enable_vgpr_workitem_id = 0
; MESA3D: enable_vgpr_workitem_id = 0

; ALL-NOT: v0
; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0
Expand All @@ -33,9 +33,9 @@ define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 {
; MESA-NEXT: .long 2180{{$}}

; ALL-LABEL: {{^}}test_workitem_id_y:
; CO-V2: enable_vgpr_workitem_id = 1
; CO-V2-NOT: v1
; CO-V2: {{buffer|flat}}_store_dword {{.*}}v1
; MESA3D: enable_vgpr_workitem_id = 1
; MESA3D-NOT: v1
; MESA3D: {{buffer|flat}}_store_dword {{.*}}v1

; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
Expand All @@ -51,9 +51,9 @@ define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 {
; MESA-NEXT: .long 4228{{$}}

; ALL-LABEL: {{^}}test_workitem_id_z:
; CO-V2: enable_vgpr_workitem_id = 2
; CO-V2-NOT: v2
; CO-V2: {{buffer|flat}}_store_dword {{.*}}v2
; MESA3D: enable_vgpr_workitem_id = 2
; MESA3D-NOT: v2
; MESA3D: {{buffer|flat}}_store_dword {{.*}}v2

; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
Expand Down Expand Up @@ -129,7 +129,7 @@ define void @test_workitem_id_z_func(ptr addrspace(1) %out) #1 {
; FIXME: Should be able to avoid enabling in kernel inputs
; FIXME: Packed tid should avoid the and
; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only:
; CO-V2: enable_vgpr_workitem_id = 0
; MESA3D: enable_vgpr_workitem_id = 0

; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
Expand All @@ -150,7 +150,7 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) !reqd_work_
}

; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only:
; CO-V2: enable_vgpr_workitem_id = 1
; MESA3D: enable_vgpr_workitem_id = 1

; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
Expand All @@ -172,7 +172,7 @@ define amdgpu_kernel void @test_reqd_workgroup_size_y_only(ptr %out) !reqd_work_
}

; ALL-LABEL: {{^}}test_reqd_workgroup_size_z_only:
; CO-V2: enable_vgpr_workitem_id = 2
; MESA3D: enable_vgpr_workitem_id = 2

; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
Expand Down Expand Up @@ -200,4 +200,4 @@ attributes #1 = { nounwind }
!2 = !{i32 1, i32 1, i32 64}

!llvm.module.flags = !{!99}
!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!99 = !{i32 1, !"amdgpu_code_object_version", i32 400}
48 changes: 28 additions & 20 deletions llvm/test/CodeGen/AMDGPU/addrspacecast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s

; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
Expand All @@ -24,6 +20,11 @@

; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]

; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0

; At most 2 digits. Make sure src_shared_base is not counted as a high
; number SGPR.

Expand Down Expand Up @@ -59,10 +60,6 @@ define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
}

; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
Expand All @@ -82,6 +79,11 @@ define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {

; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]

; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0

; HSA: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
Expand All @@ -91,13 +93,14 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p

; no-op
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]

; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 {
%stof = addrspacecast ptr addrspace(1) %ptr to ptr
store volatile i32 7, ptr %stof
Expand Down Expand Up @@ -131,9 +134,6 @@ define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4)
}

; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
Expand All @@ -146,16 +146,17 @@ define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4)
; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
; CI-DAG: ds_write_b32 [[VCASTPTR]], v[[K]]
; GFX9-DAG: ds_write_b32 [[CASTPTR]], v[[K]]

; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
%ftos = addrspacecast ptr %ptr to ptr addrspace(3)
store volatile i32 0, ptr addrspace(3) %ftos
ret void
}

; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
; CI-DAG v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
Expand All @@ -171,14 +172,17 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
; CI: buffer_store_dword v[[K]], [[VCASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
; GFX9: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}

; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {
%ftos = addrspacecast ptr %ptr to ptr addrspace(5)
store volatile i32 0, ptr addrspace(5) %ftos
ret void
}

; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
Expand All @@ -188,17 +192,20 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {

; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}}

; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 {
%ftos = addrspacecast ptr %ptr to ptr addrspace(1)
store volatile i32 0, ptr addrspace(1) %ftos
ret void
}

; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0

; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
%ftos = addrspacecast ptr %ptr to ptr addrspace(4)
load volatile i32, ptr addrspace(4) %ftos
Expand Down Expand Up @@ -279,13 +286,14 @@ define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {


; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast:
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]

; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
%cast = addrspacecast ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr
store volatile i32 7, ptr %cast
Expand Down Expand Up @@ -416,4 +424,4 @@ attributes #2 = { nounwind readnone }
attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
34 changes: 15 additions & 19 deletions llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
; RUN: llc < %s -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
; RUN: llc < %s -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc < %s -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s

; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
; RUN: opt < %s -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
; RUN: opt < %s -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s

; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=r600 -mcpu=cypress | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC

; HSAOPT: @mova_same_clause.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] poison, align 4
; HSAOPT: @high_alignment.stack = internal unnamed_addr addrspace(3) global [256 x [8 x i32]] poison, align 16
Expand All @@ -24,9 +24,7 @@
; R600: LDS_READ
; R600: LDS_READ

; HSA-PROMOTE: .amd_kernel_code_t
; HSA-PROMOTE: workgroup_group_segment_byte_size = 5120
; HSA-PROMOTE: .end_amd_kernel_code_t
; HSA-PROMOTE: .amdhsa_group_segment_fixed_size 5120

; HSA-PROMOTE: s_load_dwordx2 s[{{[0-9:]+}}], s[4:5], 0x1

Expand All @@ -35,14 +33,12 @@
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32

; HSA-ALLOCA: .amd_kernel_code_t
; FIXME: Creating the emergency stack slots causes us to over-estimate scratch
; by 4 bytes.
; HSA-ALLOCA: workitem_private_segment_byte_size = 24
; HSA-ALLOCA: .end_amd_kernel_code_t
; HSA-ALLOCA: .amdhsa_private_segment_fixed_size 24

; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7
; HSA-ALLOCA: s_add_i32 s6, s6, s9
; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7
; HSA-ALLOCA: s_lshr_b32 flat_scratch_hi, s6, 8

; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen ; encoding: [0x00,0x10,0x70,0xe0
Expand Down Expand Up @@ -534,7 +530,7 @@ attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-s
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="1,256" }

!llvm.module.flags = !{!99}
!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!99 = !{i32 1, !"amdgpu_code_object_version", i32 400}

; HSAOPT: !1 = !{}
; HSAOPT: !2 = !{i32 0, i32 257}
Expand Down
22 changes: 10 additions & 12 deletions llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,14 @@ define amdgpu_kernel void @min_1024_max_1024() #3 {
attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

; HSAMD: NT_AMD_HSA_METADATA (AMD HSA Metadata)
; HSAMD: Version: [ 1, 0 ]
; HSAMD: Kernels:
; HSAMD: - Name: min_64_max_64
; HSAMD: MaxFlatWorkGroupSize: 64
; HSAMD: - Name: min_64_max_128
; HSAMD: MaxFlatWorkGroupSize: 128
; HSAMD: - Name: min_128_max_128
; HSAMD: MaxFlatWorkGroupSize: 128
; HSAMD: - Name: min_1024_max_1024
; HSAMD: MaxFlatWorkGroupSize: 1024
; HSAMD: amdhsa.kernels
; HSAMD: .max_flat_workgroup_size: 64
; HSAMD: .name: min_64_max_64
; HSAMD: .max_flat_workgroup_size: 128
; HSAMD: .name: min_64_max_128
; HSAMD: .max_flat_workgroup_size: 128
; HSAMD: .name: min_128_max_128
; HSAMD: .max_flat_workgroup_size: 1024
; HSAMD: .name: min_1024_max_1024
18 changes: 4 additions & 14 deletions llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s

; Make sure to run a GPU with the SGPR allocation bug.

Expand Down Expand Up @@ -32,7 +32,6 @@ define void @indirect_use_vcc() #1 {
}

; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
Expand Down Expand Up @@ -61,7 +60,6 @@ define void @indirect_use_flat_scratch() #1 {
}

; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
Expand All @@ -87,7 +85,6 @@ define void @indirect_use_10_vgpr() #0 {
}

; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN: is_dynamic_callstack = 0
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
call void @indirect_use_10_vgpr()
Expand Down Expand Up @@ -123,7 +120,6 @@ define void @indirect_use_80_sgpr() #1 {
}

; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 84
; VI-NOBUG: ; NumSgprs: 86
; VI-BUG: ; NumSgprs: 96
Expand Down Expand Up @@ -159,7 +155,6 @@ define void @indirect_use_stack() #1 {
}

; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: is_dynamic_callstack = 0
; GCN: ScratchSize: 2132
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
call void @indirect_use_stack()
Expand All @@ -169,7 +164,6 @@ define amdgpu_kernel void @indirect_2_level_use_stack() #0 {

; Should be maximum of callee usage
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: is_dynamic_callstack = 0
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
call void @use_stack0()
Expand All @@ -181,7 +175,6 @@ define amdgpu_kernel void @multi_call_use_use_stack() #0 {
declare void @external() #0

; GCN-LABEL: {{^}}usage_external:
; GCN: is_dynamic_callstack = 1
; NumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
Expand All @@ -196,7 +189,6 @@ define amdgpu_kernel void @usage_external() #0 {
declare void @external_recurse() #2

; GCN-LABEL: {{^}}usage_external_recurse:
; GCN: is_dynamic_callstack = 1
; NumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
Expand Down Expand Up @@ -229,9 +221,7 @@ ret:
}

; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: is_ptr64 = 1
; GCN: is_dynamic_callstack = 1
; GCN: workitem_private_segment_byte_size = 18448{{$}}
; GCN: .amdhsa_private_segment_fixed_size 18448
;
; GCN-V5-LABEL: {{^}}usage_direct_recursion:
; GCN-V5: .amdhsa_private_segment_fixed_size 2064{{$}}
Expand Down
29 changes: 19 additions & 10 deletions llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ define void @use_workitem_id_yz() #1 {
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v31
Expand All @@ -120,13 +119,14 @@ define void @use_workitem_id_yz() #1 {
; FIXEDABI-NOT: v31

; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
call void @use_workitem_id_x()
ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1

; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
Expand All @@ -137,13 +137,14 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
; FIXEDABI-NOT: v2

; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
call void @use_workitem_id_y()
ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2

; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
Expand All @@ -152,6 +153,8 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
; FIXEDABI-NOT: v1

; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
call void @use_workitem_id_z()
ret void
Expand Down Expand Up @@ -284,40 +287,43 @@ define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {


; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; FIXEDABI-NOT: v0
; FIXEDABI: v_mov_b32_e32 v31, v0
; FIXEDABI: v_mov_b32_e32 v0, 0x22b

; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
call void @other_arg_use_workitem_id_x(i32 555)
ret void
}


; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1

; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
; FIXEDABI: v_mov_b32_e32 v0, 0x22b

; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
call void @other_arg_use_workitem_id_y(i32 555)
ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2

; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
; FIXEDABI: v_mov_b32_e32 v0, 0x22b

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
call void @other_arg_use_workitem_id_z(i32 555)
ret void
Expand Down Expand Up @@ -374,7 +380,6 @@ define void @too_many_args_use_workitem_id_x(
}

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
Expand All @@ -385,6 +390,8 @@ define void @too_many_args_use_workitem_id_x(
; FIXEDABI-DAG: v_mov_b32_e32 v31, v0

; FIXEDABI: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
call void @too_many_args_use_workitem_id_x(
i32 10, i32 20, i32 30, i32 40,
Expand Down Expand Up @@ -639,7 +646,6 @@ define void @too_many_args_use_workitem_id_xyz(
}

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
; GCN: enable_vgpr_workitem_id = 2

; GCN-DAG: s_mov_b32 s32, 0

Expand All @@ -652,6 +658,8 @@ define void @too_many_args_use_workitem_id_xyz(
; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]

; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
call void @too_many_args_use_workitem_id_xyz(
i32 10, i32 20, i32 30, i32 40,
Expand Down Expand Up @@ -729,7 +737,6 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
}

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
; GCN: enable_vgpr_workitem_id = 2

; GCN-NOT: v0
; GCN-DAG: v_lshlrev_b32_e32 v1, 10, v1
Expand All @@ -739,6 +746,8 @@ define void @too_many_args_use_workitem_id_x_stack_yz(

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
call void @too_many_args_use_workitem_id_x_stack_yz(
i32 10, i32 20, i32 30, i32 40,
Expand Down Expand Up @@ -804,4 +813,4 @@ attributes #1 = { nounwind noinline }
attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
9 changes: 5 additions & 4 deletions llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@


; GCN-LABEL: {{^}}divergent_if_endif:
; VGPR: workitem_private_segment_byte_size = 16{{$}}


; GCN: {{^}}; %bb.0:
; GCN: s_mov_b32 m0, -1
Expand Down Expand Up @@ -63,6 +61,8 @@
; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]]

; VGPR: .amdhsa_private_segment_fixed_size 16
define amdgpu_kernel void @divergent_if_endif(ptr addrspace(1) %out) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -82,7 +82,6 @@ endif:
}

; GCN-LABEL: {{^}}divergent_loop:
; VGPR: workitem_private_segment_byte_size = 20{{$}}

; GCN: {{^}}; %bb.0:
; GCN-DAG: s_mov_b32 m0, -1
Expand Down Expand Up @@ -133,6 +132,8 @@ endif:
; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]]

; VGPR: .amdhsa_private_segment_fixed_size 20
define amdgpu_kernel void @divergent_loop(ptr addrspace(1) %out) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -274,4 +275,4 @@ attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
19 changes: 9 additions & 10 deletions llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
Original file line number Diff line number Diff line change
@@ -1,25 +1,24 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s


; There are no stack objects even though flat is used by default, so
; flat_scratch_init should be disabled.

; ALL-LABEL: {{^}}test:
; HSA: .amd_kernel_code_t
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: .end_amd_kernel_code_t

; ALL-NOT: flat_scr

; HSA-DEFAULT: flat_store_dword
; HSA-NODEFAULT: buffer_store_dword
; HSA-NOADDR64: flat_store_dword

; HSA: .amdhsa_user_sgpr_flat_scratch_init 0

; NOHSA-DEFAULT: buffer_store_dword
; NOHSA-NODEFAULT: flat_store_dword
; NOHSA-NOADDR64: flat_store_dword
Expand Down Expand Up @@ -53,4 +52,4 @@ entry:
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
56 changes: 16 additions & 40 deletions llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll
Original file line number Diff line number Diff line change
@@ -1,32 +1,26 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefix=CI -check-prefix=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefix=CI -check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s

; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s

; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s

; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=CI,HSA-CI-V2,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK-V2,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s

; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s

; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s

; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s

; GCN-LABEL: {{^}}no_vcc_no_flat:

; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1

; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
Expand All @@ -44,9 +38,6 @@ entry:

; GCN-LABEL: {{^}}vcc_no_flat:

; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1

; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
Expand All @@ -64,9 +55,6 @@ entry:

; GCN-LABEL: {{^}}no_vcc_flat:

; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1

; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
Expand All @@ -84,9 +72,6 @@ entry:

; GCN-LABEL: {{^}}vcc_flat:

; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1

; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
Expand All @@ -107,9 +92,6 @@ entry:

; GCN-LABEL: {{^}}use_flat_scr:

; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1

; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
Expand All @@ -127,9 +109,6 @@ entry:

; GCN-LABEL: {{^}}use_flat_scr_lo:

; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1

; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
Expand All @@ -147,9 +126,6 @@ entry:

; GCN-LABEL: {{^}}use_flat_scr_hi:

; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1

; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
Expand All @@ -168,4 +144,4 @@ entry:
attributes #0 = { nounwind }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s

; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
; CHECK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack-"
define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind {
store float 0.0, ptr addrspace(1) %out0
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}

!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/hsa-default-device.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
; Make sure that with an HSA triple, we don't default to an
; unsupported device.

; CHECK: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; CHECK: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700"
define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind {
store float 0.0, ptr addrspace(1) %out0
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
Original file line number Diff line number Diff line change
@@ -1,89 +1,89 @@
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}test_default_ci:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
define amdgpu_kernel void @test_default_ci(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #0 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
}

; GCN-LABEL: {{^}}test_default_vi:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #1 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
}

; GCN-LABEL: {{^}}test_f64_denormals:
; GCN: float_mode = 192
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 192
define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #2 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
}

; GCN-LABEL: {{^}}test_f32_denormals:
; GCN: float_mode = 48
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 48
define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #3 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
}

; GCN-LABEL: {{^}}test_f32_f64_denormals:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #4 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
}

; GCN-LABEL: {{^}}test_no_denormals:
; GCN: float_mode = 0
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 0
define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #5 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
}

; GCN-LABEL: {{^}}test_no_dx10_clamp_vi:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 0
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 0
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
define amdgpu_kernel void @test_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #6 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
}

; GCN-LABEL: {{^}}test_no_ieee_mode_vi:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 0
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 0
; GCN: FloatMode: 240
define amdgpu_kernel void @test_no_ieee_mode_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #7 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
}

; GCN-LABEL: {{^}}test_no_ieee_mode_no_dx10_clamp_vi:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 0
; GCN: enable_ieee_mode = 0
; GCN: .amdhsa_dx10_clamp 0
; GCN: .amdhsa_ieee_mode 0
; GCN: FloatMode: 240
define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #8 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
Expand All @@ -101,4 +101,4 @@ attributes #7 = { nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" }
attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/hsa-func.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj --symbols -S --sd - | FileCheck --check-prefix=ELF %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa --amdhsa-code-object-version=4 -mcpu=kaveri | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF

; The SHT_NOTE section contains the output from the .hsa_code_object_*
; directives.
Expand All @@ -18,12 +18,13 @@
; ELF: }

; ELF: SHT_NOTE
; ELF: 0000: 04000000 08000000 01000000 414D4400
; ELF: 0010: 02000000 01000000 04000000 1B000000

; ELF: 0020: 03000000 414D4400 04000700 07000000
; ELF: 0030: 00000000 00000000 414D4400 414D4447
; ELF: 0040: 50550000
; ELF: 0000: 07000000 4F000000 20000000 414D4447
; ELF: 0010: 50550000 83AE616D 64687361 2E6B6572
; ELF: 0020: 6E656C73 90AD616D 64687361 2E746172
; ELF: 0030: 676574BD 616D6467 636E2D75 6E6B6E6F
; ELF: 0040: 776E2D61 6D646873 612D2D67 66783730
; ELF: 0050: 30AE616D 64687361 2E766572 73696F6E
; ELF: 0060: 92010100

; ELF: Symbol {
; ELF: Name: simple
Expand All @@ -32,9 +33,8 @@
; ELF: }

; HSA: .text
; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-CI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700"
; HSA-VI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx801"

; HSA-NOT: .amdgpu_hsa_kernel simple
; HSA: .globl simple
Expand Down Expand Up @@ -69,4 +69,4 @@ entry:
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
35 changes: 0 additions & 35 deletions llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v2.ll

This file was deleted.

31 changes: 0 additions & 31 deletions llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll

This file was deleted.

92 changes: 0 additions & 92 deletions llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll

This file was deleted.

1,931 changes: 0 additions & 1,931 deletions llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll

This file was deleted.

313 changes: 0 additions & 313 deletions llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll

This file was deleted.

95 changes: 0 additions & 95 deletions llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll

This file was deleted.

11 changes: 0 additions & 11 deletions llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll

This file was deleted.

14 changes: 0 additions & 14 deletions llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll

This file was deleted.

12 changes: 0 additions & 12 deletions llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll

This file was deleted.

12 changes: 0 additions & 12 deletions llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll

This file was deleted.

Loading