diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 6c36f8ad9b6a9..78a3ec7f0c266 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -3080,9 +3080,38 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32); const unsigned Opc = gwsIntrinToOpcode(IntrID); + + const MCInstrDesc &InstrDesc = TII->get(Opc); + int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); + + const TargetRegisterClass *DataRC = TII->getRegClass(InstrDesc, Data0Idx); + SmallVector Ops; - if (HasVSrc) - Ops.push_back(N->getOperand(2)); + if (HasVSrc) { + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); + + SDValue Data = N->getOperand(2); + MVT DataVT = Data.getValueType().getSimpleVT(); + if (TRI->isTypeLegalForClass(*DataRC, DataVT)) { + // Normal 32-bit case. + Ops.push_back(N->getOperand(2)); + } else { + // Operand is really 32-bits, but requires 64-bit alignment, so use the + // even aligned 64-bit register class. 
+ const SDValue RegSeqOps[] = { + CurDAG->getTargetConstant(DataRC->getID(), SL, MVT::i32), Data, + CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), + SDValue( + CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32), + 0), + CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)}; + + Ops.push_back(SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + SL, MVT::v2i32, RegSeqOps), + 0)); + } + } + Ops.push_back(OffsetField); Ops.push_back(Chain); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 650df2a87506a..c575714cf61cd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1946,20 +1946,52 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI, // The resource id offset is computed as ( + M0[21:16] + // offset field) % 64. Some versions of the programming guide omit the m0 // part, or claim it's from offset 0. - auto MIB = BuildMI(*MBB, &MI, DL, TII.get(gwsIntrinToOpcode(IID))); + + unsigned Opc = gwsIntrinToOpcode(IID); + const MCInstrDesc &InstrDesc = TII.get(Opc); if (HasVSrc) { Register VSrc = MI.getOperand(1).getReg(); - MIB.addReg(VSrc); - if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI)) - return false; - } + int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); + const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx); + const TargetRegisterClass *SubRC = + TRI.getSubRegisterClass(DataRC, AMDGPU::sub0); + + if (!SubRC) { + // 32-bit normal case. + if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI)) + return false; - MIB.addImm(ImmOffset) - .cloneMemRefs(MI); + BuildMI(*MBB, &MI, DL, InstrDesc) + .addReg(VSrc) + .addImm(ImmOffset) + .cloneMemRefs(MI); + } else { + // Requires even register alignment, so create 64-bit value and pad the + // top half with undef. 
+ Register DataReg = MRI->createVirtualRegister(DataRC); + if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI)) + return false; - TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0); + Register UndefReg = MRI->createVirtualRegister(SubRC); + BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); + BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), DataReg) + .addReg(VSrc) + .addImm(AMDGPU::sub0) + .addReg(UndefReg) + .addImm(AMDGPU::sub1); + + BuildMI(*MBB, &MI, DL, InstrDesc) + .addReg(DataReg) + .addImm(ImmOffset) + .cloneMemRefs(MI); + } + } else { + BuildMI(*MBB, &MI, DL, InstrDesc) + .addImm(ImmOffset) + .cloneMemRefs(MI); + } MI.eraseFromParent(); return true; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 68060553e558c..7a91a40e18cde 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -347,6 +347,11 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isRegKind() && getReg() == AMDGPU::SGPR_NULL; } + bool isAV_LdSt_32_Align2_RegOp() const { + return isRegClass(AMDGPU::VGPR_32RegClassID) || + isRegClass(AMDGPU::AGPR_32RegClassID); + } + bool isVRegWithInputMods() const; template bool isT16_Lo128VRegWithInputMods() const; template bool isT16VRegWithInputMods() const; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index b841171c285d8..040a7112d29c3 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -463,7 +463,7 @@ class DS_GWS_0D class DS_GWS_1D : DS_GWS { let has_gws_data0 = 1; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index da287e0243d71..b63d71dc2fde9 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ 
-491,6 +491,18 @@ void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo, printRegularOperand(MI, OpNo, STI, O); } +void AMDGPUInstPrinter::printAVLdSt32Align2RegOp(const MCInst *MI, + unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + MCRegister Reg = MI->getOperand(OpNo).getReg(); + + // On targets with an even alignment requirement, print only the sub0 half. + if (MCRegister SubReg = MRI.getSubReg(Reg, AMDGPU::sub0)) + Reg = SubReg; + printRegOperand(Reg, O, MRI); +} + void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index b27295e73ec99..564d6eea52328 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -77,6 +77,9 @@ class AMDGPUInstPrinter : public MCInstPrinter { raw_ostream &O); void printVINTRPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printAVLdSt32Align2RegOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d3023ecf84c53..3aef0bd31debe 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6429,8 +6429,6 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case AMDGPU::DS_GWS_INIT: case AMDGPU::DS_GWS_SEMA_BR: case AMDGPU::DS_GWS_BARRIER: - TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::data0); - [[fallthrough]]; case AMDGPU::DS_GWS_SEMA_V: case AMDGPU::DS_GWS_SEMA_P: case AMDGPU::DS_GWS_SEMA_RELEASE_ALL: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h 
index b4b4d30c9de45..3174bfafb4154 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1657,6 +1657,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { const TargetSchedModel &getSchedModel() const { return SchedModel; } + // FIXME: This should be removed; DS_GWS handling no longer calls it. // Enforce operand's \p OpName even alignment if required by target. // This is used if an operand is a 32 bit register but needs to be aligned // regardless. diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 5cff5f2248b02..272d4b5609dfb 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1328,6 +1328,17 @@ def VS_64_AlignTarget : SIRegisterClassLike<64, true, false, true>, let DecoderMethod = "decodeSrcRegOrImm9"; } + +// Special case for DS_GWS instructions. The register input is really +// 32-bit, but it needs to be even aligned on targets with a VGPR +// alignment requirement. 
+def AV_LdSt_32_Align2 : SIRegisterClassLike, + RegClassByHwMode< + [DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32], + [VGPR_32, VGPR_32, AV_64_Align2, VReg_64_Align2, VReg_64_Align2]> { + let DecoderMethod = "decodeAVLdSt<32>"; +} + class RegImmMatcher : AsmOperandClass { let Name = name; let RenderMethod = "addRegOrImmOperands"; @@ -1580,6 +1591,17 @@ foreach size = ["64", "96", "128", "160", "256", "1024" ] in { def AVLdSt_#size#_Align2 : AVLdStOperand("AV_LdSt_"#size#_Align2)>; } +def AV_LdSt_32_Align2_RegMatcher : AsmOperandClass { + let Name = "AV_LdSt_32_Align2_RegOp"; + let RenderMethod = "addRegOperands"; +} + +def AV_LdSt_32_Align2_RegOp : RegisterOperand { + let ParserMatchClass = AV_LdSt_32_Align2_RegMatcher; + let PrintMethod = "printAVLdSt32Align2RegOp"; + let EncoderMethod = "getAVOperandEncoding"; +} + //===----------------------------------------------------------------------===// // ACSrc_* Operands with an AGPR or an inline constant //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/gws_agpr.ll b/llvm/test/CodeGen/AMDGPU/gws_agpr.ll index 2082a519d4f83..d87dac1d69047 100644 --- a/llvm/test/CodeGen/AMDGPU/gws_agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/gws_agpr.ll @@ -3,128 +3,72 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=CHECK,GISEL %s define void @gws_init_offset0() #0 { -; SDAG-LABEL: gws_init_offset0: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a0 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: s_mov_b32 m0, 0 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: ds_gws_init a0 gds -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: gws_init_offset0: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: ;;#ASMSTART 
-; GISEL-NEXT: ; def a0 -; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 -; GISEL-NEXT: s_mov_b32 m0, 0 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: ds_gws_init v0 gds -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: gws_init_offset0: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_mov_b32 m0, 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: ds_gws_init a0 gds +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %val = call i32 asm "; def $0", "=a"() call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 0) ret void } define void @gws_init_offset63() #0 { -; SDAG-LABEL: gws_init_offset63: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a0 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: s_mov_b32 m0, 0 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: ds_gws_init a0 offset:63 gds -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: gws_init_offset63: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: ; def a0 -; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 -; GISEL-NEXT: s_mov_b32 m0, 0 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: ds_gws_init v0 offset:63 gds -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: gws_init_offset63: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_mov_b32 m0, 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: ds_gws_init a0 offset:63 gds +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %val = call i32 asm "; def $0", "=a"() call void 
@llvm.amdgcn.ds.gws.init(i32 %val, i32 63) ret void } define void @gws_init_sgpr_offset(i32 inreg %offset) #0 { -; SDAG-LABEL: gws_init_sgpr_offset: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a0 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: s_lshl_b32 m0, s16, 16 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: ds_gws_init a0 gds -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: gws_init_sgpr_offset: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: ; def a0 -; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 -; GISEL-NEXT: s_lshl_b32 m0, s16, 16 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: ds_gws_init v0 gds -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: gws_init_sgpr_offset: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_lshl_b32 m0, s16, 16 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: ds_gws_init a0 gds +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %val = call i32 asm "; def $0", "=a"() call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset) ret void } define amdgpu_kernel void @gws_init_agpr_offset() #0 { -; SDAG-LABEL: gws_init_agpr_offset: -; SDAG: ; %bb.0: -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a1 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: v_accvgpr_read_b32 v0, a1 -; SDAG-NEXT: v_readfirstlane_b32 s0, v0 -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a0 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: s_lshl_b32 m0, s0, 16 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: ds_gws_init a0 gds -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_endpgm -; -; GISEL-LABEL: gws_init_agpr_offset: -; GISEL: ; %bb.0: -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: ; def a1 -; 
GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v0, a1 -; GISEL-NEXT: v_readfirstlane_b32 s0, v0 -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: ; def a0 -; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v2, a0 -; GISEL-NEXT: s_lshl_b32 m0, s0, 16 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: ds_gws_init v2 gds -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_endpgm +; CHECK-LABEL: gws_init_agpr_offset: +; CHECK: ; %bb.0: +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_accvgpr_read_b32 v0, a1 +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_lshl_b32 m0, s0, 16 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: ds_gws_init a0 gds +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_endpgm %val = call i32 asm "; def $0", "=a"() %offset = call i32 asm "; def $0", "=a"() call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset) @@ -132,40 +76,22 @@ define amdgpu_kernel void @gws_init_agpr_offset() #0 { } define void @gws_init_agpr_offset_add1() #0 { -; SDAG-LABEL: gws_init_agpr_offset_add1: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a1 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: v_accvgpr_read_b32 v0, a1 -; SDAG-NEXT: v_readfirstlane_b32 s4, v0 -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a0 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: s_lshl_b32 m0, s4, 16 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: ds_gws_init a0 offset:1 gds -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: gws_init_agpr_offset_add1: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: ; def a1 -; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v0, a1 -; GISEL-NEXT: v_readfirstlane_b32 s4, v0 -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: ; def a0 -; GISEL-NEXT: 
;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v2, a0 -; GISEL-NEXT: s_lshl_b32 m0, s4, 16 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: ds_gws_init v2 offset:1 gds -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: gws_init_agpr_offset_add1: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_accvgpr_read_b32 v0, a1 +; CHECK-NEXT: v_readfirstlane_b32 s4, v0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_lshl_b32 m0, s4, 16 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: ds_gws_init a0 offset:1 gds +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %val = call i32 asm "; def $0", "=a"() %offset.base = call i32 asm "; def $0", "=a"() %offset = add i32 %offset.base, 1 @@ -195,90 +121,51 @@ define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 { } define void @gws_barrier_offset0() #0 { -; SDAG-LABEL: gws_barrier_offset0: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a0 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: s_mov_b32 m0, 0 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: ds_gws_barrier a0 gds -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: gws_barrier_offset0: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: ; def a0 -; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 -; GISEL-NEXT: s_mov_b32 m0, 0 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: ds_gws_barrier v0 gds -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: gws_barrier_offset0: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND 
+; CHECK-NEXT: s_mov_b32 m0, 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: ds_gws_barrier a0 gds +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %val = call i32 asm "; def $0", "=a"() call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0) ret void } define void @gws_barrier_offset63() #0 { -; SDAG-LABEL: gws_barrier_offset63: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a0 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: s_mov_b32 m0, 0 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: ds_gws_barrier a0 offset:63 gds -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: gws_barrier_offset63: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: ; def a0 -; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 -; GISEL-NEXT: s_mov_b32 m0, 0 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: ds_gws_barrier v0 offset:63 gds -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: gws_barrier_offset63: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_mov_b32 m0, 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: ds_gws_barrier a0 offset:63 gds +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %val = call i32 asm "; def $0", "=a"() call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 63) ret void } define void @gws_barrier_sgpr_offset(i32 inreg %offset) #0 { -; SDAG-LABEL: gws_barrier_sgpr_offset: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a0 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: s_lshl_b32 m0, s16, 16 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: ds_gws_barrier a0 gds -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: gws_barrier_sgpr_offset: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: ; def a0 -; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 -; GISEL-NEXT: s_lshl_b32 m0, s16, 16 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: ds_gws_barrier v0 gds -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: gws_barrier_sgpr_offset: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_lshl_b32 m0, s16, 16 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: ds_gws_barrier a0 gds +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %val = call i32 asm "; def $0", "=a"() call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %offset) ret void @@ -311,30 +198,17 @@ define void @gws_sema_v_offset0() #0 { } define void @gws_sema_br_offset0() #0 { -; SDAG-LABEL: gws_sema_br_offset0: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: ;;#ASMSTART -; SDAG-NEXT: ; def a0 -; SDAG-NEXT: ;;#ASMEND -; SDAG-NEXT: s_mov_b32 m0, 0 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: ds_gws_sema_br a0 gds -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: gws_sema_br_offset0: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: ; def a0 -; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 -; GISEL-NEXT: s_mov_b32 m0, 0 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: ds_gws_sema_br v0 gds -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: gws_sema_br_offset0: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; 
CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_mov_b32 m0, 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: ds_gws_sema_br a0 gds +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %val = call i32 asm "; def $0", "=a"() call void @llvm.amdgcn.ds.gws.sema.br(i32 %val, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir b/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir index a9170f00cf023..262f642f0b71f 100644 --- a/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir +++ b/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir @@ -11,10 +11,23 @@ # GFX90A-ERR: DS_GWS_BARRIER killed %2.sub0:vreg_64, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") # GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions *** # GFX90A-ERR: DS_GWS_INIT killed %3:vgpr_32, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") -# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions *** +# GFX90A-ERR: *** Bad machine code: Illegal physical register for instruction *** # GFX90A-ERR: DS_GWS_INIT $vgpr1, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") -# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions *** +# GFX90A-ERR: *** Bad machine code: Illegal physical register for instruction *** # GFX90A-ERR: DS_GWS_INIT $agpr1, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers *** +# GFX90A-ERR: DS_GWS_INIT %4:vreg_64, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") +# GFX90A-ERR: *** Bad machine code: Illegal virtual register for instruction *** +# GFX90A-ERR: DS_GWS_INIT %4:vreg_64, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") +# GFX90A-ERR: *** Bad machine 
code: Subtarget requires even aligned vector registers *** +# GFX90A-ERR: DS_GWS_INIT %5:areg_64, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") +# GFX90A-ERR: *** Bad machine code: Illegal virtual register for instruction *** +# GFX90A-ERR: DS_GWS_INIT %5:areg_64, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers *** +# GFX90A-ERR: DS_GWS_INIT %6:av_64, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") +# GFX90A-ERR: *** Bad machine code: Illegal virtual register for instruction *** +# GFX90A-ERR: DS_GWS_INIT %6:av_64, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") + --- name: gws_odd_vgpr body: | @@ -33,6 +46,22 @@ body: | DS_GWS_INIT $vgpr1, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") $agpr1 = IMPLICIT_DEF DS_GWS_INIT $agpr1, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") + + $vgpr3_vgpr4 = IMPLICIT_DEF + DS_GWS_INIT $vgpr3_vgpr4, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") + + $agpr3_agpr4 = IMPLICIT_DEF + DS_GWS_INIT $agpr3_agpr4, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") + + %4:vreg_64 = IMPLICIT_DEF + DS_GWS_INIT %4, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") + + %5:areg_64 = IMPLICIT_DEF + DS_GWS_INIT %5, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") + + %6:av_64 = IMPLICIT_DEF + DS_GWS_INIT %6, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") + S_ENDPGM 0 ... 
diff --git a/llvm/test/MC/AMDGPU/ds_gws_sgpr_err.s b/llvm/test/MC/AMDGPU/ds_gws_sgpr_err.s new file mode 100644 index 0000000000000..4afced60a0bf2 --- /dev/null +++ b/llvm/test/MC/AMDGPU/ds_gws_sgpr_err.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -filetype=null %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -filetype=null %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx908 -filetype=null %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck %s + +// CHECK: :[[@LINE+1]]:13: error: invalid operand for instruction +ds_gws_init s0 offset:65535 gds + +// CHECK: :[[@LINE+1]]:13: error: invalid operand for instruction +ds_gws_init s[0:1] offset:65535 gds + +// CHECK: :[[@LINE+1]]:13: error: invalid operand for instruction +ds_gws_init s1 offset:65535 gds + +// CHECK: :[[@LINE+1]]:16: error: invalid operand for instruction +ds_gws_barrier s1 gds + +// CHECK: :[[@LINE+1]]:16: error: invalid operand for instruction +ds_gws_barrier s2 gds + +// CHECK: :[[@LINE+1]]:15: error: invalid operand for instruction +ds_gws_sema_v s1 gds + +// CHECK: :[[@LINE+1]]:15: error: invalid operand for instruction +ds_gws_sema_v s2 gds + +// CHECK: :[[@LINE+1]]:16: error: invalid operand for instruction +ds_gws_sema_br s1 gds + +// CHECK: :[[@LINE+1]]:16: error: invalid operand for instruction +ds_gws_sema_br s2 gds + diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s index c96a72ddc2573..912fb0b616690 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s +++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s @@ -9782,63 +9782,63 @@ ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:4 // GFX90A: ds_gws_init a0 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: 
invalid register class: agpr loads and stores not supported on this GPU ds_gws_init a0 offset:65535 gds // GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_init a254 offset:65535 gds // GFX90A: ds_gws_init a2 gds ; encoding: [0x00,0x00,0x33,0xdb,0x02,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_init a2 gds // GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_init a0 gds // GFX90A: ds_gws_init a0 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_init a0 offset:4 gds // GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_sema_br a2 offset:65535 gds // GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this 
GPU ds_gws_sema_br a254 offset:65535 gds // GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_sema_br a0 gds // GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_sema_br a2 gds // GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_sema_br a0 offset:4 gds // GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_barrier a2 offset:65535 gds // GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_barrier a254 offset:65535 gds // GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_barrier a0 gds // GFX90A: ds_gws_barrier a2 
gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_barrier a2 gds // GFX90A: ds_gws_barrier a0 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU ds_gws_barrier a0 offset:4 gds // GFX90A: ds_consume a5 offset:65535 ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05]