diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 5bb0e3648d2ec..421fc429048ff 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5324,6 +5324,12 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, unsigned CPol = Inst.getOperand(CPolPos).getImm(); if (!isGFX1250()) { + if (CPol & CPol::SCAL) { + SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); + StringRef CStr(S.getPointer()); + S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]); + Error(S, "scale_offset is not supported on this GPU"); + } if (CPol & CPol::NV) { SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); StringRef CStr(S.getPointer()); @@ -5332,6 +5338,13 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, } } + if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) { + SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); + StringRef CStr(S.getPointer()); + S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]); + Error(S, "scale_offset is not supported for this instruction"); + } + if (isGFX12Plus()) return validateTHAndScopeBits(Inst, Operands, CPol); @@ -7003,6 +7016,7 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { ParseStatus ResTH = ParseStatus::NoMatch; ParseStatus ResScope = ParseStatus::NoMatch; ParseStatus ResNV = ParseStatus::NoMatch; + ParseStatus ResScal = ParseStatus::NoMatch; for (;;) { if (ResTH.isNoMatch()) { @@ -7041,10 +7055,22 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { } } + if (ResScal.isNoMatch()) { + if (trySkipId("scale_offset")) { + ResScal = ParseStatus::Success; + CPolVal |= CPol::SCAL; + continue; + } else if (trySkipId("no", "scale_offset")) { + ResScal = ParseStatus::Success; + continue; + } + } + break; } - if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch()) + if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() && + ResScal.isNoMatch()) return ParseStatus::NoMatch; Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc, diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index f7f29f17f9d0e..1cc717bb437de 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -2941,6 +2941,7 @@ multiclass VFLAT_Real_gfx12 op, string name = get_FLAT_ps.Mnemoni let DecoderNamespace = "GFX12"; let Inst{25-24} = {ps.is_flat_global, ps.is_flat_scratch}; + let Inst{48} = cpol{CPolBit.SCAL}; // scale offset } } @@ -3170,6 +3171,7 @@ multiclass VFLAT_Real_gfx1250 op, let DecoderNamespace = "GFX1250"; let Inst{25-24} = {ps.is_flat_global, ps.is_flat_scratch}; + let Inst{48} = cpol{CPolBit.SCAL}; // scale offset } } diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 268162bcada47..cbc517d3da680 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1162,6 +1162,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; } + // Scalar and global loads support scale_offset bit. + bool hasScaleOffset() const { return GFX1250Insts; } + bool hasFlatGVSMode() const { return FlatGVSMode; } bool enableSIScheduler() const { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 197bb3f0b569b..11b072e05a6e1 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -157,6 +157,9 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo, const int64_t TH = Imm & CPol::TH; const int64_t Scope = Imm & CPol::SCOPE; + if (Imm & CPol::SCAL) + O << " scale_offset"; + printTH(MI, TH, Scope, O); printScope(Scope, O); diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index d3790880532a8..3902d4c3b1027 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -402,6 +402,8 @@ enum CPol { SWZ = 1 << 6, // Swizzle bit + SCAL = 1 << 11, // Scale offset bit + ALL = TH | SCOPE, // Helper bits diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index e172c0b63189b..e5d1eaad2b8f4 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1209,18 +1209,24 @@ void SIFoldOperandsImpl::foldOperand( return; } - // A frame index will resolve to a positive constant, so it should always be - // safe to fold the addressing mode, even pre-GFX9. - UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getFI()); - const unsigned Opc = UseMI->getOpcode(); if (TII->isFLATScratch(*UseMI) && AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr) && !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::saddr)) { unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(Opc); + unsigned CPol = + TII->getNamedOperand(*UseMI, AMDGPU::OpName::cpol)->getImm(); + if ((CPol & AMDGPU::CPol::SCAL) && + !AMDGPU::supportsScaleOffset(*TII, NewOpc)) + return; + UseMI->setDesc(TII->get(NewOpc)); } + // A frame index will resolve to a positive constant, so it should always be + // safe to fold the addressing mode, even pre-GFX9. + UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getFI()); + return; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 6b419347c01d9..89d9b0d32b25b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -318,6 +318,7 @@ def CPolBit { int DLC = 2; int SCC = 4; int NV = 5; + int SCAL = 11; } class VOPDstOperand : RegisterOperand ; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e2a2525d909bd..571f3efd68260 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5482,6 +5482,19 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } + if (const MachineOperand *CPol = getNamedOperand(MI, AMDGPU::OpName::cpol)) { + if (CPol->getImm() & AMDGPU::CPol::SCAL) { + if (!ST.hasScaleOffset()) { + ErrInfo = "Subtarget does not support offset scaling"; + return false; + } + if (!AMDGPU::supportsScaleOffset(*this, MI.getOpcode())) { + ErrInfo = "Instruction does not support offset scaling"; + return false; + } + } + } + return true; } diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 5097ac03954d5..b49c5a997af78 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -61,6 +61,7 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" @@ -1078,7 +1079,9 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI, if (EltOffset0 + CI.Width != EltOffset1 && EltOffset1 + Paired.Width != EltOffset0) return false; - if (CI.CPol != Paired.CPol) + // Instructions with scale_offset modifier cannot be combined unless we + // also generate a code to scale the offset and reset that bit. + if (CI.CPol != Paired.CPol || (CI.CPol & AMDGPU::CPol::SCAL)) return false; if (CI.InstClass == S_LOAD_IMM || CI.InstClass == S_BUFFER_LOAD_IMM || CI.InstClass == S_BUFFER_LOAD_SGPR_IMM) { diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index d8b52d271a964..0850c41c933de 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -1488,6 +1488,7 @@ class SMEM_Real_Load_gfx12 op, string ps, string opName, OffsetMode offs let Inst{20} = cpol{CPolBit.NV}; // non-volatile let Inst{22-21} = cpol{4-3}; // scope let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported + let Inst{56} = cpol{CPolBit.SCAL}; // scale offset } multiclass SM_Real_Loads_gfx12 op, string ps = NAME> { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 9c6c3746f9007..b5b3cc97569ed 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -3228,6 +3228,25 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, : getGfx9BufferFormatInfo(Format); } +bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) { + uint64_t TSFlags = MII.get(Opcode).TSFlags; + + if (TSFlags & SIInstrFlags::SMRD) + return !getSMEMIsBuffer(Opcode); + if (!(TSFlags & SIInstrFlags::FLAT)) + return false; + + // Only SV and SVS modes are supported. + if (TSFlags & SIInstrFlags::FlatScratch) + return hasNamedOperand(Opcode, OpName::vaddr); + + // Only GVS mode is supported. + return hasNamedOperand(Opcode, OpName::vaddr) && + hasNamedOperand(Opcode, OpName::saddr); + + return false; +} + bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) { for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) { int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index bde951b43d105..c09a9d694f3d8 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1757,6 +1757,9 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID); /// \returns true if the intrinsic is uniform bool isIntrinsicAlwaysUniform(unsigned IntrID); +/// \returns true if a memory instruction supports scale_offset modifier. +bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode); + /// \returns lds block size in terms of dwords. \p /// This is used to calculate the lds size encoded for PAL metadata 3.0+ which /// must be defined in terms of bytes. diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi-gfx1250.mir new file mode 100644 index 0000000000000..e5955ad3d1d49 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi-gfx1250.mir @@ -0,0 +1,43 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -start-before=si-fold-operands -stop-after=prologepilog -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: test_fold_fi_scratch_load_vgpr +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } +body: | + bb.0.entry: + ; GCN-LABEL: name: test_fold_fi_scratch_load_vgpr + ; GCN: renamable $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + S_ENDPGM 0, implicit %1 + +... + +# SS form of the SCRATCH_LOAD_DWORD does not support offset scaling + +--- +name: test_no_fold_fi_scratch_load_vgpr_scale_offset +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } +body: | + bb.0.entry: + ; GCN-LABEL: name: test_no_fold_fi_scratch_load_vgpr_scale_offset + ; GCN: renamable $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GCN-NEXT: renamable $vgpr0 = SCRATCH_LOAD_DWORD killed renamable $vgpr0, 4, 2048, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 2048, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + S_ENDPGM 0, implicit %1 + +... diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-scale-offset.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-scale-offset.mir new file mode 100644 index 0000000000000..76e2092c8b57a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-scale-offset.mir @@ -0,0 +1,104 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=si-load-store-opt -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: merge_global_load_dword_2_no_scale_offset +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_load_dword_2_no_scale_offset + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 0, 1, implicit $exec :: (load (s64) from `ptr addrspace(1) undef` + 4, align 4, addrspace 1) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[DEF1]], implicit [[COPY]] + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 1, implicit $exec :: (load (s32) from `float addrspace(1)* undef` + 4, basealign 4, addrspace 1) + %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 4, addrspace 1) + S_NOP 0, implicit %1, implicit %2 +... + +--- +name: no_merge_global_load_dword_2_same_scale_offset +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_global_load_dword_2_same_scale_offset + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 2049, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 4, addrspace 1) + ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 4, 2049, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 8, addrspace 1) + ; GCN-NEXT: S_NOP 0, implicit [[DEF1]], implicit [[GLOBAL_LOAD_DWORD_SADDR]] + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 2049, implicit $exec :: (load (s32) from `float addrspace(1)* undef` + 4, basealign 4, addrspace 1) + %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 2049, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 4, addrspace 1) + S_NOP 0, implicit %1, implicit %2 +... + +--- +name: no_merge_global_load_dword_2_different_scale_offset +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_global_load_dword_2_different_scale_offset + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 4, addrspace 1) + ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 4, 2048, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 8, addrspace 1) + ; GCN-NEXT: S_NOP 0, implicit [[DEF1]], implicit [[GLOBAL_LOAD_DWORD_SADDR]] + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from `float addrspace(1)* undef` + 4, basealign 4, addrspace 1) + %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 2048, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 4, addrspace 1) + S_NOP 0, implicit %1, implicit %2 +... + +# NB: We do not currently support merging SGPR offset and SGPR+Imm offset forms +# of S_LOAD, but the check stays the same: these cannot be merged with different +# scale offsets. +# +# We also do not currently merge flat scratch instructions, although a common +# check in the merge logic that CPol shall not be set for merge to happen. + +--- +name: merge_s_load_x1_x1_imm_no_scale_offset +body: | + bb.0: + ; GCN-LABEL: name: merge_s_load_x1_x1_imm_no_scale_offset + ; GCN: [[DEF:%[0-9]+]]:sgpr_64 = IMPLICIT_DEF + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[DEF]], 0, 0 :: (dereferenceable invariant load (s64), align 4) + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_LOAD_DWORDX2_IMM]].sub1 + %0:sgpr_64 = IMPLICIT_DEF + %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 0, 0 :: (dereferenceable invariant load (s32)) + %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 4, 0 :: (dereferenceable invariant load (s32)) +... + +--- +name: no_merge_s_load_x1_x1_imm_same_scale_offset +body: | + bb.0: + ; GCN-LABEL: name: no_merge_s_load_x1_x1_imm_same_scale_offset + ; GCN: [[DEF:%[0-9]+]]:sgpr_64 = IMPLICIT_DEF + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[DEF]], 0, 2048 :: (dereferenceable invariant load (s32)) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[DEF]], 4, 2048 :: (dereferenceable invariant load (s32)) + %0:sgpr_64 = IMPLICIT_DEF + %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 0, 2048 :: (dereferenceable invariant load (s32)) + %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 4, 2048 :: (dereferenceable invariant load (s32)) +... + +--- +name: no_merge_s_load_x1_x1_imm_different_scale_offset +body: | + bb.0: + ; GCN-LABEL: name: no_merge_s_load_x1_x1_imm_different_scale_offset + ; GCN: [[DEF:%[0-9]+]]:sgpr_64 = IMPLICIT_DEF + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[DEF]], 0, 0 :: (dereferenceable invariant load (s32)) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[DEF]], 4, 2048 :: (dereferenceable invariant load (s32)) + %0:sgpr_64 = IMPLICIT_DEF + %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 0, 0 :: (dereferenceable invariant load (s32)) + %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 4, 2048 :: (dereferenceable invariant load (s32)) +... diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s b/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s index 899c4c7aca0ba..800f66221114e 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s @@ -12,3 +12,30 @@ s_buffer_load_i8 s5, s[4:7], s0 nv // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU // GFX12-ERR-NEXT:{{^}}s_buffer_load_i8 s5, s[4:7], s0 nv // GFX12-ERR-NEXT:{{^}} ^ + +s_load_b32 s4, s[2:3], 0xa scale_offset +// GFX1250: s_load_b32 s4, s[2:3], 0xa scale_offset ; encoding: [0x01,0x01,0x00,0xf4,0x0a,0x00,0x00,0xf9] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 0xa scale_offset +// GFX12-ERR-NEXT:{{^}} ^ + +s_load_b32 s4, s[2:3], 0xa scale_offset nv +// GFX1250: s_load_b32 s4, s[2:3], 0xa scale_offset nv ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf9] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 0xa scale_offset nv +// GFX12-ERR-NEXT:{{^}} ^ +// GFX12-ERR-NEXT: error: nv is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 0xa scale_offset nv +// GFX12-ERR-NEXT:{{^}} ^ + +s_load_b32 s4, s[2:3], s5 offset:32 scale_offset +// GFX1250: s_load_b32 s4, s[2:3], s5 offset:0x20 scale_offset ; encoding: [0x01,0x01,0x00,0xf4,0x20,0x00,0x00,0x0b] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], s5 offset:32 scale_offset +// GFX12-ERR-NEXT:{{^}} ^ + +s_load_b32 s4, s[2:3], m0 offset:32 scale_offset +// GFX1250: s_load_b32 s4, s[2:3], m0 offset:0x20 scale_offset ; encoding: [0x01,0x01,0x00,0xf4,0x20,0x00,0x00,0xfb] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], m0 offset:32 scale_offset +// GFX12-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_smem_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_smem_err.s new file mode 100644 index 0000000000000..e57d4fc760309 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_smem_err.s @@ -0,0 +1,16 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +s_buffer_load_i8 s5, s[4:7], s0 scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}s_buffer_load_i8 s5, s[4:7], s0 scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +s_prefetch_data s[18:19], 100, s10, 7 nv +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX1250-ERR-NEXT:{{^}}s_prefetch_data s[18:19], 100, s10, 7 nv +// GFX1250-ERR-NEXT:{{^}} ^ + +s_prefetch_data s[18:19], 100, s10, 7 scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX1250-ERR-NEXT:{{^}}s_prefetch_data s[18:19], 100, s10, 7 scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf_err.s new file mode 100644 index 0000000000000..731eb6770ceed --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf_err.s @@ -0,0 +1,6 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s index 488040e1b5390..d3a49f2eb25fa 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s @@ -61,6 +61,54 @@ scratch_load_b32 v5, v2, off nv // GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off nv // GFX12-ERR-NEXT:{{^}} ^ +global_load_b32 v5, v1, s[2:3] offset:32 scale_offset +// GFX1250: global_load_b32 v5, v1, s[2:3] offset:32 scale_offset ; encoding: [0x02,0x00,0x05,0xee,0x05,0x00,0x01,0x00,0x01,0x20,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}global_load_b32 v5, v1, s[2:3] offset:32 scale_offset +// GFX12-ERR-NEXT:{{^}} ^ + +global_store_b32 v5, v1, s[2:3] offset:32 scale_offset +// GFX1250: global_store_b32 v5, v1, s[2:3] offset:32 scale_offset ; encoding: [0x02,0x80,0x06,0xee,0x00,0x00,0x81,0x00,0x05,0x20,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}global_store_b32 v5, v1, s[2:3] offset:32 scale_offset +// GFX12-ERR-NEXT:{{^}} ^ + +global_atomic_add_u32 v2, v5, s[2:3] scale_offset +// GFX1250: global_atomic_add_u32 v2, v5, s[2:3] scale_offset ; encoding: [0x02,0x40,0x0d,0xee,0x00,0x00,0x81,0x02,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}global_atomic_add_u32 v2, v5, s[2:3] scale_offset +// GFX12-ERR-NEXT:{{^}} ^ + +scratch_load_b32 v5, v2, off scale_offset +// GFX1250: scratch_load_b32 v5, v2, off scale_offset ; encoding: [0x7c,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off scale_offset +// GFX12-ERR-NEXT:{{^}} ^ + +scratch_load_b32 v5, v2, off offset:32 scale_offset +// GFX1250: scratch_load_b32 v5, v2, off offset:32 scale_offset ; encoding: [0x7c,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x20,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off offset:32 scale_offset +// GFX12-ERR-NEXT:{{^}} ^ + +scratch_load_b32 v5, v2, s1 offset:32 scale_offset +// GFX1250: scratch_load_b32 v5, v2, s1 offset:32 scale_offset ; encoding: [0x01,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x20,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, s1 offset:32 scale_offset +// GFX12-ERR-NEXT:{{^}} ^ + +scratch_store_b32 v2, v5, off scale_offset +// GFX1250: scratch_store_b32 v2, v5, off scale_offset ; encoding: [0x7c,0x80,0x06,0xed,0x00,0x00,0x83,0x02,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}scratch_store_b32 v2, v5, off scale_offset +// GFX12-ERR-NEXT:{{^}} ^ + +scratch_store_b32 v2, v5, s1 scale_offset +// GFX1250: scratch_store_b32 v2, v5, s1 scale_offset ; encoding: [0x01,0x80,0x06,0xed,0x00,0x00,0x83,0x02,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU +// GFX12-ERR-NEXT:{{^}}scratch_store_b32 v2, v5, s1 scale_offset +// GFX12-ERR-NEXT:{{^}} ^ + tensor_save s[0:1] // GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU @@ -81,10 +129,18 @@ tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS // GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +flat_atomic_add_f32 v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_add_f32 v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x15,0xec,0x00,0x00,0x11,0x01,0x01,0x00,0x12,0x7a] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_add_f32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_add_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x15,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_add_u32 v1, v2, s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_add_u32 v1, v2, s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x0d,0xec,0x00,0x00,0x11,0x01,0x01,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_add_u32 v2, v3, s[2:3] offset:-64 // GFX1250: flat_atomic_add_u32 v2, v3, s[2:3] offset:-64 ; encoding: [0x02,0x40,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0xc0,0xff,0xff] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -93,6 +149,14 @@ flat_atomic_add_u64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_add_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_add_u64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_add_u64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x10,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_and_b32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_and_b32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_and_b32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_and_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -101,18 +165,38 @@ flat_atomic_and_b64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_and_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_and_b64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_and_b64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_cmpswap_b32 v0, v2, v[2:3], s[2:3] scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_cmpswap_b32 v0, v2, v[2:3], s[2:3] scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x0d,0xec,0x00,0x00,0x11,0x01,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_cmpswap_b32 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_cmpswap_b32 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_cmpswap_b64 v[0:1], v2, v[2:5], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_cmpswap_b64 v[0:1], v2, v[2:5], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x10,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + flat_atomic_cmpswap_b64 v2, v[2:5], s[2:3] // GFX1250: flat_atomic_cmpswap_b64 v2, v[2:5], s[2:3] ; encoding: [0x02,0x80,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_cond_sub_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_cond_sub_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x14,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_cond_sub_u32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_cond_sub_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_dec_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_dec_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x10,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_dec_u32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_dec_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x10,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -121,6 +205,14 @@ flat_atomic_dec_u64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_dec_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_dec_u64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_dec_u64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x13,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_inc_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_inc_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_inc_u32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_inc_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -129,10 +221,22 @@ flat_atomic_inc_u64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_inc_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_inc_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_inc_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x13,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_max_num_f32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_max_num_f32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x14,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_max_num_f32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_max_num_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_max_i32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_max_i32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_max_i32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_max_i32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -141,6 +245,14 @@ flat_atomic_max_i64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_max_i64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_max_i64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_max_i64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_max_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_max_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_max_u32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_max_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -149,10 +261,22 @@ flat_atomic_max_u64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_max_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_max_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_max_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_min_num_f32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_min_num_f32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x14,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_min_num_f32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_min_num_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_min_i32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_min_i32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_min_i32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_min_i32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -161,6 +285,14 @@ flat_atomic_min_i64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_min_i64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_min_i64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_min_i64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_min_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_min_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_min_u32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_min_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -169,6 +301,14 @@ flat_atomic_min_u64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_min_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x80,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_min_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_min_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_or_b32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_or_b32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_or_b32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_or_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -177,10 +317,22 @@ flat_atomic_or_b64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_or_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x80,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_or_b64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_or_b64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_sub_clamp_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_sub_clamp_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x0d,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_sub_clamp_u32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_sub_clamp_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_sub_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_sub_u32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x0d,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_sub_u32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_sub_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -189,6 +341,14 @@ flat_atomic_sub_u64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_sub_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_sub_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_sub_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_swap_b32 v0, v2, s[2:3] scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_swap_b32 v0, v2, s[2:3] scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x0c,0xec,0x00,0x00,0x11,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_swap_b32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_swap_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0c,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -197,6 +357,14 @@ flat_atomic_swap_b64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_swap_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_swap_b64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_swap_b64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x10,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_xor_b32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_xor_b32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_xor_b32 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_xor_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode @@ -205,10 +373,114 @@ flat_atomic_xor_b64 v2, v[2:3], s[2:3] offset:64 // GFX1250: flat_atomic_xor_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_xor_b64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_xor_b64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_atomic_pk_add_f16 v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_pk_add_f16 v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x16,0xec,0x00,0x00,0x11,0x01,0x01,0x00,0x12,0x7a] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_pk_add_f16 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_pk_add_f16 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +flat_atomic_pk_add_bf16 v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN +// GFX1250: flat_atomic_pk_add_bf16 v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x16,0xec,0x00,0x00,0x11,0x01,0x01,0x00,0x12,0x7a] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + flat_atomic_pk_add_bf16 v2, v3, s[2:3] offset:64 // GFX1250: flat_atomic_pk_add_bf16 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_load_b128 v[2:5], v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_b128 v[2:5], v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0xc0,0x05,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_b32 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_b32 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x05,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_b64 v[2:3], v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_b64 v[2:3], v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x05,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_b96 v[2:4], v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_b96 v[2:4], v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x80,0x05,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_d16_b16 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_d16_b16 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_d16_hi_b16 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_d16_hi_b16 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0xc0,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_d16_hi_i8 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_d16_hi_i8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x80,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_d16_hi_u8 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_d16_hi_u8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_d16_i8 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_d16_i8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0xc0,0x07,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_d16_u8 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_d16_u8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x80,0x07,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_i16 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_i16 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0xc0,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_i8 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_i8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_u16 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_u16 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x80,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_u8 v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_u8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_load_dword v1, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_load_b32 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x05,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_store_b128 v2, v[2:5], s[2:3] offset:64 scale_offset +// GFX1250: flat_store_b128 v2, v[2:5], s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x07,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_store_b16 v2, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_store_b16 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_store_b32 v2, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_store_b32 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x80,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_store_b64 v2, v[2:3], s[2:3] offset:64 scale_offset +// GFX1250: flat_store_b64 v2, v[2:3], s[2:3] offset:64 scale_offset ; encoding: [0x02,0xc0,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_store_b8 v2, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_store_b8 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_store_b96 v2, v[2:4], s[2:3] offset:64 scale_offset +// GFX1250: flat_store_b96 v2, v[2:4], s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x07,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_store_d16_hi_b16 v2, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_store_d16_hi_b16 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x09,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +flat_store_d16_hi_b8 v2, v2, s[2:3] offset:64 scale_offset +// GFX1250: flat_store_d16_hi_b8 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x09,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s index 86a63bcb4be89..26d7ed3cfae4c 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s @@ -2,3 +2,58 @@ global_load_b96 v[1:3], v[0:1], off // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned + +flat_load_b32 v5, v[2:3] scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}flat_load_b32 v5, v[2:3] scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +flat_load_b32 v5, v[2:3] offset:32 scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}flat_load_b32 v5, v[2:3] offset:32 scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +flat_store_b32 v[2:3], v5 scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}flat_store_b32 v[2:3], v5 scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +flat_atomic_add v[2:3], v2 scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}flat_atomic_add v[2:3], v2 scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +global_load_b32 v5, v[2:3], off offset:32 scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}global_load_b32 v5, v[2:3], off offset:32 scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +global_store_b32 v[2:3], v5, off offset:32 scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}global_store_b32 v[2:3], v5, off offset:32 scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +global_atomic_add v[2:3], v2, off scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}global_atomic_add v[2:3], v2, off scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +global_load_addtid_b32 v5, s[2:3] scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}global_load_addtid_b32 v5, s[2:3] scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +global_store_addtid_b32 v5, s[2:3] scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}global_store_addtid_b32 v5, s[2:3] scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +scratch_load_b32 v5, off, s1 scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}scratch_load_b32 v5, off, s1 scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ + +scratch_load_b32 v5, off, off offset:32 scale_offset +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction +// GFX1250-ERR-NEXT:{{^}}scratch_load_b32 v5, off, off offset:32 scale_offset +// GFX1250-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt index 4bd9ab45af6bd..92fa802802f29 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt @@ -5,3 +5,15 @@ # GFX1250: s_load_b32 s4, s[2:3], 0xa nv ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8] 0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8 + +# GFX1250: s_load_b32 s4, s[2:3], 0xa scale_offset ; encoding: [0x01,0x01,0x00,0xf4,0x0a,0x00,0x00,0xf9] +0x01,0x01,0x00,0xf4,0x0a,0x00,0x00,0xf9 + +# GFX1250: s_load_b32 s4, s[2:3], 0xa scale_offset nv ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf9] +0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf9 + +# GFX1250: s_load_b32 s4, s[2:3], m0 offset:0x20 scale_offset ; encoding: [0x01,0x01,0x00,0xf4,0x20,0x00,0x00,0xfb] +0x01,0x01,0x00,0xf4,0x20,0x00,0x00,0xfb + +# GFX1250: s_load_b32 s4, s[2:3], s5 offset:0x20 scale_offset ; encoding: [0x01,0x01,0x00,0xf4,0x20,0x00,0x00,0x0b] +0x01,0x01,0x00,0xf4,0x20,0x00,0x00,0x0b diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt index fcbb58bf7865e..3455f4c3b46e9 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt @@ -1,104 +1,269 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# GFX1250: flat_atomic_add_f32 v0, v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x15,0xec,0x00,0x00,0x11,0x01,0x01,0x00,0x12,0x7a] +0x02,0x80,0x15,0xec,0x00,0x00,0x11,0x01,0x01,0x00,0x12,0x7a + # GFX1250: flat_atomic_add_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x15,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x80,0x15,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_add_u32 v0, v1, v2, s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x0d,0xec,0x00,0x00,0x11,0x01,0x01,0xc0,0xff,0xff] +0x02,0x40,0x0d,0xec,0x00,0x00,0x11,0x01,0x01,0xc0,0xff,0xff + # GFX1250: flat_atomic_add_u32 v2, v3, s[2:3] offset:-64 ; encoding: [0x02,0x40,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0xc0,0xff,0xff] 0x02,0x40,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0xc0,0xff,0xff # GFX1250: flat_atomic_add_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_add_u64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x10,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff] +0x02,0xc0,0x10,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff + +# GFX1250: flat_atomic_and_b32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x00,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_and_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x00,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_and_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_and_b64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff] +0x02,0x40,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff + +# GFX1250: flat_atomic_cmpswap_b32 v0, v2, v[2:3], s[2:3] scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x0d,0xec,0x00,0x00,0x11,0x01,0x02,0x00,0x00,0x00] +0x02,0x00,0x0d,0xec,0x00,0x00,0x11,0x01,0x02,0x00,0x00,0x00 + # GFX1250: flat_atomic_cmpswap_b32 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_cmpswap_b64 v2, v[2:5], s[2:3] ; encoding: [0x02,0x80,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00] 0x02,0x80,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00 +# GFX1250: flat_atomic_cmpswap_b64 v[0:1], v2, v[2:5], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x10,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x10,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_cond_sub_u32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x14,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x00,0x14,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_cond_sub_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x00,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_dec_u32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x10,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x00,0x10,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_dec_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x10,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x00,0x10,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_dec_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0x40,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_dec_u64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x13,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff] +0x02,0x40,0x13,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff + +# GFX1250: flat_atomic_inc_u32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0xc0,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_inc_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0xc0,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_inc_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0x00,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_inc_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x13,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x13,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_i32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x80,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_max_i32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x80,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_max_i64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0xc0,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_max_i64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +0x02,0xc0,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_num_f32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x14,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x80,0x14,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_max_num_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x80,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_max_u32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0xc0,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_max_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0xc0,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_max_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0x00,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_max_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_i32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x00,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_min_i32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x00,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_min_i64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0x40,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_min_i64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_num_f32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x14,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x40,0x14,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_min_num_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x40,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_min_u32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x40,0x0e,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_min_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x40,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_min_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x80,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0x80,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_min_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_or_b32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x40,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_or_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x40,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_or_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x80,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0x80,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_or_b64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_pk_add_bf16 v0, v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x16,0xec,0x00,0x00,0x11,0x01,0x01,0x00,0x12,0x7a] +0x02,0x80,0x16,0xec,0x00,0x00,0x11,0x01,0x01,0x00,0x12,0x7a + # GFX1250: flat_atomic_pk_add_bf16 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x80,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_pk_add_f16 v0, v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x16,0xec,0x00,0x00,0x11,0x01,0x01,0x00,0x12,0x7a] +0x02,0x40,0x16,0xec,0x00,0x00,0x11,0x01,0x01,0x00,0x12,0x7a + # GFX1250: flat_atomic_pk_add_f16 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x40,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_sub_clamp_u32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x0d,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0xc0,0x0d,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_sub_clamp_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0xc0,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_sub_u32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x0d,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x80,0x0d,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_sub_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x80,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_sub_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0x00,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_sub_u64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x11,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_swap_b32 v0, v0, v2, s[2:3] scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x0c,0xec,0x00,0x00,0x11,0x01,0x00,0x00,0x00,0x00] +0x02,0xc0,0x0c,0xec,0x00,0x00,0x11,0x01,0x00,0x00,0x00,0x00 + # GFX1250: flat_atomic_swap_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0c,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0xc0,0x0c,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_swap_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0x40,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_swap_b64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x10,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x10,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_xor_b32 v0, v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00] +0x02,0x80,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00 + # GFX1250: flat_atomic_xor_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] 0x02,0x80,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 # GFX1250: flat_atomic_xor_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] 0x02,0xc0,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_xor_b64 v[0:1], v2, v[2:3], s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00] +0x02,0xc0,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_b128 v[2:5], v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0xc0,0x05,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0xc0,0x05,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_b32 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x05,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0x00,0x05,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_b64 v[2:3], v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x05,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0x40,0x05,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_b96 v[2:4], v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x80,0x05,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0x80,0x05,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_b16 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0x00,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_hi_b16 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0xc0,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0xc0,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_hi_i8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x80,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0x80,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_hi_u8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0x40,0x08,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_i8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0xc0,0x07,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0xc0,0x07,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_u8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x80,0x07,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0x80,0x07,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_i16 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0xc0,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0xc0,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_i8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0x40,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_u16 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x80,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0x80,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_load_u8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +0x02,0x00,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: flat_store_b128 v2, v[2:5], s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x07,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x07,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_store_b16 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_store_b32 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x80,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_store_b64 v2, v[2:3], s[2:3] offset:64 scale_offset ; encoding: [0x02,0xc0,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +0x02,0xc0,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_store_b8 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x06,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_store_b96 v2, v[2:4], s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x07,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x07,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_store_d16_hi_b16 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x09,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x09,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_store_d16_hi_b8 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x09,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x09,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00 + # GFX1250: flat_atomic_add_f32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x15,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] 0x7c,0x80,0x15,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff @@ -2856,6 +3021,30 @@ # GFX1250: scratch_load_b32 v5, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00] 0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00 +# GFX1250: global_load_b32 v5, v1, s[2:3] offset:32 scale_offset ; encoding: [0x02,0x00,0x05,0xee,0x05,0x00,0x01,0x00,0x01,0x20,0x00,0x00] +0x02,0x00,0x05,0xee,0x05,0x00,0x01,0x00,0x01,0x20,0x00,0x00 + +# GFX1250: global_store_b32 v5, v1, s[2:3] offset:32 scale_offset ; encoding: [0x02,0x80,0x06,0xee,0x00,0x00,0x81,0x00,0x05,0x20,0x00,0x00] +0x02,0x80,0x06,0xee,0x00,0x00,0x81,0x00,0x05,0x20,0x00,0x00 + +# GFX1250: global_atomic_add_u32 v2, v5, s[2:3] scale_offset ; encoding: [0x02,0x40,0x0d,0xee,0x00,0x00,0x81,0x02,0x02,0x00,0x00,0x00] +0x02,0x40,0x0d,0xee,0x00,0x00,0x81,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_b32 v5, v2, off scale_offset ; encoding: [0x7c,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x00,0x00,0x00] +0x7c,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_b32 v5, v2, off offset:32 scale_offset ; encoding: [0x7c,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x20,0x00,0x00] +0x7c,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x20,0x00,0x00 + +# GFX1250: scratch_load_b32 v5, v2, s1 offset:32 scale_offset ; encoding: [0x01,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x20,0x00,0x00] +0x01,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x20,0x00,0x00 + +# GFX1250: scratch_store_b32 v2, v5, off scale_offset ; encoding: [0x7c,0x80,0x06,0xed,0x00,0x00,0x83,0x02,0x02,0x00,0x00,0x00] +0x7c,0x80,0x06,0xed,0x00,0x00,0x83,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_store_b32 v2, v5, s1 scale_offset ; encoding: [0x01,0x80,0x06,0xed,0x00,0x00,0x83,0x02,0x02,0x00,0x00,0x00] +0x01,0x80,0x06,0xed,0x00,0x00,0x83,0x02,0x02,0x00,0x00,0x00 + # GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] 0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00