diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 99ba04378ba2e..97eeaaae6ce44 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -9048,6 +9048,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); + // Parse a dummy operand as a placeholder for the SWZ operand. This enforces + // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands. + Inst.addOperand(MCOperand::createImm(0)); } //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s b/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s new file mode 100644 index 0000000000000..8bd91484d149c --- /dev/null +++ b/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s @@ -0,0 +1,43 @@ +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1100 --show-inst < %s | FileCheck %s + +// CHECK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100" +buffer_load_dwordx4 v[0:3], v0, s[0:3], 0, offen offset:4092 slc +// CHECK: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092 slc ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; > +buffer_store_dword v0, v1, s[0:3], 0 offen slc +// CHECK: buffer_store_b32 v0, v1, s[0:3], 0 offen slc ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; > + +; tbuffer ops use autogenerate asm parsers +tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092 slc +// CHECK: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092 slc ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; > +tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] offen slc +// CHECK: tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] offen slc ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; +// CHECK-NEXT: ; > diff --git a/llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s b/llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s new file mode 100644 index 0000000000000..932d9d1cc48b5 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s @@ -0,0 +1,45 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s + +buffer_load_dwordx4 v[30:33], v4, s[0:3], 0, offen offset:4092 +buffer_store_dword v0, v1, s[0:3], 0 offen + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 200 +# CHECK-NEXT: Total Cycles: 280 +# CHECK-NEXT: Total uOps: 200 + +# CHECK: Dispatch Width: 1 +# CHECK-NEXT: uOps Per Cycle: 0.71 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 2.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 80 1.00 * U buffer_load_dwordx4 v[30:33], v4, s[0:3], 0 offen offset:4092 +# CHECK-NEXT: 1 80 1.00 * U buffer_store_dword v0, v1, s[0:3], 0 offen + +# CHECK: Resources: +# CHECK-NEXT: [0] - HWBranch +# CHECK-NEXT: [1] - HWExport +# CHECK-NEXT: [2] - HWLGKM +# CHECK-NEXT: [3] - HWSALU +# CHECK-NEXT: [4] - HWVALU +# CHECK-NEXT: [5] - HWVMEM +# CHECK-NEXT: [6] - HWXDL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] +# CHECK-NEXT: - - - - - 2.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: - - - - - 1.00 - buffer_load_dwordx4 v[30:33], v4, s[0:3], 0 offen offset:4092 +# CHECK-NEXT: - - - - - 1.00 - buffer_store_dword v0, v1, s[0:3], 0 offen