| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 | ||
| ; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck %s | ||
|
|
||
| ; This showcases a miscompile that was fixed in #83107: | ||
| ; - The memset will be type-legalized to a 512 bit store + 2 x 128 bit stores. | ||
| ; - The load and store of q alias the upper 128 bit store of p. | ||
| ; - The aliasing 128 bit store will be between the chain of the scalar | ||
| ; load/store: | ||
| ; | ||
| ; t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ... | ||
| ; t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ... | ||
| ; | ||
| ; t44: i64,ch = load<(load (s32) from %ir.q), sext from i32> t0, ... | ||
| ; t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ... | ||
| ; t46: ch = store<(store (s32) into %ir.q), trunc to i32> t50, ... | ||
| ; | ||
| ; Previously, the scalar load/store was incorrectly combined away: | ||
| ; | ||
| ; t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ... | ||
| ; t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ... | ||
| ; | ||
| ; // MISSING | ||
| ; t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ... | ||
| ; // MISSING | ||
| ; | ||
| ; - We need to compile with an exact VLEN so that we select an ISD::STORE node | ||
| ; which triggers the combine | ||
| ; - The miscompile doesn't happen if we use separate GEPs as we need the stores | ||
| ; to share the same MachinePointerInfo | ||
| define void @aliasing(ptr %p) { | ||
| ; CHECK-LABEL: aliasing: | ||
| ; CHECK: # %bb.0: | ||
| ; CHECK-NEXT: lw a1, 84(a0) | ||
| ; CHECK-NEXT: addi a2, a0, 80 | ||
| ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma | ||
| ; CHECK-NEXT: vmv.v.i v8, 0 | ||
| ; CHECK-NEXT: vs1r.v v8, (a2) | ||
| ; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma | ||
| ; CHECK-NEXT: vmv.v.i v12, 0 | ||
| ; CHECK-NEXT: vs4r.v v12, (a0) | ||
| ; CHECK-NEXT: addi a2, a0, 64 | ||
| ; CHECK-NEXT: vs1r.v v8, (a2) | ||
| ; CHECK-NEXT: sw a1, 84(a0) | ||
| ; CHECK-NEXT: ret | ||
| ; %q is byte offset 84 from %p, i.e. inside the 96 bytes zeroed by the memset below. | ||
| %q = getelementptr inbounds i8, ptr %p, i64 84 | ||
| ; The i32 at %q is read before the memset and written back after it, so the | ||
| ; expected assembly keeps the lw before and the sw after the vector stores. | ||
| %tmp = load i32, ptr %q | ||
| tail call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 96, i1 false) | ||
| store i32 %tmp, ptr %q | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s | ||
| ; This test only intends to check the vstoren builtin name resolution. | ||
| ; The calls to the OpenCL builtins are not valid and will not pass SPIR-V validation. | ||
|
|
||
| ; CHECK-DAG: %[[#IMPORT:]] = OpExtInstImport "OpenCL.std" | ||
|
|
||
| ; CHECK-DAG: %[[#VOID:]] = OpTypeVoid | ||
| ; CHECK-DAG: %[[#INT8:]] = OpTypeInt 8 0 | ||
| ; CHECK-DAG: %[[#INT64:]] = OpTypeInt 64 0 | ||
| ; CHECK-DAG: %[[#VINT8:]] = OpTypeVector %[[#INT8]] 2 | ||
| ; CHECK-DAG: %[[#PTRINT8:]] = OpTypePointer CrossWorkgroup %[[#INT8]] | ||
|
|
||
| ; CHECK: %[[#DATA:]] = OpFunctionParameter %[[#VINT8]] | ||
| ; CHECK: %[[#OFFSET:]] = OpFunctionParameter %[[#INT64]] | ||
| ; CHECK: %[[#ADDRESS:]] = OpFunctionParameter %[[#PTRINT8]] | ||
|
|
||
| ; Forwards the three kernel parameters, in order, to the mangled vstore2 builtin; | ||
| ; the expected output is a single OpExtInst vstoren taking the same three operands. | ||
| define spir_kernel void @test_fn(<2 x i8> %data, i64 %offset, ptr addrspace(1) %address) { | ||
| ; CHECK: %[[#]] = OpExtInst %[[#VOID]] %[[#IMPORT]] vstoren %[[#DATA]] %[[#OFFSET]] %[[#ADDRESS]] | ||
| call spir_func void @_Z7vstore2Dv2_cmPU3AS1c(<2 x i8> %data, i64 %offset, ptr addrspace(1) %address) | ||
| ret void | ||
| } | ||
|
|
||
| declare spir_func void @_Z7vstore2Dv2_cmPU3AS1c(<2 x i8>, i64, ptr addrspace(1)) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -print-after-all -o - 2>&1 | FileCheck %s | ||
|
|
||
| ; CHECK: *** IR Dump After SPIRV emit intrinsics (emit-intrinsics) *** | ||
|
|
||
| ; %srcimg is declared with the target("spirv.Image", ...) type directly. The negative | ||
| ; check inside the body requires that no llvm.spv.assign.type.p1 call for %srcimg | ||
| ; appears in the dump before the closing brace of the function. | ||
| define spir_kernel void @test(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %srcimg) { | ||
| ; CHECK-NOT: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef) | ||
| %call = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %srcimg) | ||
| ret void | ||
| ; CHECK: } | ||
| } | ||
|
|
||
| declare spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -print-after-all -o - 2>&1 | FileCheck %s | ||
|
|
||
| ; CHECK: *** IR Dump After SPIRV emit intrinsics (emit-intrinsics) *** | ||
|
|
||
| ; %src is only used through a same-type bitcast followed by an i32-typed GEP. | ||
| ; The checks require %src to be assigned the i32 pointee type (metadata i32 0) | ||
| ; and forbid an i8 assignment (metadata i8 undef) before it. | ||
| define spir_kernel void @test_pointer_cast(ptr addrspace(1) %src) { | ||
| ; CHECK-NOT: call void @llvm.spv.assign.ptr.type.p1(ptr addrspace(1) %src, metadata i8 undef, i32 1) | ||
| ; CHECK: call void @llvm.spv.assign.ptr.type.p1(ptr addrspace(1) %src, metadata i32 0, i32 1) | ||
| %b = bitcast ptr addrspace(1) %src to ptr addrspace(1) | ||
| %g = getelementptr inbounds i32, ptr addrspace(1) %b, i64 52 | ||
| ret void | ||
| ; CHECK: } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -print-after-all -o - 2>&1 | FileCheck %s | ||
|
|
||
| ; CHECK: *** IR Dump After SPIRV emit intrinsics (emit-intrinsics) *** | ||
|
|
||
| ; The same untyped %srcimg is passed twice to the ocl_image2d_ro builtin. The checks | ||
| ; require one spirv.Image type assignment for %srcimg and forbid a second identical | ||
| ; assignment between it and the closing brace of the function. | ||
| define spir_kernel void @test(ptr addrspace(1) %srcimg) { | ||
| ; CHECK: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef) | ||
| %call1 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg) | ||
| ; CHECK-NOT: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef) | ||
| %call2 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg) | ||
| ret void | ||
| ; CHECK: } | ||
| } | ||
|
|
||
| declare spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1)) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s | ||
| ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} | ||
|
|
||
| ; CHECK-DAG: %[[#INT8:]] = OpTypeInt 8 0 | ||
| ; CHECK-DAG: %[[#PTRINT8:]] = OpTypePointer CrossWorkgroup %[[#INT8]] | ||
|
|
||
| define spir_kernel void @test_fn(ptr addrspace(1) %src) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_type_qual !4 !kernel_arg_base_type !3 { | ||
| entry: | ||
| %g1 = call spir_func i64 @_Z13get_global_idj(i32 0) | ||
| %i1 = insertelement <3 x i64> undef, i64 %g1, i32 0 | ||
| %g2 = call spir_func i64 @_Z13get_global_idj(i32 1) | ||
| %i2 = insertelement <3 x i64> %i1, i64 %g2, i32 1 | ||
| %g3 = call spir_func i64 @_Z13get_global_idj(i32 2) | ||
| %i3 = insertelement <3 x i64> %i2, i64 %g3, i32 2 | ||
| %e = extractelement <3 x i64> %i3, i32 0 | ||
| %c1 = trunc i64 %e to i32 | ||
| %c2 = sext i32 %c1 to i64 | ||
| %b = bitcast ptr addrspace(1) %src to ptr addrspace(1) | ||
|
|
||
| ; Make sure that builtin call directly uses either an OpBitcast or OpFunctionParameter of i8* type | ||
| ; CHECK: %[[#BITCASTorPARAMETER:]] = {{OpBitcast|OpFunctionParameter}}{{.*}}%[[#PTRINT8]]{{.*}} | ||
| ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] vloadn %[[#]] %[[#BITCASTorPARAMETER]] 3 | ||
| %call = call spir_func <3 x i8> @_Z6vload3mPU3AS1Kc(i64 %c2, ptr addrspace(1) %b) | ||
|
|
||
| ret void | ||
| } | ||
|
|
||
| declare spir_func i64 @_Z13get_global_idj(i32) | ||
|
|
||
| declare spir_func <3 x i8> @_Z6vload3mPU3AS1Kc(i64, ptr addrspace(1)) | ||
|
|
||
| !1 = !{i32 1} | ||
| !2 = !{!"none"} | ||
| !3 = !{!"char3*"} | ||
| !4 = !{!""} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| ; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown %s -o - 2>&1 | FileCheck %s | ||
|
|
||
| ; CHECK: LLVM ERROR: Type mismatch {{.*}} | ||
|
|
||
| ; The same %srcimg is passed to both the ocl_image2d_ro and the ocl_image2d_rw variant | ||
| ; of the builtin; the RUN line expects llc to fail with the "Type mismatch" error. | ||
| define spir_kernel void @test(ptr addrspace(1) %srcimg) { | ||
| %call1 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg) | ||
| %call2 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_rw(ptr addrspace(1) %srcimg) | ||
| ret void | ||
| } | ||
|
|
||
| declare spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1)) | ||
| declare spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_rw(ptr addrspace(1)) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s | ||
| ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} | ||
|
|
||
| ; %srcimg is declared as a plain ptr addrspace(1), yet the checks expect the function | ||
| ; parameter to be emitted with a 2D ReadOnly OpTypeImage and used by OpImageQuerySizeLod. | ||
| define spir_kernel void @test(ptr addrspace(1) %srcimg) { | ||
| ; CHECK: %[[#VOID:]] = OpTypeVoid | ||
| ; CHECK: %[[#IMAGE:]] = OpTypeImage %[[#VOID]] 2D 0 0 0 0 Unknown ReadOnly | ||
| ; CHECK: %[[#PARAM:]] = OpFunctionParameter %[[#IMAGE]] | ||
| ; CHECK: %[[#]] = OpImageQuerySizeLod %[[#]] %[[#PARAM]] %[[#]] | ||
| %call = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg) | ||
| ret void | ||
| } | ||
|
|
||
| declare spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1)) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s | ||
| ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} | ||
|
|
||
| target triple = "spirv64-unknown-unknown" | ||
|
|
||
| ; CHECK-DAG: %[[#VOID:]] = OpTypeVoid | ||
| ; CHECK-DAG: %[[#INT32:]] = OpTypeInt 32 0 | ||
| ; CHECK-DAG: %[[#STRUCT1:]] = OpTypeStruct %[[#INT32]] | ||
| ; CHECK-DAG: %[[#CONST:]] = OpConstant %[[#INT32]] 7 | ||
| ; CHECK-DAG: %[[#ARRAY:]] = OpTypeArray %[[#STRUCT1]] %[[#CONST]] | ||
| ; CHECK-DAG: %[[#STRUCT2:]] = OpTypeStruct %[[#ARRAY]] | ||
| ; CHECK-DAG: %[[#PTR:]] = OpTypePointer Function %[[#STRUCT2]] | ||
|
|
||
| ; CHECK: %[[#FUNC:]] = OpTypeFunction %[[#VOID]] %[[#PTR]] | ||
| ; CHECK: %[[#]] = OpFunction %[[#VOID]] None %[[#FUNC]] | ||
| ; CHECK: %[[#]] = OpFunctionParameter %[[#PTR]] | ||
|
|
||
| %struct.S = type { i32 } | ||
| %struct.__wrapper_class = type { [7 x %struct.S] } | ||
|
|
||
| ; Empty kernel: the byref(%struct.__wrapper_class) attribute is the only pointee-type | ||
| ; information on %_arg_Arr. The checks above expect the parameter to be a Function-storage | ||
| ; pointer to the struct wrapping the 7-element array. | ||
| define spir_kernel void @foo(ptr noundef byref(%struct.__wrapper_class) align 4 %_arg_Arr) { | ||
| entry: | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s | ||
| ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} | ||
|
|
||
| ; CHECK-DAG: %[[#VOID:]] = OpTypeVoid | ||
| ; CHECK-DAG: %[[#INT32:]] = OpTypeInt 32 0 | ||
| ; CHECK-DAG: %[[#STRUCT1:]] = OpTypeStruct %[[#INT32]] | ||
| ; CHECK-DAG: %[[#CONST:]] = OpConstant %[[#INT32]] 7 | ||
| ; CHECK-DAG: %[[#ARRAY:]] = OpTypeArray %[[#STRUCT1]] %[[#CONST]] | ||
| ; CHECK-DAG: %[[#STRUCT2:]] = OpTypeStruct %[[#ARRAY]] | ||
| ; CHECK-DAG: %[[#PTR:]] = OpTypePointer Function %[[#STRUCT2]] | ||
|
|
||
| ; CHECK: %[[#FUNC:]] = OpTypeFunction %[[#VOID]] %[[#PTR]] | ||
| ; CHECK: %[[#]] = OpFunction %[[#VOID]] None %[[#FUNC]] | ||
| ; CHECK: %[[#]] = OpFunctionParameter %[[#PTR]] | ||
|
|
||
| %struct.S = type { i32 } | ||
| %struct.__wrapper_class = type { [7 x %struct.S] } | ||
|
|
||
| ; Empty kernel: the byval(%struct.__wrapper_class) attribute is the only pointee-type | ||
| ; information on %_arg_Arr. The checks above expect the parameter to be a Function-storage | ||
| ; pointer to the struct wrapping the 7-element array. | ||
| define spir_kernel void @foo(ptr noundef byval(%struct.__wrapper_class) align 4 %_arg_Arr) { | ||
| entry: | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s | ||
|
|
||
| ; TODO: OpFunctionParameter should be a pointer of struct base type. | ||
| ; XFAIL: * | ||
|
|
||
| %struct = type { | ||
| i32, | ||
| i16 | ||
| } | ||
|
|
||
| %nested_struct = type { | ||
| %struct, | ||
| i16 | ||
| } | ||
|
|
||
| ; Stores an undef %nested_struct through the untyped %ptr. This file has no output | ||
| ; checks yet and is marked XFAIL; see the TODO at the top of the test. | ||
| define void @foo(ptr %ptr) { | ||
| store %nested_struct undef, ptr %ptr | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| # RUN: not llvm-mc -triple x86_64 -show-encoding %s 2>&1 | FileCheck %s | ||
|
|
||
| # CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15 | ||
| # CHECK: addq $1234, %cs:-96, %rax | ||
| addq $1234, %cs:-96, %rax | ||
|
|
||
| # CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15 | ||
| # CHECK: subq $1234, %fs:257(%rbx, %rcx), %rax | ||
| subq $1234, %fs:257(%rbx, %rcx), %rax | ||
|
|
||
| # CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15 | ||
| # CHECK: orq $1234, 257(%ebx, %ecx), %rax | ||
| orq $1234, 257(%ebx, %ecx), %rax | ||
|
|
||
| # CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15 | ||
| # CHECK: xorq $1234, %gs:257(%ebx), %rax | ||
| xorq $1234, %gs:257(%ebx), %rax | ||
|
|
||
| # CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15 | ||
| # CHECK: {nf} andq $1234, %cs:-96 | ||
| {nf} andq $1234, %cs:-96 | ||
|
|
||
| # CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15 | ||
| # CHECK: {evex} adcq $1234, %cs:-96 | ||
| {evex} adcq $1234, %cs:-96 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| # RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s | ||
| # Each pair of expectation lines in the body gives a verifier message followed by text | ||
| # from the instruction that comes right after the pair. | ||
| --- | ||
| name: basic | ||
| tracksRegLiveness: true | ||
| body: | | ||
| bb.0: | ||
| successors: %bb.1, %bb.2; | ||
| %0:sgpr_64 = CONVERGENCECTRL_ANCHOR | ||
| ; CHECK: Entry intrinsic cannot be preceded by a convergent operation in the same basic block. | ||
| ; CHECK: CONVERGENCECTRL_ENTRY | ||
| %1:sgpr_64 = CONVERGENCECTRL_ENTRY | ||
| ; CHECK: Loop intrinsic cannot be preceded by a convergent operation in the same basic block. | ||
| ; CHECK: CONVERGENCECTRL_LOOP | ||
| %2:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64 | ||
| S_CBRANCH_EXECZ %bb.1, implicit $exec | ||
| S_BRANCH %bb.2 | ||
| bb.1: | ||
| successors: %bb.2; | ||
| ; CHECK: Entry intrinsic can occur only in the entry block. | ||
| ; CHECK: CONVERGENCECTRL_ENTRY | ||
| %5:sgpr_64 = CONVERGENCECTRL_ENTRY | ||
| bb.2: | ||
| ; CHECK: Convergence control tokens can only be used by convergent operations. | ||
| ; CHECK: G_PHI | ||
| %6:sgpr_64 = G_PHI %0:sgpr_64, %bb.0, %0:sgpr_64, %bb.1 | ||
| %7:sgpr_64 = CONVERGENCECTRL_ANCHOR | ||
| %8:sgpr_64 = IMPLICIT_DEF | ||
| ; This call uses a single token (%7) and has no expected diagnostic of its own. | ||
| %4:sgpr_64 = SI_CALL %8:sgpr_64, 1, implicit %7:sgpr_64 | ||
| ; CHECK: An operation can use at most one convergence control token. | ||
| ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2 | ||
| %9:sgpr_64 = SI_CALL %8:sgpr_64, 2, implicit %7:sgpr_64, implicit %7:sgpr_64 | ||
| ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function. | ||
| ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 3 | ||
| %10:sgpr_64 = SI_CALL %8:sgpr_64, 3 | ||
| ... |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| # RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s | ||
| # The token %0 is defined in bb.0 and used inside four cycles formed by the branches | ||
| # below: bb.3 <-> bb.4, bb.6 -> bb.7 -> bb.6, bb.8 -> bb.8 and bb.9 -> bb.9. | ||
| --- | ||
| name: cycles | ||
| body: | | ||
| bb.0: | ||
| %0:sgpr_64 = CONVERGENCECTRL_ANCHOR | ||
| %1:sgpr_64 = IMPLICIT_DEF | ||
| S_CBRANCH_EXECZ %bb.9, implicit $exec | ||
| S_BRANCH %bb.1 | ||
| bb.1: | ||
| S_CBRANCH_EXECZ %bb.8, implicit $exec | ||
| S_BRANCH %bb.5 | ||
| bb.2: | ||
| S_CBRANCH_EXECZ %bb.3, implicit $exec | ||
| S_BRANCH %bb.4 | ||
| bb.3: | ||
| ; CHECK: Cycle heart must dominate all blocks in the cycle. | ||
| ; NOTE(review): the next line is a plain comment, not a FileCheck directive, so it is | ||
| ; never matched against the verifier output -- confirm that is intended. | ||
| ; Irreducible cycle: entries(bb.4 bb.3) | ||
| %3:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64 | ||
| S_BRANCH %bb.4 | ||
| bb.4: | ||
| S_BRANCH %bb.3 | ||
| bb.5: | ||
| S_CBRANCH_EXECZ %bb.6, implicit $exec | ||
| S_BRANCH %bb.2 | ||
| bb.6: | ||
| S_BRANCH %bb.7 | ||
| bb.7: | ||
| ; CHECK: Cycle heart must dominate all blocks in the cycle. | ||
| ; NOTE(review): the next line is a plain comment, not a FileCheck directive, so it is | ||
| ; never matched against the verifier output -- confirm that is intended. | ||
| ; Reducible cycle: entries(bb.6) bb.7 | ||
| %4:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64 | ||
| S_BRANCH %bb.6 | ||
| bb.8: | ||
| ; CHECK: Two static convergence token uses in a cycle that does not contain either token's definition. | ||
| %5:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64 | ||
| %6:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64 | ||
| S_BRANCH %bb.8 | ||
| bb.9: | ||
| ; CHECK: Convergence token used by an instruction other than llvm.experimental.convergence.loop in a cycle that does not contain the token's definition. | ||
| %7:sgpr_64 = G_SI_CALL %1:sgpr_64, 3, implicit %0:sgpr_64 | ||
| S_BRANCH %bb.9 | ||
| ... |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # Mark the tests covered by this configuration as unsupported when "AMDGPU" is not | ||
| # listed in config.root.targets. | ||
| if not "AMDGPU" in config.root.targets: | ||
| config.unsupported = True |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| # RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s | ||
| # The first SI_CALL carries no convergence token operand, while the second one uses the | ||
| # token defined by CONVERGENCECTRL_ANCHOR; the verifier is expected to reject that mix. | ||
| --- | ||
| name: mixed2 | ||
| body: | | ||
| bb.0: | ||
| %0:sgpr_64 = IMPLICIT_DEF | ||
| %1:sgpr_64 = SI_CALL %0, 1 | ||
| ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function. | ||
| ; CHECK: CONVERGENCECTRL_ANCHOR | ||
| %2:sgpr_64 = CONVERGENCECTRL_ANCHOR | ||
| ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function. | ||
| ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2 | ||
| %3:sgpr_64 = SI_CALL %0, 2, implicit %2:sgpr_64 | ||
| ... |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| # RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s | ||
| # Two anchors define the tokens %0 and %1. Calls using %0 (ids 1 and 3) are interleaved | ||
| # with calls using %1 (ids 2 and 4); the nesting diagnostic is expected on calls 2 and 4. | ||
| --- | ||
| name: region_nesting | ||
| body: | | ||
| bb.0: | ||
| %0:sgpr_64 = CONVERGENCECTRL_ANCHOR | ||
| %1:sgpr_64 = CONVERGENCECTRL_ANCHOR | ||
| %2:sgpr_64 = IMPLICIT_DEF | ||
| %3:sgpr_64 = SI_CALL %2, 1, implicit %0:sgpr_64 | ||
| ; CHECK: Convergence region is not well-nested. | ||
| ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2 | ||
| %4:sgpr_64 = SI_CALL %2, 2, implicit %1:sgpr_64 | ||
| S_CBRANCH_EXECZ %bb.1, implicit $exec | ||
| S_BRANCH %bb.2 | ||
| bb.1: | ||
| %5:sgpr_64 = SI_CALL %2, 3, implicit %0:sgpr_64 | ||
| bb.2: | ||
| ; CHECK: Convergence region is not well-nested. | ||
| ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 4 | ||
| %6:sgpr_64 = SI_CALL %2, 4, implicit %1:sgpr_64 | ||
| ... |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| ; A pre-commit test to show that branch weights associated with invoke are not updated. | ||
| ; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s | ||
|
|
||
| declare i32 @__gxx_personality_v0(...) | ||
|
|
||
| ; @caller has entry count 1000 (!15) and its call to @callee carries branch weight 1000 (!16). | ||
| ; The expectations at the end of the file require @callee's invoke of @inner_callee to | ||
| ; appear in this function after the inline pass has run. | ||
| define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 { | ||
| call void @callee(ptr %func), !prof !16 | ||
| ret void | ||
| } | ||
|
|
||
| declare void @inner_callee(ptr %func) | ||
|
|
||
| define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 { | ||
| invoke void @inner_callee(ptr %func) | ||
| to label %ret unwind label %lpad, !prof !18 | ||
|
|
||
| ret: | ||
| ret void | ||
|
|
||
| lpad: | ||
| %exn = landingpad {ptr, i32} | ||
| cleanup | ||
| unreachable | ||
| } | ||
|
|
||
| !llvm.module.flags = !{!1} | ||
| !1 = !{i32 1, !"ProfileSummary", !2} | ||
| !2 = !{!3, !4, !5, !6, !7, !8, !9, !10} | ||
| !3 = !{!"ProfileFormat", !"SampleProfile"} | ||
| !4 = !{!"TotalCount", i64 10000} | ||
| !5 = !{!"MaxCount", i64 10} | ||
| !6 = !{!"MaxInternalCount", i64 1} | ||
| !7 = !{!"MaxFunctionCount", i64 2000} | ||
| !8 = !{!"NumCounts", i64 2} | ||
| !9 = !{!"NumFunctions", i64 2} | ||
| !10 = !{!"DetailedSummary", !11} | ||
| !11 = !{!12, !13, !14} | ||
| !12 = !{i32 10000, i64 100, i32 1} | ||
| !13 = !{i32 999000, i64 100, i32 1} | ||
| !14 = !{i32 999999, i64 1, i32 2} | ||
| !15 = !{!"function_entry_count", i64 1000} | ||
| !16 = !{!"branch_weights", i32 1000} | ||
| !17 = !{!"function_entry_count", i32 1500} | ||
| !18 = !{!"branch_weights", i32 1500} | ||
|
|
||
| ; CHECK-LABEL: @caller( | ||
| ; CHECK: invoke void @inner_callee( | ||
| ; CHECK-NEXT: {{.*}} !prof ![[PROF0:[0-9]+]] | ||
|
|
||
| ; CHECK-LABEL: @callee( | ||
| ; CHECK: invoke void @inner_callee( | ||
| ; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]] | ||
|
|
||
| ; CHECK: ![[PROF0]] = !{!"branch_weights", i32 1000} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,185 +1,55 @@ | ||
| ; A pre-commit test to show that value profiles associated with invoke are not updated. | ||
| ; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s | ||
|
|
||
| declare i32 @__gxx_personality_v0(...) | ||
|
|
||
| ; @caller has entry count 1000 (!15) and its call to @callee carries branch weight 1000 (!16). | ||
| ; The expectations at the end of the file require @callee's indirect invoke of %func to | ||
| ; appear in this function after the inline pass has run. | ||
| define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 { | ||
| call void @callee(ptr %func), !prof !16 | ||
| ret void | ||
| } | ||
|
|
||
| declare void @inner_callee(ptr %func) | ||
|
|
||
| define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 { | ||
| invoke void %func() | ||
| to label %ret unwind label %lpad, !prof !18 | ||
|
|
||
| ret: | ||
| ret void | ||
|
|
||
| lpad: | ||
| %exn = landingpad {ptr, i32} | ||
| cleanup | ||
| unreachable | ||
| } | ||
|
|
||
| !llvm.module.flags = !{!1} | ||
| !1 = !{i32 1, !"ProfileSummary", !2} | ||
| !2 = !{!3, !4, !5, !6, !7, !8, !9, !10} | ||
| !3 = !{!"ProfileFormat", !"SampleProfile"} | ||
| !4 = !{!"TotalCount", i64 10000} | ||
| !5 = !{!"MaxCount", i64 10} | ||
| !6 = !{!"MaxInternalCount", i64 1} | ||
| !7 = !{!"MaxFunctionCount", i64 2000} | ||
| !8 = !{!"NumCounts", i64 2} | ||
| !9 = !{!"NumFunctions", i64 2} | ||
| !10 = !{!"DetailedSummary", !11} | ||
| !11 = !{!12, !13, !14} | ||
| !12 = !{i32 10000, i64 100, i32 1} | ||
| !13 = !{i32 999000, i64 100, i32 1} | ||
| !14 = !{i32 999999, i64 1, i32 2} | ||
| !15 = !{!"function_entry_count", i64 1000} | ||
| !16 = !{!"branch_weights", i64 1000} | ||
| !17 = !{!"function_entry_count", i32 1500} | ||
| !18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600} | ||
|
|
||
| ; CHECK-LABEL: @caller( | ||
| ; CHECK: invoke void %func( | ||
| ; CHECK-NEXT: {{.*}} !prof ![[PROF0:[0-9]+]] | ||
|
|
||
| ; CHECK-LABEL: @callee( | ||
| ; CHECK: invoke void %func( | ||
| ; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]] | ||
|
|
||
| ; CHECK: ![[PROF0]] = !{!"VP", i32 0, i64 1000, i64 123, i64 600, i64 456, i64 400} | ||
| ; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 500, i64 123, i64 300, i64 456, i64 200} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,162 @@ | ||
| # RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx940 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s | ||
|
|
||
| # CHECK: Iterations: 1 | ||
| # CHECK: Instructions: 58 | ||
| # CHECK: Total Cycles: 543 | ||
| # CHECK: Total uOps: 58 | ||
|
|
||
| # CHECK: Resources: | ||
| # CHECK: [0] - HWBranch | ||
| # CHECK: [1] - HWExport | ||
| # CHECK: [2] - HWLGKM | ||
| # CHECK: [3] - HWSALU | ||
| # CHECK: [4] - HWVALU | ||
| # CHECK: [5] - HWVMEM | ||
| # CHECK: [6] - HWXDL | ||
|
|
||
| v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] | ||
| v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] | ||
|
|
||
| v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] | ||
| v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] | ||
|
|
||
| v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] | ||
| v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] | ||
|
|
||
| v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] | ||
| v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] | ||
|
|
||
| v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3] | ||
| v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3] | ||
|
|
||
| v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15] | ||
| v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15] | ||
|
|
||
| v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] | ||
| v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3] | ||
|
|
||
| v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15] | ||
| v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15] | ||
|
|
||
| v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3] | ||
| v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3] | ||
|
|
||
| v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] | ||
| v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] | ||
|
|
||
| v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] | ||
| v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] | ||
|
|
||
| v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33] | ||
| v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33] | ||
|
|
||
| v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] | ||
| v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] | ||
|
|
||
| v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5] | ||
| v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5] | ||
|
|
||
| v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] | ||
| v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] | ||
|
|
||
| v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65] | ||
| v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65] | ||
|
|
||
| v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] | ||
| v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] | ||
|
|
||
| v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] | ||
| v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] | ||
|
|
||
| v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] | ||
| v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] | ||
|
|
||
| v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 | ||
| v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 | ||
|
|
||
| v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] | ||
| v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] | ||
|
|
||
| v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] | ||
| v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] | ||
|
|
||
| v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] | ||
| v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] | ||
|
|
||
| v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65] | ||
| v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] | ||
|
|
||
| v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 | ||
| v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 | ||
|
|
||
| v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 | ||
| v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 | ||
|
|
||
| v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 | ||
| v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 | ||
|
|
||
| v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 | ||
| v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 | ||
|
|
||
| v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 | ||
| v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 | ||
|
|
||
| # CHECK: [0] [1] [2] [3] [4] [5] [6] Instructions: | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] | ||
| # CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] | ||
| # CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] | ||
| # CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] | ||
| # CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] | ||
| # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3] | ||
| # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15] | ||
| # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] | ||
| # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15] | ||
| # CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3] | ||
| # CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] | ||
| # CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] | ||
| # CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33] | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] | ||
| # CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5] | ||
| # CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65] | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65] | ||
| # CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] | ||
| # CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] | ||
| # CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] | ||
| # CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] | ||
| # CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65] | ||
| # CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] | ||
| # CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 | ||
| # CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 | ||
| # CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 | ||
| # CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 | ||
| # CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 | ||
| # CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 | ||
| # CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 | ||
| # CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 | ||
| # CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 | ||
| # CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 |