78 changes: 42 additions & 36 deletions llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -916,13 +916,13 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-O0-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_mov_b32 s32, 0x1200
; WAVE32-O0-NEXT: s_getpc_b64 s[20:21]
; WAVE32-O0-NEXT: s_mov_b32 s20, s0
; WAVE32-O0-NEXT: s_load_dwordx4 s[20:23], s[20:21], 0x0
; WAVE32-O0-NEXT: s_getpc_b64 s[24:25]
; WAVE32-O0-NEXT: s_mov_b32 s24, s0
; WAVE32-O0-NEXT: s_load_dwordx4 s[24:27], s[24:25], 0x0
; WAVE32-O0-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-O0-NEXT: s_bitset0_b32 s23, 21
; WAVE32-O0-NEXT: s_add_u32 s20, s20, s9
; WAVE32-O0-NEXT: s_addc_u32 s21, s21, 0
; WAVE32-O0-NEXT: s_bitset0_b32 s27, 21
; WAVE32-O0-NEXT: s_add_u32 s24, s24, s9
; WAVE32-O0-NEXT: s_addc_u32 s25, s25, 0
; WAVE32-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; WAVE32-O0-NEXT: s_mov_b32 s14, s8
; WAVE32-O0-NEXT: s_mov_b32 s13, s7
Expand All @@ -934,17 +934,17 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-O0-NEXT: v_writelane_b32 v3, s0, 0
; WAVE32-O0-NEXT: s_lshr_b32 s0, s0, 5
; WAVE32-O0-NEXT: v_writelane_b32 v3, s0, 1
; WAVE32-O0-NEXT: s_or_saveexec_b32 s19, -1
; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:128 ; 4-byte Folded Spill
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s19
; WAVE32-O0-NEXT: s_or_saveexec_b32 s20, -1
; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:128 ; 4-byte Folded Spill
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s20
; WAVE32-O0-NEXT: v_mov_b32_e32 v3, 42
; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], 0
; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[24:27], 0
; WAVE32-O0-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-O0-NEXT: s_mov_b64 s[0:1], s[20:21]
; WAVE32-O0-NEXT: s_mov_b64 s[2:3], s[22:23]
; WAVE32-O0-NEXT: s_mov_b64 s[0:1], s[24:25]
; WAVE32-O0-NEXT: s_mov_b64 s[2:3], s[26:27]
; WAVE32-O0-NEXT: s_mov_b32 s6, s32
; WAVE32-O0-NEXT: v_mov_b32_e32 v3, 17
; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], s6 offset:4
; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[24:27], s6 offset:4
; WAVE32-O0-NEXT: s_mov_b32 s6, stack_passed_argument@abs32@hi
; WAVE32-O0-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE32-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
Expand Down Expand Up @@ -1018,10 +1018,11 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-O0-NEXT: v_mov_b32_e32 v29, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18_sgpr19
; WAVE32-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-O0-NEXT: s_or_saveexec_b32 s19, -1
; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:128 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s19
; WAVE32-O0-NEXT: s_or_saveexec_b32 s20, -1
; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s20
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE32-O0-NEXT: v_readlane_b32 s1, v0, 1
; WAVE32-O0-NEXT: v_readlane_b32 s0, v0, 0
Expand Down Expand Up @@ -1136,6 +1137,7 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE64-O0-NEXT: v_mov_b32_e32 v29, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18_sgpr19
; WAVE64-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE64-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload
Expand All @@ -1153,13 +1155,13 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, 0x1200
; WAVE32-WWM-PREALLOC-NEXT: s_getpc_b64 s[20:21]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s20, s0
; WAVE32-WWM-PREALLOC-NEXT: s_load_dwordx4 s[20:23], s[20:21], 0x0
; WAVE32-WWM-PREALLOC-NEXT: s_getpc_b64 s[24:25]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s24, s0
; WAVE32-WWM-PREALLOC-NEXT: s_load_dwordx4 s[24:27], s[24:25], 0x0
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_bitset0_b32 s23, 21
; WAVE32-WWM-PREALLOC-NEXT: s_add_u32 s20, s20, s9
; WAVE32-WWM-PREALLOC-NEXT: s_addc_u32 s21, s21, 0
; WAVE32-WWM-PREALLOC-NEXT: s_bitset0_b32 s27, 21
; WAVE32-WWM-PREALLOC-NEXT: s_add_u32 s24, s24, s9
; WAVE32-WWM-PREALLOC-NEXT: s_addc_u32 s25, s25, 0
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s14, s8
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s13, s7
Expand All @@ -1172,13 +1174,13 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s0, s0, 5
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s0, 1
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v3, 42
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v3, off, s[20:23], 0
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v3, off, s[24:27], 0
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[0:1], s[20:21]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[2:3], s[22:23]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[0:1], s[24:25]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[2:3], s[26:27]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s6, s32
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v3, 17
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v3, off, s[20:23], s6 offset:4
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v3, off, s[24:27], s6 offset:4
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s6, stack_passed_argument@abs32@hi
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE32-WWM-PREALLOC-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
Expand Down Expand Up @@ -1252,6 +1254,7 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v29, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18_sgpr19
; WAVE32-WWM-PREALLOC-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s1, v32, 1
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s0, v32, 0
Expand Down Expand Up @@ -1344,7 +1347,7 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-O0-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_mov_b32 s25, s33
; WAVE32-O0-NEXT: s_mov_b32 s26, s33
; WAVE32-O0-NEXT: s_mov_b32 s33, s32
; WAVE32-O0-NEXT: s_xor_saveexec_b32 s16, -1
; WAVE32-O0-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
Expand All @@ -1358,9 +1361,9 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-O0-NEXT: v_writelane_b32 v0, s16, 0
; WAVE32-O0-NEXT: s_lshr_b32 s16, s16, 5
; WAVE32-O0-NEXT: v_writelane_b32 v0, s16, 1
; WAVE32-O0-NEXT: s_or_saveexec_b32 s24, -1
; WAVE32-O0-NEXT: s_or_saveexec_b32 s25, -1
; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s24
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s25
; WAVE32-O0-NEXT: v_mov_b32_e32 v0, 42
; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33
; WAVE32-O0-NEXT: s_waitcnt_vscnt null, 0x0
Expand Down Expand Up @@ -1437,10 +1440,11 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-O0-NEXT: v_mov_b32_e32 v29, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18_sgpr19
; WAVE32-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-O0-NEXT: s_or_saveexec_b32 s24, -1
; WAVE32-O0-NEXT: s_or_saveexec_b32 s25, -1
; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s24
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s25
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE32-O0-NEXT: v_readlane_b32 s5, v0, 1
; WAVE32-O0-NEXT: v_readlane_b32 s4, v0, 0
Expand All @@ -1456,14 +1460,14 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-O0-NEXT: s_add_i32 s32, s32, 0xffffee00
; WAVE32-O0-NEXT: s_mov_b32 s33, s25
; WAVE32-O0-NEXT: s_mov_b32 s33, s26
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_mov_b32 s19, s33
; WAVE64-O0-NEXT: s_mov_b32 s28, s33
; WAVE64-O0-NEXT: s_mov_b32 s33, s32
; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[16:17], -1
; WAVE64-O0-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
Expand Down Expand Up @@ -1556,6 +1560,7 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE64-O0-NEXT: v_mov_b32_e32 v29, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18_sgpr19
; WAVE64-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE64-O0-NEXT: s_or_saveexec_b64 s[26:27], -1
; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
Expand All @@ -1575,14 +1580,14 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5]
; WAVE64-O0-NEXT: s_add_i32 s32, s32, 0xffffdc00
; WAVE64-O0-NEXT: s_mov_b32 s33, s19
; WAVE64-O0-NEXT: s_mov_b32 s33, s28
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s24, s33
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s25, s33
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s32
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s16, -1
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
Expand Down Expand Up @@ -1672,6 +1677,7 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v29, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18_sgpr19
; WAVE32-WWM-PREALLOC-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s5, v32, 1
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s4, v32, 0
Expand All @@ -1687,7 +1693,7 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-WWM-PREALLOC-NEXT: s_add_i32 s32, s32, 0xffffee00
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s24
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s25
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [32 x i32], addrspace(5)
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,10 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
; SI-NEXT: bb.1.Flow:
; SI-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000)
; SI-NEXT: {{ $}}
; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %47:vgpr_32, %bb.0, %4, %bb.9
; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY4]], %bb.0, undef %49:vgpr_32, %bb.9
; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %51:vgpr_32, %bb.9
; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %53:vgpr_32, %bb.9
; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %49:vgpr_32, %bb.0, %4, %bb.9
; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY4]], %bb.0, undef %51:vgpr_32, %bb.9
; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %53:vgpr_32, %bb.9
; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %55:vgpr_32, %bb.9
; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; SI-NEXT: S_BRANCH %bb.2
; SI-NEXT: {{ $}}
Expand All @@ -249,8 +249,8 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
; SI-NEXT: bb.3:
; SI-NEXT: successors: %bb.4(0x80000000)
; SI-NEXT: {{ $}}
; SI-NEXT: [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %55:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2
; SI-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI undef %57:vgpr_32, %bb.4, [[PHI1]], %bb.2
; SI-NEXT: [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %57:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2
; SI-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI undef %59:vgpr_32, %bb.4, [[PHI1]], %bb.2
; SI-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub0, implicit $exec
; SI-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub1, implicit $exec
; SI-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_1]], %subreg.sub1
Expand Down Expand Up @@ -286,8 +286,8 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
; SI-NEXT: bb.7:
; SI-NEXT: successors: %bb.8(0x80000000)
; SI-NEXT: {{ $}}
; SI-NEXT: [[PHI6:%[0-9]+]]:vreg_64 = PHI undef %59:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6
; SI-NEXT: [[PHI7:%[0-9]+]]:vgpr_32 = PHI undef %61:vgpr_32, %bb.8, [[COPY4]], %bb.6
; SI-NEXT: [[PHI6:%[0-9]+]]:vreg_64 = PHI undef %61:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6
; SI-NEXT: [[PHI7:%[0-9]+]]:vgpr_32 = PHI undef %63:vgpr_32, %bb.8, [[COPY4]], %bb.6
; SI-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI6]].sub0, implicit $exec
; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI6]].sub1, implicit $exec
; SI-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1
Expand Down Expand Up @@ -356,9 +356,9 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
; SI-NEXT: bb.1.Flow:
; SI-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000)
; SI-NEXT: {{ $}}
; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %48:vgpr_32, %bb.0, %4, %bb.9
; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %50:vgpr_32, %bb.9
; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %52:vgpr_32, %bb.9
; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %50:vgpr_32, %bb.0, %4, %bb.9
; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %52:vgpr_32, %bb.9
; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %54:vgpr_32, %bb.9
; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; SI-NEXT: S_BRANCH %bb.2
; SI-NEXT: {{ $}}
Expand All @@ -371,7 +371,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
; SI-NEXT: bb.3:
; SI-NEXT: successors: %bb.4(0x80000000)
; SI-NEXT: {{ $}}
; SI-NEXT: [[PHI3:%[0-9]+]]:vreg_64 = PHI undef %54:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2
; SI-NEXT: [[PHI3:%[0-9]+]]:vreg_64 = PHI undef %56:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2
; SI-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI3]].sub0, implicit $exec
; SI-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI3]].sub1, implicit $exec
; SI-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_1]], %subreg.sub1
Expand Down Expand Up @@ -407,7 +407,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
; SI-NEXT: bb.7:
; SI-NEXT: successors: %bb.8(0x80000000)
; SI-NEXT: {{ $}}
; SI-NEXT: [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %56:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6
; SI-NEXT: [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %58:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6
; SI-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub0, implicit $exec
; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub1, implicit $exec
; SI-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ define protected amdgpu_kernel void @kern(ptr %addr) !llvm.amdgcn.lds.kernel.id
; CHECK-NEXT: s_mov_b32 s15, 42
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_endpgm
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ define void @test() #0 {
; GCN-O0-NEXT: s_mov_b64 s[20:21], s[0:1]
; GCN-O0-NEXT: s_mov_b64 s[0:1], s[20:21]
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[22:23]
; GCN-O0-NEXT: ; implicit-def: $sgpr18_sgpr19
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-O0-NEXT: s_or_saveexec_b64 s[28:29], -1
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[44:45]
; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[46:47]
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
; GFX9-O0-NEXT: ; implicit-def: $sgpr44_sgpr45
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[42:43]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2
Expand Down Expand Up @@ -632,6 +633,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37
; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
; GFX9-O0-NEXT: ; implicit-def: $sgpr15
; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6
; GFX9-O0-NEXT: ; implicit-def: $sgpr18_sgpr19
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
Expand Down Expand Up @@ -656,6 +657,7 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %ar
; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
; GFX9-O0-NEXT: ; implicit-def: $sgpr18_sgpr19
; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
Expand Down Expand Up @@ -1283,6 +1285,7 @@ define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 in
; GFX9-O0-NEXT: ; implicit-def: $sgpr15
; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6
; GFX9-O0-NEXT: ; implicit-def: $sgpr18_sgpr19
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
Expand Down Expand Up @@ -1526,6 +1529,7 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6
; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
; GFX9-O0-NEXT: ; implicit-def: $sgpr18_sgpr19
; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/PowerPC/fmf-propagation.ll
Original file line number Diff line number Diff line change
Expand Up @@ -577,15 +577,15 @@ define double @fcmp_nnan(double %a, double %y, double %z) {
; FP library calls can have fast-math-flags.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
; FMFDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<ptr @log2>
; FMFDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
; FMFDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1
; FMFDEBUG: ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64<ptr @log2>
; FMFDEBUG: ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1
; FMFDEBUG: f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<ptr @log2>
; GLOBALDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
; GLOBALDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1
; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64<ptr @log2>
; GLOBALDEBUG: ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1
; GLOBALDEBUG: f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:'

declare double @log2(double)
Expand Down
50 changes: 50 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/pr83017.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck %s

; This showcases a miscompile that was fixed in #83107:
; - The memset will be type-legalized to a 512 bit store + 2 x 128 bit stores.
; - the load and store of q aliases the upper 128 bits store of p.
; - The aliasing 128 bit store will be between the chain of the scalar
; load/store:
;
; t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ...
; t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ...
;
; t44: i64,ch = load<(load (s32) from %ir.q), sext from i32> t0, ...
; t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ...
; t46: ch = store<(store (s32) into %ir.q), trunc to i32> t50, ...
;
; Previously, the scalar load/store was incorrectly combined away:
;
; t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ...
; t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ...
;
; // MISSING
; t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ...
; // MISSING
;
; - We need to compile with an exact VLEN so that we select an ISD::STORE node
; which triggers the combine
; - The miscompile doesn't happen if we use separate GEPs as we need the stores
; to share the same MachinePointerInfo
define void @aliasing(ptr %p) {
; CHECK-LABEL: aliasing:
; CHECK: # %bb.0:
; CHECK-NEXT: lw a1, 84(a0)
; CHECK-NEXT: addi a2, a0, 80
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vs1r.v v8, (a2)
; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vs4r.v v12, (a0)
; CHECK-NEXT: addi a2, a0, 64
; CHECK-NEXT: vs1r.v v8, (a2)
; CHECK-NEXT: sw a1, 84(a0)
; CHECK-NEXT: ret
%q = getelementptr inbounds i8, ptr %p, i64 84
%tmp = load i32, ptr %q
tail call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 96, i1 false)
store i32 %tmp, ptr %q
ret void
}
11 changes: 4 additions & 7 deletions llvm/test/CodeGen/SPIRV/function/alloca-load-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
; CHECK-DAG: OpName %[[#FOO:]] "foo"
; CHECK-DAG: OpName %[[#GOO:]] "goo"

; CHECK-DAG: %[[#CHAR:]] = OpTypeInt 8
; CHECK-DAG: %[[#INT:]] = OpTypeInt 32
; CHECK-DAG: %[[#STACK_PTR_INT:]] = OpTypePointer Function %[[#INT]]
; CHECK-DAG: %[[#GLOBAL_PTR_INT:]] = OpTypePointer CrossWorkgroup %[[#INT]]
; CHECK-DAG: %[[#GLOBAL_PTR_CHAR:]] = OpTypePointer CrossWorkgroup %[[#CHAR]]
; CHECK-DAG: %[[#FN1:]] = OpTypeFunction %[[#INT]] %[[#INT]]
; CHECK-DAG: %[[#FN2:]] = OpTypeFunction %[[#INT]] %[[#INT]] %[[#GLOBAL_PTR_CHAR]]
; CHECK-DAG: %[[#FN2:]] = OpTypeFunction %[[#INT]] %[[#INT]] %[[#GLOBAL_PTR_INT]]

define i32 @bar(i32 %a) {
%p = alloca i32
Expand Down Expand Up @@ -55,10 +53,9 @@ define i32 @goo(i32 %a, ptr addrspace(1) %p) {

; CHECK: %[[#GOO]] = OpFunction %[[#INT]] None %[[#FN2]]
; CHECK: %[[#A:]] = OpFunctionParameter %[[#INT]]
; CHECK: %[[#P:]] = OpFunctionParameter %[[#GLOBAL_PTR_CHAR]]
; CHECK: %[[#P:]] = OpFunctionParameter %[[#GLOBAL_PTR_INT]]
; CHECK: OpLabel
; CHECK: %[[#C:]] = OpBitcast %[[#GLOBAL_PTR_INT]] %[[#P]]
; CHECK: OpStore %[[#C]] %[[#A]]
; CHECK: %[[#B:]] = OpLoad %[[#INT]] %[[#C]]
; CHECK: OpStore %[[#P]] %[[#A]]
; CHECK: %[[#B:]] = OpLoad %[[#INT]] %[[#P]]
; CHECK: OpReturnValue %[[#B]]
; CHECK: OpFunctionEnd
3 changes: 0 additions & 3 deletions llvm/test/CodeGen/SPIRV/half_no_extension.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@

; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV

; TODO(#60133): Requires updates following opaque pointer migration.
; XFAIL: *

; CHECK-SPIRV: OpCapability Float16Buffer
; CHECK-SPIRV-NOT: OpCapability Float16

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,10 @@
; CHECK-DAG: %[[#UNDEF:]] = OpUndef %[[#NESTED_STRUCT]]

; CHECK: %[[#]] = OpFunction %[[#]] None %[[#]]
; CHECK-NEXT: %[[#PTR:]] = OpFunctionParameter %[[#]]
; CHECK-NEXT: %[[#]] = OpLabel
; CHECK-NEXT: %[[#BC:]] = OpBitcast %[[#]] %[[#PTR]]
; CHECK-NEXT: OpStore %[[#BC]] %[[#UNDEF]] Aligned 4
; CHECK-NEXT: OpReturn
; CHECK-NEXT: OpFunctionEnd
; CHECK: %[[#]] = OpLabel
; CHECK: OpStore %[[#]] %[[#UNDEF]] Aligned 4
; CHECK: OpReturn
; CHECK: OpFunctionEnd

%struct = type {
i32,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@
; CHECK-DAG: %[[#UNDEF:]] = OpUndef %[[#STRUCT]]

; CHECK: %[[#]] = OpFunction %[[#]] None %[[#]]
; CHECK-NEXT: %[[#PTR:]] = OpFunctionParameter %[[#]]
; CHECK-NEXT: %[[#]] = OpLabel
; CHECK-NEXT: %[[#BC:]] = OpBitcast %[[#]] %[[#PTR]]
; CHECK-NEXT: OpStore %[[#BC]] %[[#UNDEF]] Aligned 4
; CHECK-NEXT: OpReturn
; CHECK-NEXT: OpFunctionEnd
; CHECK: %[[#]] = OpLabel
; CHECK: OpStore %[[#]] %[[#UNDEF]] Aligned 4
; CHECK: OpReturn
; CHECK: OpFunctionEnd

define void @foo(ptr %ptr) {
store { i32, i16 } undef, ptr %ptr
Expand Down
13 changes: 5 additions & 8 deletions llvm/test/CodeGen/SPIRV/opaque_pointers.ll
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK

; CHECK-DAG: %[[#Int8Ty:]] = OpTypeInt 8 0
; CHECK-DAG: %[[#PtrInt8Ty:]] = OpTypePointer Function %[[#Int8Ty]]
; CHECK-DAG: %[[#Int32Ty:]] = OpTypeInt 32 0
; CHECK-DAG: %[[#PtrInt32Ty:]] = OpTypePointer Function %[[#Int32Ty]]
; CHECK-DAG: %[[#Int64Ty:]] = OpTypeInt 64 0
; CHECK-DAG: %[[#PtrInt64Ty:]] = OpTypePointer Function %[[#Int64Ty]]
; CHECK-DAG: %[[#FTy:]] = OpTypeFunction %[[#Int64Ty]] %[[#PtrInt8Ty]]
; CHECK-DAG: %[[#FTy:]] = OpTypeFunction %[[#Int64Ty]] %[[#PtrInt32Ty]]
; CHECK-DAG: %[[#Const:]] = OpConstant %[[#Int32Ty]] 0
; CHECK: OpFunction %[[#Int64Ty]] None %[[#FTy]]
; CHECK: %[[#Parm:]] = OpFunctionParameter %[[#PtrInt8Ty]]
; CHECK-DAG: %[[#Bitcast1:]] = OpBitcast %[[#PtrInt32Ty]] %[[#Parm]]
; CHECK: OpStore %[[#Bitcast1]] %[[#Const]] Aligned 4
; CHECK-DAG: %[[#Bitcast2:]] = OpBitcast %[[#PtrInt64Ty]] %[[#Parm]]
; CHECK: %[[#Res:]] = OpLoad %[[#Int64Ty]] %[[#Bitcast2]] Aligned 4
; CHECK: %[[#Param:]] = OpFunctionParameter %[[#PtrInt32Ty]]
; CHECK: OpStore %[[#Param]] %[[#Const]] Aligned 4
; CHECK-DAG: %[[#Bitcast:]] = OpBitcast %[[#PtrInt64Ty]] %[[#Param]]
; CHECK: %[[#Res:]] = OpLoad %[[#Int64Ty]] %[[#Bitcast]] Aligned 4
; CHECK: OpReturnValue %[[#Res]]

define i64 @test(ptr %p) {
Expand Down
24 changes: 10 additions & 14 deletions llvm/test/CodeGen/SPIRV/opencl/basic/get_global_offset.ll
Original file line number Diff line number Diff line change
@@ -1,22 +1,18 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s

; CHECK: OpEntryPoint Kernel %[[#test_func:]] "test"
; CHECK: OpDecorate %[[#f2_decl:]] LinkageAttributes "BuiltInGlobalOffset" Import
; CHECK: %[[#int_ty:]] = OpTypeInt 8 0
; CHECK: %[[#void_ty:]] = OpTypeVoid
; CHECK: %[[#iptr_ty:]] = OpTypePointer CrossWorkgroup %[[#int_ty]]
; CHECK: %[[#func_ty:]] = OpTypeFunction %[[#void_ty]] %[[#iptr_ty]]
; CHECK: %[[#int64_ty:]] = OpTypeInt 64 0
; CHECK: %[[#vec_ty:]] = OpTypeVector %[[#int64_ty]] 3
; CHECK: %[[#func2_ty:]] = OpTypeFunction %[[#vec_ty]]
; CHECK: %[[#int32_ty:]] = OpTypeInt 32 0
; CHECK: %[[#i32ptr_ty:]] = OpTypePointer CrossWorkgroup %[[#int32_ty]]
; CHECK-DAG: OpEntryPoint Kernel %[[#test_func:]] "test"
; CHECK-DAG: OpDecorate %[[#f2_decl:]] LinkageAttributes "BuiltInGlobalOffset" Import
; CHECK-DAG: %[[#int32_ty:]] = OpTypeInt 32 0
; CHECK-DAG: %[[#i32ptr_ty:]] = OpTypePointer CrossWorkgroup %[[#int32_ty]]
; CHECK-DAG: %[[#void_ty:]] = OpTypeVoid
; CHECK-DAG: %[[#func_ty:]] = OpTypeFunction %[[#void_ty]] %[[#i32ptr_ty]]
; CHECK-DAG: %[[#int64_ty:]] = OpTypeInt 64 0
; CHECK-DAG: %[[#vec_ty:]] = OpTypeVector %[[#int64_ty]] 3
; CHECK-DAG: %[[#func2_ty:]] = OpTypeFunction %[[#vec_ty]]
;; TODO: add 64-bit constant defs
; CHECK: %[[#f2_decl]] = OpFunction %[[#vec_ty]] Pure %[[#func2_ty]]
; CHECK-DAG: %[[#f2_decl]] = OpFunction %[[#vec_ty]] Pure %[[#func2_ty]]
; CHECK: OpFunctionEnd
;; Check that the function register name does not match other registers
; CHECK-NOT: %[[#int_ty]] = OpFunction
; CHECK-NOT: %[[#iptr_ty]] = OpFunction
; CHECK-NOT: %[[#void_ty]] = OpFunction
; CHECK-NOT: %[[#func_ty]] = OpFunction
; CHECK-NOT: %[[#int64_ty]] = OpFunction
Expand Down

This file was deleted.

This file was deleted.

20 changes: 14 additions & 6 deletions llvm/test/CodeGen/SPIRV/opencl/vload2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,28 @@
; CHECK-DAG: %[[#VINT64:]] = OpTypeVector %[[#INT64]] 2
; CHECK-DAG: %[[#VFLOAT:]] = OpTypeVector %[[#FLOAT]] 2
; CHECK-DAG: %[[#PTRINT8:]] = OpTypePointer CrossWorkgroup %[[#INT8]]
; CHECK-DAG: %[[#PTRINT16:]] = OpTypePointer CrossWorkgroup %[[#INT16]]
; CHECK-DAG: %[[#PTRINT32:]] = OpTypePointer CrossWorkgroup %[[#INT32]]
; CHECK-DAG: %[[#PTRINT64:]] = OpTypePointer CrossWorkgroup %[[#INT64]]
; CHECK-DAG: %[[#PTRFLOAT:]] = OpTypePointer CrossWorkgroup %[[#FLOAT]]

; CHECK: %[[#OFFSET:]] = OpFunctionParameter %[[#INT64]]
; CHECK: %[[#ADDRESS:]] = OpFunctionParameter %[[#PTRINT8]]

define spir_kernel void @test_fn(i64 %offset, ptr addrspace(1) %address) {
; CHECK: %[[#]] = OpExtInst %[[#VINT8]] %[[#IMPORT]] vloadn %[[#OFFSET]] %[[#ADDRESS]] 2
; CHECK: %[[#CASTorPARAMofPTRI8:]] = {{OpBitcast|OpFunctionParameter}}{{.*}}%[[#PTRINT8]]{{.*}}
; CHECK: %[[#]] = OpExtInst %[[#VINT8]] %[[#IMPORT]] vloadn %[[#OFFSET]] %[[#CASTorPARAMofPTRI8]] 2
%call1 = call spir_func <2 x i8> @_Z6vload2mPU3AS1Kc(i64 %offset, ptr addrspace(1) %address)
; CHECK: %[[#]] = OpExtInst %[[#VINT16]] %[[#IMPORT]] vloadn %[[#OFFSET]] %[[#ADDRESS]] 2
; CHECK: %[[#CASTorPARAMofPTRI16:]] = {{OpBitcast|OpFunctionParameter}}{{.*}}%[[#PTRINT16]]{{.*}}
; CHECK: %[[#]] = OpExtInst %[[#VINT16]] %[[#IMPORT]] vloadn %[[#OFFSET]] %[[#CASTorPARAMofPTRI16]] 2
%call2 = call spir_func <2 x i16> @_Z6vload2mPU3AS1Ks(i64 %offset, ptr addrspace(1) %address)
; CHECK: %[[#]] = OpExtInst %[[#VINT32]] %[[#IMPORT]] vloadn %[[#OFFSET]] %[[#ADDRESS]] 2
; CHECK: %[[#CASTorPARAMofPTRI32:]] = {{OpBitcast|OpFunctionParameter}}{{.*}}%[[#PTRINT32]]{{.*}}
; CHECK: %[[#]] = OpExtInst %[[#VINT32]] %[[#IMPORT]] vloadn %[[#OFFSET]] %[[#CASTorPARAMofPTRI32]] 2
%call3 = call spir_func <2 x i32> @_Z6vload2mPU3AS1Ki(i64 %offset, ptr addrspace(1) %address)
; CHECK: %[[#]] = OpExtInst %[[#VINT64]] %[[#IMPORT]] vloadn %[[#OFFSET]] %[[#ADDRESS]] 2
; CHECK: %[[#CASTorPARAMofPTRI64:]] = {{OpBitcast|OpFunctionParameter}}{{.*}}%[[#PTRINT64]]{{.*}}
; CHECK: %[[#]] = OpExtInst %[[#VINT64]] %[[#IMPORT]] vloadn %[[#OFFSET]] %[[#CASTorPARAMofPTRI64]] 2
%call4 = call spir_func <2 x i64> @_Z6vload2mPU3AS1Kl(i64 %offset, ptr addrspace(1) %address)
; CHECK: %[[#]] = OpExtInst %[[#VFLOAT]] %[[#IMPORT]] vloadn %[[#OFFSET]] %[[#ADDRESS]] 2
; CHECK: %[[#CASTorPARAMofPTRFLOAT:]] = {{OpBitcast|OpFunctionParameter}}{{.*}}%[[#PTRFLOAT]]{{.*}}
; CHECK: %[[#]] = OpExtInst %[[#VFLOAT]] %[[#IMPORT]] vloadn %[[#OFFSET]] %[[#CASTorPARAMofPTRFLOAT]] 2
%call5 = call spir_func <2 x float> @_Z6vload2mPU3AS1Kf(i64 %offset, ptr addrspace(1) %address)
ret void
}
Expand Down
23 changes: 23 additions & 0 deletions llvm/test/CodeGen/SPIRV/opencl/vstore2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
; This test only intends to check the vstoren builtin name resolution.
; The calls to the OpenCL builtins are not valid and will not pass SPIR-V validation.

; CHECK-DAG: %[[#IMPORT:]] = OpExtInstImport "OpenCL.std"

; CHECK-DAG: %[[#VOID:]] = OpTypeVoid
; CHECK-DAG: %[[#INT8:]] = OpTypeInt 8 0
; CHECK-DAG: %[[#INT64:]] = OpTypeInt 64 0
; CHECK-DAG: %[[#VINT8:]] = OpTypeVector %[[#INT8]] 2
; CHECK-DAG: %[[#PTRINT8:]] = OpTypePointer CrossWorkgroup %[[#INT8]]

; CHECK: %[[#DATA:]] = OpFunctionParameter %[[#VINT8]]
; CHECK: %[[#OFFSET:]] = OpFunctionParameter %[[#INT64]]
; CHECK: %[[#ADDRESS:]] = OpFunctionParameter %[[#PTRINT8]]

define spir_kernel void @test_fn(<2 x i8> %data, i64 %offset, ptr addrspace(1) %address) {
; CHECK: %[[#]] = OpExtInst %[[#VOID]] %[[#IMPORT]] vstoren %[[#DATA]] %[[#OFFSET]] %[[#ADDRESS]]
call spir_func void @_Z7vstore2Dv2_cmPU3AS1c(<2 x i8> %data, i64 %offset, ptr addrspace(1) %address)
ret void
}

declare spir_func void @_Z7vstore2Dv2_cmPU3AS1c(<2 x i8>, i64, ptr addrspace(1))
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -print-after-all -o - 2>&1 | FileCheck %s

; CHECK: *** IR Dump After SPIRV emit intrinsics (emit-intrinsics) ***

define spir_kernel void @test(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %srcimg) {
; CHECK-NOT: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef)
%call = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %srcimg)
ret void
; CHECK: }
}

declare spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0))
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -print-after-all -o - 2>&1 | FileCheck %s

; CHECK: *** IR Dump After SPIRV emit intrinsics (emit-intrinsics) ***

define spir_kernel void @test_pointer_cast(ptr addrspace(1) %src) {
; CHECK-NOT: call void @llvm.spv.assign.ptr.type.p1(ptr addrspace(1) %src, metadata i8 undef, i32 1)
; CHECK: call void @llvm.spv.assign.ptr.type.p1(ptr addrspace(1) %src, metadata i32 0, i32 1)
%b = bitcast ptr addrspace(1) %src to ptr addrspace(1)
%g = getelementptr inbounds i32, ptr addrspace(1) %b, i64 52
ret void
; CHECK: }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -print-after-all -o - 2>&1 | FileCheck %s

; CHECK: *** IR Dump After SPIRV emit intrinsics (emit-intrinsics) ***

define spir_kernel void @test(ptr addrspace(1) %srcimg) {
; CHECK: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef)
%call1 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg)
; CHECK-NOT: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef)
%call2 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg)
ret void
; CHECK: }
}

declare spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1))
20 changes: 6 additions & 14 deletions llvm/test/CodeGen/SPIRV/pointers/getelementptr-kernel-arg-char.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,20 @@

; CHECK-DAG: %[[#INT8:]] = OpTypeInt 8 0
; CHECK-DAG: %[[#INT64:]] = OpTypeInt 64 0
; CHECK-DAG: %[[#VINT8:]] = OpTypeVector %[[#INT8]] 2
; CHECK-DAG: %[[#PTRINT8:]] = OpTypePointer Workgroup %[[#INT8]]
; CHECK-DAG: %[[#PTRVINT8:]] = OpTypePointer Workgroup %[[#VINT8]]
; CHECK-DAG: %[[#CONST:]] = OpConstant %[[#INT64]] 1

; CHECK: %[[#PARAM1:]] = OpFunctionParameter %[[#PTRVINT8]]
define spir_kernel void @test1(ptr addrspace(3) %address) !kernel_arg_type !1 {
; CHECK: %[[#BITCAST1:]] = OpBitcast %[[#PTRINT8]] %[[#PARAM1]]
; CHECK: %[[#]] = OpInBoundsPtrAccessChain %[[#PTRINT8]] %[[#BITCAST1]] %[[#CONST]]
; CHECK: %[[#PARAM1:]] = OpFunctionParameter %[[#PTRINT8]]
define spir_kernel void @test1(ptr addrspace(3) %address) {
; CHECK: %[[#]] = OpInBoundsPtrAccessChain %[[#PTRINT8]] %[[#PARAM1]] %[[#CONST]]
%cast = bitcast ptr addrspace(3) %address to ptr addrspace(3)
%gep = getelementptr inbounds i8, ptr addrspace(3) %cast, i64 1
ret void
}

; CHECK: %[[#PARAM2:]] = OpFunctionParameter %[[#PTRVINT8]]
define spir_kernel void @test2(ptr addrspace(3) %address) !kernel_arg_type !1 {
; CHECK: %[[#BITCAST2:]] = OpBitcast %[[#PTRINT8]] %[[#PARAM2]]
; CHECK: %[[#]] = OpInBoundsPtrAccessChain %[[#PTRINT8]] %[[#BITCAST2]] %[[#CONST]]
; CHECK: %[[#PARAM2:]] = OpFunctionParameter %[[#PTRINT8]]
define spir_kernel void @test2(ptr addrspace(3) %address) {
; CHECK: %[[#]] = OpInBoundsPtrAccessChain %[[#PTRINT8]] %[[#PARAM2]] %[[#CONST]]
%gep = getelementptr inbounds i8, ptr addrspace(3) %address, i64 1
ret void
}

declare spir_func <2 x i8> @_Z6vload2mPU3AS3Kc(i64, ptr addrspace(3))

!1 = !{!"char2*"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}

; CHECK-DAG: %[[#INT8:]] = OpTypeInt 8 0
; CHECK-DAG: %[[#PTRINT8:]] = OpTypePointer CrossWorkgroup %[[#INT8]]

define spir_kernel void @test_fn(ptr addrspace(1) %src) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_type_qual !4 !kernel_arg_base_type !3 {
entry:
%g1 = call spir_func i64 @_Z13get_global_idj(i32 0)
%i1 = insertelement <3 x i64> undef, i64 %g1, i32 0
%g2 = call spir_func i64 @_Z13get_global_idj(i32 1)
%i2 = insertelement <3 x i64> %i1, i64 %g2, i32 1
%g3 = call spir_func i64 @_Z13get_global_idj(i32 2)
%i3 = insertelement <3 x i64> %i2, i64 %g3, i32 2
%e = extractelement <3 x i64> %i3, i32 0
%c1 = trunc i64 %e to i32
%c2 = sext i32 %c1 to i64
%b = bitcast ptr addrspace(1) %src to ptr addrspace(1)

; Make sure that builtin call directly uses either a OpBitcast or OpFunctionParameter of i8* type
; CHECK: %[[#BITCASTorPARAMETER:]] = {{OpBitcast|OpFunctionParameter}}{{.*}}%[[#PTRINT8]]{{.*}}
; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] vloadn %[[#]] %[[#BITCASTorPARAMETER]] 3
%call = call spir_func <3 x i8> @_Z6vload3mPU3AS1Kc(i64 %c2, ptr addrspace(1) %b)

ret void
}

declare spir_func i64 @_Z13get_global_idj(i32)

declare spir_func <3 x i8> @_Z6vload3mPU3AS1Kc(i64, ptr addrspace(1))

!1 = !{i32 1}
!2 = !{!"none"}
!3 = !{!"char3*"}
!4 = !{!""}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown %s -o - 2>&1 | FileCheck %s

; CHECK: LLVM ERROR: Type mismatch {{.*}}

define spir_kernel void @test(ptr addrspace(1) %srcimg) {
%call1 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg)
%call2 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_rw(ptr addrspace(1) %srcimg)
ret void
}

declare spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1))
declare spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_rw(ptr addrspace(1))
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}

define spir_kernel void @test(ptr addrspace(1) %srcimg) {
; CHECK: %[[#VOID:]] = OpTypeVoid
; CHECK: %[[#IMAGE:]] = OpTypeImage %[[#VOID]] 2D 0 0 0 0 Unknown ReadOnly
; CHECK: %[[#PARAM:]] = OpFunctionParameter %[[#IMAGE]]
; CHECK: %[[#]] = OpImageQuerySizeLod %[[#]] %[[#PARAM]] %[[#]]
%call = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg)
ret void
}

declare spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1))
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/SPIRV/pointers/ptr-argument-byref.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}

target triple = "spirv64-unknown-unknown"

; CHECK-DAG: %[[#VOID:]] = OpTypeVoid
; CHECK-DAG: %[[#INT32:]] = OpTypeInt 32 0
; CHECK-DAG: %[[#STRUCT1:]] = OpTypeStruct %[[#INT32]]
; CHECK-DAG: %[[#CONST:]] = OpConstant %[[#INT32]] 7
; CHECK-DAG: %[[#ARRAY:]] = OpTypeArray %[[#STRUCT1]] %[[#CONST]]
; CHECK-DAG: %[[#STRUCT2:]] = OpTypeStruct %[[#ARRAY]]
; CHECK-DAG: %[[#PTR:]] = OpTypePointer Function %[[#STRUCT2]]

; CHECK: %[[#FUNC:]] = OpTypeFunction %[[#VOID]] %[[#PTR]]
; CHECK: %[[#]] = OpFunction %[[#VOID]] None %[[#FUNC]]
; CHECK: %[[#]] = OpFunctionParameter %[[#PTR]]

%struct.S = type { i32 }
%struct.__wrapper_class = type { [7 x %struct.S] }

define spir_kernel void @foo(ptr noundef byref(%struct.__wrapper_class) align 4 %_arg_Arr) {
entry:
ret void
}
22 changes: 22 additions & 0 deletions llvm/test/CodeGen/SPIRV/pointers/ptr-argument-byval.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}

; CHECK-DAG: %[[#VOID:]] = OpTypeVoid
; CHECK-DAG: %[[#INT32:]] = OpTypeInt 32 0
; CHECK-DAG: %[[#STRUCT1:]] = OpTypeStruct %[[#INT32]]
; CHECK-DAG: %[[#CONST:]] = OpConstant %[[#INT32]] 7
; CHECK-DAG: %[[#ARRAY:]] = OpTypeArray %[[#STRUCT1]] %[[#CONST]]
; CHECK-DAG: %[[#STRUCT2:]] = OpTypeStruct %[[#ARRAY]]
; CHECK-DAG: %[[#PTR:]] = OpTypePointer Function %[[#STRUCT2]]

; CHECK: %[[#FUNC:]] = OpTypeFunction %[[#VOID]] %[[#PTR]]
; CHECK: %[[#]] = OpFunction %[[#VOID]] None %[[#FUNC]]
; CHECK: %[[#]] = OpFunctionParameter %[[#PTR]]

%struct.S = type { i32 }
%struct.__wrapper_class = type { [7 x %struct.S] }

define spir_kernel void @foo(ptr noundef byval(%struct.__wrapper_class) align 4 %_arg_Arr) {
entry:
ret void
}
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/SPIRV/pointers/store-operand-ptr-to-struct.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s

; TODO: OpFunctionParameter should be a pointer of struct base type.
; XFAIL: *

%struct = type {
i32,
i16
}

%nested_struct = type {
%struct,
i16
}

define void @foo(ptr %ptr) {
store %nested_struct undef, ptr %ptr
ret void
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s

; CHECK-DAG: %[[#CHAR:]] = OpTypeInt 8
; CHECK-DAG: %[[#INT:]] = OpTypeInt 32
; CHECK-DAG: %[[#GLOBAL_PTR_INT:]] = OpTypePointer CrossWorkgroup %[[#INT]]
; CHECK-DAG: %[[#GLOBAL_PTR_CHAR:]] = OpTypePointer CrossWorkgroup %[[#CHAR]]

define i32 @foo(i32 %a, ptr addrspace(1) %p) {
store i32 %a, i32 addrspace(1)* %p
Expand All @@ -12,8 +10,6 @@ define i32 @foo(i32 %a, ptr addrspace(1) %p) {
}

; CHECK: %[[#A:]] = OpFunctionParameter %[[#INT]]
; CHECK: %[[#P:]] = OpFunctionParameter %[[#GLOBAL_PTR_CHAR]]
; CHECK: %[[#C:]] = OpBitcast %[[#GLOBAL_PTR_INT]] %[[#P]]
; CHECK: OpStore %[[#C]] %[[#A]]
; CHECK: %[[#B:]] = OpLoad %[[#INT]] %[[#C]]
; CHECK-NOT: %[[#B:]] = OpLoad %[[#INT]] %[[#P]]
; CHECK: %[[#CorP:]] = {{OpBitcast|OpFunctionParameter}}{{.*}}%[[#GLOBAL_PTR_INT]]{{.*}}
; CHECK: OpStore %[[#CorP]] %[[#A]]
; CHECK: %[[#B:]] = OpLoad %[[#INT]] %[[#CorP]]
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/SPIRV/pointers/two-subsequent-bitcasts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@

; CHECK-DAG: %[[#float:]] = OpTypeFloat 32
; CHECK-DAG: %[[#pointer:]] = OpTypePointer CrossWorkgroup %[[#float]]
; CHECK: %[[#A:]] = OpFunctionParameter %[[#]]

define void @foo(float addrspace(1)* %A, i32 %B) {
%cmp = icmp sgt i32 %B, 0
%conv = uitofp i1 %cmp to float
; CHECK: %[[#utof_res:]] = OpConvertUToF %[[#float]] %[[#]]
; CHECK: %[[#bitcast:]] = OpBitcast %[[#pointer]] %[[#A]]
; CHECK: OpStore %[[#bitcast]] %[[#utof_res]]
; CHECK-DAG: %[[#utof_res:]] = OpConvertUToF %[[#float]] %[[#]]
; CHECK-DAG: %[[#bitcastORparam:]] = {{OpBitcast|OpFunctionParameter}}{{.*}}%[[#pointer]]{{.*}}
; CHECK: OpStore %[[#bitcastORparam]] %[[#utof_res]]
%BC1 = bitcast float addrspace(1)* %A to i32 addrspace(1)*
%BC2 = bitcast i32 addrspace(1)* %BC1 to float addrspace(1)*
store float %conv, float addrspace(1)* %BC2, align 4;
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/SPIRV/sitofp-with-bool.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@
; CHECK-DAG: %[[#ptr:]] = OpTypePointer CrossWorkgroup %[[#float]]

; CHECK: OpFunction
; CHECK: %[[#A:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#A:]] = OpFunctionParameter %[[#ptr]]
; CHECK: %[[#B:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#cmp_res:]] = OpSGreaterThan %[[#bool]] %[[#B]] %[[#zero]]
; CHECK: %[[#select_res:]] = OpSelect %[[#int_32]] %[[#cmp_res]] %[[#one]] %[[#zero]]
; CHECK: %[[#stof_res:]] = OpConvertSToF %[[#]] %[[#select_res]]
; CHECK: %[[#bitcast:]] = OpBitcast %[[#ptr]] %[[#A]]
; CHECK: OpStore %[[#bitcast]] %[[#stof_res]]
; CHECK: OpStore %[[#A]] %[[#stof_res]]

define dso_local spir_kernel void @K(ptr addrspace(1) nocapture %A, i32 %B) local_unnamed_addr {
entry:
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_cmpxchg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@
;; }

; CHECK-SPIRV: OpName %[[#TEST:]] "test_atomic_cmpxchg"
; CHECK-SPIRV-DAG: %[[#UCHAR:]] = OpTypeInt 8 0
; CHECK-SPIRV-DAG: %[[#UINT:]] = OpTypeInt 32 0
; CHECK-SPIRV-DAG: %[[#UCHAR_PTR:]] = OpTypePointer CrossWorkgroup %[[#UCHAR]]
; CHECK-SPIRV-DAG: %[[#UINT_PTR:]] = OpTypePointer CrossWorkgroup %[[#UINT]]

;; In SPIR-V, atomic_cmpxchg is represented as OpAtomicCompareExchange [2],
;; which also includes memory scope and two memory semantic arguments. The
Expand All @@ -31,7 +30,7 @@
; CHECK-SPIRV-DAG: %[[#RELAXED:]] = OpConstant %[[#UINT]] 0

; CHECK-SPIRV: %[[#TEST]] = OpFunction %[[#]]
; CHECK-SPIRV: %[[#PTR:]] = OpFunctionParameter %[[#UCHAR_PTR]]
; CHECK-SPIRV: %[[#PTR:]] = OpFunctionParameter %[[#UINT_PTR]]
; CHECK-SPIRV: %[[#CMP:]] = OpFunctionParameter %[[#UINT]]
; CHECK-SPIRV: %[[#VAL:]] = OpFunctionParameter %[[#UINT]]
; CHECK-SPIRV: %[[#]] = OpAtomicCompareExchange %[[#UINT]] %[[#PTR]] %[[#WORKGROUP_SCOPE]] %[[#RELAXED]] %[[#RELAXED]] %[[#VAL]] %[[#CMP]]
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/SPIRV/transcoding/OpenCL/atomic_legacy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

; CHECK-SPIRV: OpName %[[#TEST:]] "test_legacy_atomics"
; CHECK-SPIRV-DAG: %[[#UINT:]] = OpTypeInt 32 0
; CHECK-SPIRV-DAG: %[[#UCHAR:]] = OpTypeInt 8 0
; CHECK-SPIRV-DAG: %[[#UCHAR_PTR:]] = OpTypePointer CrossWorkgroup %[[#UCHAR]]
; CHECK-SPIRV-DAG: %[[#UINT_PTR:]] = OpTypePointer CrossWorkgroup %[[#UINT]]

;; In SPIR-V, atomic_add is represented as OpAtomicIAdd [2], which also includes
;; memory scope and memory semantic arguments. The backend applies a default
Expand All @@ -26,7 +25,7 @@
; CHECK-SPIRV-DAG: %[[#RELAXED:]] = OpConstant %[[#UINT]] 0

; CHECK-SPIRV: %[[#TEST]] = OpFunction %[[#]]
; CHECK-SPIRV: %[[#PTR:]] = OpFunctionParameter %[[#UCHAR_PTR]]
; CHECK-SPIRV: %[[#PTR:]] = OpFunctionParameter %[[#UINT_PTR]]
; CHECK-SPIRV: %[[#VAL:]] = OpFunctionParameter %[[#UINT]]
; CHECK-SPIRV: %[[#]] = OpAtomicIAdd %[[#UINT]] %[[#PTR]] %[[#WORKGROUP_SCOPE]] %[[#RELAXED]] %[[#VAL]]
; CHECK-SPIRV: %[[#]] = OpAtomicIAdd %[[#UINT]] %[[#PTR]] %[[#WORKGROUP_SCOPE]] %[[#RELAXED]] %[[#VAL]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
; CHECK-SPIRV: %[[#test_arr2:]] = OpVariable %[[#const_i32x3_ptr]] UniformConstant %[[#test_arr_init]]
; CHECK-SPIRV: %[[#test_arr:]] = OpVariable %[[#const_i32x3_ptr]] UniformConstant %[[#test_arr_init]]

; CHECK-SPIRV-DAG: %[[#i8_ptr:]] = OpTypePointer Function %[[#i8]]
; CHECK-SPIRV-DAG: %[[#const_i8_ptr:]] = OpTypePointer UniformConstant %[[#i8]]
; CHECK-SPIRV-DAG: %[[#i32x3_ptr:]] = OpTypePointer Function %[[#i32x3]]

Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/SPIRV/uitofp-with-bool.ll
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
; SPV-DAG: %[[#pointer:]] = OpTypePointer CrossWorkgroup %[[#float]]

; SPV-DAG: OpFunction
; SPV-DAG: %[[#A:]] = OpFunctionParameter %[[#]]
; SPV-DAG: %[[#A:]] = OpFunctionParameter %[[#pointer]]
; SPV-DAG: %[[#B:]] = OpFunctionParameter %[[#]]
; SPV-DAG: %[[#i1s:]] = OpFunctionParameter %[[#]]
; SPV-DAG: %[[#i1v:]] = OpFunctionParameter %[[#]]
Expand All @@ -82,8 +82,7 @@ entry:
; SPV-DAG: %[[#select_res:]] = OpSelect %[[#int_32]] %[[#cmp_res]] %[[#one_32]] %[[#zero_32]]
; SPV-DAG: %[[#utof_res:]] = OpConvertUToF %[[#float]] %[[#select_res]]
%conv = uitofp i1 %cmp to float
; SPV-DAG: %[[#bitcast:]] = OpBitcast %[[#pointer]] %[[#A]]
; SPV-DAG: OpStore %[[#bitcast]] %[[#utof_res]]
; SPV-DAG: OpStore %[[#A]] %[[#utof_res]]
store float %conv, float addrspace(1)* %A, align 4;

; SPV-DAG: %[[#s1]] = OpSelect %[[#int_8]] %[[#i1s]] %[[#mone_8]] %[[#zero_8]]
Expand Down
25 changes: 25 additions & 0 deletions llvm/test/MC/X86/apx/long-instruction-err.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# RUN: not llvm-mc -triple x86_64 -show-encoding %s 2>&1 | FileCheck %s

# CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15
# CHECK: addq $1234, %cs:-96, %rax
addq $1234, %cs:-96, %rax

# CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15
# CHECK: subq $1234, %fs:257(%rbx, %rcx), %rax
subq $1234, %fs:257(%rbx, %rcx), %rax

# CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15
# CHECK: orq $1234, 257(%ebx, %ecx), %rax
orq $1234, 257(%ebx, %ecx), %rax

# CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15
# CHECK: xorq $1234, %gs:257(%ebx), %rax
xorq $1234, %gs:257(%ebx), %rax

# CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15
# CHECK: {nf} andq $1234, %cs:-96
{nf} andq $1234, %cs:-96

# CHECK: [[#@LINE+2]]:1: error: instruction length exceeds the limit of 15
# CHECK: {evex} adcq $1234, %cs:-96
{evex} adcq $1234, %cs:-96
37 changes: 37 additions & 0 deletions llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
---
name: basic
tracksRegLiveness: true
body: |
bb.0:
successors: %bb.1, %bb.2;
%0:sgpr_64 = CONVERGENCECTRL_ANCHOR
; CHECK: Entry intrinsic cannot be preceded by a convergent operation in the same basic block.
; CHECK: CONVERGENCECTRL_ENTRY
%1:sgpr_64 = CONVERGENCECTRL_ENTRY
; CHECK: Loop intrinsic cannot be preceded by a convergent operation in the same basic block.
; CHECK: CONVERGENCECTRL_LOOP
%2:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
S_CBRANCH_EXECZ %bb.1, implicit $exec
S_BRANCH %bb.2
bb.1:
successors: %bb.2;
; CHECK: Entry intrinsic can occur only in the entry block.
; CHECK: CONVERGENCECTRL_ENTRY
%5:sgpr_64 = CONVERGENCECTRL_ENTRY
bb.2:
; CHECK: Convergence control tokens can only be used by convergent operations.
; CHECK: G_PHI
%6:sgpr_64 = G_PHI %0:sgpr_64, %bb.0, %0:sgpr_64, %bb.1
%7:sgpr_64 = CONVERGENCECTRL_ANCHOR
%8:sgpr_64 = IMPLICIT_DEF
%4:sgpr_64 = SI_CALL %8:sgpr_64, 1, implicit %7:sgpr_64
; CHECK: An operation can use at most one convergence control token.
; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
%9:sgpr_64 = SI_CALL %8:sgpr_64, 2, implicit %7:sgpr_64, implicit %7:sgpr_64
; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 3
%10:sgpr_64 = SI_CALL %8:sgpr_64, 3
...
52 changes: 52 additions & 0 deletions llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
---
name: cycles
body: |
bb.0:
%0:sgpr_64 = CONVERGENCECTRL_ANCHOR
%1:sgpr_64 = IMPLICIT_DEF
S_CBRANCH_EXECZ %bb.9, implicit $exec
S_BRANCH %bb.1
bb.1:
S_CBRANCH_EXECZ %bb.8, implicit $exec
S_BRANCH %bb.5
bb.2:
S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.4
bb.3:
; CHECK: Cycle heart must dominate all blocks in the cycle.
; Irreducible cycle: entries(bb.4 bb.3)
%3:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
S_BRANCH %bb.4
bb.4:
S_BRANCH %bb.3
bb.5:
S_CBRANCH_EXECZ %bb.6, implicit $exec
S_BRANCH %bb.2
bb.6:
S_BRANCH %bb.7
bb.7:
; CHECK: Cycle heart must dominate all blocks in the cycle.
; Reducible cycle: entries(bb.6) bb.7
%4:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
S_BRANCH %bb.6
bb.8:
; CHECK: Two static convergence token uses in a cycle that does not contain either token's definition.
%5:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
%6:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
S_BRANCH %bb.8
bb.9:
; CHECK: Convergence token used by an instruction other than llvm.experimental.convergence.loop in a cycle that does not contain the token's definition.
%7:sgpr_64 = G_SI_CALL %1:sgpr_64, 3, implicit %0:sgpr_64
S_BRANCH %bb.9
...
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
if not "AMDGPU" in config.root.targets:
config.unsupported = True
15 changes: 15 additions & 0 deletions llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
---
name: mixed2
body: |
bb.0:
%0:sgpr_64 = IMPLICIT_DEF
%1:sgpr_64 = SI_CALL %0, 1
; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
; CHECK: CONVERGENCECTRL_ANCHOR
%2:sgpr_64 = CONVERGENCECTRL_ANCHOR
; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
%3:sgpr_64 = SI_CALL %0, 2, implicit %2:sgpr_64
...
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
---
name: region_nesting
body: |
bb.0:
%0:sgpr_64 = CONVERGENCECTRL_ANCHOR
%1:sgpr_64 = CONVERGENCECTRL_ANCHOR
%2:sgpr_64 = IMPLICIT_DEF
%3:sgpr_64 = SI_CALL %2, 1, implicit %0:sgpr_64
; CHECK: Convergence region is not well-nested.
; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
%4:sgpr_64 = SI_CALL %2, 2, implicit %1:sgpr_64
S_CBRANCH_EXECZ %bb.1, implicit $exec
S_BRANCH %bb.2
bb.1:
%5:sgpr_64 = SI_CALL %2, 3, implicit %0:sgpr_64
bb.2:
; CHECK: Convergence region is not well-nested.
; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 4
%6:sgpr_64 = SI_CALL %2, 4, implicit %1:sgpr_64
...
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [

// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const {
// CHECK-NEXT: constexpr static uint8_t MatchTable0[] = {
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(65), GIMT_Encode2(182), /*)*//*default:*//*Label 2*/ GIMT_Encode4(562),
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(69), GIMT_Encode2(186), /*)*//*default:*//*Label 2*/ GIMT_Encode4(562),
// CHECK-NEXT: /*TargetOpcode::G_UNMERGE_VALUES*//*Label 0*/ GIMT_Encode4(478), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_FNEG*//*Label 1*/ GIMT_Encode4(530),
// CHECK-NEXT: // Label 0: @478
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ def MyCombiner: GICombiner<"GenMyCombiner", [

// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const {
// CHECK-NEXT: constexpr static uint8_t MatchTable0[] = {
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(128), /*)*//*default:*//*Label 3*/ GIMT_Encode4(563),
// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(446), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_CONSTANT*//*Label 1*/ GIMT_Encode4(477), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(523),
// CHECK-NEXT: // Label 0: @446
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ GIMT_Encode4(476), // Rule ID 0 //
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(132), /*)*//*default:*//*Label 3*/ GIMT_Encode4(579),
// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(462), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_CONSTANT*//*Label 1*/ GIMT_Encode4(493), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(539),
// CHECK-NEXT: // Label 0: @462
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ GIMT_Encode4(492), // Rule ID 0 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled),
// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32,
// CHECK-NEXT: // MIs[0] a
Expand All @@ -51,10 +51,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIR_AddImm8, /*InsnID*/0, /*Imm*/0,
// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0,
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 4: @476
// CHECK-NEXT: // Label 4: @492
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 1: @477
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ GIMT_Encode4(522), // Rule ID 2 //
// CHECK-NEXT: // Label 1: @493
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ GIMT_Encode4(538), // Rule ID 2 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled),
// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32,
// CHECK-NEXT: // MIs[0] a
Expand All @@ -66,10 +66,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIR_AddCImm, /*InsnID*/0, /*Type*/GILLT_s32, /*Imm*/GIMT_Encode8(42),
// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0,
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 5: @522
// CHECK-NEXT: // Label 5: @538
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 2: @523
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ GIMT_Encode4(562), // Rule ID 1 //
// CHECK-NEXT: // Label 2: @539
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ GIMT_Encode4(578), // Rule ID 1 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled),
// CHECK-NEXT: // MIs[0] a
// CHECK-NEXT: // No operand predicates
Expand All @@ -83,10 +83,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0,
// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0,
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 6: @562
// CHECK-NEXT: // Label 6: @578
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 3: @563
// CHECK-NEXT: // Label 3: @579
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: }; // Size: 564 bytes
// CHECK-NEXT: }; // Size: 580 bytes
// CHECK-NEXT: return MatchTable0;
// CHECK-NEXT: }
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [

// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const {
// CHECK-NEXT: constexpr static uint8_t MatchTable0[] = {
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(115), GIMT_Encode2(117), /*)*//*default:*//*Label 2*/ GIMT_Encode4(132),
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(119), GIMT_Encode2(121), /*)*//*default:*//*Label 2*/ GIMT_Encode4(132),
// CHECK-NEXT: /*TargetOpcode::G_INTRINSIC*//*Label 0*/ GIMT_Encode4(18),
// CHECK-NEXT: /*TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS*//*Label 1*/ GIMT_Encode4(73),
// CHECK-NEXT: // Label 0: @18
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [

// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const {
// CHECK-NEXT: constexpr static uint8_t MatchTable0[] = {
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(120), GIMT_Encode2(183), /*)*//*default:*//*Label 3*/ GIMT_Encode4(380),
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(124), GIMT_Encode2(187), /*)*//*default:*//*Label 3*/ GIMT_Encode4(380),
// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 0*/ GIMT_Encode4(262), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 1*/ GIMT_Encode4(298), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_FPEXT*//*Label 2*/ GIMT_Encode4(344),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [

// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const {
// CHECK-NEXT: constexpr static uint8_t MatchTable0[] = {
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(65), GIMT_Encode2(69), /*)*//*default:*//*Label 2*/ GIMT_Encode4(88),
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(69), GIMT_Encode2(73), /*)*//*default:*//*Label 2*/ GIMT_Encode4(88),
// CHECK-NEXT: /*TargetOpcode::G_UNMERGE_VALUES*//*Label 0*/ GIMT_Encode4(26), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_BUILD_VECTOR*//*Label 1*/ GIMT_Encode4(57),
// CHECK-NEXT: // Label 0: @26
Expand Down Expand Up @@ -98,6 +98,6 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 2: @88
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: };
// CHECK-NEXT: }; // Size: 89 bytes
// CHECK-NEXT: return MatchTable0;
// CHECK-NEXT: }
62 changes: 31 additions & 31 deletions llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,15 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// Verify match table.
// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const {
// CHECK-NEXT: constexpr static uint8_t MatchTable0[] = {
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(128), /*)*//*default:*//*Label 6*/ GIMT_Encode4(661),
// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(446), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(488), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(541), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(583), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(608), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(621),
// CHECK-NEXT: // Label 0: @446
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(475), // Rule ID 4 //
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(132), /*)*//*default:*//*Label 6*/ GIMT_Encode4(677),
// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(462), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(504), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(557), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(599), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(624), GIMT_Encode4(0),
// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(637),
// CHECK-NEXT: // Label 0: @462
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(491), // Rule ID 4 //
// CHECK-NEXT: GIM_CheckFeatures, GIMT_Encode2(GIFBS_HasAnswerToEverything),
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule3Enabled),
// CHECK-NEXT: // MIs[0] a
Expand All @@ -155,8 +155,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: // Combiner Rule #3: InstTest1
// CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0),
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 7: @475
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(487), // Rule ID 3 //
// CHECK-NEXT: // Label 7: @491
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(503), // Rule ID 3 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled),
// CHECK-NEXT: // MIs[0] a
// CHECK-NEXT: // No operand predicates
Expand All @@ -165,10 +165,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: // Combiner Rule #2: InstTest0
// CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner1),
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 8: @487
// CHECK-NEXT: // Label 8: @503
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 1: @488
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(540), // Rule ID 6 //
// CHECK-NEXT: // Label 1: @504
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(556), // Rule ID 6 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule5Enabled),
// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32,
// CHECK-NEXT: // MIs[0] dst
Expand All @@ -186,10 +186,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/1, /*OpIdx*/1, // z
// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0,
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 9: @540
// CHECK-NEXT: // Label 9: @556
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 2: @541
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(582), // Rule ID 5 //
// CHECK-NEXT: // Label 2: @557
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(598), // Rule ID 5 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule4Enabled),
// CHECK-NEXT: // MIs[0] tmp
// CHECK-NEXT: GIM_RecordInsnIgnoreCopies, /*DefineMI*/1, /*MI*/0, /*OpIdx*/0, // MIs[1]
Expand All @@ -207,32 +207,32 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner2),
// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0,
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 10: @582
// CHECK-NEXT: // Label 10: @598
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 3: @583
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(595), // Rule ID 0 //
// CHECK-NEXT: // Label 3: @599
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(611), // Rule ID 0 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled),
// CHECK-NEXT: // Combiner Rule #0: WipOpcodeTest0; wip_match_opcode 'G_TRUNC'
// CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0),
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 11: @595
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(607), // Rule ID 1 //
// CHECK-NEXT: // Label 11: @611
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(623), // Rule ID 1 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled),
// CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_TRUNC'
// CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0),
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 12: @607
// CHECK-NEXT: // Label 12: @623
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 4: @608
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(620), // Rule ID 2 //
// CHECK-NEXT: // Label 4: @624
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(636), // Rule ID 2 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled),
// CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_SEXT'
// CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0),
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 13: @620
// CHECK-NEXT: // Label 13: @636
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 5: @621
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(660), // Rule ID 7 //
// CHECK-NEXT: // Label 5: @637
// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(676), // Rule ID 7 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule6Enabled),
// CHECK-NEXT: // MIs[0] dst
// CHECK-NEXT: // No operand predicates
Expand All @@ -247,10 +247,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0,
// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0,
// CHECK-NEXT: GIR_Done,
// CHECK-NEXT: // Label 14: @660
// CHECK-NEXT: // Label 14: @676
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 6: @661
// CHECK-NEXT: // Label 6: @677
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: }; // Size: 662 bytes
// CHECK-NEXT: }; // Size: 678 bytes
// CHECK-NEXT: return MatchTable0;
// CHECK-NEXT: }
54 changes: 54 additions & 0 deletions llvm/test/Transforms/Inline/update_invoke_counts.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; A pre-commit test to show that branch weight associated with invoke are not updated.
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s

declare i32 @__gxx_personality_v0(...)

define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 {
call void @callee(ptr %func), !prof !16
ret void
}

declare void @inner_callee(ptr %func)

define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 {
invoke void @inner_callee(ptr %func)
to label %ret unwind label %lpad, !prof !18

ret:
ret void

lpad:
%exn = landingpad {ptr, i32}
cleanup
unreachable
}

!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"SampleProfile"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 10}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 2000}
!8 = !{!"NumCounts", i64 2}
!9 = !{!"NumFunctions", i64 2}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
!15 = !{!"function_entry_count", i64 1000}
!16 = !{!"branch_weights", i32 1000}
!17 = !{!"function_entry_count", i32 1500}
!18 = !{!"branch_weights", i32 1500}

; CHECK-LABEL: @caller(
; CHECK: invoke void @inner_callee(
; CHECK-NEXT: {{.*}} !prof ![[PROF0:[0-9]+]]

; CHECK-LABL: @callee(
; CHECK: invoke void @inner_callee(
; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]]

; CHECK: ![[PROF0]] = !{!"branch_weights", i32 1000}
202 changes: 36 additions & 166 deletions llvm/test/Transforms/Inline/update_invoke_value_profile.ll
Original file line number Diff line number Diff line change
@@ -1,185 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; A pre-commit test to show that value profiles associated with invoke are not updated.
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s

; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=1000 -S | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%class.Error = type { i32 }
@_ZTI5Error = external constant { ptr, ptr }

define i32 @callee(ptr %b) personality ptr @__gxx_personality_v0 !prof !17 {
; CHECK-LABEL: define i32 @callee(
; CHECK-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF0:![0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
; CHECK-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8
; CHECK-NEXT: [[CALL:%.*]] = invoke i32 [[TMP0]](ptr [[B]])
; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: lpad:
; CHECK-NEXT: [[TMP1:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch ptr @_ZTI5Error
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 1
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error)
; CHECK-NEXT: [[MATCHES:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br i1 [[MATCHES]], label [[CATCH:%.*]], label [[EHCLEANUP:%.*]]
; CHECK: catch:
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 0
; CHECK-NEXT: [[TMP5:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: store i32 [[TMP6]], ptr [[E]], align 4
; CHECK-NEXT: [[CALL3:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr [[E]])
; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD1:%.*]]
; CHECK: invoke.cont2:
; CHECK-NEXT: br label [[TRY_CONT]]
; CHECK: try.cont:
; CHECK-NEXT: [[RET_0:%.*]] = phi i32 [ [[CALL3]], [[INVOKE_CONT2]] ], [ [[CALL]], [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[RET_0]]
; CHECK: lpad1:
; CHECK-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: cleanup
; CHECK-NEXT: invoke void @__cxa_end_catch()
; CHECK-NEXT: to label [[INVOKE_CONT4:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
; CHECK: invoke.cont4:
; CHECK-NEXT: br label [[EHCLEANUP]]
; CHECK: ehcleanup:
; CHECK-NEXT: [[LPAD_VAL7_MERGED:%.*]] = phi { ptr, i32 } [ [[TMP7]], [[INVOKE_CONT4]] ], [ [[TMP1]], [[LPAD]] ]
; CHECK-NEXT: resume { ptr, i32 } [[LPAD_VAL7_MERGED]]
; CHECK: terminate.lpad:
; CHECK-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: catch ptr null
; CHECK-NEXT: unreachable
;
entry:
%e = alloca %class.Error
%vtable = load ptr, ptr %b
%0 = load ptr, ptr %vtable
%call = invoke i32 %0(ptr %b)
to label %try.cont unwind label %lpad, !prof !15

lpad:
%1 = landingpad { ptr, i32 }
cleanup
catch ptr @_ZTI5Error
%2 = extractvalue { ptr, i32 } %1, 1
%3 = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error)
%matches = icmp eq i32 %2, %3
br i1 %matches, label %catch, label %ehcleanup

catch:
%4 = extractvalue { ptr, i32 } %1, 0
%5 = tail call ptr @__cxa_begin_catch(ptr %4)
%6 = load i32, ptr %5
store i32 %6, ptr %e
%call3 = invoke i32 @_ZN5Error10error_codeEv(ptr %e)
to label %invoke.cont2 unwind label %lpad1

invoke.cont2:
br label %try.cont
declare i32 @__gxx_personality_v0(...)

try.cont:
%ret.0 = phi i32 [ %call3, %invoke.cont2 ], [ %call, %entry ]
ret i32 %ret.0
define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 {
call void @callee(ptr %func), !prof !16
ret void
}

lpad1:
%7 = landingpad { ptr, i32 }
cleanup
invoke void @__cxa_end_catch()
to label %invoke.cont4 unwind label %terminate.lpad
declare void @inner_callee(ptr %func)

invoke.cont4:
br label %ehcleanup
define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 {
invoke void %func()
to label %ret unwind label %lpad, !prof !18

ehcleanup:
%lpad.val7.merged = phi { ptr, i32 } [ %7, %invoke.cont4 ], [ %1, %lpad ]
resume { ptr, i32 } %lpad.val7.merged
ret:
ret void

terminate.lpad:
%8 = landingpad { ptr, i32 }
catch ptr null
lpad:
%exn = landingpad {ptr, i32}
cleanup
unreachable
}

define i32 @caller(ptr %b) !prof !16 {
; CHECK-LABEL: define i32 @caller(
; CHECK-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF2:![0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[E_I:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[E_I]])
; CHECK-NEXT: [[VTABLE_I:%.*]] = load ptr, ptr [[B]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VTABLE_I]], align 8
; CHECK-NEXT: [[CALL_I:%.*]] = invoke i32 [[TMP0]](ptr [[B]])
; CHECK-NEXT: to label [[CALLEE_EXIT:%.*]] unwind label [[LPAD_I:%.*]], !prof [[PROF3:![0-9]+]]
; CHECK: lpad.i:
; CHECK-NEXT: [[TMP1:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch ptr @_ZTI5Error
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 1
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error)
; CHECK-NEXT: [[MATCHES_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br i1 [[MATCHES_I]], label [[CATCH_I:%.*]], label [[EHCLEANUP_I:%.*]]
; CHECK: catch.i:
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 0
; CHECK-NEXT: [[TMP5:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: store i32 [[TMP6]], ptr [[E_I]], align 4
; CHECK-NEXT: [[CALL3_I:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr [[E_I]])
; CHECK-NEXT: to label [[INVOKE_CONT2_I:%.*]] unwind label [[LPAD1_I:%.*]]
; CHECK: invoke.cont2.i:
; CHECK-NEXT: br label [[CALLEE_EXIT]]
; CHECK: lpad1.i:
; CHECK-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: cleanup
; CHECK-NEXT: invoke void @__cxa_end_catch()
; CHECK-NEXT: to label [[INVOKE_CONT4_I:%.*]] unwind label [[TERMINATE_LPAD_I:%.*]]
; CHECK: invoke.cont4.i:
; CHECK-NEXT: br label [[EHCLEANUP_I]]
; CHECK: ehcleanup.i:
; CHECK-NEXT: [[LPAD_VAL7_MERGED_I:%.*]] = phi { ptr, i32 } [ [[TMP7]], [[INVOKE_CONT4_I]] ], [ [[TMP1]], [[LPAD_I]] ]
; CHECK-NEXT: resume { ptr, i32 } [[LPAD_VAL7_MERGED_I]]
; CHECK: terminate.lpad.i:
; CHECK-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: catch ptr null
; CHECK-NEXT: unreachable
; CHECK: callee.exit:
; CHECK-NEXT: [[RET_0_I:%.*]] = phi i32 [ [[CALL3_I]], [[INVOKE_CONT2_I]] ], [ [[CALL_I]], [[ENTRY:%.*]] ]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[E_I]])
; CHECK-NEXT: ret i32 [[RET_0_I]]
;
entry:
%call = tail call i32 @callee(ptr %b)
ret i32 %call
}

declare i32 @__gxx_personality_v0(...)
declare i32 @llvm.eh.typeid.for(ptr)
declare ptr @__cxa_begin_catch(ptr)
declare i32 @_ZN5Error10error_codeEv(ptr)
declare void @__cxa_end_catch()


!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"ProfileFormat", !"SampleProfile"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 10}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!7 = !{!"MaxFunctionCount", i64 2000}
!8 = !{!"NumCounts", i64 2}
!9 = !{!"NumFunctions", i64 2}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
!15 = !{!"VP", i32 0, i64 1500, i64 9261744921105590125, i64 1500}
!16 = !{!"function_entry_count", i64 1000}
!17 = !{!"function_entry_count", i64 1500}
;.
; CHECK: [[PROF0]] = !{!"function_entry_count", i64 500}
; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 500, i64 -9184999152603961491, i64 500}
; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000}
; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 1000, i64 -9184999152603961491, i64 1000}
;.
!15 = !{!"function_entry_count", i64 1000}
!16 = !{!"branch_weights", i64 1000}
!17 = !{!"function_entry_count", i32 1500}
!18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}

; CHECK-LABEL: @caller(
; CHECK: invoke void %func(
; CHECK-NEXT: {{.*}} !prof ![[PROF0:[0-9]+]]

; CHECK-LABL: @callee(
; CHECK: invoke void %func(
; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]]

; CHECK: ![[PROF0]] = !{!"VP", i32 0, i64 1000, i64 123, i64 600, i64 456, i64 400}
; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 500, i64 123, i64 300, i64 456, i64 200}
162 changes: 162 additions & 0 deletions llvm/test/tools/llvm-mca/AMDGPU/gfx940-mfma.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx940 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s

# CHECK: Iterations: 1
# CHECK: Instructions: 58
# CHECK: Total Cycles: 543
# CHECK: Total uOps: 58

# CHECK: Resources:
# CHECK: [0] - HWBranch
# CHECK: [1] - HWExport
# CHECK: [2] - HWLGKM
# CHECK: [3] - HWSALU
# CHECK: [4] - HWVALU
# CHECK: [5] - HWVMEM
# CHECK: [6] - HWXDL

v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]

v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]

v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3]
v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3]

v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7]

v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3]
v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3]

v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15]
v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15]

v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3]
v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3]

v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15]
v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15]

v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3]
v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3]

v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15]
v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15]

v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5]
v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5]

v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33]
v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33]

v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65]
v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65]

v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5]
v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5]

v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33]
v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33]

v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65]
v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65]

v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5]
v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5]

v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33]
v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33]

v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]

v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7
v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7

v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]

v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5]
v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5]

v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33]
v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33]

v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65]
v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65]

v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1
v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1

v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1
v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3

v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1
v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5

v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1
v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9

v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1
v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11

# CHECK: [0] [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3]
# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3]
# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65]
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11
Loading