554 changes: 310 additions & 244 deletions llvm/test/CodeGen/AMDGPU/indirect-call.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
; GFX10-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX10-NEXT: s_and_saveexec_b32 s0, s0
; GFX10-NEXT: buffer_load_format_d16_xyz v[5:6], v4, s[4:7], 0 idxen
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX10-NEXT: ; implicit-def: $vgpr4
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: s_cbranch_execnz BB0_1
Expand All @@ -44,6 +46,8 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_load_format_d16_xyz v[5:6], v4, s[4:7], 0 idxen
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-NEXT: ; implicit-def: $vgpr4
; GFX9-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX9-NEXT: s_cbranch_execnz BB0_1
; GFX9-NEXT: ; %bb.2:
Expand All @@ -68,6 +72,8 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
; GFX8-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: buffer_load_format_d16_xyz v[5:6], v4, s[4:7], 0 idxen
; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: ; implicit-def: $vgpr4
; GFX8-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX8-NEXT: s_cbranch_execnz BB0_1
; GFX8-NEXT: ; %bb.2:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-LABEL: bb.2:
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]

Expand All @@ -55,7 +55,7 @@
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
---
Expand Down Expand Up @@ -103,7 +103,7 @@ body: |
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-LABEL: bb.2:
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]

Expand All @@ -128,7 +128,7 @@ body: |
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
---
Expand Down Expand Up @@ -176,7 +176,7 @@ body: |
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-LABEL: bb.2:
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]

Expand All @@ -201,7 +201,7 @@ body: |
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
---
Expand Down Expand Up @@ -286,7 +286,7 @@ body: |
# W64-NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64-NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64-NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64-NO-ADDR64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-NO-ADDR64-LABEL: bb.2:
# W64-NO-ADDR64: $exec = S_MOV_B64 [[SAVEEXEC]]

Expand All @@ -309,7 +309,7 @@ body: |
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,20 @@ define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) #0 {
; GCN-NEXT: s_mov_b32 s5, exec_lo
; GCN-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo
; GCN-NEXT: s_clause 0x1
; GCN-NEXT: flat_load_dwordx2 v[2:3], v[6:7]
; GCN-NEXT: flat_load_dwordx2 v[4:5], v[0:1]
; GCN-NEXT: flat_load_dwordx2 v[4:5], v[6:7]
; GCN-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
; GCN-NEXT: BB0_2: ; Parent Loop BB0_1 Depth=1
; GCN-NEXT: ; => This Inner Loop Header: Depth=2
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: v_readfirstlane_b32 s8, v4
; GCN-NEXT: v_readfirstlane_b32 s9, v5
; GCN-NEXT: v_readfirstlane_b32 s10, v2
; GCN-NEXT: v_readfirstlane_b32 s11, v3
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[4:5]
; GCN-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3]
; GCN-NEXT: v_readfirstlane_b32 s8, v2
; GCN-NEXT: v_readfirstlane_b32 s9, v3
; GCN-NEXT: v_readfirstlane_b32 s10, v4
; GCN-NEXT: v_readfirstlane_b32 s11, v5
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[2:3]
; GCN-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[4:5]
; GCN-NEXT: s_and_b32 s4, vcc_lo, s4
; GCN-NEXT: s_and_saveexec_b32 s4, s4
; GCN-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GCN-NEXT: buffer_store_dword v0, v0, s[8:11], 0 offen
; GCN-NEXT: s_waitcnt_depctr 0xffe3
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s4
Expand Down